dvf/parse.js

73 lines
1.9 KiB
JavaScript
Raw Normal View History

2024-06-01 23:21:29 +02:00
import { parse } from 'csv-parse';
import fs from 'node:fs';
import zlib from 'zlib';
2024-06-02 01:30:12 +02:00
import mysql from 'mysql2';
import dotenv from 'dotenv';
dotenv.config();
2024-06-01 23:21:29 +02:00
// The script expects exactly one command-line argument: the path of the file
// to parse. argv[0] is the node binary and argv[1] the script, hence 3 entries.
if (process.argv.length !== 3) {
  throw new Error('You should give the path of the input file');
}
const file = process.argv[2];
2024-06-02 01:30:12 +02:00
// CSV parser setup.
// Files ending in .gz are read as comma-separated, anything else as
// pipe-separated.
const delimiter = file.endsWith('.gz') ? ',' : '|';

// `columns: true` asks csv-parse for records keyed by column name.
const parser = parse({ columns: true, delimiter });
2024-06-02 01:30:12 +02:00
// MySQL connection, used to escape values and identifiers.
// Its connection string is read from the MYSQL environment variable.
const connectionString = process.env.MYSQL;
if (connectionString === undefined || connectionString === '') {
  throw new Error('MYSQL environment variable not set');
}
const connection = mysql.createConnection(connectionString);
// SQL generation.
// Escaped column identifiers; populated from the first parsed record.
let columns = [];

/**
 * Print one multi-row INSERT statement for a batch of rows.
 *
 * @param {Array<Array<*>>} batch - Rows to insert; each row lists its values
 *   in the same order as `columns`.
 * @returns {void} Nothing is returned; an empty batch prints nothing.
 */
const flushBatch = (batch) => {
  if (batch.length === 0) {
    return;
  }
  const tuples = [];
  for (const row of batch) {
    const escaped = row.map((value) => connection.escape(value));
    tuples.push(`(${escaped.join(', ')})`);
  }
  const statement = `INSERT INTO dvf (${columns.join(', ')}) VALUES ${tuples.join(', ')};`;
  // The statement is only written to stdout; executing it directly is
  // currently disabled:
  // connection.query(statement, (error, results) => {
  //   if (error) throw error;
  //   console.log('Inserted rows:', results.affectedRows);
  // });
  console.log(statement);
};
// Whenever some CSV rows have been parsed, gather them into a batch and
// generate the SQL for that batch.
parser.on('readable', () => {
  const rows = [];
  for (let row = parser.read(); row !== null; row = parser.read()) {
    // The first record supplies the column list; names are escaped as SQL
    // identifiers.
    if (columns.length === 0) {
      columns = Object.keys(row).map((name) => connection.escapeId(name));
    }
    rows.push(Object.values(row));
  }
  flushBatch(rows);
});
// Parse failure: report it, mark the run as failed, and close the MySQL
// connection. The connection is otherwise only closed from the 'end' handler,
// which is not expected to run after an error, so leaving it open would keep
// the process from exiting.
parser.on('error', (err) => {
  console.error(err.message);
  process.exitCode = 1;
  connection.end();
});
// All input has been parsed: close the MySQL connection.
parser.on('end', () => {
  connection.end();
});
2024-06-02 01:30:12 +02:00
// Open the file and, when its name ends in .gz, gunzip it before handing
// the data to the CSV parser.
const inputStream = fs.createReadStream(file);
if (file.endsWith('.gz')) {
  inputStream.pipe(zlib.createGunzip()).pipe(parser);
} else {
  inputStream.pipe(parser);
}