dvf/parse.js
2024-06-02 01:30:12 +02:00

73 lines
1.9 KiB
JavaScript

import { parse } from 'csv-parse';
import fs from 'node:fs';
import zlib from 'zlib';
import mysql from 'mysql2';
import dotenv from 'dotenv';
dotenv.config();
// Exactly one positional argument is expected: the path of the file to parse.
// process.argv also holds the node executable and the script path, hence 3.
if (process.argv.length !== 3) {
  // The argument is opened as a file further down, so ask for a file path.
  throw new Error('You should give the path of the file to parse');
}
const file = process.argv[2];
// CSV parser: files whose name ends in .gz are read as comma-separated,
// every other file as pipe-separated. With `columns: true` each record is
// returned as an object keyed by the header names.
const delimiter = /\.gz$/.test(file) ? ',' : '|';
const parser = parse({
  columns: true,
  delimiter,
});
// MySQL connection, configured from the MYSQL environment variable.
// In this script it is used to escape values and identifiers.
const { MYSQL: connectionString } = process.env;
if (!connectionString) {
  throw new Error('MYSQL environment variable not set');
}
const connection = mysql.createConnection(connectionString);
// SQL generation.
// Escaped column identifiers; filled in by the 'readable' handler from the
// first parsed record.
let columns = [];
/**
 * Builds a single multi-row INSERT statement for the `dvf` table and prints it.
 *
 * Every value is escaped through `connection.escape`. The statement is only
 * written to stdout; it is not sent to the database from here.
 *
 * @param {Array<Array<*>>} batch - Rows to insert, each an array of values in
 *   the same order as `columns`. An empty batch produces no output.
 * @returns {void}
 */
const flushBatch = (batch) => {
  if (batch.length === 0) {
    return;
  }
  const tuples = [];
  for (const row of batch) {
    const escapedValues = row.map((value) => connection.escape(value));
    tuples.push(`(${escapedValues.join(', ')})`);
  }
  const columnList = columns.join(', ');
  const valueList = tuples.join(', ');
  const statement = `INSERT INTO dvf (${columnList}) VALUES ${valueList};`;
  console.log(statement);
};
// Whenever some CSV records have been parsed, gather them into one batch and
// generate the SQL for that batch.
parser.on('readable', () => {
  const rows = [];
  for (let row = parser.read(); row !== null; row = parser.read()) {
    // The column list is taken from the first record seen; the names are
    // escaped as SQL identifiers.
    if (columns.length === 0) {
      columns = Object.keys(row).map((name) => connection.escapeId(name));
    }
    rows.push(Object.values(row));
  }
  flushBatch(rows);
});
// On a parse failure: report it, mark the run as failed, and close the MySQL
// connection. The 'end' handler below is the only other place that closes it,
// and a stream that errored is not expected to emit 'end', so without this
// the connection would stay open (which can keep the process from exiting).
parser.on('error', (err) => {
  console.error(err.message);
  process.exitCode = 1;
  connection.end();
});
// All records have been read: close the MySQL connection.
parser.on('end', () => {
  connection.end();
});
// Open the file and decompress it if needed (name ending in .gz), then feed
// the resulting stream to the CSV parser.
{
  const isGzipped = /\.gz$/.test(file);
  const raw = fs.createReadStream(file);
  const source = isGzipped ? raw.pipe(zlib.createGunzip()) : raw;
  source.pipe(parser);
}