Compare commits
3 Commits
e0fc56473c
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| aa960ae582 | |||
| 89e0d82830 | |||
| 65de913c1d |
18
README.md
18
README.md
@@ -22,6 +22,8 @@ Voila un sample des données parsées [dvf/sample.json](dvf/sample.json)
|
||||
https://files.data.gouv.fr/geo-dvf/latest/csv/
|
||||
|
||||
mkdir -p geodvf
|
||||
curl https://files.data.gouv.fr/geo-dvf/latest/csv/2025/full.csv.gz -o geodvf/2025.csv.gz
|
||||
curl https://files.data.gouv.fr/geo-dvf/latest/csv/2024/full.csv.gz -o geodvf/2024.csv.gz
|
||||
curl https://files.data.gouv.fr/geo-dvf/latest/csv/2023/full.csv.gz -o geodvf/2023.csv.gz
|
||||
curl https://files.data.gouv.fr/geo-dvf/latest/csv/2022/full.csv.gz -o geodvf/2022.csv.gz
|
||||
curl https://files.data.gouv.fr/geo-dvf/latest/csv/2021/full.csv.gz -o geodvf/2021.csv.gz
|
||||
@@ -30,13 +32,19 @@ https://files.data.gouv.fr/geo-dvf/latest/csv/
|
||||
|
||||
Voilà un sample des données parsées [geodvf/sample.json](geodvf/sample.json)
|
||||
|
||||
# run
|
||||
# sync (automatique)
|
||||
|
||||
Cette commande ouvre les fichiers csv et chie des inserts mysql en batch
|
||||
Détecte les années manquantes en base, télécharge les csv.gz si besoin, parse et insère directement.
|
||||
|
||||
echo "MYSQL=mysql://user:password@host/database?charset=utf8mb4&connectionLimit=10" > .env
|
||||
node parse.js geodvf/2022.csv.gz | gzip > geodvf/2022.sql.gz
|
||||
pv geodvf/2023.sql.gz | gunzip | mysql -u user -ppassword -h host database
|
||||
node sync.js # importe les années manquantes
|
||||
node sync.js 2025 # force le re-import d'une année
|
||||
|
||||
# parse (manuel)
|
||||
|
||||
Génère du SQL sur stdout à partir d'un csv.gz (ancien workflow).
|
||||
|
||||
node parse.js geodvf/2025.csv.gz | gzip > geodvf/2025.sql.gz
|
||||
pv geodvf/2025.sql.gz | gunzip | mysql -u user -ppassword -h host database
|
||||
|
||||
```
|
||||
CREATE TABLE IF NOT EXISTS dvf (
|
||||
|
||||
6
lib/dotenv.js
Normal file
6
lib/dotenv.js
Normal file
@@ -0,0 +1,6 @@
|
||||
import { findUpSync } from 'find-up';
|
||||
import dotenv from 'dotenv';
|
||||
import path from 'node:path';
|
||||
|
||||
// Look for a `.env` file with find-up, starting from the directory that contains
// the script passed to node (process.argv[1]) rather than the current working directory.
const foundenv = findUpSync('.env', { cwd: path.dirname(process.argv[1]) });
|
||||
// Load the located file into process.env.
// NOTE(review): `quiet: true` presumably silences dotenv's own log output — confirm against the dotenv docs.
dotenv.config({ path: foundenv, quiet: true });
|
||||
196
package-lock.json
generated
196
package-lock.json
generated
@@ -10,7 +10,8 @@
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"csv-parse": "^6.2.1",
|
||||
"dotenv": "^17.3.1",
|
||||
"dotenv": "^17.4.0",
|
||||
"find-up": "^8.0.0",
|
||||
"mysql2": "^3.20.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -857,9 +858,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/baseline-browser-mapping": {
|
||||
"version": "2.10.12",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.12.tgz",
|
||||
"integrity": "sha512-qyq26DxfY4awP2gIRXhhLWfwzwI+N5Nxk6iQi8EFizIaWIjqicQTE4sLnZZVdeKPRcVNoJOkkpfzoIYuvCKaIQ==",
|
||||
"version": "2.10.13",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.13.tgz",
|
||||
"integrity": "sha512-BL2sTuHOdy0YT1lYieUxTw/QMtPBC3pmlJC6xk8BBYVv6vcw3SGdKemQ+Xsx9ik2F/lYDO9tqsFQH1r9PFuHKw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
@@ -881,9 +882,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/browserslist": {
|
||||
"version": "4.28.1",
|
||||
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.1.tgz",
|
||||
"integrity": "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA==",
|
||||
"version": "4.28.2",
|
||||
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz",
|
||||
"integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
@@ -901,11 +902,11 @@
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"baseline-browser-mapping": "^2.9.0",
|
||||
"caniuse-lite": "^1.0.30001759",
|
||||
"electron-to-chromium": "^1.5.263",
|
||||
"node-releases": "^2.0.27",
|
||||
"update-browserslist-db": "^1.2.0"
|
||||
"baseline-browser-mapping": "^2.10.12",
|
||||
"caniuse-lite": "^1.0.30001782",
|
||||
"electron-to-chromium": "^1.5.328",
|
||||
"node-releases": "^2.0.36",
|
||||
"update-browserslist-db": "^1.2.3"
|
||||
},
|
||||
"bin": {
|
||||
"browserslist": "cli.js"
|
||||
@@ -998,9 +999,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/caniuse-lite": {
|
||||
"version": "1.0.30001781",
|
||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001781.tgz",
|
||||
"integrity": "sha512-RdwNCyMsNBftLjW6w01z8bKEvT6e/5tpPVEgtn22TiLGlstHOVecsX2KHFkD5e/vRnIE4EGzpuIODb3mtswtkw==",
|
||||
"version": "1.0.30001784",
|
||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001784.tgz",
|
||||
"integrity": "sha512-WU346nBTklUV9YfUl60fqRbU5ZqyXlqvo1SgigE1OAXK5bFL8LL9q1K7aap3N739l4BvNqnkm3YrGHiY9sfUQw==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
@@ -1338,9 +1339,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "17.3.1",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.3.1.tgz",
|
||||
"integrity": "sha512-IO8C/dzEb6O3F9/twg6ZLXz164a2fhTnEWb95H23Dm4OuN+92NmEAlTrupP9VW6Jm3sO26tQlqyvyi4CsnY9GA==",
|
||||
"version": "17.4.0",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.0.tgz",
|
||||
"integrity": "sha512-kCKF62fwtzwYm0IGBNjRUjtJgMfGapII+FslMHIjMR5KTnwEmBmWLDRSnc3XSNP8bNy34tekgQyDT0hr7pERRQ==",
|
||||
"license": "BSD-2-Clause",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
@@ -1365,9 +1366,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/electron-to-chromium": {
|
||||
"version": "1.5.328",
|
||||
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.328.tgz",
|
||||
"integrity": "sha512-QNQ5l45DzYytThO21403XN3FvK0hOkWDG8viNf6jqS42msJ8I4tGDSpBCgvDRRPnkffafiwAym2X2eHeGD2V0w==",
|
||||
"version": "1.5.331",
|
||||
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.331.tgz",
|
||||
"integrity": "sha512-IbxXrsTlD3hRodkLnbxAPP4OuJYdWCeM3IOdT+CpcMoIwIoDfCmRpEtSPfwBXxVkg9xmBeY7Lz2Eo2TDn/HC3Q==",
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
@@ -1918,13 +1919,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-sonarjs/node_modules/minimatch": {
|
||||
"version": "10.2.4",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz",
|
||||
"integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==",
|
||||
"version": "10.2.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
|
||||
"integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
|
||||
"dev": true,
|
||||
"license": "BlueOak-1.0.0",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^5.0.2"
|
||||
"brace-expansion": "^5.0.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": "18 || 20 || >=22"
|
||||
@@ -2057,6 +2058,84 @@
|
||||
"node": ">=4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint/node_modules/find-up": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz",
|
||||
"integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"locate-path": "^6.0.0",
|
||||
"path-exists": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint/node_modules/locate-path": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
|
||||
"integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"p-locate": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint/node_modules/p-limit": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
|
||||
"integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"yocto-queue": "^0.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint/node_modules/p-locate": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz",
|
||||
"integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"p-limit": "^3.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint/node_modules/yocto-queue": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz",
|
||||
"integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/espree": {
|
||||
"version": "10.4.0",
|
||||
"resolved": "https://registry.npmjs.org/espree/-/espree-10.4.0.tgz",
|
||||
@@ -2196,17 +2275,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/find-up": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz",
|
||||
"integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==",
|
||||
"dev": true,
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/find-up/-/find-up-8.0.0.tgz",
|
||||
"integrity": "sha512-JGG8pvDi2C+JxidYdIwQDyS/CgcrIdh18cvgxcBge3wSHRQOrooMD3GlFBcmMJAN9M42SAZjDp5zv1dglJjwww==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"locate-path": "^6.0.0",
|
||||
"path-exists": "^4.0.0"
|
||||
"locate-path": "^8.0.0",
|
||||
"unicorn-magic": "^0.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
"node": ">=20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
@@ -3274,16 +3352,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/locate-path": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz",
|
||||
"integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==",
|
||||
"dev": true,
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-8.0.0.tgz",
|
||||
"integrity": "sha512-XT9ewWAC43tiAV7xDAPflMkG0qOPn2QjHqlgX8FOqmWa/rxnyYDulF9T0F7tRy1u+TVTmK/M//6VIOye+2zDXg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"p-locate": "^5.0.0"
|
||||
"p-locate": "^6.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
"node": ">=20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
@@ -3628,32 +3705,30 @@
|
||||
}
|
||||
},
|
||||
"node_modules/p-limit": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
|
||||
"integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==",
|
||||
"dev": true,
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-4.0.0.tgz",
|
||||
"integrity": "sha512-5b0R4txpzjPWVw/cXXUResoD4hb6U/x9BH08L7nw+GN1sezDzPdxeRvpc9c433fZhBan/wusjbCsqwqm4EIBIQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"yocto-queue": "^0.1.0"
|
||||
"yocto-queue": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/p-locate": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz",
|
||||
"integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==",
|
||||
"dev": true,
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-6.0.0.tgz",
|
||||
"integrity": "sha512-wPrq66Llhl7/4AGC6I+cqxT07LhXvWL08LNXz1fENOw0Ap4sRZZ/gZpTTJ5jpurzzzfS2W/Ge9BY3LgLjCShcw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"p-limit": "^3.0.2"
|
||||
"p-limit": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
@@ -4621,6 +4696,18 @@
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/unicorn-magic": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/unicorn-magic/-/unicorn-magic-0.3.0.tgz",
|
||||
"integrity": "sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/update-browserslist-db": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",
|
||||
@@ -4850,13 +4937,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/yocto-queue": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz",
|
||||
"integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==",
|
||||
"dev": true,
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
|
||||
"integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
"node": ">=12.20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
|
||||
@@ -22,7 +22,8 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"csv-parse": "^6.2.1",
|
||||
"dotenv": "^17.3.1",
|
||||
"dotenv": "^17.4.0",
|
||||
"find-up": "^8.0.0",
|
||||
"mysql2": "^3.20.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
14
parse.js
14
parse.js
@@ -1,10 +1,8 @@
|
||||
import { parse } from 'csv-parse';
|
||||
import fs from 'node:fs';
|
||||
import zlib from 'zlib';
|
||||
import zlib from 'node:zlib';
|
||||
import mysql from 'mysql2';
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
dotenv.config();
|
||||
import './lib/dotenv.js';
|
||||
|
||||
if (process.argv.length != 3) {
|
||||
throw new Error('You should give a project dir');
|
||||
@@ -43,8 +41,8 @@ const flushBatch = (batch) => {
|
||||
};
|
||||
|
||||
// quand quelques lignes de csv sont parsées, on les assemble puis on génère le sql
|
||||
parser.on('readable', function(){
|
||||
let record;
|
||||
parser.on('readable', () =>{
|
||||
let record = null;
|
||||
const batch = [];
|
||||
|
||||
while ((record = parser.read()) !== null) {
|
||||
@@ -60,10 +58,10 @@ parser.on('readable', function(){
|
||||
|
||||
flushBatch(batch);
|
||||
});
|
||||
parser.on('error', function(err){
|
||||
parser.on('error', (err) =>{
|
||||
console.error(err.message);
|
||||
});
|
||||
parser.on('end', function(){
|
||||
parser.on('end', () =>{
|
||||
connection.end();
|
||||
});
|
||||
|
||||
|
||||
155
sync.js
Normal file
155
sync.js
Normal file
@@ -0,0 +1,155 @@
|
||||
import { parse } from 'csv-parse';
|
||||
import fs from 'node:fs';
|
||||
import zlib from 'node:zlib';
|
||||
import { pipeline } from 'node:stream/promises';
|
||||
import { Writable } from 'node:stream';
|
||||
import mysql from 'mysql2/promise';
|
||||
import './lib/dotenv.js';
|
||||
|
||||
const BASE_URL = 'https://files.data.gouv.fr/geo-dvf/latest/csv';
|
||||
const GEODVF_DIR = 'geodvf';
|
||||
|
||||
// The database location comes from the MYSQL environment variable,
// e.g. mysql://user:password@host:3306/database
const connectionString = process.env.MYSQL;
if (!connectionString) {
  throw new Error('MYSQL environment variable not set');
}

// Break the connection string into discrete mysql2 options.
// URL keeps username, password and pathname percent-encoded, so every
// component taken from it is decoded before being handed to the driver.
// Query parameters of the connection string are not read here; the charset
// is always forced to utf8mb4.
const url = new URL(connectionString);
const dbConfig = {
  host: url.hostname,
  // url.port is a string and is '' when the URL carries no explicit port.
  port: url.port ? Number(url.port) : 3306,
  user: decodeURIComponent(url.username),
  password: decodeURIComponent(url.password),
  // pathname is "/database"; drop the leading slash.
  database: decodeURIComponent(url.pathname.slice(1)),
  charset: 'utf8mb4',
};
|
||||
|
||||
async function getYearsOnServer() {
|
||||
const res = await fetch(`${BASE_URL}/`);
|
||||
const html = await res.text();
|
||||
const years = [];
|
||||
for (const match of html.matchAll(/href="(\d{4})\/"/g)) {
|
||||
years.push(Number(match[1]));
|
||||
}
|
||||
return years.toSorted();
|
||||
}
|
||||
|
||||
/**
 * Lists the distinct years already present in the `dvf` table.
 *
 * @param {object} connection - Object exposing an async `query(sql)` that
 *   resolves to `[rows]` (as a mysql2 promise connection does).
 * @returns {Promise<number[]>} Years taken from `date_mutation`, in the order
 *   returned by the query (ascending).
 */
async function getYearsInDb(connection) {
  const [rows] = await connection.query('SELECT DISTINCT YEAR(date_mutation) as annee FROM dvf ORDER BY annee');
  return rows
    // YEAR(NULL) is NULL: rows without a date must not count as a year.
    .filter((row) => row.annee != null)
    // Normalise to numbers so callers can compare with strict equality.
    .map((row) => Number(row.annee));
}
|
||||
|
||||
/**
 * Makes sure the compressed CSV for one year is available locally.
 *
 * If `${GEODVF_DIR}/<year>.csv.gz` already exists it is reused as is.
 * Otherwise the file is fetched from `${BASE_URL}/<year>/full.csv.gz`.
 * The body is first written to a `.part` file and only renamed to its final
 * name once fully received, so an interrupted download can never be picked up
 * as a complete file by a later run.
 *
 * @param {number|string} year - Year to download.
 * @returns {Promise<string>} Path of the local csv.gz file.
 * @throws {Error} On a non-2xx HTTP status, or when the transfer or the write fails.
 */
async function downloadYear(year) {
  const file = `${GEODVF_DIR}/${year}.csv.gz`;
  if (fs.existsSync(file)) {
    console.log(` ${file} déjà présent, skip download`);
    return file;
  }

  const url = `${BASE_URL}/${year}/full.csv.gz`;
  console.log(` Téléchargement ${url} ...`);
  const res = await fetch(url);
  if (!res.ok) throw new Error(`HTTP ${res.status} pour ${url}`);

  fs.mkdirSync(GEODVF_DIR, { recursive: true });
  const partial = `${file}.part`;
  try {
    await pipeline(res.body, fs.createWriteStream(partial));
    fs.renameSync(partial, file);
  } catch (err) {
    // Drop the incomplete file; `force` ignores the case where it was never created.
    fs.rmSync(partial, { force: true });
    throw err;
  }

  const size = fs.statSync(file).size;
  console.log(` Téléchargé ${file} (${(size / 1024 / 1024).toFixed(1)} Mo)`);
  return file;
}
|
||||
|
||||
/**
 * Streams one gzip-compressed CSV file into the `dvf` table.
 *
 * The file is decompressed, parsed with csv-parse (first line used as column
 * names) and inserted with multi-row INSERT statements of up to BATCH_SIZE
 * rows. Empty CSV fields are stored as NULL.
 *
 * NOTE(review): no transaction is opened here, so a failure part-way through
 * presumably leaves the batches already sent in the table — confirm whether
 * that is acceptable for the "missing years" detection.
 *
 * @param {object} connection - mysql2 promise connection (only `query` is used).
 * @param {number|string} year - Year being imported; used for logging only.
 * @param {string} file - Path of the csv.gz file to import.
 * @returns {Promise<number>} Number of rows inserted.
 * @throws {Error} When reading, decompressing, parsing or inserting fails.
 */
async function importYear(connection, year, file) {
  console.log(` Parsing et insertion de ${file} ...`);

  const BATCH_SIZE = 5000;
  let columns = null;
  let inserted = 0;
  let batch = [];

  // Sends the pending rows as a single multi-row INSERT.
  const flush = async () => {
    if (batch.length === 0) return;
    const placeholders = batch.map(row => `(${row.map(() => '?').join(', ')})`).join(', ');
    const sql = `INSERT INTO dvf (${columns.join(', ')}) VALUES ${placeholders}`;
    await connection.query(sql, batch.flat());
    inserted += batch.length;
    // Refresh the progress line roughly every 100 000 rows.
    if (inserted % 100_000 < BATCH_SIZE) {
      process.stdout.write(` ... ${inserted.toLocaleString()} lignes insérées\r`);
    }
    batch = [];
  };

  const parser = parse({ delimiter: ',', columns: true });

  const writer = new Writable({
    objectMode: true,
    async write(record, _encoding, callback) {
      try {
        if (!columns) {
          // Column names come from the CSV header: quote them as identifiers
          // and double any backtick so a header cannot break out of the quoting.
          columns = Object.keys(record).map(col => `\`${col.replaceAll('`', '``')}\``);
        }
        batch.push(Object.values(record).map(v => (v === '' ? null : v)));
        if (batch.length >= BATCH_SIZE) {
          await flush();
        }
        callback();
      } catch (err) {
        callback(err);
      }
    },
    async final(callback) {
      try {
        // Insert the last, possibly partial, batch.
        await flush();
        callback();
      } catch (err) {
        callback(err);
      }
    },
  });

  // Every stage is handed to pipeline() so that an error on any of them,
  // including the file read itself, rejects the promise and tears the chain
  // down. A bare .pipe() would not forward read errors.
  await pipeline(fs.createReadStream(file), zlib.createGunzip(), parser, writer);

  console.log(` ${inserted.toLocaleString()} lignes insérées pour ${year}`);
  return inserted;
}
|
||||
|
||||
/**
 * Command-line entry point: imports into the database every year that is
 * available on the server but absent from the `dvf` table.
 *
 * When a year is given as first CLI argument, only that year is imported,
 * whether or not it is already in the database.
 *
 * NOTE(review): the forced mode does not remove existing rows for that year
 * before importing — confirm whether the table has a unique key, otherwise a
 * re-import presumably duplicates rows.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the CLI argument is not a positive integer, or when the
 *   download or import of a year fails.
 */
async function main() {
  const arg = process.argv[2];
  const forceYear = arg ? Number(arg) : null;
  // Reject garbage explicitly: NaN would otherwise be falsy and silently
  // turn a forced import into a full sync.
  if (forceYear !== null && (!Number.isInteger(forceYear) || forceYear <= 0)) {
    throw new Error(`Année invalide : ${arg}`);
  }

  console.log('Connexion à la base...');
  const connection = await mysql.createConnection(dbConfig);

  // try/finally: the connection is closed on every exit path, errors included.
  try {
    console.log('Vérification des années sur le serveur...');
    const serverYears = await getYearsOnServer();
    console.log(`Années disponibles : ${serverYears.join(', ')}`);

    const dbYears = await getYearsInDb(connection);
    console.log(`Années en base : ${dbYears.join(', ')}`);

    const known = new Set(dbYears);
    let missing = serverYears.filter(y => !known.has(y));
    if (forceYear !== null) {
      missing = [forceYear];
      console.log(`Mode forcé : import de ${forceYear}`);
    }

    if (missing.length === 0) {
      console.log('Tout est à jour !');
      return;
    }

    console.log(`Années manquantes : ${missing.join(', ')}`);

    // Years are processed one after the other on the single connection.
    for (const year of missing) {
      console.log(`\n=== ${year} ===`);
      const file = await downloadYear(year);
      await importYear(connection, year, file);
    }

    console.log('\nTerminé !');
  } finally {
    await connection.end();
  }
}
|
||||
|
||||
// Script entry point: run the sync via top-level await, so a rejection from main() is not swallowed.
await main();
|
||||
Reference in New Issue
Block a user