chore: big wip
This commit is contained in:
parent
e3c1ffe0f1
commit
751dc4426c
11 changed files with 354 additions and 28 deletions
14
.gitignore
vendored
14
.gitignore
vendored
|
|
@ -104,11 +104,11 @@ screenshot-*.png
|
|||
/.data
|
||||
|
||||
# Geodae preprocessing
|
||||
scripts/node_modules/
|
||||
scripts/.yarn/*
|
||||
!scripts/.yarn/patches
|
||||
!scripts/.yarn/plugins
|
||||
!scripts/.yarn/releases
|
||||
!scripts/.yarn/sdks
|
||||
!scripts/.yarn/versions
|
||||
scripts/dae/node_modules/
|
||||
scripts/dae/.yarn/*
|
||||
!scripts/dae/.yarn/patches
|
||||
!scripts/dae/.yarn/plugins
|
||||
!scripts/dae/.yarn/releases
|
||||
!scripts/dae/.yarn/sdks
|
||||
!scripts/dae/.yarn/versions
|
||||
src/assets/db/*.db
|
||||
|
|
|
|||
|
|
@ -51,7 +51,10 @@
|
|||
"open:deeplink:ios": "yarn open:deeplink --ios",
|
||||
"open:deeplink": "npx uri-scheme open --android",
|
||||
"screenshot:ios": "scripts/screenshot-ios.sh",
|
||||
"screenshot:android": "scripts/screenshot-android.sh"
|
||||
"screenshot:android": "scripts/screenshot-android.sh",
|
||||
"dae:json-to-csv": "yarn --cwd scripts/dae json-to-csv",
|
||||
"dae:csv-to-db": "yarn --cwd scripts/dae csv-to-db",
|
||||
"dae:build": "yarn --cwd scripts/dae build"
|
||||
},
|
||||
"customExpoVersioning": {
|
||||
"versionCode": 241,
|
||||
|
|
|
|||
2
scripts/dae/.gitignore
vendored
Normal file
2
scripts/dae/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
geodae.json
|
||||
geodae.csv
|
||||
|
|
@ -4,4 +4,4 @@ enableGlobalCache: false
|
|||
|
||||
nodeLinker: node-modules
|
||||
|
||||
yarnPath: ../.yarn/releases/yarn-4.5.3.cjs
|
||||
yarnPath: ../../.yarn/releases/yarn-4.5.3.cjs
|
||||
|
|
@ -9,7 +9,6 @@ import { dirname, resolve } from "node:path";
|
|||
import { fileURLToPath } from "node:url";
|
||||
import { createRequire } from "node:module";
|
||||
import { parse } from "csv-parse";
|
||||
import { normalizeHoraires } from "./lib/normalize-horaires.mjs";
|
||||
|
||||
const require = createRequire(import.meta.url);
|
||||
const Database = require("better-sqlite3");
|
||||
|
|
@ -151,9 +150,9 @@ async function main() {
|
|||
const horaires = cleanStr(record.horaires);
|
||||
const acces = cleanStr(record.acces);
|
||||
const disponible_24h = cleanInt(record.disponible_24h);
|
||||
const horaires_std = cleanStr(record.horaires_std) || "{}";
|
||||
const id = deterministicId(lat, lon, nom, adresse);
|
||||
const h3Cell = computeH3(lat, lon, H3_RES);
|
||||
const horairesStd = normalizeHoraires(horaires, disponible_24h);
|
||||
|
||||
batch.push({
|
||||
id,
|
||||
|
|
@ -162,7 +161,7 @@ async function main() {
|
|||
nom,
|
||||
adresse,
|
||||
horaires,
|
||||
horaires_std: JSON.stringify(horairesStd),
|
||||
horaires_std,
|
||||
acces,
|
||||
disponible_24h,
|
||||
h3: h3Cell,
|
||||
320
scripts/dae/geodae-to-csv.js
Normal file
320
scripts/dae/geodae-to-csv.js
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
import { readFileSync, writeFileSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { normalizeHoraires } from "./lib/normalize-horaires.mjs";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
const INPUT = join(__dirname, "geodae.json");
|
||||
const OUTPUT = join(__dirname, "geodae.csv");
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
/**
 * Serialize a single value as one CSV field.
 *
 * `null`/`undefined` become an empty field. Runs of line breaks are collapsed
 * to a single space so every record stays on one physical line, surrounding
 * whitespace is trimmed, and the field is wrapped in double quotes (with
 * embedded quotes doubled) whenever it contains a quote or a comma.
 *
 * @param {*} value - Raw value to serialize.
 * @returns {string} CSV-safe field text.
 */
function escapeCsv(value) {
  if (value == null) {
    return "";
  }

  const flat = String(value).replace(/[\r\n]+/g, " ").trim();
  const needsQuoting = /[",]/.test(flat);

  return needsQuoting ? `"${flat.replace(/"/g, '""')}"` : flat;
}
|
||||
|
||||
// French day name (lowercase) -> short label used in the generated schedule text.
const DAY_ABBREV = {
  lundi: "Lun",
  mardi: "Mar",
  mercredi: "Mer",
  jeudi: "Jeu",
  vendredi: "Ven",
  samedi: "Sam",
  dimanche: "Dim",
};

// Monday-first ordering used to sort days and to detect consecutive ranges.
const DAY_ORDER = [
  "lundi",
  "mardi",
  "mercredi",
  "jeudi",
  "vendredi",
  "samedi",
  "dimanche",
];

// Matches a French day name anywhere in a free-text complement.
const DAY_NAMES_PATTERN =
  /lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche/i;

// Matches English day abbreviations, either as whole words ("Mon") or as the
// start of a two-letter range ("Mo-Fr").
const DAY_NAMES_EN_PATTERN =
  /\b(mon|tue|wed|thu|fri|sat|sun)\b|mo-|tu-|we-|th-|fr-|sa-|su-/i;

// Matches an hour expression such as "8h30", "18:00" or "24 heures".
// Case-insensitive, like the day-name patterns above, so that upper-case
// spellings ("8H30", "24 Heures") are recognised as hours too.
const HOUR_PATTERN = /\d+[h:]\d*|\d+ ?heures?\b/i;
|
||||
|
||||
/**
 * Turn the raw list of availability days into a short display string.
 *
 * - no usable entry, or a lone "non renseigné"  -> ""
 * - any "7j/7" entry, or all seven day names    -> "7j/7"
 * - a single known day                          -> its abbreviation ("Lun")
 * - a single unknown value                      -> that value, trimmed
 * - consecutive known days                      -> range ("Lun-Ven")
 * - other known days                            -> list ("Lun, Mer, Ven")
 * - several values, none a known day            -> the values joined by ", "
 *
 * Matching is case-insensitive and ignores surrounding whitespace; null
 * entries and duplicate days are ignored.
 *
 * @param {Array<string|null>|null|undefined} arr - Raw day values.
 * @returns {string} Display string, possibly empty.
 */
function formatDays(arr) {
  if (!arr || arr.length === 0) return "";

  // Null entries carry no information. Dropping them first also keeps a lone
  // `[null]` from throwing on `.toLowerCase()`.
  const present = arr.filter((d) => d != null);
  if (present.length === 0) return "";

  const lowered = present.map((d) => d.toLowerCase().trim());

  // A "7j/7" entry covers the whole week even when listed next to other
  // values; is7j7 likewise treats such an entry as "every day".
  if (lowered.includes("7j/7")) return "7j/7";

  if (present.length === 1) {
    const val = lowered[0];
    if (val === "non renseigné" || val === "non renseigne") return "";
    // Own-property check so values such as "constructor" are not resolved
    // through Object.prototype.
    if (Object.hasOwn(DAY_ABBREV, val)) return DAY_ABBREV[val];
    return present[0].trim();
  }

  // Distinct known days as positions in DAY_ORDER, in canonical order.
  // De-duplicating keeps repeated days from breaking the range detection or
  // from being miscounted as a full week.
  const indices = [...new Set(lowered.map((d) => DAY_ORDER.indexOf(d)))]
    .filter((idx) => idx !== -1)
    .sort((a, b) => a - b);

  if (indices.length === 0) return present.join(", ");
  if (indices.length === DAY_ORDER.length) return "7j/7";

  const label = (idx) => DAY_ABBREV[DAY_ORDER[idx]];
  const isConsecutive = indices.every(
    (idx, i) => i === 0 || idx === indices[i - 1] + 1
  );

  if (isConsecutive && indices.length >= 2) {
    return `${label(indices[0])}-${label(indices[indices.length - 1])}`;
  }

  return indices.map(label).join(", ");
}
|
||||
|
||||
/**
 * Join the raw availability-hour values into one display string.
 *
 * Null entries, blank entries and the "non renseigné" placeholder (with or
 * without accent, any case) are skipped; the remaining values are trimmed and
 * joined with " + ".
 *
 * @param {Array<string|null>|null|undefined} arr - Raw hour values.
 * @returns {string} Display string, possibly empty.
 */
function formatHours(arr) {
  if (!arr || arr.length === 0) return "";

  const kept = [];
  for (const raw of arr) {
    if (raw == null) continue;

    const text = raw.trim();
    if (!text) continue;

    const lowered = text.toLowerCase();
    if (lowered === "non renseigné" || lowered === "non renseigne") continue;

    kept.push(text);
  }

  return kept.join(" + ");
}
|
||||
|
||||
/**
 * Decide whether a record is treated as available at all times.
 *
 * True when either:
 * - the days are 7j/7 (per is7j7) AND the hours are 24h/24 (per is24h), or
 * - `c_acc` is "Extérieur" (accent optional, any case, whitespace ignored)
 *   AND `c_acc_lib` is strictly `true`.
 *
 * Always returns a real boolean, including when `c_acc` is missing or is not
 * a string.
 *
 * @param {object} p - Feature properties (reads c_disp_j, c_disp_h, c_acc, c_acc_lib).
 * @returns {boolean} Whether the record counts as always available.
 */
function isAlwaysAvailable(p) {
  if (is7j7(p.c_disp_j) && is24h(p.c_disp_h)) return true;

  // Only a string can name a location; anything else counts as "not exterior"
  // instead of throwing on `.trim()`.
  const location =
    typeof p.c_acc === "string" ? p.c_acc.trim().toLowerCase() : "";
  const isExterior = location === "extérieur" || location === "exterieur";

  return isExterior && p.c_acc_lib === true;
}
|
||||
|
||||
/**
 * Tell whether a raw day list covers the whole week.
 *
 * True when the list contains a "7j/7" entry, or when it names every day of
 * DAY_ORDER at least once. Matching is case-insensitive and ignores
 * surrounding whitespace. Days are counted by distinct name, so repeating one
 * day seven times does not count as a full week.
 *
 * @param {Array<string|null>|null|undefined} arr - Raw day values.
 * @returns {boolean} Whether every day of the week is covered.
 */
function is7j7(arr) {
  if (!arr) return false;

  const seen = new Set();
  for (const entry of arr) {
    if (entry == null) continue;

    const day = entry.toLowerCase().trim();
    if (day === "7j/7") return true;
    if (DAY_ORDER.includes(day)) seen.add(day);
  }

  return seen.size === DAY_ORDER.length;
}
|
||||
|
||||
/**
 * Tell whether a raw hour list contains the round-the-clock marker.
 *
 * @param {Array<string|null>|null|undefined} arr - Raw hour values.
 * @returns {boolean} True when some entry, once trimmed, is exactly "24h/24".
 */
function is24h(arr) {
  if (!arr) return false;

  for (const slot of arr) {
    if (slot && slot.trim() === "24h/24") {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Build the single free-text schedule string for a record from its days
 * (c_disp_j), hours (c_disp_h) and complement (c_disp_complt).
 *
 * The "base" is the formatted days and hours joined by a space (whichever of
 * the two is non-empty). The complement has line breaks collapsed to spaces
 * and is trimmed, then merged as follows:
 *
 * - no complement                     -> base
 * - complement without an hour        -> "base ; complement", or the complement
 *                                        alone when base is empty
 * - complement with hours AND a day   -> complement only (it is a full
 *   name                                 per-day schedule on its own)
 * - complement with hours, no day     -> "days complement" (the complement
 *   name                                 replaces the base hours), or the
 *                                        complement alone when there are no days
 *
 * @param {object} p - Feature properties.
 * @returns {string} Schedule text, possibly empty.
 */
function buildHoraires(p) {
  const days = formatDays(p.c_disp_j);
  const hours = formatHours(p.c_disp_h);
  const complt = (p.c_disp_complt || "").replace(/[\r\n]+/g, " ").trim();

  // Days and hours side by side, skipping whichever is empty.
  const base = [days, hours].filter(Boolean).join(" ");

  if (!complt) {
    return base;
  }

  const mentionsHours = HOUR_PATTERN.test(complt);
  if (!mentionsHours) {
    // Purely additional info (e.g. "fermeture 31/12"): append it to the base.
    return base ? `${base} ; ${complt}` : complt;
  }

  const mentionsDays =
    DAY_NAMES_PATTERN.test(complt) || DAY_NAMES_EN_PATTERN.test(complt);

  // Either a detailed per-day schedule, or hours with no days to prefix.
  if (mentionsDays || !days) {
    return complt;
  }

  // The complement gives the actual hours (e.g. "8h-18h"): days + complement.
  return `${days} ${complt}`;
}
|
||||
|
||||
/**
 * Compose a one-line postal address from the record's address fields.
 *
 * The street part is "<number> <street>" (or the street alone); a number
 * without a street is dropped. The city part is "<postcode> <city>" (or the
 * city alone); a postcode without a city is dropped. Non-empty parts are
 * joined with ", ".
 *
 * @param {object} p - Feature properties (reads c_adr_num, c_adr_voie, c_com_cp, c_com_nom).
 * @returns {string} Address text, possibly empty.
 */
function formatAddress(p) {
  const clean = (value) => (value || "").trim();

  const num = clean(p.c_adr_num);
  const street = clean(p.c_adr_voie);
  const cp = clean(p.c_com_cp);
  const city = clean(p.c_com_nom);

  let streetLine = "";
  if (street) {
    streetLine = num ? `${num} ${street}` : street;
  }

  let cityLine = "";
  if (city) {
    cityLine = cp ? `${cp} ${city}` : city;
  }

  return [streetLine, cityLine].filter(Boolean).join(", ");
}
|
||||
|
||||
/**
 * Compose the access description of a record.
 *
 * Parts, in order and joined with ", ":
 * 1. the trimmed `c_acc` value when it is truthy;
 * 2. "libre" when `c_acc_lib` is strictly `true`;
 * 3. "étage <c_acc_etg>" unless the floor is empty or a ground-floor spelling
 *    ("0", "rdc", "rez de chaussee", "rez de chaussée", any case);
 * 4. the trimmed `c_acc_complt` when non-empty.
 *
 * @param {object} p - Feature properties.
 * @returns {string} Access text, possibly empty.
 */
function formatAccess(p) {
  const groundFloorSpellings = [
    "0",
    "rdc",
    "rez de chaussee",
    "rez de chaussée",
  ];
  const segments = [];

  if (p.c_acc) {
    segments.push(p.c_acc.trim());
  }

  if (p.c_acc_lib === true) {
    segments.push("libre");
  }

  const floorKey = (p.c_acc_etg || "").trim().toLowerCase();
  const isUpperOrLowerFloor =
    floorKey !== "" && !groundFloorSpellings.includes(floorKey);
  if (isUpperOrLowerFloor) {
    // Keep the original casing of the floor value in the output.
    segments.push("étage " + p.c_acc_etg.trim());
  }

  const extra = (p.c_acc_complt || "").trim();
  if (extra) {
    segments.push(extra);
  }

  return segments.join(", ");
}
|
||||
|
||||
/**
 * Pick the display name of a record: the trimmed `c_expt_rais` when it is
 * non-empty, otherwise the trimmed `c_nom`, otherwise "".
 *
 * @param {object} p - Feature properties.
 * @returns {string} Display name, possibly empty.
 */
function getName(p) {
  const [operatorName, deviceName] = [p.c_expt_rais, p.c_nom].map((value) =>
    (value || "").trim()
  );

  return operatorName !== "" ? operatorName : deviceName;
}
|
||||
|
||||
/**
 * Reduce a string to a comparison key: combining diacritics removed (via NFD
 * decomposition), lower-cased and trimmed. Falsy input yields "".
 *
 * @param {string|null|undefined} str - Text to normalize.
 * @returns {string} Accent-free, lower-case, trimmed text.
 */
function normalize(str) {
  if (!str) {
    return "";
  }

  // NFD splits accented letters into base letter + combining mark, so the
  // marks (U+0300–U+036F) can then be stripped.
  const decomposed = str.normalize("NFD");
  const unaccented = decomposed.replace(/[\u0300-\u036f]/g, "");

  return unaccented.toLowerCase().trim();
}
|
||||
|
||||
/**
 * Decide whether a record should be exported. All comparisons are made on
 * the normalized (accent-free, lower-case, trimmed) field values.
 *
 * A record passes when:
 * - `c_etat` is empty/missing or "Actif";
 * - `c_etat_fonct` is "En fonctionnement";
 * - `c_etat_valid` is "validées".
 *
 * @param {object} p - Feature properties.
 * @returns {boolean} True when the record passes all three checks.
 */
function passesFilter(p) {
  const etat = normalize(p.c_etat);
  const etatAccepted = etat === "" || etat === "actif";

  return (
    etatAccepted &&
    normalize(p.c_etat_fonct) === "en fonctionnement" &&
    normalize(p.c_etat_valid) === "validees"
  );
}
|
||||
|
||||
// --- Main ---
|
||||
|
||||
// Read the GeoJSON export, keep the records that pass the filters and have
// coordinates, and write them as one CSV row each.
console.log("Reading geodae.json...");
const data = JSON.parse(readFileSync(INPUT, "utf-8"));
const features = data?.features;
if (!Array.isArray(features)) {
  // Fail with an explicit message rather than a TypeError on `.length`.
  throw new Error(`${INPUT} does not contain a "features" array`);
}
console.log(`Total features: ${features.length}`);

// Column order of the generated CSV; each row below follows the same order.
const CSV_HEADER = [
  "latitude",
  "longitude",
  "nom",
  "adresse",
  "horaires",
  "horaires_std",
  "acces",
  "disponible_24h",
];

const rows = [CSV_HEADER.join(",")];
let filtered = 0;
let kept = 0;
let alwaysCount = 0;

for (const feature of features) {
  const p = feature?.properties;

  // A feature without properties can be neither filtered nor formatted, so it
  // is counted as rejected instead of crashing the whole run.
  if (!p || !passesFilter(p)) {
    filtered++;
    continue;
  }

  const lat = p.c_lat_coor1;
  const lon = p.c_long_coor1;
  if (lat == null || lon == null) {
    filtered++;
    continue;
  }

  const always = isAlwaysAvailable(p);
  if (always) alwaysCount++;

  const disponible24h = always ? 1 : 0;

  // When always available, leave horaires empty
  const horaires = always ? "" : buildHoraires(p);

  // Normalize horaires into structured JSON
  const horairesStd = normalizeHoraires(horaires, disponible24h);

  const row = [
    lat,
    lon,
    escapeCsv(getName(p)),
    escapeCsv(formatAddress(p)),
    escapeCsv(horaires),
    escapeCsv(JSON.stringify(horairesStd)),
    escapeCsv(formatAccess(p)),
    disponible24h,
  ];

  rows.push(row.join(","));
  kept++;
}

writeFileSync(OUTPUT, rows.join("\n") + "\n", "utf-8");
console.log(`Kept: ${kept}, Filtered out: ${filtered}`);
console.log(`Always available (24h): ${alwaysCount}`);
console.log(`Written to ${OUTPUT}`);
|
||||
18
scripts/dae/package.json
Normal file
18
scripts/dae/package.json
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"name": "geodae-pipeline",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"packageManager": "yarn@4.5.3",
|
||||
"scripts": {
|
||||
"json-to-csv": "node geodae-to-csv.js",
|
||||
"csv-to-db": "node csv-to-sqlite.mjs --input geodae.csv --output ../../src/assets/db/geodae.db",
|
||||
"csv-to-db:semicolon": "node csv-to-sqlite.mjs --input geodae.csv --output ../../src/assets/db/geodae.db --delimiter ';'",
|
||||
"build": "yarn json-to-csv && yarn csv-to-db"
|
||||
},
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^11.7.0",
|
||||
"csv-parse": "^5.6.0",
|
||||
"h3-js": "^4.2.1"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
{
|
||||
"name": "geodae-pipeline",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"packageManager": "yarn@4.5.3",
|
||||
"scripts": {
|
||||
"build-db": "node csv-to-sqlite.mjs --input ../.data/geodae.csv --output ../src/assets/db/geodae.db",
|
||||
"build-db:semicolon": "node csv-to-sqlite.mjs --input ../.data/geodae.csv --output ../src/assets/db/geodae.db --delimiter ';'"
|
||||
},
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^11.7.0",
|
||||
"csv-parse": "^5.6.0",
|
||||
"h3-js": "^4.2.1"
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue