diff --git a/.gitignore b/.gitignore index a041857..658ceed 100644 --- a/.gitignore +++ b/.gitignore @@ -104,11 +104,11 @@ screenshot-*.png /.data # Geodae preprocessing -scripts/node_modules/ -scripts/.yarn/* -!scripts/.yarn/patches -!scripts/.yarn/plugins -!scripts/.yarn/releases -!scripts/.yarn/sdks -!scripts/.yarn/versions +scripts/dae/node_modules/ +scripts/dae/.yarn/* +!scripts/dae/.yarn/patches +!scripts/dae/.yarn/plugins +!scripts/dae/.yarn/releases +!scripts/dae/.yarn/sdks +!scripts/dae/.yarn/versions src/assets/db/*.db diff --git a/package.json b/package.json index 4c91052..f560008 100644 --- a/package.json +++ b/package.json @@ -51,7 +51,10 @@ "open:deeplink:ios": "yarn open:deeplink --ios", "open:deeplink": "npx uri-scheme open --android", "screenshot:ios": "scripts/screenshot-ios.sh", - "screenshot:android": "scripts/screenshot-android.sh" + "screenshot:android": "scripts/screenshot-android.sh", + "dae:json-to-csv": "yarn --cwd scripts/dae json-to-csv", + "dae:csv-to-db": "yarn --cwd scripts/dae csv-to-db", + "dae:build": "yarn --cwd scripts/dae build" }, "customExpoVersioning": { "versionCode": 241, diff --git a/scripts/dae/.gitignore b/scripts/dae/.gitignore new file mode 100644 index 0000000..985c594 --- /dev/null +++ b/scripts/dae/.gitignore @@ -0,0 +1,2 @@ +geodae.json +geodae.csv diff --git a/scripts/.yarnrc.yml b/scripts/dae/.yarnrc.yml similarity index 62% rename from scripts/.yarnrc.yml rename to scripts/dae/.yarnrc.yml index 986aa42..c512f6f 100644 --- a/scripts/.yarnrc.yml +++ b/scripts/dae/.yarnrc.yml @@ -4,4 +4,4 @@ enableGlobalCache: false nodeLinker: node-modules -yarnPath: ../.yarn/releases/yarn-4.5.3.cjs +yarnPath: ../../.yarn/releases/yarn-4.5.3.cjs diff --git a/scripts/csv-to-sqlite.mjs b/scripts/dae/csv-to-sqlite.mjs similarity index 97% rename from scripts/csv-to-sqlite.mjs rename to scripts/dae/csv-to-sqlite.mjs index 02a2732..9b3243f 100644 --- a/scripts/csv-to-sqlite.mjs +++ b/scripts/dae/csv-to-sqlite.mjs @@ -9,7 +9,6 @@ import 
// --- Helpers ---

/**
 * Render a value as one CSV field.
 *
 * Line breaks are collapsed to a single space so each entry stays on one row.
 * The result is trimmed and, when it contains a double quote or a comma,
 * wrapped in double quotes with embedded quotes doubled.
 *
 * @param {*} value - Raw value; `null`/`undefined` produce an empty field.
 * @returns {string} The escaped field.
 */
function escapeCsv(value) {
  if (value == null) return "";
  const str = String(value).replace(/[\r\n]+/g, " ").trim();
  if (str.includes('"') || str.includes(",")) {
    return '"' + str.replace(/"/g, '""') + '"';
  }
  return str;
}

// Abbreviation emitted for each lowercase French day name.
const DAY_ABBREV = {
  lundi: "Lun",
  mardi: "Mar",
  mercredi: "Mer",
  jeudi: "Jeu",
  vendredi: "Ven",
  samedi: "Sam",
  dimanche: "Dim",
};

// Canonical week order used for sorting and range detection.
const DAY_ORDER = [
  "lundi",
  "mardi",
  "mercredi",
  "jeudi",
  "vendredi",
  "samedi",
  "dimanche",
];

const DAY_NAMES_PATTERN =
  /lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche/i;
const DAY_NAMES_EN_PATTERN =
  /\b(mon|tue|wed|thu|fri|sat|sun)\b|mo-|tu-|we-|th-|fr-|sa-|su-/i;
// Case-insensitive so that "8H30" / "8 Heures" count as hours, like "8h30".
const HOUR_PATTERN = /\d+[h:]\d*|\d+ ?heures?\b/i;

/**
 * Tell whether a label is the "non renseigné" placeholder.
 * Accents and case are ignored so both spellings are recognised.
 *
 * @param {string} text - Trimmed label.
 * @returns {boolean}
 */
function isPlaceholder(text) {
  const key = text
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase();
  return key === "non renseigne";
}

/**
 * Turn a raw list into trimmed, non-empty labels.
 * Null entries, blank strings and "non renseigné" placeholders are dropped;
 * a lone non-array value is treated as a one-element list.
 *
 * @param {*} arr - Raw list (or single value, or nothing).
 * @returns {string[]} Cleaned labels in input order.
 */
function cleanLabels(arr) {
  if (!arr) return [];
  const list = Array.isArray(arr) ? arr : [arr];
  return list
    .filter((item) => item != null)
    .map((item) => String(item).trim())
    .filter((text) => text !== "" && !isPlaceholder(text));
}

/**
 * Format a list of day labels as a compact string.
 *
 * - empty / placeholder-only input → ""
 * - a "7j/7" label, or all seven day names → "7j/7"
 * - consecutive days → "Lun-Ven"; other sets → "Lun, Mer, Ven"
 * - no recognised day name → the cleaned labels joined with ", "
 *
 * Day names are matched case-insensitively and de-duplicated, so a repeated
 * day can neither fake a full week nor break range detection. Null entries
 * are ignored whatever the list length.
 *
 * @param {*} arr - Raw day labels.
 * @returns {string} The formatted days.
 */
function formatDays(arr) {
  const labels = cleanLabels(arr);
  if (labels.length === 0) return "";

  const keys = new Set(labels.map((label) => label.toLowerCase()));
  if (keys.has("7j/7")) return "7j/7";

  // Filtering DAY_ORDER yields the days de-duplicated and in canonical order.
  const days = DAY_ORDER.filter((day) => keys.has(day));
  if (days.length === 0) return labels.join(", ");
  if (days.length === DAY_ORDER.length) return "7j/7";

  const firstDay = days[0];
  const lastDay = days[days.length - 1];
  const span = DAY_ORDER.indexOf(lastDay) - DAY_ORDER.indexOf(firstDay);
  if (days.length >= 2 && span === days.length - 1) {
    return DAY_ABBREV[firstDay] + "-" + DAY_ABBREV[lastDay];
  }

  return days.map((day) => DAY_ABBREV[day]).join(", ");
}

/**
 * Join the meaningful hour labels with " + ".
 * Null entries, blank strings and "non renseigné" placeholders are skipped;
 * non-string entries are converted with String() instead of throwing.
 *
 * @param {*} arr - Raw hour labels.
 * @returns {string} The joined labels, or "" when nothing remains.
 */
function formatHours(arr) {
  return cleanLabels(arr).join(" + ");
}
/**
 * Coerce an optional raw property to trimmed text.
 * Falsy values (null, undefined, "", 0, false) give ""; anything else is
 * converted with String(), so numeric JSON values no longer throw.
 *
 * @param {*} value - Raw property value.
 * @returns {string} Trimmed text.
 */
function toText(value) {
  return value ? String(value).trim() : "";
}

/**
 * Determine if always available:
 * - 7j/7 + 24h/24
 * - OR public (Extérieur + libre access)
 *
 * @param {object} p - Feature properties (reads c_disp_j, c_disp_h, c_acc, c_acc_lib).
 * @returns {boolean} Always a strict boolean.
 */
function isAlwaysAvailable(p) {
  const is247 = is7j7(p.c_disp_j) && is24h(p.c_disp_h);
  // normalize() strips accents, so "Extérieur" and "Exterieur" both match.
  const isPublic = normalize(p.c_acc) === "exterieur" && p.c_acc_lib === true;
  return is247 || isPublic;
}

/**
 * Tell whether a day list covers the whole week: it contains a "7j/7" label
 * or all seven distinct day names. Matching ignores case and surrounding
 * whitespace; non-string entries are ignored and duplicates count once.
 *
 * @param {*} arr - Raw day labels.
 * @returns {boolean}
 */
function is7j7(arr) {
  if (!arr) return false;
  const list = Array.isArray(arr) ? arr : [arr];
  const keys = new Set(
    list
      .filter((d) => typeof d === "string")
      .map((d) => d.trim().toLowerCase())
  );
  if (keys.has("7j/7")) return true;
  return DAY_ORDER.every((day) => keys.has(day));
}

/**
 * Tell whether an hour list contains the "24h/24" label
 * (case-insensitive; non-string entries are ignored).
 *
 * @param {*} arr - Raw hour labels.
 * @returns {boolean}
 */
function is24h(arr) {
  if (!arr) return false;
  const list = Array.isArray(arr) ? arr : [arr];
  return list.some(
    (h) => typeof h === "string" && h.trim().toLowerCase() === "24h/24"
  );
}

/**
 * Build a single horaires string, merging days/hours/complement.
 *
 * Heuristic for complement deduplication:
 * - No complement → days + hours
 * - Complement contains day names AND hour patterns → it is a detailed
 *   per-day schedule → use complement only
 * - Else if complement contains hour patterns → use days + complement
 * - Else → days + hours + " ; " + complement (purely additional info)
 *
 * @param {object} p - Feature properties (reads c_disp_j, c_disp_h, c_disp_complt).
 * @returns {string} The merged schedule, possibly "".
 */
function buildHoraires(p) {
  const days = formatDays(p.c_disp_j);
  const hours = formatHours(p.c_disp_h);
  const complt = toText(p.c_disp_complt).replace(/[\r\n]+/g, " ").trim();
  const base = days && hours ? days + " " + hours : days || hours || "";

  if (!complt) return base;

  const hasDayNames =
    DAY_NAMES_PATTERN.test(complt) || DAY_NAMES_EN_PATTERN.test(complt);
  const hasHours = HOUR_PATTERN.test(complt);

  // e.g. "Lundi au jeudi : 8h30-18h ..." is more specific than the base timetable.
  if (hasDayNames && hasHours) return complt;

  // e.g. "8h-18h" replaces the hour labels but keeps the days.
  if (hasHours) return days ? days + " " + complt : complt;

  // e.g. "fermeture 31/12": appended after the base timetable.
  return base ? base + " ; " + complt : complt;
}

/**
 * Format the postal address as "<num> <street>, <postcode> <city>".
 * The number is only kept with a street, the postcode only with a city.
 *
 * @param {object} p - Feature properties (reads c_adr_num, c_adr_voie, c_com_cp, c_com_nom).
 * @returns {string}
 */
function formatAddress(p) {
  const parts = [];

  const num = toText(p.c_adr_num);
  const street = toText(p.c_adr_voie);
  if (street) parts.push(num ? num + " " + street : street);

  const cp = toText(p.c_com_cp);
  const city = toText(p.c_com_nom);
  if (city) parts.push(cp ? cp + " " + city : city);

  return parts.join(", ");
}

// Floor labels (after normalize(), hyphens turned into spaces) meaning ground level.
const GROUND_FLOOR_LABELS = new Set(["0", "rdc", "rez de chaussee"]);

/**
 * Describe how to reach the device: location, "libre" when access is free,
 * the floor unless it is ground level, then the free-text complement.
 *
 * @param {object} p - Feature properties (reads c_acc, c_acc_lib, c_acc_etg, c_acc_complt).
 * @returns {string} The parts joined with ", ".
 */
function formatAccess(p) {
  const parts = [];

  // Indoor/Outdoor
  const location = toText(p.c_acc);
  if (location) parts.push(location);

  // Free access
  if (p.c_acc_lib === true) parts.push("libre");

  // Floor; "Rez-de-chaussée" and "rez de chaussee" are treated alike.
  const floor = toText(p.c_acc_etg);
  const floorKey = normalize(floor).replace(/[\s-]+/g, " ");
  if (floor && !GROUND_FLOOR_LABELS.has(floorKey)) {
    parts.push("étage " + floor);
  }

  // Complement
  const complt = toText(p.c_acc_complt);
  if (complt) parts.push(complt);

  return parts.join(", ");
}

/**
 * Pick the display name: c_expt_rais when non-blank, otherwise c_nom.
 *
 * @param {object} p - Feature properties.
 * @returns {string}
 */
function getName(p) {
  return toText(p.c_expt_rais) || toText(p.c_nom);
}

/**
 * Lowercase, trim and strip combining accents from a value.
 *
 * @param {*} str - Raw value; falsy input gives "".
 * @returns {string}
 */
function normalize(str) {
  if (!str) return "";
  return String(str)
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    .trim();
}

/**
 * Keep a feature only when its state fields qualify:
 * c_etat is empty or "actif", c_etat_fonct is "en fonctionnement" and
 * c_etat_valid is "validees" (all compared after normalize()).
 *
 * @param {object} p - Feature properties.
 * @returns {boolean}
 */
function passesFilter(p) {
  const etat = normalize(p.c_etat);
  if (etat && etat !== "actif") return false;

  if (normalize(p.c_etat_fonct) !== "en fonctionnement") return false;

  return normalize(p.c_etat_valid) === "validees";
}
features = data.features; +console.log(`Total features: ${features.length}`); + +const CSV_HEADER = [ + "latitude", + "longitude", + "nom", + "adresse", + "horaires", + "horaires_std", + "acces", + "disponible_24h", +]; + +const rows = [CSV_HEADER.join(",")]; +let filtered = 0; +let kept = 0; +let alwaysCount = 0; + +for (const feature of features) { + const p = feature.properties; + + if (!passesFilter(p)) { + filtered++; + continue; + } + + const lat = p.c_lat_coor1; + const lon = p.c_long_coor1; + if (lat == null || lon == null) { + filtered++; + continue; + } + + const always = isAlwaysAvailable(p); + if (always) alwaysCount++; + + const disponible24h = always ? 1 : 0; + + // When always available, leave horaires empty + const horaires = always ? "" : buildHoraires(p); + + // Normalize horaires into structured JSON + const horairesStd = normalizeHoraires(horaires, disponible24h); + + const row = [ + lat, + lon, + escapeCsv(getName(p)), + escapeCsv(formatAddress(p)), + escapeCsv(horaires), + escapeCsv(JSON.stringify(horairesStd)), + escapeCsv(formatAccess(p)), + disponible24h, + ]; + + rows.push(row.join(",")); + kept++; +} + +writeFileSync(OUTPUT, rows.join("\n") + "\n", "utf-8"); +console.log(`Kept: ${kept}, Filtered out: ${filtered}`); +console.log(`Always available (24h): ${alwaysCount}`); +console.log(`Written to ${OUTPUT}`); diff --git a/scripts/lib/normalize-horaires.mjs b/scripts/dae/lib/normalize-horaires.mjs similarity index 100% rename from scripts/lib/normalize-horaires.mjs rename to scripts/dae/lib/normalize-horaires.mjs diff --git a/scripts/lib/schema.sql b/scripts/dae/lib/schema.sql similarity index 100% rename from scripts/lib/schema.sql rename to scripts/dae/lib/schema.sql diff --git a/scripts/dae/package.json b/scripts/dae/package.json new file mode 100644 index 0000000..fc4b49d --- /dev/null +++ b/scripts/dae/package.json @@ -0,0 +1,18 @@ +{ + "name": "geodae-pipeline", + "version": "1.0.0", + "private": true, + "type": "module", + 
"packageManager": "yarn@4.5.3", + "scripts": { + "json-to-csv": "node geodae-to-csv.js", + "csv-to-db": "node csv-to-sqlite.mjs --input geodae.csv --output ../../src/assets/db/geodae.db", + "csv-to-db:semicolon": "node csv-to-sqlite.mjs --input geodae.csv --output ../../src/assets/db/geodae.db --delimiter ';'", + "build": "yarn json-to-csv && yarn csv-to-db" + }, + "dependencies": { + "better-sqlite3": "^11.7.0", + "csv-parse": "^5.6.0", + "h3-js": "^4.2.1" + } +} diff --git a/scripts/yarn.lock b/scripts/dae/yarn.lock similarity index 100% rename from scripts/yarn.lock rename to scripts/dae/yarn.lock diff --git a/scripts/package.json b/scripts/package.json deleted file mode 100644 index 50d2e5f..0000000 --- a/scripts/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": "geodae-pipeline", - "version": "1.0.0", - "private": true, - "type": "module", - "packageManager": "yarn@4.5.3", - "scripts": { - "build-db": "node csv-to-sqlite.mjs --input ../.data/geodae.csv --output ../src/assets/db/geodae.db", - "build-db:semicolon": "node csv-to-sqlite.mjs --input ../.data/geodae.csv --output ../src/assets/db/geodae.db --delimiter ';'" - }, - "dependencies": { - "better-sqlite3": "^11.7.0", - "csv-parse": "^5.6.0", - "h3-js": "^4.2.1" - } -}