diff --git a/scripts/csv-to-sqlite.mjs b/scripts/csv-to-sqlite.mjs index 12365de..02a2732 100644 --- a/scripts/csv-to-sqlite.mjs +++ b/scripts/csv-to-sqlite.mjs @@ -9,6 +9,7 @@ import { dirname, resolve } from "node:path"; import { fileURLToPath } from "node:url"; import { createRequire } from "node:module"; import { parse } from "csv-parse"; +import { normalizeHoraires } from "./lib/normalize-horaires.mjs"; const require = createRequire(import.meta.url); const Database = require("better-sqlite3"); @@ -107,8 +108,8 @@ async function main() { // Prepare insert statement const insert = db.prepare( - `INSERT OR IGNORE INTO defibs (id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h, h3) - VALUES (@id, @latitude, @longitude, @nom, @adresse, @horaires, @acces, @disponible_24h, @h3)` + `INSERT OR IGNORE INTO defibs (id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h, h3) + VALUES (@id, @latitude, @longitude, @nom, @adresse, @horaires, @horaires_std, @acces, @disponible_24h, @h3)` ); const insertMany = db.transaction((rows) => { @@ -152,6 +153,7 @@ async function main() { const disponible_24h = cleanInt(record.disponible_24h); const id = deterministicId(lat, lon, nom, adresse); const h3Cell = computeH3(lat, lon, H3_RES); + const horairesStd = normalizeHoraires(horaires, disponible_24h); batch.push({ id, @@ -160,6 +162,7 @@ async function main() { nom, adresse, horaires, + horaires_std: JSON.stringify(horairesStd), acces, disponible_24h, h3: h3Cell, diff --git a/scripts/lib/normalize-horaires.mjs b/scripts/lib/normalize-horaires.mjs new file mode 100644 index 0000000..e4de249 --- /dev/null +++ b/scripts/lib/normalize-horaires.mjs @@ -0,0 +1,228 @@ +// Deterministic normalizer for French opening hours (horaires) strings. +// Outputs a structured object that a simple JSON parser can consume without heuristics. +// +// Output shape: +// { days: number[]|null, slots: {open,close}[]|null, is24h, businessHours, nightHours, events, notes } +// +// days: ISO 8601 day numbers (1=Mon … 7=Sun), null if unknown +// slots: [{open:"HH:MM", close:"HH:MM"}], null if no specific times +// is24h: available 24 hours +// businessHours: "heures ouvrables" was specified +// nightHours: "heures de nuit" was specified +// events: availability depends on events +// notes: unparsed/remaining text (seasonal info, conditions, etc.) + +const DAY_MAP = { lun: 1, mar: 2, mer: 3, jeu: 4, ven: 5, sam: 6, dim: 7 }; +const ALL_DAYS = [1, 2, 3, 4, 5, 6, 7]; + +// --- Day prefix extraction --- + +const SEVEN_DAYS_RE = /^7\s*j?\s*[/]\s*7\s*j?/i; +const DAY_RANGE_RE = + /^(lun|mar|mer|jeu|ven|sam|dim)\s*-\s*(lun|mar|mer|jeu|ven|sam|dim)/i; +const DAY_LIST_RE = + /^((lun|mar|mer|jeu|ven|sam|dim)(\s*,\s*(lun|mar|mer|jeu|ven|sam|dim))+)/i; +const DAY_SINGLE_RE = /^(lun|mar|mer|jeu|ven|sam|dim)\b/i; + +function dayRange(startName, endName) { + const start = DAY_MAP[startName.toLowerCase()]; + const end = DAY_MAP[endName.toLowerCase()]; + const days = []; + let d = start; + do { + days.push(d); + if (d === end) break; + d = (d % 7) + 1; + } while (days.length <= 7); + return days; +} + +function extractDayPrefix(text) { + const m7 = text.match(SEVEN_DAYS_RE); + if (m7) return { days: [...ALL_DAYS], end: m7[0].length }; + + const mRange = text.match(DAY_RANGE_RE); + if (mRange) + return { + days: dayRange(mRange[1], mRange[2]), + end: mRange[0].length, + }; + + const mList = text.match(DAY_LIST_RE); + if (mList) { + const names = mList[0].split(/\s*,\s*/); + return { + days: names.map((n) => DAY_MAP[n.trim().toLowerCase()]).filter(Boolean), + end: mList[0].length, + }; + } + + const mSingle = text.match(DAY_SINGLE_RE); + if (mSingle) + return { days: [DAY_MAP[mSingle[1].toLowerCase()]], end: mSingle[0].length }; + + return null; +} + +// --- Redundant day info stripping --- + +function stripRedundantDays(text) { + return ( + text + // "7J/7", "7j/7", "7/7", "7j/7j" + .replace(/\b7\s*[jJ]?\s*[/]\s*7\s*[jJ]?\b/g, "") + // "L au V", "Ma à D" (short abbreviations) + .replace( + /\b(?:L|Ma|Me|J|V|S|D)\s+(?:au|à)\s+(?:L|Ma|Me|J|V|S|D)\b/gi, + "" + ) + // "du lundi au dimanche" (full names) + .replace( + /\b(?:du\s+)?(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\s+(?:au|à)\s+(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\b/gi, + "" + ) + // "L au V" using abbreviated day names from data: "L Ma Me J V S D" + .replace( + /\b[LMJVSD]\s+(?:au|à)\s+[LMJVSD]\b/gi, + "" + ) + .replace(/^[,;:\-\s]+/, "") + .trim() + ); +} + +// --- Time slot extraction --- + +function fmtTime(h, m) { + const hh = parseInt(h, 10); + const mm = parseInt(m || "0", 10); + if (hh < 0 || hh > 24 || mm < 0 || mm > 59) return null; + return `${String(hh).padStart(2, "0")}:${String(mm).padStart(2, "0")}`; +} + +// Matches: 8h30/17h30, 8h-18h, 08:00-18:00, 8h à 18h, 8h a 18h +// IMPORTANT: no \s* between [:h] and (\d{0,2}) — minutes must be adjacent +// to the separator, otherwise "8h/12h 14h/17h" would merge into one match. +const TIME_RANGE_RE = + /(\d{1,2})\s*[:h](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:h](\d{0,2})/g; + +// Matches standalone: 8h30, 14h (minutes adjacent to h) +const TIME_POINT_RE = /(\d{1,2})\s*h(\d{0,2})/g; + +function extractTimeSlots(text) { + const slots = []; + + // Pass 1: explicit ranges (8h/18h, 8h-18h, 08:00-18:00) + const re1 = new RegExp(TIME_RANGE_RE.source, "g"); + let match; + while ((match = re1.exec(text)) !== null) { + const open = fmtTime(match[1], match[2]); + const close = fmtTime(match[3], match[4]); + if (open && close) slots.push({ open, close }); + } + if (slots.length > 0) return slots; + + // Pass 2: pair standalone time points (7h 17h → {07:00, 17:00}) + const re2 = new RegExp(TIME_POINT_RE.source, "g"); + const points = []; + while ((match = re2.exec(text)) !== null) { + const t = fmtTime(match[1], match[2]); + if (t) points.push(t); + } + for (let i = 0; i + 1 < points.length; i += 2) { + slots.push({ open: points[i], close: points[i + 1] }); + } + + return slots; +} + +function removeTimeTokens(text) { + return text + .replace( + /(\d{1,2})\s*[:h](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:h](\d{0,2})/g, + "" + ) + .replace(/(\d{1,2})\s*h(\d{0,2})/g, "") + .trim(); +} + +// --- Main normalizer --- + +export function normalizeHoraires(raw, disponible24h) { + const result = { + days: null, + slots: null, + is24h: disponible24h === 1, + businessHours: false, + nightHours: false, + events: false, + notes: "", + }; + + if (disponible24h === 1) { + result.days = [...ALL_DAYS]; + } + + if (!raw || raw.trim() === "") return result; + + let text = raw.trim(); + + // 1. Extract day prefix + const dayPrefix = extractDayPrefix(text); + if (dayPrefix) { + if (!result.days) result.days = dayPrefix.days; + text = text.slice(dayPrefix.end).trim(); + // Strip leading comma/semicolon + optional modifiers after day prefix + text = text.replace(/^[,;]\s*/, ""); + } + + // 2. "jours fériés" modifier (informational, strip it) + text = text.replace(/,?\s*jours?\s+f[ée]ri[ée]s?\s*/gi, "").trim(); + + // 3. 24h/24 detection + if (/24\s*h?\s*[/]\s*24\s*h?/i.test(text)) { + result.is24h = true; + text = text.replace(/24\s*h?\s*[/]\s*24\s*h?/gi, "").trim(); + if (!result.days) result.days = [...ALL_DAYS]; + } + + // 4. "heures ouvrables" + if (/heures?\s+ouvrables?/i.test(text)) { + result.businessHours = true; + text = text.replace(/heures?\s+ouvrables?/gi, "").trim(); + } + + // 5. "heures de nuit" + if (/heures?\s+de\s+nuit/i.test(text)) { + result.nightHours = true; + text = text.replace(/heures?\s+de\s+nuit/gi, "").trim(); + } + + // 6. "événements" + if (/[ée]v[éè]nements?/i.test(text)) { + result.events = true; + text = text.replace(/[ée]v[éè]nements?/gi, "").trim(); + } + + // 7. Strip redundant day info (e.g., "7J/7", "L au V") + text = stripRedundantDays(text); + + // 8. Extract time slots (max 4 to cover morning+afternoon+evening combos) + if (!result.is24h) { + const slots = extractTimeSlots(text); + if (slots.length > 0) { + result.slots = slots.slice(0, 4); + text = removeTimeTokens(text); + } + } + + // 9. Clean remaining text → notes + text = text + .replace(/^[;,\-/+.\s]+/, "") + .replace(/[;,\-/+.\s]+$/, "") + .replace(/\s+/g, " ") + .trim(); + if (text) result.notes = text; + + return result; +} diff --git a/scripts/lib/schema.sql b/scripts/lib/schema.sql index d4ea556..9f63375 100644 --- a/scripts/lib/schema.sql +++ b/scripts/lib/schema.sql @@ -5,6 +5,7 @@ CREATE TABLE IF NOT EXISTS defibs ( nom TEXT NOT NULL DEFAULT '', adresse TEXT NOT NULL DEFAULT '', horaires TEXT NOT NULL DEFAULT '', + horaires_std TEXT NOT NULL DEFAULT '{}', acces TEXT NOT NULL DEFAULT '', disponible_24h INTEGER NOT NULL DEFAULT 0, h3 TEXT NOT NULL DEFAULT '' diff --git a/src/db/defibsRepo.js b/src/db/defibsRepo.js index a37da9a..1a92f39 100644 --- a/src/db/defibsRepo.js +++ b/src/db/defibsRepo.js @@ -43,6 +43,14 @@ function bboxClause(lat, lon, radiusMeters) { * @property {string} nom * @property {string} adresse * @property {string} horaires + * @property {Object} horaires_std + * @property {number[]|null} horaires_std.days - ISO 8601 day numbers (1=Mon…7=Sun) + * @property {{open:string,close:string}[]|null} horaires_std.slots - Time ranges + * @property {boolean} horaires_std.is24h + * @property {boolean} horaires_std.businessHours + * @property {boolean} horaires_std.nightHours + * @property {boolean} horaires_std.events + * @property {string} horaires_std.notes * @property {string} acces * @property {number} disponible_24h */ @@ -117,7 +125,7 @@ async function queryCells(db, cells, dispo24h) { const chunk = cells.slice(i, i + SQL_VAR_LIMIT); const placeholders = chunk.map(() => "?").join(","); - let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h + let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h FROM defibs WHERE h3 IN (${placeholders})`; const params = [...chunk]; @@ -132,13 +140,22 @@ async function queryCells(db, cells, dispo24h) { return results; } +// Parse horaires_std JSON string into object. +function parseHorairesStd(row) { + try { + return { ...row, horaires_std: JSON.parse(row.horaires_std) }; + } catch { + return { ...row, horaires_std: null }; + } +} + // Compute distance, filter by radius, sort, and limit. function rankAndFilter(candidates, lat, lon, radiusMeters, limit) { const withDist = []; for (const row of candidates) { const distanceMeters = haversine(lat, lon, row.latitude, row.longitude); if (distanceMeters <= radiusMeters) { - withDist.push({ ...row, distanceMeters }); + withDist.push({ ...parseHorairesStd(row), distanceMeters }); } } withDist.sort((a, b) => a.distanceMeters - b.distanceMeters); @@ -166,7 +183,7 @@ export async function getNearbyDefibsBbox({ const db = await getDb(); const { clause, params } = bboxClause(lat, lon, radiusMeters); - let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h + let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h FROM defibs WHERE ${clause}`; if (disponible24hOnly) { sql += " AND disponible_24h = 1";