fix: normalize horaire

This commit is contained in:
devthejo 2026-03-05 18:02:47 +01:00
parent 31970b86fc
commit e3c1ffe0f1
No known key found for this signature in database
GPG key ID: 00CCA7A92B1D5351
4 changed files with 254 additions and 5 deletions

View file

@ -9,6 +9,7 @@ import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
import { createRequire } from "node:module"; import { createRequire } from "node:module";
import { parse } from "csv-parse"; import { parse } from "csv-parse";
import { normalizeHoraires } from "./lib/normalize-horaires.mjs";
const require = createRequire(import.meta.url); const require = createRequire(import.meta.url);
const Database = require("better-sqlite3"); const Database = require("better-sqlite3");
@ -107,8 +108,8 @@ async function main() {
// Prepare insert statement // Prepare insert statement
const insert = db.prepare( const insert = db.prepare(
`INSERT OR IGNORE INTO defibs (id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h, h3) `INSERT OR IGNORE INTO defibs (id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h, h3)
VALUES (@id, @latitude, @longitude, @nom, @adresse, @horaires, @acces, @disponible_24h, @h3)` VALUES (@id, @latitude, @longitude, @nom, @adresse, @horaires, @horaires_std, @acces, @disponible_24h, @h3)`
); );
const insertMany = db.transaction((rows) => { const insertMany = db.transaction((rows) => {
@ -152,6 +153,7 @@ async function main() {
const disponible_24h = cleanInt(record.disponible_24h); const disponible_24h = cleanInt(record.disponible_24h);
const id = deterministicId(lat, lon, nom, adresse); const id = deterministicId(lat, lon, nom, adresse);
const h3Cell = computeH3(lat, lon, H3_RES); const h3Cell = computeH3(lat, lon, H3_RES);
const horairesStd = normalizeHoraires(horaires, disponible_24h);
batch.push({ batch.push({
id, id,
@ -160,6 +162,7 @@ async function main() {
nom, nom,
adresse, adresse,
horaires, horaires,
horaires_std: JSON.stringify(horairesStd),
acces, acces,
disponible_24h, disponible_24h,
h3: h3Cell, h3: h3Cell,

View file

@ -0,0 +1,228 @@
// Deterministic normalizer for French opening hours (horaires) strings.
// Outputs a structured object that a simple JSON parser can consume without heuristics.
//
// Output shape:
// { days: number[]|null, slots: {open,close}[]|null, is24h, businessHours, nightHours, events, notes }
//
// days: ISO 8601 day numbers (1=Mon … 7=Sun), null if unknown
// slots: [{open:"HH:MM", close:"HH:MM"}], null if no specific times
// is24h: available 24 hours
// businessHours: "heures ouvrables" was specified
// nightHours: "heures de nuit" was specified
// events: availability depends on events
// notes: unparsed/remaining text (seasonal info, conditions, etc.)
// Three-letter lowercase French day abbreviations mapped to ISO 8601 day
// numbers (1=Mon … 7=Sun). Lookups must lowercase the name first.
const DAY_MAP = { lun: 1, mar: 2, mer: 3, jeu: 4, ven: 5, sam: 6, dim: 7 };
// Every ISO day number, Monday first. Users spread it into a fresh array
// rather than handing out this shared instance.
const ALL_DAYS = [1, 2, 3, 4, 5, 6, 7];
// --- Day prefix extraction ---
// All four patterns are anchored at the start of the string (^) and are
// case-insensitive; none of them carries the global flag.
// "7/7", "7j/7", "7j/7j", with optional whitespace around each part.
const SEVEN_DAYS_RE = /^7\s*j?\s*[/]\s*7\s*j?/i;
// Two abbreviations joined by a hyphen, e.g. "lun-ven".
// Capture 1 = first day, capture 2 = last day.
const DAY_RANGE_RE =
/^(lun|mar|mer|jeu|ven|sam|dim)\s*-\s*(lun|mar|mer|jeu|ven|sam|dim)/i;
// Comma-separated list of two or more abbreviations, e.g. "lun, mer, ven".
// Capture 1 (and the whole match) is the complete list text.
const DAY_LIST_RE =
/^((lun|mar|mer|jeu|ven|sam|dim)(\s*,\s*(lun|mar|mer|jeu|ven|sam|dim))+)/i;
// A single abbreviation followed by a word boundary: "lun" matches, the full
// name "lundi" does not.
const DAY_SINGLE_RE = /^(lun|mar|mer|jeu|ven|sam|dim)\b/i;
/**
 * Expand an inclusive range of day abbreviations into ISO day numbers.
 *
 * The range walks forward from the start day and wraps from Sunday (7) back
 * to Monday (1), so "sam" → "lun" yields [6, 7, 1].
 *
 * @param {string} startName - First day abbreviation (any case), a DAY_MAP key.
 * @param {string} endName - Last day abbreviation (any case), a DAY_MAP key.
 * @returns {number[]} Day numbers from start to end, both included.
 */
function dayRange(startName, endName) {
  const first = DAY_MAP[startName.toLowerCase()];
  const last = DAY_MAP[endName.toLowerCase()];
  const span = [first];
  let current = first;
  // The length bound is a safety net: it stops the walk even if `last` is
  // never reached.
  while (current !== last && span.length <= 7) {
    current = (current % 7) + 1;
    span.push(current);
  }
  return span;
}
function extractDayPrefix(text) {
const m7 = text.match(SEVEN_DAYS_RE);
if (m7) return { days: [...ALL_DAYS], end: m7[0].length };
const mRange = text.match(DAY_RANGE_RE);
if (mRange)
return {
days: dayRange(mRange[1], mRange[2]),
end: mRange[0].length,
};
const mList = text.match(DAY_LIST_RE);
if (mList) {
const names = mList[0].split(/\s*,\s*/);
return {
days: names.map((n) => DAY_MAP[n.trim().toLowerCase()]).filter(Boolean),
end: mList[0].length,
};
}
const mSingle = text.match(DAY_SINGLE_RE);
if (mSingle)
return { days: [DAY_MAP[mSingle[1].toLowerCase()]], end: mSingle[0].length };
return null;
}
// --- Redundant day info stripping ---
/**
 * Remove day-of-week phrases from `text`, then drop any leading separators
 * the removal left behind.
 *
 * @param {string} text - Remaining opening-hours text.
 * @returns {string} The text without the recognised day phrases, trimmed.
 */
function stripRedundantDays(text) {
  // Applied in this exact order; each pattern removes every occurrence.
  const dayPhrases = [
    // "7J/7", "7j/7", "7/7", "7j/7j"
    /\b7\s*[jJ]?\s*[/]\s*7\s*[jJ]?\b/g,
    // "L au V", "Ma à D" (short abbreviations)
    /\b(?:L|Ma|Me|J|V|S|D)\s+(?:au|à)\s+(?:L|Ma|Me|J|V|S|D)\b/gi,
    // "du lundi au dimanche" (full names, "du" optional)
    /\b(?:du\s+)?(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\s+(?:au|à)\s+(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\b/gi,
    // Single-letter form "L au V" drawn from "L M J V S D"
    /\b[LMJVSD]\s+(?:au|à)\s+[LMJVSD]\b/gi,
  ];

  let remaining = text;
  for (const phrase of dayPhrases) {
    remaining = remaining.replace(phrase, "");
  }

  // Leading punctuation or whitespace orphaned by the removals above.
  const withoutLeadingSeparators = remaining.replace(/^[,;:\-\s]+/, "");
  return withoutLeadingSeparators.trim();
}
// --- Time slot extraction ---
/**
 * Format an hour/minute pair captured by the time regexes as "HH:MM".
 *
 * "24:00" is accepted as an end-of-day marker, but hour 24 with non-zero
 * minutes (e.g. "24h30") is not a valid time of day and is rejected, as is
 * any input that does not parse as a number.
 *
 * @param {string} h - Hour digits.
 * @param {string} [m] - Minute digits; empty or missing means 0.
 * @returns {string|null} Zero-padded "HH:MM", or null when out of range.
 */
function fmtTime(h, m) {
  const hh = Number.parseInt(h, 10);
  const mm = Number.parseInt(m || "0", 10);
  // NaN slips through range comparisons, so reject it explicitly.
  if (Number.isNaN(hh) || Number.isNaN(mm)) return null;
  if (hh < 0 || hh > 24 || mm < 0 || mm > 59) return null;
  // 24 is only meaningful as exactly 24:00.
  if (hh === 24 && mm !== 0) return null;
  return `${String(hh).padStart(2, "0")}:${String(mm).padStart(2, "0")}`;
}
// Matches: 8h30/17h30, 8h-18h, 08:00-18:00, 8h à 18h, 8h a 18h
// Captures: (1) opening hour, (2) opening minutes or "", (3) closing hour,
// (4) closing minutes or "". Accepted separators between the two times are
// "-", "/", "à" and the standalone word "a".
// IMPORTANT: no \s* between [:h] and (\d{0,2}) — minutes must be adjacent
// to the separator, otherwise "8h/12h 14h/17h" would merge into one match.
const TIME_RANGE_RE =
/(\d{1,2})\s*[:h](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:h](\d{0,2})/g;
// Matches standalone: 8h30, 14h (minutes adjacent to h)
// Captures: (1) hour, (2) minutes or "". Only "h" is accepted here, not ":".
const TIME_POINT_RE = /(\d{1,2})\s*h(\d{0,2})/g;
/**
 * Pull opening/closing time slots out of `text`.
 *
 * Explicit ranges ("8h-18h", "08:00-18:00") take priority. Only when none
 * yields a valid slot are standalone time points ("7h 17h") paired up two
 * by two in order of appearance; a trailing unpaired point is ignored.
 *
 * @param {string} text - Opening-hours text.
 * @returns {{open: string, close: string}[]} Slots as "HH:MM" pairs, possibly empty.
 */
function extractTimeSlots(text) {
  // Fresh copies of the shared global regexes, so their lastIndex state is
  // never carried across calls.
  const rangePattern = new RegExp(TIME_RANGE_RE.source, "g");
  const ranges = [];
  for (let hit = rangePattern.exec(text); hit !== null; hit = rangePattern.exec(text)) {
    const [, openHour, openMinute, closeHour, closeMinute] = hit;
    const open = fmtTime(openHour, openMinute);
    const close = fmtTime(closeHour, closeMinute);
    if (open && close) ranges.push({ open, close });
  }
  if (ranges.length > 0) return ranges;

  // Fallback: collect every valid standalone point, then pair neighbours.
  const pointPattern = new RegExp(TIME_POINT_RE.source, "g");
  const points = [];
  for (let hit = pointPattern.exec(text); hit !== null; hit = pointPattern.exec(text)) {
    const [, hour, minute] = hit;
    const formatted = fmtTime(hour, minute);
    if (formatted) points.push(formatted);
  }

  const paired = [];
  for (let closeIdx = 1; closeIdx < points.length; closeIdx += 2) {
    paired.push({ open: points[closeIdx - 1], close: points[closeIdx] });
  }
  return paired;
}
/**
 * Delete every time range and standalone time point from `text`.
 *
 * Ranges are removed first so that a range is taken out as one unit rather
 * than as two separate points with the separator left behind.
 *
 * @param {string} text - Opening-hours text.
 * @returns {string} The text without time tokens, trimmed at both ends
 *   (interior whitespace is left untouched).
 */
function removeTimeTokens(text) {
  const rangePattern =
    /(\d{1,2})\s*[:h](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:h](\d{0,2})/g;
  const pointPattern = /(\d{1,2})\s*h(\d{0,2})/g;

  const withoutRanges = text.replace(rangePattern, "");
  const withoutPoints = withoutRanges.replace(pointPattern, "");
  return withoutPoints.trim();
}
// --- Main normalizer ---
/**
 * Normalize a free-text French opening-hours string into a structured object.
 *
 * Processing order matters: the day prefix is consumed first, then keyword
 * markers are detected and removed, then time slots are extracted, and
 * whatever text survives becomes `notes`.
 *
 * @param {string|null|undefined} raw - Raw "horaires" text; empty or missing
 *   input yields the default result.
 * @param {number} disponible24h - Pass exactly 1 to mark the entry as
 *   available around the clock; that also sets `days` to all seven days.
 * @returns {{days: number[]|null, slots: {open: string, close: string}[]|null,
 *   is24h: boolean, businessHours: boolean, nightHours: boolean,
 *   events: boolean, notes: string}} Normalized opening hours.
 */
export function normalizeHoraires(raw, disponible24h) {
  const alwaysOpen = disponible24h === 1;
  const result = {
    days: alwaysOpen ? [...ALL_DAYS] : null,
    slots: null,
    is24h: alwaysOpen,
    businessHours: false,
    nightHours: false,
    events: false,
    notes: "",
  };

  if (!raw || raw.trim() === "") return result;

  let rest = raw.trim();

  // 1. Leading day specification. An explicit 24h flag keeps precedence over
  //    the parsed days, but the prefix is removed from the text either way.
  const prefix = extractDayPrefix(rest);
  if (prefix) {
    if (result.days === null) result.days = prefix.days;
    rest = rest
      .slice(prefix.end)
      .trim()
      .replace(/^[,;]\s*/, "");
  }

  // 2. "jours fériés" is informational only: drop it.
  rest = rest.replace(/,?\s*jours?\s+f[ée]ri[ée]s?\s*/gi, "").trim();

  // 3. "24h/24" marks round-the-clock availability and, when no days are
  //    known yet, implies every day.
  if (/24\s*h?\s*[/]\s*24\s*h?/i.test(rest)) {
    result.is24h = true;
    rest = rest.replace(/24\s*h?\s*[/]\s*24\s*h?/gi, "").trim();
    if (result.days === null) result.days = [...ALL_DAYS];
  }

  // 4-6. Keyword markers: [result flag, detection pattern, removal pattern].
  const keywordMarkers = [
    ["businessHours", /heures?\s+ouvrables?/i, /heures?\s+ouvrables?/gi],
    ["nightHours", /heures?\s+de\s+nuit/i, /heures?\s+de\s+nuit/gi],
    ["events", /[ée]v[éè]nements?/i, /[ée]v[éè]nements?/gi],
  ];
  for (const [flag, probe, strip] of keywordMarkers) {
    if (!probe.test(rest)) continue;
    result[flag] = true;
    rest = rest.replace(strip, "").trim();
  }

  // 7. Day phrases that repeat what is already known ("7J/7", "L au V").
  rest = stripRedundantDays(rest);

  // 8. Time slots are skipped for 24h entries; at most four are kept
  //    (morning + afternoon + evening combinations).
  if (!result.is24h) {
    const found = extractTimeSlots(rest);
    if (found.length > 0) {
      result.slots = found.slice(0, 4);
      rest = removeTimeTokens(rest);
    }
  }

  // 9. Trim stray separators, collapse whitespace; leftovers become notes.
  const leftover = rest
    .replace(/^[;,\-/+.\s]+/, "")
    .replace(/[;,\-/+.\s]+$/, "")
    .replace(/\s+/g, " ")
    .trim();
  if (leftover) result.notes = leftover;

  return result;
}

View file

@ -5,6 +5,7 @@ CREATE TABLE IF NOT EXISTS defibs (
nom TEXT NOT NULL DEFAULT '', nom TEXT NOT NULL DEFAULT '',
adresse TEXT NOT NULL DEFAULT '', adresse TEXT NOT NULL DEFAULT '',
horaires TEXT NOT NULL DEFAULT '', horaires TEXT NOT NULL DEFAULT '',
horaires_std TEXT NOT NULL DEFAULT '{}',
acces TEXT NOT NULL DEFAULT '', acces TEXT NOT NULL DEFAULT '',
disponible_24h INTEGER NOT NULL DEFAULT 0, disponible_24h INTEGER NOT NULL DEFAULT 0,
h3 TEXT NOT NULL DEFAULT '' h3 TEXT NOT NULL DEFAULT ''

View file

@ -43,6 +43,14 @@ function bboxClause(lat, lon, radiusMeters) {
* @property {string} nom * @property {string} nom
* @property {string} adresse * @property {string} adresse
* @property {string} horaires * @property {string} horaires
* @property {Object} horaires_std
* @property {number[]|null} horaires_std.days - ISO 8601 day numbers (1=Mon … 7=Sun)
* @property {{open:string,close:string}[]|null} horaires_std.slots - Time ranges
* @property {boolean} horaires_std.is24h
* @property {boolean} horaires_std.businessHours
* @property {boolean} horaires_std.nightHours
* @property {boolean} horaires_std.events
* @property {string} horaires_std.notes
* @property {string} acces * @property {string} acces
* @property {number} disponible_24h * @property {number} disponible_24h
*/ */
@ -117,7 +125,7 @@ async function queryCells(db, cells, dispo24h) {
const chunk = cells.slice(i, i + SQL_VAR_LIMIT); const chunk = cells.slice(i, i + SQL_VAR_LIMIT);
const placeholders = chunk.map(() => "?").join(","); const placeholders = chunk.map(() => "?").join(",");
let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h
FROM defibs WHERE h3 IN (${placeholders})`; FROM defibs WHERE h3 IN (${placeholders})`;
const params = [...chunk]; const params = [...chunk];
@ -132,13 +140,22 @@ async function queryCells(db, cells, dispo24h) {
return results; return results;
} }
// Return a copy of `row` whose horaires_std JSON text is decoded into a value.
// Text that cannot be parsed decodes to null; the input row is not modified.
function parseHorairesStd(row) {
  let decoded;
  try {
    decoded = JSON.parse(row.horaires_std);
  } catch {
    // Best effort: a malformed or missing column must not fail the query.
    decoded = null;
  }
  return { ...row, horaires_std: decoded };
}
// Compute distance, filter by radius, sort, and limit. // Compute distance, filter by radius, sort, and limit.
function rankAndFilter(candidates, lat, lon, radiusMeters, limit) { function rankAndFilter(candidates, lat, lon, radiusMeters, limit) {
const withDist = []; const withDist = [];
for (const row of candidates) { for (const row of candidates) {
const distanceMeters = haversine(lat, lon, row.latitude, row.longitude); const distanceMeters = haversine(lat, lon, row.latitude, row.longitude);
if (distanceMeters <= radiusMeters) { if (distanceMeters <= radiusMeters) {
withDist.push({ ...row, distanceMeters }); withDist.push({ ...parseHorairesStd(row), distanceMeters });
} }
} }
withDist.sort((a, b) => a.distanceMeters - b.distanceMeters); withDist.sort((a, b) => a.distanceMeters - b.distanceMeters);
@ -166,7 +183,7 @@ export async function getNearbyDefibsBbox({
const db = await getDb(); const db = await getDb();
const { clause, params } = bboxClause(lat, lon, radiusMeters); const { clause, params } = bboxClause(lat, lon, radiusMeters);
let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h
FROM defibs WHERE ${clause}`; FROM defibs WHERE ${clause}`;
if (disponible24hOnly) { if (disponible24hOnly) {
sql += " AND disponible_24h = 1"; sql += " AND disponible_24h = 1";