fix: normalize horaire
This commit is contained in:
parent
31970b86fc
commit
e3c1ffe0f1
4 changed files with 254 additions and 5 deletions
|
|
@ -9,6 +9,7 @@ import { dirname, resolve } from "node:path";
|
||||||
import { fileURLToPath } from "node:url";
|
import { fileURLToPath } from "node:url";
|
||||||
import { createRequire } from "node:module";
|
import { createRequire } from "node:module";
|
||||||
import { parse } from "csv-parse";
|
import { parse } from "csv-parse";
|
||||||
|
import { normalizeHoraires } from "./lib/normalize-horaires.mjs";
|
||||||
|
|
||||||
const require = createRequire(import.meta.url);
|
const require = createRequire(import.meta.url);
|
||||||
const Database = require("better-sqlite3");
|
const Database = require("better-sqlite3");
|
||||||
|
|
@ -107,8 +108,8 @@ async function main() {
|
||||||
|
|
||||||
// Prepare insert statement
|
// Prepare insert statement
|
||||||
const insert = db.prepare(
|
const insert = db.prepare(
|
||||||
`INSERT OR IGNORE INTO defibs (id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h, h3)
|
`INSERT OR IGNORE INTO defibs (id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h, h3)
|
||||||
VALUES (@id, @latitude, @longitude, @nom, @adresse, @horaires, @acces, @disponible_24h, @h3)`
|
VALUES (@id, @latitude, @longitude, @nom, @adresse, @horaires, @horaires_std, @acces, @disponible_24h, @h3)`
|
||||||
);
|
);
|
||||||
|
|
||||||
const insertMany = db.transaction((rows) => {
|
const insertMany = db.transaction((rows) => {
|
||||||
|
|
@ -152,6 +153,7 @@ async function main() {
|
||||||
const disponible_24h = cleanInt(record.disponible_24h);
|
const disponible_24h = cleanInt(record.disponible_24h);
|
||||||
const id = deterministicId(lat, lon, nom, adresse);
|
const id = deterministicId(lat, lon, nom, adresse);
|
||||||
const h3Cell = computeH3(lat, lon, H3_RES);
|
const h3Cell = computeH3(lat, lon, H3_RES);
|
||||||
|
const horairesStd = normalizeHoraires(horaires, disponible_24h);
|
||||||
|
|
||||||
batch.push({
|
batch.push({
|
||||||
id,
|
id,
|
||||||
|
|
@ -160,6 +162,7 @@ async function main() {
|
||||||
nom,
|
nom,
|
||||||
adresse,
|
adresse,
|
||||||
horaires,
|
horaires,
|
||||||
|
horaires_std: JSON.stringify(horairesStd),
|
||||||
acces,
|
acces,
|
||||||
disponible_24h,
|
disponible_24h,
|
||||||
h3: h3Cell,
|
h3: h3Cell,
|
||||||
|
|
|
||||||
228
scripts/lib/normalize-horaires.mjs
Normal file
228
scripts/lib/normalize-horaires.mjs
Normal file
|
|
@ -0,0 +1,228 @@
|
||||||
|
// Deterministic normalizer for French opening hours (horaires) strings.
|
||||||
|
// Outputs a structured object that a simple JSON parser can consume without heuristics.
|
||||||
|
//
|
||||||
|
// Output shape:
|
||||||
|
// { days: number[]|null, slots: {open,close}[]|null, is24h, businessHours, nightHours, events, notes }
|
||||||
|
//
|
||||||
|
// days: ISO 8601 day numbers (1=Mon … 7=Sun), null if unknown
|
||||||
|
// slots: [{open:"HH:MM", close:"HH:MM"}], null if no specific times
|
||||||
|
// is24h: available 24 hours
|
||||||
|
// businessHours: "heures ouvrables" was specified
|
||||||
|
// nightHours: "heures de nuit" was specified
|
||||||
|
// events: availability depends on events
|
||||||
|
// notes: unparsed/remaining text (seasonal info, conditions, etc.)
|
||||||
|
|
||||||
|
// ISO 8601 day numbers keyed by the lowercase three-letter French abbreviation.
const DAY_MAP = Object.freeze({
  lun: 1,
  mar: 2,
  mer: 3,
  jeu: 4,
  ven: 5,
  sam: 6,
  dim: 7,
});
// Every ISO day number, Monday through Sunday. Frozen: copy it before storing.
const ALL_DAYS = Object.freeze([1, 2, 3, 4, 5, 6, 7]);

// --- Day prefix extraction ---

// "7j/7", "7/7", "7J/7J", "7 jours/7", "7/7 jours".
// The unit after the second "7" is only consumed when it is a complete token
// ("j", "jour", "jours"); otherwise "7/7 jours" would match "7/7 j" and leave
// the stray text "ours" behind.
const SEVEN_DAYS_RE = /^7\s*(?:jours?|j)?\s*[/]\s*7(?:\s*(?:jours?|j)\b)?/i;
// "lun-ven": two abbreviations joined by a dash.
const DAY_RANGE_RE =
  /^(lun|mar|mer|jeu|ven|sam|dim)\s*-\s*(lun|mar|mer|jeu|ven|sam|dim)/i;
// "lun, mer, ven": two or more comma-separated abbreviations.
const DAY_LIST_RE =
  /^((lun|mar|mer|jeu|ven|sam|dim)(\s*,\s*(lun|mar|mer|jeu|ven|sam|dim))+)/i;
// A single abbreviation standing alone as a word ("sam", not "samedi").
const DAY_SINGLE_RE = /^(lun|mar|mer|jeu|ven|sam|dim)\b/i;
|
||||||
|
|
||||||
|
/**
 * Expand an inclusive day range into ISO day numbers, wrapping past Sunday
 * back to Monday when the end day precedes the start day (e.g. sam → mar).
 *
 * @param {string} startName - Day abbreviation looked up (lowercased) in DAY_MAP.
 * @param {string} endName - Day abbreviation looked up (lowercased) in DAY_MAP.
 * @returns {number[]} Day numbers from start to end, in order.
 */
function dayRange(startName, endName) {
  const first = DAY_MAP[startName.toLowerCase()];
  const last = DAY_MAP[endName.toLowerCase()];
  const collected = [first];
  // The length cap is a safety net so an unknown end day cannot loop forever.
  for (let current = first; current !== last && collected.length <= 7; ) {
    current = (current % 7) + 1;
    collected.push(current);
  }
  return collected;
}
|
||||||
|
|
||||||
|
/**
 * Recognize a day specification at the very start of `text`.
 *
 * Patterns are tried in order: whole week ("7j/7"), range ("lun-ven"),
 * comma list ("lun, mer"), then a single abbreviation.
 *
 * @param {string} text - Trimmed opening-hours string.
 * @returns {{days: number[], end: number}|null} The ISO day numbers and the
 *   length of the matched prefix, or null when no pattern matches.
 */
function extractDayPrefix(text) {
  const wholeWeek = text.match(SEVEN_DAYS_RE);
  if (wholeWeek) {
    return { days: [...ALL_DAYS], end: wholeWeek[0].length };
  }

  const span = text.match(DAY_RANGE_RE);
  if (span) {
    const [matched, fromName, toName] = span;
    return { days: dayRange(fromName, toName), end: matched.length };
  }

  const listed = text.match(DAY_LIST_RE);
  if (listed) {
    const [matched] = listed;
    const days = [];
    for (const name of matched.split(/\s*,\s*/)) {
      const isoDay = DAY_MAP[name.trim().toLowerCase()];
      if (isoDay) days.push(isoDay);
    }
    return { days, end: matched.length };
  }

  const lone = text.match(DAY_SINGLE_RE);
  if (lone) {
    return { days: [DAY_MAP[lone[1].toLowerCase()]], end: lone[0].length };
  }

  return null;
}
|
||||||
|
|
||||||
|
// --- Redundant day info stripping ---
|
||||||
|
|
||||||
|
/**
 * Remove day expressions from `text`, then drop any leading
 * punctuation/whitespace and trim the result.
 *
 * @param {string} text - Opening-hours text.
 * @returns {string} The text without the recognized day expressions.
 */
function stripRedundantDays(text) {
  const dayExpressions = [
    // "7J/7", "7j/7", "7/7", "7j/7j"
    /\b7\s*[jJ]?\s*[/]\s*7\s*[jJ]?\b/g,
    // Short abbreviations: "L au V", "Ma à D"
    /\b(?:L|Ma|Me|J|V|S|D)\s+(?:au|à)\s+(?:L|Ma|Me|J|V|S|D)\b/gi,
    // Full names, optionally introduced by "du": "du lundi au dimanche"
    /\b(?:du\s+)?(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\s+(?:au|à)\s+(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\b/gi,
    // Single-letter form, which also accepts a bare "M": "M à S"
    /\b[LMJVSD]\s+(?:au|à)\s+[LMJVSD]\b/gi,
  ];

  let remaining = text;
  for (const expression of dayExpressions) {
    remaining = remaining.replace(expression, "");
  }
  return remaining.replace(/^[,;:\-\s]+/, "").trim();
}
|
||||||
|
|
||||||
|
// --- Time slot extraction ---
|
||||||
|
|
||||||
|
/**
 * Format captured hour/minute digit strings as zero-padded "HH:MM".
 *
 * @param {string} h - Hour digits.
 * @param {string} [m] - Minute digits; empty or missing means zero minutes.
 * @returns {string|null} "HH:MM", or null when the values are not a valid
 *   time of day. "24:00" is accepted as an end-of-day marker, but 24 with
 *   non-zero minutes is rejected.
 */
function fmtTime(h, m) {
  const hours = Number.parseInt(h, 10);
  const minutes = Number.parseInt(m || "0", 10);
  // Non-numeric input would otherwise be rendered as "NaN:00".
  if (!Number.isInteger(hours) || !Number.isInteger(minutes)) return null;
  if (hours < 0 || hours > 24 || minutes < 0 || minutes > 59) return null;
  // 24 is only meaningful as exactly 24:00; "24:30" is not a time of day.
  if (hours === 24 && minutes !== 0) return null;
  return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(2, "0")}`;
}
|
||||||
|
|
||||||
|
// One clock token: hour digits, a ":" or "h" separator, then optional minutes.
// IMPORTANT: there is deliberately no \s* between the separator and the
// minutes. Minutes must touch the separator, otherwise "8h/12h 14h/17h"
// would be read as a single match.
const TIME_TOKEN_SRC = String.raw`(\d{1,2})\s*[:h](\d{0,2})`;

// Range of two clock tokens: 8h30/17h30, 8h-18h, 08:00-18:00, 8h à 18h, 8h a 18h
const TIME_RANGE_RE = new RegExp(
  String.raw`${TIME_TOKEN_SRC}\s*(?:[-/à]|\ba\b)\s*${TIME_TOKEN_SRC}`,
  "g"
);

// Standalone time point written with "h": 8h30, 14h (minutes adjacent to h)
const TIME_POINT_RE = new RegExp(String.raw`(\d{1,2})\s*h(\d{0,2})`, "g");
|
||||||
|
|
||||||
|
/**
 * Extract opening/closing time slots from free text.
 *
 * Pass 1 collects explicit ranges ("8h/18h", "8h-18h", "08:00-18:00").
 * Pass 2 runs only when pass 1 finds nothing: it pairs standalone time
 * points two by two ("7h 17h" → one slot). An unpaired trailing point is
 * dropped.
 *
 * Matching is case-insensitive so that "8H30-17H" is read like "8h30-17h",
 * in line with the other patterns in this module.
 *
 * @param {string} text - Opening-hours text.
 * @returns {{open: string, close: string}[]} Slots in order of appearance;
 *   empty when none are found.
 */
function extractTimeSlots(text) {
  const slots = [];

  // Fresh RegExp objects per call: the shared constants carry the "g" flag,
  // whose lastIndex state must not leak between calls.
  const rangeRe = new RegExp(TIME_RANGE_RE.source, "gi");
  for (const found of text.matchAll(rangeRe)) {
    const open = fmtTime(found[1], found[2]);
    const close = fmtTime(found[3], found[4]);
    if (open && close) slots.push({ open, close });
  }
  if (slots.length > 0) return slots;

  const pointRe = new RegExp(TIME_POINT_RE.source, "gi");
  const points = [];
  for (const found of text.matchAll(pointRe)) {
    const time = fmtTime(found[1], found[2]);
    if (time) points.push(time);
  }
  for (let i = 0; i + 1 < points.length; i += 2) {
    slots.push({ open: points[i], close: points[i + 1] });
  }

  return slots;
}
|
||||||
|
|
||||||
|
/**
 * Delete every time range and standalone "h" time point from `text`, so only
 * the non-time remainder is left.
 *
 * Matching is case-insensitive so that upper-case forms such as "8H30-17H"
 * are removed just like "8h30-17h".
 *
 * @param {string} text - Opening-hours text.
 * @returns {string} The trimmed text without time tokens.
 */
function removeTimeTokens(text) {
  return text
    // Ranges first, so their separator ("-", "/", "à", "a") goes with them.
    .replace(
      /(\d{1,2})\s*[:h](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:h](\d{0,2})/gi,
      ""
    )
    // Then any leftover standalone points such as "14h" or "8h30".
    .replace(/(\d{1,2})\s*h(\d{0,2})/gi, "")
    .trim();
}
|
||||||
|
|
||||||
|
// --- Main normalizer ---
|
||||||
|
|
||||||
|
/**
 * Normalize a raw French opening-hours string into a structured object.
 *
 * @param {string} raw - Raw "horaires" text. Empty, blank or non-string
 *   input yields the default result.
 * @param {number} disponible24h - When exactly 1, the result starts out as
 *   24h with all seven days.
 * @returns {{days: number[]|null, slots: {open: string, close: string}[]|null,
 *   is24h: boolean, businessHours: boolean, nightHours: boolean,
 *   events: boolean, notes: string}} Structured hours; `notes` holds whatever
 *   text was left unparsed.
 */
export function normalizeHoraires(raw, disponible24h) {
  const flagged24h = disponible24h === 1;
  const result = {
    days: flagged24h ? [...ALL_DAYS] : null,
    slots: null,
    is24h: flagged24h,
    businessHours: false,
    nightHours: false,
    events: false,
    notes: "",
  };

  // A non-string value (null, undefined, number) carries no text to parse;
  // return the defaults instead of throwing on .trim().
  if (typeof raw !== "string") return result;
  let text = raw.trim();
  if (text === "") return result;

  // 1. Extract day prefix. Days already set from the 24h flag take precedence.
  const dayPrefix = extractDayPrefix(text);
  if (dayPrefix) {
    if (!result.days) result.days = dayPrefix.days;
    // Drop the prefix and the comma/semicolon that separates it from the rest.
    text = text.slice(dayPrefix.end).trim().replace(/^[,;]\s*/, "");
  }

  // 2. "jours fériés" modifier (informational, strip it)
  text = text.replace(/,?\s*jours?\s+f[ée]ri[ée]s?\s*/gi, "").trim();

  // 3. 24h/24 detection. The trailing "h" is only consumed when it is a
  // standalone token; otherwise "24h/24 hors vacances" would lose the first
  // letter of "hors".
  const without24h = text.replace(/24\s*h?\s*[/]\s*24(?:\s*h\b)?/gi, "");
  if (without24h !== text) {
    result.is24h = true;
    text = without24h.trim();
    if (!result.days) result.days = [...ALL_DAYS];
  }

  // 4-6. Keyword flags: "heures ouvrables", "heures de nuit", "événements".
  // Each keyword sets its flag and is removed from the text.
  const keywordFlags = [
    ["businessHours", /heures?\s+ouvrables?/gi],
    ["nightHours", /heures?\s+de\s+nuit/gi],
    ["events", /[ée]v[éè]nements?/gi],
  ];
  for (const [flag, pattern] of keywordFlags) {
    const stripped = text.replace(pattern, "");
    if (stripped !== text) {
      result[flag] = true;
      text = stripped.trim();
    }
  }

  // 7. Strip redundant day info (e.g., "7J/7", "L au V")
  text = stripRedundantDays(text);

  // 8. Extract time slots (max 4 to cover morning+afternoon+evening combos).
  // Skipped for 24h entries; any times then stay in the notes.
  if (!result.is24h) {
    const slots = extractTimeSlots(text);
    if (slots.length > 0) {
      result.slots = slots.slice(0, 4);
      text = removeTimeTokens(text);
    }
  }

  // 9. Clean remaining text → notes
  text = text
    .replace(/^[;,\-/+.\s]+/, "")
    .replace(/[;,\-/+.\s]+$/, "")
    .replace(/\s+/g, " ")
    .trim();
  if (text) result.notes = text;

  return result;
}
|
||||||
|
|
@ -5,6 +5,7 @@ CREATE TABLE IF NOT EXISTS defibs (
|
||||||
nom TEXT NOT NULL DEFAULT '',
|
nom TEXT NOT NULL DEFAULT '',
|
||||||
adresse TEXT NOT NULL DEFAULT '',
|
adresse TEXT NOT NULL DEFAULT '',
|
||||||
horaires TEXT NOT NULL DEFAULT '',
|
horaires TEXT NOT NULL DEFAULT '',
|
||||||
|
horaires_std TEXT NOT NULL DEFAULT '{}',
|
||||||
acces TEXT NOT NULL DEFAULT '',
|
acces TEXT NOT NULL DEFAULT '',
|
||||||
disponible_24h INTEGER NOT NULL DEFAULT 0,
|
disponible_24h INTEGER NOT NULL DEFAULT 0,
|
||||||
h3 TEXT NOT NULL DEFAULT ''
|
h3 TEXT NOT NULL DEFAULT ''
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,14 @@ function bboxClause(lat, lon, radiusMeters) {
|
||||||
* @property {string} nom
|
* @property {string} nom
|
||||||
* @property {string} adresse
|
* @property {string} adresse
|
||||||
* @property {string} horaires
|
* @property {string} horaires
|
||||||
|
* @property {Object} horaires_std
|
||||||
|
* @property {number[]|null} horaires_std.days - ISO 8601 day numbers (1=Mon…7=Sun)
|
||||||
|
* @property {{open:string,close:string}[]|null} horaires_std.slots - Time ranges
|
||||||
|
* @property {boolean} horaires_std.is24h
|
||||||
|
* @property {boolean} horaires_std.businessHours
|
||||||
|
* @property {boolean} horaires_std.nightHours
|
||||||
|
* @property {boolean} horaires_std.events
|
||||||
|
* @property {string} horaires_std.notes
|
||||||
* @property {string} acces
|
* @property {string} acces
|
||||||
* @property {number} disponible_24h
|
* @property {number} disponible_24h
|
||||||
*/
|
*/
|
||||||
|
|
@ -117,7 +125,7 @@ async function queryCells(db, cells, dispo24h) {
|
||||||
const chunk = cells.slice(i, i + SQL_VAR_LIMIT);
|
const chunk = cells.slice(i, i + SQL_VAR_LIMIT);
|
||||||
const placeholders = chunk.map(() => "?").join(",");
|
const placeholders = chunk.map(() => "?").join(",");
|
||||||
|
|
||||||
let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h
|
let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h
|
||||||
FROM defibs WHERE h3 IN (${placeholders})`;
|
FROM defibs WHERE h3 IN (${placeholders})`;
|
||||||
const params = [...chunk];
|
const params = [...chunk];
|
||||||
|
|
||||||
|
|
@ -132,13 +140,22 @@ async function queryCells(db, cells, dispo24h) {
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Return a copy of `row` whose `horaires_std` JSON string is parsed.
 *
 * `horaires_std` becomes null when the stored text is not valid JSON or does
 * not decode to a plain object (e.g. "null", an array, a bare string).
 *
 * @param {Object} row - Row carrying a `horaires_std` JSON string.
 * @returns {Object} Shallow copy of `row` with `horaires_std` as an object
 *   or null.
 */
function parseHorairesStd(row) {
  let parsed = null;
  try {
    const value = JSON.parse(row.horaires_std);
    if (value !== null && typeof value === "object" && !Array.isArray(value)) {
      parsed = value;
    }
  } catch (err) {
    // Only malformed JSON is an expected, recoverable condition here; any
    // other error (e.g. a missing row) is a real bug and must surface.
    if (!(err instanceof SyntaxError)) throw err;
  }
  return { ...row, horaires_std: parsed };
}
|
||||||
|
|
||||||
// Compute distance, filter by radius, sort, and limit.
|
// Compute distance, filter by radius, sort, and limit.
|
||||||
function rankAndFilter(candidates, lat, lon, radiusMeters, limit) {
|
function rankAndFilter(candidates, lat, lon, radiusMeters, limit) {
|
||||||
const withDist = [];
|
const withDist = [];
|
||||||
for (const row of candidates) {
|
for (const row of candidates) {
|
||||||
const distanceMeters = haversine(lat, lon, row.latitude, row.longitude);
|
const distanceMeters = haversine(lat, lon, row.latitude, row.longitude);
|
||||||
if (distanceMeters <= radiusMeters) {
|
if (distanceMeters <= radiusMeters) {
|
||||||
withDist.push({ ...row, distanceMeters });
|
withDist.push({ ...parseHorairesStd(row), distanceMeters });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
withDist.sort((a, b) => a.distanceMeters - b.distanceMeters);
|
withDist.sort((a, b) => a.distanceMeters - b.distanceMeters);
|
||||||
|
|
@ -166,7 +183,7 @@ export async function getNearbyDefibsBbox({
|
||||||
const db = await getDb();
|
const db = await getDb();
|
||||||
const { clause, params } = bboxClause(lat, lon, radiusMeters);
|
const { clause, params } = bboxClause(lat, lon, radiusMeters);
|
||||||
|
|
||||||
let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, acces, disponible_24h
|
let sql = `SELECT id, latitude, longitude, nom, adresse, horaires, horaires_std, acces, disponible_24h
|
||||||
FROM defibs WHERE ${clause}`;
|
FROM defibs WHERE ${clause}`;
|
||||||
if (disponible24hOnly) {
|
if (disponible24hOnly) {
|
||||||
sql += " AND disponible_24h = 1";
|
sql += " AND disponible_24h = 1";
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue