as-app/scripts/dae/lib/normalize-horaires.mjs
2026-03-05 18:30:23 +01:00

228 lines
6.6 KiB
JavaScript

// Deterministic normalizer for French opening hours (horaires) strings.
// Outputs a structured object that a simple JSON parser can consume without heuristics.
//
// Output shape:
// { days: number[]|null, slots: {open,close}[]|null, is24h, businessHours, nightHours, events, notes }
//
// days: ISO 8601 day numbers (1=Mon … 7=Sun), null if unknown
// slots: [{open:"HH:MM", close:"HH:MM"}], null if no specific times
// is24h: available 24 hours
// businessHours: "heures ouvrables" was specified
// nightHours: "heures de nuit" was specified
// events: availability depends on events
// notes: unparsed/remaining text (seasonal info, conditions, etc.)
// French three-letter day abbreviations mapped to ISO 8601 day numbers
// (1 = Monday … 7 = Sunday). Keys are declared in week order.
const DAY_MAP = {
  lun: 1,
  mar: 2,
  mer: 3,
  jeu: 4,
  ven: 5,
  sam: 6,
  dim: 7,
};
// Every ISO day number in week order, derived from the map above.
const ALL_DAYS = Object.values(DAY_MAP);
// --- Day prefix extraction ---
// All four patterns are anchored at the start of the string (^) and are
// case-insensitive; they only recognise a day specification that begins the text.

// "7/7", "7j/7", "7j/7j" — optional "j" after each 7, optional whitespace around "/".
const SEVEN_DAYS_RE = /^7\s*j?\s*[/]\s*7\s*j?/i;
// "lun-ven": two abbreviations joined by a hyphen.
// Capture 1 = first day, capture 2 = last day.
const DAY_RANGE_RE =
/^(lun|mar|mer|jeu|ven|sam|dim)\s*-\s*(lun|mar|mer|jeu|ven|sam|dim)/i;
// "lun, mer, ven": two or more comma-separated abbreviations
// (the trailing group is repeated with "+", so at least one comma is required).
const DAY_LIST_RE =
/^((lun|mar|mer|jeu|ven|sam|dim)(\s*,\s*(lun|mar|mer|jeu|ven|sam|dim))+)/i;
// A single abbreviation followed by a word boundary, so a longer word that
// merely starts with the abbreviation (e.g. "lundi") does not match.
const DAY_SINGLE_RE = /^(lun|mar|mer|jeu|ven|sam|dim)\b/i;
/**
 * Expand an inclusive day range into ISO day numbers, wrapping past Sunday
 * when needed (e.g. "sam" → "lun" yields [6, 7, 1]).
 *
 * @param {string} startName - Three-letter abbreviation of the first day (any case).
 * @param {string} endName - Three-letter abbreviation of the last day (any case).
 * @returns {number[]} Day numbers from start to end, both included.
 */
function dayRange(startName, endName) {
  const first = DAY_MAP[startName.toLowerCase()];
  const last = DAY_MAP[endName.toLowerCase()];
  const span = [];
  // The length cap is a safety net: the walk stops after at most 8 entries
  // even if `last` is never reached.
  for (let current = first; span.length < 8; current = (current % 7) + 1) {
    span.push(current);
    if (current === last) break;
  }
  return span;
}
/**
 * Recognise a day specification at the very start of `text`.
 *
 * Patterns are tried in this order: "7j/7", a hyphenated range ("lun-ven"),
 * a comma-separated list ("lun, mer"), then a single abbreviation.
 *
 * @param {string} text - Trimmed opening-hours string.
 * @returns {{days: number[], end: number} | null} The ISO day numbers and the
 *   length of the matched prefix, or null when the text has no day prefix.
 */
function extractDayPrefix(text) {
  const everyDay = text.match(SEVEN_DAYS_RE);
  if (everyDay) {
    // Hand out a copy so callers never share the module-level array.
    return { days: [...ALL_DAYS], end: everyDay[0].length };
  }

  const range = text.match(DAY_RANGE_RE);
  if (range) {
    const [whole, from, to] = range;
    return { days: dayRange(from, to), end: whole.length };
  }

  const list = text.match(DAY_LIST_RE);
  if (list) {
    const [whole] = list;
    const days = [];
    for (const name of whole.split(/\s*,\s*/)) {
      const iso = DAY_MAP[name.trim().toLowerCase()];
      if (iso) days.push(iso);
    }
    return { days, end: whole.length };
  }

  const single = text.match(DAY_SINGLE_RE);
  if (single) {
    return { days: [DAY_MAP[single[1].toLowerCase()]], end: single[0].length };
  }

  return null;
}
// --- Redundant day info stripping ---
/**
 * Remove day-range phrases ("7j/7", "L au V", "du lundi au dimanche") from
 * the text, then drop any leading separator characters left behind.
 *
 * The one-/two-letter abbreviation patterns use Unicode-aware lookarounds
 * instead of `\b`. JavaScript's `\b` only knows ASCII word characters, so an
 * accented letter counts as a boundary: with `\b`, "accès à l'accueil" was
 * mangled into "accè'accueil" because "s à l" looked like a day range.
 *
 * @param {string} text - Text remaining after day-prefix and keyword extraction.
 * @returns {string} The text without redundant day information, trimmed.
 */
function stripRedundantDays(text) {
  return (
    text
      // "7J/7", "7j/7", "7/7", "7j/7j"
      .replace(/\b7\s*[jJ]?\s*[/]\s*7\s*[jJ]?\b/g, "")
      // "L au V", "Ma à D" (short abbreviations); must not be glued to any
      // letter or digit, accented letters included
      .replace(
        /(?<![\p{L}\p{N}_])(?:L|Ma|Me|J|V|S|D)\s+(?:au|à)\s+(?:L|Ma|Me|J|V|S|D)(?![\p{L}\p{N}_])/giu,
        ""
      )
      // "du lundi au dimanche" (full names)
      .replace(
        /\b(?:du\s+)?(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\s+(?:au|à)\s+(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\b/gi,
        ""
      )
      // Single-letter variants, including a bare "M" ("M au S")
      .replace(
        /(?<![\p{L}\p{N}_])[LMJVSD]\s+(?:au|à)\s+[LMJVSD](?![\p{L}\p{N}_])/giu,
        ""
      )
      // Leading separators left over once the day phrase is gone
      .replace(/^[,;:\-\s]+/, "")
      .trim()
  );
}
// --- Time slot extraction ---
/**
 * Format an hour/minute pair captured from text as zero-padded "HH:MM".
 *
 * @param {string} h - Hour digits.
 * @param {string} [m] - Minute digits; empty or missing means ":00".
 * @returns {string|null} "HH:MM", or null when the values do not form a valid
 *   time. "24:00" is accepted as an end-of-day marker, but "24:01"–"24:59"
 *   are rejected.
 */
function fmtTime(h, m) {
  const hh = Number.parseInt(h, 10);
  const mm = Number.parseInt(m || "0", 10);
  // Non-numeric input would otherwise leak through as "NaN:00".
  if (Number.isNaN(hh) || Number.isNaN(mm)) return null;
  if (hh < 0 || hh > 24 || mm < 0 || mm > 59) return null;
  // Hour 24 is only meaningful as exactly 24:00.
  if (hh === 24 && mm !== 0) return null;
  return `${String(hh).padStart(2, "0")}:${String(mm).padStart(2, "0")}`;
}
// Matches: 8h30/17h30, 8h-18h, 08:00-18:00, 8h à 18h, 8h a 18h
// Captures: 1 = opening hour, 2 = opening minutes (may be empty),
//           3 = closing hour, 4 = closing minutes (may be empty).
// The separator between the two times is "-", "/", "à", or a standalone "a".
// IMPORTANT: no \s* between [:h] and (\d{0,2}) — minutes must be adjacent
// to the separator, otherwise "8h/12h 14h/17h" would merge into one match.
// NOTE: this regex carries the "g" flag, so calling exec()/test() on it
// directly would advance its lastIndex between calls.
const TIME_RANGE_RE =
/(\d{1,2})\s*[:h](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:h](\d{0,2})/g;
// Matches standalone: 8h30, 14h (minutes adjacent to h)
// Captures: 1 = hour, 2 = minutes (may be empty). Only the "h" form is
// recognised here, not "08:00". Also global — same lastIndex caveat as above.
const TIME_POINT_RE = /(\d{1,2})\s*h(\d{0,2})/g;
/**
 * Pull opening/closing time pairs out of free text.
 *
 * Explicit ranges ("8h-18h", "08:00-18:00") win. Only when none yields a
 * valid slot are standalone time points ("7h 17h") paired up two by two,
 * with a trailing unpaired point dropped.
 *
 * @param {string} text - Text to scan.
 * @returns {{open: string, close: string}[]} Slots in order of appearance
 *   (empty when no times are found).
 */
function extractTimeSlots(text) {
  // Walk every match of a fresh global copy of `pattern`, so the shared
  // module-level regexes never accumulate lastIndex state.
  const scan = (pattern, onMatch) => {
    const re = new RegExp(pattern.source, "g");
    for (let m = re.exec(text); m !== null; m = re.exec(text)) {
      onMatch(m);
    }
  };

  // Pass 1: explicit ranges (8h/18h, 8h-18h, 08:00-18:00)
  const ranges = [];
  scan(TIME_RANGE_RE, ([, openH, openM, closeH, closeM]) => {
    const open = fmtTime(openH, openM);
    const close = fmtTime(closeH, closeM);
    if (open && close) ranges.push({ open, close });
  });
  if (ranges.length > 0) return ranges;

  // Pass 2: pair standalone time points (7h 17h → {07:00, 17:00})
  const points = [];
  scan(TIME_POINT_RE, ([, hour, minute]) => {
    const point = fmtTime(hour, minute);
    if (point) points.push(point);
  });

  const pairs = [];
  for (let i = 1; i < points.length; i += 2) {
    pairs.push({ open: points[i - 1], close: points[i] });
  }
  return pairs;
}
/**
 * Delete every time expression from the text: full ranges first, then any
 * remaining standalone "8h30"-style points.
 *
 * @param {string} text - Text that still contains time tokens.
 * @returns {string} The text without time tokens, trimmed at both ends.
 */
function removeTimeTokens(text) {
  // Ranges go first so their separator ("-", "/", "à", "a") disappears with them.
  const withoutRanges = text.replace(
    /(\d{1,2})\s*[:h](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:h](\d{0,2})/g,
    ""
  );
  const withoutPoints = withoutRanges.replace(/(\d{1,2})\s*h(\d{0,2})/g, "");
  return withoutPoints.trim();
}
// --- Main normalizer ---
/**
 * Normalize a free-text French opening-hours string into a structured object.
 *
 * Processing order matters: each step consumes what it recognises and passes
 * the leftover text on; whatever remains at the end becomes `notes`.
 *
 * @param {string|null|undefined} raw - Raw opening-hours text; empty or
 *   missing input returns the defaults.
 * @param {number} [disponible24h] - Only the number 1 (strict comparison)
 *   marks the entry as available 24h on all seven days.
 * @returns {{days: number[]|null, slots: {open: string, close: string}[]|null,
 *   is24h: boolean, businessHours: boolean, nightHours: boolean,
 *   events: boolean, notes: string}} The normalized description.
 */
export function normalizeHoraires(raw, disponible24h) {
  const alwaysOpen = disponible24h === 1;
  const result = {
    days: alwaysOpen ? [...ALL_DAYS] : null,
    slots: null,
    is24h: alwaysOpen,
    businessHours: false,
    nightHours: false,
    events: false,
    notes: "",
  };
  if (!raw || raw.trim() === "") return result;

  let remaining = raw.trim();

  // Strip `pattern` from the remaining text; report whether anything matched.
  const consume = (pattern) => {
    const stripped = remaining.replace(pattern, "");
    if (stripped === remaining) return false;
    remaining = stripped.trim();
    return true;
  };

  // 1. Day prefix. Days already set by the 24h flag take precedence, but the
  //    prefix (and a following comma/semicolon) is removed either way.
  const prefix = extractDayPrefix(remaining);
  if (prefix) {
    if (result.days === null) result.days = prefix.days;
    remaining = remaining
      .slice(prefix.end)
      .trim()
      .replace(/^[,;]\s*/, "");
  }

  // 2. "jours fériés" modifier (informational, strip it)
  remaining = remaining.replace(/,?\s*jours?\s+f[ée]ri[ée]s?\s*/gi, "").trim();

  // 3. 24h/24 detection; implies all seven days unless days are already known
  if (consume(/24\s*h?\s*[/]\s*24\s*h?/gi)) {
    result.is24h = true;
    if (result.days === null) result.days = [...ALL_DAYS];
  }

  // 4. "heures ouvrables"
  if (consume(/heures?\s+ouvrables?/gi)) result.businessHours = true;

  // 5. "heures de nuit"
  if (consume(/heures?\s+de\s+nuit/gi)) result.nightHours = true;

  // 6. "événements"
  if (consume(/[ée]v[éè]nements?/gi)) result.events = true;

  // 7. Strip redundant day info (e.g., "7J/7", "L au V")
  remaining = stripRedundantDays(remaining);

  // 8. Time slots (max 4 to cover morning+afternoon+evening combos);
  //    skipped entirely for 24h entries.
  if (!result.is24h) {
    const found = extractTimeSlots(remaining);
    if (found.length > 0) {
      result.slots = found.slice(0, 4);
      remaining = removeTimeTokens(remaining);
    }
  }

  // 9. Whatever is left, minus stray punctuation and repeated spaces, is notes.
  const notes = remaining
    .replace(/^[;,\-/+.\s]+/, "")
    .replace(/[;,\-/+.\s]+$/, "")
    .replace(/\s+/g, " ")
    .trim();
  if (notes) result.notes = notes;

  return result;
}