// Deterministic normalizer for French opening hours (horaires) strings.
// Outputs a structured object that a simple JSON parser can consume without heuristics.
//
// Output shape:
//   { days: number[]|null, slots: {open,close}[]|null, is24h, businessHours, nightHours, events, notes }
//
//   days:          ISO 8601 day numbers (1=Mon … 7=Sun), null if unknown
//   slots:         [{open:"HH:MM", close:"HH:MM"}], null if no specific times
//   is24h:         available 24 hours
//   businessHours: "heures ouvrables" was specified
//   nightHours:    "heures de nuit" was specified
//   events:        availability depends on events
//   notes:         unparsed/remaining text (seasonal info, conditions, etc.)

const DAY_MAP = { lun: 1, mar: 2, mer: 3, jeu: 4, ven: 5, sam: 6, dim: 7 };
const ALL_DAYS = [1, 2, 3, 4, 5, 6, 7];

// --- Accent-aware word boundaries ---
// JavaScript's `\b` only knows ASCII word characters, so it reports a boundary
// between "r" and "é" in "Maréchal" or between "è" and "s" in "accès". The
// fragments below treat Latin-1 accented letters (plus œ/Œ) as word characters.
const WORD_CHARS =
  "A-Za-z0-9_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF\\u0152\\u0153";
// Captures the preceding non-word character (or start of string) so that a
// replacement can put it back with "$1"; avoids relying on lookbehind support.
const NOT_AFTER_WORD = `(^|[^${WORD_CHARS}])`;
const NOT_BEFORE_WORD = `(?![${WORD_CHARS}])`;

// --- Day prefix extraction ---

const SEVEN_DAYS_RE = /^7\s*j?\s*[/]\s*7\s*j?/i;
const DAY_RANGE_RE =
  /^(lun|mar|mer|jeu|ven|sam|dim)\s*-\s*(lun|mar|mer|jeu|ven|sam|dim)/i;
const DAY_LIST_RE =
  /^((lun|mar|mer|jeu|ven|sam|dim)(\s*,\s*(lun|mar|mer|jeu|ven|sam|dim))+)/i;
// A lone abbreviation must not be followed by any letter (accented or not),
// otherwise "Maréchal Foch" would be read as "Mar" (Tuesday).
const DAY_SINGLE_RE = new RegExp(
  `^(lun|mar|mer|jeu|ven|sam|dim)${NOT_BEFORE_WORD}`,
  "i"
);

/**
 * Expands an inclusive day range, wrapping past Sunday when needed
 * (e.g. "sam" → "lun" gives [6, 7, 1]).
 *
 * @param {string} startName - Three-letter day abbreviation (any case).
 * @param {string} endName - Three-letter day abbreviation (any case).
 * @returns {number[]} ISO day numbers from start to end, inclusive.
 */
function dayRange(startName, endName) {
  const start = DAY_MAP[startName.toLowerCase()];
  const end = DAY_MAP[endName.toLowerCase()];
  const days = [];
  for (let d = start; ; d = (d % 7) + 1) {
    days.push(d);
    // The length guard is only a safety net; a valid `end` is always reached.
    if (d === end || days.length > 7) break;
  }
  return days;
}

/**
 * Recognizes a leading day specification: "7j/7", "lun-ven", "lun, mer, ven"
 * or a single abbreviation such as "sam".
 *
 * @param {string} text - Trimmed input text.
 * @returns {{days: number[], end: number}|null} Parsed days and the length of
 *   the consumed prefix, or null when the text does not start with days.
 */
function extractDayPrefix(text) {
  const m7 = text.match(SEVEN_DAYS_RE);
  if (m7) return { days: [...ALL_DAYS], end: m7[0].length };

  const mRange = text.match(DAY_RANGE_RE);
  if (mRange) {
    return { days: dayRange(mRange[1], mRange[2]), end: mRange[0].length };
  }

  const mList = text.match(DAY_LIST_RE);
  if (mList) {
    const names = mList[0].split(/\s*,\s*/);
    return {
      days: names.map((n) => DAY_MAP[n.trim().toLowerCase()]).filter(Boolean),
      end: mList[0].length,
    };
  }

  const mSingle = text.match(DAY_SINGLE_RE);
  if (mSingle) {
    return { days: [DAY_MAP[mSingle[1].toLowerCase()]], end: mSingle[0].length };
  }

  return null;
}

// --- Redundant day info stripping ---

// "7J/7", "7j/7", "7/7", "7j/7j"
const SEVEN_DAYS_ANYWHERE_RE = /\b7\s*[jJ]?\s*[/]\s*7\s*[jJ]?\b/g;
// "L au V", "Ma à D", "M à V" (short abbreviations: L Ma Me J V S D, or bare M)
const SHORT_DAY_SPAN_RE = new RegExp(
  `${NOT_AFTER_WORD}(?:Ma|Me|[LMJVSD])\\s+(?:au|à)\\s+(?:Ma|Me|[LMJVSD])${NOT_BEFORE_WORD}`,
  "gi"
);
// "du lundi au dimanche", "lundi à vendredi" (full names)
const FULL_DAY_NAMES = "lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche";
const FULL_DAY_SPAN_RE = new RegExp(
  `${NOT_AFTER_WORD}(?:du\\s+)?(?:${FULL_DAY_NAMES})\\s+(?:au|à)\\s+(?:${FULL_DAY_NAMES})${NOT_BEFORE_WORD}`,
  "gi"
);

/**
 * Removes day-span expressions that may still appear in the text after the
 * day prefix has been handled, then drops leading separators.
 *
 * Boundaries are accent-aware so that ordinary prose such as
 * "accès à l'hôpital" is not mistaken for a "S à L" day span.
 *
 * @param {string} text
 * @returns {string} Text without the recognized day spans, trimmed.
 */
function stripRedundantDays(text) {
  return text
    .replace(SEVEN_DAYS_ANYWHERE_RE, "")
    .replace(SHORT_DAY_SPAN_RE, "$1") // "$1" restores the preceding character
    .replace(FULL_DAY_SPAN_RE, "$1")
    .replace(/^[,;:\-\s]+/, "")
    .trim();
}

// --- Time slot extraction ---

/**
 * Formats captured hour/minute digit strings as zero-padded "HH:MM".
 *
 * @param {string} h - Hour digits.
 * @param {string} [m] - Minute digits; empty/undefined means ":00".
 * @returns {string|null} "HH:MM", or null when the time is out of range.
 *   "24:00" is accepted as an end-of-day marker; "24:01"–"24:59" are not.
 */
function fmtTime(h, m) {
  const hh = Number.parseInt(h, 10);
  const mm = Number.parseInt(m || "0", 10);
  if (Number.isNaN(hh) || Number.isNaN(mm)) return null;
  if (hh < 0 || hh > 24 || mm < 0 || mm > 59) return null;
  if (hh === 24 && mm !== 0) return null;
  return `${String(hh).padStart(2, "0")}:${String(mm).padStart(2, "0")}`;
}

// Matches: 8h30/17h30, 8h-18h, 08:00-18:00, 8h à 18h, 8h a 18h
// IMPORTANT: no \s* between [:h] and (\d{0,2}) — minutes must be adjacent
// to the separator, otherwise "8h/12h 14h/17h" would merge into one match.
const TIME_RANGE_RE =
  /(\d{1,2})\s*[:h](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:h](\d{0,2})/g;
// Matches standalone: 8h30, 14h (minutes adjacent to h)
const TIME_POINT_RE = /(\d{1,2})\s*h(\d{0,2})/g;

/**
 * Extracts opening slots from free text.
 *
 * Explicit ranges win; only when none is found are standalone time points
 * paired two by two ("7h 17h" → 07:00–17:00). An unpaired trailing point is
 * ignored.
 *
 * @param {string} text
 * @returns {{open: string, close: string}[]} Possibly empty list of slots.
 */
function extractTimeSlots(text) {
  const slots = [];
  let match;

  // Pass 1: explicit ranges (8h/18h, 8h-18h, 08:00-18:00).
  // A fresh regex keeps `lastIndex` state out of the shared constant.
  const rangeRe = new RegExp(TIME_RANGE_RE.source, "g");
  while ((match = rangeRe.exec(text)) !== null) {
    const open = fmtTime(match[1], match[2]);
    const close = fmtTime(match[3], match[4]);
    if (open && close) slots.push({ open, close });
  }
  if (slots.length > 0) return slots;

  // Pass 2: pair standalone time points (7h 17h → {07:00, 17:00}).
  const pointRe = new RegExp(TIME_POINT_RE.source, "g");
  const points = [];
  while ((match = pointRe.exec(text)) !== null) {
    const t = fmtTime(match[1], match[2]);
    if (t) points.push(t);
  }
  for (let i = 0; i + 1 < points.length; i += 2) {
    slots.push({ open: points[i], close: points[i + 1] });
  }
  return slots;
}

/**
 * Removes every time range and standalone time point from the text.
 * Uses the same patterns as extractTimeSlots so both stay in sync.
 *
 * @param {string} text
 * @returns {string} Trimmed text without time tokens.
 */
function removeTimeTokens(text) {
  // String.prototype.replace resets lastIndex for global regexes, so the
  // shared constants are safe to use here.
  return text.replace(TIME_RANGE_RE, "").replace(TIME_POINT_RE, "").trim();
}

// --- Main normalizer ---

const HOLIDAYS_RE = /,?\s*jours?\s+f[ée]ri[ée]s?\s*/gi;
const ROUND_THE_CLOCK_RE = /24\s*h?\s*[/]\s*24\s*h?/gi;
const BUSINESS_HOURS_RE = /heures?\s+ouvrables?/gi;
const NIGHT_HOURS_RE = /heures?\s+de\s+nuit/gi;
const EVENTS_RE = /[ée]v[éè]nements?/gi;

/**
 * Removes all occurrences of a keyword pattern and reports whether any was
 * present. The text is returned untouched when nothing matched.
 *
 * @param {string} text
 * @param {RegExp} pattern - Global regex that never matches the empty string.
 * @returns {{found: boolean, text: string}}
 */
function takeKeyword(text, pattern) {
  const stripped = text.replace(pattern, "");
  if (stripped === text) return { found: false, text };
  return { found: true, text: stripped.trim() };
}

/**
 * Normalizes a free-form French opening-hours string into a structured object.
 *
 * @param {string|null|undefined} raw - Raw horaires text. Empty, missing or
 *   non-string values yield a result built from `disponible24h` only.
 * @param {number} [disponible24h] - When strictly equal to 1, the result is
 *   flagged as 24h and all seven days are set.
 * @returns {{
 *   days: number[]|null,
 *   slots: {open: string, close: string}[]|null,
 *   is24h: boolean,
 *   businessHours: boolean,
 *   nightHours: boolean,
 *   events: boolean,
 *   notes: string
 * }}
 */
export function normalizeHoraires(raw, disponible24h) {
  const result = {
    days: null,
    slots: null,
    is24h: disponible24h === 1,
    businessHours: false,
    nightHours: false,
    events: false,
    notes: "",
  };

  if (disponible24h === 1) {
    result.days = [...ALL_DAYS];
  }

  // Non-string input (numbers, objects) has no text to parse; previously this
  // crashed on `.trim()`.
  if (typeof raw !== "string" || raw.trim() === "") return result;

  let text = raw.trim();

  // 1. Extract day prefix
  const dayPrefix = extractDayPrefix(text);
  if (dayPrefix) {
    if (!result.days) result.days = dayPrefix.days;
    text = text.slice(dayPrefix.end).trim();
    // Strip leading comma/semicolon left behind after the day prefix
    text = text.replace(/^[,;]\s*/, "");
  }

  // 2. "jours fériés" modifier (informational, strip it)
  text = text.replace(HOLIDAYS_RE, "").trim();

  // 3. 24h/24 detection
  const roundTheClock = takeKeyword(text, ROUND_THE_CLOCK_RE);
  if (roundTheClock.found) {
    result.is24h = true;
    text = roundTheClock.text;
    if (!result.days) result.days = [...ALL_DAYS];
  }

  // 4. "heures ouvrables"
  const business = takeKeyword(text, BUSINESS_HOURS_RE);
  result.businessHours = business.found;
  text = business.text;

  // 5. "heures de nuit"
  const night = takeKeyword(text, NIGHT_HOURS_RE);
  result.nightHours = night.found;
  text = night.text;

  // 6. "événements"
  const events = takeKeyword(text, EVENTS_RE);
  result.events = events.found;
  text = events.text;

  // 7. Strip redundant day info (e.g., "7J/7", "L au V")
  text = stripRedundantDays(text);

  // 8. Extract time slots (max 4 to cover morning+afternoon+evening combos)
  if (!result.is24h) {
    const slots = extractTimeSlots(text);
    if (slots.length > 0) {
      result.slots = slots.slice(0, 4);
      text = removeTimeTokens(text);
    }
  }

  // 9. Clean remaining text → notes
  text = text
    .replace(/^[;,\-/+.\s]+/, "")
    .replace(/[;,\-/+.\s]+$/, "")
    .replace(/\s+/g, " ")
    .trim();

  if (text) result.notes = text;

  return result;
}