// Deterministic normalizer for French opening hours (horaires) strings.
// Outputs a structured object that a simple JSON parser can consume without heuristics.
//
// Output shape:
//   { days: number[]|null, slots: {open,close}[]|null, is24h, businessHours, nightHours, events, notes }
//
//   days:          ISO 8601 day numbers (1=Mon … 7=Sun), null if unknown
//   slots:         [{open:"HH:MM", close:"HH:MM"}], null if no specific times
//   is24h:         available 24 hours
//   businessHours: "heures ouvrables" was specified
//   nightHours:    "heures de nuit" was specified
//   events:        availability depends on events
//   notes:         unparsed/remaining text (seasonal info, conditions, etc.)

/**
 * Three-letter French day abbreviations (lowercase) mapped to ISO 8601 day
 * numbers (1 = Monday … 7 = Sunday). Frozen so that no lookup site can
 * accidentally mutate the shared table.
 */
const DAY_MAP = Object.freeze({
  lun: 1,
  mar: 2,
  mer: 3,
  jeu: 4,
  ven: 5,
  sam: 6,
  dim: 7,
});

/**
 * Every ISO day number, Monday first. Frozen: callers that need to hand the
 * list out must copy it (e.g. `[...ALL_DAYS]`).
 */
const ALL_DAYS = Object.freeze([1, 2, 3, 4, 5, 6, 7]);

// --- Day prefix extraction ---

// Capturing alternation of the three-letter day abbreviations, shared by the
// patterns below so the list of names is written only once.
const DAY_ABBR_PATTERN = "(lun|mar|mer|jeu|ven|sam|dim)";

// "7j/7", "7/7", "7J/7J" at the start of the string.
// The trailing "j" is only consumed when it is not the first letter of a word:
// a bare `\s*j?` would turn "7j/7 jours fériés" into "ours fériés".
const SEVEN_DAYS_RE = /^7\s*j?\s*\/\s*7(?:\s*j(?![\p{L}\p{N}]))?/iu;

// "lun-ven": group 1 = first day, group 2 = last day.
const DAY_RANGE_RE = new RegExp(
  `^${DAY_ABBR_PATTERN}\\s*-\\s*${DAY_ABBR_PATTERN}`,
  "i"
);

// "lun, mer, ven": two or more comma-separated days; match[0] is the whole list.
const DAY_LIST_RE = new RegExp(
  `^(${DAY_ABBR_PATTERN}(\\s*,\\s*${DAY_ABBR_PATTERN})+)`,
  "i"
);

// A single abbreviation as a whole word ("sam" but not "samedi"); group 1 = the day.
const DAY_SINGLE_RE = new RegExp(`^${DAY_ABBR_PATTERN}\\b`, "i");

/**
 * Expands an inclusive range of days into ISO day numbers, wrapping past
 * Sunday when the end day comes before the start day in the week.
 *
 *   dayRange("lun", "ven") → [1, 2, 3, 4, 5]
 *   dayRange("ven", "lun") → [5, 6, 7, 1]
 *
 * @param {string} startName - Three-letter day abbreviation (any case), a key of DAY_MAP.
 * @param {string} endName - Three-letter day abbreviation (any case), a key of DAY_MAP.
 * @returns {number[]} Day numbers from start to end inclusive; empty when
 *   either name is not a known abbreviation.
 */
function dayRange(startName, endName) {
  const start = DAY_MAP[String(startName).toLowerCase()];
  const end = DAY_MAP[String(endName).toLowerCase()];

  // Unknown names used to leak `undefined`/`NaN` entries into the result.
  if (!Number.isInteger(start) || !Number.isInteger(end)) return [];

  // Number of days walked going forward from start to end (1 when equal).
  const length = ((end - start + 7) % 7) + 1;
  return Array.from({ length }, (_, offset) => ((start - 1 + offset) % 7) + 1);
}

/**
 * Recognises a day specification at the very start of `text`.
 *
 * Patterns are tried in this order, first hit wins:
 *   1. SEVEN_DAYS_RE  ("7j/7")          → all seven days
 *   2. DAY_RANGE_RE   ("lun-ven")       → expanded through dayRange()
 *   3. DAY_LIST_RE    ("lun, mer, ven") → the listed days, de-duplicated
 *   4. DAY_SINGLE_RE  ("sam")           → that single day
 *
 * @param {string} text - Already-trimmed horaires string.
 * @returns {{days: number[], end: number}|null} The ISO day numbers and the
 *   length of the matched prefix (index where the rest of the text begins),
 *   or null when the text does not start with a day specification.
 */
function extractDayPrefix(text) {
  if (typeof text !== "string") return null;

  const sevenDays = text.match(SEVEN_DAYS_RE);
  if (sevenDays) {
    // Copy so callers can never mutate the shared ALL_DAYS list.
    return { days: [...ALL_DAYS], end: sevenDays[0].length };
  }

  const range = text.match(DAY_RANGE_RE);
  if (range) {
    return { days: dayRange(range[1], range[2]), end: range[0].length };
  }

  const list = text.match(DAY_LIST_RE);
  if (list) {
    const numbers = list[0]
      .split(",")
      .map((name) => DAY_MAP[name.trim().toLowerCase()])
      .filter(Boolean);
    // A Set keeps first-seen order while dropping repeats ("lun, mer, lun").
    return { days: [...new Set(numbers)], end: list[0].length };
  }

  const single = text.match(DAY_SINGLE_RE);
  if (single) {
    return { days: [DAY_MAP[single[1].toLowerCase()]], end: single[0].length };
  }

  return null;
}

// --- Redundant day info stripping ---

// "7J/7", "7j/7", "7/7", "7j/7j"
const SEVEN_DAYS_ANYWHERE_RE = /\b7\s*[jJ]?\s*[/]\s*7\s*[jJ]?\b/g;

// "L au V", "Ma à D", "M au V", optionally introduced by "du".
// JavaScript's \b only knows ASCII word characters, so "accès à l'accueil"
// used to match as "S à L"; Unicode-aware lookarounds (plus an apostrophe
// check on the right) keep ordinary French prose intact.
const ABBREVIATED_DAY_RANGE_RE =
  /(?<![\p{L}\p{N}_])(?:du\s+)?(?:L|Ma|Me|M|J|V|S|D)\s+(?:au|à)\s+(?:L|Ma|Me|M|J|V|S|D)(?![\p{L}\p{N}_'’])/giu;

// "du lundi au dimanche", "mardi à samedi" (full day names)
const FULL_NAME_DAY_RANGE_RE =
  /\b(?:du\s+)?(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\s+(?:au|à)\s+(?:lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)\b/gi;

/**
 * Removes day-range wording that may still be embedded in the text ("7j/7",
 * "L au V", "du lundi au dimanche"), then drops any separator characters
 * (comma, semicolon, colon, hyphen, whitespace) left at the start.
 *
 * The removed ranges are discarded, not interpreted.
 *
 * @param {string} text - Text remaining after the day prefix has been handled.
 * @returns {string} The text without those day expressions, trimmed.
 */
function stripRedundantDays(text) {
  return text
    .replace(SEVEN_DAYS_ANYWHERE_RE, "")
    .replace(ABBREVIATED_DAY_RANGE_RE, "")
    .replace(FULL_NAME_DAY_RANGE_RE, "")
    .replace(/^[,;:\-\s]+/, "")
    .trim();
}

// --- Time slot extraction ---

/**
 * Formats an hour/minute pair captured from text as a zero-padded "HH:MM".
 *
 * @param {string} h - Hour digits.
 * @param {string} [m] - Minute digits; empty or missing means "00".
 * @returns {string|null} "HH:MM", or null when the pair is not a valid time.
 *   "24:00" is accepted as an end-of-day marker; "24:01" and later are not.
 */
function fmtTime(h, m) {
  const hours = Number.parseInt(h, 10);
  const minutes = Number.parseInt(m || "0", 10);

  // Non-numeric input used to slip through the range check as "NaN:00".
  if (!Number.isInteger(hours) || !Number.isInteger(minutes)) return null;
  if (hours < 0 || hours > 24 || minutes < 0 || minutes > 59) return null;
  // Hour 24 only makes sense as exactly midnight at the end of the day.
  if (hours === 24 && minutes !== 0) return null;

  const pad = (value) => String(value).padStart(2, "0");
  return `${pad(hours)}:${pad(minutes)}`;
}

// Matches: 8h30/17h30, 8h-18h, 08:00-18:00, 8h à 18h, 8h a 18h, 8H30-17H
// Groups: 1 = opening hour, 2 = opening minutes, 3 = closing hour, 4 = closing minutes.
// IMPORTANT: no \s* between [:hH] and (\d{0,2}) — minutes must be adjacent
// to the separator, otherwise "8h/12h 14h/17h" would merge into one match.
// "H" is accepted in both cases; the character class is spelled out (instead
// of using the /i flag) so that code rebuilding the pattern from `.source`
// with its own flags recognises the same tokens.
const TIME_RANGE_RE =
  /(\d{1,2})\s*[:hH](\d{0,2})\s*(?:[-/à]|\ba\b)\s*(\d{1,2})\s*[:hH](\d{0,2})/g;

// Matches standalone: 8h30, 14h, 8H30 (minutes adjacent to h)
// Groups: 1 = hour, 2 = minutes.
const TIME_POINT_RE = /(\d{1,2})\s*[hH](\d{0,2})/g;

/**
 * Extracts opening/closing slots from free text.
 *
 * Pass 1 collects explicit ranges (TIME_RANGE_RE: "8h/18h", "8h-18h",
 * "08:00-18:00"). If at least one valid range is found, only those are
 * returned. Otherwise pass 2 collects standalone time points (TIME_POINT_RE)
 * and pairs them in order of appearance: "7h 17h" → {07:00, 17:00}. A trailing
 * unpaired point is dropped, as is any time fmtTime() rejects.
 *
 * @param {string} text - Text to scan.
 * @returns {{open: string, close: string}[]} Slots in order of appearance
 *   (possibly empty).
 */
function extractTimeSlots(text) {
  const input = String(text ?? "");

  // Fresh RegExp objects: the shared /g patterns carry a mutable lastIndex.
  const rangePattern = new RegExp(TIME_RANGE_RE.source, "g");
  const ranges = [];
  for (const [, openH, openM, closeH, closeM] of input.matchAll(rangePattern)) {
    const open = fmtTime(openH, openM);
    const close = fmtTime(closeH, closeM);
    if (open && close) ranges.push({ open, close });
  }
  if (ranges.length > 0) return ranges;

  const pointPattern = new RegExp(TIME_POINT_RE.source, "g");
  const points = [];
  for (const [, hour, minute] of input.matchAll(pointPattern)) {
    const time = fmtTime(hour, minute);
    if (time) points.push(time);
  }

  const pairs = [];
  for (let i = 0; i + 1 < points.length; i += 2) {
    pairs.push({ open: points[i], close: points[i + 1] });
  }
  return pairs;
}

/**
 * Deletes every time token from the text: first full ranges ("8h-18h"), then
 * any remaining standalone points ("14h30").
 *
 * The patterns are rebuilt from TIME_RANGE_RE / TIME_POINT_RE rather than
 * duplicated here, so what gets removed always matches what
 * extractTimeSlots() recognises.
 *
 * @param {string} text - Text from which slots have already been extracted.
 * @returns {string} The text without time tokens, trimmed. Inner whitespace
 *   is left as is.
 */
function removeTimeTokens(text) {
  const rangePattern = new RegExp(TIME_RANGE_RE.source, "g");
  const pointPattern = new RegExp(TIME_POINT_RE.source, "g");
  return text.replace(rangePattern, "").replace(pointPattern, "").trim();
}

// --- Main normalizer ---

// "jours fériés" modifier, with an optional leading comma.
const PUBLIC_HOLIDAYS_RE = /,?\s*jours?\s+f[ée]ri[ée]s?\s*/gi;

// "24h/24", "24/24", "24h/24h". The trailing "h" is only consumed when it is
// not the first letter of a word: a bare `\s*h?` would turn
// "24h/24 heures ouvrables" into "eures ouvrables".
const ROUND_THE_CLOCK_RE = /24\s*h?\s*\/\s*24(?:\s*h(?![\p{L}\p{N}]))?/giu;

const BUSINESS_HOURS_RE = /heures?\s+ouvrables?/gi;
const NIGHT_HOURS_RE = /heures?\s+de\s+nuit/gi;

// "événements", "évènement", and the unaccented "evenements".
const EVENTS_RE = /[ée]v[éèe]nements?/gi;

/**
 * Removes every occurrence of `pattern` from `text`.
 *
 * @param {string} text
 * @param {RegExp} pattern - Global pattern to remove.
 * @returns {string|null} The trimmed remainder, or null when nothing matched.
 */
function removeMarker(text, pattern) {
  const remainder = text.replace(pattern, "");
  return remainder === text ? null : remainder.trim();
}

/**
 * Normalizes a raw French opening-hours string into a structured object.
 *
 * Processing order (each step consumes the text it understands):
 *   1. leading day prefix via extractDayPrefix()
 *   2. "jours fériés" (dropped, informational only)
 *   3. "24h/24"            → is24h (and all days if none were set)
 *   4. "heures ouvrables"  → businessHours
 *   5. "heures de nuit"    → nightHours
 *   6. "événements"        → events
 *   7. stripRedundantDays()
 *   8. time slots (skipped when is24h), at most 4 kept
 *   9. whatever is left becomes `notes`
 *
 * @param {string|null|undefined} raw - Raw horaires text; non-string or blank
 *   values yield the default result.
 * @param {number} disponible24h - The value 1 marks the entry as available
 *   around the clock (is24h = true, days = all seven); any other value does not.
 * @returns {{days: number[]|null, slots: {open: string, close: string}[]|null,
 *   is24h: boolean, businessHours: boolean, nightHours: boolean,
 *   events: boolean, notes: string}}
 */
export function normalizeHoraires(raw, disponible24h) {
  const alwaysAvailable = disponible24h === 1;
  const result = {
    days: alwaysAvailable ? [...ALL_DAYS] : null,
    slots: null,
    is24h: alwaysAvailable,
    businessHours: false,
    nightHours: false,
    events: false,
    notes: "",
  };

  // Non-string input (numbers, objects) used to throw on `.trim()`.
  if (typeof raw !== "string" || raw.trim() === "") return result;

  let text = raw.trim();

  // 1. Extract day prefix. Days already implied by disponible24h win.
  const dayPrefix = extractDayPrefix(text);
  if (dayPrefix) {
    if (!result.days) result.days = dayPrefix.days;
    // Strip a leading comma/semicolon left right after the day prefix.
    text = text.slice(dayPrefix.end).trim().replace(/^[,;]\s*/, "");
  }

  // 2. "jours fériés" modifier (informational, strip it). Replaced by a space
  //    so the neighbouring words are not glued together.
  text = text.replace(PUBLIC_HOLIDAYS_RE, " ").trim();

  // 3. 24h/24 detection
  const without24h = removeMarker(text, ROUND_THE_CLOCK_RE);
  if (without24h !== null) {
    result.is24h = true;
    text = without24h;
    if (!result.days) result.days = [...ALL_DAYS];
  }

  // 4. "heures ouvrables"
  const withoutBusiness = removeMarker(text, BUSINESS_HOURS_RE);
  if (withoutBusiness !== null) {
    result.businessHours = true;
    text = withoutBusiness;
  }

  // 5. "heures de nuit"
  const withoutNight = removeMarker(text, NIGHT_HOURS_RE);
  if (withoutNight !== null) {
    result.nightHours = true;
    text = withoutNight;
  }

  // 6. "événements"
  const withoutEvents = removeMarker(text, EVENTS_RE);
  if (withoutEvents !== null) {
    result.events = true;
    text = withoutEvents;
  }

  // 7. Strip redundant day info (e.g., "7J/7", "L au V").
  // NOTE(review): ranges removed here are discarded even when `days` is still
  // null, so "du lundi au vendredi 8h-18h" ends up with days = null — confirm
  // that this is intended for the source data.
  text = stripRedundantDays(text);

  // 8. Extract time slots (max 4 to cover morning+afternoon+evening combos)
  if (!result.is24h) {
    const slots = extractTimeSlots(text);
    if (slots.length > 0) {
      result.slots = slots.slice(0, 4);
      text = removeTimeTokens(text);
    }
  }

  // 9. Clean remaining text → notes: drop separator runs at both ends and
  //    collapse inner whitespace.
  const notes = text
    .replace(/^[;,\-/+.\s]+/, "")
    .replace(/[;,\-/+.\s]+$/, "")
    .replace(/\s+/g, " ")
    .trim();
  if (notes) result.notes = notes;

  return result;
}
