// ctxgraph_extract/temporal.rs

1use chrono::{DateTime, NaiveDate, Utc};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::sync::LazyLock;
5
6/// Result of temporal parsing across all 5 layers.
7#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
8pub enum TemporalResult {
9    /// Layer 1+2: A fully resolved calendar date.
10    ExactDate(NaiveDate),
11
12    /// Layer 3: A relative offset from the reference timestamp.
13    RelativeDate {
14        offset_days: i64,
15        resolved: Option<NaiveDate>,
16    },
17
18    /// Layer 4: A fiscal/quarter date range.
19    DateRange {
20        start: NaiveDate,
21        end: NaiveDate,
22        label: String,
23    },
24
25    /// Layer 5: A duration expression.
26    Duration {
27        months: u32,
28        days: u32,
29        label: String,
30    },
31}
32
33// ---------------------------------------------------------------------------
34// Compiled regexes (built once)
35// ---------------------------------------------------------------------------
36
37static ISO_FULL: LazyLock<Regex> =
38    LazyLock::new(|| Regex::new(r"\b(\d{4})-(\d{2})-(\d{2})\b").unwrap());
39
40static ISO_MONTH: LazyLock<Regex> =
41    LazyLock::new(|| Regex::new(r"\b(\d{4})-(\d{2})\b").unwrap());
42
43// "March 11, 2026" or "Mar 11, 2026"
44static WRITTEN_MDY: LazyLock<Regex> = LazyLock::new(|| {
45    Regex::new(r"(?i)\b(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+(\d{1,2}),?\s+(\d{4})\b").unwrap()
46});
47
48// "11 March 2026"
49static WRITTEN_DMY: LazyLock<Regex> = LazyLock::new(|| {
50    Regex::new(r"(?i)\b(\d{1,2})\s+(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+(\d{4})\b").unwrap()
51});
52
53// "Mar 2026" / "March 2026"
54static WRITTEN_MY: LazyLock<Regex> = LazyLock::new(|| {
55    Regex::new(r"(?i)\b(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+(\d{4})\b").unwrap()
56});
57
58// Layer 3 — relative expressions
59static REL_YESTERDAY: LazyLock<Regex> =
60    LazyLock::new(|| Regex::new(r"(?i)\byesterday\b").unwrap());
61
62static REL_TODAY: LazyLock<Regex> =
63    LazyLock::new(|| Regex::new(r"(?i)\btoday\b").unwrap());
64
65static REL_TOMORROW: LazyLock<Regex> =
66    LazyLock::new(|| Regex::new(r"(?i)\btomorrow\b").unwrap());
67
68static REL_N_DAYS_AGO: LazyLock<Regex> =
69    LazyLock::new(|| Regex::new(r"(?i)\b(\d+)\s+days?\s+ago\b").unwrap());
70
71static REL_N_WEEKS_AGO: LazyLock<Regex> =
72    LazyLock::new(|| Regex::new(r"(?i)\b(\d+)\s+weeks?\s+ago\b").unwrap());
73
74static REL_LAST_WEEK: LazyLock<Regex> =
75    LazyLock::new(|| Regex::new(r"(?i)\blast\s+week\b").unwrap());
76
77static REL_LAST_MONTH: LazyLock<Regex> =
78    LazyLock::new(|| Regex::new(r"(?i)\blast\s+month\b").unwrap());
79
80// Layer 4 — quarter / fiscal year
81static QUARTER: LazyLock<Regex> =
82    LazyLock::new(|| Regex::new(r"(?i)\bQ([1-4])\s+(\d{4})\b").unwrap());
83
84static FISCAL_YEAR_LONG: LazyLock<Regex> =
85    LazyLock::new(|| Regex::new(r"(?i)\bFY(\d{4})\b").unwrap());
86
87static FISCAL_YEAR_SHORT: LazyLock<Regex> =
88    LazyLock::new(|| Regex::new(r"(?i)\bFY(\d{2})\b").unwrap());
89
90// Layer 5 — durations
91static DUR_MONTHS: LazyLock<Regex> =
92    LazyLock::new(|| Regex::new(r"(?i)\b(?:for|over)\s+(\d+)\s+months?\b").unwrap());
93
94static DUR_WEEKS: LazyLock<Regex> =
95    LazyLock::new(|| Regex::new(r"(?i)\b(?:for|over)\s+(\d+)\s+weeks?\b").unwrap());
96
97static DUR_DAYS: LazyLock<Regex> =
98    LazyLock::new(|| Regex::new(r"(?i)\b(?:for|over)\s+(\d+)\s+days?\b").unwrap());
99
100// ---------------------------------------------------------------------------
101// Public API
102// ---------------------------------------------------------------------------
103
104/// Parse temporal expressions from `text`, resolving relative dates against
105/// `reference`.  Returns all matches across all 5 layers.
106pub fn parse_temporal(text: &str, reference: DateTime<Utc>) -> Vec<TemporalResult> {
107    let ref_date = reference.date_naive();
108    let mut results = Vec::new();
109
110    layer1_iso(text, &mut results);
111    layer2_written(text, &mut results);
112    layer3_relative(text, ref_date, &mut results);
113    layer4_fiscal(text, &mut results);
114    layer5_duration(text, &mut results);
115
116    results
117}
118
119// ---------------------------------------------------------------------------
120// Layer 1 — ISO-8601
121// ---------------------------------------------------------------------------
122
123fn layer1_iso(text: &str, out: &mut Vec<TemporalResult>) {
124    // Full dates first — we track their byte ranges so the month-only regex
125    // doesn't duplicate them.
126    let mut full_ranges: Vec<(usize, usize)> = Vec::new();
127
128    for cap in ISO_FULL.captures_iter(text) {
129        let m = cap.get(0).unwrap();
130        full_ranges.push((m.start(), m.end()));
131
132        let y: i32 = cap[1].parse().unwrap();
133        let m_val: u32 = cap[2].parse().unwrap();
134        let d: u32 = cap[3].parse().unwrap();
135        if let Some(date) = NaiveDate::from_ymd_opt(y, m_val, d) {
136            out.push(TemporalResult::ExactDate(date));
137        }
138    }
139
140    for cap in ISO_MONTH.captures_iter(text) {
141        let m = cap.get(0).unwrap();
142        // Skip if this span is inside a full-date match.
143        if full_ranges
144            .iter()
145            .any(|&(s, e)| m.start() >= s && m.end() <= e)
146        {
147            continue;
148        }
149        let y: i32 = cap[1].parse().unwrap();
150        let m_val: u32 = cap[2].parse().unwrap();
151        if let Some(date) = NaiveDate::from_ymd_opt(y, m_val, 1) {
152            out.push(TemporalResult::ExactDate(date));
153        }
154    }
155}
156
157// ---------------------------------------------------------------------------
158// Layer 2 — written dates
159// ---------------------------------------------------------------------------
160
161fn layer2_written(text: &str, out: &mut Vec<TemporalResult>) {
162    // "March 11, 2026"
163    for cap in WRITTEN_MDY.captures_iter(text) {
164        let month = parse_month_name(&cap[1]);
165        let day: u32 = cap[2].parse().unwrap();
166        let year: i32 = cap[3].parse().unwrap();
167        if let (Some(m), Some(date)) = (month, None::<NaiveDate>) {
168            let _ = (m, date); // satisfy compiler
169        }
170        if let Some(m) = month {
171            if let Some(date) = NaiveDate::from_ymd_opt(year, m, day) {
172                out.push(TemporalResult::ExactDate(date));
173            }
174        }
175    }
176
177    // "11 March 2026"
178    for cap in WRITTEN_DMY.captures_iter(text) {
179        let day: u32 = cap[1].parse().unwrap();
180        let month = parse_month_name(&cap[2]);
181        let year: i32 = cap[3].parse().unwrap();
182        if let Some(m) = month {
183            if let Some(date) = NaiveDate::from_ymd_opt(year, m, day) {
184                out.push(TemporalResult::ExactDate(date));
185            }
186        }
187    }
188
189    // "Mar 2026" — but skip if already matched by MDY (contains a day).
190    // We use a simple heuristic: check that the match is not a substring of
191    // a longer MDY match by verifying no digit precedes the month name in the
192    // captured region.
193    let mdy_ranges: Vec<(usize, usize)> = WRITTEN_MDY
194        .find_iter(text)
195        .map(|m| (m.start(), m.end()))
196        .collect();
197    let dmy_ranges: Vec<(usize, usize)> = WRITTEN_DMY
198        .find_iter(text)
199        .map(|m| (m.start(), m.end()))
200        .collect();
201
202    for cap in WRITTEN_MY.captures_iter(text) {
203        let m = cap.get(0).unwrap();
204        let overlaps_mdy = mdy_ranges
205            .iter()
206            .any(|&(s, e)| m.start() >= s && m.end() <= e);
207        let overlaps_dmy = dmy_ranges
208            .iter()
209            .any(|&(s, e)| m.start() >= s && m.end() <= e);
210        if overlaps_mdy || overlaps_dmy {
211            continue;
212        }
213        let month = parse_month_name(&cap[1]);
214        let year: i32 = cap[2].parse().unwrap();
215        if let Some(mo) = month {
216            if let Some(date) = NaiveDate::from_ymd_opt(year, mo, 1) {
217                out.push(TemporalResult::ExactDate(date));
218            }
219        }
220    }
221}
222
/// Maps an English month name to its number (`1` = January … `12` = December).
///
/// Both the three-letter abbreviation and the full name are accepted, in any
/// ASCII letter case. Anything else — including other abbreviations such as
/// `sept` — yields `None`.
fn parse_month_name(s: &str) -> Option<u32> {
    // (abbreviation, full name), in calendar order.
    const MONTHS: [(&str, &str); 12] = [
        ("jan", "january"),
        ("feb", "february"),
        ("mar", "march"),
        ("apr", "april"),
        ("may", "may"),
        ("jun", "june"),
        ("jul", "july"),
        ("aug", "august"),
        ("sep", "september"),
        ("oct", "october"),
        ("nov", "november"),
        ("dec", "december"),
    ];

    (1u32..).zip(MONTHS).find_map(|(number, (abbr, full))| {
        (s.eq_ignore_ascii_case(abbr) || s.eq_ignore_ascii_case(full)).then_some(number)
    })
}
240
241// ---------------------------------------------------------------------------
242// Layer 3 — relative dates
243// ---------------------------------------------------------------------------
244
245fn layer3_relative(text: &str, ref_date: NaiveDate, out: &mut Vec<TemporalResult>) {
246    if REL_TODAY.is_match(text) {
247        out.push(TemporalResult::RelativeDate {
248            offset_days: 0,
249            resolved: Some(ref_date),
250        });
251    }
252
253    if REL_YESTERDAY.is_match(text) {
254        let d = ref_date - chrono::Duration::days(1);
255        out.push(TemporalResult::RelativeDate {
256            offset_days: -1,
257            resolved: Some(d),
258        });
259    }
260
261    if REL_TOMORROW.is_match(text) {
262        let d = ref_date + chrono::Duration::days(1);
263        out.push(TemporalResult::RelativeDate {
264            offset_days: 1,
265            resolved: Some(d),
266        });
267    }
268
269    for cap in REL_N_DAYS_AGO.captures_iter(text) {
270        let n: i64 = cap[1].parse().unwrap();
271        let d = ref_date - chrono::Duration::days(n);
272        out.push(TemporalResult::RelativeDate {
273            offset_days: -n,
274            resolved: Some(d),
275        });
276    }
277
278    for cap in REL_N_WEEKS_AGO.captures_iter(text) {
279        let n: i64 = cap[1].parse().unwrap();
280        let days = n * 7;
281        let d = ref_date - chrono::Duration::days(days);
282        out.push(TemporalResult::RelativeDate {
283            offset_days: -days,
284            resolved: Some(d),
285        });
286    }
287
288    if REL_LAST_WEEK.is_match(text) {
289        let d = ref_date - chrono::Duration::days(7);
290        out.push(TemporalResult::RelativeDate {
291            offset_days: -7,
292            resolved: Some(d),
293        });
294    }
295
296    if REL_LAST_MONTH.is_match(text) {
297        let d = ref_date - chrono::Duration::days(30);
298        out.push(TemporalResult::RelativeDate {
299            offset_days: -30,
300            resolved: Some(d),
301        });
302    }
303}
304
305// ---------------------------------------------------------------------------
306// Layer 4 — fiscal / quarter
307// ---------------------------------------------------------------------------
308
309fn layer4_fiscal(text: &str, out: &mut Vec<TemporalResult>) {
310    for cap in QUARTER.captures_iter(text) {
311        let q: u32 = cap[1].parse().unwrap();
312        let year: i32 = cap[2].parse().unwrap();
313        let (start_month, end_month) = match q {
314            1 => (1, 3),
315            2 => (4, 6),
316            3 => (7, 9),
317            4 => (10, 12),
318            _ => continue,
319        };
320        if let (Some(start), Some(end)) = (
321            NaiveDate::from_ymd_opt(year, start_month, 1),
322            last_day_of_month(year, end_month),
323        ) {
324            out.push(TemporalResult::DateRange {
325                start,
326                end,
327                label: format!("Q{q} {year}"),
328            });
329        }
330    }
331
332    for cap in FISCAL_YEAR_LONG.captures_iter(text) {
333        let year: i32 = cap[1].parse().unwrap();
334        if let (Some(start), Some(end)) = (
335            NaiveDate::from_ymd_opt(year, 1, 1),
336            NaiveDate::from_ymd_opt(year, 12, 31),
337        ) {
338            out.push(TemporalResult::DateRange {
339                start,
340                end,
341                label: format!("FY{year}"),
342            });
343        }
344    }
345
346    // Short form FY26 -> 2026 (only if not already captured by long form).
347    // We skip if the two-digit number also appeared as part of a 4-digit FY.
348    let long_matches: Vec<String> = FISCAL_YEAR_LONG
349        .captures_iter(text)
350        .map(|c| c[1].to_string())
351        .collect();
352
353    for cap in FISCAL_YEAR_SHORT.captures_iter(text) {
354        let short: &str = &cap[1];
355        let year: i32 = 2000 + short.parse::<i32>().unwrap();
356        let year_str = year.to_string();
357        // Avoid double-counting FY2026 vs FY26
358        if long_matches.contains(&year_str) {
359            // Check if this specific match overlaps with a long-form match
360            // by seeing if the full match text is 4 digits.
361            continue;
362        }
363        if let (Some(start), Some(end)) = (
364            NaiveDate::from_ymd_opt(year, 1, 1),
365            NaiveDate::from_ymd_opt(year, 12, 31),
366        ) {
367            out.push(TemporalResult::DateRange {
368                start,
369                end,
370                label: format!("FY{year}"),
371            });
372        }
373    }
374}
375
376fn last_day_of_month(year: i32, month: u32) -> Option<NaiveDate> {
377    if month == 12 {
378        NaiveDate::from_ymd_opt(year, 12, 31)
379    } else {
380        NaiveDate::from_ymd_opt(year, month + 1, 1).map(|d| d - chrono::Duration::days(1))
381    }
382}
383
384// ---------------------------------------------------------------------------
385// Layer 5 — durations
386// ---------------------------------------------------------------------------
387
388fn layer5_duration(text: &str, out: &mut Vec<TemporalResult>) {
389    for cap in DUR_MONTHS.captures_iter(text) {
390        let n: u32 = cap[1].parse().unwrap();
391        let full = cap.get(0).unwrap().as_str();
392        out.push(TemporalResult::Duration {
393            months: n,
394            days: 0,
395            label: full.to_string(),
396        });
397    }
398
399    for cap in DUR_WEEKS.captures_iter(text) {
400        let n: u32 = cap[1].parse().unwrap();
401        let full = cap.get(0).unwrap().as_str();
402        out.push(TemporalResult::Duration {
403            months: 0,
404            days: n * 7,
405            label: full.to_string(),
406        });
407    }
408
409    for cap in DUR_DAYS.captures_iter(text) {
410        let n: u32 = cap[1].parse().unwrap();
411        let full = cap.get(0).unwrap().as_str();
412        out.push(TemporalResult::Duration {
413            months: 0,
414            days: n,
415            label: full.to_string(),
416        });
417    }
418}
419
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a calendar date, panicking if the components are invalid.
    fn ymd(y: i32, m: u32, d: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(y, m, d).expect("test date must be valid")
    }

    /// Midnight UTC on the given calendar date, used as the reference
    /// timestamp in the tests below.
    fn utc(y: i32, m: u32, d: u32) -> DateTime<Utc> {
        ymd(y, m, d)
            .and_hms_opt(0, 0, 0)
            .expect("midnight is a valid time")
            .and_utc()
    }

    /// A full ISO date yields exactly one exact-date result.
    #[test]
    fn iso_full_date() {
        let parsed = parse_temporal("meeting on 2026-03-11", utc(2026, 1, 1));
        assert_eq!(parsed, vec![TemporalResult::ExactDate(ymd(2026, 3, 11))]);
    }

    /// A month-only ISO date resolves to the first day of that month.
    #[test]
    fn iso_month_only() {
        let parsed = parse_temporal("report for 2026-03", utc(2026, 1, 1));
        assert_eq!(parsed, vec![TemporalResult::ExactDate(ymd(2026, 3, 1))]);
    }

    /// "yesterday" resolves to the day before the reference date.
    #[test]
    fn relative_yesterday() {
        let parsed = parse_temporal("as of yesterday", utc(2026, 3, 11));
        let expected = TemporalResult::RelativeDate {
            offset_days: -1,
            resolved: Some(ymd(2026, 3, 10)),
        };
        assert_eq!(parsed, vec![expected]);
    }
}