Skip to main content

anno/
temporal.rs

1//! Temporal entity tracking, parsing, and diachronic NER.
2//!
3//! # The Problem: Entities Change Over Time
4//!
5//! Traditional NER treats entities as static facts, but the world changes:
6//!
7//! ```text
8//! ┌────────────────────────────────────────────────────────────────────────────┐
9//! │                     ENTITIES ARE NOT STATIC                                │
10//! ├────────────────────────────────────────────────────────────────────────────┤
11//! │                                                                            │
12//! │  "CEO of Microsoft"                                                        │
13//! │  ─────────────────                                                         │
14//! │                                                                            │
//! │  2000-2014:   Steve Ballmer                                                │
16//! │  2014-today:  Satya Nadella                                                │
17//! │                                                                            │
18//! │  "Capital of Germany"                                                      │
19//! │  ────────────────────                                                      │
20//! │                                                                            │
21//! │  1949-1990:   Bonn (West Germany)                                          │
22//! │  1990-today:  Berlin (unified Germany)                                     │
23//! │                                                                            │
24//! │  "USSR"                                                                    │
25//! │  ─────                                                                     │
26//! │                                                                            │
27//! │  1922-1991:   Existed as a country                                         │
28//! │  1991-today:  Historical reference only                                    │
29//! │                                                                            │
30//! └────────────────────────────────────────────────────────────────────────────┘
31//! ```
32//!
33//! # Temporal Entity Operations
34//!
35//! This module provides:
36//!
37//! 1. **Point-in-time queries**: Which entities were valid at timestamp T?
38//! 2. **Entity evolution**: How did entity E change over time?
39//! 3. **Temporal alignment**: Link entities across documents with different dates
40//! 4. **Version tracking**: Track multiple values for the same slot over time
41//!
42//! # Example
43//!
44//! ```rust
45//! use anno::temporal::{TemporalEntityTracker, TemporalQuery, EntityTimeline};
46//! use anno::{Entity, EntityType};
47//! use chrono::{TimeZone, Utc};
48//!
49//! let mut tracker = TemporalEntityTracker::new();
50//!
51//! // Add entities with temporal validity
52//! let mut ballmer = Entity::new("Steve Ballmer", EntityType::Person, 0, 13, 0.9);
53//! ballmer.set_valid_from(Utc.with_ymd_and_hms(2000, 1, 13, 0, 0, 0).unwrap());
54//! ballmer.set_valid_until(Utc.with_ymd_and_hms(2014, 2, 4, 0, 0, 0).unwrap());
55//! ballmer.normalized = Some("CEO_OF_MICROSOFT".into());
56//! tracker.add_entity(ballmer);
57//!
58//! let mut nadella = Entity::new("Satya Nadella", EntityType::Person, 0, 13, 0.95);
59//! nadella.set_valid_from(Utc.with_ymd_and_hms(2014, 2, 4, 0, 0, 0).unwrap());
60//! nadella.normalized = Some("CEO_OF_MICROSOFT".into());
61//! tracker.add_entity(nadella);
62//!
63//! // Query: Who was CEO in 2010?
64//! let query_2010 = Utc.with_ymd_and_hms(2010, 6, 1, 0, 0, 0).unwrap();
65//! let result = tracker.query_at(&query_2010);
66//! assert!(result.iter().any(|e| e.text.contains("Ballmer")));
67//!
68//! // Query: Who was CEO in 2020?
69//! let query_2020 = Utc.with_ymd_and_hms(2020, 6, 1, 0, 0, 0).unwrap();
70//! let result = tracker.query_at(&query_2020);
71//! assert!(result.iter().any(|e| e.text.contains("Nadella")));
72//! ```
73//!
74//! # Cultural Assumptions and Limitations
75//!
76//! **This module's default implementation assumes Western/Gregorian temporal concepts.**
77//!
78//! The trait-based design allows extending to other temporal ontologies, but
79//! users should be aware of these built-in assumptions:
80//!
81//! | Assumption | Western View | Alternative Views |
82//! |------------|--------------|-------------------|
83//! | Time structure | Linear, unidirectional | Cyclical (Hindu yugas, Mayan), spiral |
84//! | Reference point | Fixed (CE/BCE, Unix epoch) | Event-based ("when the rains came") |
85//! | Calendar | Gregorian (solar) | Lunar (Islamic), lunisolar (Hebrew, Chinese) |
86//! | Granularity | Clock-based (hours, minutes) | Event-based, seasonal, relational |
87//! | Precision | Valued and expected | May be culturally inappropriate |
88//!
89//! ## Non-Western Temporal Concepts
90//!
91//! ### African Temporal Philosophies
92//!
93//! Many African cultures conceptualize time differently from the Western linear model:
94//!
95//! - **Event-based time**: Time is marked by significant events, not abstract units.
96//!   "After the harvest" or "when the chief visited" may be more meaningful than dates.
97//! - **Relational time**: Time understood through social relationships and activities
98//!   rather than clock positions.
99//! - **Cyclical/seasonal**: Agricultural and ceremonial cycles structure time.
100//! - **Ubuntu temporality**: Time as fundamentally social and communal.
101//!
102//! The Swahili concept of "sasa" (present) and "zamani" (past that shapes present)
103//! differs from Western past/present/future trichotomy.
104//!
105//! ### East Asian Calendars
106//!
107//! - **Chinese calendar**: Lunisolar with 60-year cycles (干支), zodiac years
108//! - **Japanese eras**: Named periods tied to imperial reigns (令和, Reiwa)
109//! - **Korean**: Dangun calendar alongside Gregorian
110//!
111//! ### South Asian Concepts
112//!
113//! - **Hindu yugas**: Cosmic time cycles spanning millions of years
114//! - **Tithi**: Lunar days used for religious observances
115//! - **Panchang**: Five-limbed calendar system
116//!
117//! ### Islamic Calendar
118//!
119//! - **Hijri calendar**: Purely lunar, 12 months of 29-30 days
120//! - Religious dates drift through Gregorian seasons
121//!
122//! ### Indigenous Temporal Systems
123//!
124//! Many indigenous cultures use:
125//! - Seasonal markers ("when salmon run")
126//! - Astronomical events ("after the first frost")
127//! - Generational time ("in my grandmother's time")
128//! - Dreamtime (Australian Aboriginal non-linear temporality)
129//!
130//! ## Extending for Non-Western Time
131//!
132//! Implement the `TemporalOntology` trait to add support for different
133//! temporal systems. The trait design intentionally avoids assuming:
134//! - Linear time
135//! - Fixed reference points
136//! - Gregorian calendar
137//! - Clock-based precision
138//!
139//! See the trait documentation for examples.
140//!
141//! # Research Background
142//!
143//! Based on:
144//! - Campos et al. (2014): "Survey of Temporal Information Extraction Research"
145//! - Kanhabua & Nørvåg (2012): "A Survey of Time-aware Information Access"
146//! - Berberich et al. (2010): "Timetravel: Temporal Web Search"
147//! - Mbiti, John S. (1969): "African Religions and Philosophy" (African time concepts)
148//! - Adjaye, Joseph K. (1994): "Time in the Black Experience"
149
150use crate::{Entity, EntityType};
151use chrono::{DateTime, Duration, NaiveDate, TimeZone, Utc};
152use serde::{Deserialize, Serialize};
153use std::collections::HashMap;
154
155// =============================================================================
156// Temporal Ontology Traits
157// =============================================================================
158
/// A temporal reference within a specific ontology.
///
/// This represents "a point or region in time" according to some temporal system.
/// It intentionally does NOT assume:
/// - Linear time (can represent cyclical or event-based time)
/// - Fixed granularity (can be fuzzy, range, or precise)
/// - Gregorian calendar (can be any calendar system)
///
/// # Design Philosophy
///
/// Implementors choose their own internal representation; the only
/// Gregorian-specific surface is the optional [`to_utc_range`](Self::to_utc_range)
/// conversion. This leaves room for:
/// - Event-based references ("after the harvest")
/// - Cyclical references ("Year of the Dragon")
/// - Fuzzy references ("recently")
/// - Composite references ("the third Monday of Ramadan")
///
/// Implementors must also be `Clone` and `Debug`.
pub trait TemporalReference: Clone + std::fmt::Debug {
    /// Can this reference be grounded to UTC?
    ///
    /// Intended to return `false` for:
    /// - Event-based time ("when the war ended") without known dates
    /// - Recurring patterns ("every Monday") without a specific instance
    /// - Purely relational time ("in my grandfather's time")
    fn is_groundable(&self) -> bool;

    /// Attempt to convert to a UTC range, returned as a `(start, end)` pair.
    ///
    /// Returns `None` if this reference cannot be mapped to Gregorian time.
    /// Even groundable references may have wide ranges (e.g., "the 90s" → 10 years).
    fn to_utc_range(&self) -> Option<(DateTime<Utc>, DateTime<Utc>)>;

    /// Get the original text of this reference.
    fn source_text(&self) -> &str;

    /// Confidence in the interpretation (0.0 to 1.0).
    ///
    /// Lower for ambiguous expressions like "soon" or "long ago".
    fn confidence(&self) -> f64;
}
197
/// A temporal ontology defines how time is conceptualized and parsed.
///
/// Different cultures and contexts have fundamentally different notions of time.
/// This trait allows implementing parsers and reasoners for any temporal system.
///
/// Implementors must provide [`parse`](Self::parse) and [`name`](Self::name);
/// the remaining methods have defaults describing a linear, UTC-convertible,
/// English-language system.
///
/// # Built-in Implementations
///
/// - [`GregorianOntology`]: Western/ISO 8601 time (default)
///
/// # Example: Custom Ontology
///
/// ```rust,ignore
/// use anno::temporal::{TemporalContext, TemporalOntology, TemporalReference};
///
/// /// Swahili temporal expressions
/// struct SwahiliOntology;
///
/// // `SwahiliTime` must also implement `TemporalReference` (omitted here).
/// #[derive(Clone, Debug)]
/// enum SwahiliTime {
///     /// "kesho" - tomorrow
///     Kesho,
///     /// "jana" - yesterday
///     Jana,
///     /// "sasa" - now/present (but conceptually broader than "now")
///     Sasa,
///     /// "zamani" - the past that shapes the present
///     Zamani,
///     /// Event-based: "wakati wa mavuno" - harvest time
///     SeasonalEvent(String),
/// }
///
/// impl TemporalOntology for SwahiliOntology {
///     type Reference = SwahiliTime;
///     type Error = String;
///
///     fn parse(&self, text: &str, context: Option<&TemporalContext>) -> Result<Self::Reference, Self::Error> {
///         match text.to_lowercase().as_str() {
///             "kesho" => Ok(SwahiliTime::Kesho),
///             "jana" => Ok(SwahiliTime::Jana),
///             "sasa" => Ok(SwahiliTime::Sasa),
///             "zamani" => Ok(SwahiliTime::Zamani),
///             _ if text.contains("mavuno") => Ok(SwahiliTime::SeasonalEvent("harvest".into())),
///             _ => Err(format!("Unknown temporal expression: {}", text)),
///         }
///     }
///
///     fn supports_linear_time(&self) -> bool {
///         // Swahili time concepts are not strictly linear
///         false
///     }
///
///     fn name(&self) -> &str {
///         "Swahili"
///     }
/// }
/// ```
pub trait TemporalOntology {
    /// The type of temporal reference this ontology produces.
    type Reference: TemporalReference;

    /// Error type for parsing failures.
    type Error: std::fmt::Debug;

    /// Parse a text expression into a temporal reference.
    ///
    /// The optional `context` can carry:
    /// - Document date (for relative expressions)
    /// - Geographic location (for local calendars)
    /// - Previous references (for anaphora like "the next day")
    ///
    /// # Errors
    ///
    /// Returns `Self::Error` when the implementation cannot interpret `text`.
    fn parse(
        &self,
        text: &str,
        context: Option<&TemporalContext>,
    ) -> Result<Self::Reference, Self::Error>;

    /// Does this ontology assume linear, unidirectional time?
    ///
    /// Defaults to `true`. Override to return `false` for cyclical (Hindu yugas),
    /// event-based (African), or non-linear (Aboriginal Dreamtime) temporal systems.
    fn supports_linear_time(&self) -> bool {
        true
    }

    /// Does this ontology support conversion to UTC?
    ///
    /// Defaults to `true`. Override to return `false` for purely event-based or
    /// mythological time systems.
    fn supports_utc_conversion(&self) -> bool {
        true
    }

    /// Get the name of this temporal system for documentation.
    fn name(&self) -> &str;

    /// Get supported language codes (ISO 639-1).
    ///
    /// Defaults to English only (`["en"]`).
    fn supported_languages(&self) -> &[&str] {
        &["en"]
    }
}
292
/// Context for temporal parsing.
///
/// Provides information needed to resolve relative and context-dependent
/// temporal expressions. Every field is optional; `Default` yields a context
/// with nothing set.
#[derive(Debug, Clone, Default)]
pub struct TemporalContext {
    /// The publication/utterance date of the document.
    ///
    /// Used to resolve "yesterday", "next week", etc.
    pub document_date: Option<DateTime<Utc>>,

    /// Geographic location for local calendar conversion.
    ///
    /// Some calendars (Islamic, Hebrew) depend on location for precise dates.
    /// Free-form string; no format is enforced by this type.
    pub location: Option<String>,

    /// Previously mentioned temporal references (for anaphora resolution).
    ///
    /// "On Monday... the next day..." → Tuesday
    pub previous_references: Vec<String>,

    /// The language of the text being parsed.
    pub language: Option<String>,

    /// Cultural context hints.
    ///
    /// E.g., "academic" (fall semester = Sep-Dec), "fiscal" (Q1 = different dates)
    pub domain: Option<String>,
}
322
323impl TemporalContext {
324    /// Create a context with just a document date.
325    #[must_use]
326    pub fn with_document_date(date: DateTime<Utc>) -> Self {
327        Self {
328            document_date: Some(date),
329            ..Default::default()
330        }
331    }
332
333    /// Create a context with document date and language.
334    #[must_use]
335    pub fn with_date_and_language(date: DateTime<Utc>, language: impl Into<String>) -> Self {
336        Self {
337            document_date: Some(date),
338            language: Some(language.into()),
339            ..Default::default()
340        }
341    }
342}
343
344// =============================================================================
345// Gregorian Ontology (Default Western Implementation)
346// =============================================================================
347
/// Western/Gregorian temporal ontology.
///
/// This is the default implementation. Parsing is delegated to
/// `parse_temporal_expression`; per the module's design it is meant to cover:
/// - ISO 8601 dates and times
/// - Common English temporal expressions
/// - Relative references (yesterday, next week)
/// - Fuzzy references (recently, soon)
///
/// NOTE(review): the exact set of supported expressions is determined by
/// `parse_temporal_expression` — confirm there before relying on this list.
///
/// **Limitations**: This implementation embeds Western assumptions about time.
/// See the module documentation for non-Western alternatives.
#[derive(Debug, Clone, Default)]
pub struct GregorianOntology;
360
361impl TemporalOntology for GregorianOntology {
362    type Reference = GregorianReference;
363    type Error = String;
364
365    fn parse(
366        &self,
367        text: &str,
368        context: Option<&TemporalContext>,
369    ) -> Result<Self::Reference, Self::Error> {
370        // Delegate to the existing parse_temporal_expression function
371        let abstract_expr = parse_temporal_expression(text);
372
373        // If we have context, try to ground relative expressions
374        let grounded = if let Some(ctx) = context {
375            if let Some(doc_date) = ctx.document_date {
376                abstract_expr.ground(&doc_date)
377            } else {
378                Some(abstract_expr)
379            }
380        } else {
381            Some(abstract_expr)
382        };
383
384        grounded
385            .map(|expr| GregorianReference {
386                text: text.to_string(),
387                expression: expr,
388            })
389            .ok_or_else(|| format!("Could not parse temporal expression: {}", text))
390    }
391
392    fn name(&self) -> &str {
393        "Gregorian (Western)"
394    }
395
396    fn supported_languages(&self) -> &[&str] {
397        &["en", "de", "fr", "es", "it", "pt", "nl"]
398    }
399}
400
/// A temporal reference in the Gregorian system.
///
/// Pairs the text that was parsed with the expression it was parsed into.
#[derive(Debug, Clone)]
pub struct GregorianReference {
    /// Original text, exactly as passed to the parser.
    pub text: String,
    /// Parsed abstract expression (grounded when a document date was available).
    pub expression: AbstractTemporalExpression,
}
409
410impl TemporalReference for GregorianReference {
411    fn is_groundable(&self) -> bool {
412        self.expression.granularity.is_groundable() || self.expression.grounded_range.is_some()
413    }
414
415    fn to_utc_range(&self) -> Option<(DateTime<Utc>, DateTime<Utc>)> {
416        self.expression.grounded_range
417    }
418
419    fn source_text(&self) -> &str {
420        &self.text
421    }
422
423    fn confidence(&self) -> f64 {
424        self.expression.grounding_confidence
425    }
426}
427
428// =============================================================================
429// Calendar System Traits
430// =============================================================================
431
/// A calendar system for date representation.
///
/// Different from [`TemporalOntology`] in that calendars are specifically
/// about date representation, while ontologies are about temporal concepts.
///
/// Implementors must provide both conversion directions and
/// [`name`](Self::name); [`calendar_type`](Self::calendar_type) defaults to solar.
///
/// # Built-in Implementations
///
/// - [`GregorianCalendar`]: Standard Western calendar
///
/// # Example: Islamic Calendar
///
/// ```rust,ignore
/// struct HijriCalendar;
///
/// impl CalendarSystem for HijriCalendar {
///     type Date = HijriDate;
///
///     fn to_gregorian(&self, date: &Self::Date) -> Option<NaiveDate> {
///         // Islamic calendar is purely lunar (354 or 355 days/year)
///         // Conversion requires astronomical calculation or lookup tables
///         todo!()
///     }
///
///     fn from_gregorian(&self, date: &NaiveDate) -> Option<Self::Date> {
///         todo!()
///     }
///
///     fn name(&self) -> &str {
///         "Hijri"
///     }
///
///     fn calendar_type(&self) -> CalendarType {
///         CalendarType::Lunar
///     }
/// }
/// ```
pub trait CalendarSystem {
    /// The date type for this calendar.
    type Date: Clone + std::fmt::Debug;

    /// Convert to Gregorian date.
    ///
    /// Returns `None` when `date` has no Gregorian equivalent.
    fn to_gregorian(&self, date: &Self::Date) -> Option<NaiveDate>;

    /// Convert from Gregorian date.
    ///
    /// Returns `None` when `date` cannot be represented in this calendar.
    // Clippy expects `from_*` methods to take no `self`; here the calendar
    // instance is the receiver by design, so the lint is silenced.
    #[allow(clippy::wrong_self_convention)]
    fn from_gregorian(&self, date: &NaiveDate) -> Option<Self::Date>;

    /// Get the calendar name.
    fn name(&self) -> &str;

    /// Is this calendar lunar, solar, or lunisolar?
    ///
    /// Defaults to [`CalendarType::Solar`].
    fn calendar_type(&self) -> CalendarType {
        CalendarType::Solar
    }
}
479
/// Type of calendar system, as reported by [`CalendarSystem::calendar_type`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CalendarType {
    /// Solar calendar (e.g., Gregorian)
    Solar,
    /// Lunar calendar (e.g., Islamic Hijri)
    Lunar,
    /// Lunisolar calendar (e.g., Hebrew, Chinese)
    Lunisolar,
    /// Other (e.g., Mayan long count)
    Other,
}
492
/// Standard Gregorian calendar implementation.
///
/// A zero-sized marker type; its [`CalendarSystem`] implementation uses
/// `NaiveDate` as the date type, so both conversions are the identity.
#[derive(Debug, Clone, Default)]
pub struct GregorianCalendar;
496
497impl CalendarSystem for GregorianCalendar {
498    type Date = NaiveDate;
499
500    fn to_gregorian(&self, date: &Self::Date) -> Option<NaiveDate> {
501        Some(*date)
502    }
503
504    fn from_gregorian(&self, date: &NaiveDate) -> Option<Self::Date> {
505        Some(*date)
506    }
507
508    fn name(&self) -> &str {
509        "Gregorian"
510    }
511}
512
513// =============================================================================
514// Core Types
515// =============================================================================
516
/// A temporal scope for queries.
///
/// Use [`TemporalScope::contains`] to test an entity against a scope.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum TemporalScope {
    /// Query at a specific point in time
    PointInTime(DateTime<Utc>),
    /// Query across a time range
    Range {
        /// Start of the time range (inclusive)
        start: DateTime<Utc>,
        /// End of the time range (exclusive)
        ///
        /// NOTE(review): `TemporalScope::contains` compares `valid_from <= end`,
        /// which treats this bound as inclusive — confirm which is intended.
        end: DateTime<Utc>,
    },
    /// Query for entities valid at any point
    AnyTime,
    /// Query for currently valid entities (no end date)
    Current,
}
534
535impl TemporalScope {
536    /// Check if an entity is valid within this scope.
537    #[must_use]
538    pub fn contains(&self, entity: &Entity) -> bool {
539        match self {
540            Self::PointInTime(ts) => entity.valid_at(ts),
541            Self::Range { start, end } => {
542                // Entity overlaps with range if:
543                // entity_start <= end AND (entity_end is None OR entity_end >= start)
544                let entity_starts_before_end = match entity.valid_from.as_ref() {
545                    None => true,
546                    Some(ef) => ef <= end,
547                };
548                let entity_ends_after_start = match entity.valid_until.as_ref() {
549                    None => true,
550                    Some(eu) => eu >= start,
551                };
552                entity_starts_before_end && entity_ends_after_start
553            }
554            Self::AnyTime => true,
555            Self::Current => entity.valid_until.is_none(),
556        }
557    }
558}
559
/// A temporal query for entity lookup.
///
/// Build one with [`TemporalQuery::at`], [`TemporalQuery::between`] or
/// [`TemporalQuery::current`], then narrow it with the `with_*` builders.
#[derive(Debug, Clone)]
pub struct TemporalQuery {
    /// The temporal scope
    pub scope: TemporalScope,
    /// Optional entity type filter
    pub entity_type: Option<EntityType>,
    /// Optional slot/role filter (e.g., "CEO_OF_MICROSOFT")
    pub slot: Option<String>,
    /// Include superseded (past) values
    ///
    /// NOTE(review): `TemporalEntityTracker::query` does not read this flag in
    /// the code visible here — confirm where it is honoured.
    pub include_historical: bool,
}
572
573impl TemporalQuery {
574    /// Create a point-in-time query.
575    #[must_use]
576    pub fn at(timestamp: DateTime<Utc>) -> Self {
577        Self {
578            scope: TemporalScope::PointInTime(timestamp),
579            entity_type: None,
580            slot: None,
581            include_historical: false,
582        }
583    }
584
585    /// Create a range query.
586    #[must_use]
587    pub fn between(start: DateTime<Utc>, end: DateTime<Utc>) -> Self {
588        Self {
589            scope: TemporalScope::Range { start, end },
590            entity_type: None,
591            slot: None,
592            include_historical: true,
593        }
594    }
595
596    /// Create a query for current values.
597    #[must_use]
598    pub fn current() -> Self {
599        Self {
600            scope: TemporalScope::Current,
601            entity_type: None,
602            slot: None,
603            include_historical: false,
604        }
605    }
606
607    /// Filter by entity type.
608    #[must_use]
609    pub fn with_type(mut self, entity_type: EntityType) -> Self {
610        self.entity_type = Some(entity_type);
611        self
612    }
613
614    /// Filter by slot/role.
615    #[must_use]
616    pub fn with_slot(mut self, slot: impl Into<String>) -> Self {
617        self.slot = Some(slot.into());
618        self
619    }
620
621    /// Include historical values.
622    #[must_use]
623    pub fn include_historical(mut self) -> Self {
624        self.include_historical = true;
625        self
626    }
627}
628
629// =============================================================================
630// Entity Timeline
631// =============================================================================
632
/// Timeline of values for a single slot/role over time.
///
/// Example: "CEO of Microsoft" slot has values:
/// - 2000-2014: Steve Ballmer
/// - 2014-present: Satya Nadella
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityTimeline {
    /// The slot/role this timeline tracks
    pub slot: String,
    /// Values over time, sorted by start date.
    ///
    /// `EntityTimeline::add` re-sorts after every insertion (entries without a
    /// `valid_from` come first). The field is public, so direct mutation can
    /// break that ordering.
    pub versions: Vec<TimelineEntry>,
}
645
/// A single entry in an entity timeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimelineEntry {
    /// The entity value for this period; its `valid_from` / `valid_until`
    /// bounds define the period.
    pub entity: Entity,
    /// Optional source/provenance
    pub source: Option<String>,
    /// Whether this was inferred vs. explicitly stated
    /// (`EntityTimeline::add` always records `false`).
    pub inferred: bool,
}
656
657impl EntityTimeline {
658    /// Create a new timeline for a slot.
659    #[must_use]
660    pub fn new(slot: impl Into<String>) -> Self {
661        Self {
662            slot: slot.into(),
663            versions: Vec::new(),
664        }
665    }
666
667    /// Add an entity to the timeline.
668    pub fn add(&mut self, entity: Entity, source: Option<String>) {
669        self.versions.push(TimelineEntry {
670            entity,
671            source,
672            inferred: false,
673        });
674        // Sort by valid_from (None goes first as "unknown/always")
675        self.versions
676            .sort_by(|a, b| match (&a.entity.valid_from, &b.entity.valid_from) {
677                (None, None) => std::cmp::Ordering::Equal,
678                (None, Some(_)) => std::cmp::Ordering::Less,
679                (Some(_), None) => std::cmp::Ordering::Greater,
680                (Some(a_from), Some(b_from)) => a_from.cmp(b_from),
681            });
682    }
683
684    /// Get the value at a specific point in time.
685    #[must_use]
686    pub fn value_at(&self, timestamp: &DateTime<Utc>) -> Option<&Entity> {
687        self.versions
688            .iter()
689            .rfind(|v| v.entity.valid_at(timestamp))
690            .map(|v| &v.entity)
691    }
692
693    /// Get the current value (no end date).
694    #[must_use]
695    pub fn current(&self) -> Option<&Entity> {
696        self.versions
697            .iter()
698            .rfind(|v| v.entity.valid_until.is_none())
699            .map(|v| &v.entity)
700    }
701
702    /// Get all historical values.
703    #[must_use]
704    pub fn history(&self) -> Vec<&Entity> {
705        self.versions.iter().map(|v| &v.entity).collect()
706    }
707
708    /// Check if there are gaps in the timeline.
709    #[must_use]
710    pub fn has_gaps(&self) -> bool {
711        if self.versions.len() < 2 {
712            return false;
713        }
714
715        for i in 0..self.versions.len() - 1 {
716            let current = &self.versions[i];
717            let next = &self.versions[i + 1];
718
719            // If current has an end and next has a start, check for gap
720            if let (Some(end), Some(start)) = (&current.entity.valid_until, &next.entity.valid_from)
721            {
722                if end < start {
723                    return true;
724                }
725            }
726        }
727        false
728    }
729
730    /// Check if there are overlapping values.
731    #[must_use]
732    pub fn has_overlaps(&self) -> bool {
733        if self.versions.len() < 2 {
734            return false;
735        }
736
737        for i in 0..self.versions.len() - 1 {
738            let current = &self.versions[i];
739            let next = &self.versions[i + 1];
740
741            // Overlap if current's end > next's start (or current has no end)
742            if let Some(next_start) = &next.entity.valid_from {
743                if current.entity.valid_until.is_none() {
744                    return true; // Current is still valid when next starts
745                }
746                if let Some(curr_end) = &current.entity.valid_until {
747                    if curr_end > next_start {
748                        return true;
749                    }
750                }
751            }
752        }
753        false
754    }
755}
756
757// =============================================================================
758// Temporal Entity Tracker
759// =============================================================================
760
/// Tracks entities over time with temporal validity.
///
/// Provides point-in-time queries and evolution tracking. Every added entity
/// is kept in a flat list; entities that have a slot are additionally copied
/// into that slot's [`EntityTimeline`].
#[derive(Debug, Clone, Default)]
pub struct TemporalEntityTracker {
    /// All tracked entities, in insertion order
    entities: Vec<Entity>,
    /// Timelines keyed by slot/role name
    timelines: HashMap<String, EntityTimeline>,
}
771
772impl TemporalEntityTracker {
773    /// Create a new tracker.
774    #[must_use]
775    pub fn new() -> Self {
776        Self::default()
777    }
778
779    /// Add an entity to track.
780    pub fn add_entity(&mut self, entity: Entity) {
781        // If entity has a normalized slot, add to timeline
782        if let Some(ref slot) = entity.normalized {
783            let timeline = self
784                .timelines
785                .entry(slot.clone())
786                .or_insert_with(|| EntityTimeline::new(slot));
787            timeline.add(entity.clone(), None);
788        }
789
790        self.entities.push(entity);
791    }
792
793    /// Add an entity with explicit slot.
794    pub fn add_entity_with_slot(&mut self, entity: Entity, slot: impl Into<String>) {
795        let slot = slot.into();
796        let timeline = self
797            .timelines
798            .entry(slot.clone())
799            .or_insert_with(|| EntityTimeline::new(&slot));
800        timeline.add(entity.clone(), None);
801
802        self.entities.push(entity);
803    }
804
805    /// Query entities valid at a specific timestamp.
806    #[must_use]
807    pub fn query_at(&self, timestamp: &DateTime<Utc>) -> Vec<&Entity> {
808        self.entities
809            .iter()
810            .filter(|e| e.valid_at(timestamp))
811            .collect()
812    }
813
814    /// Execute a temporal query.
815    #[must_use]
816    pub fn query(&self, query: &TemporalQuery) -> Vec<&Entity> {
817        self.entities
818            .iter()
819            .filter(|e| {
820                // Check temporal scope
821                if !query.scope.contains(e) {
822                    return false;
823                }
824
825                // Check entity type
826                if let Some(ref et) = query.entity_type {
827                    if &e.entity_type != et {
828                        return false;
829                    }
830                }
831
832                // Check slot
833                if let Some(ref slot) = query.slot {
834                    if e.normalized.as_ref() != Some(slot) {
835                        return false;
836                    }
837                }
838
839                true
840            })
841            .collect()
842    }
843
844    /// Get the timeline for a specific slot.
845    #[must_use]
846    pub fn timeline(&self, slot: &str) -> Option<&EntityTimeline> {
847        self.timelines.get(slot)
848    }
849
850    /// Get all known slots.
851    #[must_use]
852    pub fn slots(&self) -> Vec<&str> {
853        self.timelines.keys().map(|s| s.as_str()).collect()
854    }
855
856    /// Get entities that changed within a time range.
857    #[must_use]
858    pub fn changed_between(&self, start: &DateTime<Utc>, end: &DateTime<Utc>) -> Vec<&Entity> {
859        self.entities
860            .iter()
861            .filter(|e| {
862                // Entity changed if its valid_from or valid_until is within range
863                let started_in_range = e
864                    .valid_from
865                    .as_ref()
866                    .is_some_and(|vf| vf >= start && vf <= end);
867                let ended_in_range = e
868                    .valid_until
869                    .as_ref()
870                    .is_some_and(|vu| vu >= start && vu <= end);
871                started_in_range || ended_in_range
872            })
873            .collect()
874    }
875
876    /// Get count of temporal vs atemporal entities.
877    #[must_use]
878    pub fn temporal_stats(&self) -> TemporalStats {
879        let mut stats = TemporalStats::default();
880
881        for entity in &self.entities {
882            stats.total += 1;
883            if entity.is_temporal() {
884                stats.temporal += 1;
885                if entity.valid_until.is_none() {
886                    stats.currently_valid += 1;
887                } else {
888                    stats.historical += 1;
889                }
890            } else {
891                stats.atemporal += 1;
892            }
893        }
894
895        stats
896    }
897}
898
/// Statistics about temporal entities.
///
/// Plain counters filled in by `temporal_stats`; all fields default to zero.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TemporalStats {
    /// Total number of entities counted.
    pub total: usize,
    /// Entities with temporal bounds (`is_temporal()` returned `true`).
    pub temporal: usize,
    /// Entities without temporal bounds (timeless facts).
    pub atemporal: usize,
    /// Temporal entities with no `valid_until`, i.e. no recorded end.
    pub currently_valid: usize,
    /// Temporal entities that have a `valid_until` set.
    pub historical: usize,
}
913
914// =============================================================================
915// Temporal Alignment
916// =============================================================================
917
/// Aligns entities across documents with different publication dates.
///
/// When processing news from different dates, the same role might have
/// different values depending on when the document was written.
#[derive(Debug, Clone)]
pub struct TemporalAligner {
    /// Document timestamp used as the inferred `valid_from` for entities
    /// that carry no temporal bounds of their own.
    pub document_date: Option<DateTime<Utc>>,
    /// Whether to infer validity from the document date; when `false`,
    /// `annotate` leaves entities untouched.
    pub infer_from_document_date: bool,
    /// Default validity duration for inferred entities; when set, `annotate`
    /// also fills `valid_until` as document date plus this duration.
    pub default_duration: Option<Duration>,
}
931
impl Default for TemporalAligner {
    /// Build an aligner with no document date, inference enabled, and no
    /// default validity duration.
    fn default() -> Self {
        Self {
            document_date: None,
            // Inference is on by default; it only takes effect once a
            // document date is supplied.
            infer_from_document_date: true,
            default_duration: None,
        }
    }
}
941
942impl TemporalAligner {
943    /// Create a new aligner for a specific document date.
944    #[must_use]
945    pub fn for_document(date: DateTime<Utc>) -> Self {
946        Self {
947            document_date: Some(date),
948            infer_from_document_date: true,
949            default_duration: None,
950        }
951    }
952
953    /// Annotate an entity with temporal information based on document date.
954    ///
955    /// If the entity doesn't have temporal bounds, this can infer them
956    /// from the document date.
957    pub fn annotate(&self, entity: &mut Entity) {
958        if !self.infer_from_document_date {
959            return;
960        }
961
962        // Don't override existing temporal bounds
963        if entity.is_temporal() {
964            return;
965        }
966
967        // If we have a document date, use it to infer validity
968        if let Some(doc_date) = &self.document_date {
969            // For "current state" assertions (e.g., "X is CEO"),
970            // assume valid from document date with unknown end
971            entity.valid_from = Some(*doc_date);
972
973            // If we have a default duration, set end date too
974            if let Some(duration) = &self.default_duration {
975                entity.valid_until = Some(*doc_date + *duration);
976            }
977        }
978    }
979
980    /// Align multiple entities from different document dates.
981    ///
982    /// Returns entities grouped by their inferred "slot" (if any).
983    pub fn align(&self, entities: Vec<(Entity, DateTime<Utc>)>) -> TemporalEntityTracker {
984        let mut tracker = TemporalEntityTracker::new();
985
986        for (mut entity, doc_date) in entities {
987            // Create a temporary aligner for this document
988            let aligner = Self::for_document(doc_date);
989            aligner.annotate(&mut entity);
990            tracker.add_entity(entity);
991        }
992
993        tracker
994    }
995}
996
997// =============================================================================
998// Temporal Relation Types
999// =============================================================================
1000
/// Temporal relations between events/entities.
///
/// Based on Allen's interval algebra: the thirteen possible relations between
/// two intervals A and B, i.e. seven base relations and the inverses of all
/// but `Equal`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TemporalRelation {
    /// A is completely before B (A ends strictly before B starts)
    Before,
    /// A meets B (end of A = start of B)
    Meets,
    /// A overlaps with start of B (A starts first and ends inside B)
    Overlaps,
    /// A starts when B starts but ends before B does
    Starts,
    /// A is completely during B (starts after and ends before B)
    During,
    /// A ends when B ends but starts after B does
    Finishes,
    /// A is identical to B (same start and same end)
    Equal,
    // Inverses
    /// B is completely before A (A starts strictly after B ends)
    After,
    /// B meets A (end of B = start of A)
    MetBy,
    /// B overlaps with start of A (B starts first and ends inside A)
    OverlappedBy,
    /// B starts when A starts but ends before A does
    StartedBy,
    /// B is completely during A
    Contains,
    /// B ends when A ends but starts after A does
    FinishedBy,
}
1034
1035impl TemporalRelation {
1036    /// Compute the temporal relation between two entities.
1037    #[must_use]
1038    pub fn between(a: &Entity, b: &Entity) -> Option<Self> {
1039        // Clone to owned values for consistent comparison
1040        let a_start = *a.valid_from.as_ref()?;
1041        let b_start = *b.valid_from.as_ref()?;
1042
1043        // If either has no end, treat as ongoing
1044        let a_end = a.valid_until.unwrap_or_else(Utc::now);
1045        let b_end = b.valid_until.unwrap_or_else(Utc::now);
1046
1047        // Allen's interval algebra relations
1048        // All comparisons now on owned DateTime<Utc> values
1049        if a_end < b_start {
1050            Some(Self::Before)
1051        } else if a_end == b_start {
1052            Some(Self::Meets)
1053        } else if a_start < b_start && a_end > b_start && a_end < b_end {
1054            Some(Self::Overlaps)
1055        } else if a_start == b_start && a_end < b_end {
1056            Some(Self::Starts)
1057        } else if a_start > b_start && a_end < b_end {
1058            Some(Self::During)
1059        } else if a_start > b_start && a_end == b_end {
1060            Some(Self::Finishes)
1061        } else if a_start == b_start && a_end == b_end {
1062            Some(Self::Equal)
1063        } else if a_start > b_end {
1064            Some(Self::After)
1065        } else if a_start == b_end {
1066            Some(Self::MetBy)
1067        } else if b_start < a_start && b_end > a_start && b_end < a_end {
1068            Some(Self::OverlappedBy)
1069        } else if b_start == a_start && b_end > a_end {
1070            Some(Self::StartedBy)
1071        } else if b_start < a_start && b_end > a_end {
1072            Some(Self::Contains)
1073        } else if b_start < a_start && b_end == a_end {
1074            Some(Self::FinishedBy)
1075        } else {
1076            None
1077        }
1078    }
1079
1080    /// Check if two entities are concurrent (overlap in time).
1081    #[must_use]
1082    pub fn is_concurrent(a: &Entity, b: &Entity) -> bool {
1083        matches!(
1084            Self::between(a, b),
1085            Some(Self::Overlaps)
1086                | Some(Self::Starts)
1087                | Some(Self::During)
1088                | Some(Self::Finishes)
1089                | Some(Self::Equal)
1090                | Some(Self::OverlappedBy)
1091                | Some(Self::StartedBy)
1092                | Some(Self::Contains)
1093                | Some(Self::FinishedBy)
1094        )
1095    }
1096}
1097
1098// =============================================================================
1099// Utility Functions for Date/Time Parsing
1100// =============================================================================
1101
1102/// Normalize a date string to ISO 8601 format (YYYY-MM-DD).
1103/// Returns None if the date cannot be parsed.
1104#[must_use]
1105pub fn normalize_date(text: &str) -> Option<String> {
1106    let text = text.trim();
1107
1108    // Try Japanese format: YYYY年MM月DD日
1109    if let Some(date) = parse_japanese_date(text) {
1110        return Some(date.format("%Y-%m-%d").to_string());
1111    }
1112
1113    // Try EU dot format: DD.MM.YYYY
1114    if let Some(date) = parse_eu_dot_date(text) {
1115        return Some(date.format("%Y-%m-%d").to_string());
1116    }
1117
1118    // Try common date formats
1119    let formats = [
1120        "%Y-%m-%d",  // 2024-01-15
1121        "%Y/%m/%d",  // 2024/01/15
1122        "%d-%m-%Y",  // 15-01-2024
1123        "%d/%m/%Y",  // 15/01/2024
1124        "%B %d, %Y", // January 15, 2024
1125        "%b %d, %Y", // Jan 15, 2024
1126        "%d %B %Y",  // 15 January 2024
1127        "%d %b %Y",  // 15 Jan 2024
1128        "%m/%d/%Y",  // 01/15/2024 (US format)
1129    ];
1130
1131    for fmt in &formats {
1132        if let Ok(date) = NaiveDate::parse_from_str(text, fmt) {
1133            return Some(date.format("%Y-%m-%d").to_string());
1134        }
1135    }
1136
1137    // Try year-only
1138    if let Ok(year) = text.parse::<i32>() {
1139        if (1000..=2100).contains(&year) {
1140            return Some(format!("{year}-01-01"));
1141        }
1142    }
1143
1144    None
1145}
1146
1147/// Parse a date string into a `DateTime<Utc>`.
1148/// Returns None if the date cannot be parsed.
1149#[must_use]
1150pub fn parse_date(text: &str) -> Option<DateTime<Utc>> {
1151    let text = text.trim();
1152
1153    // Try Japanese format: YYYY年MM月DD日
1154    if let Some(date) = parse_japanese_date(text) {
1155        if let Some(dt) = date.and_hms_opt(0, 0, 0) {
1156            return Some(Utc.from_utc_datetime(&dt));
1157        }
1158    }
1159
1160    // Try EU dot format: DD.MM.YYYY
1161    if let Some(date) = parse_eu_dot_date(text) {
1162        if let Some(dt) = date.and_hms_opt(0, 0, 0) {
1163            return Some(Utc.from_utc_datetime(&dt));
1164        }
1165    }
1166
1167    let formats = [
1168        "%Y-%m-%d",
1169        "%Y/%m/%d",
1170        "%d-%m-%Y",
1171        "%d/%m/%Y",
1172        "%B %d, %Y",
1173        "%b %d, %Y",
1174        "%d %B %Y",
1175        "%d %b %Y",
1176        "%m/%d/%Y",
1177    ];
1178
1179    for fmt in &formats {
1180        if let Ok(date) = NaiveDate::parse_from_str(text, fmt) {
1181            if let Some(dt) = date.and_hms_opt(0, 0, 0) {
1182                return Some(Utc.from_utc_datetime(&dt));
1183            }
1184        }
1185    }
1186
1187    // Try year-only
1188    if let Ok(year) = text.parse::<i32>() {
1189        if (1000..=2100).contains(&year) {
1190            if let Some(date) = NaiveDate::from_ymd_opt(year, 1, 1) {
1191                if let Some(dt) = date.and_hms_opt(0, 0, 0) {
1192                    return Some(Utc.from_utc_datetime(&dt));
1193                }
1194            }
1195        }
1196    }
1197
1198    None
1199}
1200
1201/// Parse Japanese date format: YYYY年MM月DD日
1202fn parse_japanese_date(text: &str) -> Option<NaiveDate> {
1203    // Match pattern: digits + 年 + digits + 月 + digits + 日
1204    let text = text.trim();
1205
1206    // Find the year part (before 年)
1207    let year_end = text.find('年')?;
1208    let year: i32 = text[..year_end].parse().ok()?;
1209
1210    // Find the month part (between 年 and 月)
1211    let month_start = year_end + '年'.len_utf8();
1212    let month_end = text[month_start..].find('月')? + month_start;
1213    let month: u32 = text[month_start..month_end].parse().ok()?;
1214
1215    // Find the day part (between 月 and 日)
1216    let day_start = month_end + '月'.len_utf8();
1217    let day_end = text[day_start..].find('日')? + day_start;
1218    let day: u32 = text[day_start..day_end].parse().ok()?;
1219
1220    NaiveDate::from_ymd_opt(year, month, day)
1221}
1222
1223/// Parse EU dot format: DD.MM.YYYY
1224fn parse_eu_dot_date(text: &str) -> Option<NaiveDate> {
1225    let parts: Vec<&str> = text.split('.').collect();
1226    if parts.len() != 3 {
1227        return None;
1228    }
1229
1230    let day: u32 = parts[0].parse().ok()?;
1231    let month: u32 = parts[1].parse().ok()?;
1232    let year: i32 = parts[2].parse().ok()?;
1233
1234    // Handle 2-digit years
1235    let year = if year < 100 {
1236        if year > 50 {
1237            1900 + year
1238        } else {
1239            2000 + year
1240        }
1241    } else {
1242        year
1243    };
1244
1245    NaiveDate::from_ymd_opt(year, month, day)
1246}
1247
/// Normalize a time string to ISO 8601 format (HH:MM).
///
/// Accepted inputs (case-insensitive, surrounding whitespace ignored):
/// - 24-hour `H:MM` / `HH:MM`, optionally followed by `:SS`;
/// - the same shapes followed by `AM` or `PM` for 12-hour clock times.
///
/// Without an AM/PM marker the hour is taken as-is, so `"12:30"` stays
/// `"12:30"`. With `AM`, hour 12 becomes 00; with `PM`, hours below 12 are
/// shifted by 12 and hours above 12 are rejected. Seconds are validated but
/// not included in the output.
///
/// Returns None if the time cannot be parsed or a component is out of range.
#[must_use]
pub fn normalize_time(text: &str) -> Option<String> {
    let text = text.trim().to_uppercase();

    // `None` means no AM/PM marker: the hour is already on the 24-hour clock.
    let (clock, is_pm) = if let Some(rest) = text.strip_suffix("PM") {
        (rest.trim(), Some(true))
    } else if let Some(rest) = text.strip_suffix("AM") {
        (rest.trim(), Some(false))
    } else {
        (text.as_str(), None)
    };

    // Exactly HH:MM or HH:MM:SS.
    let mut fields = clock.split(':');
    let hour: u32 = fields.next()?.parse().ok()?;
    let min: u32 = fields.next()?.parse().ok()?;
    let sec: u32 = match fields.next() {
        Some(field) => field.parse().ok()?,
        None => 0,
    };
    if fields.next().is_some() {
        return None;
    }

    let hour = match is_pm {
        Some(true) if hour < 12 => hour + 12,
        // 13 PM and above is not a valid 12-hour time; bailing out here also
        // avoids overflowing `hour + 12` on absurdly large input.
        Some(true) if hour > 12 => return None,
        Some(false) if hour == 12 => 0,
        _ => hour,
    };

    (hour < 24 && min < 60 && sec < 60).then(|| format!("{hour:02}:{min:02}"))
}
1301
1302// =============================================================================
1303// Abstract Temporal Expressions
1304// =============================================================================
1305
/// Granularity of a temporal expression.
///
/// Temporal expressions exist at different levels of specificity,
/// analogous to how entities exist at different levels of abstraction
/// in a hierarchical clustering layer (currently archived out of the main workspace).
///
/// ```text
/// ┌─────────────────────────────────────────────────────────────────────┐
/// │                    TEMPORAL GRANULARITY HIERARCHY                   │
/// ├─────────────────────────────────────────────────────────────────────┤
/// │                                                                     │
/// │  Level 0: Instant      "2024-01-15T14:30:00Z"                       │
/// │  Level 1: Day          "January 15, 2024"                           │
/// │  Level 2: Week         "the week of Jan 15"                         │
/// │  Level 3: Month        "January 2024"                               │
/// │  Level 4: Quarter      "Q1 2024"                                    │
/// │  Level 5: Year         "2024"                                       │
/// │  Level 6: Decade       "the 2020s"                                  │
/// │  Level 7: Century      "21st century"                               │
/// │  Level 8: Era          "modern era", "post-WWII"                    │
/// │                                                                     │
/// └─────────────────────────────────────────────────────────────────────┘
/// ```
///
/// The derived ordering follows declaration order, so finer granularities
/// compare as smaller and `Unknown` compares as the largest value.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default,
)]
pub enum TemporalGranularity {
    /// Precise timestamp with time component
    Instant,
    /// Single day
    Day,
    /// Week (7-day period)
    Week,
    /// Calendar month
    Month,
    /// Fiscal/calendar quarter
    Quarter,
    /// Calendar year
    Year,
    /// Decade (e.g., "the 90s")
    Decade,
    /// Century (e.g., "19th century")
    Century,
    /// Historical era (e.g., "Renaissance", "Cold War")
    Era,
    /// Unknown or unspecified granularity (the `Default` value)
    #[default]
    Unknown,
}
1355
1356impl TemporalGranularity {
1357    /// Get the numeric level (0 = most specific, higher = more abstract).
1358    #[must_use]
1359    pub fn level(&self) -> u8 {
1360        match self {
1361            Self::Instant => 0,
1362            Self::Day => 1,
1363            Self::Week => 2,
1364            Self::Month => 3,
1365            Self::Quarter => 4,
1366            Self::Year => 5,
1367            Self::Decade => 6,
1368            Self::Century => 7,
1369            Self::Era => 8,
1370            Self::Unknown => 255,
1371        }
1372    }
1373
1374    /// Can this granularity be converted to a concrete DateTime?
1375    #[must_use]
1376    pub fn is_groundable(&self) -> bool {
1377        matches!(
1378            self,
1379            Self::Instant | Self::Day | Self::Week | Self::Month | Self::Quarter | Self::Year
1380        )
1381    }
1382}
1383
/// Type of temporal expression based on how it relates to absolute time.
///
/// ```text
/// ┌─────────────────────────────────────────────────────────────────────┐
/// │                    TEMPORAL EXPRESSION TYPES                        │
/// ├─────────────────────────────────────────────────────────────────────┤
/// │                                                                     │
/// │  Absolute:    "January 15, 2024"     → directly mappable            │
/// │  Relative:    "yesterday", "next week" → needs document date        │
/// │  Anchored:    "before the war"       → needs event reference        │
/// │  Recurring:   "every Monday"         → pattern, not single point    │
/// │  Fuzzy:       "recently", "soon"     → vague, probabilistic         │
/// │  Partial:     "in the morning"       → missing date component       │
/// │                                                                     │
/// └─────────────────────────────────────────────────────────────────────┘
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum TemporalExpressionType {
    /// Directly maps to calendar time: "2024-01-15", "January 2024"
    Absolute,
    /// Relative to document/utterance time: "yesterday", "next week", "3 days ago"
    Relative {
        /// Signed offset in days from the anchor (negative = past, positive = future)
        offset_days: i32,
        /// The reference point (if known)
        anchor: Option<Box<DateTime<Utc>>>,
    },
    /// Anchored to an event rather than calendar: "before the war", "after graduation"
    EventAnchored {
        /// The anchor event description
        event: String,
        /// Temporal relation to the event
        relation: TemporalRelation,
    },
    /// Recurring pattern: "every Monday", "annually", "on weekends"
    Recurring {
        /// Pattern description
        pattern: String,
        /// Frequency (if extractable)
        frequency: Option<RecurrenceFrequency>,
    },
    /// Fuzzy/vague: "recently", "soon", "in the past", "long ago"
    Fuzzy {
        /// Direction (past/future/unknown)
        direction: FuzzyDirection,
        /// Approximate distance in days from the reference date (if inferable),
        /// as a signed `(min, max)` range
        approximate_days: Option<(i32, i32)>, // (min, max) range
    },
    /// Partial specification: "in the morning", "on Tuesday" (missing year/date)
    Partial {
        /// What components are specified
        specified: PartialTimeComponents,
    },
}
1438
/// Direction for fuzzy temporal expressions, i.e. which side of the
/// reference date a vague expression points to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FuzzyDirection {
    /// Past: "recently", "long ago"
    Past,
    /// Future: "soon", "eventually"
    Future,
    /// Unknown/either: "sometime"
    Unknown,
}
1449
/// Recurrence frequency for recurring patterns such as "every Monday" or
/// "annually".
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RecurrenceFrequency {
    /// Daily
    Daily,
    /// Weekly (specific day)
    Weekly,
    /// Biweekly
    Biweekly,
    /// Monthly
    Monthly,
    /// Quarterly
    Quarterly,
    /// Annually
    Annually,
    /// Custom/irregular
    Custom,
}
1468
/// Components specified in a partial temporal expression.
///
/// Each flag records whether that component is present in the expression;
/// the `Default` value has every flag `false` (nothing specified).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct PartialTimeComponents {
    /// Year specified
    pub year: bool,
    /// Month specified
    pub month: bool,
    /// Day specified
    pub day: bool,
    /// Day of week specified
    pub weekday: bool,
    /// Hour specified
    pub hour: bool,
    /// Minute specified
    pub minute: bool,
}
1485
/// An abstract temporal expression with full metadata.
///
/// This is the temporal analog to abstract entities in a hierarchical clustering layer -
/// it captures not just when something happened, but how precisely
/// we know when, and what kind of temporal reference it is.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AbstractTemporalExpression {
    /// Original text of the temporal expression
    pub text: String,
    /// Type of temporal expression
    pub expression_type: TemporalExpressionType,
    /// Granularity level
    pub granularity: TemporalGranularity,
    /// Grounded time range as `(start, end)`, if resolvable; `None` until grounded.
    /// For "January 2024", this would be (2024-01-01, 2024-01-31)
    pub grounded_range: Option<(DateTime<Utc>, DateTime<Utc>)>,
    /// Confidence in the grounding (0.0 to 1.0)
    pub grounding_confidence: f64,
    /// Whether this requires external context (e.g. a document date) to resolve
    pub requires_context: bool,
}
1507
1508impl AbstractTemporalExpression {
1509    /// Create a new absolute temporal expression.
1510    #[must_use]
1511    pub fn absolute(text: impl Into<String>, granularity: TemporalGranularity) -> Self {
1512        Self {
1513            text: text.into(),
1514            expression_type: TemporalExpressionType::Absolute,
1515            granularity,
1516            grounded_range: None,
1517            grounding_confidence: 1.0,
1518            requires_context: false,
1519        }
1520    }
1521
1522    /// Create a relative temporal expression.
1523    #[must_use]
1524    pub fn relative(text: impl Into<String>, offset_days: i32) -> Self {
1525        Self {
1526            text: text.into(),
1527            expression_type: TemporalExpressionType::Relative {
1528                offset_days,
1529                anchor: None,
1530            },
1531            granularity: TemporalGranularity::Day,
1532            grounded_range: None,
1533            grounding_confidence: 0.0, // Needs grounding
1534            requires_context: true,
1535        }
1536    }
1537
1538    /// Create a fuzzy temporal expression.
1539    #[must_use]
1540    pub fn fuzzy(text: impl Into<String>, direction: FuzzyDirection) -> Self {
1541        Self {
1542            text: text.into(),
1543            expression_type: TemporalExpressionType::Fuzzy {
1544                direction,
1545                approximate_days: None,
1546            },
1547            granularity: TemporalGranularity::Unknown,
1548            grounded_range: None,
1549            grounding_confidence: 0.0,
1550            requires_context: true,
1551        }
1552    }
1553
1554    /// Ground this expression relative to a document date.
1555    ///
1556    /// For relative expressions like "yesterday", this resolves to an absolute time.
1557    #[must_use]
1558    pub fn ground(&self, document_date: &DateTime<Utc>) -> Option<Self> {
1559        let mut grounded = self.clone();
1560
1561        match &self.expression_type {
1562            TemporalExpressionType::Relative { offset_days, .. } => {
1563                let target = *document_date + Duration::days(i64::from(*offset_days));
1564                let start = target
1565                    .date_naive()
1566                    .and_hms_opt(0, 0, 0)
1567                    .map(|dt| Utc.from_utc_datetime(&dt))?;
1568                let end = target
1569                    .date_naive()
1570                    .and_hms_opt(23, 59, 59)
1571                    .map(|dt| Utc.from_utc_datetime(&dt))?;
1572
1573                grounded.grounded_range = Some((start, end));
1574                grounded.grounding_confidence = 0.95;
1575                grounded.requires_context = false;
1576                Some(grounded)
1577            }
1578            TemporalExpressionType::Fuzzy {
1579                direction,
1580                approximate_days,
1581            } => {
1582                // For fuzzy expressions, create a probabilistic range
1583                let (min_days, max_days) = approximate_days.unwrap_or(match direction {
1584                    FuzzyDirection::Past => (-365, -1),
1585                    FuzzyDirection::Future => (1, 365),
1586                    FuzzyDirection::Unknown => (-365, 365),
1587                });
1588
1589                let start = *document_date + Duration::days(i64::from(min_days));
1590                let end = *document_date + Duration::days(i64::from(max_days));
1591
1592                grounded.grounded_range = Some((start, end));
1593                grounded.grounding_confidence = 0.3; // Low confidence for fuzzy
1594                grounded.requires_context = false;
1595                Some(grounded)
1596            }
1597            _ => Some(grounded), // Already absolute or not groundable
1598        }
1599    }
1600
1601    /// Check if this expression overlaps with another.
1602    #[must_use]
1603    pub fn overlaps(&self, other: &Self) -> bool {
1604        match (&self.grounded_range, &other.grounded_range) {
1605            (Some((s1, e1)), Some((s2, e2))) => s1 <= e2 && s2 <= e1,
1606            _ => false, // Can't determine overlap without grounded ranges
1607        }
1608    }
1609
1610    /// Get the midpoint of this temporal expression (if grounded).
1611    #[must_use]
1612    pub fn midpoint(&self) -> Option<DateTime<Utc>> {
1613        self.grounded_range
1614            .map(|(start, end)| start + Duration::seconds((end - start).num_seconds() / 2))
1615    }
1616}
1617
1618/// Parse a temporal expression and determine its type and granularity.
1619///
1620/// This is a lightweight parser for common patterns. For production use,
1621/// consider integrating with SUTime, HeidelTime, or similar.
1622#[must_use]
1623pub fn parse_temporal_expression(text: &str) -> AbstractTemporalExpression {
1624    let text_lower = text.to_lowercase();
1625    let text_trimmed = text.trim();
1626
1627    // Check for relative expressions
1628    if let Some(expr) = parse_relative_expression(&text_lower) {
1629        return expr;
1630    }
1631
1632    // Check for fuzzy expressions
1633    if let Some(expr) = parse_fuzzy_expression(&text_lower) {
1634        return expr;
1635    }
1636
1637    // Check for recurring patterns
1638    if let Some(expr) = parse_recurring_expression(&text_lower) {
1639        return expr;
1640    }
1641
1642    // Try to parse as absolute date and determine granularity
1643    if let Some(_normalized) = normalize_date(text_trimmed) {
1644        let granularity = infer_granularity(text_trimmed);
1645        let mut expr = AbstractTemporalExpression::absolute(text_trimmed, granularity);
1646
1647        // Try to ground it
1648        if let Some(dt) = parse_date(text_trimmed) {
1649            let (start, end) = granularity_to_range(&dt, granularity);
1650            expr.grounded_range = Some((start, end));
1651        }
1652
1653        return expr;
1654    }
1655
1656    // Fallback: unknown expression
1657    AbstractTemporalExpression {
1658        text: text_trimmed.to_string(),
1659        expression_type: TemporalExpressionType::Partial {
1660            specified: PartialTimeComponents::default(),
1661        },
1662        granularity: TemporalGranularity::Unknown,
1663        grounded_range: None,
1664        grounding_confidence: 0.0,
1665        requires_context: true,
1666    }
1667}
1668
1669fn parse_relative_expression(text: &str) -> Option<AbstractTemporalExpression> {
1670    let patterns = [
1671        ("yesterday", -1),
1672        ("today", 0),
1673        ("tomorrow", 1),
1674        ("day before yesterday", -2),
1675        ("day after tomorrow", 2),
1676    ];
1677
1678    for (pattern, offset) in patterns {
1679        if text.contains(pattern) {
1680            return Some(AbstractTemporalExpression::relative(text, offset));
1681        }
1682    }
1683
1684    // Check for "N days ago" / "in N days"
1685    if text.contains("ago") {
1686        if let Some(n) = extract_number(text) {
1687            if text.contains("day") {
1688                return Some(AbstractTemporalExpression::relative(text, -(n as i32)));
1689            } else if text.contains("week") {
1690                return Some(AbstractTemporalExpression::relative(text, -(n as i32) * 7));
1691            } else if text.contains("month") {
1692                return Some(AbstractTemporalExpression::relative(text, -(n as i32) * 30));
1693            }
1694        }
1695    }
1696
1697    if text.starts_with("in ") || text.starts_with("next ") {
1698        if let Some(n) = extract_number(text) {
1699            if text.contains("day") {
1700                return Some(AbstractTemporalExpression::relative(text, n as i32));
1701            } else if text.contains("week") {
1702                return Some(AbstractTemporalExpression::relative(text, n as i32 * 7));
1703            }
1704        }
1705        // "next week", "next month"
1706        if text.contains("week") {
1707            return Some(AbstractTemporalExpression::relative(text, 7));
1708        }
1709        if text.contains("month") {
1710            return Some(AbstractTemporalExpression::relative(text, 30));
1711        }
1712    }
1713
1714    if text.starts_with("last ") {
1715        if text.contains("week") {
1716            return Some(AbstractTemporalExpression::relative(text, -7));
1717        }
1718        if text.contains("month") {
1719            return Some(AbstractTemporalExpression::relative(text, -30));
1720        }
1721    }
1722
1723    None
1724}
1725
1726fn parse_fuzzy_expression(text: &str) -> Option<AbstractTemporalExpression> {
1727    let past_patterns = ["recently", "lately", "long ago", "in the past", "earlier"];
1728    let future_patterns = ["soon", "eventually", "in the future", "later"];
1729
1730    for pattern in past_patterns {
1731        if text.contains(pattern) {
1732            return Some(AbstractTemporalExpression::fuzzy(
1733                text,
1734                FuzzyDirection::Past,
1735            ));
1736        }
1737    }
1738
1739    for pattern in future_patterns {
1740        if text.contains(pattern) {
1741            return Some(AbstractTemporalExpression::fuzzy(
1742                text,
1743                FuzzyDirection::Future,
1744            ));
1745        }
1746    }
1747
1748    if text.contains("sometime") || text.contains("someday") {
1749        return Some(AbstractTemporalExpression::fuzzy(
1750            text,
1751            FuzzyDirection::Unknown,
1752        ));
1753    }
1754
1755    None
1756}
1757
1758fn parse_recurring_expression(text: &str) -> Option<AbstractTemporalExpression> {
1759    let frequency = if text.contains("daily") || text.contains("every day") {
1760        Some(RecurrenceFrequency::Daily)
1761    } else if text.contains("weekly") || text.contains("every week") {
1762        Some(RecurrenceFrequency::Weekly)
1763    } else if text.contains("monthly") || text.contains("every month") {
1764        Some(RecurrenceFrequency::Monthly)
1765    } else if text.contains("annually") || text.contains("every year") || text.contains("yearly") {
1766        Some(RecurrenceFrequency::Annually)
1767    } else if text.starts_with("every ") || text.starts_with("on ") && text.contains("s") {
1768        // "every Monday", "on Mondays"
1769        Some(RecurrenceFrequency::Weekly)
1770    } else {
1771        None
1772    };
1773
1774    frequency.map(|freq| AbstractTemporalExpression {
1775        text: text.to_string(),
1776        expression_type: TemporalExpressionType::Recurring {
1777            pattern: text.to_string(),
1778            frequency: Some(freq),
1779        },
1780        granularity: TemporalGranularity::Unknown,
1781        grounded_range: None,
1782        grounding_confidence: 0.0,
1783        requires_context: true,
1784    })
1785}
1786
/// Extracts the first small quantity mentioned in `text`.
///
/// Two passes are made:
///
/// 1. The first whitespace-delimited token that parses as a `u32` wins
///    (e.g. `"3 days ago"` gives `3`).
/// 2. Otherwise the first spelled-out number from "one" to "ten" wins. Words
///    are compared as whole words and case-insensitively, with any
///    non-alphabetic character acting as a separator, so `"two-week"` and
///    `"Ten,"` match while `"someone"`, `"network"` and `"seventeen"` do not.
///
/// Returns `None` when neither pass finds a number.
fn extract_number(text: &str) -> Option<u32> {
    const WORD_NUMBERS: [(&str, u32); 10] = [
        ("one", 1),
        ("two", 2),
        ("three", 3),
        ("four", 4),
        ("five", 5),
        ("six", 6),
        ("seven", 7),
        ("eight", 8),
        ("nine", 9),
        ("ten", 10),
    ];

    // Pass 1: plain digits.
    let numeric = text
        .split_whitespace()
        .find_map(|token| token.parse::<u32>().ok());
    if numeric.is_some() {
        return numeric;
    }

    // Pass 2: spelled-out numbers, matched on word boundaries so that a
    // number word embedded in a longer word is not mistaken for a quantity.
    text.split(|c: char| !c.is_alphabetic()).find_map(|word| {
        WORD_NUMBERS
            .iter()
            .find(|(name, _)| word.eq_ignore_ascii_case(name))
            .map(|&(_, value)| value)
    })
}
1814
1815fn infer_granularity(text: &str) -> TemporalGranularity {
1816    // Check for time component
1817    if text.contains(':') || text.contains("am") || text.contains("pm") {
1818        return TemporalGranularity::Instant;
1819    }
1820
1821    // Check for day-level precision
1822    if text.chars().filter(|c| c.is_ascii_digit()).count() >= 6 {
1823        // Has enough digits for YYYY-MM-DD or similar
1824        return TemporalGranularity::Day;
1825    }
1826
1827    // Check for month-level patterns
1828    let months = [
1829        "january",
1830        "february",
1831        "march",
1832        "april",
1833        "may",
1834        "june",
1835        "july",
1836        "august",
1837        "september",
1838        "october",
1839        "november",
1840        "december",
1841        "jan",
1842        "feb",
1843        "mar",
1844        "apr",
1845        "jun",
1846        "jul",
1847        "aug",
1848        "sep",
1849        "oct",
1850        "nov",
1851        "dec",
1852    ];
1853    let text_lower = text.to_lowercase();
1854
1855    for month in months {
1856        if text_lower.contains(month) {
1857            // If there's a day number too, it's Day granularity
1858            if text.chars().filter(|c| c.is_ascii_digit()).count() >= 2 {
1859                // Has day number
1860                let digits: String = text.chars().filter(|c| c.is_ascii_digit()).collect();
1861                if digits.len() <= 4 {
1862                    // Just year or just day
1863                    if digits.len() == 4 {
1864                        return TemporalGranularity::Month;
1865                    }
1866                }
1867                return TemporalGranularity::Day;
1868            }
1869            return TemporalGranularity::Month;
1870        }
1871    }
1872
1873    // Check for quarter
1874    if text_lower.contains("q1")
1875        || text_lower.contains("q2")
1876        || text_lower.contains("q3")
1877        || text_lower.contains("q4")
1878    {
1879        return TemporalGranularity::Quarter;
1880    }
1881
1882    // Check for century (before decade, since "21st century" contains digits)
1883    if text_lower.contains("century") {
1884        return TemporalGranularity::Century;
1885    }
1886
1887    // Check for decade (e.g., "1990s", "the 90s")
1888    if text_lower.contains("'s") || text_lower.ends_with("0s") {
1889        if let Ok(decade) = text
1890            .chars()
1891            .filter(|c| c.is_ascii_digit())
1892            .collect::<String>()
1893            .parse::<u32>()
1894        {
1895            if decade < 100 || (1900..=2100).contains(&decade) {
1896                return TemporalGranularity::Decade;
1897            }
1898        }
1899    }
1900
1901    // Check for era
1902    if text_lower.contains("era") || text_lower.contains("age") || text_lower.contains("period") {
1903        return TemporalGranularity::Era;
1904    }
1905
1906    // Default: if it's just a 4-digit year
1907    if text.chars().filter(|c| c.is_ascii_digit()).count() == 4 {
1908        return TemporalGranularity::Year;
1909    }
1910
1911    TemporalGranularity::Unknown
1912}
1913
1914fn granularity_to_range(
1915    dt: &DateTime<Utc>,
1916    granularity: TemporalGranularity,
1917) -> (DateTime<Utc>, DateTime<Utc>) {
1918    use chrono::Datelike;
1919
1920    let date = dt.date_naive();
1921
1922    match granularity {
1923        TemporalGranularity::Instant => (*dt, *dt),
1924        TemporalGranularity::Day => {
1925            let start = date
1926                .and_hms_opt(0, 0, 0)
1927                .map(|d| Utc.from_utc_datetime(&d))
1928                .unwrap_or(*dt);
1929            let end = date
1930                .and_hms_opt(23, 59, 59)
1931                .map(|d| Utc.from_utc_datetime(&d))
1932                .unwrap_or(*dt);
1933            (start, end)
1934        }
1935        TemporalGranularity::Week => {
1936            let weekday = date.weekday().num_days_from_monday();
1937            let start_date = date - Duration::days(i64::from(weekday));
1938            let end_date = start_date + Duration::days(6);
1939            let start = start_date
1940                .and_hms_opt(0, 0, 0)
1941                .map(|d| Utc.from_utc_datetime(&d))
1942                .unwrap_or(*dt);
1943            let end = end_date
1944                .and_hms_opt(23, 59, 59)
1945                .map(|d| Utc.from_utc_datetime(&d))
1946                .unwrap_or(*dt);
1947            (start, end)
1948        }
1949        TemporalGranularity::Month => {
1950            let start_date = NaiveDate::from_ymd_opt(date.year(), date.month(), 1).unwrap_or(date);
1951            let end_date = if date.month() == 12 {
1952                NaiveDate::from_ymd_opt(date.year() + 1, 1, 1).unwrap_or(date) - Duration::days(1)
1953            } else {
1954                NaiveDate::from_ymd_opt(date.year(), date.month() + 1, 1).unwrap_or(date)
1955                    - Duration::days(1)
1956            };
1957            let start = start_date
1958                .and_hms_opt(0, 0, 0)
1959                .map(|d| Utc.from_utc_datetime(&d))
1960                .unwrap_or(*dt);
1961            let end = end_date
1962                .and_hms_opt(23, 59, 59)
1963                .map(|d| Utc.from_utc_datetime(&d))
1964                .unwrap_or(*dt);
1965            (start, end)
1966        }
1967        TemporalGranularity::Quarter => {
1968            let quarter = (date.month() - 1) / 3;
1969            let start_month = quarter * 3 + 1;
1970            let end_month = start_month + 2;
1971            let start_date = NaiveDate::from_ymd_opt(date.year(), start_month, 1).unwrap_or(date);
1972            let end_date = if end_month == 12 {
1973                NaiveDate::from_ymd_opt(date.year(), 12, 31).unwrap_or(date)
1974            } else {
1975                NaiveDate::from_ymd_opt(date.year(), end_month + 1, 1).unwrap_or(date)
1976                    - Duration::days(1)
1977            };
1978            let start = start_date
1979                .and_hms_opt(0, 0, 0)
1980                .map(|d| Utc.from_utc_datetime(&d))
1981                .unwrap_or(*dt);
1982            let end = end_date
1983                .and_hms_opt(23, 59, 59)
1984                .map(|d| Utc.from_utc_datetime(&d))
1985                .unwrap_or(*dt);
1986            (start, end)
1987        }
1988        TemporalGranularity::Year => {
1989            let start_date = NaiveDate::from_ymd_opt(date.year(), 1, 1).unwrap_or(date);
1990            let end_date = NaiveDate::from_ymd_opt(date.year(), 12, 31).unwrap_or(date);
1991            let start = start_date
1992                .and_hms_opt(0, 0, 0)
1993                .map(|d| Utc.from_utc_datetime(&d))
1994                .unwrap_or(*dt);
1995            let end = end_date
1996                .and_hms_opt(23, 59, 59)
1997                .map(|d| Utc.from_utc_datetime(&d))
1998                .unwrap_or(*dt);
1999            (start, end)
2000        }
2001        TemporalGranularity::Decade => {
2002            let decade_start = (date.year() / 10) * 10;
2003            let start_date = NaiveDate::from_ymd_opt(decade_start, 1, 1).unwrap_or(date);
2004            let end_date = NaiveDate::from_ymd_opt(decade_start + 9, 12, 31).unwrap_or(date);
2005            let start = start_date
2006                .and_hms_opt(0, 0, 0)
2007                .map(|d| Utc.from_utc_datetime(&d))
2008                .unwrap_or(*dt);
2009            let end = end_date
2010                .and_hms_opt(23, 59, 59)
2011                .map(|d| Utc.from_utc_datetime(&d))
2012                .unwrap_or(*dt);
2013            (start, end)
2014        }
2015        TemporalGranularity::Century => {
2016            let century_start = (date.year() / 100) * 100;
2017            let start_date = NaiveDate::from_ymd_opt(century_start, 1, 1).unwrap_or(date);
2018            let end_date = NaiveDate::from_ymd_opt(century_start + 99, 12, 31).unwrap_or(date);
2019            let start = start_date
2020                .and_hms_opt(0, 0, 0)
2021                .map(|d| Utc.from_utc_datetime(&d))
2022                .unwrap_or(*dt);
2023            let end = end_date
2024                .and_hms_opt(23, 59, 59)
2025                .map(|d| Utc.from_utc_datetime(&d))
2026                .unwrap_or(*dt);
2027            (start, end)
2028        }
2029        TemporalGranularity::Era | TemporalGranularity::Unknown => {
2030            // Can't determine bounds for era/unknown
2031            (*dt, *dt)
2032        }
2033    }
2034}
2035
2036// =============================================================================
2037// Tests
2038// =============================================================================
2039
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// UTC datetime from explicit calendar and clock components.
    fn utc_at(year: i32, month: u32, day: u32, hour: u32, min: u32, sec: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(year, month, day, hour, min, sec)
            .unwrap()
    }

    /// Midnight UTC on the given calendar date.
    fn utc(year: i32, month: u32, day: u32) -> DateTime<Utc> {
        utc_at(year, month, day, 0, 0, 0)
    }

    /// Person entity spanning the whole text, valid over `[from, until)`.
    fn make_entity(text: &str, from: DateTime<Utc>, until: Option<DateTime<Utc>>) -> Entity {
        let mut entity = Entity::new(text, EntityType::Person, 0, text.len(), 0.9);
        entity.valid_from = Some(from);
        entity.valid_until = until;
        entity
    }

    /// Already-grounded absolute expression covering `[start, end]`.
    fn absolute(
        text: &str,
        granularity: TemporalGranularity,
        start: DateTime<Utc>,
        end: DateTime<Utc>,
    ) -> AbstractTemporalExpression {
        AbstractTemporalExpression {
            text: text.to_string(),
            expression_type: TemporalExpressionType::Absolute,
            granularity,
            grounded_range: Some((start, end)),
            grounding_confidence: 1.0,
            requires_context: false,
        }
    }

    #[test]
    fn test_point_in_time_query() {
        let mut tracker = TemporalEntityTracker::new();
        tracker.add_entity(make_entity(
            "Steve Ballmer",
            utc(2000, 1, 13),
            Some(utc(2014, 2, 4)),
        ));
        tracker.add_entity(make_entity("Satya Nadella", utc(2014, 2, 4), None));

        // During Ballmer's tenure only Ballmer is valid.
        let mid_2010 = utc(2010, 6, 1);
        let hits = tracker.query_at(&mid_2010);
        assert_eq!(hits.len(), 1);
        assert!(hits[0].text.contains("Ballmer"));

        // After the handover only Nadella is valid.
        let mid_2020 = utc(2020, 6, 1);
        let hits = tracker.query_at(&mid_2020);
        assert_eq!(hits.len(), 1);
        assert!(hits[0].text.contains("Nadella"));
    }

    #[test]
    fn test_entity_timeline() {
        let mut timeline = EntityTimeline::new("CEO_OF_MICROSOFT");

        let mut ballmer = make_entity("Steve Ballmer", utc(2000, 1, 13), Some(utc(2014, 2, 4)));
        ballmer.normalized = Some("CEO_OF_MICROSOFT".into());
        timeline.add(ballmer, None);

        let mut nadella = make_entity("Satya Nadella", utc(2014, 2, 4), None);
        nadella.normalized = Some("CEO_OF_MICROSOFT".into());
        timeline.add(nadella, None);

        // Both holders are recorded.
        assert_eq!(timeline.history().len(), 2);

        // The open-ended entry is the current value.
        let current = timeline.current();
        assert!(current.is_some());
        assert!(current.unwrap().text.contains("Nadella"));

        // A historical lookup resolves to the earlier holder.
        let start_of_2012 = utc(2012, 1, 1);
        let holder_2012 = timeline.value_at(&start_of_2012);
        assert!(holder_2012.is_some());
        assert!(holder_2012.unwrap().text.contains("Ballmer"));
    }

    #[test]
    fn test_temporal_scope() {
        let entity = make_entity("Test", utc(2010, 1, 1), Some(utc(2020, 1, 1)));

        // A point inside the validity interval matches.
        let inside = TemporalScope::PointInTime(utc(2015, 1, 1));
        assert!(inside.contains(&entity));

        // A point before the interval does not.
        let before = TemporalScope::PointInTime(utc(2005, 1, 1));
        assert!(!before.contains(&entity));

        // A range that overlaps the interval matches.
        let overlapping = TemporalScope::Range {
            start: utc(2008, 1, 1),
            end: utc(2012, 1, 1),
        };
        assert!(overlapping.contains(&entity));

        // The entity has an end date, so it is not in the current scope.
        assert!(!TemporalScope::Current.contains(&entity));
    }

    #[test]
    fn test_temporal_relation() {
        let earlier = make_entity("A", utc(2010, 1, 1), Some(utc(2015, 1, 1)));
        let later = make_entity("B", utc(2016, 1, 1), Some(utc(2020, 1, 1)));

        // A ends before B starts.
        assert_eq!(
            TemporalRelation::between(&earlier, &later),
            Some(TemporalRelation::Before)
        );

        // Seen from B, the relation is reversed.
        assert_eq!(
            TemporalRelation::between(&later, &earlier),
            Some(TemporalRelation::After)
        );
    }

    #[test]
    fn test_temporal_stats() {
        let mut tracker = TemporalEntityTracker::new();

        // Bounded on both sides.
        tracker.add_entity(make_entity(
            "Temporal",
            utc(2010, 1, 1),
            Some(utc(2020, 1, 1)),
        ));
        // Open-ended.
        tracker.add_entity(make_entity("Current", utc(2015, 1, 1), None));
        // No temporal bounds at all.
        tracker.add_entity(Entity::new("Atemporal", EntityType::Person, 0, 9, 0.9));

        let stats = tracker.temporal_stats();
        assert_eq!(stats.total, 3);
        assert_eq!(stats.temporal, 2);
        assert_eq!(stats.atemporal, 1);
        assert_eq!(stats.currently_valid, 1);
        assert_eq!(stats.historical, 1);
    }

    #[test]
    fn test_timeline_gaps_and_overlaps() {
        let mut timeline = EntityTimeline::new("TEST");

        // Two entries separated by a 2012-2015 gap.
        timeline.add(
            make_entity("E1", utc(2010, 1, 1), Some(utc(2012, 1, 1))),
            None,
        );
        timeline.add(
            make_entity("E2", utc(2015, 1, 1), Some(utc(2020, 1, 1))),
            None,
        );

        assert!(timeline.has_gaps());
        assert!(!timeline.has_overlaps());
    }

    // =========================================================================
    // Abstract Temporal Expression Tests
    // =========================================================================

    #[test]
    fn test_granularity_ordering() {
        // Listed from finest to coarsest; levels must increase strictly.
        let levels = [
            TemporalGranularity::Instant.level(),
            TemporalGranularity::Day.level(),
            TemporalGranularity::Month.level(),
            TemporalGranularity::Year.level(),
            TemporalGranularity::Decade.level(),
        ];
        assert!(levels.windows(2).all(|pair| pair[0] < pair[1]));
    }

    #[test]
    fn test_parse_relative_expression() {
        let yesterday = parse_temporal_expression("yesterday");
        assert!(matches!(
            yesterday.expression_type,
            TemporalExpressionType::Relative {
                offset_days: -1,
                ..
            }
        ));
        assert!(yesterday.requires_context);

        let tomorrow = parse_temporal_expression("tomorrow");
        assert!(matches!(
            tomorrow.expression_type,
            TemporalExpressionType::Relative { offset_days: 1, .. }
        ));

        let three_days_ago = parse_temporal_expression("3 days ago");
        assert!(matches!(
            three_days_ago.expression_type,
            TemporalExpressionType::Relative {
                offset_days: -3,
                ..
            }
        ));
    }

    #[test]
    fn test_parse_fuzzy_expression() {
        let past = parse_temporal_expression("recently");
        assert!(matches!(
            past.expression_type,
            TemporalExpressionType::Fuzzy {
                direction: FuzzyDirection::Past,
                ..
            }
        ));

        let future = parse_temporal_expression("soon");
        assert!(matches!(
            future.expression_type,
            TemporalExpressionType::Fuzzy {
                direction: FuzzyDirection::Future,
                ..
            }
        ));

        let undirected = parse_temporal_expression("sometime");
        assert!(matches!(
            undirected.expression_type,
            TemporalExpressionType::Fuzzy {
                direction: FuzzyDirection::Unknown,
                ..
            }
        ));
    }

    #[test]
    fn test_parse_recurring_expression() {
        let weekday = parse_temporal_expression("every Monday");
        assert!(matches!(
            weekday.expression_type,
            TemporalExpressionType::Recurring { .. }
        ));

        let daily = parse_temporal_expression("daily");
        match daily.expression_type {
            TemporalExpressionType::Recurring { frequency, .. } => {
                assert_eq!(frequency, Some(RecurrenceFrequency::Daily));
            }
            _ => panic!("Expected Recurring expression"),
        }
    }

    #[test]
    fn test_ground_relative_expression() {
        use chrono::Datelike;

        let yesterday = AbstractTemporalExpression::relative("yesterday", -1);
        let document_date = utc_at(2024, 6, 15, 12, 0, 0);

        let grounded = yesterday.ground(&document_date).unwrap();
        assert!(grounded.grounded_range.is_some());

        // One day before June 15 is June 14.
        let (range_start, _range_end) = grounded.grounded_range.unwrap();
        assert_eq!(range_start.day(), 14);
    }

    #[test]
    fn test_infer_granularity() {
        let cases = [
            ("2024-01-15", TemporalGranularity::Day),
            ("January 2024", TemporalGranularity::Month),
            ("2024", TemporalGranularity::Year),
            ("Q1 2024", TemporalGranularity::Quarter),
            // The "century" keyword must win over the digit-based decade
            // heuristic, even though "21st" contains digits.
            ("the 21st century", TemporalGranularity::Century),
            ("the 90s", TemporalGranularity::Decade),
        ];

        for (input, expected) in cases {
            assert_eq!(infer_granularity(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_granularity_to_range() {
        use chrono::{Datelike, Timelike};

        let reference = utc_at(2024, 6, 15, 12, 0, 0);

        // A day spans midnight through the last hour of the same day.
        let (day_start, day_end) = granularity_to_range(&reference, TemporalGranularity::Day);
        assert_eq!(day_start.hour(), 0);
        assert_eq!(day_end.hour(), 23);

        // A month spans its first through its last day (June has 30 days).
        let (month_start, month_end) =
            granularity_to_range(&reference, TemporalGranularity::Month);
        assert_eq!(month_start.day(), 1);
        assert_eq!(month_end.day(), 30);

        // A year spans January through December.
        let (year_start, year_end) = granularity_to_range(&reference, TemporalGranularity::Year);
        assert_eq!(year_start.month(), 1);
        assert_eq!(year_end.month(), 12);
    }

    #[test]
    fn test_abstract_expression_overlap() {
        let jan_2024 = absolute(
            "January 2024",
            TemporalGranularity::Month,
            utc(2024, 1, 1),
            utc_at(2024, 1, 31, 23, 59, 59),
        );
        let jan_15 = absolute(
            "January 15, 2024",
            TemporalGranularity::Day,
            utc(2024, 1, 15),
            utc_at(2024, 1, 15, 23, 59, 59),
        );
        let feb_2024 = absolute(
            "February 2024",
            TemporalGranularity::Month,
            utc(2024, 2, 1),
            utc_at(2024, 2, 29, 23, 59, 59),
        );

        // A day inside the month overlaps it, in both directions.
        assert!(jan_2024.overlaps(&jan_15));
        assert!(jan_15.overlaps(&jan_2024));

        // Adjacent months do not overlap.
        assert!(!jan_2024.overlaps(&feb_2024));
    }
}
2409
2410// =============================================================================
2411// Jiff Integration (Optional)
2412// =============================================================================
2413
2414/// Jiff datetime interoperability.
2415///
2416/// When the `jiff-time` feature is enabled, this module provides conversion
2417/// utilities between `chrono` and `jiff` datetime types, allowing seamless
2418/// integration with the modern `jiff` datetime library.
2419///
2420/// # Why Jiff?
2421///
2422/// While `chrono` is the established datetime library in Rust, `jiff` offers:
2423/// - Better handling of civil time vs. absolute time
2424/// - Cleaner timezone arithmetic
2425/// - More ergonomic span/duration types
2426/// - Stricter correctness guarantees
2427///
2428/// # Example
2429///
2430/// ```rust,ignore
2431/// use anno::temporal::jiff_interop::{chrono_to_jiff, jiff_to_chrono};
2432/// use chrono::Utc;
2433/// use jiff::Timestamp;
2434///
2435/// let chrono_dt = Utc::now();
2436/// let jiff_ts = chrono_to_jiff(&chrono_dt);
2437///
/// // Use jiff for calculations (Timestamp arithmetic rejects units larger than hours)
/// let jiff_future = jiff_ts.checked_add(jiff::Span::new().hours(30 * 24)).unwrap();
2440///
2441/// // Convert back to chrono for storage
2442/// let chrono_future = jiff_to_chrono(&jiff_future);
2443/// ```
2444#[cfg(feature = "jiff-time")]
2445pub mod jiff_interop {
2446    use chrono::{DateTime, TimeZone, Utc};
2447    use jiff::{Span, Timestamp, ToSpan, Zoned};
2448
2449    /// Convert a chrono `DateTime<Utc>` to a jiff `Timestamp`.
2450    #[must_use]
2451    pub fn chrono_to_jiff(dt: &DateTime<Utc>) -> Timestamp {
2452        Timestamp::from_second(dt.timestamp())
2453            .expect("chrono DateTime should be valid jiff Timestamp")
2454    }
2455
2456    /// Convert a jiff `Timestamp` to a chrono `DateTime<Utc>`.
2457    #[must_use]
2458    pub fn jiff_to_chrono(ts: &Timestamp) -> DateTime<Utc> {
2459        Utc.timestamp_opt(ts.as_second(), 0)
2460            .single()
2461            .expect("jiff Timestamp should be valid chrono DateTime")
2462    }
2463
2464    /// Convert a chrono `Duration` to a jiff `Span`.
2465    #[must_use]
2466    pub fn duration_to_span(d: &chrono::Duration) -> Span {
2467        d.num_seconds().seconds()
2468    }
2469
2470    /// Convert a jiff `Span` to a chrono `Duration`.
2471    ///
2472    /// Note: This only preserves the total duration, not the civil components.
2473    #[must_use]
2474    pub fn span_to_duration(s: &Span) -> chrono::Duration {
2475        // Convert span to total seconds (approximate for civil spans)
2476        let total = s.total(jiff::Unit::Second).unwrap_or(0.0) as i64;
2477        chrono::Duration::seconds(total)
2478    }
2479
    /// A temporal entity tracker that uses jiff internally.
    ///
    /// This provides a more ergonomic API for temporal operations while
    /// maintaining compatibility with anno's chrono-based Entity type.
    ///
    /// Each stored entity is paired with its own optional jiff validity
    /// bounds; a missing bound means the interval is open on that side.
    #[derive(Debug, Clone)]
    pub struct JiffTemporalTracker {
        // One `(entity, valid_from, valid_until)` triple per tracked entity,
        // in insertion order.
        entities: Vec<(crate::Entity, Option<Timestamp>, Option<Timestamp>)>,
    }
2488
2489    impl JiffTemporalTracker {
2490        /// Create a new tracker.
2491        #[must_use]
2492        pub fn new() -> Self {
2493            Self {
2494                entities: Vec::new(),
2495            }
2496        }
2497
2498        /// Add an entity with jiff timestamps.
2499        pub fn add(
2500            &mut self,
2501            entity: crate::Entity,
2502            from: Option<Timestamp>,
2503            until: Option<Timestamp>,
2504        ) {
2505            self.entities.push((entity, from, until));
2506        }
2507
2508        /// Add an entity, converting from chrono timestamps.
2509        pub fn add_from_chrono(&mut self, entity: crate::Entity) {
2510            let from = entity.valid_from.as_ref().map(chrono_to_jiff);
2511            let until = entity.valid_until.as_ref().map(chrono_to_jiff);
2512            self.entities.push((entity, from, until));
2513        }
2514
2515        /// Query entities valid at a jiff timestamp.
2516        #[must_use]
2517        pub fn at(&self, ts: &Timestamp) -> Vec<&crate::Entity> {
2518            self.entities
2519                .iter()
2520                .filter(|(_, from, until)| {
2521                    let after_start = from.map_or(true, |f| ts >= &f);
2522                    let before_end = until.map_or(true, |u| ts < &u);
2523                    after_start && before_end
2524                })
2525                .map(|(e, _, _)| e)
2526                .collect()
2527        }
2528
2529        /// Query entities valid within a jiff span from now.
2530        #[must_use]
2531        pub fn within(&self, span: Span) -> Vec<&crate::Entity> {
2532            let now = Timestamp::now();
2533            let end = now.checked_add(span).unwrap_or(now);
2534
2535            self.entities
2536                .iter()
2537                .filter(|(_, from, until)| {
2538                    let from = from.unwrap_or(Timestamp::MIN);
2539                    let until = until.unwrap_or(Timestamp::MAX);
2540                    // Overlap check
2541                    from <= end && until >= now
2542                })
2543                .map(|(e, _, _)| e)
2544                .collect()
2545        }
2546
2547        /// Convert to a standard TemporalEntityTracker.
2548        #[must_use]
2549        pub fn to_chrono_tracker(&self) -> super::TemporalEntityTracker {
2550            let mut tracker = super::TemporalEntityTracker::new();
2551            for (entity, from, until) in &self.entities {
2552                let mut entity = entity.clone();
2553                entity.valid_from = from.map(|f| jiff_to_chrono(&f));
2554                entity.valid_until = until.map(|u| jiff_to_chrono(&u));
2555                tracker.add_entity(entity);
2556            }
2557            tracker
2558        }
2559    }
2560
2561    impl Default for JiffTemporalTracker {
2562        fn default() -> Self {
2563            Self::new()
2564        }
2565    }
2566
2567    /// Parse a date string using jiff's flexible parser.
2568    ///
2569    /// Jiff has excellent parsing support for various date formats.
2570    #[must_use]
2571    pub fn parse_date_jiff(text: &str) -> Option<Timestamp> {
2572        // Try parsing as a zoned datetime first
2573        if let Ok(zoned) = text.parse::<Zoned>() {
2574            return Some(zoned.timestamp());
2575        }
2576
2577        // Try parsing as a timestamp
2578        if let Ok(ts) = text.parse::<Timestamp>() {
2579            return Some(ts);
2580        }
2581
2582        // Try civil date parsing
2583        if let Ok(date) = text.parse::<jiff::civil::Date>() {
2584            // Convert to timestamp at midnight UTC
2585            let dt = date.at(0, 0, 0, 0);
2586            if let Ok(ts) = dt.to_zoned(jiff::tz::TimeZone::UTC) {
2587                return Some(ts.timestamp());
2588            }
2589        }
2590
2591        None
2592    }
2593
2594    #[cfg(test)]
2595    mod tests {
2596        use super::*;
2597        use crate::{Entity, EntityType};
2598
2599        #[test]
2600        fn test_chrono_jiff_roundtrip() {
2601            let chrono_now = Utc::now();
2602            let jiff_ts = chrono_to_jiff(&chrono_now);
2603            let chrono_back = jiff_to_chrono(&jiff_ts);
2604
2605            // Should be within 1 second (we lose sub-second precision)
2606            assert!((chrono_now - chrono_back).num_seconds().abs() < 1);
2607        }
2608
2609        #[test]
2610        fn test_jiff_tracker_query() {
2611            let mut tracker = JiffTemporalTracker::new();
2612
2613            let entity = Entity::new("Test", EntityType::Person, 0, 4, 0.9);
2614            let now = Timestamp::now();
2615            // Use hours instead of days (Timestamp doesn't support calendar units)
2616            let past = now.checked_sub(720.hours()).unwrap(); // ~30 days
2617
2618            tracker.add(entity, Some(past), Some(now));
2619
2620            // Query in the middle - should find it
2621            let mid = now.checked_sub(360.hours()).unwrap(); // ~15 days
2622            let results = tracker.at(&mid);
2623            assert_eq!(results.len(), 1);
2624
2625            // Query in the future - should not find it
2626            let future = now.checked_add(360.hours()).unwrap();
2627            let results = tracker.at(&future);
2628            assert_eq!(results.len(), 0);
2629        }
2630
2631        #[test]
2632        fn test_parse_date_jiff() {
2633            // ISO 8601
2634            assert!(parse_date_jiff("2024-01-15").is_some());
2635
2636            // With time
2637            assert!(parse_date_jiff("2024-01-15T10:30:00Z").is_some());
2638        }
2639    }
2640}