Skip to main content

oxirs_graphrag/temporal/
temporal_retrieval.rs

1//! Temporal GraphRAG: time-aware retrieval that filters and weights graph nodes
2//! by temporal relevance.
3//!
4//! # Design
5//!
6//! Temporal relevance is modelled via two orthogonal mechanisms:
7//!
8//! 1. **Hard filtering** – remove entities/triples that fall entirely outside
9//!    a caller-specified time window.
10//! 2. **Soft weighting (temporal decay)** – reduce the retrieval score of
11//!    older entities using a configurable decay function (exponential,
12//!    linear, or step).  More recent entities rank higher.
13//!
14//! The module parses timestamps from entity metadata or triple objects
15//! (recognised formats: RFC-3339 / ISO-8601, Unix epoch seconds, year strings
//! like "2021").  Timestamps that cannot be parsed are treated as "unknown"
//! and handled by a configurable policy: kept, discarded, or given a fixed
//! fallback weight.
18
19use crate::{GraphRAGResult, ScoreSource, ScoredEntity, Triple};
20use chrono::{DateTime, NaiveDate, TimeZone, Utc};
21use serde::{Deserialize, Serialize};
22use std::collections::HashMap;
23
24// ─── Temporal window ─────────────────────────────────────────────────────────
25
26/// A half-open time window `[start, end)`
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct TimeWindow {
29    /// Inclusive start (None = no lower bound)
30    pub start: Option<DateTime<Utc>>,
31    /// Exclusive end (None = no upper bound)
32    pub end: Option<DateTime<Utc>>,
33}
34
35impl TimeWindow {
36    /// Create an unbounded window (accepts all timestamps)
37    pub fn unbounded() -> Self {
38        Self {
39            start: None,
40            end: None,
41        }
42    }
43
44    /// Create a window starting at `start` with no upper bound
45    pub fn since(start: DateTime<Utc>) -> Self {
46        Self {
47            start: Some(start),
48            end: None,
49        }
50    }
51
52    /// Create a window ending before `end` with no lower bound
53    pub fn before(end: DateTime<Utc>) -> Self {
54        Self {
55            start: None,
56            end: Some(end),
57        }
58    }
59
60    /// Create a bounded window
61    pub fn between(start: DateTime<Utc>, end: DateTime<Utc>) -> Self {
62        Self {
63            start: Some(start),
64            end: Some(end),
65        }
66    }
67
68    /// Return `true` if `ts` is within this window
69    pub fn contains(&self, ts: DateTime<Utc>) -> bool {
70        let after_start = self.start.map_or(true, |s| ts >= s);
71        let before_end = self.end.map_or(true, |e| ts < e);
72        after_start && before_end
73    }
74}
75
76// ─── Decay functions ─────────────────────────────────────────────────────────
77
78/// Temporal decay model
79#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
80pub enum DecayFn {
81    /// Exponential decay: w = exp(−λ · age_days)
82    Exponential {
83        /// Decay rate λ (higher = faster decay; default ≈ 0.005 → half-life ~140 days)
84        lambda: f64,
85    },
86    /// Linear decay: w = max(0, 1 − age_days / half_life_days)
87    Linear {
88        /// Age in days at which weight reaches 0
89        half_life_days: f64,
90    },
91    /// Step decay: w = 1 if age_days ≤ cutoff, else `old_weight`
92    Step {
93        /// Age threshold in days
94        cutoff_days: f64,
95        /// Weight assigned to items older than `cutoff_days`
96        old_weight: f64,
97    },
98    /// No decay – all items receive weight 1.0
99    None,
100}
101
102impl Default for DecayFn {
103    fn default() -> Self {
104        Self::Exponential { lambda: 0.005 }
105    }
106}
107
108impl DecayFn {
109    /// Compute temporal weight for an entity whose timestamp is `age_days` old.
110    /// Returns a value in [0.0, 1.0].
111    pub fn weight(&self, age_days: f64) -> f64 {
112        let age = age_days.max(0.0);
113        match *self {
114            Self::Exponential { lambda } => (-lambda * age).exp().clamp(0.0, 1.0),
115            Self::Linear { half_life_days } => {
116                (1.0 - age / half_life_days.max(1.0)).clamp(0.0, 1.0)
117            }
118            Self::Step {
119                cutoff_days,
120                old_weight,
121            } => {
122                if age <= cutoff_days {
123                    1.0
124                } else {
125                    old_weight.clamp(0.0, 1.0)
126                }
127            }
128            Self::None => 1.0,
129        }
130    }
131}
132
133// ─── Temporal metadata extraction ────────────────────────────────────────────
134
/// Well-known temporal metadata keys.
///
/// [`extract_timestamp_from_metadata`] probes these keys in the order listed
/// and returns the first value that parses, so earlier entries take priority
/// over later ones. Keys are matched exactly (case-sensitive), which is why
/// both `valid_from` and `validFrom` appear.
const TEMPORAL_META_KEYS: &[&str] = &[
    "timestamp",
    "created",
    "modified",
    "updated",
    "date",
    "published",
    "valid_from",
    "validFrom",
    "time",
];
147
148/// Try to parse a string into a UTC `DateTime`.
149/// Supports:
150/// - RFC-3339 / ISO-8601  (e.g. "2024-03-15T10:00:00Z")
151/// - Date only (e.g. "2024-03-15")
152/// - Year only (e.g. "2021")
153/// - Unix epoch seconds (e.g. "1700000000")
154pub fn parse_timestamp(s: &str) -> Option<DateTime<Utc>> {
155    let s = s.trim();
156
157    // RFC-3339 / ISO-8601 with timezone
158    if let Ok(dt) = DateTime::parse_from_rfc3339(s) {
159        return Some(dt.with_timezone(&Utc));
160    }
161
162    // ISO-8601 without timezone → assume UTC
163    let formats_no_tz = [
164        "%Y-%m-%dT%H:%M:%S",
165        "%Y-%m-%dT%H:%M",
166        "%Y-%m-%d %H:%M:%S",
167        "%Y-%m-%d %H:%M",
168    ];
169    for fmt in &formats_no_tz {
170        if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(s, fmt) {
171            return Some(Utc.from_utc_datetime(&naive));
172        }
173    }
174
175    // Date only
176    if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
177        let naive = date.and_hms_opt(0, 0, 0)?;
178        return Some(Utc.from_utc_datetime(&naive));
179    }
180
181    // Year only (e.g. "2021")
182    if s.len() == 4 {
183        if let Ok(year) = s.parse::<i32>() {
184            let date = NaiveDate::from_ymd_opt(year, 1, 1)?;
185            let naive = date.and_hms_opt(0, 0, 0)?;
186            return Some(Utc.from_utc_datetime(&naive));
187        }
188    }
189
190    // Unix epoch seconds
191    if let Ok(epoch) = s.parse::<i64>() {
192        return Utc.timestamp_opt(epoch, 0).single();
193    }
194
195    None
196}
197
198/// Extract the best timestamp from entity metadata
199pub fn extract_timestamp_from_metadata(
200    metadata: &HashMap<String, String>,
201) -> Option<DateTime<Utc>> {
202    for key in TEMPORAL_META_KEYS {
203        if let Some(val) = metadata.get(*key) {
204            if let Some(ts) = parse_timestamp(val) {
205                return Some(ts);
206            }
207        }
208    }
209    None
210}
211
212/// Try to extract a timestamp from a triple's object literal
213pub fn extract_timestamp_from_triple(triple: &Triple) -> Option<DateTime<Utc>> {
214    parse_timestamp(&triple.object)
215}
216
217// ─── Temporal retrieval configuration ────────────────────────────────────────
218
/// Fallback behaviour when no timestamp can be determined for an entity
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
pub enum UnknownTimestampPolicy {
    /// Keep the entity and treat it as maximally fresh (temporal weight = 1.0).
    /// The score is still blended with that weight, so it only stays exactly
    /// as it was when `temporal_alpha` is 0. This is the default policy.
    #[default]
    Keep,
    /// Discard the entity entirely
    Discard,
    /// Keep the entity and apply a fixed temporal weight (configured as
    /// `unknown_weight` in `TemporalRetrievalConfig`)
    FixedWeight,
}
230
/// Temporal filtering and weighting configuration
///
/// Consumed by `TemporalRetriever` and `annotate_timestamps`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalRetrievalConfig {
    /// Reference time for decay calculation (None = now, i.e. the current
    /// time is looked up when the retriever needs it)
    pub reference_time: Option<DateTime<Utc>>,
    /// Hard filter window (only entities within this window survive).
    /// `None` disables hard filtering.
    pub filter_window: Option<TimeWindow>,
    /// Decay function for score weighting
    pub decay_fn: DecayFn,
    /// Alpha: blend factor between original score and temporal weight
    /// final_score = (1 − temporal_alpha) · original + temporal_alpha · temporal_weight
    ///
    /// With 0.0 the temporal weight is ignored; with 1.0 the original score
    /// is ignored. Intended range is presumably [0, 1] — TODO confirm.
    pub temporal_alpha: f64,
    /// Policy when timestamp cannot be determined
    pub unknown_policy: UnknownTimestampPolicy,
    /// Fixed weight used when `unknown_policy == FixedWeight`
    pub unknown_weight: f64,
    /// Temporal predicates to scan in triples for timestamps
    /// (e.g. `"http://schema.org/datePublished"`, `"http://purl.org/dc/terms/date"`).
    /// Predicates are compared as exact strings.
    pub temporal_predicates: Vec<String>,
}
251
252impl Default for TemporalRetrievalConfig {
253    fn default() -> Self {
254        Self {
255            reference_time: None,
256            filter_window: None,
257            decay_fn: DecayFn::default(),
258            temporal_alpha: 0.3,
259            unknown_policy: UnknownTimestampPolicy::Keep,
260            unknown_weight: 0.5,
261            temporal_predicates: vec![
262                "http://schema.org/datePublished".to_string(),
263                "http://schema.org/dateModified".to_string(),
264                "http://purl.org/dc/terms/date".to_string(),
265                "http://purl.org/dc/terms/created".to_string(),
266                "http://purl.org/dc/terms/modified".to_string(),
267                "http://www.w3.org/2006/time#inXSDDateTimeStamp".to_string(),
268            ],
269        }
270    }
271}
272
273// ─── Temporal retriever ───────────────────────────────────────────────────────
274
275/// Timestamp index built from the RDF subgraph
276struct TemporalIndex {
277    /// entity_uri → best timestamp
278    timestamps: HashMap<String, DateTime<Utc>>,
279}
280
281impl TemporalIndex {
282    fn build(subgraph: &[Triple], config: &TemporalRetrievalConfig) -> Self {
283        let mut timestamps: HashMap<String, DateTime<Utc>> = HashMap::new();
284
285        let pred_set: std::collections::HashSet<&str> = config
286            .temporal_predicates
287            .iter()
288            .map(|s| s.as_str())
289            .collect();
290
291        for triple in subgraph {
292            if pred_set.contains(triple.predicate.as_str()) {
293                if let Some(ts) = parse_timestamp(&triple.object) {
294                    timestamps
295                        .entry(triple.subject.clone())
296                        .and_modify(|existing| {
297                            // Keep the most recent timestamp
298                            if ts > *existing {
299                                *existing = ts;
300                            }
301                        })
302                        .or_insert(ts);
303                }
304            }
305        }
306
307        Self { timestamps }
308    }
309
310    fn get(&self, uri: &str) -> Option<DateTime<Utc>> {
311        self.timestamps.get(uri).copied()
312    }
313}
314
315/// Time-aware retrieval engine
316pub struct TemporalRetriever {
317    config: TemporalRetrievalConfig,
318}
319
320impl TemporalRetriever {
321    pub fn new(config: TemporalRetrievalConfig) -> Self {
322        Self { config }
323    }
324
325    /// Reference time for decay calculation
326    fn reference_time(&self) -> DateTime<Utc> {
327        self.config.reference_time.unwrap_or_else(Utc::now)
328    }
329
330    /// Apply temporal filtering and re-weighting to a list of scored entities.
331    ///
332    /// `subgraph` is used to build a temporal index from temporal predicates.
333    pub fn apply(
334        &self,
335        entities: Vec<ScoredEntity>,
336        subgraph: &[Triple],
337    ) -> GraphRAGResult<Vec<ScoredEntity>> {
338        let index = TemporalIndex::build(subgraph, &self.config);
339        let ref_time = self.reference_time();
340
341        let mut result: Vec<ScoredEntity> = Vec::with_capacity(entities.len());
342
343        for entity in entities {
344            // Determine timestamp: subgraph index first, then metadata
345            let ts = index
346                .get(&entity.uri)
347                .or_else(|| extract_timestamp_from_metadata(&entity.metadata));
348
349            let temporal_weight = match ts {
350                Some(t) => {
351                    let age_days = (ref_time - t).num_seconds().max(0) as f64 / 86_400.0;
352
353                    // Hard filter
354                    if let Some(window) = &self.config.filter_window {
355                        if !window.contains(t) {
356                            continue; // discard
357                        }
358                    }
359
360                    self.config.decay_fn.weight(age_days)
361                }
362                None => match self.config.unknown_policy {
363                    UnknownTimestampPolicy::Discard => continue,
364                    UnknownTimestampPolicy::FixedWeight => self.config.unknown_weight,
365                    UnknownTimestampPolicy::Keep => 1.0,
366                },
367            };
368
369            // Blend original score with temporal weight
370            let alpha = self.config.temporal_alpha;
371            let new_score = (1.0 - alpha) * entity.score + alpha * temporal_weight;
372
373            let mut updated = entity;
374            updated.score = new_score.clamp(0.0, f64::MAX);
375            result.push(updated);
376        }
377
378        result.sort_by(|a, b| {
379            b.score
380                .partial_cmp(&a.score)
381                .unwrap_or(std::cmp::Ordering::Equal)
382        });
383
384        Ok(result)
385    }
386
387    /// Filter triples whose associated subject has a timestamp outside the window.
388    /// Triples with no discernible timestamp are kept by default.
389    pub fn filter_triples(&self, triples: Vec<Triple>) -> GraphRAGResult<Vec<Triple>> {
390        let window = match &self.config.filter_window {
391            None => return Ok(triples),
392            Some(w) => w,
393        };
394
395        let result: Vec<Triple> = triples
396            .into_iter()
397            .filter(|t| {
398                match extract_timestamp_from_triple(t) {
399                    Some(ts) => window.contains(ts),
400                    None => true, // keep triples without timestamp
401                }
402            })
403            .collect();
404
405        Ok(result)
406    }
407
408    /// Score a single entity's temporal relevance (0.0..1.0)
409    pub fn temporal_score(&self, ts: DateTime<Utc>) -> f64 {
410        let ref_time = self.reference_time();
411        let age_days = (ref_time - ts).num_seconds().max(0) as f64 / 86_400.0;
412        self.config.decay_fn.weight(age_days)
413    }
414}
415
416// ─── Integration helper ───────────────────────────────────────────────────────
417
418/// Annotate entities with their most recent timestamp (if available in subgraph)
419pub fn annotate_timestamps(
420    entities: Vec<ScoredEntity>,
421    subgraph: &[Triple],
422    config: &TemporalRetrievalConfig,
423) -> Vec<ScoredEntity> {
424    let index = TemporalIndex::build(subgraph, config);
425
426    entities
427        .into_iter()
428        .map(|mut e| {
429            if let Some(ts) = index.get(&e.uri) {
430                e.metadata
431                    .insert("temporal_timestamp".to_string(), ts.to_rfc3339());
432            }
433            e
434        })
435        .collect()
436}
437
#[cfg(test)]
mod tests {
    // Unit tests for timestamp parsing, window membership, decay weights and
    // the retriever's filtering / re-weighting behaviour.
    use super::*;
    use crate::ScoreSource;
    use chrono::{Datelike, Duration};

    /// Entity with the given URI and score and no metadata.
    fn make_entity(uri: &str, score: f64) -> ScoredEntity {
        ScoredEntity {
            uri: uri.to_string(),
            score,
            source: ScoreSource::Fused,
            metadata: HashMap::new(),
        }
    }

    /// Entity carrying a single metadata entry `ts_key = ts_val`.
    fn make_entity_with_ts(uri: &str, score: f64, ts_key: &str, ts_val: &str) -> ScoredEntity {
        let mut e = make_entity(uri, score);
        e.metadata.insert(ts_key.to_string(), ts_val.to_string());
        e
    }

    // ── parse_timestamp ────────────────────────────────────────────────────

    #[test]
    fn test_parse_rfc3339() {
        let ts = parse_timestamp("2024-03-15T10:00:00Z").expect("should succeed");
        assert_eq!(ts.year(), 2024);
    }

    #[test]
    fn test_parse_date_only() {
        let ts = parse_timestamp("2023-06-01").expect("should succeed");
        assert_eq!(ts.year(), 2023);
        assert_eq!(ts.month(), 6);
    }

    #[test]
    fn test_parse_year_only() {
        let ts = parse_timestamp("2021").expect("should succeed");
        assert_eq!(ts.year(), 2021);
    }

    #[test]
    fn test_parse_unix_epoch() {
        let ts = parse_timestamp("1700000000").expect("should succeed");
        assert!(ts.year() >= 2023);
    }

    #[test]
    fn test_parse_invalid_returns_none() {
        assert!(parse_timestamp("not-a-date").is_none());
        assert!(parse_timestamp("").is_none());
    }

    // ── TimeWindow ────────────────────────────────────────────────────────

    #[test]
    fn test_time_window_contains() {
        let start = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap();
        let end = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
        let w = TimeWindow::between(start, end);

        let inside = Utc.with_ymd_and_hms(2023, 6, 1, 0, 0, 0).unwrap();
        let before = Utc.with_ymd_and_hms(2022, 12, 31, 0, 0, 0).unwrap();
        let after = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();

        assert!(w.contains(inside));
        assert!(!w.contains(before));
        assert!(!w.contains(after)); // exclusive end
    }

    #[test]
    fn test_time_window_unbounded_accepts_all() {
        let w = TimeWindow::unbounded();
        assert!(w.contains(Utc::now()));
        assert!(w.contains(Utc.with_ymd_and_hms(1900, 1, 1, 0, 0, 0).unwrap()));
    }

    #[test]
    fn test_time_window_since() {
        // Read the clock once so every assertion refers to the same instant.
        let now = Utc::now();
        let w = TimeWindow::since(now - Duration::days(30));
        assert!(w.contains(now));
        assert!(!w.contains(now - Duration::days(60)));
    }

    // ── DecayFn ───────────────────────────────────────────────────────────

    #[test]
    fn test_decay_exponential_at_zero() {
        let d = DecayFn::Exponential { lambda: 0.01 };
        assert!((d.weight(0.0) - 1.0).abs() < 1e-9);
    }

    #[test]
    fn test_decay_exponential_decreases() {
        let d = DecayFn::Exponential { lambda: 0.01 };
        assert!(d.weight(100.0) < d.weight(10.0));
    }

    #[test]
    fn test_decay_linear_at_zero_is_one() {
        let d = DecayFn::Linear {
            half_life_days: 365.0,
        };
        assert!((d.weight(0.0) - 1.0).abs() < 1e-9);
    }

    #[test]
    fn test_decay_linear_at_half_life_is_zero() {
        let d = DecayFn::Linear {
            half_life_days: 100.0,
        };
        assert!((d.weight(100.0)).abs() < 1e-9);
    }

    #[test]
    fn test_decay_linear_clamps_to_zero() {
        let d = DecayFn::Linear {
            half_life_days: 10.0,
        };
        assert_eq!(d.weight(200.0), 0.0);
    }

    #[test]
    fn test_decay_step_recent() {
        let d = DecayFn::Step {
            cutoff_days: 30.0,
            old_weight: 0.1,
        };
        assert_eq!(d.weight(10.0), 1.0);
        assert_eq!(d.weight(31.0), 0.1);
    }

    #[test]
    fn test_decay_none_always_one() {
        let d = DecayFn::None;
        assert_eq!(d.weight(0.0), 1.0);
        assert_eq!(d.weight(9999.0), 1.0);
    }

    // ── TemporalRetriever::apply ──────────────────────────────────────────

    #[test]
    fn test_apply_no_filter_no_decay() {
        let config = TemporalRetrievalConfig {
            decay_fn: DecayFn::None,
            temporal_alpha: 0.0,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let entities = vec![make_entity("http://a", 0.9), make_entity("http://b", 0.7)];
        let result = retriever.apply(entities, &[]).expect("should succeed");
        assert_eq!(result.len(), 2);
        // Scores unchanged (alpha=0, decay=1)
        assert!((result[0].score - 0.9).abs() < 1e-9);
    }

    #[test]
    fn test_apply_decays_older_entities() {
        let ref_ts = Utc::now();
        let config = TemporalRetrievalConfig {
            reference_time: Some(ref_ts),
            decay_fn: DecayFn::Exponential { lambda: 0.1 },
            temporal_alpha: 1.0, // pure temporal
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);

        // Recent entity: 1 day old
        let recent_ts = ref_ts - Duration::days(1);
        let recent = make_entity_with_ts("http://a", 0.9, "timestamp", &recent_ts.to_rfc3339());

        // Old entity: 365 days old
        let old_ts = ref_ts - Duration::days(365);
        let old = make_entity_with_ts("http://b", 0.9, "timestamp", &old_ts.to_rfc3339());

        // Feed the old entity first so the ranking, not input order, decides.
        let result = retriever
            .apply(vec![old, recent], &[])
            .expect("should succeed");
        assert_eq!(result.len(), 2);
        // Recent should rank higher
        assert!(result[0].uri == "http://a", "Recent should rank first");
        assert!(result[0].score > result[1].score);
    }

    #[test]
    fn test_apply_hard_filter() {
        let window_start = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap();
        let window_end = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
        let config = TemporalRetrievalConfig {
            reference_time: Some(window_end),
            filter_window: Some(TimeWindow::between(window_start, window_end)),
            decay_fn: DecayFn::None,
            temporal_alpha: 0.0,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);

        let inside_entity = make_entity_with_ts("http://inside", 0.8, "timestamp", "2023-06-01");
        let outside_entity = make_entity_with_ts("http://outside", 0.9, "timestamp", "2022-01-01");

        let result = retriever
            .apply(vec![inside_entity, outside_entity], &[])
            .expect("should succeed");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].uri, "http://inside");
    }

    #[test]
    fn test_apply_unknown_timestamp_keep() {
        let config = TemporalRetrievalConfig {
            unknown_policy: UnknownTimestampPolicy::Keep,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let entities = vec![make_entity("http://notimestamp", 0.7)];
        let result = retriever.apply(entities, &[]).expect("should succeed");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_apply_unknown_timestamp_discard() {
        let config = TemporalRetrievalConfig {
            unknown_policy: UnknownTimestampPolicy::Discard,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let entities = vec![make_entity("http://notimestamp", 0.7)];
        let result = retriever.apply(entities, &[]).expect("should succeed");
        assert!(result.is_empty());
    }

    #[test]
    fn test_apply_unknown_timestamp_fixed_weight() {
        let config = TemporalRetrievalConfig {
            unknown_policy: UnknownTimestampPolicy::FixedWeight,
            unknown_weight: 0.5,
            temporal_alpha: 1.0,
            decay_fn: DecayFn::None,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let entities = vec![make_entity("http://notimestamp", 0.8)];
        let result = retriever.apply(entities, &[]).expect("should succeed");
        assert_eq!(result.len(), 1);
        assert!((result[0].score - 0.5).abs() < 1e-9);
    }

    #[test]
    fn test_apply_subgraph_temporal_index() {
        let ref_ts = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
        let config = TemporalRetrievalConfig {
            reference_time: Some(ref_ts),
            decay_fn: DecayFn::None,
            temporal_alpha: 0.0,
            temporal_predicates: vec!["http://schema.org/datePublished".to_string()],
            // The entity has no metadata, so it only survives `Discard` if
            // its timestamp was actually found through the subgraph index.
            unknown_policy: UnknownTimestampPolicy::Discard,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);

        let subgraph = vec![Triple::new(
            "http://entity",
            "http://schema.org/datePublished",
            "2023-06-01",
        )];
        let entities = vec![make_entity("http://entity", 0.8)];
        let result = retriever
            .apply(entities, &subgraph)
            .expect("should succeed");
        assert_eq!(result.len(), 1);
    }

    // ── filter_triples ────────────────────────────────────────────────────

    #[test]
    fn test_filter_triples_no_window_keeps_all() {
        let config = TemporalRetrievalConfig::default();
        let retriever = TemporalRetriever::new(config);
        let triples = vec![
            Triple::new("http://s", "http://p", "2023-01-01"),
            Triple::new("http://s", "http://p", "some literal"),
        ];
        let result = retriever
            .filter_triples(triples.clone())
            .expect("should succeed");
        assert_eq!(result.len(), triples.len());
    }

    #[test]
    fn test_filter_triples_with_window() {
        let start = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap();
        let end = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
        let config = TemporalRetrievalConfig {
            filter_window: Some(TimeWindow::between(start, end)),
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let triples = vec![
            Triple::new("http://s", "http://p", "2023-06-01"), // inside
            Triple::new("http://s", "http://p", "2022-01-01"), // before
            Triple::new("http://s", "http://p", "not-a-date"), // no ts → keep
        ];
        let result = retriever.filter_triples(triples).expect("should succeed");
        // "inside" + "no ts" kept; "before" discarded
        assert_eq!(result.len(), 2);
    }

    // ── temporal_score ────────────────────────────────────────────────────

    #[test]
    fn test_temporal_score_recent_is_high() {
        // One clock read: the item is then exactly one day older than the
        // reference time.
        let now = Utc::now();
        let config = TemporalRetrievalConfig {
            reference_time: Some(now),
            decay_fn: DecayFn::Exponential { lambda: 0.01 },
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let score = retriever.temporal_score(now - Duration::days(1));
        assert!(
            score > 0.98,
            "Recent item should score close to 1.0: {score}"
        );
    }

    // ── annotate_timestamps ───────────────────────────────────────────────

    #[test]
    fn test_annotate_timestamps() {
        let config = TemporalRetrievalConfig {
            temporal_predicates: vec!["http://schema.org/datePublished".to_string()],
            ..Default::default()
        };
        let subgraph = vec![Triple::new(
            "http://entity",
            "http://schema.org/datePublished",
            "2023-06-01",
        )];
        let entities = vec![make_entity("http://entity", 0.8)];
        let annotated = annotate_timestamps(entities, &subgraph, &config);
        let stamped = annotated[0]
            .metadata
            .get("temporal_timestamp")
            .expect("Expected temporal_timestamp in metadata");
        // The annotation must round-trip to the date carried by the triple.
        let ts = parse_timestamp(stamped).expect("annotation should be parseable");
        assert_eq!((ts.year(), ts.month(), ts.day()), (2023, 6, 1));
    }

    // ── Extract metadata ──────────────────────────────────────────────────

    #[test]
    fn test_extract_timestamp_from_metadata_finds_key() {
        let mut m = HashMap::new();
        m.insert("created".to_string(), "2023-01-15".to_string());
        let ts = extract_timestamp_from_metadata(&m).expect("should succeed");
        assert_eq!(ts.year(), 2023);
    }

    #[test]
    fn test_extract_timestamp_none_when_absent() {
        let m = HashMap::new();
        assert!(extract_timestamp_from_metadata(&m).is_none());
    }

    // ── Blending alpha ────────────────────────────────────────────────────

    #[test]
    fn test_alpha_zero_preserves_original_score() {
        let config = TemporalRetrievalConfig {
            temporal_alpha: 0.0,
            decay_fn: DecayFn::None,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let e = make_entity_with_ts("http://a", 0.75, "timestamp", "2023-01-01");
        let result = retriever.apply(vec![e], &[]).expect("should succeed");
        assert!((result[0].score - 0.75).abs() < 1e-9);
    }

    #[test]
    fn test_alpha_one_gives_temporal_weight() {
        let ref_ts = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
        let config = TemporalRetrievalConfig {
            reference_time: Some(ref_ts),
            temporal_alpha: 1.0,
            decay_fn: DecayFn::None,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let e = make_entity_with_ts("http://a", 0.75, "timestamp", "2023-12-31");
        let result = retriever.apply(vec![e], &[]).expect("should succeed");
        // decay = None → weight = 1.0, alpha = 1 → score ≈ 1.0
        assert!((result[0].score - 1.0).abs() < 0.01);
    }
}
851
852// ─── Additional tests ─────────────────────────────────────────────────────────
853
#[cfg(test)]
mod additional_tests {
    //! Supplementary unit tests covering `TimeWindow`, `DecayFn`,
    //! `parse_timestamp`, `TemporalRetrievalConfig`, `TemporalRetriever`
    //! and `annotate_timestamps`.

    use super::*;
    use crate::{ScoreSource, ScoredEntity, Triple};
    use chrono::{Datelike, Duration, TimeZone, Utc};

    /// Build a vector-sourced entity with the given URI and score and empty metadata.
    fn make_entity(uri: &str, score: f64) -> ScoredEntity {
        ScoredEntity {
            uri: uri.to_string(),
            score,
            source: ScoreSource::Vector,
            metadata: HashMap::new(),
        }
    }

    // ── TimeWindow ────────────────────────────────────────────────────────

    /// An unbounded window must accept past, present and far-future instants.
    #[test]
    fn test_time_window_unbounded_contains_anything() {
        let w = TimeWindow::unbounded();
        assert!(w.contains(Utc::now()));
        assert!(w.contains(Utc.with_ymd_and_hms(1970, 1, 1, 0, 0, 0).unwrap()));
        assert!(w.contains(Utc.with_ymd_and_hms(2099, 12, 31, 0, 0, 0).unwrap()));
    }

    /// `since` rejects instants before the start and accepts the start itself.
    #[test]
    fn test_time_window_since_excludes_before_start() {
        let start = Utc.with_ymd_and_hms(2023, 6, 1, 0, 0, 0).unwrap();
        let w = TimeWindow::since(start);
        let before = Utc.with_ymd_and_hms(2023, 5, 31, 0, 0, 0).unwrap();
        assert!(!w.contains(before));
        assert!(w.contains(start)); // inclusive
    }

    /// `before` is half-open: the end instant itself is excluded.
    #[test]
    fn test_time_window_before_excludes_at_end() {
        let end = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
        let w = TimeWindow::before(end);
        // end itself should be excluded (half-open)
        assert!(!w.contains(end));
        let before_end = end - Duration::seconds(1);
        assert!(w.contains(before_end));
    }

    /// `between` includes the start, excludes the end, and accepts the interior.
    #[test]
    fn test_time_window_between_includes_start_excludes_end() {
        let start = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap();
        let end = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
        let w = TimeWindow::between(start, end);
        assert!(w.contains(start));
        assert!(!w.contains(end));
        let mid = Utc.with_ymd_and_hms(2023, 6, 15, 0, 0, 0).unwrap();
        assert!(w.contains(mid));
    }

    // ── DecayFn tests ─────────────────────────────────────────────────────

    /// Exponential decay at age zero yields full weight.
    #[test]
    fn test_decay_exponential_zero_age_is_one() {
        let decay = DecayFn::Exponential { lambda: 0.01 };
        assert!((decay.weight(0.0) - 1.0).abs() < 1e-9);
    }

    /// A negative age is expected to behave like age zero (full weight).
    #[test]
    fn test_decay_exponential_negative_age_clamps_to_zero() {
        let decay = DecayFn::Exponential { lambda: 0.01 };
        // Negative age clamped to 0
        assert!((decay.weight(-10.0) - 1.0).abs() < 1e-9);
    }

    /// Exponential decay must strictly decrease as age grows.
    #[test]
    fn test_decay_exponential_monotone_decreasing() {
        let decay = DecayFn::Exponential { lambda: 0.005 };
        let w1 = decay.weight(30.0);
        let w2 = decay.weight(60.0);
        assert!(w1 > w2, "Older items should have lower weight");
    }

    /// Linear decay reaches zero when age equals `half_life_days`.
    #[test]
    fn test_decay_linear_reaches_zero_at_half_life() {
        let decay = DecayFn::Linear {
            half_life_days: 100.0,
        };
        let w = decay.weight(100.0);
        assert!(w.abs() < 1e-9);
    }

    /// Linear decay is 0.5 at half of `half_life_days`.
    #[test]
    fn test_decay_linear_half_way_is_half() {
        let decay = DecayFn::Linear {
            half_life_days: 100.0,
        };
        let w = decay.weight(50.0);
        assert!((w - 0.5).abs() < 1e-9);
    }

    /// Linear decay never goes negative for ages beyond `half_life_days`.
    #[test]
    fn test_decay_linear_beyond_half_life_clamps_zero() {
        let decay = DecayFn::Linear {
            half_life_days: 10.0,
        };
        let w = decay.weight(100.0);
        assert!(w.abs() < 1e-9);
    }

    /// Step decay gives full weight up to and including the cutoff.
    #[test]
    fn test_decay_step_within_cutoff_is_one() {
        let decay = DecayFn::Step {
            cutoff_days: 30.0,
            old_weight: 0.1,
        };
        assert!((decay.weight(15.0) - 1.0).abs() < 1e-9);
        assert!((decay.weight(30.0) - 1.0).abs() < 1e-9); // at boundary → still 1
    }

    /// Step decay falls back to `old_weight` past the cutoff.
    #[test]
    fn test_decay_step_beyond_cutoff_uses_old_weight() {
        let decay = DecayFn::Step {
            cutoff_days: 30.0,
            old_weight: 0.3,
        };
        assert!((decay.weight(31.0) - 0.3).abs() < 1e-9);
    }

    /// `DecayFn::None` yields weight 1.0 regardless of age.
    #[test]
    fn test_decay_none_always_one() {
        let decay = DecayFn::None;
        for age in [0.0, 1.0, 100.0, 365.0, 10000.0] {
            assert!((decay.weight(age) - 1.0).abs() < 1e-9);
        }
    }

    /// The default decay function is expected to be exponential with lambda 0.005.
    #[test]
    fn test_decay_fn_default_is_exponential() {
        let decay = DecayFn::default();
        // `matches!` only produces a bool; it must be asserted, otherwise the
        // check is silently discarded and the test can never fail.
        assert!(
            matches!(decay, DecayFn::Exponential { lambda } if (lambda - 0.005).abs() < f64::EPSILON),
            "default decay should be Exponential with lambda = 0.005"
        );
    }

    // ── parse_timestamp tests ─────────────────────────────────────────────

    /// A full RFC-3339 timestamp parses to the expected calendar date.
    #[test]
    fn test_parse_timestamp_rfc3339() {
        let ts = parse_timestamp("2024-03-15T10:00:00Z").expect("should succeed");
        assert_eq!(ts.year(), 2024);
        assert_eq!(ts.month(), 3);
        assert_eq!(ts.day(), 15);
    }

    /// A date-only string (`YYYY-MM-DD`) parses to the expected year and month.
    #[test]
    fn test_parse_timestamp_date_only() {
        let ts = parse_timestamp("2023-07-04").expect("should succeed");
        assert_eq!(ts.year(), 2023);
        assert_eq!(ts.month(), 7);
    }

    /// Smoke test: a bare year string must not panic.
    #[test]
    fn test_parse_timestamp_year_only() {
        // Year-only parsing may or may not succeed depending on implementation
        // Just ensure we don't panic
        let _ = parse_timestamp("2020");
    }

    /// Garbage and empty input are rejected with `None`.
    #[test]
    fn test_parse_timestamp_invalid_returns_none() {
        assert!(parse_timestamp("not-a-date").is_none());
        assert!(parse_timestamp("").is_none());
    }

    /// A Unix-epoch-seconds string parses to a date in (or after) 2023.
    #[test]
    fn test_parse_timestamp_unix_epoch() {
        let ts = parse_timestamp("1700000000").expect("should succeed");
        assert!(ts.year() >= 2023); // 1700000000 ≈ Nov 2023
    }

    // ── TemporalRetrievalConfig defaults ──────────────────────────────────

    /// Default config: alpha 0.3, no reference time, non-empty predicate list.
    #[test]
    fn test_temporal_config_defaults() {
        let cfg = TemporalRetrievalConfig::default();
        assert!((cfg.temporal_alpha - 0.3).abs() < f64::EPSILON);
        assert!(cfg.reference_time.is_none());
        assert!(!cfg.temporal_predicates.is_empty());
    }

    // ── apply with time window ────────────────────────────────────────────

    /// `apply` returns entities ordered by score, highest first.
    #[test]
    fn test_apply_entities_sorted_by_score_descending() {
        let config = TemporalRetrievalConfig {
            decay_fn: DecayFn::None,
            temporal_alpha: 0.0,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let entities = vec![
            make_entity("http://low", 0.3),
            make_entity("http://high", 0.9),
            make_entity("http://mid", 0.6),
        ];
        let result = retriever.apply(entities, &[]).expect("should succeed");
        // Every adjacent pair must be in non-increasing score order.
        for pair in result.windows(2) {
            assert!(
                pair[0].score >= pair[1].score,
                "Results should be sorted descending"
            );
        }
    }

    /// `apply` on an empty input yields an empty output.
    #[test]
    fn test_apply_empty_entities() {
        let config = TemporalRetrievalConfig::default();
        let retriever = TemporalRetriever::new(config);
        let result = retriever.apply(vec![], &[]).expect("should succeed");
        assert!(result.is_empty());
    }

    /// An entity whose timestamp lies outside the filter window is dropped.
    #[test]
    fn test_apply_with_filter_window_no_match_discards_all() {
        let past_start = Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap();
        let past_end = Utc.with_ymd_and_hms(2021, 1, 1, 0, 0, 0).unwrap();
        let config = TemporalRetrievalConfig {
            filter_window: Some(TimeWindow::between(past_start, past_end)),
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let mut e = make_entity("http://recent", 0.8);
        // Timestamp in 2023 – outside the 2020-2021 window
        e.metadata
            .insert("timestamp".to_string(), "2023-01-01".to_string());
        let result = retriever.apply(vec![e], &[]).expect("should succeed");
        assert!(result.is_empty());
    }

    // ── temporal_score ────────────────────────────────────────────────────

    /// Under exponential decay a 10-day-old item outscores a 500-day-old one.
    #[test]
    fn test_temporal_score_old_item_lower_than_recent() {
        let ref_ts = Utc::now();
        let config = TemporalRetrievalConfig {
            reference_time: Some(ref_ts),
            decay_fn: DecayFn::Exponential { lambda: 0.01 },
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let recent_score = retriever.temporal_score(ref_ts - Duration::days(10));
        let old_score = retriever.temporal_score(ref_ts - Duration::days(500));
        assert!(recent_score > old_score, "Recent items should score higher");
    }

    /// With `DecayFn::None` even a very old item scores 1.0.
    #[test]
    fn test_temporal_score_none_decay_always_one() {
        // Capture "now" once so the reference time and the probed timestamp
        // are derived from the same instant.
        let now = Utc::now();
        let config = TemporalRetrievalConfig {
            reference_time: Some(now),
            decay_fn: DecayFn::None,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let score = retriever.temporal_score(now - Duration::days(9999));
        assert!((score - 1.0).abs() < 1e-9);
    }

    // ── filter_triples with temporal predicates ───────────────────────────

    /// Triples with a predicate not used in the default config are kept.
    #[test]
    fn test_filter_triples_keeps_non_temporal_predicates() {
        let config = TemporalRetrievalConfig::default();
        let retriever = TemporalRetriever::new(config);
        let triples = vec![Triple::new(
            "http://s",
            "http://someOtherPred",
            "some value",
        )];
        // `triples` is not needed afterwards, so it is moved rather than cloned.
        let result = retriever.filter_triples(triples).expect("should succeed");
        assert_eq!(result.len(), 1);
    }

    // ── annotate_timestamps ───────────────────────────────────────────────

    /// An entity with no matching subject in the subgraph gets no annotation.
    #[test]
    fn test_annotate_timestamps_no_match_leaves_metadata_empty() {
        let config = TemporalRetrievalConfig {
            temporal_predicates: vec!["http://schema.org/datePublished".to_string()],
            ..Default::default()
        };
        let subgraph = vec![Triple::new(
            "http://other_entity",
            "http://schema.org/datePublished",
            "2023-01-01",
        )];
        let entities = vec![make_entity("http://entity_no_match", 0.8)];
        let annotated = annotate_timestamps(entities, &subgraph, &config);
        // No match → temporal_timestamp should not be set
        assert!(!annotated[0].metadata.contains_key("temporal_timestamp"));
    }

    // ── UnknownTimestampPolicy ────────────────────────────────────────────

    /// `FixedWeight` with alpha 1.0 makes the final score equal `unknown_weight`.
    #[test]
    fn test_unknown_timestamp_fixed_weight_multiplies_score() {
        let config = TemporalRetrievalConfig {
            unknown_policy: UnknownTimestampPolicy::FixedWeight,
            unknown_weight: 0.25,
            temporal_alpha: 1.0,
            decay_fn: DecayFn::None,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let entities = vec![make_entity("http://no_ts", 0.8)];
        let result = retriever.apply(entities, &[]).expect("should succeed");
        // alpha=1.0, decay=none → score = unknown_weight = 0.25
        assert_eq!(result.len(), 1);
        assert!((result[0].score - 0.25).abs() < 1e-6);
    }

    /// `Keep` with alpha 0.0 leaves the entity's original score untouched.
    #[test]
    fn test_unknown_timestamp_keep_policy_preserves_original_score() {
        let config = TemporalRetrievalConfig {
            unknown_policy: UnknownTimestampPolicy::Keep,
            temporal_alpha: 0.0, // don't blend
            decay_fn: DecayFn::None,
            ..Default::default()
        };
        let retriever = TemporalRetriever::new(config);
        let entities = vec![make_entity("http://no_ts", 0.75)];
        let result = retriever.apply(entities, &[]).expect("should succeed");
        assert_eq!(result.len(), 1);
        assert!((result[0].score - 0.75).abs() < 1e-9);
    }
}