Skip to main content

oxirs_graphrag/temporal/
knowledge_graph.rs

1//! Time-aware knowledge graph with BFS path finding and entity history tracking.
2//!
3//! # Overview
4//!
5//! [`TemporalKnowledgeGraph`] stores RDF-like triples annotated with validity
6//! intervals (`valid_from` / `valid_to` as Unix-ms timestamps) and a confidence
7//! score.  It exposes point-in-time and range queries, entity set queries, and a
8//! BFS-based temporal path finder.
9//!
10//! [`TemporalGraphRag`] wraps the knowledge graph and adds a simple keyword-based
11//! retrieval layer that returns the most relevant temporal triples for a natural-
12//! language question at a given point in time.
13
14use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
15
16use serde::{Deserialize, Serialize};
17
18// ─────────────────────────────────────────────────────────────────────────────
19// Core types
20// ─────────────────────────────────────────────────────────────────────────────
21
22/// A triple annotated with a validity interval and confidence score.
23#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
24pub struct TemporalTriple {
25    /// Subject URI or blank node identifier
26    pub subject: String,
27    /// Predicate URI
28    pub predicate: String,
29    /// Object URI or literal
30    pub object: String,
31    /// Inclusive start of the validity interval (Unix-ms)
32    pub valid_from: i64,
33    /// Exclusive end of the validity interval (`None` means "still valid")
34    pub valid_to: Option<i64>,
35    /// Confidence in [0.0, 1.0]
36    pub confidence: f64,
37}
38
39impl TemporalTriple {
40    /// Returns `true` if this triple is valid at `timestamp` (Unix-ms).
41    pub fn is_valid_at(&self, timestamp: i64) -> bool {
42        if timestamp < self.valid_from {
43            return false;
44        }
45        match self.valid_to {
46            Some(to) => timestamp < to,
47            None => true,
48        }
49    }
50
51    /// Returns `true` if the triple's validity interval overlaps with `[from, to)`.
52    pub fn overlaps_range(&self, from: i64, to: i64) -> bool {
53        // Triple ends before range starts
54        if let Some(t) = self.valid_to {
55            if t <= from {
56                return false;
57            }
58        }
59        // Triple starts at or after range ends
60        self.valid_from < to
61    }
62}
63
64// ─────────────────────────────────────────────────────────────────────────────
65// TemporalKnowledgeGraph
66// ─────────────────────────────────────────────────────────────────────────────
67
68/// An in-memory knowledge graph that stores triples with temporal validity.
69///
70/// Internally, triples are stored in a `Vec` and a secondary `BTreeMap` index
71/// maps `valid_from` timestamps to the indices of triples that start at that
72/// timestamp.  This allows efficient range scans over the time axis.
73pub struct TemporalKnowledgeGraph {
74    triples: Vec<TemporalTriple>,
75    /// timestamp → indices of triples whose `valid_from` equals the key
76    time_index: BTreeMap<i64, Vec<usize>>,
77}
78
79impl Default for TemporalKnowledgeGraph {
80    fn default() -> Self {
81        Self::new()
82    }
83}
84
85impl TemporalKnowledgeGraph {
86    /// Create an empty knowledge graph.
87    pub fn new() -> Self {
88        Self {
89            triples: Vec::new(),
90            time_index: BTreeMap::new(),
91        }
92    }
93
94    /// Insert a temporal triple and update the time index.
95    pub fn insert(&mut self, triple: TemporalTriple) {
96        let idx = self.triples.len();
97        let ts = triple.valid_from;
98        self.triples.push(triple);
99        self.time_index.entry(ts).or_default().push(idx);
100    }
101
102    /// Return all triples that are valid at `timestamp`.
103    pub fn query_at(&self, timestamp: i64) -> Vec<&TemporalTriple> {
104        self.triples
105            .iter()
106            .filter(|t| t.is_valid_at(timestamp))
107            .collect()
108    }
109
110    /// Return all triples whose validity interval overlaps `[from, to)`.
111    pub fn query_range(&self, from: i64, to: i64) -> Vec<&TemporalTriple> {
112        self.triples
113            .iter()
114            .filter(|t| t.overlaps_range(from, to))
115            .collect()
116    }
117
118    /// Return the set of entity identifiers (subjects and objects) that appear
119    /// in at least one valid triple at `timestamp`.
120    pub fn entities_at(&self, timestamp: i64) -> HashSet<String> {
121        let mut set = HashSet::new();
122        for t in self.query_at(timestamp) {
123            set.insert(t.subject.clone());
124            set.insert(t.object.clone());
125        }
126        set
127    }
128
129    /// Find a path from `from` to `to` using BFS over triples that are valid at
130    /// `at`.  Returns the sequence of entity identifiers along the path
131    /// (inclusive of both endpoints), or `None` if no path exists.
132    pub fn temporal_path(&self, from: &str, to: &str, at: i64) -> Option<Vec<String>> {
133        if from == to {
134            return Some(vec![from.to_string()]);
135        }
136
137        // Build adjacency from triples valid at `at`
138        let mut adj: HashMap<&str, Vec<&str>> = HashMap::new();
139        for t in self.query_at(at) {
140            adj.entry(t.subject.as_str())
141                .or_default()
142                .push(t.object.as_str());
143        }
144
145        // BFS
146        let mut visited: HashSet<&str> = HashSet::new();
147        let mut queue: VecDeque<(&str, Vec<&str>)> = VecDeque::new();
148        queue.push_back((from, vec![from]));
149        visited.insert(from);
150
151        while let Some((node, path)) = queue.pop_front() {
152            if let Some(neighbors) = adj.get(node) {
153                for &neighbor in neighbors {
154                    if neighbor == to {
155                        let mut result: Vec<String> = path.iter().map(|s| s.to_string()).collect();
156                        result.push(to.to_string());
157                        return Some(result);
158                    }
159                    if !visited.contains(neighbor) {
160                        visited.insert(neighbor);
161                        let mut new_path = path.clone();
162                        new_path.push(neighbor);
163                        queue.push_back((neighbor, new_path));
164                    }
165                }
166            }
167        }
168
169        None
170    }
171
172    /// Total number of triples (including those no longer valid).
173    pub fn len(&self) -> usize {
174        self.triples.len()
175    }
176
177    /// Returns `true` if the knowledge graph contains no triples.
178    pub fn is_empty(&self) -> bool {
179        self.triples.is_empty()
180    }
181}
182
183// ─────────────────────────────────────────────────────────────────────────────
184// EntityHistory
185// ─────────────────────────────────────────────────────────────────────────────
186
187/// Summary of an entity's appearances across time.
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct EntityHistory {
190    /// The entity identifier
191    pub entity: String,
192    /// All triples in which this entity appears (as subject or object)
193    pub events: Vec<TemporalTriple>,
194    /// Earliest `valid_from` among all events
195    pub first_seen: Option<i64>,
196    /// Latest `valid_from` among all events
197    pub last_seen: Option<i64>,
198    /// Total number of distinct predicates observed for this entity
199    pub relationship_count: usize,
200}
201
202// ─────────────────────────────────────────────────────────────────────────────
203// TemporalGraphRag
204// ─────────────────────────────────────────────────────────────────────────────
205
206/// A lightweight retrieval engine that wraps [`TemporalKnowledgeGraph`] and
207/// scores triples by keyword overlap with a natural-language question.
208pub struct TemporalGraphRag {
209    kg: TemporalKnowledgeGraph,
210    /// Optional pre-computed embedding cache (placeholder for future GPU-backed
211    /// embedding; stored as flat `Vec<f32>` keyed by entity URI).
212    embedding_cache: HashMap<String, Vec<f32>>,
213}
214
215impl Default for TemporalGraphRag {
216    fn default() -> Self {
217        Self::new()
218    }
219}
220
221impl TemporalGraphRag {
222    /// Create an empty engine.
223    pub fn new() -> Self {
224        Self {
225            kg: TemporalKnowledgeGraph::new(),
226            embedding_cache: HashMap::new(),
227        }
228    }
229
230    /// Ingest a new event into the underlying knowledge graph.
231    ///
232    /// `timestamp` is the Unix-ms start time; the triple is treated as
233    /// "still valid" (no `valid_to`).
234    pub fn ingest_event(
235        &mut self,
236        subject: &str,
237        predicate: &str,
238        object: &str,
239        timestamp: i64,
240        confidence: f64,
241    ) {
242        self.kg.insert(TemporalTriple {
243            subject: subject.to_string(),
244            predicate: predicate.to_string(),
245            object: object.to_string(),
246            valid_from: timestamp,
247            valid_to: None,
248            confidence: confidence.clamp(0.0, 1.0),
249        });
250    }
251
252    /// Retrieve up to `top_k` triples valid at `timestamp` that are most
253    /// relevant to `question`, ranked by keyword overlap score.
254    pub fn query(&self, question: &str, timestamp: i64, top_k: usize) -> Vec<TemporalTriple> {
255        let keywords: Vec<String> = question
256            .split_whitespace()
257            .map(|w| w.to_lowercase())
258            .collect();
259
260        let candidates = self.kg.query_at(timestamp);
261
262        let mut scored: Vec<(f64, &TemporalTriple)> = candidates
263            .into_iter()
264            .map(|t| {
265                let score = self.keyword_score(t, &keywords);
266                (score, t)
267            })
268            .collect();
269
270        // Sort descending by (score, confidence)
271        scored.sort_by(|(sa, ta), (sb, tb)| {
272            sa.partial_cmp(sb)
273                .unwrap_or(std::cmp::Ordering::Equal)
274                .reverse()
275                .then_with(|| {
276                    ta.confidence
277                        .partial_cmp(&tb.confidence)
278                        .unwrap_or(std::cmp::Ordering::Equal)
279                        .reverse()
280                })
281        });
282
283        scored
284            .into_iter()
285            .take(top_k)
286            .map(|(_, t)| t.clone())
287            .collect()
288    }
289
290    /// Compute a keyword overlap score for a triple against a set of query keywords.
291    fn keyword_score(&self, triple: &TemporalTriple, keywords: &[String]) -> f64 {
292        if keywords.is_empty() {
293            return 0.0;
294        }
295
296        let text = format!(
297            "{} {} {}",
298            triple.subject.to_lowercase(),
299            triple.predicate.to_lowercase(),
300            triple.object.to_lowercase()
301        );
302
303        let matched = keywords
304            .iter()
305            .filter(|kw| text.contains(kw.as_str()))
306            .count();
307
308        let raw_score = matched as f64 / keywords.len() as f64;
309        // Blend with confidence
310        raw_score * 0.7 + triple.confidence * 0.3
311    }
312
313    /// Summarise all historical appearances of `entity` in the knowledge graph.
314    pub fn summarize_entity_history(&self, entity: &str) -> EntityHistory {
315        let events: Vec<TemporalTriple> = self
316            .kg
317            .triples
318            .iter()
319            .filter(|t| t.subject == entity || t.object == entity)
320            .cloned()
321            .collect();
322
323        let first_seen = events.iter().map(|t| t.valid_from).min();
324        let last_seen = events.iter().map(|t| t.valid_from).max();
325
326        let relationship_count: HashSet<String> =
327            events.iter().map(|t| t.predicate.clone()).collect();
328        let relationship_count = relationship_count.len();
329
330        EntityHistory {
331            entity: entity.to_string(),
332            events,
333            first_seen,
334            last_seen,
335            relationship_count,
336        }
337    }
338
339    /// Store an embedding vector for `entity` in the cache.
340    pub fn cache_embedding(&mut self, entity: &str, embedding: Vec<f32>) {
341        self.embedding_cache.insert(entity.to_string(), embedding);
342    }
343
344    /// Retrieve a cached embedding, if present.
345    pub fn get_embedding(&self, entity: &str) -> Option<&Vec<f32>> {
346        self.embedding_cache.get(entity)
347    }
348
349    /// Number of events ingested.
350    pub fn event_count(&self) -> usize {
351        self.kg.len()
352    }
353}
354
355// ─────────────────────────────────────────────────────────────────────────────
356// Tests
357// ─────────────────────────────────────────────────────────────────────────────
358
359#[cfg(test)]
360mod tests {
361    use super::*;
362
363    fn make_triple(s: &str, p: &str, o: &str, from: i64, to: Option<i64>) -> TemporalTriple {
364        TemporalTriple {
365            subject: s.to_string(),
366            predicate: p.to_string(),
367            object: o.to_string(),
368            valid_from: from,
369            valid_to: to,
370            confidence: 0.9,
371        }
372    }
373
374    // ── TemporalTriple::is_valid_at ───────────────────────────────────────
375
376    #[test]
377    fn test_is_valid_at_within_interval() {
378        let t = make_triple("s", "p", "o", 100, Some(200));
379        assert!(t.is_valid_at(100));
380        assert!(t.is_valid_at(150));
381        assert!(!t.is_valid_at(99));
382        assert!(!t.is_valid_at(200)); // exclusive end
383    }
384
385    #[test]
386    fn test_is_valid_at_no_end() {
387        let t = make_triple("s", "p", "o", 100, None);
388        assert!(t.is_valid_at(100));
389        assert!(t.is_valid_at(i64::MAX));
390        assert!(!t.is_valid_at(99));
391    }
392
393    // ── TemporalTriple::overlaps_range ────────────────────────────────────
394
395    #[test]
396    fn test_overlaps_range_full_overlap() {
397        let t = make_triple("s", "p", "o", 50, Some(150));
398        assert!(t.overlaps_range(100, 200)); // starts before range end, ends after range start
399    }
400
401    #[test]
402    fn test_overlaps_range_no_overlap_before() {
403        let t = make_triple("s", "p", "o", 0, Some(50));
404        assert!(!t.overlaps_range(100, 200));
405    }
406
407    #[test]
408    fn test_overlaps_range_no_overlap_after() {
409        let t = make_triple("s", "p", "o", 300, None);
410        assert!(!t.overlaps_range(100, 200));
411    }
412
413    #[test]
414    fn test_overlaps_range_open_end() {
415        let t = make_triple("s", "p", "o", 50, None);
416        assert!(t.overlaps_range(100, 200)); // always overlaps when no end
417    }
418
419    // ── TemporalKnowledgeGraph::insert and len ────────────────────────────
420
421    #[test]
422    fn test_insert_increases_len() {
423        let mut kg = TemporalKnowledgeGraph::new();
424        assert_eq!(kg.len(), 0);
425        assert!(kg.is_empty());
426        kg.insert(make_triple("s", "p", "o", 0, None));
427        assert_eq!(kg.len(), 1);
428        assert!(!kg.is_empty());
429    }
430
431    #[test]
432    fn test_insert_updates_time_index() {
433        let mut kg = TemporalKnowledgeGraph::new();
434        kg.insert(make_triple("s", "p", "o", 1000, None));
435        assert!(kg.time_index.contains_key(&1000));
436    }
437
438    // ── TemporalKnowledgeGraph::query_at ─────────────────────────────────
439
440    #[test]
441    fn test_query_at_returns_valid_triples() {
442        let mut kg = TemporalKnowledgeGraph::new();
443        kg.insert(make_triple("a", "p", "b", 0, Some(100)));
444        kg.insert(make_triple("c", "p", "d", 50, None));
445        kg.insert(make_triple("e", "p", "f", 200, None));
446
447        let valid = kg.query_at(75);
448        assert_eq!(valid.len(), 2);
449    }
450
451    #[test]
452    fn test_query_at_empty_graph() {
453        let kg = TemporalKnowledgeGraph::new();
454        assert!(kg.query_at(0).is_empty());
455    }
456
457    #[test]
458    fn test_query_at_excludes_expired() {
459        let mut kg = TemporalKnowledgeGraph::new();
460        kg.insert(make_triple("a", "p", "b", 0, Some(50)));
461        let valid = kg.query_at(100);
462        assert!(valid.is_empty());
463    }
464
465    // ── TemporalKnowledgeGraph::query_range ───────────────────────────────
466
467    #[test]
468    fn test_query_range_returns_overlapping() {
469        let mut kg = TemporalKnowledgeGraph::new();
470        kg.insert(make_triple("a", "p", "b", 0, Some(100))); // overlaps [50,150)
471        kg.insert(make_triple("c", "p", "d", 80, Some(200))); // overlaps [50,150)
472        kg.insert(make_triple("e", "p", "f", 200, None)); // does NOT overlap [50,150)
473
474        let result = kg.query_range(50, 150);
475        assert_eq!(result.len(), 2);
476    }
477
478    #[test]
479    fn test_query_range_no_overlap() {
480        let mut kg = TemporalKnowledgeGraph::new();
481        kg.insert(make_triple("a", "p", "b", 0, Some(10)));
482        let result = kg.query_range(100, 200);
483        assert!(result.is_empty());
484    }
485
486    // ── TemporalKnowledgeGraph::entities_at ───────────────────────────────
487
488    #[test]
489    fn test_entities_at_collects_subjects_and_objects() {
490        let mut kg = TemporalKnowledgeGraph::new();
491        kg.insert(make_triple("Alice", "knows", "Bob", 0, None));
492        kg.insert(make_triple("Bob", "likes", "Carol", 0, None));
493
494        let entities = kg.entities_at(0);
495        assert!(entities.contains("Alice"));
496        assert!(entities.contains("Bob"));
497        assert!(entities.contains("Carol"));
498    }
499
500    #[test]
501    fn test_entities_at_respects_timestamp() {
502        let mut kg = TemporalKnowledgeGraph::new();
503        kg.insert(make_triple("Alice", "knows", "Bob", 0, Some(50)));
504        kg.insert(make_triple("Carol", "knows", "Dave", 100, None));
505
506        let entities = kg.entities_at(75);
507        // "Alice"/"Bob" expired, "Carol"/"Dave" not yet started
508        assert!(entities.is_empty());
509    }
510
511    // ── TemporalKnowledgeGraph::temporal_path ─────────────────────────────
512
513    #[test]
514    fn test_temporal_path_direct_edge() {
515        let mut kg = TemporalKnowledgeGraph::new();
516        kg.insert(make_triple("A", "p", "B", 0, None));
517
518        let path = kg.temporal_path("A", "B", 0).expect("should succeed");
519        assert_eq!(path, vec!["A", "B"]);
520    }
521
522    #[test]
523    fn test_temporal_path_multi_hop() {
524        let mut kg = TemporalKnowledgeGraph::new();
525        kg.insert(make_triple("A", "p", "B", 0, None));
526        kg.insert(make_triple("B", "p", "C", 0, None));
527        kg.insert(make_triple("C", "p", "D", 0, None));
528
529        let path = kg.temporal_path("A", "D", 0).expect("should succeed");
530        assert_eq!(path.first().map(|s| s.as_str()), Some("A"));
531        assert_eq!(path.last().map(|s| s.as_str()), Some("D"));
532        assert!(path.len() >= 2);
533    }
534
535    #[test]
536    fn test_temporal_path_no_path() {
537        let mut kg = TemporalKnowledgeGraph::new();
538        kg.insert(make_triple("A", "p", "B", 0, None));
539        // C is disconnected
540        assert!(kg.temporal_path("A", "C", 0).is_none());
541    }
542
543    #[test]
544    fn test_temporal_path_same_node() {
545        let kg = TemporalKnowledgeGraph::new();
546        let path = kg.temporal_path("A", "A", 0).expect("should succeed");
547        assert_eq!(path, vec!["A"]);
548    }
549
550    #[test]
551    fn test_temporal_path_ignores_future_triples() {
552        let mut kg = TemporalKnowledgeGraph::new();
553        kg.insert(make_triple("A", "p", "B", 1000, None)); // not valid at t=0
554        assert!(kg.temporal_path("A", "B", 0).is_none());
555    }
556
557    // ── TemporalGraphRag::ingest_event ────────────────────────────────────
558
559    #[test]
560    fn test_ingest_event_stores_triple() {
561        let mut rag = TemporalGraphRag::new();
562        rag.ingest_event("Alice", "knows", "Bob", 1000, 0.9);
563        assert_eq!(rag.event_count(), 1);
564    }
565
566    #[test]
567    fn test_ingest_event_clamps_confidence() {
568        let mut rag = TemporalGraphRag::new();
569        rag.ingest_event("A", "p", "B", 0, 1.5); // > 1.0 → clamp to 1.0
570        rag.ingest_event("C", "p", "D", 0, -0.5); // < 0.0 → clamp to 0.0
571        let triples = rag.kg.query_at(0);
572        for t in triples {
573            assert!(t.confidence >= 0.0 && t.confidence <= 1.0);
574        }
575    }
576
577    // ── TemporalGraphRag::query ───────────────────────────────────────────
578
579    #[test]
580    fn test_query_returns_relevant_triple() {
581        let mut rag = TemporalGraphRag::new();
582        rag.ingest_event("Apple", "releases", "iPhone", 1000, 0.9);
583        rag.ingest_event("Google", "releases", "Pixel", 1000, 0.8);
584
585        let results = rag.query("Apple iPhone", 1000, 5);
586        assert!(!results.is_empty());
587        // The Apple/iPhone triple should rank first
588        assert_eq!(results[0].subject, "Apple");
589    }
590
591    #[test]
592    fn test_query_respects_top_k() {
593        let mut rag = TemporalGraphRag::new();
594        for i in 0..10 {
595            rag.ingest_event(&format!("S{i}"), "p", &format!("O{i}"), 0, 0.9);
596        }
597        let results = rag.query("any", 0, 3);
598        assert!(results.len() <= 3);
599    }
600
601    #[test]
602    fn test_query_respects_timestamp() {
603        let mut rag = TemporalGraphRag::new();
604        rag.ingest_event("Past", "event", "X", 0, 0.9);
605        // Query at timestamp before the event
606        let results = rag.query("Past event X", -1, 5);
607        assert!(results.is_empty());
608    }
609
610    #[test]
611    fn test_query_empty_graph_returns_empty() {
612        let rag = TemporalGraphRag::new();
613        let results = rag.query("anything", 0, 5);
614        assert!(results.is_empty());
615    }
616
617    // ── TemporalGraphRag::summarize_entity_history ────────────────────────
618
619    #[test]
620    fn test_summarize_entity_history_basic() {
621        let mut rag = TemporalGraphRag::new();
622        rag.ingest_event("Alice", "knows", "Bob", 100, 0.9);
623        rag.ingest_event("Alice", "likes", "Carol", 200, 0.8);
624        rag.ingest_event("Dave", "knows", "Alice", 300, 0.7);
625
626        let history = rag.summarize_entity_history("Alice");
627        assert_eq!(history.entity, "Alice");
628        assert_eq!(history.events.len(), 3);
629        assert_eq!(history.first_seen, Some(100));
630        assert_eq!(history.last_seen, Some(300));
631        assert_eq!(history.relationship_count, 2); // "knows" and "likes"
632    }
633
634    #[test]
635    fn test_summarize_entity_history_unknown_entity() {
636        let rag = TemporalGraphRag::new();
637        let history = rag.summarize_entity_history("Unknown");
638        assert!(history.events.is_empty());
639        assert!(history.first_seen.is_none());
640        assert!(history.last_seen.is_none());
641        assert_eq!(history.relationship_count, 0);
642    }
643
644    // ── TemporalGraphRag::embedding cache ─────────────────────────────────
645
646    #[test]
647    fn test_embedding_cache_roundtrip() {
648        let mut rag = TemporalGraphRag::new();
649        let embedding = vec![0.1_f32, 0.2, 0.3];
650        rag.cache_embedding("Alice", embedding.clone());
651        assert_eq!(rag.get_embedding("Alice"), Some(&embedding));
652        assert!(rag.get_embedding("Bob").is_none());
653    }
654}