Skip to main content

graphrag_core/graph/
temporal.rs

1//! Temporal Graph Analysis
2//!
3//! This module provides analysis capabilities for time-evolving graphs:
4//! - Temporal graph representation
5//! - Snapshot-based analysis
6//! - Evolution metrics
7//! - Temporal community detection
8//! - Time-aware path finding
9//!
10//! ## Use Cases
11//!
12//! - Social network evolution tracking
13//! - Knowledge graph versioning
14//! - Anomaly detection in dynamic networks
15//! - Trend analysis and forecasting
16//! - Event detection in temporal data
17
18use serde::{Deserialize, Serialize};
19use std::collections::{BTreeMap, HashMap, HashSet};
20
21/// Time range for temporal validity
22///
23/// Represents when an entity exists or a relationship is valid in the real world.
24/// Uses Unix timestamps (seconds since epoch). Special values:
25/// - i64::MIN represents unknown/unspecified start
26/// - i64::MAX represents ongoing/current (no end)
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
28pub struct TemporalRange {
29    /// Start of validity (Unix timestamp)
30    pub start: i64,
31    /// End of validity (Unix timestamp)
32    pub end: i64,
33}
34
35impl TemporalRange {
36    /// Create a new temporal range
37    pub fn new(start: i64, end: i64) -> Self {
38        Self { start, end }
39    }
40
41    /// Check if a timestamp falls within this range
42    pub fn contains(&self, timestamp: i64) -> bool {
43        timestamp >= self.start && timestamp <= self.end
44    }
45
46    /// Check if this range overlaps with another
47    pub fn overlaps(&self, other: &TemporalRange) -> bool {
48        self.start <= other.end && self.end >= other.start
49    }
50
51    /// Get the duration of this range in seconds
52    pub fn duration(&self) -> i64 {
53        self.end.saturating_sub(self.start)
54    }
55}
56
57/// Type of temporal relationship between entities
58///
59/// Defines how entities are related in time, including causal relationships.
60/// Based on Allen's Interval Algebra and causal reasoning.
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
62pub enum TemporalRelationType {
63    /// A occurred before B (temporal precedence)
64    Before,
65    /// A occurred during B (temporal containment)
66    During,
67    /// A occurred after B (temporal succession)
68    After,
69    /// A and B occurred simultaneously
70    SimultaneousWith,
71    /// A directly caused B (strong causal link)
72    Caused,
73    /// A enabled or facilitated B (weak causal link)
74    Enabled,
75    /// A prevented or inhibited B (negative causal link)
76    Prevented,
77    /// A and B mutually influenced each other
78    Correlated,
79}
80
81impl TemporalRelationType {
82    /// Check if this is a causal relationship type
83    pub fn is_causal(&self) -> bool {
84        matches!(
85            self,
86            TemporalRelationType::Caused
87                | TemporalRelationType::Enabled
88                | TemporalRelationType::Prevented
89        )
90    }
91
92    /// Get the strength weight for this relationship type
93    pub fn default_strength(&self) -> f32 {
94        match self {
95            TemporalRelationType::Caused => 0.9,
96            TemporalRelationType::Enabled => 0.6,
97            TemporalRelationType::Prevented => 0.7,
98            TemporalRelationType::Correlated => 0.5,
99            _ => 0.3, // Temporal-only relationships have lower default strength
100        }
101    }
102}
103
104/// Temporal edge with timestamp
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct TemporalEdge {
107    /// Source node
108    pub source: String,
109    /// Target node
110    pub target: String,
111    /// Edge type/label
112    pub edge_type: String,
113    /// Timestamp (Unix timestamp)
114    pub timestamp: i64,
115    /// Edge weight
116    pub weight: f32,
117    /// Start time (optional, for interval-based edges)
118    pub start_time: Option<i64>,
119    /// End time (optional, for interval-based edges)
120    pub end_time: Option<i64>,
121}
122
123impl TemporalEdge {
124    /// Check if edge is active at given timestamp
125    pub fn is_active_at(&self, timestamp: i64) -> bool {
126        if let (Some(start), Some(end)) = (self.start_time, self.end_time) {
127            timestamp >= start && timestamp <= end
128        } else {
129            // Point-in-time edge
130            self.timestamp == timestamp
131        }
132    }
133
134    /// Check if edge is active in time range
135    pub fn is_active_in_range(&self, start: i64, end: i64) -> bool {
136        if let (Some(edge_start), Some(edge_end)) = (self.start_time, self.end_time) {
137            // Interval overlap check
138            edge_start <= end && edge_end >= start
139        } else {
140            // Point-in-time edge
141            self.timestamp >= start && self.timestamp <= end
142        }
143    }
144}
145
146/// Graph snapshot at specific time
147#[derive(Debug, Clone)]
148pub struct Snapshot {
149    /// Snapshot timestamp
150    pub timestamp: i64,
151    /// Nodes active in this snapshot
152    pub nodes: HashSet<String>,
153    /// Edges active in this snapshot
154    pub edges: Vec<TemporalEdge>,
155    /// Edge count
156    pub edge_count: usize,
157    /// Node count
158    pub node_count: usize,
159}
160
161impl Snapshot {
162    /// Create snapshot from temporal edges
163    pub fn from_edges(timestamp: i64, edges: Vec<TemporalEdge>) -> Self {
164        let mut nodes = HashSet::new();
165
166        for edge in &edges {
167            nodes.insert(edge.source.clone());
168            nodes.insert(edge.target.clone());
169        }
170
171        let node_count = nodes.len();
172        let edge_count = edges.len();
173
174        Self {
175            timestamp,
176            nodes,
177            edges,
178            edge_count,
179            node_count,
180        }
181    }
182
183    /// Get node degree in snapshot
184    pub fn node_degree(&self, node: &str) -> usize {
185        self.edges
186            .iter()
187            .filter(|e| e.source == node || e.target == node)
188            .count()
189    }
190
191    /// Get graph density
192    pub fn density(&self) -> f32 {
193        if self.node_count < 2 {
194            return 0.0;
195        }
196
197        let max_edges = (self.node_count * (self.node_count - 1)) / 2;
198        self.edge_count as f32 / max_edges as f32
199    }
200}
201
202/// Temporal graph
203pub struct TemporalGraph {
204    /// All temporal edges
205    edges: Vec<TemporalEdge>,
206    /// Edge index by timestamp
207    edge_index: BTreeMap<i64, Vec<usize>>,
208    /// Node first appearance time
209    node_first_seen: HashMap<String, i64>,
210    /// Node last seen time
211    node_last_seen: HashMap<String, i64>,
212}
213
214impl TemporalGraph {
215    /// Create new temporal graph
216    pub fn new() -> Self {
217        Self {
218            edges: Vec::new(),
219            edge_index: BTreeMap::new(),
220            node_first_seen: HashMap::new(),
221            node_last_seen: HashMap::new(),
222        }
223    }
224
225    /// Add temporal edge
226    pub fn add_edge(&mut self, edge: TemporalEdge) {
227        let timestamp = edge.timestamp;
228        let edge_idx = self.edges.len();
229
230        // Update node timestamps
231        self.update_node_timestamp(&edge.source, timestamp);
232        self.update_node_timestamp(&edge.target, timestamp);
233
234        // Add to edge index
235        self.edge_index.entry(timestamp).or_default().push(edge_idx);
236
237        self.edges.push(edge);
238    }
239
240    /// Update node first/last seen timestamps
241    fn update_node_timestamp(&mut self, node: &str, timestamp: i64) {
242        self.node_first_seen
243            .entry(node.to_string())
244            .and_modify(|t| *t = (*t).min(timestamp))
245            .or_insert(timestamp);
246
247        self.node_last_seen
248            .entry(node.to_string())
249            .and_modify(|t| *t = (*t).max(timestamp))
250            .or_insert(timestamp);
251    }
252
253    /// Get snapshot at specific timestamp
254    pub fn snapshot_at(&self, timestamp: i64) -> Snapshot {
255        let edges: Vec<TemporalEdge> = self
256            .edges
257            .iter()
258            .filter(|e| e.is_active_at(timestamp))
259            .cloned()
260            .collect();
261
262        Snapshot::from_edges(timestamp, edges)
263    }
264
265    /// Get snapshot for time range
266    pub fn snapshot_range(&self, start: i64, end: i64) -> Snapshot {
267        let edges: Vec<TemporalEdge> = self
268            .edges
269            .iter()
270            .filter(|e| e.is_active_in_range(start, end))
271            .cloned()
272            .collect();
273
274        Snapshot::from_edges((start + end) / 2, edges)
275    }
276
277    /// Get all timestamps (discrete time points)
278    pub fn timestamps(&self) -> Vec<i64> {
279        self.edge_index.keys().copied().collect()
280    }
281
282    /// Get time range
283    pub fn time_range(&self) -> Option<(i64, i64)> {
284        if self.edges.is_empty() {
285            return None;
286        }
287
288        let min = self
289            .edges
290            .iter()
291            .map(|e| e.timestamp)
292            .min()
293            .expect("non-empty iter");
294        let max = self
295            .edges
296            .iter()
297            .map(|e| e.timestamp)
298            .max()
299            .expect("non-empty iter");
300
301        Some((min, max))
302    }
303
304    /// Get node lifetime
305    pub fn node_lifetime(&self, node: &str) -> Option<(i64, i64)> {
306        let first = self.node_first_seen.get(node)?;
307        let last = self.node_last_seen.get(node)?;
308
309        Some((*first, *last))
310    }
311
312    /// Get all nodes
313    pub fn nodes(&self) -> HashSet<String> {
314        self.node_first_seen.keys().cloned().collect()
315    }
316
317    /// Get edge count
318    pub fn edge_count(&self) -> usize {
319        self.edges.len()
320    }
321
322    /// Get node count
323    pub fn node_count(&self) -> usize {
324        self.node_first_seen.len()
325    }
326}
327
328impl Default for TemporalGraph {
329    fn default() -> Self {
330        Self::new()
331    }
332}
333
334/// Temporal query parameters
335#[derive(Debug, Clone, Serialize, Deserialize)]
336pub struct TemporalQuery {
337    /// Start timestamp
338    pub start_time: i64,
339    /// End timestamp
340    pub end_time: i64,
341    /// Granularity (e.g., daily, weekly)
342    pub granularity: i64,
343    /// Node filter (optional)
344    pub nodes: Option<Vec<String>>,
345    /// Edge type filter (optional)
346    pub edge_types: Option<Vec<String>>,
347}
348
349/// Temporal analytics engine
350pub struct TemporalAnalytics {
351    graph: TemporalGraph,
352}
353
354impl TemporalAnalytics {
355    /// Create analytics engine
356    pub fn new(graph: TemporalGraph) -> Self {
357        Self { graph }
358    }
359
360    /// Calculate evolution metrics over time
361    pub fn evolution_metrics(&self, query: &TemporalQuery) -> Vec<EvolutionMetrics> {
362        let mut metrics = Vec::new();
363        let mut current_time = query.start_time;
364
365        while current_time <= query.end_time {
366            let next_time = current_time + query.granularity;
367            let snapshot = self.graph.snapshot_range(current_time, next_time);
368
369            let metric = EvolutionMetrics {
370                timestamp: current_time,
371                node_count: snapshot.node_count,
372                edge_count: snapshot.edge_count,
373                density: snapshot.density(),
374                avg_degree: self.calculate_avg_degree(&snapshot),
375            };
376
377            metrics.push(metric);
378            current_time = next_time;
379        }
380
381        metrics
382    }
383
384    /// Calculate average node degree in snapshot
385    fn calculate_avg_degree(&self, snapshot: &Snapshot) -> f32 {
386        if snapshot.node_count == 0 {
387            return 0.0;
388        }
389
390        let total_degree: usize = snapshot.nodes.iter().map(|n| snapshot.node_degree(n)).sum();
391
392        total_degree as f32 / snapshot.node_count as f32
393    }
394
395    /// Detect node churn (nodes appearing/disappearing)
396    pub fn node_churn(&self, query: &TemporalQuery) -> NodeChurn {
397        let start_snapshot = self.graph.snapshot_at(query.start_time);
398        let end_snapshot = self.graph.snapshot_at(query.end_time);
399
400        let added: HashSet<_> = end_snapshot
401            .nodes
402            .difference(&start_snapshot.nodes)
403            .cloned()
404            .collect();
405
406        let removed: HashSet<_> = start_snapshot
407            .nodes
408            .difference(&end_snapshot.nodes)
409            .cloned()
410            .collect();
411
412        let stable: HashSet<_> = start_snapshot
413            .nodes
414            .intersection(&end_snapshot.nodes)
415            .cloned()
416            .collect();
417
418        let added_count = added.len();
419        let removed_count = removed.len();
420        let stable_count = stable.len();
421
422        NodeChurn {
423            added: added.into_iter().collect(),
424            removed: removed.into_iter().collect(),
425            stable: stable.into_iter().collect(),
426            added_count,
427            removed_count,
428            stable_count,
429        }
430    }
431
432    /// Find nodes with highest activity growth
433    pub fn top_growing_nodes(&self, query: &TemporalQuery, top_k: usize) -> Vec<(String, f32)> {
434        let start_snapshot = self
435            .graph
436            .snapshot_range(query.start_time, query.start_time + query.granularity);
437        let end_snapshot = self
438            .graph
439            .snapshot_range(query.end_time - query.granularity, query.end_time);
440
441        let mut growth_scores: Vec<(String, f32)> = Vec::new();
442
443        for node in &end_snapshot.nodes {
444            let start_degree = start_snapshot.node_degree(node) as f32;
445            let end_degree = end_snapshot.node_degree(node) as f32;
446
447            let growth = if start_degree > 0.0 {
448                (end_degree - start_degree) / start_degree
449            } else {
450                end_degree
451            };
452
453            growth_scores.push((node.clone(), growth));
454        }
455
456        growth_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
457        growth_scores.truncate(top_k);
458
459        growth_scores
460    }
461
462    /// Get temporal centrality (activity over time)
463    pub fn temporal_centrality(&self, node: &str, query: &TemporalQuery) -> Vec<(i64, f32)> {
464        let mut centrality = Vec::new();
465        let mut current_time = query.start_time;
466
467        while current_time <= query.end_time {
468            let next_time = current_time + query.granularity;
469            let snapshot = self.graph.snapshot_range(current_time, next_time);
470
471            let degree = snapshot.node_degree(node) as f32;
472            let centrality_score = if snapshot.node_count > 1 {
473                degree / (snapshot.node_count - 1) as f32
474            } else {
475                0.0
476            };
477
478            centrality.push((current_time, centrality_score));
479            current_time = next_time;
480        }
481
482        centrality
483    }
484}
485
486/// Evolution metrics over time
487#[derive(Debug, Clone, Serialize, Deserialize)]
488pub struct EvolutionMetrics {
489    /// Timestamp
490    pub timestamp: i64,
491    /// Number of nodes
492    pub node_count: usize,
493    /// Number of edges
494    pub edge_count: usize,
495    /// Graph density
496    pub density: f32,
497    /// Average degree
498    pub avg_degree: f32,
499}
500
501/// Node churn analysis
502#[derive(Debug, Clone)]
503pub struct NodeChurn {
504    /// Nodes added
505    pub added: Vec<String>,
506    /// Nodes removed
507    pub removed: Vec<String>,
508    /// Stable nodes
509    pub stable: Vec<String>,
510    /// Count of added nodes
511    pub added_count: usize,
512    /// Count of removed nodes
513    pub removed_count: usize,
514    /// Count of stable nodes
515    pub stable_count: usize,
516}
517
518#[cfg(test)]
519mod tests {
520    use super::*;
521
522    fn create_test_temporal_graph() -> TemporalGraph {
523        let mut graph = TemporalGraph::new();
524
525        // Add edges over time
526        graph.add_edge(TemporalEdge {
527            source: "A".to_string(),
528            target: "B".to_string(),
529            edge_type: "knows".to_string(),
530            timestamp: 100,
531            weight: 1.0,
532            start_time: Some(100),
533            end_time: Some(200),
534        });
535
536        graph.add_edge(TemporalEdge {
537            source: "B".to_string(),
538            target: "C".to_string(),
539            edge_type: "knows".to_string(),
540            timestamp: 150,
541            weight: 1.0,
542            start_time: Some(150),
543            end_time: Some(250),
544        });
545
546        graph.add_edge(TemporalEdge {
547            source: "A".to_string(),
548            target: "C".to_string(),
549            edge_type: "knows".to_string(),
550            timestamp: 200,
551            weight: 1.0,
552            start_time: Some(200),
553            end_time: Some(300),
554        });
555
556        graph
557    }
558
559    #[test]
560    fn test_temporal_graph_creation() {
561        let graph = create_test_temporal_graph();
562        assert_eq!(graph.edge_count(), 3);
563        assert_eq!(graph.node_count(), 3);
564    }
565
566    #[test]
567    fn test_snapshot_at_timestamp() {
568        let graph = create_test_temporal_graph();
569        let snapshot = graph.snapshot_at(150);
570
571        assert!(snapshot.node_count > 0);
572        assert!(snapshot.edge_count > 0);
573    }
574
575    #[test]
576    fn test_snapshot_range() {
577        let graph = create_test_temporal_graph();
578        let snapshot = graph.snapshot_range(100, 200);
579
580        assert_eq!(snapshot.node_count, 3);
581        assert!(snapshot.edge_count >= 2);
582    }
583
584    #[test]
585    fn test_time_range() {
586        let graph = create_test_temporal_graph();
587        let (min, max) = graph.time_range().unwrap();
588
589        assert_eq!(min, 100);
590        assert_eq!(max, 200);
591    }
592
593    #[test]
594    fn test_node_lifetime() {
595        let graph = create_test_temporal_graph();
596        let (first, last) = graph.node_lifetime("A").unwrap();
597
598        assert_eq!(first, 100);
599        assert_eq!(last, 200);
600    }
601
602    #[test]
603    fn test_evolution_metrics() {
604        let graph = create_test_temporal_graph();
605        let analytics = TemporalAnalytics::new(graph);
606
607        let query = TemporalQuery {
608            start_time: 100,
609            end_time: 300,
610            granularity: 50,
611            nodes: None,
612            edge_types: None,
613        };
614
615        let metrics = analytics.evolution_metrics(&query);
616        assert!(!metrics.is_empty());
617
618        for metric in &metrics {
619            assert!(metric.timestamp >= 100);
620            assert!(metric.timestamp <= 300);
621        }
622    }
623
624    #[test]
625    fn test_node_churn() {
626        let mut graph = TemporalGraph::new();
627
628        // Initial nodes: A, B
629        graph.add_edge(TemporalEdge {
630            source: "A".to_string(),
631            target: "B".to_string(),
632            edge_type: "knows".to_string(),
633            timestamp: 100,
634            weight: 1.0,
635            start_time: None,
636            end_time: None,
637        });
638
639        // Later: B, C (A removed, C added)
640        graph.add_edge(TemporalEdge {
641            source: "B".to_string(),
642            target: "C".to_string(),
643            edge_type: "knows".to_string(),
644            timestamp: 200,
645            weight: 1.0,
646            start_time: None,
647            end_time: None,
648        });
649
650        let analytics = TemporalAnalytics::new(graph);
651        let query = TemporalQuery {
652            start_time: 100,
653            end_time: 200,
654            granularity: 50,
655            nodes: None,
656            edge_types: None,
657        };
658
659        let churn = analytics.node_churn(&query);
660        assert!(churn.added.contains(&"C".to_string()) || churn.stable_count > 0);
661    }
662
663    #[test]
664    fn test_temporal_edge_is_active() {
665        let edge = TemporalEdge {
666            source: "A".to_string(),
667            target: "B".to_string(),
668            edge_type: "knows".to_string(),
669            timestamp: 100,
670            weight: 1.0,
671            start_time: Some(100),
672            end_time: Some(200),
673        };
674
675        assert!(edge.is_active_at(150));
676        assert!(edge.is_active_at(100));
677        assert!(edge.is_active_at(200));
678        assert!(!edge.is_active_at(50));
679        assert!(!edge.is_active_at(250));
680
681        assert!(edge.is_active_in_range(90, 110));
682        assert!(edge.is_active_in_range(150, 250));
683        assert!(!edge.is_active_in_range(50, 90));
684    }
685
686    // Phase 1.2: Temporal Fields Tests
687    #[test]
688    fn test_temporal_range_creation() {
689        let range = TemporalRange::new(100, 200);
690        assert_eq!(range.start, 100);
691        assert_eq!(range.end, 200);
692    }
693
694    #[test]
695    fn test_temporal_range_contains() {
696        let range = TemporalRange::new(100, 200);
697
698        assert!(range.contains(100));
699        assert!(range.contains(150));
700        assert!(range.contains(200));
701        assert!(!range.contains(50));
702        assert!(!range.contains(250));
703    }
704
705    #[test]
706    fn test_temporal_range_overlaps() {
707        let range1 = TemporalRange::new(100, 200);
708        let range2 = TemporalRange::new(150, 250);
709        let range3 = TemporalRange::new(250, 300);
710
711        assert!(range1.overlaps(&range2));
712        assert!(range2.overlaps(&range1));
713        assert!(!range1.overlaps(&range3));
714        assert!(!range3.overlaps(&range1));
715    }
716
717    #[test]
718    fn test_temporal_range_duration() {
719        let range = TemporalRange::new(100, 200);
720        assert_eq!(range.duration(), 100);
721
722        let instant = TemporalRange::new(100, 100);
723        assert_eq!(instant.duration(), 0);
724    }
725
726    #[test]
727    fn test_temporal_range_serialization() {
728        let range = TemporalRange::new(100, 200);
729        let json = serde_json::to_string(&range).unwrap();
730        assert!(json.contains("100"));
731        assert!(json.contains("200"));
732
733        let deserialized: TemporalRange = serde_json::from_str(&json).unwrap();
734        assert_eq!(deserialized.start, 100);
735        assert_eq!(deserialized.end, 200);
736    }
737
738    #[test]
739    fn test_temporal_relation_type_is_causal() {
740        assert!(TemporalRelationType::Caused.is_causal());
741        assert!(TemporalRelationType::Enabled.is_causal());
742        assert!(TemporalRelationType::Prevented.is_causal());
743
744        assert!(!TemporalRelationType::Before.is_causal());
745        assert!(!TemporalRelationType::During.is_causal());
746        assert!(!TemporalRelationType::After.is_causal());
747        assert!(!TemporalRelationType::SimultaneousWith.is_causal());
748    }
749
750    #[test]
751    fn test_temporal_relation_type_default_strength() {
752        assert_eq!(TemporalRelationType::Caused.default_strength(), 0.9);
753        assert_eq!(TemporalRelationType::Enabled.default_strength(), 0.6);
754        assert_eq!(TemporalRelationType::Prevented.default_strength(), 0.7);
755        assert_eq!(TemporalRelationType::Correlated.default_strength(), 0.5);
756
757        // Non-causal should have lower strength
758        assert!(TemporalRelationType::Before.default_strength() < 0.5);
759    }
760
761    #[test]
762    fn test_temporal_relation_type_serialization() {
763        let rel_type = TemporalRelationType::Caused;
764        let json = serde_json::to_string(&rel_type).unwrap();
765        assert!(json.contains("Caused"));
766
767        let deserialized: TemporalRelationType = serde_json::from_str(&json).unwrap();
768        assert_eq!(deserialized, TemporalRelationType::Caused);
769    }
770}