Skip to main content

graphrag_core/graph/
temporal.rs

1//! Temporal Graph Analysis
2//!
3//! This module provides analysis capabilities for time-evolving graphs:
4//! - Temporal graph representation
5//! - Snapshot-based analysis
6//! - Evolution metrics
7//! - Temporal community detection
8//! - Time-aware path finding
9//!
10//! ## Use Cases
11//!
12//! - Social network evolution tracking
13//! - Knowledge graph versioning
14//! - Anomaly detection in dynamic networks
15//! - Trend analysis and forecasting
16//! - Event detection in temporal data
17
18use serde::{Deserialize, Serialize};
19use std::collections::{BTreeMap, HashMap, HashSet};
20
21/// Time range for temporal validity
22///
23/// Represents when an entity exists or a relationship is valid in the real world.
24/// Uses Unix timestamps (seconds since epoch). Special values:
25/// - i64::MIN represents unknown/unspecified start
26/// - i64::MAX represents ongoing/current (no end)
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
28pub struct TemporalRange {
29    /// Start of validity (Unix timestamp)
30    pub start: i64,
31    /// End of validity (Unix timestamp)
32    pub end: i64,
33}
34
35impl TemporalRange {
36    /// Create a new temporal range
37    pub fn new(start: i64, end: i64) -> Self {
38        Self { start, end }
39    }
40
41    /// Check if a timestamp falls within this range
42    pub fn contains(&self, timestamp: i64) -> bool {
43        timestamp >= self.start && timestamp <= self.end
44    }
45
46    /// Check if this range overlaps with another
47    pub fn overlaps(&self, other: &TemporalRange) -> bool {
48        self.start <= other.end && self.end >= other.start
49    }
50
51    /// Get the duration of this range in seconds
52    pub fn duration(&self) -> i64 {
53        self.end.saturating_sub(self.start)
54    }
55}
56
57/// Type of temporal relationship between entities
58///
59/// Defines how entities are related in time, including causal relationships.
60/// Based on Allen's Interval Algebra and causal reasoning.
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
62pub enum TemporalRelationType {
63    /// A occurred before B (temporal precedence)
64    Before,
65    /// A occurred during B (temporal containment)
66    During,
67    /// A occurred after B (temporal succession)
68    After,
69    /// A and B occurred simultaneously
70    SimultaneousWith,
71    /// A directly caused B (strong causal link)
72    Caused,
73    /// A enabled or facilitated B (weak causal link)
74    Enabled,
75    /// A prevented or inhibited B (negative causal link)
76    Prevented,
77    /// A and B mutually influenced each other
78    Correlated,
79}
80
81impl TemporalRelationType {
82    /// Check if this is a causal relationship type
83    pub fn is_causal(&self) -> bool {
84        matches!(
85            self,
86            TemporalRelationType::Caused
87                | TemporalRelationType::Enabled
88                | TemporalRelationType::Prevented
89        )
90    }
91
92    /// Get the strength weight for this relationship type
93    pub fn default_strength(&self) -> f32 {
94        match self {
95            TemporalRelationType::Caused => 0.9,
96            TemporalRelationType::Enabled => 0.6,
97            TemporalRelationType::Prevented => 0.7,
98            TemporalRelationType::Correlated => 0.5,
99            _ => 0.3, // Temporal-only relationships have lower default strength
100        }
101    }
102}
103
104/// Temporal edge with timestamp
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct TemporalEdge {
107    /// Source node
108    pub source: String,
109    /// Target node
110    pub target: String,
111    /// Edge type/label
112    pub edge_type: String,
113    /// Timestamp (Unix timestamp)
114    pub timestamp: i64,
115    /// Edge weight
116    pub weight: f32,
117    /// Start time (optional, for interval-based edges)
118    pub start_time: Option<i64>,
119    /// End time (optional, for interval-based edges)
120    pub end_time: Option<i64>,
121}
122
123impl TemporalEdge {
124    /// Check if edge is active at given timestamp
125    pub fn is_active_at(&self, timestamp: i64) -> bool {
126        if let (Some(start), Some(end)) = (self.start_time, self.end_time) {
127            timestamp >= start && timestamp <= end
128        } else {
129            // Point-in-time edge
130            self.timestamp == timestamp
131        }
132    }
133
134    /// Check if edge is active in time range
135    pub fn is_active_in_range(&self, start: i64, end: i64) -> bool {
136        if let (Some(edge_start), Some(edge_end)) = (self.start_time, self.end_time) {
137            // Interval overlap check
138            edge_start <= end && edge_end >= start
139        } else {
140            // Point-in-time edge
141            self.timestamp >= start && self.timestamp <= end
142        }
143    }
144}
145
146/// Graph snapshot at specific time
147#[derive(Debug, Clone)]
148pub struct Snapshot {
149    /// Snapshot timestamp
150    pub timestamp: i64,
151    /// Nodes active in this snapshot
152    pub nodes: HashSet<String>,
153    /// Edges active in this snapshot
154    pub edges: Vec<TemporalEdge>,
155    /// Edge count
156    pub edge_count: usize,
157    /// Node count
158    pub node_count: usize,
159}
160
161impl Snapshot {
162    /// Create snapshot from temporal edges
163    pub fn from_edges(timestamp: i64, edges: Vec<TemporalEdge>) -> Self {
164        let mut nodes = HashSet::new();
165
166        for edge in &edges {
167            nodes.insert(edge.source.clone());
168            nodes.insert(edge.target.clone());
169        }
170
171        let node_count = nodes.len();
172        let edge_count = edges.len();
173
174        Self {
175            timestamp,
176            nodes,
177            edges,
178            edge_count,
179            node_count,
180        }
181    }
182
183    /// Get node degree in snapshot
184    pub fn node_degree(&self, node: &str) -> usize {
185        self.edges
186            .iter()
187            .filter(|e| e.source == node || e.target == node)
188            .count()
189    }
190
191    /// Get graph density
192    pub fn density(&self) -> f32 {
193        if self.node_count < 2 {
194            return 0.0;
195        }
196
197        let max_edges = (self.node_count * (self.node_count - 1)) / 2;
198        self.edge_count as f32 / max_edges as f32
199    }
200}
201
202/// Temporal graph
203pub struct TemporalGraph {
204    /// All temporal edges
205    edges: Vec<TemporalEdge>,
206    /// Edge index by timestamp
207    edge_index: BTreeMap<i64, Vec<usize>>,
208    /// Node first appearance time
209    node_first_seen: HashMap<String, i64>,
210    /// Node last seen time
211    node_last_seen: HashMap<String, i64>,
212}
213
214impl TemporalGraph {
215    /// Create new temporal graph
216    pub fn new() -> Self {
217        Self {
218            edges: Vec::new(),
219            edge_index: BTreeMap::new(),
220            node_first_seen: HashMap::new(),
221            node_last_seen: HashMap::new(),
222        }
223    }
224
225    /// Add temporal edge
226    pub fn add_edge(&mut self, edge: TemporalEdge) {
227        let timestamp = edge.timestamp;
228        let edge_idx = self.edges.len();
229
230        // Update node timestamps
231        self.update_node_timestamp(&edge.source, timestamp);
232        self.update_node_timestamp(&edge.target, timestamp);
233
234        // Add to edge index
235        self.edge_index.entry(timestamp).or_default().push(edge_idx);
236
237        self.edges.push(edge);
238    }
239
240    /// Update node first/last seen timestamps
241    fn update_node_timestamp(&mut self, node: &str, timestamp: i64) {
242        self.node_first_seen
243            .entry(node.to_string())
244            .and_modify(|t| *t = (*t).min(timestamp))
245            .or_insert(timestamp);
246
247        self.node_last_seen
248            .entry(node.to_string())
249            .and_modify(|t| *t = (*t).max(timestamp))
250            .or_insert(timestamp);
251    }
252
253    /// Get snapshot at specific timestamp
254    pub fn snapshot_at(&self, timestamp: i64) -> Snapshot {
255        let edges: Vec<TemporalEdge> = self
256            .edges
257            .iter()
258            .filter(|e| e.is_active_at(timestamp))
259            .cloned()
260            .collect();
261
262        Snapshot::from_edges(timestamp, edges)
263    }
264
265    /// Get snapshot for time range
266    pub fn snapshot_range(&self, start: i64, end: i64) -> Snapshot {
267        let edges: Vec<TemporalEdge> = self
268            .edges
269            .iter()
270            .filter(|e| e.is_active_in_range(start, end))
271            .cloned()
272            .collect();
273
274        Snapshot::from_edges((start + end) / 2, edges)
275    }
276
277    /// Get all timestamps (discrete time points)
278    pub fn timestamps(&self) -> Vec<i64> {
279        self.edge_index.keys().copied().collect()
280    }
281
282    /// Get time range
283    pub fn time_range(&self) -> Option<(i64, i64)> {
284        if self.edges.is_empty() {
285            return None;
286        }
287
288        let min = self.edges.iter().map(|e| e.timestamp).min().unwrap();
289        let max = self.edges.iter().map(|e| e.timestamp).max().unwrap();
290
291        Some((min, max))
292    }
293
294    /// Get node lifetime
295    pub fn node_lifetime(&self, node: &str) -> Option<(i64, i64)> {
296        let first = self.node_first_seen.get(node)?;
297        let last = self.node_last_seen.get(node)?;
298
299        Some((*first, *last))
300    }
301
302    /// Get all nodes
303    pub fn nodes(&self) -> HashSet<String> {
304        self.node_first_seen.keys().cloned().collect()
305    }
306
307    /// Get edge count
308    pub fn edge_count(&self) -> usize {
309        self.edges.len()
310    }
311
312    /// Get node count
313    pub fn node_count(&self) -> usize {
314        self.node_first_seen.len()
315    }
316}
317
318impl Default for TemporalGraph {
319    fn default() -> Self {
320        Self::new()
321    }
322}
323
324/// Temporal query parameters
325#[derive(Debug, Clone, Serialize, Deserialize)]
326pub struct TemporalQuery {
327    /// Start timestamp
328    pub start_time: i64,
329    /// End timestamp
330    pub end_time: i64,
331    /// Granularity (e.g., daily, weekly)
332    pub granularity: i64,
333    /// Node filter (optional)
334    pub nodes: Option<Vec<String>>,
335    /// Edge type filter (optional)
336    pub edge_types: Option<Vec<String>>,
337}
338
339/// Temporal analytics engine
340pub struct TemporalAnalytics {
341    graph: TemporalGraph,
342}
343
344impl TemporalAnalytics {
345    /// Create analytics engine
346    pub fn new(graph: TemporalGraph) -> Self {
347        Self { graph }
348    }
349
350    /// Calculate evolution metrics over time
351    pub fn evolution_metrics(&self, query: &TemporalQuery) -> Vec<EvolutionMetrics> {
352        let mut metrics = Vec::new();
353        let mut current_time = query.start_time;
354
355        while current_time <= query.end_time {
356            let next_time = current_time + query.granularity;
357            let snapshot = self.graph.snapshot_range(current_time, next_time);
358
359            let metric = EvolutionMetrics {
360                timestamp: current_time,
361                node_count: snapshot.node_count,
362                edge_count: snapshot.edge_count,
363                density: snapshot.density(),
364                avg_degree: self.calculate_avg_degree(&snapshot),
365            };
366
367            metrics.push(metric);
368            current_time = next_time;
369        }
370
371        metrics
372    }
373
374    /// Calculate average node degree in snapshot
375    fn calculate_avg_degree(&self, snapshot: &Snapshot) -> f32 {
376        if snapshot.node_count == 0 {
377            return 0.0;
378        }
379
380        let total_degree: usize = snapshot.nodes.iter().map(|n| snapshot.node_degree(n)).sum();
381
382        total_degree as f32 / snapshot.node_count as f32
383    }
384
385    /// Detect node churn (nodes appearing/disappearing)
386    pub fn node_churn(&self, query: &TemporalQuery) -> NodeChurn {
387        let start_snapshot = self.graph.snapshot_at(query.start_time);
388        let end_snapshot = self.graph.snapshot_at(query.end_time);
389
390        let added: HashSet<_> = end_snapshot
391            .nodes
392            .difference(&start_snapshot.nodes)
393            .cloned()
394            .collect();
395
396        let removed: HashSet<_> = start_snapshot
397            .nodes
398            .difference(&end_snapshot.nodes)
399            .cloned()
400            .collect();
401
402        let stable: HashSet<_> = start_snapshot
403            .nodes
404            .intersection(&end_snapshot.nodes)
405            .cloned()
406            .collect();
407
408        let added_count = added.len();
409        let removed_count = removed.len();
410        let stable_count = stable.len();
411
412        NodeChurn {
413            added: added.into_iter().collect(),
414            removed: removed.into_iter().collect(),
415            stable: stable.into_iter().collect(),
416            added_count,
417            removed_count,
418            stable_count,
419        }
420    }
421
422    /// Find nodes with highest activity growth
423    pub fn top_growing_nodes(&self, query: &TemporalQuery, top_k: usize) -> Vec<(String, f32)> {
424        let start_snapshot = self
425            .graph
426            .snapshot_range(query.start_time, query.start_time + query.granularity);
427        let end_snapshot = self
428            .graph
429            .snapshot_range(query.end_time - query.granularity, query.end_time);
430
431        let mut growth_scores: Vec<(String, f32)> = Vec::new();
432
433        for node in &end_snapshot.nodes {
434            let start_degree = start_snapshot.node_degree(node) as f32;
435            let end_degree = end_snapshot.node_degree(node) as f32;
436
437            let growth = if start_degree > 0.0 {
438                (end_degree - start_degree) / start_degree
439            } else {
440                end_degree
441            };
442
443            growth_scores.push((node.clone(), growth));
444        }
445
446        growth_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
447        growth_scores.truncate(top_k);
448
449        growth_scores
450    }
451
452    /// Get temporal centrality (activity over time)
453    pub fn temporal_centrality(&self, node: &str, query: &TemporalQuery) -> Vec<(i64, f32)> {
454        let mut centrality = Vec::new();
455        let mut current_time = query.start_time;
456
457        while current_time <= query.end_time {
458            let next_time = current_time + query.granularity;
459            let snapshot = self.graph.snapshot_range(current_time, next_time);
460
461            let degree = snapshot.node_degree(node) as f32;
462            let centrality_score = if snapshot.node_count > 1 {
463                degree / (snapshot.node_count - 1) as f32
464            } else {
465                0.0
466            };
467
468            centrality.push((current_time, centrality_score));
469            current_time = next_time;
470        }
471
472        centrality
473    }
474}
475
476/// Evolution metrics over time
477#[derive(Debug, Clone, Serialize, Deserialize)]
478pub struct EvolutionMetrics {
479    /// Timestamp
480    pub timestamp: i64,
481    /// Number of nodes
482    pub node_count: usize,
483    /// Number of edges
484    pub edge_count: usize,
485    /// Graph density
486    pub density: f32,
487    /// Average degree
488    pub avg_degree: f32,
489}
490
491/// Node churn analysis
492#[derive(Debug, Clone)]
493pub struct NodeChurn {
494    /// Nodes added
495    pub added: Vec<String>,
496    /// Nodes removed
497    pub removed: Vec<String>,
498    /// Stable nodes
499    pub stable: Vec<String>,
500    /// Count of added nodes
501    pub added_count: usize,
502    /// Count of removed nodes
503    pub removed_count: usize,
504    /// Count of stable nodes
505    pub stable_count: usize,
506}
507
508#[cfg(test)]
509mod tests {
510    use super::*;
511
512    fn create_test_temporal_graph() -> TemporalGraph {
513        let mut graph = TemporalGraph::new();
514
515        // Add edges over time
516        graph.add_edge(TemporalEdge {
517            source: "A".to_string(),
518            target: "B".to_string(),
519            edge_type: "knows".to_string(),
520            timestamp: 100,
521            weight: 1.0,
522            start_time: Some(100),
523            end_time: Some(200),
524        });
525
526        graph.add_edge(TemporalEdge {
527            source: "B".to_string(),
528            target: "C".to_string(),
529            edge_type: "knows".to_string(),
530            timestamp: 150,
531            weight: 1.0,
532            start_time: Some(150),
533            end_time: Some(250),
534        });
535
536        graph.add_edge(TemporalEdge {
537            source: "A".to_string(),
538            target: "C".to_string(),
539            edge_type: "knows".to_string(),
540            timestamp: 200,
541            weight: 1.0,
542            start_time: Some(200),
543            end_time: Some(300),
544        });
545
546        graph
547    }
548
549    #[test]
550    fn test_temporal_graph_creation() {
551        let graph = create_test_temporal_graph();
552        assert_eq!(graph.edge_count(), 3);
553        assert_eq!(graph.node_count(), 3);
554    }
555
556    #[test]
557    fn test_snapshot_at_timestamp() {
558        let graph = create_test_temporal_graph();
559        let snapshot = graph.snapshot_at(150);
560
561        assert!(snapshot.node_count > 0);
562        assert!(snapshot.edge_count > 0);
563    }
564
565    #[test]
566    fn test_snapshot_range() {
567        let graph = create_test_temporal_graph();
568        let snapshot = graph.snapshot_range(100, 200);
569
570        assert_eq!(snapshot.node_count, 3);
571        assert!(snapshot.edge_count >= 2);
572    }
573
574    #[test]
575    fn test_time_range() {
576        let graph = create_test_temporal_graph();
577        let (min, max) = graph.time_range().unwrap();
578
579        assert_eq!(min, 100);
580        assert_eq!(max, 200);
581    }
582
583    #[test]
584    fn test_node_lifetime() {
585        let graph = create_test_temporal_graph();
586        let (first, last) = graph.node_lifetime("A").unwrap();
587
588        assert_eq!(first, 100);
589        assert_eq!(last, 200);
590    }
591
592    #[test]
593    fn test_evolution_metrics() {
594        let graph = create_test_temporal_graph();
595        let analytics = TemporalAnalytics::new(graph);
596
597        let query = TemporalQuery {
598            start_time: 100,
599            end_time: 300,
600            granularity: 50,
601            nodes: None,
602            edge_types: None,
603        };
604
605        let metrics = analytics.evolution_metrics(&query);
606        assert!(!metrics.is_empty());
607
608        for metric in &metrics {
609            assert!(metric.timestamp >= 100);
610            assert!(metric.timestamp <= 300);
611        }
612    }
613
614    #[test]
615    fn test_node_churn() {
616        let mut graph = TemporalGraph::new();
617
618        // Initial nodes: A, B
619        graph.add_edge(TemporalEdge {
620            source: "A".to_string(),
621            target: "B".to_string(),
622            edge_type: "knows".to_string(),
623            timestamp: 100,
624            weight: 1.0,
625            start_time: None,
626            end_time: None,
627        });
628
629        // Later: B, C (A removed, C added)
630        graph.add_edge(TemporalEdge {
631            source: "B".to_string(),
632            target: "C".to_string(),
633            edge_type: "knows".to_string(),
634            timestamp: 200,
635            weight: 1.0,
636            start_time: None,
637            end_time: None,
638        });
639
640        let analytics = TemporalAnalytics::new(graph);
641        let query = TemporalQuery {
642            start_time: 100,
643            end_time: 200,
644            granularity: 50,
645            nodes: None,
646            edge_types: None,
647        };
648
649        let churn = analytics.node_churn(&query);
650        assert!(churn.added.contains(&"C".to_string()) || churn.stable_count > 0);
651    }
652
653    #[test]
654    fn test_temporal_edge_is_active() {
655        let edge = TemporalEdge {
656            source: "A".to_string(),
657            target: "B".to_string(),
658            edge_type: "knows".to_string(),
659            timestamp: 100,
660            weight: 1.0,
661            start_time: Some(100),
662            end_time: Some(200),
663        };
664
665        assert!(edge.is_active_at(150));
666        assert!(edge.is_active_at(100));
667        assert!(edge.is_active_at(200));
668        assert!(!edge.is_active_at(50));
669        assert!(!edge.is_active_at(250));
670
671        assert!(edge.is_active_in_range(90, 110));
672        assert!(edge.is_active_in_range(150, 250));
673        assert!(!edge.is_active_in_range(50, 90));
674    }
675
676    // Phase 1.2: Temporal Fields Tests
677    #[test]
678    fn test_temporal_range_creation() {
679        let range = TemporalRange::new(100, 200);
680        assert_eq!(range.start, 100);
681        assert_eq!(range.end, 200);
682    }
683
684    #[test]
685    fn test_temporal_range_contains() {
686        let range = TemporalRange::new(100, 200);
687
688        assert!(range.contains(100));
689        assert!(range.contains(150));
690        assert!(range.contains(200));
691        assert!(!range.contains(50));
692        assert!(!range.contains(250));
693    }
694
695    #[test]
696    fn test_temporal_range_overlaps() {
697        let range1 = TemporalRange::new(100, 200);
698        let range2 = TemporalRange::new(150, 250);
699        let range3 = TemporalRange::new(250, 300);
700
701        assert!(range1.overlaps(&range2));
702        assert!(range2.overlaps(&range1));
703        assert!(!range1.overlaps(&range3));
704        assert!(!range3.overlaps(&range1));
705    }
706
707    #[test]
708    fn test_temporal_range_duration() {
709        let range = TemporalRange::new(100, 200);
710        assert_eq!(range.duration(), 100);
711
712        let instant = TemporalRange::new(100, 100);
713        assert_eq!(instant.duration(), 0);
714    }
715
716    #[test]
717    fn test_temporal_range_serialization() {
718        let range = TemporalRange::new(100, 200);
719        let json = serde_json::to_string(&range).unwrap();
720        assert!(json.contains("100"));
721        assert!(json.contains("200"));
722
723        let deserialized: TemporalRange = serde_json::from_str(&json).unwrap();
724        assert_eq!(deserialized.start, 100);
725        assert_eq!(deserialized.end, 200);
726    }
727
728    #[test]
729    fn test_temporal_relation_type_is_causal() {
730        assert!(TemporalRelationType::Caused.is_causal());
731        assert!(TemporalRelationType::Enabled.is_causal());
732        assert!(TemporalRelationType::Prevented.is_causal());
733
734        assert!(!TemporalRelationType::Before.is_causal());
735        assert!(!TemporalRelationType::During.is_causal());
736        assert!(!TemporalRelationType::After.is_causal());
737        assert!(!TemporalRelationType::SimultaneousWith.is_causal());
738    }
739
740    #[test]
741    fn test_temporal_relation_type_default_strength() {
742        assert_eq!(TemporalRelationType::Caused.default_strength(), 0.9);
743        assert_eq!(TemporalRelationType::Enabled.default_strength(), 0.6);
744        assert_eq!(TemporalRelationType::Prevented.default_strength(), 0.7);
745        assert_eq!(TemporalRelationType::Correlated.default_strength(), 0.5);
746
747        // Non-causal should have lower strength
748        assert!(TemporalRelationType::Before.default_strength() < 0.5);
749    }
750
751    #[test]
752    fn test_temporal_relation_type_serialization() {
753        let rel_type = TemporalRelationType::Caused;
754        let json = serde_json::to_string(&rel_type).unwrap();
755        assert!(json.contains("Caused"));
756
757        let deserialized: TemporalRelationType = serde_json::from_str(&json).unwrap();
758        assert_eq!(deserialized, TemporalRelationType::Caused);
759    }
760}