Skip to main content

fabula_discovery/
corpus.rs

1use fabula::interval::{AllenRelation, Interval};
2use std::collections::{HashMap, HashSet};
3
4/// A single edge in a trace corpus.
5#[derive(Debug, Clone)]
6pub struct TraceEdge {
7    pub source: String,
8    pub label: String,
9    pub target: String,
10    pub interval: Interval<i64>,
11}
12
13/// An indexed log of edges for pattern discovery.
14///
15/// Built from a simulation's edge history. Provides indexed access
16/// by label, by node, and by time range for efficient mining.
17#[derive(Debug, Clone)]
18pub struct TraceCorpus {
19    edges: Vec<TraceEdge>,
20    by_label: HashMap<String, Vec<usize>>,
21    by_source: HashMap<String, Vec<usize>>,
22    by_target: HashMap<String, Vec<usize>>,
23}
24
25impl TraceCorpus {
26    /// Build a corpus from a list of (source, label, target, interval) tuples.
27    pub fn new(raw: Vec<(String, String, String, Interval<i64>)>) -> Self {
28        let edges: Vec<TraceEdge> = raw
29            .into_iter()
30            .map(|(source, label, target, interval)| TraceEdge {
31                source,
32                label,
33                target,
34                interval,
35            })
36            .collect();
37
38        let mut by_label: HashMap<String, Vec<usize>> = HashMap::new();
39        let mut by_source: HashMap<String, Vec<usize>> = HashMap::new();
40        let mut by_target: HashMap<String, Vec<usize>> = HashMap::new();
41
42        for (i, e) in edges.iter().enumerate() {
43            by_label.entry(e.label.clone()).or_default().push(i);
44            by_source.entry(e.source.clone()).or_default().push(i);
45            by_target.entry(e.target.clone()).or_default().push(i);
46        }
47
48        Self {
49            edges,
50            by_label,
51            by_source,
52            by_target,
53        }
54    }
55
56    /// Number of edges in the corpus.
57    pub fn len(&self) -> usize {
58        self.edges.len()
59    }
60
61    /// Returns true if the corpus has no edges.
62    pub fn is_empty(&self) -> bool {
63        self.edges.is_empty()
64    }
65
66    /// All edges in insertion order.
67    pub fn edges(&self) -> &[TraceEdge] {
68        &self.edges
69    }
70
71    /// All distinct labels in the corpus.
72    pub fn labels(&self) -> HashSet<&str> {
73        self.by_label.keys().map(|s| s.as_str()).collect()
74    }
75
76    /// All distinct nodes (sources and targets) in the corpus.
77    pub fn nodes(&self) -> HashSet<&str> {
78        let mut nodes: HashSet<&str> = HashSet::new();
79        for e in &self.edges {
80            nodes.insert(&e.source);
81            nodes.insert(&e.target);
82        }
83        nodes
84    }
85
86    /// Edges with a given label.
87    pub fn edges_with_label(&self, label: &str) -> Vec<&TraceEdge> {
88        self.by_label
89            .get(label)
90            .map(|indices| indices.iter().map(|&i| &self.edges[i]).collect())
91            .unwrap_or_default()
92    }
93
94    /// Edges originating from a given node.
95    pub fn edges_from_node(&self, node: &str) -> Vec<&TraceEdge> {
96        self.by_source
97            .get(node)
98            .map(|indices| indices.iter().map(|&i| &self.edges[i]).collect())
99            .unwrap_or_default()
100    }
101
102    /// Edges targeting a given node.
103    pub fn edges_to_node(&self, node: &str) -> Vec<&TraceEdge> {
104        self.by_target
105            .get(node)
106            .map(|indices| indices.iter().map(|&i| &self.edges[i]).collect())
107            .unwrap_or_default()
108    }
109
110    /// (min_start, max_end) across all edges.
111    /// For open-ended intervals, uses start as the end bound.
112    pub fn time_range(&self) -> (i64, i64) {
113        let min = self
114            .edges
115            .iter()
116            .map(|e| e.interval.start)
117            .min()
118            .unwrap_or(0);
119        let max = self
120            .edges
121            .iter()
122            .map(|e| e.interval.end.unwrap_or(e.interval.start))
123            .max()
124            .unwrap_or(0);
125        (min, max)
126    }
127
128    /// Split into two corpora at time `t`.
129    /// Edges with `start < t` go to the first corpus; the rest to the second.
130    pub fn split_at(&self, t: &i64) -> (Self, Self) {
131        let (before, after): (Vec<_>, Vec<_>) = self
132            .edges
133            .iter()
134            .cloned()
135            .map(|e| (e.source, e.label, e.target, e.interval))
136            .partition(|(_, _, _, iv)| iv.start < *t);
137
138        (Self::new(before), Self::new(after))
139    }
140
141    /// All ordered pairs of distinct labels.
142    pub fn label_pairs(&self) -> Vec<(&str, &str)> {
143        let mut labels: Vec<&str> = self.labels().into_iter().collect();
144        labels.sort();
145        let mut pairs = Vec::new();
146        for &a in &labels {
147            for &b in &labels {
148                if a != b {
149                    pairs.push((a, b));
150                }
151            }
152        }
153        pairs
154    }
155
156    /// For a pair of labels, find all instances where edges share a node
157    /// (source of one matches source or target of the other) and compute
158    /// the Allen relation between their intervals.
159    pub fn pairwise_relations(&self, label_a: &str, label_b: &str) -> Vec<PairwiseHit> {
160        let edges_a = self.edges_with_label(label_a);
161        let edges_b = self.edges_with_label(label_b);
162        let mut hits = Vec::new();
163
164        for a in &edges_a {
165            for b in &edges_b {
166                // Check for shared node (source-source, source-target, target-source)
167                let shared = if a.source == b.source {
168                    Some(SharedNode::Source(a.source.clone()))
169                } else if a.source == b.target {
170                    Some(SharedNode::SourceTarget(a.source.clone()))
171                } else if a.target == b.source {
172                    Some(SharedNode::TargetSource(a.target.clone()))
173                } else if a.target == b.target {
174                    Some(SharedNode::Target(a.target.clone()))
175                } else {
176                    None
177                };
178
179                if let Some(shared_node) = shared {
180                    if let Some(relation) = a.interval.relation(&b.interval) {
181                        hits.push(PairwiseHit {
182                            shared_node,
183                            relation,
184                        });
185                    }
186                }
187            }
188        }
189
190        hits
191    }
192}
193
194/// How two edges share a node.
195#[derive(Debug, Clone, PartialEq)]
196pub enum SharedNode {
197    /// Both edges have the same source.
198    Source(String),
199    /// Edge A's source equals edge B's target.
200    SourceTarget(String),
201    /// Edge A's target equals edge B's source.
202    TargetSource(String),
203    /// Both edges have the same target.
204    Target(String),
205}
206
207/// A co-occurrence of two edges sharing a node with a computed Allen relation.
208#[derive(Debug, Clone)]
209pub struct PairwiseHit {
210    pub shared_node: SharedNode,
211    pub relation: AllenRelation,
212}