fabula_discovery/
corpus.rs1use fabula::interval::{AllenRelation, Interval};
2use std::collections::{HashMap, HashSet};
3
4#[derive(Debug, Clone)]
6pub struct TraceEdge {
7 pub source: String,
8 pub label: String,
9 pub target: String,
10 pub interval: Interval<i64>,
11}
12
13#[derive(Debug, Clone)]
18pub struct TraceCorpus {
19 edges: Vec<TraceEdge>,
20 by_label: HashMap<String, Vec<usize>>,
21 by_source: HashMap<String, Vec<usize>>,
22 by_target: HashMap<String, Vec<usize>>,
23}
24
25impl TraceCorpus {
26 pub fn new(raw: Vec<(String, String, String, Interval<i64>)>) -> Self {
28 let edges: Vec<TraceEdge> = raw
29 .into_iter()
30 .map(|(source, label, target, interval)| TraceEdge {
31 source,
32 label,
33 target,
34 interval,
35 })
36 .collect();
37
38 let mut by_label: HashMap<String, Vec<usize>> = HashMap::new();
39 let mut by_source: HashMap<String, Vec<usize>> = HashMap::new();
40 let mut by_target: HashMap<String, Vec<usize>> = HashMap::new();
41
42 for (i, e) in edges.iter().enumerate() {
43 by_label.entry(e.label.clone()).or_default().push(i);
44 by_source.entry(e.source.clone()).or_default().push(i);
45 by_target.entry(e.target.clone()).or_default().push(i);
46 }
47
48 Self {
49 edges,
50 by_label,
51 by_source,
52 by_target,
53 }
54 }
55
56 pub fn len(&self) -> usize {
58 self.edges.len()
59 }
60
61 pub fn is_empty(&self) -> bool {
63 self.edges.is_empty()
64 }
65
66 pub fn edges(&self) -> &[TraceEdge] {
68 &self.edges
69 }
70
71 pub fn labels(&self) -> HashSet<&str> {
73 self.by_label.keys().map(|s| s.as_str()).collect()
74 }
75
76 pub fn nodes(&self) -> HashSet<&str> {
78 let mut nodes: HashSet<&str> = HashSet::new();
79 for e in &self.edges {
80 nodes.insert(&e.source);
81 nodes.insert(&e.target);
82 }
83 nodes
84 }
85
86 pub fn edges_with_label(&self, label: &str) -> Vec<&TraceEdge> {
88 self.by_label
89 .get(label)
90 .map(|indices| indices.iter().map(|&i| &self.edges[i]).collect())
91 .unwrap_or_default()
92 }
93
94 pub fn edges_from_node(&self, node: &str) -> Vec<&TraceEdge> {
96 self.by_source
97 .get(node)
98 .map(|indices| indices.iter().map(|&i| &self.edges[i]).collect())
99 .unwrap_or_default()
100 }
101
102 pub fn edges_to_node(&self, node: &str) -> Vec<&TraceEdge> {
104 self.by_target
105 .get(node)
106 .map(|indices| indices.iter().map(|&i| &self.edges[i]).collect())
107 .unwrap_or_default()
108 }
109
110 pub fn time_range(&self) -> (i64, i64) {
113 let min = self
114 .edges
115 .iter()
116 .map(|e| e.interval.start)
117 .min()
118 .unwrap_or(0);
119 let max = self
120 .edges
121 .iter()
122 .map(|e| e.interval.end.unwrap_or(e.interval.start))
123 .max()
124 .unwrap_or(0);
125 (min, max)
126 }
127
128 pub fn split_at(&self, t: &i64) -> (Self, Self) {
131 let (before, after): (Vec<_>, Vec<_>) = self
132 .edges
133 .iter()
134 .cloned()
135 .map(|e| (e.source, e.label, e.target, e.interval))
136 .partition(|(_, _, _, iv)| iv.start < *t);
137
138 (Self::new(before), Self::new(after))
139 }
140
141 pub fn label_pairs(&self) -> Vec<(&str, &str)> {
143 let mut labels: Vec<&str> = self.labels().into_iter().collect();
144 labels.sort();
145 let mut pairs = Vec::new();
146 for &a in &labels {
147 for &b in &labels {
148 if a != b {
149 pairs.push((a, b));
150 }
151 }
152 }
153 pairs
154 }
155
156 pub fn pairwise_relations(&self, label_a: &str, label_b: &str) -> Vec<PairwiseHit> {
160 let edges_a = self.edges_with_label(label_a);
161 let edges_b = self.edges_with_label(label_b);
162 let mut hits = Vec::new();
163
164 for a in &edges_a {
165 for b in &edges_b {
166 let shared = if a.source == b.source {
168 Some(SharedNode::Source(a.source.clone()))
169 } else if a.source == b.target {
170 Some(SharedNode::SourceTarget(a.source.clone()))
171 } else if a.target == b.source {
172 Some(SharedNode::TargetSource(a.target.clone()))
173 } else if a.target == b.target {
174 Some(SharedNode::Target(a.target.clone()))
175 } else {
176 None
177 };
178
179 if let Some(shared_node) = shared {
180 if let Some(relation) = a.interval.relation(&b.interval) {
181 hits.push(PairwiseHit {
182 shared_node,
183 relation,
184 });
185 }
186 }
187 }
188 }
189
190 hits
191 }
192}
193
194#[derive(Debug, Clone, PartialEq)]
196pub enum SharedNode {
197 Source(String),
199 SourceTarget(String),
201 TargetSource(String),
203 Target(String),
205}
206
207#[derive(Debug, Clone)]
209pub struct PairwiseHit {
210 pub shared_node: SharedNode,
211 pub relation: AllenRelation,
212}