graphannis_core/graph/storage/
adjacencylist.rs1use crate::{
2 annostorage::{
3 AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, inmemory::AnnoStorageImpl,
4 },
5 dfs::CycleSafeDFS,
6 errors::Result,
7 types::{AnnoKey, Annotation, Edge, NodeID},
8};
9
10use super::{
11 EdgeContainer, GraphStatistic, GraphStorage, WriteableGraphStorage, deserialize_gs_field,
12 legacy::{self, AdjacencyListStorageV1},
13 load_statistics_from_location, save_statistics_to_toml, serialize_gs_field,
14};
15use itertools::Itertools;
16use rustc_hash::FxHashSet;
17use serde::Deserialize;
18use std::collections::{BTreeSet, HashMap};
19use std::{ops::Bound, path::Path};
20
21#[derive(Serialize, Deserialize, Clone)]
22pub struct AdjacencyListStorage {
23 edges: HashMap<NodeID, Vec<NodeID>>,
24 inverse_edges: HashMap<NodeID, Vec<NodeID>>,
25 annos: AnnoStorageImpl<Edge>,
26 stats: Option<GraphStatistic>,
27}
28
29fn get_fan_outs(edges: &HashMap<NodeID, Vec<NodeID>>) -> Vec<usize> {
30 let mut fan_outs: Vec<usize> = Vec::new();
31 if !edges.is_empty() {
32 for outgoing in edges.values() {
33 fan_outs.push(outgoing.len());
34 }
35 }
36 fan_outs.sort_unstable();
38
39 fan_outs
40}
41
42impl Default for AdjacencyListStorage {
43 fn default() -> Self {
44 AdjacencyListStorage::new()
45 }
46}
47
48impl AdjacencyListStorage {
49 pub fn new() -> AdjacencyListStorage {
50 AdjacencyListStorage {
51 edges: HashMap::default(),
52 inverse_edges: HashMap::default(),
53 annos: AnnoStorageImpl::new(),
54 stats: None,
55 }
56 }
57
58 pub fn clear(&mut self) -> Result<()> {
59 self.edges.clear();
60 self.inverse_edges.clear();
61 self.annos.clear()?;
62 self.stats = None;
63 Ok(())
64 }
65}
66
67impl EdgeContainer for AdjacencyListStorage {
68 fn get_outgoing_edges<'a>(
69 &'a self,
70 node: NodeID,
71 ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
72 if let Some(outgoing) = self.edges.get(&node) {
73 return match outgoing.len() {
74 0 => Box::new(std::iter::empty()),
75 1 => Box::new(std::iter::once(Ok(outgoing[0]))),
76 _ => Box::new(outgoing.iter().copied().map(Ok)),
77 };
78 }
79 Box::new(std::iter::empty())
80 }
81
82 fn has_outgoing_edges(&self, node: NodeID) -> Result<bool> {
83 if let Some(outgoing) = self.edges.get(&node) {
84 Ok(!outgoing.is_empty())
85 } else {
86 Ok(false)
87 }
88 }
89
90 fn get_ingoing_edges<'a>(
91 &'a self,
92 node: NodeID,
93 ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
94 if let Some(ingoing) = self.inverse_edges.get(&node) {
95 return match ingoing.len() {
96 0 => Box::new(std::iter::empty()),
97 1 => Box::new(std::iter::once(Ok(ingoing[0]))),
98 _ => Box::new(ingoing.iter().map(|e| Ok(*e))),
99 };
100 }
101 Box::new(std::iter::empty())
102 }
103 fn source_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
104 let it = self
105 .edges
106 .iter()
107 .filter(|(_, outgoing)| !outgoing.is_empty())
108 .map(|(key, _)| Ok(*key));
109 Box::new(it)
110 }
111
112 fn get_statistics(&self) -> Option<&GraphStatistic> {
113 self.stats.as_ref()
114 }
115}
116
117impl GraphStorage for AdjacencyListStorage {
118 fn get_anno_storage(&self) -> &dyn EdgeAnnotationStorage {
119 &self.annos
120 }
121
122 fn serialization_id(&self) -> String {
123 "AdjacencyListV1".to_owned()
124 }
125
126 fn load_from(location: &Path) -> Result<Self>
127 where
128 for<'de> Self: std::marker::Sized + Deserialize<'de>,
129 {
130 let legacy_path = location.join("component.bin");
131 let mut result: Self = if legacy_path.is_file() {
132 let component: AdjacencyListStorageV1 = deserialize_gs_field(location, "component")?;
133 Self {
134 stats: component.stats.map(GraphStatistic::from),
135 edges: component.edges,
136 inverse_edges: component.inverse_edges,
137 annos: component.annos,
138 }
139 } else {
140 let stats = load_statistics_from_location(location)?;
141 Self {
142 edges: deserialize_gs_field(location, "edges")?,
143 inverse_edges: deserialize_gs_field(location, "inverse_edges")?,
144 annos: deserialize_gs_field(location, "annos")?,
145 stats,
146 }
147 };
148
149 result.annos.after_deserialization();
150 Ok(result)
151 }
152
153 fn save_to(&self, location: &Path) -> Result<()> {
154 serialize_gs_field(&self.edges, "edges", location)?;
155 serialize_gs_field(&self.inverse_edges, "inverse_edges", location)?;
156 serialize_gs_field(&self.annos, "annos", location)?;
157 save_statistics_to_toml(location, self.stats.as_ref())?;
158 Ok(())
159 }
160
161 fn find_connected<'a>(
162 &'a self,
163 node: NodeID,
164 min_distance: usize,
165 max_distance: Bound<usize>,
166 ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
167 let mut visited = FxHashSet::<NodeID>::default();
168 let max_distance = match max_distance {
169 Bound::Unbounded => usize::MAX,
170 Bound::Included(max_distance) => max_distance,
171 Bound::Excluded(max_distance) => max_distance - 1,
172 };
173 let it = CycleSafeDFS::<'a>::new(self, node, min_distance, max_distance).filter_map_ok(
174 move |x| {
175 if visited.insert(x.node) {
176 Some(x.node)
177 } else {
178 None
179 }
180 },
181 );
182 Box::new(it)
183 }
184
185 fn find_connected_inverse<'a>(
186 &'a self,
187 node: NodeID,
188 min_distance: usize,
189 max_distance: Bound<usize>,
190 ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
191 let mut visited = FxHashSet::<NodeID>::default();
192 let max_distance = match max_distance {
193 Bound::Unbounded => usize::MAX,
194 Bound::Included(max_distance) => max_distance,
195 Bound::Excluded(max_distance) => max_distance - 1,
196 };
197
198 let it = CycleSafeDFS::<'a>::new_inverse(self, node, min_distance, max_distance)
199 .filter_map_ok(move |x| {
200 if visited.insert(x.node) {
201 Some(x.node)
202 } else {
203 None
204 }
205 });
206 Box::new(it)
207 }
208
209 fn distance(&self, source: NodeID, target: NodeID) -> Result<Option<usize>> {
210 let mut it = CycleSafeDFS::new(self, source, usize::MIN, usize::MAX)
211 .filter_ok(|x| target == x.node)
212 .map_ok(|x| x.distance);
213 match it.next() {
214 Some(distance) => {
215 let distance = distance?;
216 Ok(Some(distance))
217 }
218 None => Ok(None),
219 }
220 }
221 fn is_connected(
222 &self,
223 source: NodeID,
224 target: NodeID,
225 min_distance: usize,
226 max_distance: std::ops::Bound<usize>,
227 ) -> Result<bool> {
228 let max_distance = match max_distance {
229 Bound::Unbounded => usize::MAX,
230 Bound::Included(max_distance) => max_distance,
231 Bound::Excluded(max_distance) => max_distance - 1,
232 };
233 let mut it = CycleSafeDFS::new(self, source, min_distance, max_distance)
234 .filter_ok(|x| target == x.node);
235
236 Ok(it.next().is_some())
237 }
238
239 fn copy(
240 &mut self,
241 _node_annos: &dyn NodeAnnotationStorage,
242 orig: &dyn GraphStorage,
243 ) -> Result<()> {
244 self.clear()?;
245
246 for source in orig.source_nodes() {
247 let source = source?;
248 for target in orig.get_outgoing_edges(source) {
249 let target = target?;
250 let e = Edge { source, target };
251 self.add_edge(e.clone())?;
252 for a in orig.get_anno_storage().get_annotations_for_item(&e)? {
253 self.add_edge_annotation(e.clone(), a)?;
254 }
255 }
256 }
257
258 self.stats = orig.get_statistics().cloned();
259 self.annos.calculate_statistics()?;
260 Ok(())
261 }
262
263 fn as_writeable(&mut self) -> Option<&mut dyn WriteableGraphStorage> {
264 Some(self)
265 }
266 fn as_edgecontainer(&self) -> &dyn EdgeContainer {
267 self
268 }
269
270 fn inverse_has_same_cost(&self) -> bool {
271 true
272 }
273}
274
275impl WriteableGraphStorage for AdjacencyListStorage {
276 fn add_edge(&mut self, edge: Edge) -> Result<()> {
277 if edge.source != edge.target {
278 let inverse_entry = self.inverse_edges.entry(edge.target).or_default();
281 if let Err(insertion_idx) = inverse_entry.binary_search(&edge.source) {
283 inverse_entry.insert(insertion_idx, edge.source);
284 }
285
286 let regular_entry = self.edges.entry(edge.source).or_default();
287 if let Err(insertion_idx) = regular_entry.binary_search(&edge.target) {
288 regular_entry.insert(insertion_idx, edge.target);
289 }
290 self.stats = None;
291 }
292 Ok(())
293 }
294
295 fn add_edge_annotation(&mut self, edge: Edge, anno: Annotation) -> Result<()> {
296 if let Some(outgoing) = self.edges.get(&edge.source)
297 && outgoing.contains(&edge.target)
298 {
299 self.annos.insert(edge, anno)?;
300 }
301 Ok(())
302 }
303
304 fn delete_edge(&mut self, edge: &Edge) -> Result<()> {
305 if let Some(outgoing) = self.edges.get_mut(&edge.source)
306 && let Ok(idx) = outgoing.binary_search(&edge.target)
307 {
308 outgoing.remove(idx);
309 }
310
311 if let Some(ingoing) = self.inverse_edges.get_mut(&edge.target)
312 && let Ok(idx) = ingoing.binary_search(&edge.source)
313 {
314 ingoing.remove(idx);
315 }
316 self.annos.remove_item(edge)?;
317
318 Ok(())
319 }
320 fn delete_edge_annotation(&mut self, edge: &Edge, anno_key: &AnnoKey) -> Result<()> {
321 self.annos.remove_annotation_for_item(edge, anno_key)?;
322 Ok(())
323 }
324 fn delete_node(&mut self, node: NodeID) -> Result<()> {
325 let mut to_delete = std::collections::LinkedList::<Edge>::new();
327
328 if let Some(outgoing) = self.edges.get(&node) {
329 for target in outgoing.iter() {
330 to_delete.push_back(Edge {
331 source: node,
332 target: *target,
333 })
334 }
335 }
336 if let Some(ingoing) = self.inverse_edges.get(&node) {
337 for source in ingoing.iter() {
338 to_delete.push_back(Edge {
339 source: *source,
340 target: node,
341 })
342 }
343 }
344
345 for e in to_delete {
346 self.delete_edge(&e)?;
347 }
348
349 Ok(())
350 }
351
352 fn calculate_statistics(&mut self) -> Result<()> {
353 let mut stats = GraphStatistic {
354 max_depth: 1,
355 max_fan_out: 0,
356 avg_fan_out: 0.0,
357 fan_out_99_percentile: 0,
358 inverse_fan_out_99_percentile: 0,
359 cyclic: false,
360 rooted_tree: true,
361 nodes: 0,
362 root_nodes: 0,
363 dfs_visit_ratio: 0.0,
364 };
365
366 self.annos.calculate_statistics()?;
367
368 let mut has_incoming_edge: BTreeSet<NodeID> = BTreeSet::new();
369
370 let mut roots: BTreeSet<NodeID> = BTreeSet::new();
372 {
373 let mut all_nodes: BTreeSet<NodeID> = BTreeSet::new();
374 for (source, outgoing) in &self.edges {
375 roots.insert(*source);
376 all_nodes.insert(*source);
377 for target in outgoing {
378 all_nodes.insert(*target);
379
380 if stats.rooted_tree {
381 if has_incoming_edge.contains(target) {
382 stats.rooted_tree = false;
383 } else {
384 has_incoming_edge.insert(*target);
385 }
386 }
387 }
388 }
389 stats.nodes = all_nodes.len();
390 }
391
392 if !self.edges.is_empty() {
393 for outgoing in self.edges.values() {
394 for target in outgoing {
395 roots.remove(target);
396 }
397 }
398 }
399 stats.root_nodes = roots.len();
400
401 let fan_outs = get_fan_outs(&self.edges);
402 let sum_fan_out: usize = fan_outs.iter().sum();
403
404 if let Some(last) = fan_outs.last() {
405 stats.max_fan_out = *last;
406 }
407 let inverse_fan_outs = get_fan_outs(&self.inverse_edges);
408
409 if !fan_outs.is_empty() {
412 stats.fan_out_99_percentile = fan_outs[fan_outs.len() - 1];
413 }
414 if !inverse_fan_outs.is_empty() {
415 stats.inverse_fan_out_99_percentile = inverse_fan_outs[inverse_fan_outs.len() - 1];
416 }
417 if fan_outs.len() >= 100 {
419 let idx: usize = fan_outs.len() / 100;
420 if idx < fan_outs.len() {
421 stats.fan_out_99_percentile = fan_outs[idx];
422 }
423 }
424 if inverse_fan_outs.len() >= 100 {
425 let idx: usize = inverse_fan_outs.len() / 100;
426 if idx < inverse_fan_outs.len() {
427 stats.inverse_fan_out_99_percentile = inverse_fan_outs[idx];
428 }
429 }
430
431 let mut number_of_visits = 0;
432 if roots.is_empty() && !self.edges.is_empty() {
433 stats.cyclic = true;
435 } else {
436 for root_node in &roots {
437 let mut dfs = CycleSafeDFS::new(self, *root_node, 0, usize::MAX);
438 for step in &mut dfs {
439 let step = step?;
440 number_of_visits += 1;
441 stats.max_depth = std::cmp::max(stats.max_depth, step.distance);
442 }
443 if dfs.is_cyclic() {
444 stats.cyclic = true;
445 }
446 }
447 }
448
449 if stats.cyclic {
450 stats.rooted_tree = false;
451 stats.max_depth = 0;
453 stats.dfs_visit_ratio = 0.0;
454 } else if stats.nodes > 0 {
455 stats.dfs_visit_ratio = f64::from(number_of_visits) / (stats.nodes as f64);
456 }
457
458 if sum_fan_out > 0 && stats.nodes > 0 {
459 stats.avg_fan_out = (sum_fan_out as f64) / (stats.nodes as f64);
460 }
461
462 self.stats = Some(stats);
463
464 Ok(())
465 }
466
467 fn clear(&mut self) -> Result<()> {
468 self.annos.clear()?;
469 self.edges.clear();
470 self.inverse_edges.clear();
471 self.stats = None;
472 Ok(())
473 }
474}
475
476impl From<legacy::AdjacencyListStorageV1> for AdjacencyListStorage {
477 fn from(value: legacy::AdjacencyListStorageV1) -> Self {
478 Self {
479 edges: value.edges,
480 inverse_edges: value.inverse_edges,
481 annos: value.annos,
482 stats: value.stats.map(GraphStatistic::from),
483 }
484 }
485}
486
487#[cfg(test)]
488mod tests;