graphannis_core/graph/storage/
adjacencylist.rs

1use crate::{
2    annostorage::{
3        AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, inmemory::AnnoStorageImpl,
4    },
5    dfs::CycleSafeDFS,
6    errors::Result,
7    types::{AnnoKey, Annotation, Edge, NodeID},
8};
9
10use super::{
11    EdgeContainer, GraphStatistic, GraphStorage, WriteableGraphStorage, deserialize_gs_field,
12    legacy::{self, AdjacencyListStorageV1},
13    load_statistics_from_location, save_statistics_to_toml, serialize_gs_field,
14};
15use itertools::Itertools;
16use rustc_hash::FxHashSet;
17use serde::Deserialize;
18use std::collections::{BTreeSet, HashMap};
19use std::{ops::Bound, path::Path};
20
21#[derive(Serialize, Deserialize, Clone)]
22pub struct AdjacencyListStorage {
23    edges: HashMap<NodeID, Vec<NodeID>>,
24    inverse_edges: HashMap<NodeID, Vec<NodeID>>,
25    annos: AnnoStorageImpl<Edge>,
26    stats: Option<GraphStatistic>,
27}
28
29fn get_fan_outs(edges: &HashMap<NodeID, Vec<NodeID>>) -> Vec<usize> {
30    let mut fan_outs: Vec<usize> = Vec::new();
31    if !edges.is_empty() {
32        for outgoing in edges.values() {
33            fan_outs.push(outgoing.len());
34        }
35    }
36    // order the fan-outs
37    fan_outs.sort_unstable();
38
39    fan_outs
40}
41
42impl Default for AdjacencyListStorage {
43    fn default() -> Self {
44        AdjacencyListStorage::new()
45    }
46}
47
48impl AdjacencyListStorage {
49    pub fn new() -> AdjacencyListStorage {
50        AdjacencyListStorage {
51            edges: HashMap::default(),
52            inverse_edges: HashMap::default(),
53            annos: AnnoStorageImpl::new(),
54            stats: None,
55        }
56    }
57
58    pub fn clear(&mut self) -> Result<()> {
59        self.edges.clear();
60        self.inverse_edges.clear();
61        self.annos.clear()?;
62        self.stats = None;
63        Ok(())
64    }
65}
66
67impl EdgeContainer for AdjacencyListStorage {
68    fn get_outgoing_edges<'a>(
69        &'a self,
70        node: NodeID,
71    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
72        if let Some(outgoing) = self.edges.get(&node) {
73            return match outgoing.len() {
74                0 => Box::new(std::iter::empty()),
75                1 => Box::new(std::iter::once(Ok(outgoing[0]))),
76                _ => Box::new(outgoing.iter().copied().map(Ok)),
77            };
78        }
79        Box::new(std::iter::empty())
80    }
81
82    fn has_outgoing_edges(&self, node: NodeID) -> Result<bool> {
83        if let Some(outgoing) = self.edges.get(&node) {
84            Ok(!outgoing.is_empty())
85        } else {
86            Ok(false)
87        }
88    }
89
90    fn get_ingoing_edges<'a>(
91        &'a self,
92        node: NodeID,
93    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
94        if let Some(ingoing) = self.inverse_edges.get(&node) {
95            return match ingoing.len() {
96                0 => Box::new(std::iter::empty()),
97                1 => Box::new(std::iter::once(Ok(ingoing[0]))),
98                _ => Box::new(ingoing.iter().map(|e| Ok(*e))),
99            };
100        }
101        Box::new(std::iter::empty())
102    }
103    fn source_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
104        let it = self
105            .edges
106            .iter()
107            .filter(|(_, outgoing)| !outgoing.is_empty())
108            .map(|(key, _)| Ok(*key));
109        Box::new(it)
110    }
111
112    fn get_statistics(&self) -> Option<&GraphStatistic> {
113        self.stats.as_ref()
114    }
115}
116
117impl GraphStorage for AdjacencyListStorage {
118    fn get_anno_storage(&self) -> &dyn EdgeAnnotationStorage {
119        &self.annos
120    }
121
122    fn serialization_id(&self) -> String {
123        "AdjacencyListV1".to_owned()
124    }
125
126    fn load_from(location: &Path) -> Result<Self>
127    where
128        for<'de> Self: std::marker::Sized + Deserialize<'de>,
129    {
130        let legacy_path = location.join("component.bin");
131        let mut result: Self = if legacy_path.is_file() {
132            let component: AdjacencyListStorageV1 = deserialize_gs_field(location, "component")?;
133            Self {
134                stats: component.stats.map(GraphStatistic::from),
135                edges: component.edges,
136                inverse_edges: component.inverse_edges,
137                annos: component.annos,
138            }
139        } else {
140            let stats = load_statistics_from_location(location)?;
141            Self {
142                edges: deserialize_gs_field(location, "edges")?,
143                inverse_edges: deserialize_gs_field(location, "inverse_edges")?,
144                annos: deserialize_gs_field(location, "annos")?,
145                stats,
146            }
147        };
148
149        result.annos.after_deserialization();
150        Ok(result)
151    }
152
153    fn save_to(&self, location: &Path) -> Result<()> {
154        serialize_gs_field(&self.edges, "edges", location)?;
155        serialize_gs_field(&self.inverse_edges, "inverse_edges", location)?;
156        serialize_gs_field(&self.annos, "annos", location)?;
157        save_statistics_to_toml(location, self.stats.as_ref())?;
158        Ok(())
159    }
160
161    fn find_connected<'a>(
162        &'a self,
163        node: NodeID,
164        min_distance: usize,
165        max_distance: Bound<usize>,
166    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
167        let mut visited = FxHashSet::<NodeID>::default();
168        let max_distance = match max_distance {
169            Bound::Unbounded => usize::MAX,
170            Bound::Included(max_distance) => max_distance,
171            Bound::Excluded(max_distance) => max_distance - 1,
172        };
173        let it = CycleSafeDFS::<'a>::new(self, node, min_distance, max_distance).filter_map_ok(
174            move |x| {
175                if visited.insert(x.node) {
176                    Some(x.node)
177                } else {
178                    None
179                }
180            },
181        );
182        Box::new(it)
183    }
184
185    fn find_connected_inverse<'a>(
186        &'a self,
187        node: NodeID,
188        min_distance: usize,
189        max_distance: Bound<usize>,
190    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
191        let mut visited = FxHashSet::<NodeID>::default();
192        let max_distance = match max_distance {
193            Bound::Unbounded => usize::MAX,
194            Bound::Included(max_distance) => max_distance,
195            Bound::Excluded(max_distance) => max_distance - 1,
196        };
197
198        let it = CycleSafeDFS::<'a>::new_inverse(self, node, min_distance, max_distance)
199            .filter_map_ok(move |x| {
200                if visited.insert(x.node) {
201                    Some(x.node)
202                } else {
203                    None
204                }
205            });
206        Box::new(it)
207    }
208
209    fn distance(&self, source: NodeID, target: NodeID) -> Result<Option<usize>> {
210        let mut it = CycleSafeDFS::new(self, source, usize::MIN, usize::MAX)
211            .filter_ok(|x| target == x.node)
212            .map_ok(|x| x.distance);
213        match it.next() {
214            Some(distance) => {
215                let distance = distance?;
216                Ok(Some(distance))
217            }
218            None => Ok(None),
219        }
220    }
221    fn is_connected(
222        &self,
223        source: NodeID,
224        target: NodeID,
225        min_distance: usize,
226        max_distance: std::ops::Bound<usize>,
227    ) -> Result<bool> {
228        let max_distance = match max_distance {
229            Bound::Unbounded => usize::MAX,
230            Bound::Included(max_distance) => max_distance,
231            Bound::Excluded(max_distance) => max_distance - 1,
232        };
233        let mut it = CycleSafeDFS::new(self, source, min_distance, max_distance)
234            .filter_ok(|x| target == x.node);
235
236        Ok(it.next().is_some())
237    }
238
239    fn copy(
240        &mut self,
241        _node_annos: &dyn NodeAnnotationStorage,
242        orig: &dyn GraphStorage,
243    ) -> Result<()> {
244        self.clear()?;
245
246        for source in orig.source_nodes() {
247            let source = source?;
248            for target in orig.get_outgoing_edges(source) {
249                let target = target?;
250                let e = Edge { source, target };
251                self.add_edge(e.clone())?;
252                for a in orig.get_anno_storage().get_annotations_for_item(&e)? {
253                    self.add_edge_annotation(e.clone(), a)?;
254                }
255            }
256        }
257
258        self.stats = orig.get_statistics().cloned();
259        self.annos.calculate_statistics()?;
260        Ok(())
261    }
262
263    fn as_writeable(&mut self) -> Option<&mut dyn WriteableGraphStorage> {
264        Some(self)
265    }
266    fn as_edgecontainer(&self) -> &dyn EdgeContainer {
267        self
268    }
269
270    fn inverse_has_same_cost(&self) -> bool {
271        true
272    }
273}
274
275impl WriteableGraphStorage for AdjacencyListStorage {
276    fn add_edge(&mut self, edge: Edge) -> Result<()> {
277        if edge.source != edge.target {
278            // insert to both regular and inverse maps
279
280            let inverse_entry = self.inverse_edges.entry(edge.target).or_default();
281            // no need to insert it: edge already exists
282            if let Err(insertion_idx) = inverse_entry.binary_search(&edge.source) {
283                inverse_entry.insert(insertion_idx, edge.source);
284            }
285
286            let regular_entry = self.edges.entry(edge.source).or_default();
287            if let Err(insertion_idx) = regular_entry.binary_search(&edge.target) {
288                regular_entry.insert(insertion_idx, edge.target);
289            }
290            self.stats = None;
291        }
292        Ok(())
293    }
294
295    fn add_edge_annotation(&mut self, edge: Edge, anno: Annotation) -> Result<()> {
296        if let Some(outgoing) = self.edges.get(&edge.source)
297            && outgoing.contains(&edge.target)
298        {
299            self.annos.insert(edge, anno)?;
300        }
301        Ok(())
302    }
303
304    fn delete_edge(&mut self, edge: &Edge) -> Result<()> {
305        if let Some(outgoing) = self.edges.get_mut(&edge.source)
306            && let Ok(idx) = outgoing.binary_search(&edge.target)
307        {
308            outgoing.remove(idx);
309        }
310
311        if let Some(ingoing) = self.inverse_edges.get_mut(&edge.target)
312            && let Ok(idx) = ingoing.binary_search(&edge.source)
313        {
314            ingoing.remove(idx);
315        }
316        self.annos.remove_item(edge)?;
317
318        Ok(())
319    }
320    fn delete_edge_annotation(&mut self, edge: &Edge, anno_key: &AnnoKey) -> Result<()> {
321        self.annos.remove_annotation_for_item(edge, anno_key)?;
322        Ok(())
323    }
324    fn delete_node(&mut self, node: NodeID) -> Result<()> {
325        // find all both ingoing and outgoing edges
326        let mut to_delete = std::collections::LinkedList::<Edge>::new();
327
328        if let Some(outgoing) = self.edges.get(&node) {
329            for target in outgoing.iter() {
330                to_delete.push_back(Edge {
331                    source: node,
332                    target: *target,
333                })
334            }
335        }
336        if let Some(ingoing) = self.inverse_edges.get(&node) {
337            for source in ingoing.iter() {
338                to_delete.push_back(Edge {
339                    source: *source,
340                    target: node,
341                })
342            }
343        }
344
345        for e in to_delete {
346            self.delete_edge(&e)?;
347        }
348
349        Ok(())
350    }
351
352    fn calculate_statistics(&mut self) -> Result<()> {
353        let mut stats = GraphStatistic {
354            max_depth: 1,
355            max_fan_out: 0,
356            avg_fan_out: 0.0,
357            fan_out_99_percentile: 0,
358            inverse_fan_out_99_percentile: 0,
359            cyclic: false,
360            rooted_tree: true,
361            nodes: 0,
362            root_nodes: 0,
363            dfs_visit_ratio: 0.0,
364        };
365
366        self.annos.calculate_statistics()?;
367
368        let mut has_incoming_edge: BTreeSet<NodeID> = BTreeSet::new();
369
370        // find all root nodes
371        let mut roots: BTreeSet<NodeID> = BTreeSet::new();
372        {
373            let mut all_nodes: BTreeSet<NodeID> = BTreeSet::new();
374            for (source, outgoing) in &self.edges {
375                roots.insert(*source);
376                all_nodes.insert(*source);
377                for target in outgoing {
378                    all_nodes.insert(*target);
379
380                    if stats.rooted_tree {
381                        if has_incoming_edge.contains(target) {
382                            stats.rooted_tree = false;
383                        } else {
384                            has_incoming_edge.insert(*target);
385                        }
386                    }
387                }
388            }
389            stats.nodes = all_nodes.len();
390        }
391
392        if !self.edges.is_empty() {
393            for outgoing in self.edges.values() {
394                for target in outgoing {
395                    roots.remove(target);
396                }
397            }
398        }
399        stats.root_nodes = roots.len();
400
401        let fan_outs = get_fan_outs(&self.edges);
402        let sum_fan_out: usize = fan_outs.iter().sum();
403
404        if let Some(last) = fan_outs.last() {
405            stats.max_fan_out = *last;
406        }
407        let inverse_fan_outs = get_fan_outs(&self.inverse_edges);
408
409        // get the percentile value(s)
410        // set some default values in case there are not enough elements in the component
411        if !fan_outs.is_empty() {
412            stats.fan_out_99_percentile = fan_outs[fan_outs.len() - 1];
413        }
414        if !inverse_fan_outs.is_empty() {
415            stats.inverse_fan_out_99_percentile = inverse_fan_outs[inverse_fan_outs.len() - 1];
416        }
417        // calculate the more accurate values
418        if fan_outs.len() >= 100 {
419            let idx: usize = fan_outs.len() / 100;
420            if idx < fan_outs.len() {
421                stats.fan_out_99_percentile = fan_outs[idx];
422            }
423        }
424        if inverse_fan_outs.len() >= 100 {
425            let idx: usize = inverse_fan_outs.len() / 100;
426            if idx < inverse_fan_outs.len() {
427                stats.inverse_fan_out_99_percentile = inverse_fan_outs[idx];
428            }
429        }
430
431        let mut number_of_visits = 0;
432        if roots.is_empty() && !self.edges.is_empty() {
433            // if we have edges but no roots at all there must be a cycle
434            stats.cyclic = true;
435        } else {
436            for root_node in &roots {
437                let mut dfs = CycleSafeDFS::new(self, *root_node, 0, usize::MAX);
438                for step in &mut dfs {
439                    let step = step?;
440                    number_of_visits += 1;
441                    stats.max_depth = std::cmp::max(stats.max_depth, step.distance);
442                }
443                if dfs.is_cyclic() {
444                    stats.cyclic = true;
445                }
446            }
447        }
448
449        if stats.cyclic {
450            stats.rooted_tree = false;
451            // it's infinite
452            stats.max_depth = 0;
453            stats.dfs_visit_ratio = 0.0;
454        } else if stats.nodes > 0 {
455            stats.dfs_visit_ratio = f64::from(number_of_visits) / (stats.nodes as f64);
456        }
457
458        if sum_fan_out > 0 && stats.nodes > 0 {
459            stats.avg_fan_out = (sum_fan_out as f64) / (stats.nodes as f64);
460        }
461
462        self.stats = Some(stats);
463
464        Ok(())
465    }
466
467    fn clear(&mut self) -> Result<()> {
468        self.annos.clear()?;
469        self.edges.clear();
470        self.inverse_edges.clear();
471        self.stats = None;
472        Ok(())
473    }
474}
475
476impl From<legacy::AdjacencyListStorageV1> for AdjacencyListStorage {
477    fn from(value: legacy::AdjacencyListStorageV1) -> Self {
478        Self {
479            edges: value.edges,
480            inverse_edges: value.inverse_edges,
481            annos: value.annos,
482            stats: value.stats.map(GraphStatistic::from),
483        }
484    }
485}
486
487#[cfg(test)]
488mod tests;