graphannis_core/graph/storage/
dense_adjacency.rs

1use super::{
2    EdgeContainer, GraphStatistic, GraphStorage, deserialize_gs_field,
3    legacy::DenseAdjacencyListStorageV1, load_statistics_from_location, save_statistics_to_toml,
4    serialize_gs_field,
5};
6use crate::{
7    annostorage::{
8        AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, inmemory::AnnoStorageImpl,
9    },
10    dfs::CycleSafeDFS,
11    errors::Result,
12    types::{Edge, NodeID},
13};
14use itertools::Itertools;
15use num_traits::ToPrimitive;
16use rustc_hash::FxHashSet;
17use serde::Deserialize;
18use std::{collections::HashMap, ops::Bound, path::Path};
19
20#[derive(Serialize, Deserialize, Clone)]
21pub struct DenseAdjacencyListStorage {
22    edges: Vec<Option<NodeID>>,
23    inverse_edges: HashMap<NodeID, Vec<NodeID>>,
24    annos: AnnoStorageImpl<Edge>,
25    stats: Option<GraphStatistic>,
26}
27
28impl Default for DenseAdjacencyListStorage {
29    fn default() -> Self {
30        DenseAdjacencyListStorage::new()
31    }
32}
33
34impl DenseAdjacencyListStorage {
35    pub fn new() -> DenseAdjacencyListStorage {
36        DenseAdjacencyListStorage {
37            edges: Vec::default(),
38            inverse_edges: HashMap::default(),
39            annos: AnnoStorageImpl::new(),
40            stats: None,
41        }
42    }
43}
44
45impl EdgeContainer for DenseAdjacencyListStorage {
46    /// Get all outgoing edges for a given `node`.
47    fn get_outgoing_edges<'a>(
48        &'a self,
49        node: NodeID,
50    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
51        if let Some(node) = node.to_usize()
52            && node < self.edges.len()
53            && let Some(outgoing) = self.edges[node]
54        {
55            return Box::new(std::iter::once(Ok(outgoing)));
56        }
57        Box::new(std::iter::empty())
58    }
59
60    fn get_ingoing_edges<'a>(
61        &'a self,
62        node: NodeID,
63    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
64        if let Some(ingoing) = self.inverse_edges.get(&node) {
65            return match ingoing.len() {
66                0 => Box::new(std::iter::empty()),
67                1 => Box::new(std::iter::once(Ok(ingoing[0]))),
68                _ => Box::new(ingoing.iter().map(|e| Ok(*e))),
69            };
70        }
71        Box::new(std::iter::empty())
72    }
73
74    fn get_statistics(&self) -> Option<&GraphStatistic> {
75        self.stats.as_ref()
76    }
77
78    /// Provides an iterator over all nodes of this edge container that are the source an edge
79    fn source_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
80        let it = self
81            .edges
82            .iter()
83            .enumerate()
84            .filter(|(_, outgoing)| outgoing.is_some())
85            .filter_map(|(key, _)| key.to_u64())
86            .map(Ok);
87        Box::new(it)
88    }
89}
90
91impl GraphStorage for DenseAdjacencyListStorage {
92    fn find_connected<'a>(
93        &'a self,
94        node: NodeID,
95        min_distance: usize,
96        max_distance: Bound<usize>,
97    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
98        let mut visited = FxHashSet::<NodeID>::default();
99        let max_distance = match max_distance {
100            Bound::Unbounded => usize::MAX,
101            Bound::Included(max_distance) => max_distance,
102            Bound::Excluded(max_distance) => max_distance - 1,
103        };
104        let it = CycleSafeDFS::<'a>::new(self, node, min_distance, max_distance)
105            .map_ok(|x| x.node)
106            .filter_ok(move |n| visited.insert(*n));
107        Box::new(it)
108    }
109
110    fn find_connected_inverse<'a>(
111        &'a self,
112        node: NodeID,
113        min_distance: usize,
114        max_distance: Bound<usize>,
115    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
116        let mut visited = FxHashSet::<NodeID>::default();
117        let max_distance = match max_distance {
118            Bound::Unbounded => usize::MAX,
119            Bound::Included(max_distance) => max_distance,
120            Bound::Excluded(max_distance) => max_distance - 1,
121        };
122
123        let it = CycleSafeDFS::<'a>::new_inverse(self, node, min_distance, max_distance)
124            .map_ok(|x| x.node)
125            .filter_ok(move |n| visited.insert(*n));
126        Box::new(it)
127    }
128
129    fn distance(&self, source: NodeID, target: NodeID) -> Result<Option<usize>> {
130        let mut it = CycleSafeDFS::new(self, source, usize::MIN, usize::MAX)
131            .filter_ok(|x| target == x.node)
132            .map_ok(|x| x.distance);
133
134        match it.next() {
135            Some(distance) => {
136                let distance = distance?;
137                Ok(Some(distance))
138            }
139            None => Ok(None),
140        }
141    }
142    fn is_connected(
143        &self,
144        source: NodeID,
145        target: NodeID,
146        min_distance: usize,
147        max_distance: std::ops::Bound<usize>,
148    ) -> Result<bool> {
149        let max_distance = match max_distance {
150            Bound::Unbounded => usize::MAX,
151            Bound::Included(max_distance) => max_distance,
152            Bound::Excluded(max_distance) => max_distance - 1,
153        };
154        let mut it = CycleSafeDFS::new(self, source, min_distance, max_distance)
155            .filter_ok(|x| target == x.node);
156
157        Ok(it.next().is_some())
158    }
159
160    fn get_anno_storage(&self) -> &dyn EdgeAnnotationStorage {
161        &self.annos
162    }
163
164    fn copy(
165        &mut self,
166        node_annos: &dyn NodeAnnotationStorage,
167        orig: &dyn GraphStorage,
168    ) -> Result<()> {
169        self.annos.clear()?;
170        self.edges.clear();
171        self.inverse_edges.clear();
172
173        if let Some(largest_idx) = node_annos
174            .get_largest_item()?
175            .and_then(|idx| idx.to_usize())
176        {
177            debug!("Resizing dense adjacency list to size {}", largest_idx + 1);
178            self.edges.resize(largest_idx + 1, None);
179
180            for source in orig.source_nodes() {
181                let source = source?;
182                if let Some(idx) = source.to_usize()
183                    && let Some(target) = orig.get_outgoing_edges(source).next()
184                {
185                    let target = target?;
186                    // insert edge
187                    self.edges[idx] = Some(target);
188
189                    // insert inverse edge
190                    let e = Edge { source, target };
191                    let inverse_entry = self.inverse_edges.entry(e.target).or_default();
192                    // no need to insert it: edge already exists
193                    if let Err(insertion_idx) = inverse_entry.binary_search(&e.source) {
194                        inverse_entry.insert(insertion_idx, e.source);
195                    }
196                    // insert annotation
197                    for a in orig.get_anno_storage().get_annotations_for_item(&e)? {
198                        self.annos.insert(e.clone(), a)?;
199                    }
200                }
201            }
202            self.stats = orig.get_statistics().cloned();
203            self.annos.calculate_statistics()?;
204        }
205        Ok(())
206    }
207
208    fn as_edgecontainer(&self) -> &dyn EdgeContainer {
209        self
210    }
211
212    fn inverse_has_same_cost(&self) -> bool {
213        true
214    }
215
216    /// Return an identifier for this graph storage which is used to distinguish the different graph storages when (de-) serialized.
217    fn serialization_id(&self) -> String {
218        "DenseAdjacencyListV1".to_owned()
219    }
220
221    fn load_from(location: &Path) -> Result<Self>
222    where
223        for<'de> Self: std::marker::Sized + Deserialize<'de>,
224    {
225        let legacy_path = location.join("component.bin");
226        let mut result: Self = if legacy_path.is_file() {
227            let component: DenseAdjacencyListStorageV1 =
228                deserialize_gs_field(location, "component")?;
229            Self {
230                edges: component.edges,
231                inverse_edges: component.inverse_edges,
232                annos: component.annos,
233                stats: component.stats.map(GraphStatistic::from),
234            }
235        } else {
236            let stats = load_statistics_from_location(location)?;
237            Self {
238                edges: deserialize_gs_field(location, "edges")?,
239                inverse_edges: deserialize_gs_field(location, "inverse_edges")?,
240                annos: deserialize_gs_field(location, "annos")?,
241                stats,
242            }
243        };
244
245        result.annos.after_deserialization();
246        Ok(result)
247    }
248
249    fn save_to(&self, location: &Path) -> Result<()> {
250        serialize_gs_field(&self.edges, "edges", location)?;
251        serialize_gs_field(&self.inverse_edges, "inverse_edges", location)?;
252        serialize_gs_field(&self.annos, "annos", location)?;
253        save_statistics_to_toml(location, self.stats.as_ref())?;
254        Ok(())
255    }
256}
257
258#[cfg(test)]
259mod tests;