graphannis_core/graph/storage/
mod.rs

1pub mod adjacencylist;
2pub mod dense_adjacency;
3pub mod disk_adjacency;
4pub mod disk_path;
5pub mod linear;
6pub mod prepost;
7pub mod registry;
8pub mod union;
9
10pub(crate) mod legacy;
11
12use crate::annostorage::{EdgeAnnotationStorage, NodeAnnotationStorage};
13use crate::{
14    annostorage::AnnotationStorage,
15    errors::Result,
16    types::{AnnoKey, Annotation, Edge, NodeID},
17};
18use itertools::Itertools;
19use serde::{Deserialize, Serialize};
20use std::{self, path::Path};
21
22/// Some general statistical numbers specific to a graph component
23#[derive(Serialize, Deserialize, Clone)]
24pub struct GraphStatistic {
25    /// True if the component contains any cycle.
26    pub cyclic: bool,
27
28    /// True if the component consists of [rooted trees](https://en.wikipedia.org/wiki/Tree_(graph_theory)).
29    pub rooted_tree: bool,
30
31    /// Number of nodes in this graph storage (both source and target nodes).
32    pub nodes: usize,
33
34    /// Number of root nodes in this graph storage.
35    pub root_nodes: usize,
36
37    /// Average fan out.
38    pub avg_fan_out: f64,
39    /// Max fan-out of 99% of the data.
40    pub fan_out_99_percentile: usize,
41
42    /// Max inverse fan-out of 99% of the data.
43    pub inverse_fan_out_99_percentile: usize,
44
45    /// Maximal number of children of a node.
46    pub max_fan_out: usize,
47    /// Maximum length from a root node to a terminal node.
48    pub max_depth: usize,
49
50    /// Only valid for acyclic graphs: the average number of times a DFS will visit each node.
51    pub dfs_visit_ratio: f64,
52}
53
54impl std::fmt::Display for GraphStatistic {
55    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
56        write!(
57            f,
58            "nodes={}, root nodes={}, avg_fan_out={:.2}, max_fan_out={}, fan_out_99%={}, inv_fan_out_99%={}, max_depth={}",
59            self.nodes, self.root_nodes, self.avg_fan_out, self.max_fan_out, self.fan_out_99_percentile, self.inverse_fan_out_99_percentile, self.max_depth
60        )?;
61        if self.cyclic {
62            write!(f, ", cyclic")?;
63        }
64        if self.rooted_tree {
65            write!(f, ", tree")?;
66        }
67        Ok(())
68    }
69}
70
71/// Basic trait for accessing edges of a graph for a specific component.
72pub trait EdgeContainer: Sync + Send {
73    /// Get all outgoing edges for a given `node`.
74    fn get_outgoing_edges<'a>(
75        &'a self,
76        node: NodeID,
77    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
78
79    /// Return true of the given node has any outgoing edges.
80    fn has_outgoing_edges(&self, node: NodeID) -> Result<bool> {
81        if let Some(outgoing) = self.get_outgoing_edges(node).next() {
82            outgoing?;
83            Ok(true)
84        } else {
85            Ok(false)
86        }
87    }
88
89    /// Get all incoming edges for a given `node`.
90    fn get_ingoing_edges<'a>(
91        &'a self,
92        node: NodeID,
93    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
94
95    /// Return true of the given node has any incoming edges.
96    fn has_ingoing_edges(&self, node: NodeID) -> Result<bool> {
97        if let Some(ingoing) = self.get_ingoing_edges(node).next() {
98            ingoing?;
99            Ok(true)
100        } else {
101            Ok(false)
102        }
103    }
104
105    fn get_statistics(&self) -> Option<&GraphStatistic> {
106        None
107    }
108
109    /// Provides an iterator over all nodes of this edge container that are the source of an edge.
110    fn source_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
111
112    /// Provides an iterator over all nodes of this edge container that have no incoming edges.
113    fn root_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
114        // Provide an unoptimized default implementation that iterates over all source nodes.
115        let it = self
116            .source_nodes()
117            .map(move |n| -> Result<Option<NodeID>> {
118                let n = n?;
119                if !self.has_ingoing_edges(n)? {
120                    Ok(Some(n))
121                } else {
122                    Ok(None)
123                }
124            })
125            .filter_map_ok(|n| n);
126        Box::new(it)
127    }
128}
129
130/// A graph storage is the representation of an edge component of a graph with specific structures.
131/// These specific structures are exploited to efficiently implement reachability queries.
132pub trait GraphStorage: EdgeContainer {
133    /// Find all nodes reachable from a given start node inside the component.
134    fn find_connected<'a>(
135        &'a self,
136        node: NodeID,
137        min_distance: usize,
138        max_distance: std::ops::Bound<usize>,
139    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
140
141    /// Find all nodes reachable from a given start node inside the component, when the directed edges are inversed.
142    fn find_connected_inverse<'a>(
143        &'a self,
144        node: NodeID,
145        min_distance: usize,
146        max_distance: std::ops::Bound<usize>,
147    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
148
149    /// Compute the distance (shortest path length) of two nodes inside this component.
150    fn distance(&self, source: NodeID, target: NodeID) -> Result<Option<usize>>;
151
152    /// Check if two nodes are connected with any path in this component given a minimum (`min_distance`) and maximum (`max_distance`) path length.
153    fn is_connected(
154        &self,
155        source: NodeID,
156        target: NodeID,
157        min_distance: usize,
158        max_distance: std::ops::Bound<usize>,
159    ) -> Result<bool>;
160
161    /// Get the annotation storage for the edges of this graph storage.
162    fn get_anno_storage(&self) -> &dyn EdgeAnnotationStorage;
163
164    /// Copy the content of another component.
165    /// This removes the existing content of this graph storage.
166    fn copy(
167        &mut self,
168        node_annos: &dyn NodeAnnotationStorage,
169        orig: &dyn GraphStorage,
170    ) -> Result<()>;
171
172    /// Upcast this graph storage to the [EdgeContainer](trait.EdgeContainer.html) trait.
173    fn as_edgecontainer(&self) -> &dyn EdgeContainer;
174
175    /// Try to downcast this graph storage to a [WriteableGraphStorage](trait.WriteableGraphStorage.html) trait.
176    /// Returns `None` if this graph storage is not writable.
177    fn as_writeable(&mut self) -> Option<&mut dyn WriteableGraphStorage> {
178        None
179    }
180
181    /// If true, finding the inverse connected nodes via [find_connected_inverse(...)](#tymethod.find_connected_inverse) has the same cost as the non-inverse case.
182    fn inverse_has_same_cost(&self) -> bool {
183        false
184    }
185
186    /// Return an identifier for this graph storage which is used to distinguish the different graph storages when (de-) serialized.
187    fn serialization_id(&self) -> String;
188
189    /// Load the graph storage from a `location` on the disk. This location is a directory, which can contain files specific to this graph storage.
190    fn load_from(location: &Path) -> Result<Self>
191    where
192        Self: std::marker::Sized;
193
194    /// Save the graph storage a `location` on the disk. This location must point to an existing directory.
195    fn save_to(&self, location: &Path) -> Result<()>;
196}
197
198pub fn serialize_gs_field<T>(field: &T, field_name: &str, location: &Path) -> Result<()>
199where
200    T: Serialize,
201{
202    let data_path = location.join(format!("{field_name}.bin"));
203    let f_data = std::fs::File::create(data_path)?;
204    let mut writer = std::io::BufWriter::new(f_data);
205    bincode::serialize_into(&mut writer, field)?;
206    Ok(())
207}
208
209pub fn deserialize_gs_field<T>(location: &Path, field_name: &str) -> Result<T>
210where
211    for<'de> T: std::marker::Sized + Deserialize<'de>,
212{
213    let data_path = location.join(format!("{field_name}.bin"));
214    let f_data = std::fs::File::open(data_path)?;
215    let input = std::io::BufReader::new(f_data);
216
217    let result = bincode::deserialize_from(input)?;
218    Ok(result)
219}
220
221const STATISTICS_FILE_NAME: &str = "stats.toml";
222
223pub fn load_statistics_from_location(location: &Path) -> Result<Option<GraphStatistic>> {
224    let stats_path_toml = location.join(STATISTICS_FILE_NAME);
225    let legacy_stats_path_bin = location.join("edge_stats.bin");
226
227    let stats = if stats_path_toml.is_file() {
228        let file_content = std::fs::read_to_string(stats_path_toml)?;
229        let stats: GraphStatistic = toml::from_str(&file_content)?;
230        Some(stats)
231    } else if legacy_stats_path_bin.is_file() {
232        let f_stats = std::fs::File::open(legacy_stats_path_bin)?;
233        let input = std::io::BufReader::new(f_stats);
234        // This is a legacy file which needs an older version of the struct
235        let legacy_stats: Option<legacy::GraphStatisticV1> = bincode::deserialize_from(input)?;
236        legacy_stats.map(|s| s.into())
237    } else {
238        None
239    };
240    Ok(stats)
241}
242
243pub fn save_statistics_to_toml(location: &Path, stats: Option<&GraphStatistic>) -> Result<()> {
244    let file_path = location.join(STATISTICS_FILE_NAME);
245    if file_path.is_file() {
246        std::fs::remove_file(&file_path)?;
247    }
248
249    if let Some(stats) = stats {
250        let file_content = toml::to_string(stats)?;
251        std::fs::write(file_path, file_content)?;
252    }
253    Ok(())
254}
255
256/// Trait for accessing graph storages which can be written to.
257pub trait WriteableGraphStorage: GraphStorage {
258    /// Add an edge to this graph storage.
259    fn add_edge(&mut self, edge: Edge) -> Result<()>;
260
261    /// Add an annotation to an edge in this graph storage.
262    /// The edge has to exist.
263    fn add_edge_annotation(&mut self, edge: Edge, anno: Annotation) -> Result<()>;
264
265    /// Delete an existing edge.
266    fn delete_edge(&mut self, edge: &Edge) -> Result<()>;
267
268    /// Delete the annotation (defined by the qualified annotation name in `anno_key`) for an `edge`.
269    fn delete_edge_annotation(&mut self, edge: &Edge, anno_key: &AnnoKey) -> Result<()>;
270
271    /// Delete a node from this graph storage.
272    /// This deletes both edges edges where the node is the source or the target node.
273    fn delete_node(&mut self, node: NodeID) -> Result<()>;
274
275    /// Re-calculate the [statistics](struct.GraphStatistic.html) of this graph storage.
276    fn calculate_statistics(&mut self) -> Result<()>;
277
278    /// Remove all edges from this grap storage.
279    fn clear(&mut self) -> Result<()>;
280}