graphannis_core/graph/storage/
mod.rs

1pub mod adjacencylist;
2pub mod dense_adjacency;
3pub mod disk_adjacency;
4pub mod disk_path;
5pub mod linear;
6pub mod prepost;
7pub mod registry;
8pub mod union;
9
10pub(crate) mod legacy;
11
12use crate::annostorage::{EdgeAnnotationStorage, NodeAnnotationStorage};
13use crate::{
14    annostorage::AnnotationStorage,
15    errors::Result,
16    types::{AnnoKey, Annotation, Edge, NodeID},
17};
18use itertools::Itertools;
19use serde::{Deserialize, Serialize};
20use std::{self, path::Path};
21
22/// Some general statistical numbers specific to a graph component
23#[derive(Serialize, Deserialize, Clone)]
24pub struct GraphStatistic {
25    /// True if the component contains any cycle.
26    pub cyclic: bool,
27
28    /// True if the component consists of [rooted trees](https://en.wikipedia.org/wiki/Tree_(graph_theory)).
29    pub rooted_tree: bool,
30
31    /// Number of nodes in this graph storage (both source and target nodes).
32    pub nodes: usize,
33
34    /// Number of root nodes in this graph storage.
35    pub root_nodes: usize,
36
37    /// Average fan out.
38    pub avg_fan_out: f64,
39    /// Max fan-out of 99% of the data.
40    pub fan_out_99_percentile: usize,
41
42    /// Max inverse fan-out of 99% of the data.
43    pub inverse_fan_out_99_percentile: usize,
44
45    /// Maximal number of children of a node.
46    pub max_fan_out: usize,
47    /// Maximum length from a root node to a terminal node.
48    pub max_depth: usize,
49
50    /// Only valid for acyclic graphs: the average number of times a DFS will visit each node.
51    pub dfs_visit_ratio: f64,
52}
53
54impl std::fmt::Display for GraphStatistic {
55    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
56        write!(
57            f,
58            "nodes={}, root nodes={}, avg_fan_out={:.2}, max_fan_out={}, fan_out_99%={}, inv_fan_out_99%={}, max_depth={}",
59            self.nodes,
60            self.root_nodes,
61            self.avg_fan_out,
62            self.max_fan_out,
63            self.fan_out_99_percentile,
64            self.inverse_fan_out_99_percentile,
65            self.max_depth
66        )?;
67        if self.cyclic {
68            write!(f, ", cyclic")?;
69        }
70        if self.rooted_tree {
71            write!(f, ", tree")?;
72        }
73        Ok(())
74    }
75}
76
77/// Basic trait for accessing edges of a graph for a specific component.
78pub trait EdgeContainer: Sync + Send {
79    /// Get all outgoing edges for a given `node`.
80    fn get_outgoing_edges<'a>(
81        &'a self,
82        node: NodeID,
83    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
84
85    /// Return true of the given node has any outgoing edges.
86    fn has_outgoing_edges(&self, node: NodeID) -> Result<bool> {
87        if let Some(outgoing) = self.get_outgoing_edges(node).next() {
88            outgoing?;
89            Ok(true)
90        } else {
91            Ok(false)
92        }
93    }
94
95    /// Get all incoming edges for a given `node`.
96    fn get_ingoing_edges<'a>(
97        &'a self,
98        node: NodeID,
99    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
100
101    /// Return true of the given node has any incoming edges.
102    fn has_ingoing_edges(&self, node: NodeID) -> Result<bool> {
103        if let Some(ingoing) = self.get_ingoing_edges(node).next() {
104            ingoing?;
105            Ok(true)
106        } else {
107            Ok(false)
108        }
109    }
110
111    fn get_statistics(&self) -> Option<&GraphStatistic> {
112        None
113    }
114
115    /// Provides an iterator over all nodes of this edge container that are the source of an edge.
116    fn source_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
117
118    /// Provides an iterator over all nodes of this edge container that have no incoming edges.
119    fn root_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
120        // Provide an unoptimized default implementation that iterates over all source nodes.
121        let it = self
122            .source_nodes()
123            .map(move |n| -> Result<Option<NodeID>> {
124                let n = n?;
125                if !self.has_ingoing_edges(n)? {
126                    Ok(Some(n))
127                } else {
128                    Ok(None)
129                }
130            })
131            .filter_map_ok(|n| n);
132        Box::new(it)
133    }
134}
135
136/// A graph storage is the representation of an edge component of a graph with specific structures.
137/// These specific structures are exploited to efficiently implement reachability queries.
138pub trait GraphStorage: EdgeContainer {
139    /// Find all nodes reachable from a given start node inside the component.
140    fn find_connected<'a>(
141        &'a self,
142        node: NodeID,
143        min_distance: usize,
144        max_distance: std::ops::Bound<usize>,
145    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
146
147    /// Find all nodes reachable from a given start node inside the component, when the directed edges are inversed.
148    fn find_connected_inverse<'a>(
149        &'a self,
150        node: NodeID,
151        min_distance: usize,
152        max_distance: std::ops::Bound<usize>,
153    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
154
155    /// Compute the distance (shortest path length) of two nodes inside this component.
156    fn distance(&self, source: NodeID, target: NodeID) -> Result<Option<usize>>;
157
158    /// Check if two nodes are connected with any path in this component given a minimum (`min_distance`) and maximum (`max_distance`) path length.
159    fn is_connected(
160        &self,
161        source: NodeID,
162        target: NodeID,
163        min_distance: usize,
164        max_distance: std::ops::Bound<usize>,
165    ) -> Result<bool>;
166
167    /// Get the annotation storage for the edges of this graph storage.
168    fn get_anno_storage(&self) -> &dyn EdgeAnnotationStorage;
169
170    /// Copy the content of another component.
171    /// This removes the existing content of this graph storage.
172    fn copy(
173        &mut self,
174        node_annos: &dyn NodeAnnotationStorage,
175        orig: &dyn GraphStorage,
176    ) -> Result<()>;
177
178    /// Upcast this graph storage to the [EdgeContainer](trait.EdgeContainer.html) trait.
179    fn as_edgecontainer(&self) -> &dyn EdgeContainer;
180
181    /// Try to downcast this graph storage to a [WriteableGraphStorage](trait.WriteableGraphStorage.html) trait.
182    /// Returns `None` if this graph storage is not writable.
183    fn as_writeable(&mut self) -> Option<&mut dyn WriteableGraphStorage> {
184        None
185    }
186
187    /// If true, finding the inverse connected nodes via [find_connected_inverse(...)](#tymethod.find_connected_inverse) has the same cost as the non-inverse case.
188    fn inverse_has_same_cost(&self) -> bool {
189        false
190    }
191
192    /// Return an identifier for this graph storage which is used to distinguish the different graph storages when (de-) serialized.
193    fn serialization_id(&self) -> String;
194
195    /// Load the graph storage from a `location` on the disk. This location is a directory, which can contain files specific to this graph storage.
196    fn load_from(location: &Path) -> Result<Self>
197    where
198        Self: std::marker::Sized;
199
200    /// Save the graph storage a `location` on the disk. This location must point to an existing directory.
201    fn save_to(&self, location: &Path) -> Result<()>;
202}
203
204pub fn serialize_gs_field<T>(field: &T, field_name: &str, location: &Path) -> Result<()>
205where
206    T: Serialize,
207{
208    let data_path = location.join(format!("{field_name}.bin"));
209    let f_data = std::fs::File::create(data_path)?;
210    let mut writer = std::io::BufWriter::new(f_data);
211    bincode::serialize_into(&mut writer, field)?;
212    Ok(())
213}
214
215pub fn deserialize_gs_field<T>(location: &Path, field_name: &str) -> Result<T>
216where
217    for<'de> T: std::marker::Sized + Deserialize<'de>,
218{
219    let data_path = location.join(format!("{field_name}.bin"));
220    let f_data = std::fs::File::open(data_path)?;
221    let input = std::io::BufReader::new(f_data);
222
223    let result = bincode::deserialize_from(input)?;
224    Ok(result)
225}
226
227const STATISTICS_FILE_NAME: &str = "stats.toml";
228
229pub fn load_statistics_from_location(location: &Path) -> Result<Option<GraphStatistic>> {
230    let stats_path_toml = location.join(STATISTICS_FILE_NAME);
231    let legacy_stats_path_bin = location.join("edge_stats.bin");
232
233    let stats = if stats_path_toml.is_file() {
234        let file_content = std::fs::read_to_string(stats_path_toml)?;
235        let stats: GraphStatistic = toml::from_str(&file_content)?;
236        Some(stats)
237    } else if legacy_stats_path_bin.is_file() {
238        let f_stats = std::fs::File::open(legacy_stats_path_bin)?;
239        let input = std::io::BufReader::new(f_stats);
240        // This is a legacy file which needs an older version of the struct
241        let legacy_stats: Option<legacy::GraphStatisticV1> = bincode::deserialize_from(input)?;
242        legacy_stats.map(|s| s.into())
243    } else {
244        None
245    };
246    Ok(stats)
247}
248
249pub fn save_statistics_to_toml(location: &Path, stats: Option<&GraphStatistic>) -> Result<()> {
250    let file_path = location.join(STATISTICS_FILE_NAME);
251    if file_path.is_file() {
252        std::fs::remove_file(&file_path)?;
253    }
254
255    if let Some(stats) = stats {
256        let file_content = toml::to_string(stats)?;
257        std::fs::write(file_path, file_content)?;
258    }
259    Ok(())
260}
261
262/// Trait for accessing graph storages which can be written to.
263pub trait WriteableGraphStorage: GraphStorage {
264    /// Add an edge to this graph storage.
265    fn add_edge(&mut self, edge: Edge) -> Result<()>;
266
267    /// Add an annotation to an edge in this graph storage.
268    /// The edge has to exist.
269    fn add_edge_annotation(&mut self, edge: Edge, anno: Annotation) -> Result<()>;
270
271    /// Delete an existing edge.
272    fn delete_edge(&mut self, edge: &Edge) -> Result<()>;
273
274    /// Delete the annotation (defined by the qualified annotation name in `anno_key`) for an `edge`.
275    fn delete_edge_annotation(&mut self, edge: &Edge, anno_key: &AnnoKey) -> Result<()>;
276
277    /// Delete a node from this graph storage.
278    /// This deletes both edges edges where the node is the source or the target node.
279    fn delete_node(&mut self, node: NodeID) -> Result<()>;
280
281    /// Re-calculate the [statistics](struct.GraphStatistic.html) of this graph storage.
282    fn calculate_statistics(&mut self) -> Result<()>;
283
284    /// Remove all edges from this grap storage.
285    fn clear(&mut self) -> Result<()>;
286}