1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
pub mod adjacencylist;
pub mod dense_adjacency;
pub mod disk_adjacency;
pub mod linear;
pub mod prepost;
pub mod registry;
pub mod union;
use crate::annostorage::{EdgeAnnotationStorage, NodeAnnotationStorage};
use crate::{
annostorage::AnnotationStorage,
errors::Result,
types::{AnnoKey, Annotation, Edge, NodeID},
};
use serde::{Deserialize, Serialize};
use std::{self, path::Path};
/// Some general statistical numbers specific to a graph component
#[derive(Serialize, Deserialize, Clone)]
pub struct GraphStatistic {
/// True if the component contains any cycle.
pub cyclic: bool,
/// True if the component consists of a [rooted trees](https://en.wikipedia.org/wiki/Tree_(graph_theory)).
pub rooted_tree: bool,
/// Number of nodes in this graph storage (both source and target nodes).
pub nodes: usize,
/// Average fan out.
pub avg_fan_out: f64,
/// Max fan-out of 99% of the data.
pub fan_out_99_percentile: usize,
/// Max inverse fan-out of 99% of the data.
pub inverse_fan_out_99_percentile: usize,
/// Maximal number of children of a node.
pub max_fan_out: usize,
/// Maximum length from a root node to a terminal node.
pub max_depth: usize,
/// Only valid for acyclic graphs: the average number of times a DFS will visit each node.
pub dfs_visit_ratio: f64,
}
impl std::fmt::Display for GraphStatistic {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"nodes={}, avg_fan_out={:.2}, max_fan_out={}, max_depth={}",
self.nodes, self.avg_fan_out, self.max_fan_out, self.max_depth
)?;
if self.cyclic {
write!(f, ", cyclic")?;
}
if self.rooted_tree {
write!(f, ", tree")?;
}
Ok(())
}
}
/// Basic trait for accessing edges of a graph for a specific component.
pub trait EdgeContainer: Sync + Send {
/// Get all outgoing edges for a given `node`.
fn get_outgoing_edges<'a>(
&'a self,
node: NodeID,
) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
/// Return true of the given node has any outgoing edges.
fn has_outgoing_edges(&self, node: NodeID) -> Result<bool> {
if let Some(outgoing) = self.get_outgoing_edges(node).next() {
outgoing?;
Ok(true)
} else {
Ok(false)
}
}
/// Get all incoming edges for a given `node`.
fn get_ingoing_edges<'a>(
&'a self,
node: NodeID,
) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
fn get_statistics(&self) -> Option<&GraphStatistic> {
None
}
/// Provides an iterator over all nodes of this edge container that are the source of an edge
fn source_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
}
/// A graph storage is the representation of an edge component of a graph with specific structures.
/// These specific structures are exploited to efficiently implement reachability queries.
pub trait GraphStorage: EdgeContainer {
/// Find all nodes reachable from a given start node inside the component.
fn find_connected<'a>(
&'a self,
node: NodeID,
min_distance: usize,
max_distance: std::ops::Bound<usize>,
) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
/// Find all nodes reachable from a given start node inside the component, when the directed edges are inversed.
fn find_connected_inverse<'a>(
&'a self,
node: NodeID,
min_distance: usize,
max_distance: std::ops::Bound<usize>,
) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a>;
/// Compute the distance (shortest path length) of two nodes inside this component.
fn distance(&self, source: NodeID, target: NodeID) -> Result<Option<usize>>;
/// Check if two nodes are connected with any path in this component given a minimum (`min_distance`) and maximum (`max_distance`) path length.
fn is_connected(
&self,
source: NodeID,
target: NodeID,
min_distance: usize,
max_distance: std::ops::Bound<usize>,
) -> Result<bool>;
/// Get the annotation storage for the edges of this graph storage.
fn get_anno_storage(&self) -> &dyn EdgeAnnotationStorage;
/// Copy the content of another component.
/// This removes the existing content of this graph storage.
fn copy(
&mut self,
node_annos: &dyn NodeAnnotationStorage,
orig: &dyn GraphStorage,
) -> Result<()>;
/// Upcast this graph storage to the [EdgeContainer](trait.EdgeContainer.html) trait.
fn as_edgecontainer(&self) -> &dyn EdgeContainer;
/// Try to downcast this graph storage to a [WriteableGraphStorage](trait.WriteableGraphStorage.html) trait.
/// Returns `None` if this graph storage is not writable.
fn as_writeable(&mut self) -> Option<&mut dyn WriteableGraphStorage> {
None
}
/// If true, finding the inverse connected nodes via [find_connected_inverse(...)](#tymethod.find_connected_inverse) has the same cost as the non-inverse case.
fn inverse_has_same_cost(&self) -> bool {
false
}
/// Return an identifier for this graph storage which is used to distinguish the different graph storages when (de-) serialized.
fn serialization_id(&self) -> String;
/// Load the graph storage from a `location` on the disk. This location is a directory, which can contain files specific to this graph storage.
fn load_from(location: &Path) -> Result<Self>
where
Self: std::marker::Sized;
/// Save the graph storage a `location` on the disk. This location must point to an existing directory.
fn save_to(&self, location: &Path) -> Result<()>;
}
pub fn default_serialize_gs<GS>(gs: &GS, location: &Path) -> Result<()>
where
GS: Serialize,
{
let data_path = location.join("component.bin");
let f_data = std::fs::File::create(data_path)?;
let mut writer = std::io::BufWriter::new(f_data);
bincode::serialize_into(&mut writer, gs)?;
Ok(())
}
pub fn default_deserialize_gs<GS>(location: &Path) -> Result<GS>
where
for<'de> GS: std::marker::Sized + Deserialize<'de>,
{
let data_path = location.join("component.bin");
let f_data = std::fs::File::open(data_path)?;
let input = std::io::BufReader::new(f_data);
let result = bincode::deserialize_from(input)?;
Ok(result)
}
/// Trait for accessing graph storages which can be written to.
pub trait WriteableGraphStorage: GraphStorage {
/// Add an edge to this graph storage.
fn add_edge(&mut self, edge: Edge) -> Result<()>;
/// Add an annotation to an edge in this graph storage.
/// The edge has to exist.
fn add_edge_annotation(&mut self, edge: Edge, anno: Annotation) -> Result<()>;
/// Delete an existing edge.
fn delete_edge(&mut self, edge: &Edge) -> Result<()>;
/// Delete the annotation (defined by the qualified annotation name in `anno_key`) for an `edge`.
fn delete_edge_annotation(&mut self, edge: &Edge, anno_key: &AnnoKey) -> Result<()>;
/// Delete a node from this graph storage.
/// This deletes both edges edges where the node is the source or the target node.
fn delete_node(&mut self, node: NodeID) -> Result<()>;
/// Re-calculate the [statistics](struct.GraphStatistic.html) of this graph storage.
fn calculate_statistics(&mut self) -> Result<()>;
/// Remove all edges from this grap storage.
fn clear(&mut self) -> Result<()>;
}