graphannis_core/graph/storage/
registry.rs

1use super::adjacencylist::AdjacencyListStorage;
2use super::dense_adjacency::DenseAdjacencyListStorage;
3use super::disk_adjacency::DiskAdjacencyListStorage;
4use super::disk_path::DiskPathStorage;
5use super::linear::LinearGraphStorage;
6
7use super::{GraphStatistic, GraphStorage, prepost::PrePostOrderStorage};
8use super::{disk_adjacency, disk_path};
9use crate::{
10    errors::{GraphAnnisCoreError, Result},
11    graph::Graph,
12    types::ComponentType,
13};
14use serde::Deserialize;
15use std::collections::HashMap;
16use std::{path::Path, sync::Arc};
17
18pub struct GSInfo {
19    pub id: String,
20    constructor: fn() -> Result<Arc<dyn GraphStorage>>,
21    deserialize_func: fn(&Path) -> Result<Arc<dyn GraphStorage>>,
22}
23
24lazy_static! {
25    static ref REGISTRY: HashMap<String, GSInfo> = {
26        let mut m = HashMap::new();
27
28        insert_info::<AdjacencyListStorage>(&mut m);
29        m.insert(
30            disk_adjacency::SERIALIZATION_ID.to_owned(),
31            create_info_diskadjacency(),
32        );
33
34        m.insert(
35            disk_path::SERIALIZATION_ID.to_owned(),
36            create_info_diskpath(),
37        );
38        insert_info::<DenseAdjacencyListStorage>(&mut m);
39
40        insert_info::<PrePostOrderStorage<u64, u64>>(&mut m);
41        insert_info::<PrePostOrderStorage<u64, u32>>(&mut m);
42        insert_info::<PrePostOrderStorage<u64, u8>>(&mut m);
43        insert_info::<PrePostOrderStorage<u32, u32>>(&mut m);
44        insert_info::<PrePostOrderStorage<u32, u8>>(&mut m);
45        insert_info::<PrePostOrderStorage<u16, u32>>(&mut m);
46        insert_info::<PrePostOrderStorage<u16, u8>>(&mut m);
47
48        insert_info::<LinearGraphStorage<u64>>(&mut m);
49        insert_info::<LinearGraphStorage<u32>>(&mut m);
50        insert_info::<LinearGraphStorage<u16>>(&mut m);
51        insert_info::<LinearGraphStorage<u8>>(&mut m);
52
53        m
54    };
55}
56
57pub fn create_writeable<CT: ComponentType>(
58    graph: &Graph<CT>,
59    orig: Option<&dyn GraphStorage>,
60) -> Result<Arc<dyn GraphStorage>> {
61    if graph.disk_based {
62        let mut result = DiskAdjacencyListStorage::new()?;
63        if let Some(orig) = orig {
64            result.copy(graph.get_node_annos(), orig)?;
65        }
66        Ok(Arc::from(result))
67    } else {
68        let mut result = AdjacencyListStorage::new();
69        if let Some(orig) = orig {
70            result.copy(graph.get_node_annos(), orig)?;
71        }
72        Ok(Arc::from(result))
73    }
74}
75
76pub fn get_optimal_impl_heuristic<CT: ComponentType>(
77    db: &Graph<CT>,
78    stats: &GraphStatistic,
79) -> GSInfo {
80    if db.disk_based
81        && stats.max_depth <= disk_path::MAX_DEPTH
82        && stats.max_fan_out == 1
83        && !stats.cyclic
84    {
85        // If we need to use a disk-based implementation and have short paths
86        // without any branching (e.g. PartOf is often structured that way), use
87        // an optimized implementation that stores the single path for each
88        // source node.
89        return create_info_diskpath();
90    } else if stats.max_depth <= 1 {
91        // if we don't have any deep graph structures an adjencency list is always fasted (and has no overhead)
92        return get_adjacencylist_impl(db, stats);
93    } else if stats.rooted_tree {
94        if stats.max_fan_out <= 1 {
95            return get_linear_by_size(stats);
96        } else {
97            return get_prepostorder_by_size(stats);
98        }
99    // it might be still wise to use pre/post order if the graph is "almost" a tree, thus
100    // does not have many exceptions
101    } else if !stats.cyclic && stats.dfs_visit_ratio <= 1.03 {
102        // there is no more than 3% overhead
103        // TODO: how to determine the border?
104        return get_prepostorder_by_size(stats);
105    }
106
107    // fallback
108    get_adjacencylist_impl(db, stats)
109}
110
111fn get_adjacencylist_impl<CT: ComponentType>(db: &Graph<CT>, stats: &GraphStatistic) -> GSInfo {
112    if db.disk_based {
113        create_info_diskadjacency()
114    } else {
115        // check if a large percentage of nodes are part of the graph storage
116        if let Ok(Some(largest_node_id)) = db.node_annos.get_largest_item()
117            && stats.max_fan_out <= 1
118            && (stats.nodes as f64 / largest_node_id as f64) >= 0.75
119        {
120            return create_info::<DenseAdjacencyListStorage>();
121        }
122
123        create_info::<AdjacencyListStorage>()
124    }
125}
126
127fn get_prepostorder_by_size(stats: &GraphStatistic) -> GSInfo {
128    if stats.rooted_tree {
129        // There are exactly two order values per node and there can be only one order value per node
130        // in a tree.
131        if stats.nodes < (u16::MAX / 2) as usize {
132            if stats.max_depth < u8::MAX as usize {
133                return create_info::<PrePostOrderStorage<u16, u8>>();
134            } else if stats.max_depth < u32::MAX as usize {
135                return create_info::<PrePostOrderStorage<u16, u32>>();
136            }
137        } else if stats.nodes < (u32::MAX / 2) as usize {
138            if stats.max_depth < u8::MAX as usize {
139                return create_info::<PrePostOrderStorage<u32, u8>>();
140            } else if stats.max_depth < u32::MAX as usize {
141                return create_info::<PrePostOrderStorage<u32, u32>>();
142            }
143        } else if stats.max_depth < u8::MAX as usize {
144            return create_info::<PrePostOrderStorage<u64, u8>>();
145        } else if stats.max_depth < u32::MAX as usize {
146            return create_info::<PrePostOrderStorage<u64, u32>>();
147        }
148    } else if stats.max_depth < u8::MAX as usize {
149        return create_info::<PrePostOrderStorage<u64, u8>>();
150    }
151    create_info::<PrePostOrderStorage<u64, u64>>()
152}
153
154fn get_linear_by_size(stats: &GraphStatistic) -> GSInfo {
155    if stats.max_depth < u8::MAX as usize {
156        create_info::<LinearGraphStorage<u8>>()
157    } else if stats.max_depth < u16::MAX as usize {
158        create_info::<LinearGraphStorage<u16>>()
159    } else if stats.max_depth < u32::MAX as usize {
160        create_info::<LinearGraphStorage<u32>>()
161    } else {
162        create_info::<LinearGraphStorage<u64>>()
163    }
164}
165
166fn insert_info<GS>(registry: &mut HashMap<String, GSInfo>)
167where
168    for<'de> GS: 'static + GraphStorage + Default + Deserialize<'de>,
169{
170    let info = create_info::<GS>();
171    registry.insert(info.id.clone(), info);
172}
173
174fn create_info<GS>() -> GSInfo
175where
176    for<'de> GS: 'static + GraphStorage + Default + Deserialize<'de>,
177{
178    // create an instance to get the name
179    let instance = GS::default();
180
181    GSInfo {
182        id: instance.serialization_id(),
183        constructor: || Ok(Arc::new(GS::default())),
184        deserialize_func: |location| Ok(Arc::new(GS::load_from(location)?)),
185    }
186}
187
188fn create_info_diskadjacency() -> GSInfo {
189    GSInfo {
190        id: disk_adjacency::SERIALIZATION_ID.to_owned(),
191        constructor: || Ok(Arc::from(DiskAdjacencyListStorage::new()?)),
192        deserialize_func: |path| {
193            let result = DiskAdjacencyListStorage::load_from(path)?;
194            Ok(Arc::from(result))
195        },
196    }
197}
198
199fn create_info_diskpath() -> GSInfo {
200    GSInfo {
201        id: disk_path::SERIALIZATION_ID.to_string(),
202        constructor: || Ok(Arc::from(DiskPathStorage::new()?)),
203        deserialize_func: |path| {
204            let result = DiskPathStorage::load_from(path)?;
205            Ok(Arc::from(result))
206        },
207    }
208}
209
210pub fn create_from_info(info: &GSInfo) -> Result<Arc<dyn GraphStorage>> {
211    (info.constructor)()
212}
213
214pub fn deserialize(impl_name: &str, location: &Path) -> Result<Arc<dyn GraphStorage>> {
215    let info = REGISTRY
216        .get(impl_name)
217        .ok_or_else(|| GraphAnnisCoreError::UnknownGraphStorageImpl(impl_name.to_string()))?;
218    (info.deserialize_func)(location)
219}