graphannis_core/graph/storage/
prepost.rs

1use super::{
2    EdgeContainer, GraphStatistic, GraphStorage, deserialize_gs_field,
3    legacy::PrePostOrderStorageV1, load_statistics_from_location, save_statistics_to_toml,
4    serialize_gs_field,
5};
6use crate::{
7    annostorage::{
8        AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, inmemory::AnnoStorageImpl,
9    },
10    dfs::CycleSafeDFS,
11    errors::Result,
12    types::{Edge, NodeID, NumValue},
13};
14use rustc_hash::{FxHashMap, FxHashSet};
15use serde::{Deserialize, Serialize};
16use std::clone::Clone;
17use std::{ops::Bound::*, path::Path};
18
19#[derive(PartialOrd, PartialEq, Ord, Eq, Clone, Serialize, Deserialize)]
20pub struct PrePost<OrderT, LevelT> {
21    pub pre: OrderT,
22    pub post: OrderT,
23    pub level: LevelT,
24}
25
26#[derive(Serialize, Deserialize, Clone, Debug)]
27pub(crate) enum OrderVecEntry<OrderT, LevelT> {
28    None,
29    Pre {
30        post: OrderT,
31        level: LevelT,
32        node: NodeID,
33    },
34    Post {
35        pre: OrderT,
36        level: LevelT,
37        node: NodeID,
38    },
39}
40
41#[derive(Serialize, Deserialize, Clone)]
42pub struct PrePostOrderStorage<OrderT: NumValue, LevelT: NumValue> {
43    node_to_order: FxHashMap<NodeID, Vec<PrePost<OrderT, LevelT>>>,
44    order_to_node: Vec<OrderVecEntry<OrderT, LevelT>>,
45    annos: AnnoStorageImpl<Edge>,
46    stats: Option<GraphStatistic>,
47}
48
49struct NodeStackEntry<OrderT, LevelT> {
50    pub id: NodeID,
51    pub order: PrePost<OrderT, LevelT>,
52}
53
54impl<OrderT, LevelT> Default for PrePostOrderStorage<OrderT, LevelT>
55where
56    OrderT: NumValue,
57    LevelT: NumValue,
58{
59    fn default() -> Self {
60        PrePostOrderStorage::new()
61    }
62}
63
64impl<OrderT, LevelT> PrePostOrderStorage<OrderT, LevelT>
65where
66    OrderT: NumValue,
67    LevelT: NumValue,
68{
69    pub fn new() -> PrePostOrderStorage<OrderT, LevelT> {
70        PrePostOrderStorage {
71            node_to_order: FxHashMap::default(),
72            order_to_node: Vec::new(),
73            annos: AnnoStorageImpl::new(),
74            stats: None,
75        }
76    }
77
78    pub fn clear(&mut self) -> Result<()> {
79        self.node_to_order.clear();
80        self.order_to_node.clear();
81        self.annos.clear()?;
82        self.stats = None;
83        Ok(())
84    }
85
86    fn enter_node(
87        current_order: &mut OrderT,
88        node_id: NodeID,
89        level: LevelT,
90        node_stack: &mut NStack<OrderT, LevelT>,
91    ) {
92        let new_entry = NodeStackEntry {
93            id: node_id,
94            order: PrePost {
95                pre: current_order.clone(),
96                level,
97                post: OrderT::zero(),
98            },
99        };
100        current_order.add_assign(OrderT::one());
101        node_stack.push_front(new_entry);
102    }
103
104    fn exit_node(&mut self, current_order: &mut OrderT, node_stack: &mut NStack<OrderT, LevelT>) {
105        // find the correct pre/post entry and update the post-value
106        if let Some(entry) = node_stack.front_mut() {
107            entry.order.post = current_order.clone();
108            current_order.add_assign(OrderT::one());
109
110            self.node_to_order
111                .entry(entry.id)
112                .or_insert_with(|| Vec::with_capacity(1))
113                .push(entry.order.clone());
114        }
115        node_stack.pop_front();
116    }
117
118    fn copy_edge_annos_for_node(
119        &mut self,
120        source_node: NodeID,
121        orig: &dyn GraphStorage,
122    ) -> Result<()> {
123        // Iterate over the outgoing edges of this node to add the edge
124        // annotations
125        let out_edges = orig.get_outgoing_edges(source_node);
126        for target in out_edges {
127            let target = target?;
128            let e = Edge {
129                source: source_node,
130                target,
131            };
132            let edge_annos = orig.get_anno_storage().get_annotations_for_item(&e)?;
133            for a in edge_annos {
134                self.annos.insert(e.clone(), a)?;
135            }
136        }
137        Ok(())
138    }
139}
140
141type NStack<OrderT, LevelT> = std::collections::LinkedList<NodeStackEntry<OrderT, LevelT>>;
142
143impl<OrderT: 'static, LevelT: 'static> EdgeContainer for PrePostOrderStorage<OrderT, LevelT>
144where
145    for<'de> OrderT: NumValue + Deserialize<'de> + Serialize,
146    for<'de> LevelT: NumValue + Deserialize<'de> + Serialize,
147{
148    fn get_outgoing_edges<'a>(
149        &'a self,
150        node: NodeID,
151    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
152        self.find_connected(node, 1, Included(1))
153    }
154
155    fn get_ingoing_edges<'a>(
156        &'a self,
157        node: NodeID,
158    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
159        self.find_connected_inverse(node, 1, Included(1))
160    }
161
162    fn source_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
163        let it = self
164            .node_to_order
165            .iter()
166            .filter_map(move |(n, _order)| {
167                // check if this is actual a source node (and not only a target node)
168                if self.get_outgoing_edges(*n).next().is_some() {
169                    Some(*n)
170                } else {
171                    None
172                }
173            })
174            .map(Ok);
175        Box::new(it)
176    }
177
178    fn get_statistics(&self) -> Option<&GraphStatistic> {
179        self.stats.as_ref()
180    }
181}
182
183impl<OrderT: 'static, LevelT: 'static> GraphStorage for PrePostOrderStorage<OrderT, LevelT>
184where
185    for<'de> OrderT: NumValue + Deserialize<'de> + Serialize,
186    for<'de> LevelT: NumValue + Deserialize<'de> + Serialize,
187{
188    fn get_anno_storage(&self) -> &dyn EdgeAnnotationStorage {
189        &self.annos
190    }
191
192    fn serialization_id(&self) -> String {
193        format!(
194            "PrePostOrderO{}L{}V1",
195            std::mem::size_of::<OrderT>() * 8,
196            std::mem::size_of::<LevelT>() * 8
197        )
198    }
199
200    fn load_from(location: &Path) -> Result<Self>
201    where
202        for<'de> Self: std::marker::Sized + Deserialize<'de>,
203    {
204        let legacy_path = location.join("component.bin");
205        let mut result: Self = if legacy_path.is_file() {
206            let component: PrePostOrderStorageV1<OrderT, LevelT> =
207                deserialize_gs_field(location, "component")?;
208            Self {
209                node_to_order: component.node_to_order,
210                order_to_node: component.order_to_node,
211                annos: component.annos,
212                stats: component.stats.map(GraphStatistic::from),
213            }
214        } else {
215            let stats = load_statistics_from_location(location)?;
216            Self {
217                node_to_order: deserialize_gs_field(location, "node_to_order")?,
218                order_to_node: deserialize_gs_field(location, "order_to_node")?,
219                annos: deserialize_gs_field(location, "annos")?,
220                stats,
221            }
222        };
223
224        result.annos.after_deserialization();
225
226        Ok(result)
227    }
228
229    fn save_to(&self, location: &Path) -> Result<()> {
230        serialize_gs_field(&self.node_to_order, "node_to_order", location)?;
231        serialize_gs_field(&self.order_to_node, "order_to_node", location)?;
232        serialize_gs_field(&self.annos, "annos", location)?;
233        save_statistics_to_toml(location, self.stats.as_ref())?;
234        Ok(())
235    }
236
237    fn find_connected<'a>(
238        &'a self,
239        node: NodeID,
240        min_distance: usize,
241        max_distance: std::ops::Bound<usize>,
242    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
243        if let Some(start_orders) = self.node_to_order.get(&node) {
244            let mut visited = FxHashSet::<NodeID>::default();
245
246            let max_distance = match max_distance {
247                Unbounded => usize::MAX,
248                Included(max_distance) => max_distance,
249                Excluded(max_distance) => max_distance - 1,
250            };
251
252            let it = start_orders
253                .iter()
254                .flat_map(move |root_order: &PrePost<OrderT, LevelT>| {
255                    let start = root_order.pre.to_usize().unwrap_or(0);
256                    let end = root_order
257                        .post
258                        .to_usize()
259                        .unwrap_or(self.order_to_node.len() - 1)
260                        + 1;
261                    self.order_to_node[start..end]
262                        .iter()
263                        .map(move |order| (root_order.clone(), order))
264                })
265                .filter_map(move |(root, order)| match order {
266                    OrderVecEntry::Pre { post, level, node } => {
267                        if let (Some(current_level), Some(root_level)) =
268                            (level.to_usize(), root.level.to_usize())
269                        {
270                            let diff_level = current_level - root_level;
271                            if *post <= root.post
272                                && min_distance <= diff_level
273                                && diff_level <= max_distance
274                            {
275                                Some(*node)
276                            } else {
277                                None
278                            }
279                        } else {
280                            None
281                        }
282                    }
283                    _ => None,
284                })
285                .filter(move |n| visited.insert(*n))
286                .map(Ok);
287            Box::new(it)
288        } else {
289            Box::new(std::iter::empty())
290        }
291    }
292
293    fn find_connected_inverse<'a>(
294        &'a self,
295        start_node: NodeID,
296        min_distance: usize,
297        max_distance: std::ops::Bound<usize>,
298    ) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
299        if let Some(start_orders) = self.node_to_order.get(&start_node) {
300            let mut visited = FxHashSet::<NodeID>::default();
301
302            let max_distance = match max_distance {
303                Unbounded => usize::MAX,
304                Included(max_distance) => max_distance,
305                Excluded(max_distance) => max_distance - 1,
306            };
307
308            let it = start_orders
309                .iter()
310                .flat_map(move |root_order: &PrePost<OrderT, LevelT>| {
311                    let root_pre = root_order.pre.clone().to_usize().unwrap_or(0);
312                    let root_post = root_order
313                        .post
314                        .clone()
315                        .to_usize()
316                        .unwrap_or(self.order_to_node.len() - 1);
317
318                    // decide which search range (either 0..pre or post..len()) is smaller
319                    let (start, end, use_post) = if self.order_to_node.len() - root_post < root_pre
320                    {
321                        // use post..len()
322                        (root_post, self.order_to_node.len(), true)
323                    } else {
324                        // use 0..pre
325                        (0, root_pre + 1, false)
326                    };
327
328                    self.order_to_node[start..end]
329                        .iter()
330                        .enumerate()
331                        .map(move |(idx, order)| (use_post, root_order.clone(), start + idx, order))
332                })
333                .filter_map(move |(use_post, root, idx, order)| {
334                    let (current_pre, current_post, current_level, current_node) = if use_post {
335                        match order {
336                            OrderVecEntry::Post { pre, level, node } => {
337                                (pre.to_usize(), Some(idx), level.to_usize(), Some(node))
338                            }
339                            _ => (None, None, None, None),
340                        }
341                    } else {
342                        match order {
343                            OrderVecEntry::Pre { post, level, node } => {
344                                (Some(idx), post.to_usize(), level.to_usize(), Some(node))
345                            }
346                            _ => (None, None, None, None),
347                        }
348                    };
349
350                    if let (
351                        Some(current_node),
352                        Some(current_pre),
353                        Some(current_post),
354                        Some(current_level),
355                        Some(root_level),
356                        Some(root_pre),
357                        Some(root_post),
358                    ) = (
359                        current_node,
360                        current_pre,
361                        current_post,
362                        current_level,
363                        root.level.to_usize(),
364                        root.pre.to_usize(),
365                        root.post.to_usize(),
366                    ) {
367                        if let Some(diff_level) = root_level.checked_sub(current_level) {
368                            if current_pre <= root_pre
369                                && current_post >= root_post
370                                && min_distance <= diff_level
371                                && diff_level <= max_distance
372                            {
373                                Some(*current_node)
374                            } else {
375                                None
376                            }
377                        } else {
378                            None
379                        }
380                    } else {
381                        None
382                    }
383                })
384                .filter(move |n| visited.insert(*n))
385                .map(Ok);
386            Box::new(it)
387        } else {
388            Box::new(std::iter::empty())
389        }
390    }
391
392    fn distance(&self, source: NodeID, target: NodeID) -> Result<Option<usize>> {
393        if source == target {
394            return Ok(Some(0));
395        }
396
397        let mut min_level = usize::MAX;
398        let mut was_found = false;
399
400        if let (Some(order_source), Some(order_target)) = (
401            self.node_to_order.get(&source),
402            self.node_to_order.get(&target),
403        ) {
404            for order_source in order_source.iter() {
405                for order_target in order_target.iter() {
406                    if order_source.pre <= order_target.pre
407                        && order_target.post <= order_source.post
408                    {
409                        // check the level
410                        if let (Some(source_level), Some(target_level)) =
411                            (order_source.level.to_usize(), order_target.level.to_usize())
412                            && source_level <= target_level
413                        {
414                            was_found = true;
415                            min_level = std::cmp::min(target_level - source_level, min_level);
416                        }
417                    }
418                }
419            }
420        }
421
422        if was_found {
423            Ok(Some(min_level))
424        } else {
425            Ok(None)
426        }
427    }
428    fn is_connected(
429        &self,
430        source: NodeID,
431        target: NodeID,
432        min_distance: usize,
433        max_distance: std::ops::Bound<usize>,
434    ) -> Result<bool> {
435        if let (Some(order_source), Some(order_target)) = (
436            self.node_to_order.get(&source),
437            self.node_to_order.get(&target),
438        ) {
439            let max_distance = match max_distance {
440                Unbounded => usize::MAX,
441                Included(max_distance) => max_distance,
442                Excluded(max_distance) => max_distance - 1,
443            };
444
445            for order_source in order_source.iter() {
446                for order_target in order_target.iter() {
447                    if order_source.pre <= order_target.pre
448                        && order_target.post <= order_source.post
449                    {
450                        // check the level
451                        if let (Some(source_level), Some(target_level)) =
452                            (order_source.level.to_usize(), order_target.level.to_usize())
453                            && source_level <= target_level
454                        {
455                            let diff_level = target_level - source_level;
456                            return Ok(min_distance <= diff_level && diff_level <= max_distance);
457                        }
458                    }
459                }
460            }
461        }
462
463        Ok(false)
464    }
465
466    fn copy(
467        &mut self,
468        _node_annos: &dyn NodeAnnotationStorage,
469        orig: &dyn GraphStorage,
470    ) -> Result<()> {
471        self.clear()?;
472
473        // find all roots of the component
474        let roots: Result<FxHashSet<NodeID>> = orig.root_nodes().collect();
475        let roots = roots?;
476
477        let mut current_order = OrderT::zero();
478        // traverse the graph for each sub-component
479        for start_node in &roots {
480            let mut last_distance: usize = 0;
481
482            let mut node_stack: NStack<OrderT, LevelT> = NStack::new();
483
484            self.copy_edge_annos_for_node(*start_node, orig)?;
485
486            PrePostOrderStorage::enter_node(
487                &mut current_order,
488                *start_node,
489                LevelT::zero(),
490                &mut node_stack,
491            );
492
493            let dfs = CycleSafeDFS::new(orig.as_edgecontainer(), *start_node, 1, usize::MAX);
494            for step in dfs {
495                let step = step?;
496
497                self.copy_edge_annos_for_node(step.node, orig)?;
498
499                if step.distance <= last_distance {
500                    // Neighbor node, the last subtree was iterated completely, thus the last node
501                    // can be assigned a post-order.
502                    // The parent node must be at the top of the node stack,
503                    // thus exit every node which comes after the parent node.
504                    // Distance starts with 0 but the stack size starts with 1.
505                    while node_stack.len() > step.distance {
506                        self.exit_node(&mut current_order, &mut node_stack);
507                    }
508                }
509                // set pre-order
510                if let Some(dist) = LevelT::from_usize(step.distance) {
511                    PrePostOrderStorage::enter_node(
512                        &mut current_order,
513                        step.node,
514                        dist,
515                        &mut node_stack,
516                    );
517                }
518                last_distance = step.distance;
519            } // end for each DFS step
520
521            while !node_stack.is_empty() {
522                self.exit_node(&mut current_order, &mut node_stack);
523            }
524        } // end for each root
525
526        // there must be an entry in the vector for all possible order values
527        self.order_to_node
528            .resize(current_order.to_usize().unwrap_or(0), OrderVecEntry::None);
529        for (node, orders_for_node) in &self.node_to_order {
530            for order in orders_for_node {
531                if let Some(pre) = order.pre.to_usize() {
532                    self.order_to_node[pre] = OrderVecEntry::Pre {
533                        post: order.post.clone(),
534                        level: order.level.clone(),
535                        node: *node,
536                    };
537                }
538                if let Some(post) = order.post.to_usize() {
539                    self.order_to_node[post] = OrderVecEntry::Post {
540                        pre: order.pre.clone(),
541                        level: order.level.clone(),
542                        node: *node,
543                    };
544                }
545            }
546        }
547
548        self.stats = orig.get_statistics().cloned();
549        self.annos.calculate_statistics()?;
550
551        self.node_to_order.shrink_to_fit();
552
553        Ok(())
554    }
555
556    fn as_edgecontainer(&self) -> &dyn EdgeContainer {
557        self
558    }
559}
560
561#[cfg(test)]
562mod tests;