graphannis_core/annostorage/
mod.rs

1pub mod inmemory;
2pub mod ondisk;
3pub mod symboltable;
4
5use smallvec::SmallVec;
6
7use crate::{
8    errors::{GraphAnnisCoreError, Result},
9    types::{AnnoKey, Annotation, Edge, NodeID},
10};
11use std::sync::Arc;
12use std::{borrow::Cow, error::Error};
13use std::{boxed::Box, path::Path};
14
15use self::symboltable::SymbolTable;
16
/// A match is the result of a query on an annotation storage.
///
/// It pairs the ID of a node with the annotation key the match refers to.
#[derive(Debug, Default, Clone, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Match {
    /// The node identifier this match refers to.
    pub node: NodeID,
    /// The qualified annotation name.
    ///
    /// Held behind an `Arc`, so cloning a match shares the key instead of copying it.
    pub anno_key: Arc<AnnoKey>,
}
25
/// A group of single matched nodes.
///
/// Backed by a `SmallVec` whose inline array has room for eight [`Match`] entries.
///
/// cbindgen:ignore
pub type MatchGroup = SmallVec<[Match; 8]>;
30
31/// Convert a `MatchGroup` to a vector of node and annotation key symbol IDs.
32pub fn match_group_with_symbol_ids(
33    match_group: &MatchGroup,
34    anno_key_symbols: &mut SymbolTable<AnnoKey>,
35) -> Result<Vec<(NodeID, usize)>> {
36    let result: Result<Vec<_>> = match_group
37        .iter()
38        .map(|m| m.as_annotation_key_symbol(anno_key_symbols))
39        .collect();
40    result
41}
42
43/// Convert a slice of node and annotation key symbol IDs to a `MatchGroup`.
44pub fn match_group_resolve_symbol_ids(
45    unresolved_match_group: &[(NodeID, usize)],
46    anno_key_symbols: &SymbolTable<AnnoKey>,
47) -> Result<MatchGroup> {
48    let result: Result<MatchGroup> = unresolved_match_group
49        .iter()
50        .map(|m| Match::from_annotation_key_symbol(*m, anno_key_symbols))
51        .collect();
52    result
53}
54
55impl Match {
56    fn from_annotation_key_symbol(
57        m: (NodeID, usize),
58        symbols: &SymbolTable<AnnoKey>,
59    ) -> Result<Match> {
60        let anno_key = symbols
61            .get_value(m.1)
62            .ok_or(GraphAnnisCoreError::UnknownAnnoKeySymbolId(m.1))?;
63        Ok(Match {
64            node: m.0,
65            anno_key,
66        })
67    }
68
69    fn as_annotation_key_symbol(
70        &self,
71        symbols: &mut SymbolTable<AnnoKey>,
72    ) -> Result<(NodeID, usize)> {
73        let anno_key_id = symbols.insert_shared(self.anno_key.clone())?;
74        Ok((self.node, anno_key_id))
75    }
76
77    /// Extract the annotation for this match . The annotation value
78    /// is retrieved from the `node_annos` given as argument.
79    pub fn extract_annotation(
80        &self,
81        node_annos: &dyn NodeAnnotationStorage,
82    ) -> Result<Option<Annotation>> {
83        let val = node_annos
84            .get_value_for_item(&self.node, &self.anno_key)?
85            .to_owned();
86        if let Some(val) = val {
87            Ok(Some(Annotation {
88                key: self.anno_key.as_ref().clone(),
89                val: val.into(),
90            }))
91        } else {
92            Ok(None)
93        }
94    }
95
96    /// Returns true if this match is different to all the other matches given as argument.
97    ///
98    /// A single match is different if the node ID or the annotation key are different.
99    pub fn different_to_all(&self, other: &[Match]) -> bool {
100        for o in other.iter() {
101            if self.node == o.node && self.anno_key == o.anno_key {
102                return false;
103            }
104        }
105        true
106    }
107
108    /// Returns true if this match is different to the other match given as argument.
109    ///
110    /// A single match is different if the node ID or the annotation key are different.
111    pub fn different_to(&self, other: &Match) -> bool {
112        self.node != other.node || self.anno_key != other.anno_key
113    }
114}
115
116impl From<(Edge, Arc<AnnoKey>)> for Match {
117    fn from(t: (Edge, Arc<AnnoKey>)) -> Self {
118        Match {
119            node: t.0.source,
120            anno_key: t.1,
121        }
122    }
123}
124
125impl From<(NodeID, Arc<AnnoKey>)> for Match {
126    fn from(t: (NodeID, Arc<AnnoKey>)) -> Self {
127        Match {
128            node: t.0,
129            anno_key: t.1,
130        }
131    }
132}
133
/// A constraint on an annotation value, used as argument when searching for
/// annotations.
#[derive(Clone)]
pub enum ValueSearch<T> {
    /// No specific value is given (this is what an `Option::None` is converted into).
    Any,
    /// Carries a value; presumably only annotations with exactly this value
    /// match (confirm against the storage implementations).
    Some(T),
    /// Carries a value; presumably only annotations with a different value
    /// match (confirm against the storage implementations).
    NotSome(T),
}
140
141impl<T> From<Option<T>> for ValueSearch<T> {
142    fn from(orig: Option<T>) -> ValueSearch<T> {
143        match orig {
144            None => ValueSearch::Any,
145            Some(v) => ValueSearch::Some(v),
146        }
147    }
148}
149
150impl<T> ValueSearch<T> {
151    #[inline]
152    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> ValueSearch<U> {
153        match self {
154            ValueSearch::Any => ValueSearch::Any,
155            ValueSearch::Some(v) => ValueSearch::Some(f(v)),
156            ValueSearch::NotSome(v) => ValueSearch::NotSome(f(v)),
157        }
158    }
159
160    #[inline]
161    pub fn as_ref(&self) -> ValueSearch<&T> {
162        match self {
163            ValueSearch::Any => ValueSearch::Any,
164            ValueSearch::Some(v) => ValueSearch::Some(v),
165            ValueSearch::NotSome(v) => ValueSearch::NotSome(v),
166        }
167    }
168}
169
/// Access annotations for nodes or edges.
///
/// The type parameter `T` is the kind of item the annotations are attached to.
pub trait AnnotationStorage<T>: Send + Sync
where
    T: Send + Sync,
{
    /// Insert an annotation `anno` (with annotation key and value) for an item `item`.
    fn insert(&mut self, item: T, anno: Annotation) -> Result<()>;

    /// Get all the annotation keys of an item, filtered by the optional namespace (`ns`) and `name`.
    fn get_all_keys_for_item(
        &self,
        item: &T,
        ns: Option<&str>,
        name: Option<&str>,
    ) -> Result<Vec<Arc<AnnoKey>>>;

    /// Remove the annotation given by its `key` for a specific `item`.
    ///
    /// Returns the value for that annotation, if it existed.
    fn remove_annotation_for_item(
        &mut self,
        item: &T,
        key: &AnnoKey,
    ) -> Result<Option<Cow<'_, str>>>;

    /// Remove all annotations for the given item. Returns whether the item had
    /// any annotations.
    fn remove_item(&mut self, item: &T) -> Result<bool>;

    /// Remove all annotations.
    fn clear(&mut self) -> Result<()>;

    /// Get all qualified annotation names (including namespace) for a given annotation name.
    fn get_qnames(&self, name: &str) -> Result<Vec<AnnoKey>>;

    /// Get all annotations for an `item` (node or edge).
    fn get_annotations_for_item(&self, item: &T) -> Result<Vec<Annotation>>;

    /// Get the annotation value for a given `item` and the annotation `key`.
    fn get_value_for_item(&self, item: &T, key: &AnnoKey) -> Result<Option<Cow<'_, str>>>;

    /// Returns `true` if the given `item` has an annotation for the given `key`.
    fn has_value_for_item(&self, item: &T, key: &AnnoKey) -> Result<bool>;

    /// Get the matching annotation keys for each item in the iterator.
    ///
    /// This function allows filtering the received annotation keys by specifying the namespace and name.
    fn get_keys_for_iterator<'a>(
        &'a self,
        ns: Option<&str>,
        name: Option<&str>,
        it: Box<dyn Iterator<Item = std::result::Result<T, Box<dyn Error + Send + Sync>>> + 'a>,
    ) -> Result<Vec<Match>>;

    /// Return the total number of annotations contained in this `AnnotationStorage`.
    fn number_of_annotations(&self) -> Result<usize>;

    /// Return true if there are no annotations in this `AnnotationStorage`.
    fn is_empty(&self) -> Result<bool>;

    /// Return the number of annotations contained in this `AnnotationStorage` filtered by `name` and optional namespace (`ns`).
    fn number_of_annotations_by_name(&self, ns: Option<&str>, name: &str) -> Result<usize>;

    /// Returns an iterator for all items that exactly match the given annotation constraints.
    /// The annotation `name` must be given as argument, the other arguments are optional.
    ///
    /// - `namespace` - If given, only annotations having this namespace are returned.
    /// - `name` - Only annotations with this name are returned.
    /// - `value` - Constrain the value of the annotation.
    ///
    /// The result is an iterator over matches.
    /// A match contains the node ID and the qualified name of the matched annotation
    /// (e.g. there can be multiple annotations with the same name if the namespace is different).
    fn exact_anno_search<'a>(
        &'a self,
        namespace: Option<&str>,
        name: &str,
        value: ValueSearch<&str>,
    ) -> Box<dyn Iterator<Item = Result<Match>> + 'a>;

    /// Returns an iterator for all items where the value matches the regular expression.
    /// The annotation `name` and the `pattern` for the value must be given as argument, the
    /// `namespace` argument is optional and can be used as additional constraint.
    ///
    /// - `namespace` - If given, only annotations having this namespace are returned.
    /// - `name` - Only annotations with this name are returned.
    /// - `pattern` - Only annotations having a value that matches this pattern are returned.
    /// - `negated` - If true, find all annotations that do not match the value.
    ///
    /// The result is an iterator over matches.
    /// A match contains the node ID and the qualified name of the matched annotation
    /// (e.g. there can be multiple annotations with the same name if the namespace is different).
    fn regex_anno_search<'a>(
        &'a self,
        namespace: Option<&str>,
        name: &str,
        pattern: &str,
        negated: bool,
    ) -> Box<dyn Iterator<Item = Result<Match>> + 'a>;

    /// Estimate the number of results for an [annotation exact search](#tymethod.exact_anno_search) for a given inclusive value range.
    ///
    /// - `ns` - If given, only annotations having this namespace are considered.
    /// - `name` - Only annotations with this name are considered.
    /// - `lower_val` - Inclusive lower bound for the annotation value.
    /// - `upper_val` - Inclusive upper bound for the annotation value.
    fn guess_max_count(
        &self,
        ns: Option<&str>,
        name: &str,
        lower_val: &str,
        upper_val: &str,
    ) -> Result<usize>;

    /// Estimate the number of results for an [annotation regular expression search](#tymethod.regex_anno_search)
    /// for a given pattern.
    ///
    /// - `ns` - If given, only annotations having this namespace are considered.
    /// - `name` - Only annotations with this name are considered.
    /// - `pattern` - The regular expression pattern.
    fn guess_max_count_regex(&self, ns: Option<&str>, name: &str, pattern: &str) -> Result<usize>;

    /// Estimate the most frequent value for a given annotation `name` with an optional namespace (`ns`).
    ///
    /// If more than one qualified annotation name matches the definition, the more frequent value is used.
    fn guess_most_frequent_value(
        &self,
        ns: Option<&str>,
        name: &str,
    ) -> Result<Option<Cow<'_, str>>>;

    /// Return a list of all existing values for a given annotation `key`.
    /// If the `most_frequent_first` parameter is true, the results are sorted by their frequency.
    fn get_all_values(&self, key: &AnnoKey, most_frequent_first: bool)
    -> Result<Vec<Cow<'_, str>>>;

    /// Get all the annotation keys which are part of this annotation storage.
    fn annotation_keys(&self) -> Result<Vec<AnnoKey>>;

    /// Return the largest item which has an annotation value in this annotation storage.
    ///
    /// This can be used to calculate new IDs for new items.
    fn get_largest_item(&self) -> Result<Option<T>>;

    /// (Re-) calculate the internal statistics needed for estimating annotation values.
    ///
    /// An annotation storage can have invalid statistics, in which case the estimation functions will not return
    /// valid results.
    fn calculate_statistics(&mut self) -> Result<()>;

    /// Load the annotations from an external `location`.
    fn load_annotations_from(&mut self, location: &Path) -> Result<()>;

    /// Save the current annotations to a `location` on the disk, but do not remember this location.
    fn save_annotations_to(&self, location: &Path) -> Result<()>;
}
325
/// An annotation storage for nodes.
///
/// Extends `AnnotationStorage<NodeID>` with lookups by node name.
pub trait NodeAnnotationStorage: AnnotationStorage<NodeID> {
    /// Return the internal [`NodeID`] for the node that has the given
    /// `node_name` as `annis::node_name` annotation.
    fn get_node_id_from_name(&self, node_name: &str) -> Result<Option<NodeID>>;

    /// Returns true if there is a node with the given `node_name` as value for
    /// the `annis::node_name` annotation.
    fn has_node_name(&self, node_name: &str) -> Result<bool>;
}
336
/// An annotation storage for edges.
///
/// This trait declares no methods of its own beyond `AnnotationStorage<Edge>`.
pub trait EdgeAnnotationStorage: AnnotationStorage<Edge> {}