graphannis_core/annostorage/mod.rs
1pub mod inmemory;
2pub mod ondisk;
3pub mod symboltable;
4
5use smallvec::SmallVec;
6
7use crate::{
8 errors::{GraphAnnisCoreError, Result},
9 types::{AnnoKey, Annotation, Edge, NodeID},
10};
11use std::sync::Arc;
12use std::{borrow::Cow, error::Error};
13use std::{boxed::Box, path::Path};
14
15use self::symboltable::SymbolTable;
16
/// A match is the result of a query on an annotation storage.
///
/// It pairs the matched node with the annotation key that was matched on
/// that node.
#[derive(Debug, Default, Clone, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Match {
    /// The node identifier this match refers to.
    pub node: NodeID,
    /// The qualified annotation name (shared via reference counting).
    pub anno_key: Arc<AnnoKey>,
}
25
/// A group of single matched nodes.
///
/// Backed by a [`SmallVec`] with inline space for 8 matches.
///
/// cbindgen:ignore
pub type MatchGroup = SmallVec<[Match; 8]>;
30
31/// Convert a `MatchGroup` to a vector of node and annotation key symbol IDs.
32pub fn match_group_with_symbol_ids(
33 match_group: &MatchGroup,
34 anno_key_symbols: &mut SymbolTable<AnnoKey>,
35) -> Result<Vec<(NodeID, usize)>> {
36 let result: Result<Vec<_>> = match_group
37 .iter()
38 .map(|m| m.as_annotation_key_symbol(anno_key_symbols))
39 .collect();
40 result
41}
42
43/// Convert a slice of node and annotation key symbol IDs to a `MatchGroup`.
44pub fn match_group_resolve_symbol_ids(
45 unresolved_match_group: &[(NodeID, usize)],
46 anno_key_symbols: &SymbolTable<AnnoKey>,
47) -> Result<MatchGroup> {
48 let result: Result<MatchGroup> = unresolved_match_group
49 .iter()
50 .map(|m| Match::from_annotation_key_symbol(*m, anno_key_symbols))
51 .collect();
52 result
53}
54
55impl Match {
56 fn from_annotation_key_symbol(
57 m: (NodeID, usize),
58 symbols: &SymbolTable<AnnoKey>,
59 ) -> Result<Match> {
60 let anno_key = symbols
61 .get_value(m.1)
62 .ok_or(GraphAnnisCoreError::UnknownAnnoKeySymbolId(m.1))?;
63 Ok(Match {
64 node: m.0,
65 anno_key,
66 })
67 }
68
69 fn as_annotation_key_symbol(
70 &self,
71 symbols: &mut SymbolTable<AnnoKey>,
72 ) -> Result<(NodeID, usize)> {
73 let anno_key_id = symbols.insert_shared(self.anno_key.clone())?;
74 Ok((self.node, anno_key_id))
75 }
76
77 /// Extract the annotation for this match . The annotation value
78 /// is retrieved from the `node_annos` given as argument.
79 pub fn extract_annotation(
80 &self,
81 node_annos: &dyn NodeAnnotationStorage,
82 ) -> Result<Option<Annotation>> {
83 let val = node_annos
84 .get_value_for_item(&self.node, &self.anno_key)?
85 .to_owned();
86 if let Some(val) = val {
87 Ok(Some(Annotation {
88 key: self.anno_key.as_ref().clone(),
89 val: val.into(),
90 }))
91 } else {
92 Ok(None)
93 }
94 }
95
96 /// Returns true if this match is different to all the other matches given as argument.
97 ///
98 /// A single match is different if the node ID or the annotation key are different.
99 pub fn different_to_all(&self, other: &[Match]) -> bool {
100 for o in other.iter() {
101 if self.node == o.node && self.anno_key == o.anno_key {
102 return false;
103 }
104 }
105 true
106 }
107
108 /// Returns true if this match is different to the other match given as argument.
109 ///
110 /// A single match is different if the node ID or the annotation key are different.
111 pub fn different_to(&self, other: &Match) -> bool {
112 self.node != other.node || self.anno_key != other.anno_key
113 }
114}
115
116impl From<(Edge, Arc<AnnoKey>)> for Match {
117 fn from(t: (Edge, Arc<AnnoKey>)) -> Self {
118 Match {
119 node: t.0.source,
120 anno_key: t.1,
121 }
122 }
123}
124
125impl From<(NodeID, Arc<AnnoKey>)> for Match {
126 fn from(t: (NodeID, Arc<AnnoKey>)) -> Self {
127 Match {
128 node: t.0,
129 anno_key: t.1,
130 }
131 }
132}
133
/// A constraint on an annotation value, e.g. as accepted by
/// `AnnotationStorage::exact_anno_search`.
#[derive(Clone)]
pub enum ValueSearch<T> {
    /// No specific value is given. This is what `None` is converted to.
    Any,
    /// A specific value is given. This is what `Some(v)` is converted to.
    Some(T),
    /// Negated form of [`ValueSearch::Some`].
    /// NOTE(review): presumably selects everything that does *not* have the
    /// given value; confirm against the storage implementations.
    NotSome(T),
}
140
141impl<T> From<Option<T>> for ValueSearch<T> {
142 fn from(orig: Option<T>) -> ValueSearch<T> {
143 match orig {
144 None => ValueSearch::Any,
145 Some(v) => ValueSearch::Some(v),
146 }
147 }
148}
149
150impl<T> ValueSearch<T> {
151 #[inline]
152 pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> ValueSearch<U> {
153 match self {
154 ValueSearch::Any => ValueSearch::Any,
155 ValueSearch::Some(v) => ValueSearch::Some(f(v)),
156 ValueSearch::NotSome(v) => ValueSearch::NotSome(f(v)),
157 }
158 }
159
160 #[inline]
161 pub fn as_ref(&self) -> ValueSearch<&T> {
162 match self {
163 ValueSearch::Any => ValueSearch::Any,
164 ValueSearch::Some(v) => ValueSearch::Some(v),
165 ValueSearch::NotSome(v) => ValueSearch::NotSome(v),
166 }
167 }
168}
169
/// Access annotations for nodes or edges.
///
/// The type parameter `T` is the kind of item the annotations are attached to.
pub trait AnnotationStorage<T>: Send + Sync
where
    T: Send + Sync,
{
    /// Insert an annotation `anno` (with annotation key and value) for an item `item`.
    fn insert(&mut self, item: T, anno: Annotation) -> Result<()>;

    /// Get all the annotation keys of an item, filtered by the optional namespace (`ns`) and `name`.
    fn get_all_keys_for_item(
        &self,
        item: &T,
        ns: Option<&str>,
        name: Option<&str>,
    ) -> Result<Vec<Arc<AnnoKey>>>;

    /// Remove the annotation given by its `key` for a specific `item`.
    /// Returns the value for that annotation, if it existed.
    fn remove_annotation_for_item(
        &mut self,
        item: &T,
        key: &AnnoKey,
    ) -> Result<Option<Cow<'_, str>>>;

    /// Remove all annotations for the given item. Returns whether the item had
    /// any annotations.
    fn remove_item(&mut self, item: &T) -> Result<bool>;

    /// Remove all annotations.
    fn clear(&mut self) -> Result<()>;

    /// Get all qualified annotation names (including namespace) for a given annotation name.
    fn get_qnames(&self, name: &str) -> Result<Vec<AnnoKey>>;

    /// Get all annotations for an `item` (node or edge).
    fn get_annotations_for_item(&self, item: &T) -> Result<Vec<Annotation>>;

    /// Get the annotation value for a given `item` and the annotation `key`.
    fn get_value_for_item(&self, item: &T, key: &AnnoKey) -> Result<Option<Cow<'_, str>>>;

    /// Returns `true` if the given `item` has an annotation for the given `key`.
    fn has_value_for_item(&self, item: &T, key: &AnnoKey) -> Result<bool>;

    /// Get the matching annotation keys for each item in the iterator.
    ///
    /// This function allows filtering the returned annotation keys by specifying the namespace and name.
    fn get_keys_for_iterator<'a>(
        &'a self,
        ns: Option<&str>,
        name: Option<&str>,
        it: Box<dyn Iterator<Item = std::result::Result<T, Box<dyn Error + Send + Sync>>> + 'a>,
    ) -> Result<Vec<Match>>;

    /// Return the total number of annotations contained in this `AnnotationStorage`.
    fn number_of_annotations(&self) -> Result<usize>;

    /// Return true if there are no annotations in this `AnnotationStorage`.
    fn is_empty(&self) -> Result<bool>;

    /// Return the number of annotations contained in this `AnnotationStorage` filtered by `name` and optional namespace (`ns`).
    fn number_of_annotations_by_name(&self, ns: Option<&str>, name: &str) -> Result<usize>;

    /// Returns an iterator for all items that exactly match the given annotation constraints.
    /// The annotation `name` must be given as argument, the other arguments are optional.
    ///
    /// - `namespace` - If given, only annotations having this namespace are returned.
    /// - `name` - Only annotations with this name are returned.
    /// - `value` - Constrain the value of the annotation.
    ///
    /// The result is an iterator over matches.
    /// A match contains the node ID and the qualified name of the matched annotation
    /// (e.g. there can be multiple annotations with the same name if the namespace is different).
    fn exact_anno_search<'a>(
        &'a self,
        namespace: Option<&str>,
        name: &str,
        value: ValueSearch<&str>,
    ) -> Box<dyn Iterator<Item = Result<Match>> + 'a>;

    /// Returns an iterator for all items where the value matches the regular expression.
    /// The annotation `name` and the `pattern` for the value must be given as argument, the
    /// `namespace` argument is optional and can be used as additional constraint.
    ///
    /// - `namespace` - If given, only annotations having this namespace are returned.
    /// - `name` - Only annotations with this name are returned.
    /// - `pattern` - Only annotations having a value that matches this pattern are returned.
    /// - `negated` - If true, find all annotations that do not match the pattern.
    ///
    /// The result is an iterator over matches.
    /// A match contains the node ID and the qualified name of the matched annotation
    /// (e.g. there can be multiple annotations with the same name if the namespace is different).
    fn regex_anno_search<'a>(
        &'a self,
        namespace: Option<&str>,
        name: &str,
        pattern: &str,
        negated: bool,
    ) -> Box<dyn Iterator<Item = Result<Match>> + 'a>;

    /// Estimate the number of results for an [annotation exact search](#tymethod.exact_anno_search) for a given inclusive value range.
    ///
    /// - `ns` - If given, only annotations having this namespace are considered.
    /// - `name` - Only annotations with this name are considered.
    /// - `lower_val` - Inclusive lower bound for the annotation value.
    /// - `upper_val` - Inclusive upper bound for the annotation value.
    fn guess_max_count(
        &self,
        ns: Option<&str>,
        name: &str,
        lower_val: &str,
        upper_val: &str,
    ) -> Result<usize>;

    /// Estimate the number of results for an [annotation regular expression search](#tymethod.regex_anno_search)
    /// for a given pattern.
    ///
    /// - `ns` - If given, only annotations having this namespace are considered.
    /// - `name` - Only annotations with this name are considered.
    /// - `pattern` - The regular expression pattern.
    fn guess_max_count_regex(&self, ns: Option<&str>, name: &str, pattern: &str) -> Result<usize>;

    /// Estimate the most frequent value for a given annotation `name` with an optional namespace (`ns`).
    ///
    /// If more than one qualified annotation name matches the definition, the more frequent value is used.
    fn guess_most_frequent_value(
        &self,
        ns: Option<&str>,
        name: &str,
    ) -> Result<Option<Cow<'_, str>>>;

    /// Return a list of all existing values for a given annotation `key`.
    /// If the `most_frequent_first` parameter is true, the results are sorted by their frequency.
    fn get_all_values(&self, key: &AnnoKey, most_frequent_first: bool)
        -> Result<Vec<Cow<'_, str>>>;

    /// Get all the annotation keys which are part of this annotation storage.
    fn annotation_keys(&self) -> Result<Vec<AnnoKey>>;

    /// Return the largest item which has an annotation value in this annotation storage.
    ///
    /// This can be used to calculate new IDs for new items.
    fn get_largest_item(&self) -> Result<Option<T>>;

    /// (Re-) calculate the internal statistics needed for estimating annotation values.
    ///
    /// An annotation storage can have invalid statistics, in which case the estimation functions
    /// will not return valid results.
    fn calculate_statistics(&mut self) -> Result<()>;

    /// Load the annotations from an external `location`.
    fn load_annotations_from(&mut self, location: &Path) -> Result<()>;

    /// Save the current annotations to a `location` on the disk, but do not remember this location.
    fn save_annotations_to(&self, location: &Path) -> Result<()>;
}
325
/// An annotation storage for nodes, i.e. an [`AnnotationStorage`] whose items
/// are [`NodeID`]s.
pub trait NodeAnnotationStorage: AnnotationStorage<NodeID> {
    /// Return the internal [`NodeID`] for the node that has the given
    /// `node_name` as `annis::node_name` annotation.
    ///
    /// The `Option` in the return type allows for the case that no such node
    /// is found.
    fn get_node_id_from_name(&self, node_name: &str) -> Result<Option<NodeID>>;

    /// Returns true if there is a node with the given `node_name` as value for
    /// the `annis::node_name` annotation.
    fn has_node_name(&self, node_name: &str) -> Result<bool>;
}
336
/// An annotation storage for edges, i.e. an [`AnnotationStorage`] whose items
/// are [`Edge`]s. It adds no methods of its own.
pub trait EdgeAnnotationStorage: AnnotationStorage<Edge> {}