graphannis/annis/util/
mod.rs

1pub mod quicksort;
2pub mod sortablecontainer;
3
4use graphannis_core::serializer::KeyVec;
5use regex_syntax::hir::literal::Extractor;
6
7use crate::errors::{GraphAnnisError, Result};
8
9use std::{
10    path::Path,
11    time::{Duration, Instant},
12};
13
14/// If a regular expression only matches a specific string, return this string.
15/// Otherwise return `None`.
16pub(crate) fn exact_value_for_regex(pattern: &str) -> Option<String> {
17    let parsed = regex_syntax::Parser::new().parse(pattern).ok()?;
18    if let Some(matching_literals) = Extractor::new().extract(&parsed).literals() {
19        if matching_literals.is_empty() {
20            return Some("".into());
21        } else if matching_literals.len() == 1 && matching_literals[0].is_exact() {
22            let matching_value = std::str::from_utf8(matching_literals[0].as_bytes()).ok()?;
23            return Some(matching_value.to_string());
24        }
25    }
26    None
27}
28
29/// Creates a byte array key from a vector of strings.
30///
31/// The strings are terminated with `\0`.
32pub fn create_str_vec_key(val: &[&str]) -> KeyVec {
33    let mut result: KeyVec = KeyVec::default();
34    for v in val {
35        // append null-terminated string to result
36        for b in v.as_bytes() {
37            result.push(*b)
38        }
39        result.push(0);
40    }
41    result
42}
43
/// A query definition together with the number of results it is expected to produce.
///
/// This is the form returned by [`get_queries_from_csv`]; the corpus names are
/// held as separate entries of a vector.
#[derive(Debug, Deserialize, Clone)]
pub struct SearchDef {
    /// The query itself (presumably an AQL query string, going by the field name).
    pub aql: String,
    /// Number of results the query is expected to yield.
    pub count: u64,
    /// Name of this query definition.
    pub name: String,
    /// Corpus names belonging to this query, one entry per corpus.
    pub corpus: Vec<String>,
}
52
/// Raw form of a query definition in which the corpus names are still one
/// comma-separated string.
///
/// It is turned into a [`SearchDef`] through the `From` implementation, which
/// splits `corpus` at every `,`.
#[derive(Debug, Deserialize)]
struct SearchDefRaw {
    /// The query itself (presumably an AQL query string, going by the field name).
    pub aql: String,
    /// Number of results the query is expected to yield.
    pub count: u64,
    /// Name of this query definition.
    pub name: String,
    /// All corpus names joined with `,` in a single string.
    pub corpus: String,
}
60
61impl From<SearchDefRaw> for SearchDef {
62    fn from(orig: SearchDefRaw) -> Self {
63        // Copy all information but split the corpus names to a vector
64        SearchDef {
65            aql: orig.aql,
66            count: orig.count,
67            name: orig.name,
68            corpus: orig.corpus.split(',').map(|s| s.to_string()).collect(),
69        }
70    }
71}
72
73/// Returns a vector over all query definitions defined in a CSV file.
74/// - `file` - The CSV file path.
75/// - `panic_on_invalid` - If true, an invalid query definition will trigger a panic, otherwise it will be ignored.
76///
77/// Can be used if this query is called in a test case to fail the test.
78pub fn get_queries_from_csv(file: &Path, panic_on_invalid: bool) -> Vec<SearchDef> {
79    if let Ok(mut reader) = csv::Reader::from_path(file) {
80        if panic_on_invalid {
81            let it = reader.deserialize().map(|row| -> SearchDef {
82                let raw: SearchDefRaw = row.unwrap();
83                raw.into()
84            });
85            it.collect()
86        } else {
87            let it = reader
88                .deserialize()
89                .filter_map(|row| -> Option<SearchDef> { row.ok() });
90            it.collect()
91        }
92    } else {
93        vec![]
94    }
95}
96
/// Extracts the node names from a line of whitespace-separated match identifiers.
///
/// Each identifier may be prefixed with an annotation namespace and/or name,
/// separated by `::`. The identifier is split into at most three parts at
/// `::` and the last part is taken as the node name, so a node name that
/// itself contains `::` after two prefixes is kept intact.
pub fn node_names_from_match(match_line: &str) -> Vec<String> {
    match_line
        .split_whitespace()
        .filter_map(|identifier| identifier.splitn(3, "::").last())
        .map(str::to_string)
        .collect()
}
110
/// Measures the time that has passed since its creation and compares it
/// against an optional limit.
#[derive(Clone, Copy)]
pub struct TimeoutCheck {
    /// Point in time at which this check was created; elapsed time is measured from here.
    start_time: Instant,
    /// Maximum allowed duration, or `None` if no limit is enforced.
    timeout: Option<Duration>,
}
116
117impl TimeoutCheck {
118    pub fn new(timeout: Option<Duration>) -> TimeoutCheck {
119        TimeoutCheck {
120            start_time: Instant::now(),
121            timeout,
122        }
123    }
124
125    /// Check if too much time was used and return an error if this is the case.
126    pub fn check(&self) -> Result<()> {
127        if let Some(timeout) = self.timeout {
128            let elapsed = self.start_time.elapsed();
129            if elapsed > timeout {
130                debug!(
131                    "Timeout reached after {} ms (configured for {} ms)",
132                    elapsed.as_millis(),
133                    timeout.as_millis()
134                );
135                return Err(GraphAnnisError::Timeout);
136            }
137        }
138        Ok(())
139    }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Node names are extracted regardless of how many `::` prefixes precede them.
    #[test]
    fn test_node_names_from_match() {
        // All three spellings of a single match refer to the same node
        let single_matches = [
            "ns::name::corpus1/doc1#n1",
            "name::corpus1/doc1#n1",
            "corpus1/doc1#n1",
        ];
        for &match_line in &single_matches {
            assert_eq!(
                vec!["corpus1/doc1#n1".to_string()],
                node_names_from_match(match_line)
            );
        }

        // Several matches on one line, with differing prefixes
        let expected: Vec<String> = ["n1", "n2", "n3", "n4"]
            .iter()
            .map(|name| name.to_string())
            .collect();
        assert_eq!(
            expected,
            node_names_from_match("annis::test::n1 n2 test2::n3 n4")
        );
    }

    /// Only patterns that are equivalent to a single literal yield a value.
    #[test]
    fn test_exact_value_for_regex() {
        // Character classes, alternations and unparsable patterns have no exact value
        let without_exact_value = ["A[abc]", "A|B", "A\\"];
        for &pattern in &without_exact_value {
            assert_eq!(None, exact_value_for_regex(pattern));
        }

        let with_exact_value = [("A\x2Fb", "A/b"), ("Test", "Test")];
        for &(pattern, literal) in &with_exact_value {
            assert_eq!(Some(literal.to_string()), exact_value_for_regex(pattern));
        }
    }
}