dsh_api/
query_processor.rs

1//! # Enums, traits and structs used by the various find methods
2use std::fmt::{Display, Formatter};
3
4use crate::query_processor::Part::{Matching, NonMatching};
5use crate::DshApiError;
6use regex::Regex;
7
/// # Represents a part of a matched query.
///
/// Each `Part` owns the text of one fragment of the string that was searched and records
/// whether that fragment matched the query or not.
///
/// `Part` is `Clone` and `Eq`, so query results can be duplicated and compared freely.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Part {
  /// Represents a part of a string that did match the query.
  Matching(String),
  /// Represents a part of a string that did not match the query.
  NonMatching(String),
}
16
/// # Defines the methods in the query processor
///
/// A `QueryProcessor` will query a `haystack` string for substrings that match a certain pattern.
/// If there is a match, the result will be a vector with alternating matching and
/// non-matching parts, represented by [`Part`] enums.
///
/// Implementations must be `Send + Sync`, as required by the supertrait bounds.
pub trait QueryProcessor: Send + Sync {
  /// # Returns a description of the query
  ///
  /// # Returns
  /// * a `String` describing the query processor
  fn describe(&self) -> String;

  /// # Applies query to string
  ///
  /// # Parameters
  /// * `haystack` - `String` that will be searched for parts that match the query
  ///
  /// # Returns
  /// * `Some(Vec<Part>)` - when the `haystack` contains one or more parts that match the query
  /// * `None` - when the `haystack` did not match the query
  fn matching_parts(&self, haystack: &str) -> Option<Vec<Part>>;
}
39
40impl Part {
41  /// # Create a `Part::Matching`
42  ///
43  /// # Parameters
44  /// `value` - the value of this `Part::Matching`
45  ///
46  /// # Returns
47  /// The created instance.
48  pub fn matching(value: impl Into<String>) -> Part {
49    Matching(value.into())
50  }
51
52  /// # Create a `Part::NonMatching`
53  ///
54  /// # Parameters
55  /// `value` - the value of this `Part::NonMatching`
56  ///
57  /// # Returns
58  /// The created instance.
59  pub fn non_matching(value: impl Into<String>) -> Part {
60    NonMatching(value.into())
61  }
62}
63
64impl Display for Part {
65  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
66    match self {
67      Matching(part) => write!(f, "{}", part),
68      NonMatching(part) => write!(f, "{}", part),
69    }
70  }
71}
72
73/// # Generate string with ansi formatting from a `Part`
74///
75/// For a `NonMatching` part this method will return the literal inner `String`. For a `Matching`
76/// part the returned `String` will be wrapped in an ANSI escape code for a bold type face.
77///
78/// # Parameters
79/// `part` - The `Part` to generate the formatted string from
80///
81/// # Returns
82/// String representation of this `Part`
83///
84/// # Examples
85/// ```
86/// use dsh_api::query_processor::{part_to_ansi_formatted_string, Part};
87///
88/// println!("part is {}", part_to_ansi_formatted_string(&Part::matching("MATCH")));
89/// ```
90/// This will print the string `"part is \x1B[1mMATCH\x1B[0m"` which,
91/// on a terminal that supports ANSI escape sequences,
92/// will be shown as `"part is `<code><b>MATCH</b></code>`"`.
93pub fn part_to_ansi_formatted_string(part: &Part) -> String {
94  match part {
95    Matching(part) => format!("\x1B[1m{}\x1B[0m", part),
96    NonMatching(part) => part.to_string(),
97  }
98}
99
100/// # Generate string with ansi formatting from a slice of `Part`s
101///
102/// This method will generate a `String` representation from a `&[Part]` slice, where the
103/// `Matching` parts will be wrapped in an ANSI escape code for a bold type face.
104///
105/// # Parameters
106/// `parts` - The `Part`s to generate the formatted string from
107///
108/// # Returns
109/// String representation of this `&[Part]` slice
110/// # Examples
111/// ```
112/// use dsh_api::query_processor::{parts_to_ansi_formatted_string, Part};
113///
114/// let parts: [Part; 3] =
115///   [Part::non_matching("prefix"), Part::matching("MATCH"), Part::non_matching("postfix")];
116/// println!("parts are {}", parts_to_ansi_formatted_string(&parts));
117/// ```
118/// This will print the string `"parts are prefix\x1B[1mMATCH\x1B[0mpostfix"` which,
119/// on a terminal that supports ANSI escape sequences,
120/// will be shown as `"parts are prefix`<code><b>MATCH</b></code>`postfix"`.
121pub fn parts_to_ansi_formatted_string(parts: &[Part]) -> String {
122  parts.iter().map(part_to_ansi_formatted_string).collect::<Vec<_>>().join("")
123}
124
/// # Query processor implementation for exact matches
///
/// # Examples
/// This example will demonstrate how to create and use a `QueryProcessor` that will perform an
/// exact match on the `haystack` string.
/// Note that the `matching_parts` method can only return `None` when no match was found,
/// or a `Some` which contains a `Vec` with exactly one `Part::Matching` element,
/// containing the entire `haystack`.
/// ```
/// use dsh_api::query_processor::{ExactMatchQueryProcessor, Part, QueryProcessor};
///
/// let exact_match_query_processor = ExactMatchQueryProcessor::create("exact").unwrap();
/// let parts = exact_match_query_processor.matching_parts("exact").unwrap();
/// assert_eq!(parts, vec![Part::matching("exact")]);
/// ```
pub struct ExactMatchQueryProcessor<'a> {
  /// Borrowed pattern that a `haystack` is compared against for equality.
  pattern: &'a str,
}
143
144impl<'a> ExactMatchQueryProcessor<'a> {
145  pub fn create(pattern: &'a str) -> Result<Self, DshApiError> {
146    Ok(Self { pattern })
147  }
148}
149
150impl QueryProcessor for ExactMatchQueryProcessor<'_> {
151  fn describe(&self) -> String {
152    format!("match the pattern \"{}\"", self.pattern)
153  }
154
155  fn matching_parts(&self, haystack: &str) -> Option<Vec<Part>> {
156    if self.pattern == haystack {
157      Some(vec![Part::matching(haystack)])
158    } else {
159      None
160    }
161  }
162}
163
/// # Query processor implementation based on regular expressions
///
/// The regular expression is compiled when the processor is created and then
/// reused for every `haystack` that is searched.
///
/// # Examples
/// ```
/// use dsh_api::query_processor::{Part, QueryProcessor, RegexQueryProcessor};
///
/// let regex_query_processor = RegexQueryProcessor::create("a+").unwrap();
/// let parts = regex_query_processor.matching_parts("bbabbbaab").unwrap();
/// assert_eq!(parts, vec![
///   Part::non_matching("bb"),
///   Part::matching("a"),
///   Part::non_matching("bbb"),
///   Part::matching("aa"),
///   Part::non_matching("b"),
/// ]);
/// ```
pub struct RegexQueryProcessor {
  /// Compiled regular expression that each `haystack` is searched with.
  regex: Regex,
}
183
184impl RegexQueryProcessor {
185  pub fn create(pattern: &str) -> Result<Self, DshApiError> {
186    match Regex::new(pattern) {
187      Ok(regex) => Ok(Self { regex }),
188      Err(error) => Err(DshApiError::Configuration(error.to_string())),
189    }
190  }
191}
192
193impl QueryProcessor for RegexQueryProcessor {
194  fn describe(&self) -> String {
195    format!("match against regular expression \"{}\"", self.regex.as_str())
196  }
197
198  fn matching_parts(&self, haystack: &str) -> Option<Vec<Part>> {
199    let mut parts: Vec<Part> = vec![];
200    let mut ptr: usize = 0;
201    let mut match_found = false;
202    for matching in self.regex.find_iter(haystack) {
203      if matching.start() > ptr {
204        parts.push(Part::non_matching(&haystack[ptr..matching.start()]))
205      }
206      match_found = true;
207      parts.push(Part::matching(matching.as_str()));
208      ptr = matching.end();
209    }
210    if haystack.len() > ptr {
211      parts.push(Part::non_matching(&haystack[ptr..haystack.len()]));
212    }
213    if match_found {
214      Some(parts)
215    } else {
216      None
217    }
218  }
219}
220
/// # Dummy query processor implementation
///
/// This dummy query processor always returns the literal `haystack` as a
/// single non-matching `Part`.
/// This can be useful when you want to apply a function that expects a query processor,
/// without actually applying the query.
///
/// # Examples
/// ```
/// use dsh_api::query_processor::{DummyQueryProcessor, Part, QueryProcessor};
///
/// let dummy_query_processor = DummyQueryProcessor::create().unwrap();
/// let parts = dummy_query_processor.matching_parts("anything").unwrap();
/// assert_eq!(parts, vec![Part::non_matching("anything")]);
/// ```
pub struct DummyQueryProcessor {}
228
229impl DummyQueryProcessor {
230  pub fn create() -> Result<Self, DshApiError> {
231    Ok(Self {})
232  }
233}
234
235impl QueryProcessor for DummyQueryProcessor {
236  fn describe(&self) -> String {
237    "accept all input".to_string()
238  }
239
240  fn matching_parts(&self, haystack: &str) -> Option<Vec<Part>> {
241    Some(vec![Part::non_matching(haystack)])
242  }
243}
244
/// Checks the description and the match result of the exact match query processor
/// for a table of `(pattern, haystack, expected parts)` cases.
#[test]
fn test_exact_match_query_processor() {
  let cases: [(&str, &str, Option<Vec<Part>>); 4] = [
    ("aa", "", None),
    ("aa", "a", None),
    ("aa", "aa", Some(vec![Part::matching("aa")])),
    ("aa", "aaa", None),
  ];
  for (pattern, haystack, expected) in cases {
    let processor = ExactMatchQueryProcessor::create(pattern).unwrap();
    let expected_description = format!("match the pattern \"{}\"", pattern);
    assert_eq!(processor.describe(), expected_description);
    assert_eq!(processor.matching_parts(haystack), expected);
  }
}
254
/// Checks the description and the match result of the regex query processor
/// for a table of `(pattern, haystack, expected parts)` cases.
#[test]
fn test_regex_query_processor() {
  let cases: Vec<(&str, &str, Option<Vec<Part>>)> = vec![
    // Pattern "a+": greedy runs of one or more 'a'.
    ("a+", "", None),
    ("a+", "b", None),
    ("a+", "a", Some(vec![Part::matching("a")])),
    ("a+", "aaa", Some(vec![Part::matching("aaa")])),
    (
      "a+",
      "bbabbbaab",
      Some(vec![
        Part::non_matching("bb"),
        Part::matching("a"),
        Part::non_matching("bbb"),
        Part::matching("aa"),
        Part::non_matching("b"),
      ]),
    ),
    (
      "a+",
      "aaabbabbbaab",
      Some(vec![
        Part::matching("aaa"),
        Part::non_matching("bb"),
        Part::matching("a"),
        Part::non_matching("bbb"),
        Part::matching("aa"),
        Part::non_matching("b"),
      ]),
    ),
    (
      "a+",
      "bbabbbaabaaa",
      Some(vec![
        Part::non_matching("bb"),
        Part::matching("a"),
        Part::non_matching("bbb"),
        Part::matching("aa"),
        Part::non_matching("b"),
        Part::matching("aaa"),
      ]),
    ),
    (
      "a+",
      "aaabbabbbaabaaa",
      Some(vec![
        Part::matching("aaa"),
        Part::non_matching("bb"),
        Part::matching("a"),
        Part::non_matching("bbb"),
        Part::matching("aa"),
        Part::non_matching("b"),
        Part::matching("aaa"),
      ]),
    ),
    // Pattern "aa": fixed-width, non-overlapping matches.
    ("aa", "", None),
    ("aa", "bbb", None),
    ("aa", "aa", Some(vec![Part::matching("aa")])),
    ("aa", "aaa", Some(vec![Part::matching("aa"), Part::non_matching("a")])),
    ("aa", "aaaa", Some(vec![Part::matching("aa"), Part::matching("aa")])),
    ("aa", "aaaaa", Some(vec![Part::matching("aa"), Part::matching("aa"), Part::non_matching("a")])),
    ("aa", "aaabb", Some(vec![Part::matching("aa"), Part::non_matching("abb")])),
    (
      "aa",
      "bbaaabbbaaab",
      Some(vec![
        Part::non_matching("bb"),
        Part::matching("aa"),
        Part::non_matching("abbb"),
        Part::matching("aa"),
        Part::non_matching("ab"),
      ]),
    ),
    (
      "aa",
      "aaabbabbbaab",
      Some(vec![
        Part::matching("aa"),
        Part::non_matching("abbabbb"),
        Part::matching("aa"),
        Part::non_matching("b"),
      ]),
    ),
    (
      "aa",
      "bbabbbaabaaa",
      Some(vec![
        Part::non_matching("bbabbb"),
        Part::matching("aa"),
        Part::non_matching("b"),
        Part::matching("aa"),
        Part::non_matching("a"),
      ]),
    ),
    (
      "aa",
      "aaabbabbbaabaaa",
      Some(vec![
        Part::matching("aa"),
        Part::non_matching("abbabbb"),
        Part::matching("aa"),
        Part::non_matching("b"),
        Part::matching("aa"),
        Part::non_matching("a"),
      ]),
    ),
  ];
  for (pattern, haystack, expected) in cases {
    let processor = RegexQueryProcessor::create(pattern).unwrap();
    let expected_description = format!("match against regular expression \"{}\"", pattern);
    assert_eq!(processor.describe(), expected_description);
    assert_eq!(processor.matching_parts(haystack), expected);
  }
}