// nodedb_fts/analyzer/standard.rs

// SPDX-License-Identifier: Apache-2.0

//! Standard, Simple, and Keyword analyzers.

use super::pipeline::{TextAnalyzer, analyze};
6
7/// Standard English text analyzer (default).
8///
9/// Pipeline: NFD normalize → lowercase → split → filter → stop words → Snowball stem.
10pub struct StandardAnalyzer;
11
12impl TextAnalyzer for StandardAnalyzer {
13    fn analyze(&self, text: &str) -> Vec<String> {
14        analyze(text)
15    }
16
17    fn name(&self) -> &str {
18        "standard"
19    }
20}
21
22/// Simple analyzer: lowercase + split on whitespace. No stemming or stop words.
23///
24/// Useful for exact-match fields (email addresses, tags, identifiers).
25pub struct SimpleAnalyzer;
26
27impl TextAnalyzer for SimpleAnalyzer {
28    fn analyze(&self, text: &str) -> Vec<String> {
29        text.to_lowercase()
30            .split_whitespace()
31            .filter(|w| w.len() > 1)
32            .map(|w| w.to_string())
33            .collect()
34    }
35
36    fn name(&self) -> &str {
37        "simple"
38    }
39}
40
41/// Keyword analyzer: treats entire input as a single token (lowercase).
42///
43/// Used for fields where the entire value is the token (status fields,
44/// enum-like values, exact-match tags).
45pub struct KeywordAnalyzer;
46
47impl TextAnalyzer for KeywordAnalyzer {
48    fn analyze(&self, text: &str) -> Vec<String> {
49        let trimmed = text.trim().to_lowercase();
50        if trimmed.is_empty() {
51            Vec::new()
52        } else {
53            vec![trimmed]
54        }
55    }
56
57    fn name(&self) -> &str {
58        "keyword"
59    }
60}
61
#[cfg(test)]
mod tests {
    use super::*;

    /// `name()` is the lookup key for each analyzer; pin the exact strings.
    #[test]
    fn analyzer_names() {
        assert_eq!(StandardAnalyzer.name(), "standard");
        assert_eq!(SimpleAnalyzer.name(), "simple");
        assert_eq!(KeywordAnalyzer.name(), "keyword");
    }

    #[test]
    fn simple_analyzer() {
        let analyzer = SimpleAnalyzer;
        let tokens = analyzer.analyze("Hello World foo");
        assert_eq!(tokens, vec!["hello", "world", "foo"]);
    }

    /// Empty and whitespace-only input must produce no tokens.
    #[test]
    fn simple_analyzer_empty() {
        assert!(SimpleAnalyzer.analyze("").is_empty());
        assert!(SimpleAnalyzer.analyze("   \t\n").is_empty());
    }

    /// The entire input collapses to one lowercased token, inner
    /// whitespace preserved, outer whitespace trimmed.
    #[test]
    fn keyword_analyzer() {
        let analyzer = KeywordAnalyzer;
        assert_eq!(analyzer.analyze("Active Status"), vec!["active status"]);
        assert_eq!(analyzer.analyze("  Padded  "), vec!["padded"]);
    }

    #[test]
    fn keyword_empty() {
        let analyzer = KeywordAnalyzer;
        assert!(analyzer.analyze("  ").is_empty());
    }
}
85}