alith_client/components/grammar/text/
text_grammar.rs

1use super::{Grammar, GrammarError, GrammarSetterTrait, NEWLINE_CHARS, RefCell, build_disallowed};
2
/// Configuration for a free-text grammar.
///
/// The rendered grammar string is computed lazily by `grammar_string()` and
/// memoised in the private `grammar_string` cell.
#[derive(Clone)]
pub struct TextGrammar {
    /// Length budget handed to `text_grammar` as `item_token_length`
    /// (presumably measured in tokens — confirm with callers).
    pub item_token_length: u32,
    /// Optional stop word emitted by the grammar after the generated text.
    pub stop_word_done: Option<String>,
    /// Optional stop word the grammar accepts in place of generated text.
    pub stop_word_no_result: Option<String>,
    /// Characters that the generated text must not contain.
    pub disallowed_chars: Vec<char>,
    /// When `false`, newline characters are disallowed in addition to
    /// `disallowed_chars`.
    pub allow_newline: bool,
    /// Memoised grammar string; `None` until first rendered.
    grammar_string: RefCell<Option<String>>,
}

/// Equality is defined over the configuration only.
///
/// The memoised `grammar_string` is deliberately excluded: it is filled in by
/// a `&self` getter, so comparing it would make two identically configured
/// grammars unequal merely because one of them had already been rendered.
impl PartialEq for TextGrammar {
    fn eq(&self, other: &Self) -> bool {
        // Exhaustive destructuring: adding a field without deciding whether it
        // takes part in equality becomes a compile error.
        let Self {
            item_token_length,
            stop_word_done,
            stop_word_no_result,
            disallowed_chars,
            allow_newline,
            grammar_string: _,
        } = self;
        *item_token_length == other.item_token_length
            && *stop_word_done == other.stop_word_done
            && *stop_word_no_result == other.stop_word_no_result
            && *disallowed_chars == other.disallowed_chars
            && *allow_newline == other.allow_newline
    }
}
12
13impl Default for TextGrammar {
14    fn default() -> Self {
15        Self {
16            item_token_length: 200,
17            stop_word_done: None,
18            stop_word_no_result: None,
19            disallowed_chars: vec![],
20            allow_newline: false,
21            grammar_string: RefCell::new(None),
22        }
23    }
24}
25
26impl TextGrammar {
27    #[inline]
28    pub fn wrap(self) -> Grammar {
29        Grammar::Text(self)
30    }
31
32    pub fn item_token_length(mut self, item_token_length: u32) -> Self {
33        self.item_token_length = item_token_length;
34        self
35    }
36
37    pub fn disallowed_char(mut self, disallowed_char: char) -> Self {
38        self.disallowed_chars.push(disallowed_char);
39        self
40    }
41
42    pub fn disallowed_chars(mut self, disallowed_chars: Vec<char>) -> Self {
43        self.disallowed_chars.extend(disallowed_chars);
44        self
45    }
46
47    pub fn allow_newline(mut self, allow_newline: bool) -> Self {
48        self.allow_newline = allow_newline;
49        self
50    }
51
52    pub fn grammar_string(&self) -> String {
53        let mut grammar_string = self.grammar_string.borrow_mut();
54        if grammar_string.is_none() {
55            *grammar_string = Some(text_grammar(
56                self.item_token_length,
57                &self.stop_word_done,
58                &self.stop_word_no_result,
59                self.allow_newline,
60                &self.disallowed_chars,
61            ));
62        }
63        grammar_string.as_ref().unwrap().clone()
64    }
65
66    pub fn validate_clean(&self, content: &str) -> Result<String, GrammarError> {
67        text_validate_clean(content)
68    }
69
70    pub fn grammar_parse(&self, content: &str) -> Result<String, GrammarError> {
71        text_parse(content)
72    }
73}
74
75impl GrammarSetterTrait for TextGrammar {
76    fn stop_word_done_mut(&mut self) -> &mut Option<String> {
77        &mut self.stop_word_done
78    }
79
80    fn stop_word_no_result_mut(&mut self) -> &mut Option<String> {
81        &mut self.stop_word_no_result
82    }
83}
84
85pub fn text_grammar(
86    item_token_length: u32,
87    stop_word_done: &Option<String>,
88    stop_word_no_result: &Option<String>,
89    allow_newline: bool,
90    disallowed_chars: &[char],
91) -> String {
92    let disallowed = if allow_newline {
93        build_disallowed(disallowed_chars)
94    } else {
95        let mut disallowed = disallowed_chars.to_vec();
96        disallowed.extend(NEWLINE_CHARS.iter());
97        build_disallowed(&disallowed)
98    };
99    match (stop_word_done, stop_word_no_result) {
100        (Some(stop_word_done), Some(stop_word_no_result)) => {
101            format!(
102                "root ::= ( item{{1,{}}} | \"{stop_word_no_result}\" ) \" {stop_word_done}\"\nitem ::= {disallowed}",
103                (item_token_length as f32 * 4.5).floor() as u32,
104            )
105        }
106        (Some(stop_word_done), None) => {
107            format!(
108                "root ::= item{{1,{}}} \" {stop_word_done}\"\nitem ::= {disallowed}",
109                (item_token_length as f32 * 4.5).floor() as u32,
110            )
111        }
112        (None, Some(stop_word_no_result)) => {
113            format!(
114                "root ::= ( item{{1,{}}} | \"{stop_word_no_result}\" )\nitem ::= {disallowed}",
115                (item_token_length as f32 * 4.5).floor() as u32
116            )
117        }
118        (None, None) => {
119            format!(
120                "root ::= item{{0,{}}}\n\nitem ::= {disallowed}",
121                (item_token_length as f32 * 4.5).floor() as u32
122            )
123        }
124    }
125}
126
127pub fn text_validate_clean(content: &str) -> Result<String, GrammarError> {
128    let content: &str = content
129        .trim_start_matches(|c: char| !c.is_alphanumeric() && !c.is_ascii_punctuation())
130        .trim_end_matches(|c: char| !c.is_alphanumeric() && !c.is_ascii_punctuation());
131
132    if text_parse(content).is_ok() {
133        Ok(content.to_string())
134    } else {
135        Err(GrammarError::ParseValueError {
136            content: content.to_string(),
137            parse_type: "String".to_string(),
138        })
139    }
140}
141
142pub fn text_parse(content: &str) -> Result<String, GrammarError> {
143    if content.is_empty() {
144        return Err(GrammarError::ParseValueError {
145            content: content.to_string(),
146            parse_type: "String".to_string(),
147        });
148    }
149    Ok(content.to_string())
150}