alith_client/components/grammar/text/
sentences.rs

1use super::{
2    Grammar, GrammarError, GrammarSetterTrait, NEWLINE_CHARS, RefCell, build_disallowed,
3    build_quotes, create_range,
4};
5
/// Configuration for a grammar that constrains output to a run of sentences.
///
/// The rendered grammar text is memoized in a private cache the first time
/// [`SentencesGrammar::grammar_string`] is called. Equality deliberately
/// ignores that cache: two values with the same configuration are equal
/// whether or not either of them has rendered its grammar yet.
#[derive(Clone)]
pub struct SentencesGrammar {
    /// Lower bound forwarded to `create_range` when the grammar is rendered.
    pub min_count: u8,
    /// Upper bound forwarded to `create_range` when the grammar is rendered.
    pub max_count: u8,
    /// Per-sentence token budget; multiplied by 4.5 and floored to obtain the
    /// maximum repetition count of the sentence body in the grammar.
    pub sentence_token_length: u32,
    /// When `true` every sentence rule starts with `[A-Z]`; when `false` a
    /// separate `first` rule starting with `[a-z]` is emitted as well.
    pub capitalize_first: bool,
    /// Optional literal emitted (preceded by a space) at the end of the root rule.
    pub stop_word_done: Option<String>,
    /// Optional literal offered as an alternative to the sentence range in the root rule.
    pub stop_word_no_result: Option<String>,
    /// Literal emitted after every sentence rule.
    pub concatenator: String,
    /// Characters handed to `build_disallowed` / `build_quotes` when rendering.
    pub disallowed_chars: Vec<char>,
    /// Memoized grammar text; `None` until the grammar is first rendered.
    grammar_string: RefCell<Option<String>>,
}

impl PartialEq for SentencesGrammar {
    /// Compares the configuration fields only; the memoized grammar text is
    /// derived state and must not influence equality.
    fn eq(&self, other: &Self) -> bool {
        // Exhaustive destructuring: adding a field without deciding how it
        // takes part in equality becomes a compile error.
        let Self {
            min_count,
            max_count,
            sentence_token_length,
            capitalize_first,
            stop_word_done,
            stop_word_no_result,
            concatenator,
            disallowed_chars,
            grammar_string: _,
        } = self;
        *min_count == other.min_count
            && *max_count == other.max_count
            && *sentence_token_length == other.sentence_token_length
            && *capitalize_first == other.capitalize_first
            && *stop_word_done == other.stop_word_done
            && *stop_word_no_result == other.stop_word_no_result
            && *concatenator == other.concatenator
            && *disallowed_chars == other.disallowed_chars
    }
}
18
19impl Default for SentencesGrammar {
20    fn default() -> Self {
21        let mut disallowed_chars = NEWLINE_CHARS.to_vec();
22        disallowed_chars.push('.');
23        disallowed_chars.push('!');
24        disallowed_chars.push('?');
25        Self {
26            min_count: 1,
27            max_count: 1,
28            sentence_token_length: 50,
29            capitalize_first: true,
30            stop_word_done: None,
31            stop_word_no_result: None,
32            concatenator: " ".to_string(),
33            disallowed_chars,
34            grammar_string: RefCell::new(None),
35        }
36    }
37}
38
39impl SentencesGrammar {
40    #[inline]
41    pub fn wrap(self) -> Grammar {
42        Grammar::Sentences(self)
43    }
44
45    pub fn min_count(mut self, min_count: u8) -> Self {
46        self.min_count = min_count;
47        self
48    }
49
50    pub fn max_count(mut self, max_count: u8) -> Self {
51        self.max_count = max_count;
52        self
53    }
54
55    pub fn capitalize_first(mut self, capitalize_first: bool) -> Self {
56        self.capitalize_first = capitalize_first;
57        self
58    }
59
60    pub fn concatenator(mut self, concatenator: &str) -> Self {
61        self.concatenator = concatenator.to_string();
62        self
63    }
64
65    pub fn disallowed_char(mut self, disallowed_char: char) -> Self {
66        self.disallowed_chars.push(disallowed_char);
67        self
68    }
69
70    pub fn disallowed_chars(mut self, disallowed_chars: Vec<char>) -> Self {
71        self.disallowed_chars.extend(disallowed_chars);
72        self
73    }
74
75    pub fn grammar_string(&self) -> String {
76        let mut grammar_string = self.grammar_string.borrow_mut();
77        if grammar_string.is_none() {
78            *grammar_string = Some(sentences_grammar(
79                self.min_count,
80                self.max_count,
81                self.sentence_token_length,
82                self.capitalize_first,
83                &self.concatenator,
84                &self.stop_word_done,
85                &self.stop_word_no_result,
86                &self.disallowed_chars,
87            ));
88        }
89        grammar_string.as_ref().unwrap().clone()
90    }
91
92    pub fn validate_clean(&self, content: &str) -> Result<String, GrammarError> {
93        sentences_validate_clean(content)
94    }
95
96    pub fn grammar_parse(&self, content: &str) -> Result<String, GrammarError> {
97        sentences_parse(content)
98    }
99}
100
101impl GrammarSetterTrait for SentencesGrammar {
102    fn stop_word_done_mut(&mut self) -> &mut Option<String> {
103        &mut self.stop_word_done
104    }
105
106    fn stop_word_no_result_mut(&mut self) -> &mut Option<String> {
107        &mut self.stop_word_no_result
108    }
109}
110
111#[allow(clippy::too_many_arguments)]
112pub fn sentences_grammar<T: AsRef<str>>(
113    min_count: u8,
114    max_count: u8,
115    sentence_token_length: u32,
116    capitalize_first: bool,
117    concatenator: &str,
118    stop_word_done: &Option<T>,
119    stop_word_no_result: &Option<T>,
120    disallowed_chars: &[char],
121) -> String {
122    let char_count = (sentence_token_length as f32 * 4.5).floor() as u32;
123
124    let disallowed = build_disallowed(disallowed_chars);
125    let quotes = build_quotes(disallowed_chars);
126
127    if capitalize_first {
128        let range = create_range(false, min_count, max_count, stop_word_done);
129        let sentence_item = format!(
130            "item ::= {} \"{concatenator}\"",
131            build_sentence_item(char_count, true, &disallowed, &quotes)
132        );
133        match (stop_word_done, stop_word_no_result) {
134            (Some(stop_word_done), Some(stop_word_no_result)) => format!(
135                "root ::= ( {range} | \"{}\" ) \" {}\"\n\n{sentence_item}",
136                stop_word_no_result.as_ref(),
137                stop_word_done.as_ref()
138            ),
139            (None, Some(stop_word_no_result)) => {
140                format!(
141                    "root ::= ( {range} | \"{}\" )\n\n{sentence_item}",
142                    stop_word_no_result.as_ref()
143                )
144            }
145            (Some(stop_word_done), None) => {
146                format!(
147                    "root ::= {range} \" {}\"\n\n{sentence_item}",
148                    stop_word_done.as_ref()
149                )
150            }
151            (None, None) => format!("root ::= {range}\n\n{sentence_item}"),
152        }
153    } else {
154        let first_item = format!(
155            "first ::= {} \"{concatenator}\"",
156            build_sentence_item(char_count, false, &disallowed, &quotes)
157        );
158        let range = create_range(true, min_count, max_count, stop_word_done);
159        let sentence_item = format!(
160            "item ::= {} \"{concatenator}\"",
161            build_sentence_item(char_count, true, &disallowed, &quotes)
162        );
163        match (stop_word_done, stop_word_no_result) {
164            (Some(stop_word_done), Some(stop_word_no_result)) => format!(
165                "root ::= ( {range} | \"{}\" ) \" {}\"\n\n{first_item}\n\n{sentence_item}",
166                stop_word_no_result.as_ref(),
167                stop_word_done.as_ref()
168            ),
169            (None, Some(stop_word_no_result)) => {
170                format!(
171                    "root ::= ( {range} | \"{}\" )\n\n{first_item}\n\n{sentence_item}",
172                    stop_word_no_result.as_ref()
173                )
174            }
175            (Some(stop_word_done), None) => {
176                format!(
177                    "root ::= {range} \" {}\"\n\n{first_item}\n\n{sentence_item}",
178                    stop_word_done.as_ref()
179                )
180            }
181            (None, None) => format!("root ::= {range}\n\n{first_item}\n\n{sentence_item}"),
182        }
183    }
184}
185
/// Builds the right-hand side of a single-sentence grammar rule.
///
/// The sentence is: an opening character class (`[A-Z]` when
/// `capitalize_start` is set, otherwise `[a-z]`), between 1 and `char_count`
/// repetitions of `disallowed`, one `[a-z]`, and a terminator (`.`, `?` or `!`).
/// When `quotes` is present, the quote expression is accepted in place of the
/// opening character class and may also follow the terminator.
fn build_sentence_item(
    char_count: u32,
    capitalize_start: bool,
    disallowed: &str,
    quotes: &Option<String>,
) -> String {
    let opening = match capitalize_start {
        true => "[A-Z]",
        false => "[a-z]",
    };
    // Shared middle part of the rule, identical with or without quotes.
    let body = format!("{disallowed}{{1,{char_count}}} [a-z]");

    match quotes {
        Some(q) => format!(
            "({q} | {opening}) {body} (\".\" | \"?\" | \"!\" | \".\" {q} | \"?\" {q} | \"!\" {q})"
        ),
        None => format!("{opening} {body} (\".\" | \"?\" | \"!\")"),
    }
}
201
202pub fn sentences_validate_clean(content: &str) -> Result<String, GrammarError> {
203    let content: &str = content.trim();
204    // .trim_start_matches(|c: char| !c.is_alphanumeric() && !c.is_ascii_punctuation())
205    // .trim_end_matches(|c: char| !c.is_alphanumeric() && !c.is_ascii_punctuation());
206
207    if sentences_parse(content).is_ok() {
208        Ok(content.to_string())
209    } else {
210        Err(GrammarError::ParseValueError {
211            content: content.to_string(),
212            parse_type: "String".to_string(),
213        })
214    }
215}
216
217pub fn sentences_parse(content: &str) -> Result<String, GrammarError> {
218    if content.is_empty() {
219        return Err(GrammarError::ParseValueError {
220            content: content.to_string(),
221            parse_type: "String".to_string(),
222        });
223    }
224    Ok(content.to_string())
225}