alith_client/components/grammar/text/
sentences.rs1use super::{
2 Grammar, GrammarError, GrammarSetterTrait, NEWLINE_CHARS, RefCell, build_disallowed,
3 build_quotes, create_range,
4};
5
/// Configuration for a grammar that constrains output to a run of sentences.
///
/// The public fields are the inputs handed to [`sentences_grammar`] when the grammar
/// text is rendered.
#[derive(Clone, PartialEq)]
pub struct SentencesGrammar {
    /// Minimum number of sentences; forwarded to the range builder.
    pub min_count: u8,
    /// Maximum number of sentences; forwarded to the range builder.
    pub max_count: u8,
    /// Per-sentence token budget. It is multiplied by 4.5 (rounded down) to obtain the
    /// maximum number of characters in one sentence.
    pub sentence_token_length: u32,
    /// When `true` every sentence starts with `[A-Z]`; when `false` the first sentence
    /// starts with `[a-z]` and the following ones with `[A-Z]`.
    pub capitalize_first: bool,
    /// Optional literal emitted after the sentences, preceded by a single space.
    pub stop_word_done: Option<String>,
    /// Optional literal accepted as an alternative to producing any sentences.
    pub stop_word_no_result: Option<String>,
    /// Literal emitted after every sentence.
    pub concatenator: String,
    /// Characters used to build the disallowed set and the optional quote rule.
    pub disallowed_chars: Vec<char>,
    /// Cache for the rendered grammar text.
    ///
    /// NOTE: the derived `PartialEq` compares this cache as well, so a rendered and an
    /// unrendered grammar with the same settings are not equal.
    grammar_string: RefCell<Option<String>>,
}
18
19impl Default for SentencesGrammar {
20 fn default() -> Self {
21 let mut disallowed_chars = NEWLINE_CHARS.to_vec();
22 disallowed_chars.push('.');
23 disallowed_chars.push('!');
24 disallowed_chars.push('?');
25 Self {
26 min_count: 1,
27 max_count: 1,
28 sentence_token_length: 50,
29 capitalize_first: true,
30 stop_word_done: None,
31 stop_word_no_result: None,
32 concatenator: " ".to_string(),
33 disallowed_chars,
34 grammar_string: RefCell::new(None),
35 }
36 }
37}
38
39impl SentencesGrammar {
40 #[inline]
41 pub fn wrap(self) -> Grammar {
42 Grammar::Sentences(self)
43 }
44
45 pub fn min_count(mut self, min_count: u8) -> Self {
46 self.min_count = min_count;
47 self
48 }
49
50 pub fn max_count(mut self, max_count: u8) -> Self {
51 self.max_count = max_count;
52 self
53 }
54
55 pub fn capitalize_first(mut self, capitalize_first: bool) -> Self {
56 self.capitalize_first = capitalize_first;
57 self
58 }
59
60 pub fn concatenator(mut self, concatenator: &str) -> Self {
61 self.concatenator = concatenator.to_string();
62 self
63 }
64
65 pub fn disallowed_char(mut self, disallowed_char: char) -> Self {
66 self.disallowed_chars.push(disallowed_char);
67 self
68 }
69
70 pub fn disallowed_chars(mut self, disallowed_chars: Vec<char>) -> Self {
71 self.disallowed_chars.extend(disallowed_chars);
72 self
73 }
74
75 pub fn grammar_string(&self) -> String {
76 let mut grammar_string = self.grammar_string.borrow_mut();
77 if grammar_string.is_none() {
78 *grammar_string = Some(sentences_grammar(
79 self.min_count,
80 self.max_count,
81 self.sentence_token_length,
82 self.capitalize_first,
83 &self.concatenator,
84 &self.stop_word_done,
85 &self.stop_word_no_result,
86 &self.disallowed_chars,
87 ));
88 }
89 grammar_string.as_ref().unwrap().clone()
90 }
91
92 pub fn validate_clean(&self, content: &str) -> Result<String, GrammarError> {
93 sentences_validate_clean(content)
94 }
95
96 pub fn grammar_parse(&self, content: &str) -> Result<String, GrammarError> {
97 sentences_parse(content)
98 }
99}
100
101impl GrammarSetterTrait for SentencesGrammar {
102 fn stop_word_done_mut(&mut self) -> &mut Option<String> {
103 &mut self.stop_word_done
104 }
105
106 fn stop_word_no_result_mut(&mut self) -> &mut Option<String> {
107 &mut self.stop_word_no_result
108 }
109}
110
111#[allow(clippy::too_many_arguments)]
112pub fn sentences_grammar<T: AsRef<str>>(
113 min_count: u8,
114 max_count: u8,
115 sentence_token_length: u32,
116 capitalize_first: bool,
117 concatenator: &str,
118 stop_word_done: &Option<T>,
119 stop_word_no_result: &Option<T>,
120 disallowed_chars: &[char],
121) -> String {
122 let char_count = (sentence_token_length as f32 * 4.5).floor() as u32;
123
124 let disallowed = build_disallowed(disallowed_chars);
125 let quotes = build_quotes(disallowed_chars);
126
127 if capitalize_first {
128 let range = create_range(false, min_count, max_count, stop_word_done);
129 let sentence_item = format!(
130 "item ::= {} \"{concatenator}\"",
131 build_sentence_item(char_count, true, &disallowed, "es)
132 );
133 match (stop_word_done, stop_word_no_result) {
134 (Some(stop_word_done), Some(stop_word_no_result)) => format!(
135 "root ::= ( {range} | \"{}\" ) \" {}\"\n\n{sentence_item}",
136 stop_word_no_result.as_ref(),
137 stop_word_done.as_ref()
138 ),
139 (None, Some(stop_word_no_result)) => {
140 format!(
141 "root ::= ( {range} | \"{}\" )\n\n{sentence_item}",
142 stop_word_no_result.as_ref()
143 )
144 }
145 (Some(stop_word_done), None) => {
146 format!(
147 "root ::= {range} \" {}\"\n\n{sentence_item}",
148 stop_word_done.as_ref()
149 )
150 }
151 (None, None) => format!("root ::= {range}\n\n{sentence_item}"),
152 }
153 } else {
154 let first_item = format!(
155 "first ::= {} \"{concatenator}\"",
156 build_sentence_item(char_count, false, &disallowed, "es)
157 );
158 let range = create_range(true, min_count, max_count, stop_word_done);
159 let sentence_item = format!(
160 "item ::= {} \"{concatenator}\"",
161 build_sentence_item(char_count, true, &disallowed, "es)
162 );
163 match (stop_word_done, stop_word_no_result) {
164 (Some(stop_word_done), Some(stop_word_no_result)) => format!(
165 "root ::= ( {range} | \"{}\" ) \" {}\"\n\n{first_item}\n\n{sentence_item}",
166 stop_word_no_result.as_ref(),
167 stop_word_done.as_ref()
168 ),
169 (None, Some(stop_word_no_result)) => {
170 format!(
171 "root ::= ( {range} | \"{}\" )\n\n{first_item}\n\n{sentence_item}",
172 stop_word_no_result.as_ref()
173 )
174 }
175 (Some(stop_word_done), None) => {
176 format!(
177 "root ::= {range} \" {}\"\n\n{first_item}\n\n{sentence_item}",
178 stop_word_done.as_ref()
179 )
180 }
181 (None, None) => format!("root ::= {range}\n\n{first_item}\n\n{sentence_item}"),
182 }
183 }
184}
185
/// Builds the right-hand side of one sentence rule.
///
/// A sentence is a leading letter class (`[A-Z]` when `capitalize_start` is set,
/// otherwise `[a-z]`), then 1 to `char_count` repetitions of `disallowed`, then one
/// `[a-z]`, then a terminator (`.`, `?` or `!`). When `quotes` is present it is also
/// accepted in place of the leading letter and directly after the terminator.
fn build_sentence_item(
    char_count: u32,
    capitalize_start: bool,
    disallowed: &str,
    quotes: &Option<String>,
) -> String {
    let leading = if capitalize_start { "[A-Z]" } else { "[a-z]" };
    // Shared middle part: bounded body followed by a closing lower-case letter.
    let body = format!("{disallowed}{{1,{char_count}}} [a-z]");

    match quotes {
        Some(quote) => format!(
            "({quote} | {leading}) {body} (\".\" | \"?\" | \"!\" | \".\" {quote} | \"?\" {quote} | \"!\" {quote})"
        ),
        None => format!("{leading} {body} (\".\" | \"?\" | \"!\")"),
    }
}
201
202pub fn sentences_validate_clean(content: &str) -> Result<String, GrammarError> {
203 let content: &str = content.trim();
204 if sentences_parse(content).is_ok() {
208 Ok(content.to_string())
209 } else {
210 Err(GrammarError::ParseValueError {
211 content: content.to_string(),
212 parse_type: "String".to_string(),
213 })
214 }
215}
216
217pub fn sentences_parse(content: &str) -> Result<String, GrammarError> {
218 if content.is_empty() {
219 return Err(GrammarError::ParseValueError {
220 content: content.to_string(),
221 parse_type: "String".to_string(),
222 });
223 }
224 Ok(content.to_string())
225}