// alith_client/components/grammar/text/text_grammar.rs
use super::{Grammar, GrammarError, GrammarSetterTrait, NEWLINE_CHARS, RefCell, build_disallowed};
2
/// Configuration for a free-text grammar.
///
/// The public fields are the inputs that `grammar_string()` forwards to
/// [`text_grammar`]; the rendered grammar is memoized in a private cell.
#[derive(Clone)]
pub struct TextGrammar {
    /// Length budget. [`text_grammar`] multiplies it by 4.5 to obtain the
    /// maximum number of `item` repetitions in the `root` rule.
    /// NOTE(review): presumably a tokens-to-characters conversion; confirm.
    pub item_token_length: u32,
    /// When set, the `root` rule ends with this literal preceded by one space.
    pub stop_word_done: Option<String>,
    /// When set, the `root` rule accepts this literal as an alternative to
    /// the `item` sequence.
    pub stop_word_no_result: Option<String>,
    /// Characters handed to `build_disallowed` to form the `item` rule.
    pub disallowed_chars: Vec<char>,
    /// When `false`, `NEWLINE_CHARS` are appended to the disallowed set.
    pub allow_newline: bool,
    /// Memoized output of `grammar_string()`; `None` until first rendered.
    grammar_string: RefCell<Option<String>>,
}

/// Equality compares configuration only.
///
/// The memoized grammar string is derived state, so it is deliberately left
/// out: a derived comparison would report two identically configured grammars
/// as different as soon as only one of them had been rendered.
impl PartialEq for TextGrammar {
    fn eq(&self, other: &Self) -> bool {
        self.item_token_length == other.item_token_length
            && self.stop_word_done == other.stop_word_done
            && self.stop_word_no_result == other.stop_word_no_result
            && self.disallowed_chars == other.disallowed_chars
            && self.allow_newline == other.allow_newline
    }
}
12
13impl Default for TextGrammar {
14 fn default() -> Self {
15 Self {
16 item_token_length: 200,
17 stop_word_done: None,
18 stop_word_no_result: None,
19 disallowed_chars: vec![],
20 allow_newline: false,
21 grammar_string: RefCell::new(None),
22 }
23 }
24}
25
26impl TextGrammar {
27 #[inline]
28 pub fn wrap(self) -> Grammar {
29 Grammar::Text(self)
30 }
31
32 pub fn item_token_length(mut self, item_token_length: u32) -> Self {
33 self.item_token_length = item_token_length;
34 self
35 }
36
37 pub fn disallowed_char(mut self, disallowed_char: char) -> Self {
38 self.disallowed_chars.push(disallowed_char);
39 self
40 }
41
42 pub fn disallowed_chars(mut self, disallowed_chars: Vec<char>) -> Self {
43 self.disallowed_chars.extend(disallowed_chars);
44 self
45 }
46
47 pub fn allow_newline(mut self, allow_newline: bool) -> Self {
48 self.allow_newline = allow_newline;
49 self
50 }
51
52 pub fn grammar_string(&self) -> String {
53 let mut grammar_string = self.grammar_string.borrow_mut();
54 if grammar_string.is_none() {
55 *grammar_string = Some(text_grammar(
56 self.item_token_length,
57 &self.stop_word_done,
58 &self.stop_word_no_result,
59 self.allow_newline,
60 &self.disallowed_chars,
61 ));
62 }
63 grammar_string.as_ref().unwrap().clone()
64 }
65
66 pub fn validate_clean(&self, content: &str) -> Result<String, GrammarError> {
67 text_validate_clean(content)
68 }
69
70 pub fn grammar_parse(&self, content: &str) -> Result<String, GrammarError> {
71 text_parse(content)
72 }
73}
74
75impl GrammarSetterTrait for TextGrammar {
76 fn stop_word_done_mut(&mut self) -> &mut Option<String> {
77 &mut self.stop_word_done
78 }
79
80 fn stop_word_no_result_mut(&mut self) -> &mut Option<String> {
81 &mut self.stop_word_no_result
82 }
83}
84
85pub fn text_grammar(
86 item_token_length: u32,
87 stop_word_done: &Option<String>,
88 stop_word_no_result: &Option<String>,
89 allow_newline: bool,
90 disallowed_chars: &[char],
91) -> String {
92 let disallowed = if allow_newline {
93 build_disallowed(disallowed_chars)
94 } else {
95 let mut disallowed = disallowed_chars.to_vec();
96 disallowed.extend(NEWLINE_CHARS.iter());
97 build_disallowed(&disallowed)
98 };
99 match (stop_word_done, stop_word_no_result) {
100 (Some(stop_word_done), Some(stop_word_no_result)) => {
101 format!(
102 "root ::= ( item{{1,{}}} | \"{stop_word_no_result}\" ) \" {stop_word_done}\"\nitem ::= {disallowed}",
103 (item_token_length as f32 * 4.5).floor() as u32,
104 )
105 }
106 (Some(stop_word_done), None) => {
107 format!(
108 "root ::= item{{1,{}}} \" {stop_word_done}\"\nitem ::= {disallowed}",
109 (item_token_length as f32 * 4.5).floor() as u32,
110 )
111 }
112 (None, Some(stop_word_no_result)) => {
113 format!(
114 "root ::= ( item{{1,{}}} | \"{stop_word_no_result}\" )\nitem ::= {disallowed}",
115 (item_token_length as f32 * 4.5).floor() as u32
116 )
117 }
118 (None, None) => {
119 format!(
120 "root ::= item{{0,{}}}\n\nitem ::= {disallowed}",
121 (item_token_length as f32 * 4.5).floor() as u32
122 )
123 }
124 }
125}
126
127pub fn text_validate_clean(content: &str) -> Result<String, GrammarError> {
128 let content: &str = content
129 .trim_start_matches(|c: char| !c.is_alphanumeric() && !c.is_ascii_punctuation())
130 .trim_end_matches(|c: char| !c.is_alphanumeric() && !c.is_ascii_punctuation());
131
132 if text_parse(content).is_ok() {
133 Ok(content.to_string())
134 } else {
135 Err(GrammarError::ParseValueError {
136 content: content.to_string(),
137 parse_type: "String".to_string(),
138 })
139 }
140}
141
142pub fn text_parse(content: &str) -> Result<String, GrammarError> {
143 if content.is_empty() {
144 return Err(GrammarError::ParseValueError {
145 content: content.to_string(),
146 parse_type: "String".to_string(),
147 });
148 }
149 Ok(content.to_string())
150}