nodedb_sql/functions/fts_ops/
tsquery_parser.rs1use crate::error::{Result, SqlError};
22use crate::fts_types::FtsQuery;
23
24pub fn parse_tsquery(input: &str) -> Result<FtsQuery> {
28 let input = input.trim();
29 if input.is_empty() {
30 return Err(SqlError::Parse {
31 detail: "tsquery: empty query string".into(),
32 });
33 }
34 let tokens = tokenize(input);
35 let mut pos = 0;
36 let query = parse_or(&tokens, &mut pos)?;
37 if pos < tokens.len() {
38 return Err(SqlError::Parse {
39 detail: format!(
40 "tsquery: unexpected token '{}' at position {pos}",
41 tokens[pos]
42 ),
43 });
44 }
45 Ok(query)
46}
47
48pub fn parse_phrase_terms(input: &str) -> FtsQuery {
54 let terms: Vec<String> = input
55 .split_whitespace()
56 .map(|s| s.to_ascii_lowercase())
57 .filter(|s| !s.is_empty())
58 .collect();
59 FtsQuery::Phrase(terms)
60}
61
62fn tokenize(input: &str) -> Vec<String> {
65 let mut tokens = Vec::new();
66 let mut chars = input.char_indices().peekable();
67 while let Some((i, c)) = chars.next() {
68 match c {
69 ' ' | '\t' | '\n' | '\r' => continue,
70 '(' | ')' | '&' | '|' | '!' => tokens.push(c.to_string()),
71 ':' => {
72 if chars.peek().map(|(_, nc)| *nc) == Some('*') {
76 chars.next(); tokens.push(":*".into());
78 } else {
79 tokens.push(":".into());
80 }
81 }
82 _ => {
83 let start = i;
85 let mut end = i + c.len_utf8();
86 while let Some(&(j, nc)) = chars.peek() {
87 if nc.is_alphanumeric() || nc == '-' || nc == '_' {
88 end = j + nc.len_utf8();
89 chars.next();
90 } else {
91 break;
92 }
93 }
94 tokens.push(input[start..end].to_ascii_lowercase());
95 }
96 }
97 }
98 tokens
99}
100
101fn parse_or(tokens: &[String], pos: &mut usize) -> Result<FtsQuery> {
104 let mut left = parse_and(tokens, pos)?;
105 while *pos < tokens.len() && tokens[*pos] == "|" {
106 *pos += 1;
107 let right = parse_and(tokens, pos)?;
108 left = match left {
109 FtsQuery::Or(mut parts) => {
110 parts.push(right);
111 FtsQuery::Or(parts)
112 }
113 other => FtsQuery::Or(vec![other, right]),
114 };
115 }
116 Ok(left)
117}
118
119fn parse_and(tokens: &[String], pos: &mut usize) -> Result<FtsQuery> {
120 let mut left = parse_not(tokens, pos)?;
121 while *pos < tokens.len() && tokens[*pos] == "&" {
122 *pos += 1;
123 let right = parse_not(tokens, pos)?;
124 left = match left {
125 FtsQuery::And(mut parts) => {
126 parts.push(right);
127 FtsQuery::And(parts)
128 }
129 other => FtsQuery::And(vec![other, right]),
130 };
131 }
132 Ok(left)
133}
134
135fn parse_not(tokens: &[String], pos: &mut usize) -> Result<FtsQuery> {
136 if *pos < tokens.len() && tokens[*pos] == "!" {
137 *pos += 1;
138 let inner = parse_not(tokens, pos)?;
139 return Ok(FtsQuery::Not(Box::new(inner)));
140 }
141 parse_atom(tokens, pos)
142}
143
144fn parse_atom(tokens: &[String], pos: &mut usize) -> Result<FtsQuery> {
145 if *pos >= tokens.len() {
146 return Err(SqlError::Parse {
147 detail: "tsquery: unexpected end of input".into(),
148 });
149 }
150 if tokens[*pos] == "(" {
151 *pos += 1;
152 let inner = parse_or(tokens, pos)?;
153 if *pos >= tokens.len() || tokens[*pos] != ")" {
154 return Err(SqlError::Parse {
155 detail: "tsquery: unmatched '('".into(),
156 });
157 }
158 *pos += 1;
159 return Ok(inner);
160 }
161 let term = tokens[*pos].clone();
163 if term == ")" || term == "&" || term == "|" || term == "!" {
164 return Err(SqlError::Parse {
165 detail: format!("tsquery: unexpected operator '{term}'"),
166 });
167 }
168 *pos += 1;
169 if *pos < tokens.len() && tokens[*pos] == ":*" {
171 *pos += 1;
172 return Ok(FtsQuery::Prefix(term));
173 }
174 Ok(FtsQuery::Plain {
175 text: term,
176 fuzzy: false,
177 })
178}
179
180#[cfg(test)]
183mod tests {
184 use super::*;
185
186 #[test]
187 fn parse_single_term() {
188 let q = parse_tsquery("rust").unwrap();
189 assert_eq!(
190 q,
191 FtsQuery::Plain {
192 text: "rust".into(),
193 fuzzy: false
194 }
195 );
196 }
197
198 #[test]
199 fn parse_and_two_terms() {
200 let q = parse_tsquery("rust & lang").unwrap();
201 assert_eq!(
202 q,
203 FtsQuery::And(vec![
204 FtsQuery::Plain {
205 text: "rust".into(),
206 fuzzy: false
207 },
208 FtsQuery::Plain {
209 text: "lang".into(),
210 fuzzy: false
211 },
212 ])
213 );
214 }
215
216 #[test]
217 fn parse_or_two_terms() {
218 let q = parse_tsquery("rust | lang").unwrap();
219 assert_eq!(
220 q,
221 FtsQuery::Or(vec![
222 FtsQuery::Plain {
223 text: "rust".into(),
224 fuzzy: false
225 },
226 FtsQuery::Plain {
227 text: "lang".into(),
228 fuzzy: false
229 },
230 ])
231 );
232 }
233
234 #[test]
235 fn parse_prefix() {
236 let q = parse_tsquery("rust:*").unwrap();
237 assert_eq!(q, FtsQuery::Prefix("rust".into()));
238 }
239
240 #[test]
241 fn parse_nested() {
242 let q = parse_tsquery("(a & b) | c").unwrap();
244 assert_eq!(
245 q,
246 FtsQuery::Or(vec![
247 FtsQuery::And(vec![
248 FtsQuery::Plain {
249 text: "a".into(),
250 fuzzy: false
251 },
252 FtsQuery::Plain {
253 text: "b".into(),
254 fuzzy: false
255 },
256 ]),
257 FtsQuery::Plain {
258 text: "c".into(),
259 fuzzy: false
260 },
261 ])
262 );
263 }
264
265 #[test]
266 fn parse_not_term() {
267 let q = parse_tsquery("!foo").unwrap();
268 assert_eq!(
269 q,
270 FtsQuery::Not(Box::new(FtsQuery::Plain {
271 text: "foo".into(),
272 fuzzy: false
273 }))
274 );
275 }
276
277 #[test]
278 fn parse_empty_is_error() {
279 assert!(parse_tsquery("").is_err());
280 assert!(parse_tsquery(" ").is_err());
281 }
282
283 #[test]
284 fn parse_phrase_terms_basic() {
285 let q = parse_phrase_terms("hello world");
286 assert_eq!(q, FtsQuery::Phrase(vec!["hello".into(), "world".into()]));
287 }
288
289 #[test]
290 fn and_is_left_associative() {
291 let q = parse_tsquery("a & b & c").unwrap();
292 assert_eq!(
294 q,
295 FtsQuery::And(vec![
296 FtsQuery::Plain {
297 text: "a".into(),
298 fuzzy: false
299 },
300 FtsQuery::Plain {
301 text: "b".into(),
302 fuzzy: false
303 },
304 FtsQuery::Plain {
305 text: "c".into(),
306 fuzzy: false
307 },
308 ])
309 );
310 }
311
312 #[test]
313 fn or_is_left_associative() {
314 let q = parse_tsquery("a | b | c").unwrap();
315 assert_eq!(
316 q,
317 FtsQuery::Or(vec![
318 FtsQuery::Plain {
319 text: "a".into(),
320 fuzzy: false
321 },
322 FtsQuery::Plain {
323 text: "b".into(),
324 fuzzy: false
325 },
326 FtsQuery::Plain {
327 text: "c".into(),
328 fuzzy: false
329 },
330 ])
331 );
332 }
333}