1use std::collections::HashMap;
2use std::collections::HashSet;
3use std::iter::Peekable;
4use std::str::Chars;
5
/// Boolean expression tree for a parsed search query.
///
/// Leaves are [`Expr::Term`] nodes; interior nodes combine two sub-expressions with
/// AND / OR semantics.
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
    /// A leaf term: a list of keywords plus the modifiers that were attached to it.
    Term {
        /// Keywords making up the term. May be empty (the parser produces an empty list
        /// for `field:` with nothing usable after the colon).
        keywords: Vec<String>,
        /// Field name from a `field:value` construct, if one was given.
        field: Option<String>,
        /// Set by the parser when the term was prefixed with `+`.
        required: bool,
        /// Set by the parser when the term was prefixed with `-`.
        excluded: bool,
        /// Set when the term came from a quoted string or from an exact-mode query.
        exact: bool,
    },

    /// Conjunction: evaluates to true only when both operands do.
    And(Box<Expr>, Box<Expr>),

    /// Disjunction: evaluates to true when at least one operand does.
    Or(Box<Expr>, Box<Expr>),
}
29
30impl Expr {
31 #[cfg(test)]
33 pub fn extract_terms(&self) -> (Vec<String>, Vec<String>) {
34 let mut required = Vec::new();
35 let mut optional = Vec::new();
36 self.collect_terms(&mut required, &mut optional);
37 (required, optional)
38 }
39
40 #[cfg(test)]
41 fn collect_terms(&self, required: &mut Vec<String>, optional: &mut Vec<String>) {
42 match self {
43 Expr::Term {
44 keywords,
45 required: is_required,
46 excluded,
47 ..
48 } => {
49 if !excluded {
50 for keyword in keywords {
51 if *is_required {
52 required.push(keyword.clone());
53 } else {
54 optional.push(keyword.clone());
55 }
56 }
57 }
58 }
59 Expr::And(left, right) => {
60 left.collect_terms(required, optional);
61 right.collect_terms(required, optional);
62 }
63 Expr::Or(left, right) => {
64 left.collect_terms(required, optional);
65 right.collect_terms(required, optional);
66 }
67 }
68 }
69
70 fn has_required_term(&self) -> bool {
72 match self {
73 Expr::Term { required, .. } => *required,
74 Expr::And(left, right) | Expr::Or(left, right) => {
75 left.has_required_term() || right.has_required_term()
76 }
77 }
78 }
79
80 fn evaluate_with_has_required(
83 &self,
84 matched_terms: &HashSet<usize>,
85 term_indices: &HashMap<String, usize>,
86 ignore_negatives: bool,
87 has_required_anywhere: bool,
88 ) -> bool {
89 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
90
91 match self {
92 Expr::Term {
93 keywords,
94 required,
95 excluded,
96 ..
97 } => {
98 if keywords.is_empty() {
99 return *excluded;
101 }
102 let all_present = keywords.iter().all(|kw| {
104 term_indices
105 .get(kw)
106 .map(|idx| matched_terms.contains(idx))
107 .unwrap_or(false)
108 });
109
110 if *excluded {
111 if ignore_negatives {
112 true
114 } else {
115 !keywords.iter().any(|kw| {
117 term_indices
118 .get(kw)
119 .map(|idx| matched_terms.contains(idx))
120 .unwrap_or(false)
121 })
122 }
123 } else if *required && ignore_negatives {
124 true
126 } else if *required {
127 all_present
129 } else {
130 if has_required_anywhere {
133 true
134 } else {
135 let any_present = keywords.iter().any(|kw| {
142 term_indices
143 .get(kw)
144 .map(|idx| matched_terms.contains(idx))
145 .unwrap_or(false)
146 });
147
148 if !any_present {
150 return false;
151 }
152
153 all_present
156 }
157 }
158 }
159 Expr::And(left, right) => {
160 let lval = left.evaluate_with_has_required(
161 matched_terms,
162 term_indices,
163 ignore_negatives,
164 has_required_anywhere,
165 );
166 let rval = right.evaluate_with_has_required(
167 matched_terms,
168 term_indices,
169 ignore_negatives,
170 has_required_anywhere,
171 );
172 if debug_mode {
173 println!(
174 "DEBUG: AND => left={}, right={}, result={}",
175 lval,
176 rval,
177 lval && rval
178 );
179 }
180 lval && rval
181 }
182 Expr::Or(left, right) => {
183 let lval = left.evaluate_with_has_required(
187 matched_terms,
188 term_indices,
189 ignore_negatives,
190 has_required_anywhere,
191 );
192 let rval = right.evaluate_with_has_required(
193 matched_terms,
194 term_indices,
195 ignore_negatives,
196 has_required_anywhere,
197 );
198
199 if debug_mode {
200 println!(
201 "DEBUG: OR => left={}, right={}, result={}",
202 lval,
203 rval,
204 lval || rval
205 );
206 }
207 lval || rval
208 }
209 }
210 }
211
212 pub fn evaluate(
221 &self,
222 matched_terms: &HashSet<usize>,
223 term_indices: &HashMap<String, usize>,
224 ignore_negatives: bool,
225 ) -> bool {
226 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
227
228 if ignore_negatives {
231 fn collect_required(expr: &Expr) -> Vec<String> {
232 match expr {
233 Expr::Term {
234 keywords,
235 required,
236 excluded,
237 ..
238 } => {
239 if *required && !*excluded {
240 keywords.clone()
241 } else {
242 vec![]
243 }
244 }
245 Expr::And(left, right) => {
246 let mut out = collect_required(left);
247 out.extend(collect_required(right));
248 out
249 }
250 Expr::Or(left, right) => {
251 let mut out = collect_required(left);
252 out.extend(collect_required(right));
253 out
254 }
255 }
256 }
257 let required_terms = collect_required(self);
258 if debug_mode && !required_terms.is_empty() {
259 println!("DEBUG: Required terms (ignoring negatives): {required_terms:?}");
260 }
261 for term in &required_terms {
262 if let Some(&idx) = term_indices.get(term) {
263 if !matched_terms.contains(&idx) {
264 if debug_mode {
265 println!("DEBUG: Missing required term '{term}' (idx={idx})");
266 }
267 return false;
268 }
269 } else {
270 return false;
272 }
273 }
274 }
275
276 let has_required_anywhere = self.has_required_term();
278
279 if debug_mode {
280 println!("DEBUG: Evaluating => {self:?}");
281 println!("DEBUG: matched_terms => {matched_terms:?}");
282 println!("DEBUG: term_indices => {term_indices:?}");
283 println!("DEBUG: Expression has_required_anywhere? {has_required_anywhere}");
284 }
285
286 self.evaluate_with_has_required(
288 matched_terms,
289 term_indices,
290 ignore_negatives,
291 has_required_anywhere,
292 )
293 }
294}
295
296impl std::fmt::Display for Expr {
297 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
298 match self {
299 Expr::Term {
300 keywords,
301 field,
302 required,
303 excluded,
304 exact,
305 } => {
306 let prefix = if *required {
307 "+"
308 } else if *excluded {
309 "-"
310 } else {
311 ""
312 };
313 let field_prefix = if let Some(ref field_name) = field {
314 format!("{field_name}:")
315 } else {
316 String::new()
317 };
318 if keywords.len() == 1 && *exact {
321 write!(f, "{}{}\"{}\"", prefix, field_prefix, keywords[0])
322 } else if keywords.len() == 1 {
323 write!(f, "{}{}{}", prefix, field_prefix, keywords[0])
324 } else {
325 write!(f, "{}{}\"{}\"", prefix, field_prefix, keywords.join(" "))
326 }
327 }
328 Expr::And(left, right) => write!(f, "({left} AND {right})"),
329 Expr::Or(left, right) => write!(f, "({left} OR {right})"),
330 }
331 }
332}
333
/// Lexical tokens produced by the query tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// The `+` character.
    Plus,
    /// The `-` character.
    Minus,
    /// The `AND` keyword.
    And,
    /// The `OR` keyword.
    Or,
    /// The `(` character.
    LParen,
    /// The `)` character.
    RParen,
    /// The `:` character.
    Colon,
    /// A bare word.
    Ident(String),
    /// The contents of a double-quoted string, without the surrounding quotes.
    QuotedString(String),
}
346
/// Errors that can occur while tokenizing or parsing a query string.
#[derive(Debug)]
pub enum ParseError {
    /// A character that cannot be part of any token.
    // Not constructed anywhere in the code visible here (the tokenizer skips unknown
    // characters instead of failing), hence the lint suppression.
    #[allow(dead_code)]
    UnexpectedChar(char),
    /// Input ended while more was expected (e.g. an unterminated quoted string or a
    /// missing operand).
    UnexpectedEndOfInput,
    /// A token appeared where the grammar does not allow it.
    UnexpectedToken(Token),
    /// Any other failure, described by a free-form message.
    Generic(String),
}
356impl std::fmt::Display for ParseError {
357 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
358 match self {
359 ParseError::UnexpectedChar(c) => write!(f, "Unexpected character '{c}'"),
360 ParseError::UnexpectedEndOfInput => write!(f, "Unexpected end of input"),
361 ParseError::UnexpectedToken(t) => write!(f, "Unexpected token '{t:?}'"),
362 ParseError::Generic(s) => write!(f, "{s}"),
363 }
364 }
365}
366
// Marker impl: together with `Debug` and `Display`, this lets `ParseError` be used
// wherever a `std::error::Error` is expected. No methods are overridden.
impl std::error::Error for ParseError {}
368
369fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
371 let mut chars = input.chars().peekable();
372 let mut tokens = Vec::new();
373 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
374
375 while let Some(&ch) = chars.peek() {
376 match ch {
377 c if c.is_whitespace() => {
378 chars.next();
379 }
380 '+' => {
381 tokens.push(Token::Plus);
382 chars.next();
383 }
384 '-' => {
385 tokens.push(Token::Minus);
386 chars.next();
387 }
388 '(' => {
389 tokens.push(Token::LParen);
390 chars.next();
391 }
392 ')' => {
393 tokens.push(Token::RParen);
394 chars.next();
395 }
396 ':' => {
397 tokens.push(Token::Colon);
398 chars.next();
399 }
400 '"' => {
401 chars.next(); let quoted_string = lex_quoted_string(&mut chars)?;
403 tokens.push(Token::QuotedString(quoted_string));
404 }
405 _ => {
406 if ch.is_alphanumeric() || ch == '_' || ch == '.' {
408 let ident = lex_identifier(&mut chars);
409 let ident_upper = ident.to_ascii_uppercase();
410 if ident_upper == "AND" {
411 tokens.push(Token::And);
412 } else if ident_upper == "OR" {
413 tokens.push(Token::Or);
414 } else {
415 tokens.push(Token::Ident(ident));
416 }
417 } else {
418 if debug_mode {
420 println!("DEBUG: Skipping unknown character '{ch}'");
421 }
422 chars.next();
423 }
424 }
425 }
426 }
427
428 if tokens.is_empty() {
429 return Err(ParseError::Generic(
430 "No valid tokens found in input".to_string(),
431 ));
432 }
433 Ok(tokens)
434}
435
436fn lex_quoted_string(chars: &mut Peekable<Chars>) -> Result<String, ParseError> {
438 let mut buf = String::new();
439 let mut escaped = false;
440
441 while let Some(&ch) = chars.peek() {
442 if escaped {
443 buf.push(ch);
444 escaped = false;
445 chars.next();
446 } else if ch == '\\' {
447 escaped = true;
448 chars.next();
449 } else if ch == '"' {
450 chars.next(); return Ok(buf);
452 } else {
453 buf.push(ch);
454 chars.next();
455 }
456 }
457 Err(ParseError::UnexpectedEndOfInput)
459}
460
/// Consumes the longest run of identifier characters (alphanumerics, `_`, `.`) at the
/// front of `chars` and returns it.
///
/// The first non-identifier character is left unconsumed. The result is empty when the
/// input does not start with an identifier character.
fn lex_identifier(chars: &mut Peekable<Chars>) -> String {
    let mut ident = String::new();
    while let Some(ch) = chars.next_if(|&c| c.is_alphanumeric() || c == '_' || c == '.') {
        ident.push(ch);
    }
    ident
}
473
474use probe_code::search::tokenization::{add_special_term, tokenize as custom_tokenize};
476
/// Recursive-descent parser over a pre-tokenized query.
struct Parser {
    /// The full token stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
}
481
482impl Parser {
483 fn new(tokens: Vec<Token>) -> Self {
484 Parser { tokens, pos: 0 }
485 }
486
487 fn peek(&self) -> Option<&Token> {
488 self.tokens.get(self.pos)
489 }
490
491 fn next(&mut self) -> Option<Token> {
492 let t = self.peek()?.clone();
493 self.pos += 1;
494 Some(t)
495 }
496
497 fn expect(&mut self, expected: &Token) -> Result<Token, ParseError> {
498 match self.peek() {
499 Some(t) if t == expected => Ok(self.next().unwrap()),
500 Some(t) => Err(ParseError::UnexpectedToken(t.clone())),
501 None => Err(ParseError::UnexpectedEndOfInput),
502 }
503 }
504
505 fn parse_expr(&mut self) -> Result<Expr, ParseError> {
506 let expr = self.parse_or_expr()?;
507 Ok(expr)
509 }
510
511 fn parse_or_expr(&mut self) -> Result<Expr, ParseError> {
512 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
513 if debug_mode {
514 println!("DEBUG: parse_or_expr => pos={pos}", pos = self.pos);
515 }
516
517 let mut left = self.parse_and_expr()?;
518
519 while let Some(Token::Or) = self.peek() {
520 self.next(); let right = self.parse_and_expr()?;
522 left = Expr::Or(Box::new(left), Box::new(right));
523 if debug_mode {
524 println!("DEBUG: OR => {left:?}");
525 }
526 }
527 Ok(left)
528 }
529
530 fn parse_and_expr(&mut self) -> Result<Expr, ParseError> {
531 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
532 if debug_mode {
533 println!("DEBUG: parse_and_expr => pos={pos}", pos = self.pos);
534 }
535
536 let mut left = self.parse_factor()?;
537
538 while let Some(token) = self.peek() {
539 match token {
540 Token::And => {
542 self.next(); let right = self.parse_factor()?;
544 left = Expr::And(Box::new(left), Box::new(right));
545 if debug_mode {
546 println!("DEBUG: AND => {left:?}");
547 }
548 }
549 Token::Or => {
551 break;
552 }
553 Token::Plus | Token::Minus => {
555 let right = self.parse_factor()?;
556 left = Expr::And(Box::new(left), Box::new(right));
557 if debug_mode {
558 println!("DEBUG: forced AND => {left:?}");
559 }
560 }
561 Token::Ident(_) | Token::QuotedString(_) | Token::LParen => {
563 let right = self.parse_factor()?;
564 left = Expr::Or(Box::new(left), Box::new(right));
566 if debug_mode {
567 println!("DEBUG: implicit OR => {left:?}");
568 }
569 }
570 _ => break,
571 }
572 }
573 Ok(left)
574 }
575
576 fn parse_factor(&mut self) -> Result<Expr, ParseError> {
577 match self.peek() {
578 Some(Token::LParen) => {
579 self.next(); let expr = self.parse_expr()?;
581 self.expect(&Token::RParen)?;
582 Ok(expr)
583 }
584 _ => self.parse_prefixed_term(),
585 }
586 }
587
588 fn parse_prefixed_term(&mut self) -> Result<Expr, ParseError> {
589 let mut required = false;
590 let mut excluded = false;
591 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
592
593 match self.peek() {
594 Some(Token::Plus) => {
595 required = true;
596 self.next();
597 }
598 Some(Token::Minus) => {
599 excluded = true;
600 self.next();
601 }
602 _ => {}
603 }
604
605 let primary_expr = self.parse_primary()?;
606 if let Expr::Term {
608 keywords,
609 field,
610 required: _,
611 excluded: _,
612 exact,
613 } = primary_expr
614 {
615 let final_keywords = if exact || excluded {
617 for kw in &keywords {
619 add_special_term(kw);
620 }
621 keywords
622 } else {
623 let mut expanded = Vec::new();
625 for kw in &keywords {
626 let splitted = custom_tokenize(kw);
627 expanded.extend(splitted.into_iter().filter(|s| !s.is_empty()));
629 }
630 expanded
633 };
634
635 if debug_mode {
636 println!("DEBUG: parse_prefixed_term => required={required}, excluded={excluded}, final_keywords={final_keywords:?}");
637 }
638
639 Ok(Expr::Term {
640 keywords: final_keywords,
641 field,
642 required,
643 excluded,
644 exact,
645 })
646 } else {
647 Ok(primary_expr)
649 }
650 }
651
652 fn parse_primary(&mut self) -> Result<Expr, ParseError> {
653 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
654
655 match self.peek() {
656 Some(Token::QuotedString(s)) => {
658 let val = s.clone();
659 self.next();
660 if debug_mode {
661 println!("DEBUG: QuotedString => {val}");
662 }
663 Ok(Expr::Term {
664 keywords: vec![val],
665 field: None,
666 required: false,
667 excluded: false,
668 exact: true,
669 })
670 }
671 Some(Token::Ident(_)) => {
673 let Token::Ident(first) = self.next().unwrap() else {
674 unreachable!()
675 };
676 if debug_mode {
677 println!("DEBUG: Ident => {first}");
678 }
679 if let Some(Token::Colon) = self.peek() {
680 self.next(); match self.peek() {
684 Some(Token::Ident(ident2)) => {
685 let val2 = ident2.clone();
686 self.next();
687 Ok(Expr::Term {
688 keywords: vec![val2],
689 field: Some(first),
690 required: false,
691 excluded: false,
692 exact: false,
693 })
694 }
695 Some(Token::QuotedString(qs)) => {
696 let qval = qs.clone();
697 self.next();
698 Ok(Expr::Term {
699 keywords: vec![qval],
700 field: Some(first),
701 required: false,
702 excluded: false,
703 exact: true,
704 })
705 }
706 _ => Ok(Expr::Term {
708 keywords: vec![],
709 field: Some(first),
710 required: false,
711 excluded: false,
712 exact: false,
713 }),
714 }
715 } else {
716 Ok(Expr::Term {
718 keywords: vec![first],
719 field: None,
720 required: false,
721 excluded: false,
722 exact: false,
723 })
724 }
725 }
726 Some(t) => Err(ParseError::UnexpectedToken(t.clone())),
727 None => Err(ParseError::UnexpectedEndOfInput),
728 }
729 }
730}
731
732pub fn parse_query(input: &str, exact: bool) -> Result<Expr, ParseError> {
734 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
735
736 if debug_mode {
737 println!("DEBUG: parse_query('{input}', exact={exact})");
738 }
739
740 if exact {
742 if debug_mode {
743 println!("DEBUG: Exact search enabled, treating query as a single term");
744 }
745 return Ok(Expr::Term {
746 keywords: vec![input.to_string()],
747 field: None,
748 required: false,
749 excluded: false,
750 exact: true,
751 });
752 }
753
754 let tokens_result = tokenize(input);
756 if debug_mode {
757 println!("DEBUG: Tokens => {tokens_result:?}");
758 }
759
760 let tokens = match tokens_result {
762 Ok(ts) => ts,
763 Err(_) => {
764 let cleaned_input = input
765 .chars()
766 .filter(|&c| c.is_alphanumeric() || c.is_whitespace() || c == '_' || c == '.')
767 .collect::<String>();
768 if cleaned_input.trim().is_empty() {
769 return Err(ParseError::Generic("No valid tokens found".to_string()));
770 }
771 let keywords = cleaned_input
772 .split_whitespace()
773 .map(|s| s.to_lowercase())
774 .collect::<Vec<String>>();
775 return Ok(Expr::Term {
776 keywords,
777 field: None,
778 required: false,
779 excluded: false,
780 exact: false,
781 });
782 }
783 };
784
785 let mut parser = Parser::new(tokens);
787 let parsed = parser.parse_expr();
788
789 if parsed.is_err() {
790 let idents = parser
792 .tokens
793 .iter()
794 .filter_map(|t| match t {
795 Token::Ident(s) => Some(s.clone()),
796 _ => None,
797 })
798 .collect::<Vec<_>>();
799 if idents.is_empty() {
800 return Err(ParseError::Generic(
801 "No valid identifiers found".to_string(),
802 ));
803 }
804 return Ok(Expr::Term {
805 keywords: idents,
806 field: None,
807 required: false,
808 excluded: false,
809 exact: false,
810 });
811 }
812
813 Ok(parsed.unwrap())
815}
816
/// Parses `input` in non-exact mode; equivalent to `parse_query(input, false)`.
// NOTE(review): `dead_code` is suppressed, presumably because no caller is guaranteed
// in every build configuration — confirm against the rest of the crate.
#[allow(dead_code)]
pub fn parse_query_compat(input: &str) -> Result<Expr, ParseError> {
    parse_query(input, false)
}
822
/// Parses `input` in non-exact mode; equivalent to `parse_query(input, false)`.
// NOTE(review): the name suggests this exists for test code; `dead_code` is suppressed,
// presumably because it may be unused outside tests — confirm.
#[allow(dead_code)]
pub fn parse_query_test(input: &str) -> Result<Expr, ParseError> {
    parse_query(input, false)
}
828
// Unit tests live in sibling files and are textually included into this module, so
// they are compiled as part of this file (test builds only).
#[cfg(test)]
mod tests {
    include!("elastic_query_tests.rs");
    include!("elastic_query_evaluate_tests.rs");
    include!("elastic_query_tokenization_tests.rs");
}
834}