1use alloc::boxed::Box;
9use alloc::format;
10use alloc::string::{String, ToString};
11use alloc::vec::Vec;
12
13use spg_storage::{TsLexeme, TsQueryAst, Value};
14
15use super::{EvalContext, EvalError};
16
17pub(super) fn fts_ts_rank(args: &[Value]) -> Result<Value, EvalError> {
22 let (vec, query) = parse_rank_args("ts_rank", args)?;
23 match (vec, query) {
24 (None, _) | (_, None) => Ok(Value::Null),
25 (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank(&v, &q)))),
26 }
27}
28
29pub(super) fn fts_ts_rank_cd(args: &[Value]) -> Result<Value, EvalError> {
30 let (vec, query) = parse_rank_args("ts_rank_cd", args)?;
31 match (vec, query) {
32 (None, _) | (_, None) => Ok(Value::Null),
33 (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank_cd(&v, &q)))),
34 }
35}
36
37fn parse_rank_args(
38 name: &str,
39 args: &[Value],
40) -> Result<
41 (
42 Option<Vec<spg_storage::TsLexeme>>,
43 Option<spg_storage::TsQueryAst>,
44 ),
45 EvalError,
46> {
47 if args.len() != 2 {
48 return Err(EvalError::TypeMismatch {
49 detail: format!(
50 "{name}() takes 2 args in v7.12.2 (weights array + normalisation flag are v7.12.x carve-out), got {}",
51 args.len()
52 ),
53 });
54 }
55 let vec = match &args[0] {
56 Value::Null => None,
57 Value::TsVector(v) => Some(v.clone()),
58 other => {
59 return Err(EvalError::TypeMismatch {
60 detail: format!(
61 "{name}() first arg must be tsvector, got {:?}",
62 other.data_type()
63 ),
64 });
65 }
66 };
67 let query = match &args[1] {
68 Value::Null => None,
69 Value::TsQuery(q) => Some(q.clone()),
70 other => {
71 return Err(EvalError::TypeMismatch {
72 detail: format!(
73 "{name}() second arg must be tsquery, got {:?}",
74 other.data_type()
75 ),
76 });
77 }
78 };
79 Ok((vec, query))
80}
81
82pub(super) fn ts_match(l: Value, r: Value) -> Result<Value, EvalError> {
87 let (vec, query) = match (l, r) {
88 (Value::Null, _) | (_, Value::Null) => return Ok(Value::Null),
89 (Value::TsVector(v), Value::TsQuery(q)) => (v, q),
90 (Value::TsQuery(q), Value::TsVector(v)) => (v, q),
91 (l, r) => {
92 return Err(EvalError::TypeMismatch {
93 detail: format!(
94 "@@ requires (tsvector, tsquery), got ({:?}, {:?})",
95 l.data_type(),
96 r.data_type()
97 ),
98 });
99 }
100 };
101 Ok(Value::Bool(crate::fts::ts_query_matches(&vec, &query)))
102}
103
104pub(super) fn fts_to_tsvector(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
109 let (config, text) = parse_fts_args("to_tsvector", args, ctx)?;
110 match text {
111 None => Ok(Value::Null),
112 Some(t) => Ok(Value::TsVector(crate::fts::to_tsvector(config, &t))),
113 }
114}
115
116pub(super) fn fts_setweight(args: &[Value]) -> Result<Value, EvalError> {
119 let [vec_arg, weight_arg] = args else {
120 return Err(EvalError::TypeMismatch {
121 detail: alloc::format!("setweight expects 2 arguments, got {}", args.len()),
122 });
123 };
124 if matches!(vec_arg, Value::Null) || matches!(weight_arg, Value::Null) {
125 return Ok(Value::Null);
126 }
127 let Value::TsVector(lexemes) = vec_arg else {
128 return Err(EvalError::TypeMismatch {
129 detail: alloc::format!(
130 "setweight expects a tsvector, got {:?}",
131 vec_arg.data_type()
132 ),
133 });
134 };
135 let Value::Text(w) = weight_arg else {
136 return Err(EvalError::TypeMismatch {
137 detail: alloc::format!(
138 "setweight expects a weight letter, got {:?}",
139 weight_arg.data_type()
140 ),
141 });
142 };
143 let weight = match w.to_ascii_uppercase().as_str() {
144 "A" => 3,
145 "B" => 2,
146 "C" => 1,
147 "D" => 0,
148 other => {
149 return Err(EvalError::TypeMismatch {
150 detail: alloc::format!("unrecognized weight: {other:?} (expected A, B, C or D)"),
151 });
152 }
153 };
154 let mut out = lexemes.clone();
155 for lex in &mut out {
156 lex.weight = weight;
157 }
158 Ok(Value::TsVector(out))
159}
160
161pub(super) fn fts_plainto_tsquery(
162 args: &[Value],
163 ctx: &EvalContext<'_>,
164) -> Result<Value, EvalError> {
165 let (config, text) = parse_fts_args("plainto_tsquery", args, ctx)?;
166 match text {
167 None => Ok(Value::Null),
168 Some(t) => Ok(Value::TsQuery(crate::fts::plainto_tsquery(config, &t))),
169 }
170}
171
172pub(super) fn fts_phraseto_tsquery(
173 args: &[Value],
174 ctx: &EvalContext<'_>,
175) -> Result<Value, EvalError> {
176 let (config, text) = parse_fts_args("phraseto_tsquery", args, ctx)?;
177 match text {
178 None => Ok(Value::Null),
179 Some(t) => Ok(Value::TsQuery(crate::fts::phraseto_tsquery(config, &t))),
180 }
181}
182
183pub(super) fn fts_websearch_to_tsquery(
184 args: &[Value],
185 ctx: &EvalContext<'_>,
186) -> Result<Value, EvalError> {
187 let (config, text) = parse_fts_args("websearch_to_tsquery", args, ctx)?;
188 match text {
189 None => Ok(Value::Null),
190 Some(t) => Ok(Value::TsQuery(crate::fts::websearch_to_tsquery(config, &t))),
191 }
192}
193
194pub(super) fn fts_to_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
195 let (config, text) = parse_fts_args("to_tsquery", args, ctx)?;
196 match text {
197 None => Ok(Value::Null),
198 Some(t) => Ok(Value::TsQuery(crate::fts::to_tsquery(config, &t)?)),
199 }
200}
201
202fn parse_fts_args(
207 name: &str,
208 args: &[Value],
209 ctx: &EvalContext<'_>,
210) -> Result<(crate::fts::TsConfig, Option<String>), EvalError> {
211 let (config_arg, text_arg) = match args {
212 [t] => (None, t),
213 [c, t] => (Some(c), t),
214 _ => {
215 return Err(EvalError::TypeMismatch {
216 detail: format!("{name}() takes 1 or 2 args, got {}", args.len()),
217 });
218 }
219 };
220 let config = match config_arg {
221 None => match ctx.default_text_search_config {
222 Some(name_str) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
223 EvalError::TypeMismatch {
224 detail: format!(
225 "text search config not implemented: {name_str:?} (supported: simple, english)"
226 ),
227 }
228 })?,
229 None => crate::fts::TsConfig::Simple,
230 },
231 Some(Value::Null) => return Ok((crate::fts::TsConfig::Simple, None)),
232 Some(Value::Text(name_str)) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
233 EvalError::TypeMismatch {
234 detail: format!(
235 "text search config not implemented: {name_str:?} (supported: simple, english)"
236 ),
237 }
238 })?,
239 Some(other) => {
240 return Err(EvalError::TypeMismatch {
241 detail: format!(
242 "{name}() config arg must be text, got {:?}",
243 other.data_type()
244 ),
245 });
246 }
247 };
248 let text = match text_arg {
249 Value::Null => None,
250 Value::Text(s) => Some(s.clone()),
251 other => {
252 return Err(EvalError::TypeMismatch {
253 detail: format!(
254 "{name}() text arg must be text, got {:?}",
255 other.data_type()
256 ),
257 });
258 }
259 };
260 Ok((config, text))
261}
262
263pub fn format_tsvector(lexs: &[TsLexeme]) -> String {
269 let mut out = String::with_capacity(lexs.len() * 12);
270 for (i, l) in lexs.iter().enumerate() {
271 if i > 0 {
272 out.push(' ');
273 }
274 out.push('\'');
275 for c in l.word.chars() {
276 if c == '\'' {
277 out.push('\'');
278 }
279 out.push(c);
280 }
281 out.push('\'');
282 if !l.positions.is_empty() {
283 for (pi, p) in l.positions.iter().enumerate() {
284 out.push(if pi == 0 { ':' } else { ',' });
285 out.push_str(&p.to_string());
286 }
287 match l.weight {
292 3 => out.push('A'),
293 2 => out.push('B'),
294 1 => out.push('C'),
295 _ => {}
296 }
297 }
298 }
299 out
300}
301
302pub fn format_tsquery(ast: &TsQueryAst) -> String {
305 fn go(ast: &TsQueryAst, parent_prec: u8, out: &mut String) {
306 let (own_prec, write_self): (u8, &dyn Fn(&mut String)) = match ast {
308 TsQueryAst::Or(_, _) => (1, &|_| {}),
309 TsQueryAst::And(_, _) | TsQueryAst::Phrase { .. } => (2, &|_| {}),
310 TsQueryAst::Not(_) => (3, &|_| {}),
311 TsQueryAst::Term { .. } => (4, &|_| {}),
312 };
313 let need_parens = own_prec < parent_prec;
314 if need_parens {
315 out.push('(');
316 }
317 match ast {
318 TsQueryAst::Term { word, .. } => {
319 out.push('\'');
320 for c in word.chars() {
321 if c == '\'' {
322 out.push('\'');
323 }
324 out.push(c);
325 }
326 out.push('\'');
327 }
328 TsQueryAst::And(a, b) => {
329 go(a, own_prec, out);
330 out.push_str(" & ");
331 go(b, own_prec, out);
332 }
333 TsQueryAst::Or(a, b) => {
334 go(a, own_prec, out);
335 out.push_str(" | ");
336 go(b, own_prec, out);
337 }
338 TsQueryAst::Not(x) => {
339 out.push('!');
340 go(x, own_prec, out);
341 }
342 TsQueryAst::Phrase {
343 left,
344 right,
345 distance,
346 } => {
347 go(left, own_prec, out);
348 out.push_str(&alloc::format!(" <{distance}> "));
349 go(right, own_prec, out);
350 }
351 }
352 write_self(out);
353 if need_parens {
354 out.push(')');
355 }
356 }
357 let mut out = String::new();
358 go(ast, 0, &mut out);
359 out
360}
361
362pub fn decode_tsvector_external(s: &str) -> Result<Vec<TsLexeme>, EvalError> {
371 let mut out: Vec<TsLexeme> = Vec::new();
372 let mut i = 0;
373 let bytes = s.as_bytes();
374 while i < bytes.len() {
375 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
376 i += 1;
377 }
378 if i >= bytes.len() {
379 break;
380 }
381 let word = if bytes[i] == b'\'' {
384 i += 1;
385 let mut w = String::new();
386 loop {
387 if i >= bytes.len() {
388 return Err(EvalError::TypeMismatch {
389 detail: "tsvector literal: unterminated quoted lexeme".into(),
390 });
391 }
392 let b = bytes[i];
393 if b == b'\'' {
394 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
395 w.push('\'');
396 i += 2;
397 } else {
398 i += 1;
399 break;
400 }
401 } else {
402 w.push(b as char);
403 i += 1;
404 }
405 }
406 w
407 } else {
408 let start = i;
410 while i < bytes.len() && !bytes[i].is_ascii_whitespace() && bytes[i] != b':' {
411 i += 1;
412 }
413 core::str::from_utf8(&bytes[start..i])
414 .map_err(|_| EvalError::TypeMismatch {
415 detail: "tsvector literal: non-UTF-8 lexeme".into(),
416 })?
417 .to_string()
418 };
419 if word.is_empty() {
420 return Err(EvalError::TypeMismatch {
421 detail: "tsvector literal: empty lexeme".into(),
422 });
423 }
424 let mut positions: Vec<u16> = Vec::new();
427 let mut weight: u8 = 0;
428 if i < bytes.len() && bytes[i] == b':' {
429 i += 1;
430 loop {
431 let start = i;
432 while i < bytes.len() && bytes[i].is_ascii_digit() {
433 i += 1;
434 }
435 if start == i {
436 return Err(EvalError::TypeMismatch {
437 detail: "tsvector literal: expected digit after ':'".into(),
438 });
439 }
440 let num: u16 = core::str::from_utf8(&bytes[start..i])
441 .expect("ascii digits")
442 .parse()
443 .map_err(|_| EvalError::TypeMismatch {
444 detail: alloc::format!(
445 "tsvector literal: position {} overflows u16",
446 core::str::from_utf8(&bytes[start..i]).unwrap_or("?")
447 ),
448 })?;
449 positions.push(num);
450 if i < bytes.len() {
451 let w = bytes[i];
452 if matches!(w, b'A' | b'B' | b'C' | b'D') {
453 weight = match w {
454 b'A' => 3,
455 b'B' => 2,
456 b'C' => 1,
457 _ => 0,
458 };
459 i += 1;
460 }
461 }
462 if i < bytes.len() && bytes[i] == b',' {
463 i += 1;
464 continue;
465 }
466 break;
467 }
468 }
469 positions.sort_unstable();
470 positions.dedup();
471 match out.binary_search_by(|l| l.word.as_str().cmp(word.as_str())) {
474 Ok(idx) => {
475 for p in positions {
476 if !out[idx].positions.contains(&p) {
477 out[idx].positions.push(p);
478 }
479 }
480 out[idx].positions.sort_unstable();
481 if weight != 0 {
482 out[idx].weight = weight;
483 }
484 }
485 Err(idx) => {
486 out.insert(
487 idx,
488 TsLexeme {
489 word,
490 positions,
491 weight,
492 },
493 );
494 }
495 }
496 }
497 Ok(out)
498}
499
500pub fn decode_tsquery_external(s: &str) -> Result<TsQueryAst, EvalError> {
506 let mut p = TsQueryParser {
507 bytes: s.as_bytes(),
508 pos: 0,
509 };
510 p.skip_ws();
511 if p.pos >= p.bytes.len() {
512 return Err(EvalError::TypeMismatch {
513 detail: "tsquery literal: empty".into(),
514 });
515 }
516 let ast = p.parse_or()?;
517 p.skip_ws();
518 if p.pos < p.bytes.len() {
519 return Err(EvalError::TypeMismatch {
520 detail: alloc::format!("tsquery literal: trailing garbage at offset {}", p.pos),
521 });
522 }
523 Ok(ast)
524}
525
526struct TsQueryParser<'a> {
527 bytes: &'a [u8],
528 pos: usize,
529}
530
531impl<'a> TsQueryParser<'a> {
532 fn skip_ws(&mut self) {
533 while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_whitespace() {
534 self.pos += 1;
535 }
536 }
537 fn peek(&self) -> Option<u8> {
538 self.bytes.get(self.pos).copied()
539 }
540 fn parse_or(&mut self) -> Result<TsQueryAst, EvalError> {
541 let mut lhs = self.parse_and()?;
542 loop {
543 self.skip_ws();
544 if self.peek() != Some(b'|') {
545 return Ok(lhs);
546 }
547 self.pos += 1;
548 let rhs = self.parse_and()?;
549 lhs = TsQueryAst::Or(Box::new(lhs), Box::new(rhs));
550 }
551 }
552 fn parse_and(&mut self) -> Result<TsQueryAst, EvalError> {
553 let mut lhs = self.parse_unary()?;
554 loop {
555 self.skip_ws();
556 match self.peek() {
557 Some(b'&') => {
558 self.pos += 1;
559 let rhs = self.parse_unary()?;
560 lhs = TsQueryAst::And(Box::new(lhs), Box::new(rhs));
561 }
562 Some(b'<') => {
563 self.pos += 1;
565 let start = self.pos;
566 while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
567 self.pos += 1;
568 }
569 if start == self.pos || self.peek() != Some(b'>') {
570 return Err(EvalError::TypeMismatch {
571 detail: "tsquery literal: malformed <N> phrase operator".into(),
572 });
573 }
574 let n: u16 = core::str::from_utf8(&self.bytes[start..self.pos])
575 .expect("ascii digits")
576 .parse()
577 .map_err(|_| EvalError::TypeMismatch {
578 detail: "tsquery literal: phrase distance overflows u16".into(),
579 })?;
580 self.pos += 1; let rhs = self.parse_unary()?;
582 lhs = TsQueryAst::Phrase {
583 left: Box::new(lhs),
584 right: Box::new(rhs),
585 distance: n,
586 };
587 }
588 _ => return Ok(lhs),
589 }
590 }
591 }
592 fn parse_unary(&mut self) -> Result<TsQueryAst, EvalError> {
593 self.skip_ws();
594 if self.peek() == Some(b'!') {
595 self.pos += 1;
596 let inner = self.parse_unary()?;
597 return Ok(TsQueryAst::Not(Box::new(inner)));
598 }
599 self.parse_atom()
600 }
601 fn parse_atom(&mut self) -> Result<TsQueryAst, EvalError> {
602 self.skip_ws();
603 match self.peek() {
604 Some(b'(') => {
605 self.pos += 1;
606 let inner = self.parse_or()?;
607 self.skip_ws();
608 if self.peek() != Some(b')') {
609 return Err(EvalError::TypeMismatch {
610 detail: "tsquery literal: missing ')'".into(),
611 });
612 }
613 self.pos += 1;
614 Ok(inner)
615 }
616 Some(b'\'') => {
617 self.pos += 1;
618 let mut w = String::new();
619 loop {
620 match self.peek() {
621 None => {
622 return Err(EvalError::TypeMismatch {
623 detail: "tsquery literal: unterminated quoted lexeme".into(),
624 });
625 }
626 Some(b'\'') => {
627 if self.bytes.get(self.pos + 1) == Some(&b'\'') {
628 w.push('\'');
629 self.pos += 2;
630 } else {
631 self.pos += 1;
632 break;
633 }
634 }
635 Some(b) => {
636 w.push(b as char);
637 self.pos += 1;
638 }
639 }
640 }
641 self.skip_weight_suffix();
644 Ok(TsQueryAst::Term {
645 word: w,
646 weight_mask: 0,
647 })
648 }
649 Some(b) if b.is_ascii_alphanumeric() || b == b'_' => {
650 let start = self.pos;
651 while self.pos < self.bytes.len() {
652 let c = self.bytes[self.pos];
653 if c.is_ascii_alphanumeric() || c == b'_' {
654 self.pos += 1;
655 } else {
656 break;
657 }
658 }
659 let w = core::str::from_utf8(&self.bytes[start..self.pos])
660 .map_err(|_| EvalError::TypeMismatch {
661 detail: "tsquery literal: non-UTF-8 lexeme".into(),
662 })?
663 .to_string();
664 self.skip_weight_suffix();
665 Ok(TsQueryAst::Term {
666 word: w,
667 weight_mask: 0,
668 })
669 }
670 Some(b) => Err(EvalError::TypeMismatch {
671 detail: alloc::format!(
672 "tsquery literal: unexpected byte {:?} at offset {}",
673 b as char,
674 self.pos
675 ),
676 }),
677 None => Err(EvalError::TypeMismatch {
678 detail: "tsquery literal: expected term".into(),
679 }),
680 }
681 }
682 fn skip_weight_suffix(&mut self) {
683 if self.peek() != Some(b':') {
684 return;
685 }
686 self.pos += 1;
687 while let Some(b) = self.peek() {
688 if matches!(
689 b,
690 b'A' | b'B' | b'C' | b'D' | b'a' | b'b' | b'c' | b'd' | b'*'
691 ) || b.is_ascii_digit()
692 {
693 self.pos += 1;
694 } else {
695 break;
696 }
697 }
698 }
699}
700
701pub(super) fn tsvector_concat(l: &[spg_storage::TsLexeme], r: &[spg_storage::TsLexeme]) -> Value {
702 let shift = l
703 .iter()
704 .flat_map(|x| x.positions.iter().copied())
705 .max()
706 .unwrap_or(0);
707 let mut out: Vec<spg_storage::TsLexeme> = l.to_vec();
708 for lex in r {
709 let shifted: Vec<u16> = lex
710 .positions
711 .iter()
712 .map(|p| p.saturating_add(shift))
713 .collect();
714 if let Some(existing) = out.iter_mut().find(|x| x.word == lex.word) {
715 existing.positions.extend(shifted);
716 existing.positions.sort_unstable();
717 existing.weight = existing.weight.max(lex.weight);
718 } else {
719 out.push(spg_storage::TsLexeme {
720 word: lex.word.clone(),
721 positions: shifted,
722 weight: lex.weight,
723 });
724 }
725 }
726 out.sort_by(|a, b| a.word.cmp(&b.word));
727 Value::TsVector(out)
728}