Skip to main content

spg_engine/eval/
textsearch.rs

1//! Full-text-search SQL functions and `tsvector` / `tsquery` codecs.
2//! Wraps the lexer/stemmer engine in `crate::fts`: the `to_tsvector` /
3//! `*_tsquery` / `ts_rank` / `setweight` / `@@` builtins plus the PG
4//! external-form render (`format_*`) and parse (`decode_*_external`)
5//! used by the wire layer and `::tsvector` / `::tsquery` casts.
6//! Split out of `eval.rs` (cut 26).
7
8use alloc::boxed::Box;
9use alloc::format;
10use alloc::string::{String, ToString};
11use alloc::vec::Vec;
12
13use spg_storage::{TsLexeme, TsQueryAst, Value};
14
15use super::{EvalContext, EvalError};
16
17/// v7.12.2 — `ts_rank([weights,] vec, query [, norm])`. v7.12.2
18/// supports the canonical `(vec, query)` two-arg form mailrs uses;
19/// optional weight-array / normalisation arguments error with an
20/// "unsupported" message rather than silently changing semantics.
21pub(super) fn fts_ts_rank(args: &[Value]) -> Result<Value, EvalError> {
22    let (vec, query) = parse_rank_args("ts_rank", args)?;
23    match (vec, query) {
24        (None, _) | (_, None) => Ok(Value::Null),
25        (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank(&v, &q)))),
26    }
27}
28
29pub(super) fn fts_ts_rank_cd(args: &[Value]) -> Result<Value, EvalError> {
30    let (vec, query) = parse_rank_args("ts_rank_cd", args)?;
31    match (vec, query) {
32        (None, _) | (_, None) => Ok(Value::Null),
33        (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank_cd(&v, &q)))),
34    }
35}
36
37fn parse_rank_args(
38    name: &str,
39    args: &[Value],
40) -> Result<
41    (
42        Option<Vec<spg_storage::TsLexeme>>,
43        Option<spg_storage::TsQueryAst>,
44    ),
45    EvalError,
46> {
47    if args.len() != 2 {
48        return Err(EvalError::TypeMismatch {
49            detail: format!(
50                "{name}() takes 2 args in v7.12.2 (weights array + normalisation flag are v7.12.x carve-out), got {}",
51                args.len()
52            ),
53        });
54    }
55    let vec = match &args[0] {
56        Value::Null => None,
57        Value::TsVector(v) => Some(v.clone()),
58        other => {
59            return Err(EvalError::TypeMismatch {
60                detail: format!(
61                    "{name}() first arg must be tsvector, got {:?}",
62                    other.data_type()
63                ),
64            });
65        }
66    };
67    let query = match &args[1] {
68        Value::Null => None,
69        Value::TsQuery(q) => Some(q.clone()),
70        other => {
71            return Err(EvalError::TypeMismatch {
72                detail: format!(
73                    "{name}() second arg must be tsquery, got {:?}",
74                    other.data_type()
75                ),
76            });
77        }
78    };
79    Ok((vec, query))
80}
81
82/// v7.12.2 — `tsvector @@ tsquery` match operator. Either
83/// ordering accepted (PG semantics). NULL on either side → NULL.
84/// Anything that isn't tsvector/tsquery on either side is a type
85/// mismatch. Returns BOOL.
86pub(super) fn ts_match(l: Value, r: Value) -> Result<Value, EvalError> {
87    let (vec, query) = match (l, r) {
88        (Value::Null, _) | (_, Value::Null) => return Ok(Value::Null),
89        (Value::TsVector(v), Value::TsQuery(q)) => (v, q),
90        (Value::TsQuery(q), Value::TsVector(v)) => (v, q),
91        (l, r) => {
92            return Err(EvalError::TypeMismatch {
93                detail: format!(
94                    "@@ requires (tsvector, tsquery), got ({:?}, {:?})",
95                    l.data_type(),
96                    r.data_type()
97                ),
98            });
99        }
100    };
101    Ok(Value::Bool(crate::fts::ts_query_matches(&vec, &query)))
102}
103
104/// v7.12.1 — `to_tsvector([config,] text)`. With one arg the
105/// session-resolved `default_text_search_config` is used (defaults
106/// to `simple` when unset); with two args the first picks the
107/// config. NULL text → NULL.
108pub(super) fn fts_to_tsvector(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
109    let (config, text) = parse_fts_args("to_tsvector", args, ctx)?;
110    match text {
111        None => Ok(Value::Null),
112        Some(t) => Ok(Value::TsVector(crate::fts::to_tsvector(config, &t))),
113    }
114}
115
116/// v7.24 (round-16 C) — `setweight(tsvector, "char")`. Relabels
117/// every lexeme with the given PG weight letter (A=3 B=2 C=1 D=0).
118pub(super) fn fts_setweight(args: &[Value]) -> Result<Value, EvalError> {
119    let [vec_arg, weight_arg] = args else {
120        return Err(EvalError::TypeMismatch {
121            detail: alloc::format!("setweight expects 2 arguments, got {}", args.len()),
122        });
123    };
124    if matches!(vec_arg, Value::Null) || matches!(weight_arg, Value::Null) {
125        return Ok(Value::Null);
126    }
127    let Value::TsVector(lexemes) = vec_arg else {
128        return Err(EvalError::TypeMismatch {
129            detail: alloc::format!(
130                "setweight expects a tsvector, got {:?}",
131                vec_arg.data_type()
132            ),
133        });
134    };
135    let Value::Text(w) = weight_arg else {
136        return Err(EvalError::TypeMismatch {
137            detail: alloc::format!(
138                "setweight expects a weight letter, got {:?}",
139                weight_arg.data_type()
140            ),
141        });
142    };
143    let weight = match w.to_ascii_uppercase().as_str() {
144        "A" => 3,
145        "B" => 2,
146        "C" => 1,
147        "D" => 0,
148        other => {
149            return Err(EvalError::TypeMismatch {
150                detail: alloc::format!("unrecognized weight: {other:?} (expected A, B, C or D)"),
151            });
152        }
153    };
154    let mut out = lexemes.clone();
155    for lex in &mut out {
156        lex.weight = weight;
157    }
158    Ok(Value::TsVector(out))
159}
160
161pub(super) fn fts_plainto_tsquery(
162    args: &[Value],
163    ctx: &EvalContext<'_>,
164) -> Result<Value, EvalError> {
165    let (config, text) = parse_fts_args("plainto_tsquery", args, ctx)?;
166    match text {
167        None => Ok(Value::Null),
168        Some(t) => Ok(Value::TsQuery(crate::fts::plainto_tsquery(config, &t))),
169    }
170}
171
172pub(super) fn fts_phraseto_tsquery(
173    args: &[Value],
174    ctx: &EvalContext<'_>,
175) -> Result<Value, EvalError> {
176    let (config, text) = parse_fts_args("phraseto_tsquery", args, ctx)?;
177    match text {
178        None => Ok(Value::Null),
179        Some(t) => Ok(Value::TsQuery(crate::fts::phraseto_tsquery(config, &t))),
180    }
181}
182
183pub(super) fn fts_websearch_to_tsquery(
184    args: &[Value],
185    ctx: &EvalContext<'_>,
186) -> Result<Value, EvalError> {
187    let (config, text) = parse_fts_args("websearch_to_tsquery", args, ctx)?;
188    match text {
189        None => Ok(Value::Null),
190        Some(t) => Ok(Value::TsQuery(crate::fts::websearch_to_tsquery(config, &t))),
191    }
192}
193
194pub(super) fn fts_to_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
195    let (config, text) = parse_fts_args("to_tsquery", args, ctx)?;
196    match text {
197        None => Ok(Value::Null),
198        Some(t) => Ok(Value::TsQuery(crate::fts::to_tsquery(config, &t)?)),
199    }
200}
201
202/// Parse the `(config, text)` / `(text)` argument pair shared by
203/// all FTS builders. Returns the resolved config + the text
204/// payload (None when text is NULL). The one-arg form pulls the
205/// config from the session's `default_text_search_config`.
206fn parse_fts_args(
207    name: &str,
208    args: &[Value],
209    ctx: &EvalContext<'_>,
210) -> Result<(crate::fts::TsConfig, Option<String>), EvalError> {
211    let (config_arg, text_arg) = match args {
212        [t] => (None, t),
213        [c, t] => (Some(c), t),
214        _ => {
215            return Err(EvalError::TypeMismatch {
216                detail: format!("{name}() takes 1 or 2 args, got {}", args.len()),
217            });
218        }
219    };
220    let config = match config_arg {
221        None => match ctx.default_text_search_config {
222            Some(name_str) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
223                EvalError::TypeMismatch {
224                    detail: format!(
225                        "text search config not implemented: {name_str:?} (supported: simple, english)"
226                    ),
227                }
228            })?,
229            None => crate::fts::TsConfig::Simple,
230        },
231        Some(Value::Null) => return Ok((crate::fts::TsConfig::Simple, None)),
232        Some(Value::Text(name_str)) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
233            EvalError::TypeMismatch {
234                detail: format!(
235                    "text search config not implemented: {name_str:?} (supported: simple, english)"
236                ),
237            }
238        })?,
239        Some(other) => {
240            return Err(EvalError::TypeMismatch {
241                detail: format!(
242                    "{name}() config arg must be text, got {:?}",
243                    other.data_type()
244                ),
245            });
246        }
247    };
248    let text = match text_arg {
249        Value::Null => None,
250        Value::Text(s) => Some(s.clone()),
251        other => {
252            return Err(EvalError::TypeMismatch {
253                detail: format!(
254                    "{name}() text arg must be text, got {:?}",
255                    other.data_type()
256                ),
257            });
258        }
259    };
260    Ok((config, text))
261}
262
263/// v7.12.0 — render a `tsvector` in PG's external form:
264/// `'lex':1,2A 'word':3` (single-quoted lexemes, optional
265/// `:positions`, optional weight letter `A/B/C/D` per position).
266/// Lexemes already arrive sorted + deduped from the engine. Used
267/// by the wire layer (OID 3614) and by SELECT-text output.
268pub fn format_tsvector(lexs: &[TsLexeme]) -> String {
269    let mut out = String::with_capacity(lexs.len() * 12);
270    for (i, l) in lexs.iter().enumerate() {
271        if i > 0 {
272            out.push(' ');
273        }
274        out.push('\'');
275        for c in l.word.chars() {
276            if c == '\'' {
277                out.push('\'');
278            }
279            out.push(c);
280        }
281        out.push('\'');
282        if !l.positions.is_empty() {
283            for (pi, p) in l.positions.iter().enumerate() {
284                out.push(if pi == 0 { ':' } else { ',' });
285                out.push_str(&p.to_string());
286            }
287            // v7.12.0 — weight is per-lexeme (the v7.12 design
288            // collapses PG's per-position weight into one letter).
289            // Emit once after the last position; default `D`
290            // (weight=0) stays implicit.
291            match l.weight {
292                3 => out.push('A'),
293                2 => out.push('B'),
294                1 => out.push('C'),
295                _ => {}
296            }
297        }
298    }
299    out
300}
301
302/// v7.12.0 — render a `tsquery` in PG's external form. Operator
303/// precedence: `!` > `&` > `|`. Phrase distance shown as `<N>`.
304pub fn format_tsquery(ast: &TsQueryAst) -> String {
305    fn go(ast: &TsQueryAst, parent_prec: u8, out: &mut String) {
306        // 0 = top, 1 = OR, 2 = AND, 3 = NOT/Phrase, 4 = atom.
307        let (own_prec, write_self): (u8, &dyn Fn(&mut String)) = match ast {
308            TsQueryAst::Or(_, _) => (1, &|_| {}),
309            TsQueryAst::And(_, _) | TsQueryAst::Phrase { .. } => (2, &|_| {}),
310            TsQueryAst::Not(_) => (3, &|_| {}),
311            TsQueryAst::Term { .. } => (4, &|_| {}),
312        };
313        let need_parens = own_prec < parent_prec;
314        if need_parens {
315            out.push('(');
316        }
317        match ast {
318            TsQueryAst::Term { word, .. } => {
319                out.push('\'');
320                for c in word.chars() {
321                    if c == '\'' {
322                        out.push('\'');
323                    }
324                    out.push(c);
325                }
326                out.push('\'');
327            }
328            TsQueryAst::And(a, b) => {
329                go(a, own_prec, out);
330                out.push_str(" & ");
331                go(b, own_prec, out);
332            }
333            TsQueryAst::Or(a, b) => {
334                go(a, own_prec, out);
335                out.push_str(" | ");
336                go(b, own_prec, out);
337            }
338            TsQueryAst::Not(x) => {
339                out.push('!');
340                go(x, own_prec, out);
341            }
342            TsQueryAst::Phrase {
343                left,
344                right,
345                distance,
346            } => {
347                go(left, own_prec, out);
348                out.push_str(&alloc::format!(" <{distance}> "));
349                go(right, own_prec, out);
350            }
351        }
352        write_self(out);
353        if need_parens {
354            out.push(')');
355        }
356    }
357    let mut out = String::new();
358    go(ast, 0, &mut out);
359    out
360}
361
362/// v7.12.0 — decode PG external form `'word':1,2A 'other':3` into
363/// a `Vec<TsLexeme>`. Lexemes are sorted ascending by `word` (with
364/// duplicates merged on positions) so the output matches the
365/// engine invariant. Empty input yields an empty vector.
366///
367/// v7.12.0 only ships the cast-literal entry. Full `to_tsvector`
368/// (Unicode word-split + Porter stemming + stopwords) lands in
369/// v7.12.1.
370pub fn decode_tsvector_external(s: &str) -> Result<Vec<TsLexeme>, EvalError> {
371    let mut out: Vec<TsLexeme> = Vec::new();
372    let mut i = 0;
373    let bytes = s.as_bytes();
374    while i < bytes.len() {
375        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
376            i += 1;
377        }
378        if i >= bytes.len() {
379            break;
380        }
381        // Quoted form `'word'` (with embedded `''` for a literal
382        // single quote, mirroring PG).
383        let word = if bytes[i] == b'\'' {
384            i += 1;
385            let mut w = String::new();
386            loop {
387                if i >= bytes.len() {
388                    return Err(EvalError::TypeMismatch {
389                        detail: "tsvector literal: unterminated quoted lexeme".into(),
390                    });
391                }
392                let b = bytes[i];
393                if b == b'\'' {
394                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
395                        w.push('\'');
396                        i += 2;
397                    } else {
398                        i += 1;
399                        break;
400                    }
401                } else {
402                    w.push(b as char);
403                    i += 1;
404                }
405            }
406            w
407        } else {
408            // Bare form — read until whitespace, ':' or end.
409            let start = i;
410            while i < bytes.len() && !bytes[i].is_ascii_whitespace() && bytes[i] != b':' {
411                i += 1;
412            }
413            core::str::from_utf8(&bytes[start..i])
414                .map_err(|_| EvalError::TypeMismatch {
415                    detail: "tsvector literal: non-UTF-8 lexeme".into(),
416                })?
417                .to_string()
418        };
419        if word.is_empty() {
420            return Err(EvalError::TypeMismatch {
421                detail: "tsvector literal: empty lexeme".into(),
422            });
423        }
424        // Optional `:pos[,pos][,pos]`. Each position is u16; each
425        // may carry a trailing weight letter A/B/C/D.
426        let mut positions: Vec<u16> = Vec::new();
427        let mut weight: u8 = 0;
428        if i < bytes.len() && bytes[i] == b':' {
429            i += 1;
430            loop {
431                let start = i;
432                while i < bytes.len() && bytes[i].is_ascii_digit() {
433                    i += 1;
434                }
435                if start == i {
436                    return Err(EvalError::TypeMismatch {
437                        detail: "tsvector literal: expected digit after ':'".into(),
438                    });
439                }
440                let num: u16 = core::str::from_utf8(&bytes[start..i])
441                    .expect("ascii digits")
442                    .parse()
443                    .map_err(|_| EvalError::TypeMismatch {
444                        detail: alloc::format!(
445                            "tsvector literal: position {} overflows u16",
446                            core::str::from_utf8(&bytes[start..i]).unwrap_or("?")
447                        ),
448                    })?;
449                positions.push(num);
450                if i < bytes.len() {
451                    let w = bytes[i];
452                    if matches!(w, b'A' | b'B' | b'C' | b'D') {
453                        weight = match w {
454                            b'A' => 3,
455                            b'B' => 2,
456                            b'C' => 1,
457                            _ => 0,
458                        };
459                        i += 1;
460                    }
461                }
462                if i < bytes.len() && bytes[i] == b',' {
463                    i += 1;
464                    continue;
465                }
466                break;
467            }
468        }
469        positions.sort_unstable();
470        positions.dedup();
471        // Merge into the output vector — sorted insert by word,
472        // duplicate words merge positions.
473        match out.binary_search_by(|l| l.word.as_str().cmp(word.as_str())) {
474            Ok(idx) => {
475                for p in positions {
476                    if !out[idx].positions.contains(&p) {
477                        out[idx].positions.push(p);
478                    }
479                }
480                out[idx].positions.sort_unstable();
481                if weight != 0 {
482                    out[idx].weight = weight;
483                }
484            }
485            Err(idx) => {
486                out.insert(
487                    idx,
488                    TsLexeme {
489                        word,
490                        positions,
491                        weight,
492                    },
493                );
494            }
495        }
496    }
497    Ok(out)
498}
499
500/// v7.12.0 — decode PG external form `'foo' & 'bar' | !'baz'`
501/// into a `TsQueryAst`. v7.12.0 supports the canonical
502/// `to_tsquery` surface: single-quoted lexemes, `&` / `|` / `!`,
503/// parens, and phrase `<N>`. Bare lexemes are accepted too. Full
504/// `plainto_tsquery` / `websearch_to_tsquery` arrive in v7.12.1.
505pub fn decode_tsquery_external(s: &str) -> Result<TsQueryAst, EvalError> {
506    let mut p = TsQueryParser {
507        bytes: s.as_bytes(),
508        pos: 0,
509    };
510    p.skip_ws();
511    if p.pos >= p.bytes.len() {
512        return Err(EvalError::TypeMismatch {
513            detail: "tsquery literal: empty".into(),
514        });
515    }
516    let ast = p.parse_or()?;
517    p.skip_ws();
518    if p.pos < p.bytes.len() {
519        return Err(EvalError::TypeMismatch {
520            detail: alloc::format!("tsquery literal: trailing garbage at offset {}", p.pos),
521        });
522    }
523    Ok(ast)
524}
525
526struct TsQueryParser<'a> {
527    bytes: &'a [u8],
528    pos: usize,
529}
530
531impl<'a> TsQueryParser<'a> {
532    fn skip_ws(&mut self) {
533        while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_whitespace() {
534            self.pos += 1;
535        }
536    }
537    fn peek(&self) -> Option<u8> {
538        self.bytes.get(self.pos).copied()
539    }
540    fn parse_or(&mut self) -> Result<TsQueryAst, EvalError> {
541        let mut lhs = self.parse_and()?;
542        loop {
543            self.skip_ws();
544            if self.peek() != Some(b'|') {
545                return Ok(lhs);
546            }
547            self.pos += 1;
548            let rhs = self.parse_and()?;
549            lhs = TsQueryAst::Or(Box::new(lhs), Box::new(rhs));
550        }
551    }
552    fn parse_and(&mut self) -> Result<TsQueryAst, EvalError> {
553        let mut lhs = self.parse_unary()?;
554        loop {
555            self.skip_ws();
556            match self.peek() {
557                Some(b'&') => {
558                    self.pos += 1;
559                    let rhs = self.parse_unary()?;
560                    lhs = TsQueryAst::And(Box::new(lhs), Box::new(rhs));
561                }
562                Some(b'<') => {
563                    // Phrase distance `<N>`.
564                    self.pos += 1;
565                    let start = self.pos;
566                    while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
567                        self.pos += 1;
568                    }
569                    if start == self.pos || self.peek() != Some(b'>') {
570                        return Err(EvalError::TypeMismatch {
571                            detail: "tsquery literal: malformed <N> phrase operator".into(),
572                        });
573                    }
574                    let n: u16 = core::str::from_utf8(&self.bytes[start..self.pos])
575                        .expect("ascii digits")
576                        .parse()
577                        .map_err(|_| EvalError::TypeMismatch {
578                            detail: "tsquery literal: phrase distance overflows u16".into(),
579                        })?;
580                    self.pos += 1; // consume '>'
581                    let rhs = self.parse_unary()?;
582                    lhs = TsQueryAst::Phrase {
583                        left: Box::new(lhs),
584                        right: Box::new(rhs),
585                        distance: n,
586                    };
587                }
588                _ => return Ok(lhs),
589            }
590        }
591    }
592    fn parse_unary(&mut self) -> Result<TsQueryAst, EvalError> {
593        self.skip_ws();
594        if self.peek() == Some(b'!') {
595            self.pos += 1;
596            let inner = self.parse_unary()?;
597            return Ok(TsQueryAst::Not(Box::new(inner)));
598        }
599        self.parse_atom()
600    }
601    fn parse_atom(&mut self) -> Result<TsQueryAst, EvalError> {
602        self.skip_ws();
603        match self.peek() {
604            Some(b'(') => {
605                self.pos += 1;
606                let inner = self.parse_or()?;
607                self.skip_ws();
608                if self.peek() != Some(b')') {
609                    return Err(EvalError::TypeMismatch {
610                        detail: "tsquery literal: missing ')'".into(),
611                    });
612                }
613                self.pos += 1;
614                Ok(inner)
615            }
616            Some(b'\'') => {
617                self.pos += 1;
618                let mut w = String::new();
619                loop {
620                    match self.peek() {
621                        None => {
622                            return Err(EvalError::TypeMismatch {
623                                detail: "tsquery literal: unterminated quoted lexeme".into(),
624                            });
625                        }
626                        Some(b'\'') => {
627                            if self.bytes.get(self.pos + 1) == Some(&b'\'') {
628                                w.push('\'');
629                                self.pos += 2;
630                            } else {
631                                self.pos += 1;
632                                break;
633                            }
634                        }
635                        Some(b) => {
636                            w.push(b as char);
637                            self.pos += 1;
638                        }
639                    }
640                }
641                // Optional `:WEIGHT_MASK` (digit-mask) — v7.12.0
642                // accepts but always stores 0 (any).
643                self.skip_weight_suffix();
644                Ok(TsQueryAst::Term {
645                    word: w,
646                    weight_mask: 0,
647                })
648            }
649            Some(b) if b.is_ascii_alphanumeric() || b == b'_' => {
650                let start = self.pos;
651                while self.pos < self.bytes.len() {
652                    let c = self.bytes[self.pos];
653                    if c.is_ascii_alphanumeric() || c == b'_' {
654                        self.pos += 1;
655                    } else {
656                        break;
657                    }
658                }
659                let w = core::str::from_utf8(&self.bytes[start..self.pos])
660                    .map_err(|_| EvalError::TypeMismatch {
661                        detail: "tsquery literal: non-UTF-8 lexeme".into(),
662                    })?
663                    .to_string();
664                self.skip_weight_suffix();
665                Ok(TsQueryAst::Term {
666                    word: w,
667                    weight_mask: 0,
668                })
669            }
670            Some(b) => Err(EvalError::TypeMismatch {
671                detail: alloc::format!(
672                    "tsquery literal: unexpected byte {:?} at offset {}",
673                    b as char,
674                    self.pos
675                ),
676            }),
677            None => Err(EvalError::TypeMismatch {
678                detail: "tsquery literal: expected term".into(),
679            }),
680        }
681    }
682    fn skip_weight_suffix(&mut self) {
683        if self.peek() != Some(b':') {
684            return;
685        }
686        self.pos += 1;
687        while let Some(b) = self.peek() {
688            if matches!(
689                b,
690                b'A' | b'B' | b'C' | b'D' | b'a' | b'b' | b'c' | b'd' | b'*'
691            ) || b.is_ascii_digit()
692            {
693                self.pos += 1;
694            } else {
695                break;
696            }
697        }
698    }
699}
700
701pub(super) fn tsvector_concat(l: &[spg_storage::TsLexeme], r: &[spg_storage::TsLexeme]) -> Value {
702    let shift = l
703        .iter()
704        .flat_map(|x| x.positions.iter().copied())
705        .max()
706        .unwrap_or(0);
707    let mut out: Vec<spg_storage::TsLexeme> = l.to_vec();
708    for lex in r {
709        let shifted: Vec<u16> = lex
710            .positions
711            .iter()
712            .map(|p| p.saturating_add(shift))
713            .collect();
714        if let Some(existing) = out.iter_mut().find(|x| x.word == lex.word) {
715            existing.positions.extend(shifted);
716            existing.positions.sort_unstable();
717            existing.weight = existing.weight.max(lex.weight);
718        } else {
719            out.push(spg_storage::TsLexeme {
720                word: lex.word.clone(),
721                positions: shifted,
722                weight: lex.weight,
723            });
724        }
725    }
726    out.sort_by(|a, b| a.word.cmp(&b.word));
727    Value::TsVector(out)
728}