Skip to main content

spg_engine/
eval.rs

1//! Expression evaluator. Given a parsed `Expr`, a `Row`, and the row's column
2//! schema, produce a `Value`. v0.4 implements:
3//!
4//! - literals
5//! - column lookups (bare and qualified `t.col`)
6//! - unary minus / NOT
7//! - binary arithmetic, comparison, AND, OR
8//! - numeric widening (`Int → BigInt → Float`) at evaluation time
9//! - SQL three-valued logic for NULL:
10//!     * any arithmetic / comparison op with a NULL operand → NULL
11//!     * `TRUE OR NULL` → TRUE, `FALSE OR NULL` → NULL,
12//!     * `FALSE AND NULL` → FALSE, `TRUE AND NULL` → NULL,
13//!     * `NOT NULL` → NULL
14//!
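//! v0.4 deliberately did *not* implement function calls, string
//! concatenation, IS NULL / IS NOT NULL, BETWEEN, IN, etc. Later versions
//! extended `eval_expr` with casts, IS [NOT] NULL, function calls,
//! LIKE / ILIKE, EXTRACT, array constructors and subscripts, ANY / ALL,
//! and CASE.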
17
18use alloc::boxed::Box;
19use alloc::format;
20use alloc::string::{String, ToString};
21use alloc::vec::Vec;
22
23use spg_sql::ast::{BinOp, CastTarget, ColumnName, Expr, Literal, UnOp};
24use spg_storage::{ColumnSchema, DataType, Row, TsLexeme, TsQueryAst, Value};
25
26/// Resolution context for evaluating a single row. `table_alias` is the alias
27/// (or table name) callers should accept as the qualifier on a column ref —
28/// e.g. `FROM users AS u` makes `u.name` valid and rejects `other.name`.
29#[derive(Clone)]
30#[allow(missing_debug_implementations)] // sequence_resolver is a dyn Fn — no Debug
31pub struct EvalContext<'a> {
32    pub columns: &'a [ColumnSchema],
33    pub table_alias: Option<&'a str>,
34    /// v6.1.1 — bound parameters for `$N` placeholders inside the
35    /// expression tree. Empty for simple queries; populated by the
36    /// prepared-statement Execute path with Bind values converted
37    /// to `Value`. Index N (1-based per PG) hits `params[N-1]`.
38    pub params: &'a [Value],
39    /// v7.12.1 — session text-search config (from `SET
40    /// default_text_search_config = '<name>'`). Resolved when the
41    /// engine builds an `EvalContext` and consumed by the FTS
42    /// function dispatcher when `to_tsvector(text)` /
43    /// `plainto_tsquery(text)` etc are called without an explicit
44    /// config arg. `None` falls through to `simple`.
45    pub default_text_search_config: Option<&'a str>,
46    /// v7.17.0 Phase 1.1 — `nextval` / `currval` / `setval`
47    /// resolver. The engine builds this around a `&mut Catalog`
48    /// so apply_function can mutate sequence state without
49    /// eval owning a catalog reference. When `None`, sequence
50    /// functions return an error (read-only contexts).
51    pub sequence_resolver: Option<&'a SequenceResolver<'a>>,
52}
53
54/// v7.17.0 — sequence-mutating callback used by `apply_function`
55/// for `nextval` / `currval` / `setval`. Implemented by the
56/// engine to thread `&mut Catalog` access through an immutable
57/// `&EvalContext`.
58pub type SequenceResolver<'a> = dyn Fn(SequenceOp) -> Result<i64, EvalError> + 'a;
59
/// v7.17.0 — a sequence operation requested while evaluating an
/// expression, handed to the [`SequenceResolver`] callback.
#[derive(Debug, Clone)]
pub enum SequenceOp {
    /// `nextval(name)` — payload is the sequence name.
    Next(String),
    /// `currval(name)` — payload is the sequence name.
    Curr(String),
    /// `setval(name, value [, is_called])`.
    Set {
        /// Sequence name.
        name: String,
        /// Value passed as the second `setval` argument.
        value: i64,
        /// Third `setval` argument; the two-argument form passes `true`.
        is_called: bool,
    },
}
71
72impl<'a> EvalContext<'a> {
73    pub const fn new(columns: &'a [ColumnSchema], table_alias: Option<&'a str>) -> Self {
74        Self {
75            columns,
76            table_alias,
77            params: &[],
78            default_text_search_config: None,
79            sequence_resolver: None,
80        }
81    }
82
83    /// v7.17.0 — attach a sequence resolver. The engine wraps a
84    /// `&mut Catalog` in a closure that performs the requested
85    /// SequenceOp.
86    #[must_use]
87    pub const fn with_sequence_resolver(mut self, resolver: &'a SequenceResolver<'a>) -> Self {
88        self.sequence_resolver = Some(resolver);
89        self
90    }
91
92    /// v6.1.1 — attach a parameter buffer for `$N` placeholder
93    /// resolution. The slice must outlive the context; callers
94    /// construct it from the prepared statement's Bind values.
95    #[must_use]
96    pub const fn with_params(mut self, params: &'a [Value]) -> Self {
97        self.params = params;
98        self
99    }
100
101    /// v7.12.1 — attach the session's
102    /// `default_text_search_config`. Used by the FTS function
103    /// dispatcher when no explicit config arg is given.
104    #[must_use]
105    pub const fn with_default_text_search_config(mut self, cfg: Option<&'a str>) -> Self {
106        self.default_text_search_config = cfg;
107        self
108    }
109}
110
/// Failures the expression evaluator can report.
#[derive(Debug, Clone, PartialEq)]
pub enum EvalError {
    /// A column reference did not resolve; `name` is the column name.
    ColumnNotFound {
        name: String,
    },
    /// A qualified column reference used a qualifier that was not
    /// accepted.
    UnknownQualifier {
        qualifier: String,
    },
    /// Division by zero.
    DivisionByZero,
    /// An operand, argument, or expression kind was not acceptable where
    /// it appeared; `detail` carries the human-readable explanation.
    TypeMismatch {
        detail: String,
    },
    /// v6.1.1 — `$N` referenced a parameter that was not bound: `n` is
    /// the placeholder number, `bound` the number of parameters supplied.
    /// Either the client sent too few in Bind, or the SQL has a
    /// placeholder the prepared statement didn't account for.
    PlaceholderOutOfRange {
        n: u16,
        bound: u16,
    },
}
131
132impl core::fmt::Display for EvalError {
133    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
134        match self {
135            Self::ColumnNotFound { name } => write!(f, "column not found: {name}"),
136            Self::UnknownQualifier { qualifier } => {
137                write!(f, "unknown table qualifier: {qualifier}")
138            }
139            Self::DivisionByZero => f.write_str("division by zero"),
140            Self::TypeMismatch { detail } => write!(f, "type mismatch: {detail}"),
141            Self::PlaceholderOutOfRange { n, bound } => write!(
142                f,
143                "parameter ${n} referenced but only {bound} bound by client"
144            ),
145        }
146    }
147}
148
149pub fn eval_expr(expr: &Expr, row: &Row, ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
150    match expr {
151        Expr::AggregateOrdered { .. } => Err(EvalError::TypeMismatch {
152            detail: "aggregate ORDER BY is only valid inside an aggregating SELECT".into(),
153        }),
154        Expr::Literal(l) => Ok(literal_to_value(l)),
155        Expr::Column(c) => resolve_column(c, row, ctx),
156        Expr::Placeholder(n) => {
157            let idx = usize::from(*n).saturating_sub(1);
158            ctx.params
159                .get(idx)
160                .cloned()
161                .ok_or_else(|| EvalError::PlaceholderOutOfRange {
162                    n: *n,
163                    bound: u16::try_from(ctx.params.len()).unwrap_or(u16::MAX),
164                })
165        }
166        Expr::Unary { op, expr } => {
167            let v = eval_expr(expr, row, ctx)?;
168            apply_unary(*op, v)
169        }
170        Expr::Binary { lhs, op, rhs } => {
171            let l = eval_expr(lhs, row, ctx)?;
172            let r = eval_expr(rhs, row, ctx)?;
173            // v7.17.0 Phase 2.5 — collation-aware text comparison.
174            // When either operand of a comparison op references a
175            // column declared `COLLATE "case_insensitive"` (or any
176            // MySQL `_ci` collation), case-fold both sides before
177            // the byte-wise compare so `WHERE name = 'foo'` matches
178            // stored `'Foo'`. Non-Text values fall straight through
179            // — the helper is a no-op outside Text-Text equality
180            // and inequality.
181            let (l, r) = collation_fold_for_compare(*op, lhs, rhs, l, r, ctx);
182            apply_binary(*op, l, r)
183        }
184        Expr::Cast { expr, target } => {
185            let v = eval_expr(expr, row, ctx)?;
186            cast_value(v, *target)
187        }
188        Expr::IsNull { expr, negated } => {
189            let v = eval_expr(expr, row, ctx)?;
190            let is_null = matches!(v, Value::Null);
191            Ok(Value::Bool(if *negated { !is_null } else { is_null }))
192        }
193        Expr::FunctionCall { name, args } => {
194            let evaluated: Result<Vec<Value>, _> =
195                args.iter().map(|a| eval_expr(a, row, ctx)).collect();
196            apply_function(name, &evaluated?, ctx)
197        }
198        Expr::Like {
199            expr,
200            pattern,
201            negated,
202            case_insensitive,
203        } => {
204            let v = eval_expr(expr, row, ctx)?;
205            let p = eval_expr(pattern, row, ctx)?;
206            // NULL on either side propagates to NULL — same as PG.
207            let (text, pat) = match (v, p) {
208                (Value::Null, _) | (_, Value::Null) => return Ok(Value::Null),
209                (Value::Text(a), Value::Text(b)) => (a, b),
210                (Value::Text(_), other) | (other, _) => {
211                    return Err(EvalError::TypeMismatch {
212                        detail: format!("LIKE requires text operands, got {:?}", other.data_type()),
213                    });
214                }
215            };
216            // v7.25 (round-17) — ILIKE folds both operands (PG
217            // lowercases per the default collation).
218            let m = if *case_insensitive {
219                like_match(&text.to_lowercase(), &pat.to_lowercase())
220            } else {
221                like_match(&text, &pat)
222            };
223            Ok(Value::Bool(if *negated { !m } else { m }))
224        }
225        Expr::Extract { field, source } => {
226            let v = eval_expr(source, row, ctx)?;
227            extract_field(*field, &v)
228        }
229        // v4.10: subquery nodes should have been resolved into
230        // Literal / Binary-Eq-OR chains by Engine::resolve_select_subqueries
231        // before the row loop. Anything reaching here is a bug.
232        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {
233            Err(EvalError::TypeMismatch {
234                detail: "subquery reached row eval — engine resolver bug".into(),
235            })
236        }
237        // v4.12: window functions should have been rewritten into
238        // synthetic __win_N column references by
239        // exec_select_with_window before row eval. Anything
240        // reaching here is similarly a bug.
241        Expr::WindowFunction { .. } => Err(EvalError::TypeMismatch {
242            detail: "window function reached row eval — engine rewrite bug".into(),
243        }),
244        // v7.10.10 — `ARRAY[expr, expr, …]` constructor.
245        // v7.11.13 — element-type detection: all integers →
246        // IntArray (or BigIntArray when widening), any Text →
247        // TextArray. Non-TEXT non-integer elements (Bool, Float)
248        // stringify into TextArray as the safe default.
249        Expr::Array(items) => {
250            let mut materialised: Vec<Value> = Vec::with_capacity(items.len());
251            for elem in items {
252                materialised.push(eval_expr(elem, row, ctx)?);
253            }
254            let mut has_text = false;
255            let mut has_bigint = false;
256            let mut has_int = false;
257            for v in &materialised {
258                match v {
259                    Value::Null => {}
260                    Value::Int(_) | Value::SmallInt(_) => has_int = true,
261                    Value::BigInt(_) => has_bigint = true,
262                    Value::Text(_) | Value::Json(_) => has_text = true,
263                    _ => has_text = true,
264                }
265            }
266            if has_text || (!has_int && !has_bigint) {
267                let out: Vec<Option<String>> = materialised
268                    .into_iter()
269                    .map(|v| match v {
270                        Value::Null => None,
271                        Value::Text(s) | Value::Json(s) => Some(s),
272                        other => Some(value_to_text_for_array(&other)),
273                    })
274                    .collect();
275                return Ok(Value::TextArray(out));
276            }
277            if has_bigint {
278                let out: Vec<Option<i64>> = materialised
279                    .into_iter()
280                    .map(|v| match v {
281                        Value::Null => None,
282                        Value::Int(n) => Some(i64::from(n)),
283                        Value::SmallInt(n) => Some(i64::from(n)),
284                        Value::BigInt(n) => Some(n),
285                        _ => unreachable!(),
286                    })
287                    .collect();
288                return Ok(Value::BigIntArray(out));
289            }
290            let out: Vec<Option<i32>> = materialised
291                .into_iter()
292                .map(|v| match v {
293                    Value::Null => None,
294                    Value::Int(n) => Some(n),
295                    Value::SmallInt(n) => Some(i32::from(n)),
296                    _ => unreachable!(),
297                })
298                .collect();
299            Ok(Value::IntArray(out))
300        }
301        // v7.10.12 — `arr[i]` PG-style 1-based indexing.
302        // Out-of-range indices (including i ≤ 0) return NULL.
303        Expr::ArraySubscript { target, index } => {
304            let target_v = eval_expr(target, row, ctx)?;
305            let idx_v = eval_expr(index, row, ctx)?;
306            if matches!(target_v, Value::Null) || matches!(idx_v, Value::Null) {
307                return Ok(Value::Null);
308            }
309            let i: i64 = match idx_v {
310                Value::Int(n) => i64::from(n),
311                Value::BigInt(n) => n,
312                Value::SmallInt(n) => i64::from(n),
313                other => {
314                    return Err(EvalError::TypeMismatch {
315                        detail: format!(
316                            "array subscript must be integer, got {:?}",
317                            other.data_type()
318                        ),
319                    });
320                }
321            };
322            if i < 1 {
323                return Ok(Value::Null);
324            }
325            let pos = (i - 1) as usize;
326            match target_v {
327                Value::TextArray(items) => match items.get(pos) {
328                    Some(Some(s)) => Ok(Value::Text(s.clone())),
329                    Some(None) | None => Ok(Value::Null),
330                },
331                Value::IntArray(items) => match items.get(pos) {
332                    Some(Some(n)) => Ok(Value::Int(*n)),
333                    Some(None) | None => Ok(Value::Null),
334                },
335                Value::BigIntArray(items) => match items.get(pos) {
336                    Some(Some(n)) => Ok(Value::BigInt(*n)),
337                    Some(None) | None => Ok(Value::Null),
338                },
339                other => Err(EvalError::TypeMismatch {
340                    detail: format!(
341                        "subscript target must be an array, got {:?}",
342                        other.data_type()
343                    ),
344                }),
345            }
346        }
347        // v7.10.12 — `x op ANY(arr)` / `x op ALL(arr)`. PG
348        // 3VL: ANY → true if any element compares-true; NULL if
349        // no true but some NULL; false otherwise. ALL: false if
350        // any compares-false; NULL if no false but some NULL;
351        // true otherwise.
352        Expr::AnyAll {
353            expr,
354            op,
355            array,
356            is_any,
357        } => {
358            let lhs = eval_expr(expr, row, ctx)?;
359            let arr = eval_expr(array, row, ctx)?;
360            if matches!(arr, Value::Null) {
361                return Ok(Value::Null);
362            }
363            let elems: Vec<Option<Value>> = match arr {
364                Value::TextArray(items) => items.into_iter().map(|o| o.map(Value::Text)).collect(),
365                Value::IntArray(items) => items.into_iter().map(|o| o.map(Value::Int)).collect(),
366                Value::BigIntArray(items) => {
367                    items.into_iter().map(|o| o.map(Value::BigInt)).collect()
368                }
369                other => {
370                    return Err(EvalError::TypeMismatch {
371                        detail: format!(
372                            "ANY/ALL right-hand side must be an array, got {:?}",
373                            other.data_type()
374                        ),
375                    });
376                }
377            };
378            let mut saw_null = matches!(lhs, Value::Null);
379            let mut saw_match = false;
380            let mut saw_mismatch = false;
381            for elem in elems {
382                let elem_v = match elem {
383                    Some(v) => v,
384                    None => {
385                        saw_null = true;
386                        continue;
387                    }
388                };
389                if matches!(lhs, Value::Null) {
390                    saw_null = true;
391                    continue;
392                }
393                match apply_binary(*op, lhs.clone(), elem_v) {
394                    Ok(Value::Bool(true)) => saw_match = true,
395                    Ok(Value::Bool(false)) => saw_mismatch = true,
396                    Ok(Value::Null) => saw_null = true,
397                    Ok(other) => {
398                        return Err(EvalError::TypeMismatch {
399                            detail: format!(
400                                "ANY/ALL comparison didn't return Bool: {:?}",
401                                other.data_type()
402                            ),
403                        });
404                    }
405                    Err(e) => return Err(e),
406                }
407            }
408            let result = if *is_any {
409                if saw_match {
410                    Value::Bool(true)
411                } else if saw_null {
412                    Value::Null
413                } else {
414                    Value::Bool(false)
415                }
416            } else if saw_mismatch {
417                Value::Bool(false)
418            } else if saw_null {
419                Value::Null
420            } else {
421                Value::Bool(true)
422            };
423            Ok(result)
424        }
425        // v7.13.0 — CASE WHEN … END (mailrs round-5 G9).
426        // Short-circuit on the first matching branch. Searched form
427        // (operand=None) treats each branch's WHEN as a Bool
428        // predicate. Simple form (operand=Some) compares with =.
429        // ELSE on no match; NULL if no ELSE.
430        Expr::Case {
431            operand,
432            branches,
433            else_branch,
434        } => {
435            let operand_value = match operand {
436                Some(o) => Some(eval_expr(o, row, ctx)?),
437                None => None,
438            };
439            for (when_expr, then_expr) in branches {
440                let when_value = eval_expr(when_expr, row, ctx)?;
441                let matched = match &operand_value {
442                    None => matches!(when_value, Value::Bool(true)),
443                    Some(op_v) => matches!(
444                        apply_binary(spg_sql::ast::BinOp::Eq, op_v.clone(), when_value)?,
445                        Value::Bool(true)
446                    ),
447                };
448                if matched {
449                    return eval_expr(then_expr, row, ctx);
450                }
451            }
452            match else_branch {
453                Some(e) => eval_expr(e, row, ctx),
454                None => Ok(Value::Null),
455            }
456        }
457    }
458}
459
460/// v7.10.10 — best-effort text rendering for non-TEXT array
461/// elements (numbers, bools, etc.). The PG rule is that
462/// `ARRAY[1, 2]` is `int[]`, but SPG's v7.10 only models TEXT[],
463/// so we widen by stringifying. NUMERIC formatting goes through
464/// the existing canonical helpers to stay consistent with
465/// `format_numeric` / `format_date` etc.
466fn value_to_text_for_array(v: &Value) -> String {
467    match v {
468        Value::Text(s) | Value::Json(s) => s.clone(),
469        Value::Int(n) => n.to_string(),
470        Value::BigInt(n) => n.to_string(),
471        Value::SmallInt(n) => n.to_string(),
472        Value::Bool(b) => {
473            if *b {
474                "true".into()
475            } else {
476                "false".into()
477            }
478        }
479        Value::Float(x) => format!("{x}"),
480        Value::Date(d) => format_date(*d),
481        Value::Timestamp(t) => format_timestamp(*t),
482        Value::Numeric { scaled, scale } => format_numeric(*scaled, *scale),
483        _ => format!("{v:?}"),
484    }
485}
486
487/// Pull an integer component (year / month / ... / microsecond) out
488/// of a `DATE` or `TIMESTAMP`. Returns NULL on a NULL source, errors
489/// when the source isn't a calendar type.
490fn extract_field(field: spg_sql::ast::ExtractField, v: &Value) -> Result<Value, EvalError> {
491    use spg_sql::ast::ExtractField as F;
492    if matches!(v, Value::Null) {
493        return Ok(Value::Null);
494    }
495    // INTERVAL has its own decomposition — `YEAR` / `MONTH` come from
496    // the months part, the rest from the microseconds part. PG matches
497    // this convention (months is normalised modulo 12 for MONTH).
498    if let Value::Interval { months, micros } = *v {
499        let years = months / 12;
500        let mons = months % 12;
501        let secs_total = micros / 1_000_000;
502        let frac = micros % 1_000_000;
503        let result = match field {
504            F::Year => i64::from(years),
505            F::Month => i64::from(mons),
506            F::Day => micros / 86_400_000_000,
507            F::Hour => (secs_total / 3600) % 24,
508            F::Minute => (secs_total / 60) % 60,
509            F::Second => secs_total % 60,
510            F::Microsecond => (secs_total % 60) * 1_000_000 + frac,
511            // total seconds in the interval (months count as 30 days,
512            // PG's justify_interval convention).
513            F::Epoch => i64::from(months) * 30 * 86_400 + secs_total,
514        };
515        return Ok(Value::BigInt(result));
516    }
517    let (days, day_micros) = match *v {
518        Value::Date(d) => (d, 0_i64),
519        Value::Timestamp(t) => {
520            let days = t.div_euclid(86_400_000_000);
521            let day_micros = t.rem_euclid(86_400_000_000);
522            (i32::try_from(days).unwrap_or(i32::MAX), day_micros)
523        }
524        _ => {
525            return Err(EvalError::TypeMismatch {
526                detail: format!(
527                    "EXTRACT requires DATE / TIMESTAMP / INTERVAL, got {:?}",
528                    v.data_type()
529                ),
530            });
531        }
532    };
533    let (y, m, d) = civil_components(days);
534    let secs = day_micros / 1_000_000;
535    let hh = secs / 3600;
536    let mm = (secs / 60) % 60;
537    let ss = secs % 60;
538    let frac = day_micros % 1_000_000;
539    let result = match field {
540        F::Year => i64::from(y),
541        F::Month => i64::from(m),
542        F::Day => i64::from(d),
543        F::Hour => hh,
544        F::Minute => mm,
545        F::Second => ss,
546        F::Microsecond => ss * 1_000_000 + frac,
547        // seconds since the unix epoch (truncated; PG returns
548        // numeric with fraction — mailrs casts ::BIGINT anyway).
549        F::Epoch => i64::from(days) * 86_400 + secs,
550    };
551    Ok(Value::BigInt(result))
552}
553
554/// Internal wrapper around the file-private `civil_from_days` so the
555/// public surface area doesn't change. Returns `(year, month, day)`.
556fn civil_components(days: i32) -> (i32, u32, u32) {
557    civil_from_days(days)
558}
559
560/// SQL `LIKE` matcher. Wildcards are `%` (any run, possibly empty) and `_`
561/// (exactly one char). `\` escapes the next pattern char so `\%` matches a
562/// literal `%`. Matches the whole input — no implicit anchoring needed
563/// since SQL `LIKE` is always full-string.
564fn like_match(text: &str, pattern: &str) -> bool {
565    let text: Vec<char> = text.chars().collect();
566    let pat: Vec<char> = pattern.chars().collect();
567    like_match_inner(&text, 0, &pat, 0)
568}
569
/// Match `text[ti..]` against the `LIKE` pattern `pat[pi..]`.
///
/// Wildcards: `%` matches any run of characters (possibly empty), `_`
/// matches exactly one character, and `\` followed by another pattern
/// character matches that character literally. A trailing lone `\` is
/// treated as a literal backslash.
///
/// The matcher is iterative and remembers only the most recent `%` run.
/// On a mismatch it lets that run absorb one more text character and
/// retries. Earlier `%` runs never need revisiting, because the segment
/// between two runs is matched at its leftmost position. The worst case
/// is therefore O(text × pattern), where trying every split recursively
/// is exponential on patterns such as `%a%a%a…%b`.
fn like_match_inner(text: &[char], mut ti: usize, pat: &[char], mut pi: usize) -> bool {
    // Most recent `%` run: (pattern index just past the run, text index
    // up to which the run currently absorbs input).
    let mut resume: Option<(usize, usize)> = None;

    loop {
        // Number of pattern chars consumed if the token at `pi` matches
        // `text[ti]`, or `None` if it does not match.
        let consumed: Option<usize> = match pat.get(pi).copied() {
            Some('%') => {
                // Collapse consecutive `%`.
                while pat.get(pi) == Some(&'%') {
                    pi += 1;
                }
                if pi == pat.len() {
                    // A trailing `%` swallows whatever text is left.
                    return true;
                }
                resume = Some((pi, ti));
                continue;
            }
            Some('_') => {
                if ti < text.len() {
                    Some(1)
                } else {
                    None
                }
            }
            Some('\\') if pi + 1 < pat.len() => {
                if text.get(ti) == Some(&pat[pi + 1]) {
                    Some(2)
                } else {
                    None
                }
            }
            Some(c) => {
                if text.get(ti) == Some(&c) {
                    Some(1)
                } else {
                    None
                }
            }
            // Pattern exhausted: success only if the text is too.
            None if ti == text.len() => return true,
            None => None,
        };

        match (consumed, resume) {
            (Some(width), _) => {
                ti += 1;
                pi += width;
            }
            // Mismatch: widen the last `%` run by one character and retry
            // the remainder of the pattern from there.
            (None, Some((after_wild, absorbed))) if absorbed < text.len() => {
                resume = Some((after_wild, absorbed + 1));
                pi = after_wild;
                ti = absorbed + 1;
            }
            (None, _) => return false,
        }
    }
}
614
615/// Dispatch on lowercased function name. v1.4 implements only a handful of
616/// scalar functions; aggregates land in v1.5 alongside GROUP BY.
617fn apply_function(name: &str, args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
618    match name.to_ascii_lowercase().as_str() {
619        // v7.17.0 Phase 1.1 — SEQUENCE accessor functions.
620        "nextval" => {
621            if args.len() != 1 {
622                return Err(EvalError::TypeMismatch {
623                    detail: format!("nextval() takes 1 arg, got {}", args.len()),
624                });
625            }
626            let seq_name = match &args[0] {
627                Value::Text(s) => s.clone(),
628                Value::Null => return Ok(Value::Null),
629                other => {
630                    return Err(EvalError::TypeMismatch {
631                        detail: format!(
632                            "nextval() argument must be TEXT, got {:?}",
633                            other.data_type()
634                        ),
635                    });
636                }
637            };
638            let resolver = ctx
639                .sequence_resolver
640                .ok_or_else(|| EvalError::TypeMismatch {
641                    detail: "nextval() requires a sequence resolver (read-only context)".into(),
642                })?;
643            let v = resolver(SequenceOp::Next(seq_name))?;
644            Ok(Value::BigInt(v))
645        }
646        "currval" => {
647            if args.len() != 1 {
648                return Err(EvalError::TypeMismatch {
649                    detail: format!("currval() takes 1 arg, got {}", args.len()),
650                });
651            }
652            let seq_name = match &args[0] {
653                Value::Text(s) => s.clone(),
654                Value::Null => return Ok(Value::Null),
655                other => {
656                    return Err(EvalError::TypeMismatch {
657                        detail: format!(
658                            "currval() argument must be TEXT, got {:?}",
659                            other.data_type()
660                        ),
661                    });
662                }
663            };
664            let resolver = ctx
665                .sequence_resolver
666                .ok_or_else(|| EvalError::TypeMismatch {
667                    detail: "currval() requires a sequence resolver (read-only context)".into(),
668                })?;
669            let v = resolver(SequenceOp::Curr(seq_name))?;
670            Ok(Value::BigInt(v))
671        }
672        "setval" => {
673            if args.len() != 2 && args.len() != 3 {
674                return Err(EvalError::TypeMismatch {
675                    detail: format!("setval() takes 2 or 3 args, got {}", args.len()),
676                });
677            }
678            let seq_name = match &args[0] {
679                Value::Text(s) => s.clone(),
680                Value::Null => return Ok(Value::Null),
681                other => {
682                    return Err(EvalError::TypeMismatch {
683                        detail: format!(
684                            "setval() name argument must be TEXT, got {:?}",
685                            other.data_type()
686                        ),
687                    });
688                }
689            };
690            let value = match &args[1] {
691                Value::SmallInt(n) => i64::from(*n),
692                Value::Int(n) => i64::from(*n),
693                Value::BigInt(n) => *n,
694                Value::Null => return Ok(Value::Null),
695                other => {
696                    return Err(EvalError::TypeMismatch {
697                        detail: format!(
698                            "setval() value argument must be integer, got {:?}",
699                            other.data_type()
700                        ),
701                    });
702                }
703            };
704            let is_called = if args.len() == 3 {
705                match &args[2] {
706                    Value::Bool(b) => *b,
707                    Value::Null => return Ok(Value::Null),
708                    other => {
709                        return Err(EvalError::TypeMismatch {
710                            detail: format!(
711                                "setval() is_called argument must be BOOL, got {:?}",
712                                other.data_type()
713                            ),
714                        });
715                    }
716                }
717            } else {
718                true
719            };
720            let resolver = ctx
721                .sequence_resolver
722                .ok_or_else(|| EvalError::TypeMismatch {
723                    detail: "setval() requires a sequence resolver (read-only context)".into(),
724                })?;
725            let v = resolver(SequenceOp::Set {
726                name: seq_name,
727                value,
728                is_called,
729            })?;
730            Ok(Value::BigInt(v))
731        }
732        // v7.22 (round-13) — char_length / character_length are the
733        // SQL-standard spellings PG accepts everywhere; pg_dump
734        // CHECK predicates carry them verbatim.
735        "length" | "char_length" | "character_length" => {
736            if args.len() != 1 {
737                return Err(EvalError::TypeMismatch {
738                    detail: format!("length() takes 1 arg, got {}", args.len()),
739                });
740            }
741            match &args[0] {
742                Value::Null => Ok(Value::Null),
743                Value::Text(s) => {
744                    let n = i32::try_from(s.chars().count()).unwrap_or(i32::MAX);
745                    Ok(Value::Int(n))
746                }
747                // v7.10.4 — PG semantics: length(bytea) returns
748                // byte count (= octet_length). Without this branch
749                // mailrs's INSERT … SELECT length(body) … against a
750                // BYTEA column would type-mismatch.
751                Value::Bytes(b) => {
752                    let n = i32::try_from(b.len()).unwrap_or(i32::MAX);
753                    Ok(Value::Int(n))
754                }
755                other => Err(EvalError::TypeMismatch {
756                    detail: format!("length() needs text or bytea, got {:?}", other.data_type()),
757                }),
758            }
759        }
760        // v7.10.4 — `OCTET_LENGTH(x)` returns byte count for both
761        // TEXT (UTF-8 byte length) and BYTEA. PG-spec name; aliases
762        // to length() for bytea by design.
763        "octet_length" => {
764            if args.len() != 1 {
765                return Err(EvalError::TypeMismatch {
766                    detail: format!("octet_length() takes 1 arg, got {}", args.len()),
767                });
768            }
769            match &args[0] {
770                Value::Null => Ok(Value::Null),
771                Value::Text(s) => {
772                    let n = i32::try_from(s.len()).unwrap_or(i32::MAX);
773                    Ok(Value::Int(n))
774                }
775                Value::Bytes(b) => {
776                    let n = i32::try_from(b.len()).unwrap_or(i32::MAX);
777                    Ok(Value::Int(n))
778                }
779                other => Err(EvalError::TypeMismatch {
780                    detail: format!(
781                        "octet_length() needs text or bytea, got {:?}",
782                        other.data_type()
783                    ),
784                }),
785            }
786        }
787        // v7.11.6 — `array_length(arr, dim)` returns the element
788        // count of `arr` along dimension `dim`. v7.11 only models
789        // single-dimension arrays so dim must be 1 (otherwise NULL,
790        // matching PG semantics for unsupported dimensions). NULL
791        // array → NULL. v7.11 TEXT[] only; non-array operand is
792        // a type mismatch.
793        "array_length" => {
794            if args.len() != 2 {
795                return Err(EvalError::TypeMismatch {
796                    detail: format!("array_length() takes 2 args, got {}", args.len()),
797                });
798            }
799            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
800                return Ok(Value::Null);
801            }
802            let len = match &args[0] {
803                Value::TextArray(items) => items.len(),
804                Value::IntArray(items) => items.len(),
805                Value::BigIntArray(items) => items.len(),
806                _ => {
807                    return Err(EvalError::TypeMismatch {
808                        detail: format!(
809                            "array_length() first arg must be an array, got {:?}",
810                            args[0].data_type()
811                        ),
812                    });
813                }
814            };
815            let dim: i64 = match args[1] {
816                Value::Int(n) => i64::from(n),
817                Value::BigInt(n) => n,
818                Value::SmallInt(n) => i64::from(n),
819                _ => {
820                    return Err(EvalError::TypeMismatch {
821                        detail: format!(
822                            "array_length() second arg must be integer, got {:?}",
823                            args[1].data_type()
824                        ),
825                    });
826                }
827            };
828            if dim != 1 {
829                return Ok(Value::Null);
830            }
831            let n = i32::try_from(len).unwrap_or(i32::MAX);
832            Ok(Value::Int(n))
833        }
834        // v7.11.6 — `array_position(arr, val)` returns 1-based
835        // index of the first element of `arr` equal to `val`, or
836        // NULL if not found. PG NULL semantics: NULL array → NULL;
837        // NULL val never matches (returns NULL if absent).
838        "array_position" => {
839            if args.len() != 2 {
840                return Err(EvalError::TypeMismatch {
841                    detail: format!("array_position() takes 2 args, got {}", args.len()),
842                });
843            }
844            if matches!(args[0], Value::Null) {
845                return Ok(Value::Null);
846            }
847            if matches!(args[1], Value::Null) {
848                return Ok(Value::Null);
849            }
850            match (&args[0], &args[1]) {
851                (Value::TextArray(items), Value::Text(needle)) => {
852                    for (idx, item) in items.iter().enumerate() {
853                        if let Some(s) = item
854                            && s == needle
855                        {
856                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
857                        }
858                    }
859                    Ok(Value::Null)
860                }
861                (Value::IntArray(items), needle_v)
862                    if matches!(
863                        needle_v,
864                        Value::Int(_) | Value::SmallInt(_) | Value::BigInt(_)
865                    ) =>
866                {
867                    let needle: i64 = match *needle_v {
868                        Value::Int(n) => i64::from(n),
869                        Value::SmallInt(n) => i64::from(n),
870                        Value::BigInt(n) => n,
871                        _ => unreachable!(),
872                    };
873                    for (idx, item) in items.iter().enumerate() {
874                        if let Some(n) = item
875                            && i64::from(*n) == needle
876                        {
877                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
878                        }
879                    }
880                    Ok(Value::Null)
881                }
882                (Value::BigIntArray(items), needle_v)
883                    if matches!(
884                        needle_v,
885                        Value::Int(_) | Value::SmallInt(_) | Value::BigInt(_)
886                    ) =>
887                {
888                    let needle: i64 = match *needle_v {
889                        Value::Int(n) => i64::from(n),
890                        Value::SmallInt(n) => i64::from(n),
891                        Value::BigInt(n) => n,
892                        _ => unreachable!(),
893                    };
894                    for (idx, item) in items.iter().enumerate() {
895                        if let Some(n) = item
896                            && *n == needle
897                        {
898                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
899                        }
900                    }
901                    Ok(Value::Null)
902                }
903                (
904                    arr @ (Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_)),
905                    other,
906                ) => Err(EvalError::TypeMismatch {
907                    detail: format!(
908                        "array_position() needle type {:?} doesn't match array {:?}",
909                        other.data_type(),
910                        arr.data_type()
911                    ),
912                }),
913                (other, _) => Err(EvalError::TypeMismatch {
914                    detail: format!(
915                        "array_position() first arg must be an array, got {:?}",
916                        other.data_type()
917                    ),
918                }),
919            }
920        }
921        // v7.11.15 — `substring(s, start)` / `substring(s, start, length)`
922        // for both TEXT and BYTEA. PG semantics: `start` is 1-based;
923        // values ≤ 0 clamp into the string (i.e. effective start is
924        // adjusted so the window still begins at index 1 — but
925        // `length` is reduced by the clipped prefix). A NULL arg
926        // makes the result NULL. Out-of-range windows return an
927        // empty value, not NULL.
928        "substring" | "substr" => {
929            if !matches!(args.len(), 2 | 3) {
930                return Err(EvalError::TypeMismatch {
931                    detail: format!("substring() takes 2 or 3 args, got {}", args.len()),
932                });
933            }
934            if args.iter().any(|a| matches!(a, Value::Null)) {
935                return Ok(Value::Null);
936            }
937            let start: i64 = match args[1] {
938                Value::Int(n) => i64::from(n),
939                Value::BigInt(n) => n,
940                Value::SmallInt(n) => i64::from(n),
941                _ => {
942                    return Err(EvalError::TypeMismatch {
943                        detail: format!(
944                            "substring() start must be integer, got {:?}",
945                            args[1].data_type()
946                        ),
947                    });
948                }
949            };
950            let length: Option<i64> = if args.len() == 3 {
951                match args[2] {
952                    Value::Int(n) => Some(i64::from(n)),
953                    Value::BigInt(n) => Some(n),
954                    Value::SmallInt(n) => Some(i64::from(n)),
955                    _ => {
956                        return Err(EvalError::TypeMismatch {
957                            detail: format!(
958                                "substring() length must be integer, got {:?}",
959                                args[2].data_type()
960                            ),
961                        });
962                    }
963                }
964            } else {
965                None
966            };
967            // PG: when length is given, end = start + length; if
968            // end < start the result is empty. Clip start to 1.
969            let (effective_start, effective_length): (i64, Option<i64>) = match length {
970                Some(len) => {
971                    let end = start.saturating_add(len);
972                    if end <= 1 || len < 0 {
973                        return Ok(match &args[0] {
974                            Value::Text(_) => Value::Text(String::new()),
975                            Value::Bytes(_) => Value::Bytes(Vec::new()),
976                            other => {
977                                return Err(EvalError::TypeMismatch {
978                                    detail: format!(
979                                        "substring() needs text or bytea, got {:?}",
980                                        other.data_type()
981                                    ),
982                                });
983                            }
984                        });
985                    }
986                    let eff_start = start.max(1);
987                    let eff_len = end - eff_start;
988                    (eff_start, Some(eff_len.max(0)))
989                }
990                None => (start.max(1), None),
991            };
992            match &args[0] {
993                Value::Text(s) => {
994                    // PG counts in characters (codepoints) for TEXT.
995                    let chars: Vec<char> = s.chars().collect();
996                    let skip = (effective_start - 1) as usize;
997                    if skip >= chars.len() {
998                        return Ok(Value::Text(String::new()));
999                    }
1000                    let take = match effective_length {
1001                        Some(n) => (n as usize).min(chars.len() - skip),
1002                        None => chars.len() - skip,
1003                    };
1004                    Ok(Value::Text(chars[skip..skip + take].iter().collect()))
1005                }
1006                Value::Bytes(b) => {
1007                    let skip = (effective_start - 1) as usize;
1008                    if skip >= b.len() {
1009                        return Ok(Value::Bytes(Vec::new()));
1010                    }
1011                    let take = match effective_length {
1012                        Some(n) => (n as usize).min(b.len() - skip),
1013                        None => b.len() - skip,
1014                    };
1015                    Ok(Value::Bytes(b[skip..skip + take].to_vec()))
1016                }
1017                other => Err(EvalError::TypeMismatch {
1018                    detail: format!(
1019                        "substring() needs text or bytea, got {:?}",
1020                        other.data_type()
1021                    ),
1022                }),
1023            }
1024        }
1025        // v7.11.15 — `position(needle, haystack)`. PG semantics:
1026        // 1-based byte/char index of first occurrence, or 0 if
1027        // absent. NULL on either operand → NULL. Empty needle
1028        // returns 1 (PG convention). Works on TEXT (char positions)
1029        // and BYTEA (byte positions). (The PG-spec syntax `position(
1030        // needle IN haystack)` is not parsed in v7.11; clients must
1031        // call the function-call form.)
1032        "position" => {
1033            if args.len() != 2 {
1034                return Err(EvalError::TypeMismatch {
1035                    detail: format!("position() takes 2 args, got {}", args.len()),
1036                });
1037            }
1038            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
1039                return Ok(Value::Null);
1040            }
1041            match (&args[0], &args[1]) {
1042                (Value::Text(needle), Value::Text(haystack)) => {
1043                    if needle.is_empty() {
1044                        return Ok(Value::Int(1));
1045                    }
1046                    // Char-based position (PG uses character count).
1047                    let h_chars: Vec<char> = haystack.chars().collect();
1048                    let n_chars: Vec<char> = needle.chars().collect();
1049                    if n_chars.len() > h_chars.len() {
1050                        return Ok(Value::Int(0));
1051                    }
1052                    for i in 0..=h_chars.len() - n_chars.len() {
1053                        if h_chars[i..i + n_chars.len()] == n_chars[..] {
1054                            return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1055                        }
1056                    }
1057                    Ok(Value::Int(0))
1058                }
1059                (Value::Bytes(needle), Value::Bytes(haystack)) => {
1060                    if needle.is_empty() {
1061                        return Ok(Value::Int(1));
1062                    }
1063                    if needle.len() > haystack.len() {
1064                        return Ok(Value::Int(0));
1065                    }
1066                    for i in 0..=haystack.len() - needle.len() {
1067                        if &haystack[i..i + needle.len()] == needle.as_slice() {
1068                            return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1069                        }
1070                    }
1071                    Ok(Value::Int(0))
1072                }
1073                (a, b) => Err(EvalError::TypeMismatch {
1074                    detail: format!(
1075                        "position() operands must both be text or both bytea, got {:?} and {:?}",
1076                        a.data_type(),
1077                        b.data_type()
1078                    ),
1079                }),
1080            }
1081        }
1082        "upper" => {
1083            if args.len() != 1 {
1084                return Err(EvalError::TypeMismatch {
1085                    detail: format!("upper() takes 1 arg, got {}", args.len()),
1086                });
1087            }
1088            match &args[0] {
1089                Value::Null => Ok(Value::Null),
1090                Value::Text(s) => Ok(Value::Text(s.to_uppercase())),
1091                other => Err(EvalError::TypeMismatch {
1092                    detail: format!("upper() needs text, got {:?}", other.data_type()),
1093                }),
1094            }
1095        }
1096        "lower" => {
1097            if args.len() != 1 {
1098                return Err(EvalError::TypeMismatch {
1099                    detail: format!("lower() takes 1 arg, got {}", args.len()),
1100                });
1101            }
1102            match &args[0] {
1103                Value::Null => Ok(Value::Null),
1104                Value::Text(s) => Ok(Value::Text(s.to_lowercase())),
1105                other => Err(EvalError::TypeMismatch {
1106                    detail: format!("lower() needs text, got {:?}", other.data_type()),
1107                }),
1108            }
1109        }
1110        "abs" => {
1111            if args.len() != 1 {
1112                return Err(EvalError::TypeMismatch {
1113                    detail: format!("abs() takes 1 arg, got {}", args.len()),
1114                });
1115            }
1116            match &args[0] {
1117                Value::Null => Ok(Value::Null),
1118                Value::Int(n) => Ok(Value::Int(n.wrapping_abs())),
1119                Value::BigInt(n) => Ok(Value::BigInt(n.wrapping_abs())),
1120                Value::Float(x) => Ok(Value::Float(x.abs())),
1121                other => Err(EvalError::TypeMismatch {
1122                    detail: format!("abs() needs numeric, got {:?}", other.data_type()),
1123                }),
1124            }
1125        }
1126        "coalesce" => {
1127            for a in args {
1128                if !matches!(a, Value::Null) {
1129                    return Ok(a.clone());
1130                }
1131            }
1132            Ok(Value::Null)
1133        }
        // Date/time builtins. These arms do no arity or type
        // checking of their own: each one hands the evaluated
        // argument list straight to a dedicated helper and returns
        // whatever that helper produces.
        "date_trunc" => date_trunc(args),
        "date_part" => date_part(args),
        "age" => age(args),
        "to_char" => to_char(args),
        // v7.17.0 Phase 3.P0-29 — MySQL time aliases. WordPress,
        // Laravel, mysql-connector-python emit these constantly.
        // `unix_timestamp()` (bare) is folded by clock_replacement_for
        // into a BigInt literal — this arm only handles the 1-arg
        // form (TIMESTAMP / DATE → epoch seconds).
        "date_format" => date_format_mysql(args),
        "unix_timestamp" => unix_timestamp_of(args),
        "from_unixtime" => from_unixtime(args),
        // v7.17.0 Phase 3.8 — PG `format(fmt, args…)` sprintf-style.
        // Conversion specifiers: `%s` (literal string from arg),
        // `%I` (quoted identifier), `%L` (quoted SQL literal),
        // `%%` (literal `%`). `%n$X` argument-position prefix
        // (1-based). NULL arg → empty string for %s; NULL for %I
        // is an error in PG; NULL for %L renders as the SQL
        // literal `NULL`. Args missing for a position → error.
        "format" => format_string(args),
1154        // PG `concat(args...)` — variadic; coerces every arg to
1155        // its text representation; NULL arguments are silently
1156        // skipped (the canonical PG semantic — `concat()` is the
1157        // NULL-tolerant counterpart to the `||` operator which
1158        // propagates NULL).
1159        //
1160        // Reference:
1161        //   https://www.postgresql.org/docs/current/functions-string.html
1162        //   "Concatenates the text representations of all the
1163        //   arguments. NULL arguments are ignored."
1164        //
1165        // Edge cases:
1166        //   * `concat()` (no args) → ''
1167        //   * Every arg NULL → '' (NEVER returns NULL — distinct
1168        //     from `||` and from `array_agg`)
1169        //   * Bool → PG single-char form 't' / 'f'
1170        //   * SmallInt / Int / BigInt / Float / Numeric / Date /
1171        //     Timestamp / Json / Bytes → their canonical text
1172        //     rendering (shared with `format()`'s %s specifier
1173        //     via `value_to_format_text`).
1174        "concat" => {
1175            let mut out = String::new();
1176            for v in args {
1177                if matches!(v, Value::Null) {
1178                    continue;
1179                }
1180                out.push_str(&value_to_format_text(v));
1181            }
1182            Ok(Value::Text(out))
1183        }
1184        // PG `concat_ws(sep, val1 [, val2 ...])` — like concat but
1185        // with a separator inserted between each pair of NON-NULL
1186        // arguments. Critical semantic subtleties:
1187        //   * NULL separator → NULL result (the sep position is
1188        //     mandatory and poison-prone; this is the ONLY way
1189        //     concat_ws can return NULL).
1190        //   * NULL data args silently SKIPPED — the separator is
1191        //     NOT inserted around them. `concat_ws(',', 'a', NULL,
1192        //     'b')` → `'a,b'`, not `'a,,b'`.
1193        //   * Empty-string data args are KEPT (separator placed
1194        //     around them). `concat_ws(',', 'a', '', 'b')` →
1195        //     `'a,,b'`. Distinction with NULL matters for code
1196        //     like `concat_ws(', ', first_name, middle_name,
1197        //     last_name)`.
1198        //   * 0 args → arity error (sep is mandatory).
1199        //   * Only sep (no data) → '' (NOT NULL — distinct from
1200        //     the all-NULL data case which also returns '').
1201        //
1202        // Reference:
1203        //   https://www.postgresql.org/docs/current/functions-string.html
1204        // PG `trim` / `ltrim` / `rtrim` / `btrim`.
1205        //
1206        // Semantic anchors (PG-canonical):
1207        //   * Default chars set is the ASCII SPACE only (NOT the
1208        //     POSIX whitespace class — tab / newline / form-feed
1209        //     stay put unless explicitly listed in `chars`).
1210        //   * `chars` arg is a UTF-8 codepoint SET — any char in
1211        //     the set is stripped, not the substring.
1212        //   * `trim(s)` == `btrim(s)` == strip both ends.
1213        //   * `ltrim(s, c)` / `rtrim(s, c)` strip only the named
1214        //     side; inner occurrences are preserved.
1215        //   * NULL on EITHER arg → NULL result.
1216        //   * Non-text input is coerced via `value_to_format_text`
1217        //     so trim(42) returns '42'.
1218        //
1219        // Reference:
1220        //   https://www.postgresql.org/docs/current/functions-string.html
        // PG `replace(string, from, to)` — substring substitution
        // for every (non-overlapping, greedy left-to-right)
        // occurrence. Empty `from` passes input through unchanged
        // (PG behavior — avoids infinite loop). Inserted text is
        // NOT re-scanned for new matches (so `replace('a', 'a',
        // 'aa')` yields `'aa'` instead of looping forever). NULL
        // on any arg poisons the result (→ NULL).
1228        // PG `split_part(string, delimiter, n)` — split on delim,
1229        // return the n-th field (1-indexed). Negative n counts
1230        // from the end (PG 14+). Out-of-range n → '' (NOT NULL).
1231        // n = 0 → error. Empty delimiter → error. NULL on any
1232        // arg → NULL.
1233        // PG `repeat(string, n)` — duplicate the input N times.
1234        // n=0 → ''; n<0 → '' (PG does NOT error on negative);
1235        // NULL on any arg → NULL.
        // PG `lpad(string, length [, fill])` / `rpad(...)`.
        // length is the target CODEPOINT count. When the input is
        // longer than `length` it is truncated, and both lpad and
        // rpad keep the LEFT side (i.e. both truncate from the
        // right, per PG-verified behavior). When it is shorter it
        // is padded with `fill` (default SPACE), cycling through
        // multi-char fills. length<=0 → ''. Empty fill + needs
        // padding → returns input verbatim (potentially truncated).
        // NULL on any arg → NULL.
1245        // PG `strpos(string, substring)` — same as position()
1246        // but with reversed arg order. PG convention is
1247        // strpos(haystack, needle); position(needle, haystack).
1248        // Both are 1-indexed; 0 = not found; codepoint-counted.
1249        // PG `left(string, n)` / `right(string, n)` — head/tail
1250        // substring helpers. Negative n means "all but last/first
1251        // |n| chars" — slice from the OPPOSITE side. n=0 → ''.
1252        // Codepoint-counted. NULL on any arg → NULL.
1253        // PG `floor(x)` — largest integer <= x.
1254        //   * Negative floats floor TOWARD -infinity, NOT toward 0.
1255        //   * Integer types passthrough unchanged.
1256        //   * NULL → NULL.
1257        // PG `ceil(x)` / `ceiling(x)` — smallest integer >= x.
1258        //   * Negative floats round TOWARD zero (toward +inf):
1259        //     ceil(-1.5) → -1, NOT -2.
1260        //   * Integer types passthrough unchanged.
1261        //   * NULL → NULL.
1262        // PG `round(x)` / `round(x, scale)` — half-away-from-zero
1263        // rounding (NUMERIC semantic).
1264        //   * round(0.5) → 1; round(-0.5) → -1; round(2.5) → 3.
1265        //   * Two-arg form rounds to N decimal places (n>0) or to
1266        //     nearest 10^|n| (n<0).
1267        //   * Integer types passthrough unchanged.
1268        //   * NULL on any arg → NULL.
1269        // PG `trunc(x)` / `trunc(x, scale)` — truncate TOWARD zero.
1270        //   * Distinct from floor() which rounds toward -inf:
1271        //     trunc(-1.7)→-1; floor(-1.7)→-2.
1272        //   * Distinct from round() which rounds half-away:
1273        //     trunc(1.5)→1; round(1.5)→2.
1274        //   * Two-arg form truncates to N decimal places (or 10^|n|
1275        //     for negative n).
1276        //   * Integer types passthrough unchanged.
1277        //   * NULL on any arg → NULL.
1278        // PG `nullif(a, b)` — returns NULL if a = b, else a.
1279        // Canonical use cases:
1280        //   * Divide-by-zero protection: `x / nullif(y, 0)`
1281        //   * Empty-string normalisation: `nullif(field, '')`
1282        // Edge: nullif(NULL, NULL) returns NULL. nullif(NULL, x)
1283        // returns NULL. nullif(x, NULL) returns x (since NULL is
1284        // not == to anything per IS DISTINCT FROM semantic, x ≠ NULL).
1285        // PG `greatest(...)` / `least(...)` — variadic max/min.
1286        // NULL args silently skipped (PG-canonical). All-NULL → NULL.
1287        // Cross-type widening for numeric comparisons.
1288        // PG `mod(y, x)` — modulo. Result sign follows dividend.
1289        //   * mod(7, 3) = 1
1290        //   * mod(-7, 3) = -1
1291        //   * mod(7, -3) = 1
1292        //   * mod(-7, -3) = -1
1293        // Division by zero → error. NULL on any arg → NULL.
1294        // PG `power(x, y)` / `pow(x, y)` — x^y.
1295        // Integer exponent → exact via repeated multiplication
1296        // (no precision loss). Fractional exponent → exp(y*ln(x))
1297        // via the no_std exp/ln series helpers.
1298        // x=0 with negative y → error (1/0). NULL → NULL.
1299        // PG `sqrt(x)` — square root. Negative input → error.
1300        // PG `sign(x)` — -1 / 0 / 1.
1301        // PG `random()` — uniform float in [0, 1). Per-row /
1302        // per-call: each evaluation returns a different value
1303        // even within the same statement. Backed by a xorshift64*
1304        // PRNG with a process-static seed; not cryptographically
1305        // secure (use a cryptographic source for security tokens).
1306        "random" => {
1307            if !args.is_empty() {
1308                return Err(EvalError::TypeMismatch {
1309                    detail: alloc::format!("random() takes 0 args, got {}", args.len()),
1310                });
1311            }
1312            Ok(Value::Float(prng_next_f64()))
1313        }
1314        // v7.17.0 — PG `gen_random_uuid()` (built-in, no extension)
1315        // and the historical uuid-ossp `uuid_generate_v4()` alias.
1316        // Both produce a RFC 4122 v4 (random) UUID. This is the
1317        // function Django / Rails / Hibernate emit in `id UUID
1318        // PRIMARY KEY DEFAULT gen_random_uuid()`, the modern
1319        // default PK pattern.
1320        "gen_random_uuid" | "uuid_generate_v4" => {
1321            if !args.is_empty() {
1322                return Err(EvalError::TypeMismatch {
1323                    detail: alloc::format!("{name}() takes 0 args, got {}", args.len()),
1324                });
1325            }
1326            Ok(Value::Uuid(gen_random_uuid_bytes()))
1327        }
1328        "sign" => {
1329            if args.len() != 1 {
1330                return Err(EvalError::TypeMismatch {
1331                    detail: alloc::format!("sign() takes 1 arg, got {}", args.len()),
1332                });
1333            }
1334            match &args[0] {
1335                Value::Null => Ok(Value::Null),
1336                Value::SmallInt(n) => Ok(Value::SmallInt(n.signum())),
1337                Value::Int(n) => Ok(Value::Int(n.signum())),
1338                Value::BigInt(n) => Ok(Value::BigInt(n.signum())),
1339                Value::Float(x) => {
1340                    let s = if *x > 0.0 {
1341                        1.0
1342                    } else if *x < 0.0 {
1343                        -1.0
1344                    } else {
1345                        0.0
1346                    };
1347                    Ok(Value::Float(s))
1348                }
1349                Value::Numeric { scaled, scale } => {
1350                    let s = scaled.signum();
1351                    Ok(Value::Numeric {
1352                        scaled: s * pow10_i128(*scale),
1353                        scale: *scale,
1354                    })
1355                }
1356                other => Err(EvalError::TypeMismatch {
1357                    detail: alloc::format!("sign() needs numeric, got {:?}", other.data_type()),
1358                }),
1359            }
1360        }
1361        "sqrt" => {
1362            if args.len() != 1 {
1363                return Err(EvalError::TypeMismatch {
1364                    detail: alloc::format!("sqrt() takes 1 arg, got {}", args.len()),
1365                });
1366            }
1367            match &args[0] {
1368                Value::Null => Ok(Value::Null),
1369                v => {
1370                    let x = value_to_f64(v).ok_or_else(|| EvalError::TypeMismatch {
1371                        detail: alloc::format!("sqrt() needs numeric, got {:?}", v.data_type()),
1372                    })?;
1373                    if x < 0.0 {
1374                        return Err(EvalError::TypeMismatch {
1375                            detail: "sqrt(): negative input outside real domain".into(),
1376                        });
1377                    }
1378                    if x == 0.0 {
1379                        return Ok(Value::Float(0.0));
1380                    }
1381                    Ok(Value::Float(f64_sqrt(x)))
1382                }
1383            }
1384        }
1385        "power" | "pow" => {
1386            if args.len() != 2 {
1387                return Err(EvalError::TypeMismatch {
1388                    detail: alloc::format!("power() takes 2 args, got {}", args.len()),
1389                });
1390            }
1391            if args.iter().any(|v| matches!(v, Value::Null)) {
1392                return Ok(Value::Null);
1393            }
1394            let x = value_to_f64(&args[0]).ok_or_else(|| EvalError::TypeMismatch {
1395                detail: "power() needs numeric x".into(),
1396            })?;
1397            let y = value_to_f64(&args[1]).ok_or_else(|| EvalError::TypeMismatch {
1398                detail: "power() needs numeric y".into(),
1399            })?;
1400            // Integer-exponent fast path (returns before the 0^negative check below).
1401            let y_int = y as i32;
1402            if (y_int as f64) == y && y.abs() < 1024.0 {
1403                let result = f64_powi(x, y_int);
1404                return Ok(Value::Float(result));
1405            }
1406            // Fractional exponent — only defined for x >= 0 in real
1407            // arithmetic. Negative x raised to fractional power is
1408            // complex; reject cleanly.
1409            if x < 0.0 {
1410                return Err(EvalError::TypeMismatch {
1411                    detail: "power(): negative base with fractional exponent yields complex result"
1412                        .into(),
1413                });
1414            }
1415            if x == 0.0 && y < 0.0 {
1416                return Err(EvalError::TypeMismatch {
1417                    detail: "power(): 0 raised to negative power is undefined".into(),
1418                });
1419            }
1420            if x == 0.0 {
1421                return Ok(Value::Float(0.0));
1422            }
1423            Ok(Value::Float(f64_exp(y * f64_ln(x))))
1424        }
1425        "mod" => {
1426            if args.len() != 2 {
1427                return Err(EvalError::TypeMismatch {
1428                    detail: alloc::format!("mod() takes 2 args, got {}", args.len()),
1429                });
1430            }
1431            if args.iter().any(|v| matches!(v, Value::Null)) {
1432                return Ok(Value::Null);
1433            }
1434            let to_i64 = |v: &Value| -> Result<i64, EvalError> {
1435                match v {
1436                    Value::SmallInt(x) => Ok(i64::from(*x)),
1437                    Value::Int(x) => Ok(i64::from(*x)),
1438                    Value::BigInt(x) => Ok(*x),
1439                    other => Err(EvalError::TypeMismatch {
1440                        detail: alloc::format!("mod() needs integer, got {:?}", other.data_type()),
1441                    }),
1442                }
1443            };
1444            let y = to_i64(&args[0])?;
1445            let x = to_i64(&args[1])?;
1446            if x == 0 {
1447                return Err(EvalError::TypeMismatch {
1448                    detail: "mod(): division by zero".into(),
1449                });
1450            }
1451            // Rust's `%` on signed integers follows the dividend's sign (same as
1452            // PG), but panics on i64::MIN % -1, which is not guarded against here.
1453            let result = y % x;
1454            // Pick the narrowest type that holds the result.
1455            if let Ok(small) = i16::try_from(result) {
1456                if matches!(args[0], Value::SmallInt(_)) && matches!(args[1], Value::SmallInt(_)) {
1457                    return Ok(Value::SmallInt(small));
1458                }
1459            }
1460            if let Ok(int_) = i32::try_from(result) {
1461                if !matches!(args[0], Value::BigInt(_)) && !matches!(args[1], Value::BigInt(_)) {
1462                    return Ok(Value::Int(int_));
1463                }
1464            }
1465            Ok(Value::BigInt(result))
1466        }
1467        "greatest" | "least" => {
1468            if args.is_empty() {
1469                return Err(EvalError::TypeMismatch {
1470                    detail: alloc::format!(
1471                        "{lc}() takes at least 1 arg",
1472                        lc = if name.eq_ignore_ascii_case("greatest") {
1473                            "greatest"
1474                        } else {
1475                            "least"
1476                        }
1477                    ),
1478                });
1479            }
1480            let non_null: alloc::vec::Vec<&Value> =
1481                args.iter().filter(|v| !matches!(v, Value::Null)).collect();
1482            if non_null.is_empty() {
1483                return Ok(Value::Null);
1484            }
1485            let is_greatest = name.eq_ignore_ascii_case("greatest");
1486            let mut best = non_null[0].clone();
1487            for v in &non_null[1..] {
1488                let ord = value_cmp_for_min_max(&best, v);
1489                let take = if is_greatest {
1490                    ord == core::cmp::Ordering::Less
1491                } else {
1492                    ord == core::cmp::Ordering::Greater
1493                };
1494                if take {
1495                    best = (*v).clone();
1496                }
1497            }
1498            Ok(best)
1499        }
1500        // MySQL `ifnull(a, b)` — alias for coalesce(a, b).
1501        // Used by every ORM with a MySQL target (Hibernate /
1502        // Laravel / Sequelize).
1503        "ifnull" => {
1504            if args.len() != 2 {
1505                return Err(EvalError::TypeMismatch {
1506                    detail: alloc::format!("ifnull() takes 2 args, got {}", args.len()),
1507                });
1508            }
1509            for v in args {
1510                if !matches!(v, Value::Null) {
1511                    return Ok(v.clone());
1512                }
1513            }
1514            Ok(Value::Null)
1515        }
1516        // MySQL `if(cond, then, else)` — alias for CASE WHEN.
1517        // NULL condition → else branch (MySQL semantic).
1518        // Integer condition: nonzero is true.
1519        "if" => {
1520            if args.len() != 3 {
1521                return Err(EvalError::TypeMismatch {
1522                    detail: alloc::format!(
1523                        "if() takes 3 args (cond, then, else), got {}",
1524                        args.len()
1525                    ),
1526                });
1527            }
1528            let truthy = match &args[0] {
1529                Value::Null => false,
1530                Value::Bool(b) => *b,
1531                Value::SmallInt(n) => *n != 0,
1532                Value::Int(n) => *n != 0,
1533                Value::BigInt(n) => *n != 0,
1534                Value::Float(x) => *x != 0.0,
1535                Value::Text(s) => !s.is_empty() && s != "0",
1536                _ => true,
1537            };
1538            if truthy {
1539                Ok(args[1].clone())
1540            } else {
1541                Ok(args[2].clone())
1542            }
1543        }
1544        "nullif" => {
1545            if args.len() != 2 {
1546                return Err(EvalError::TypeMismatch {
1547                    detail: alloc::format!("nullif() takes 2 args, got {}", args.len()),
1548                });
1549            }
1550            match (&args[0], &args[1]) {
1551                (Value::Null, _) => Ok(Value::Null),
1552                (a, Value::Null) => Ok(a.clone()),
1553                (a, b) => {
1554                    // Use value_cmp (already defined as Ord-like
1555                    // function in lib.rs) — but it's not accessible
1556                    // here. Fall back to direct equality.
1557                    if values_equal_for_nullif(a, b) {
1558                        Ok(Value::Null)
1559                    } else {
1560                        Ok(a.clone())
1561                    }
1562                }
1563            }
1564        }
1565        "trunc" => {
1566            match args.len() {
1567                1 => match &args[0] {
1568                    Value::Null => Ok(Value::Null),
1569                    Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1570                    Value::Float(x) => Ok(Value::Float(f64_trunc(*x))),
1571                    Value::Numeric { scaled, scale } => {
1572                        let factor = pow10_i128(*scale);
1573                        // Truncate toward zero — sign-preserving division.
1574                        let q = scaled / factor;
1575                        Ok(Value::Numeric {
1576                            scaled: q * factor,
1577                            scale: *scale,
1578                        })
1579                    }
1580                    other => Err(EvalError::TypeMismatch {
1581                        detail: alloc::format!(
1582                            "trunc() needs numeric, got {:?}",
1583                            other.data_type()
1584                        ),
1585                    }),
1586                },
1587                2 => {
1588                    if args.iter().any(|v| matches!(v, Value::Null)) {
1589                        return Ok(Value::Null);
1590                    }
1591                    let n = match &args[1] {
1592                        Value::SmallInt(x) => i32::from(*x),
1593                        Value::Int(x) => *x,
1594                        Value::BigInt(x) => {
1595                            i32::try_from(*x).map_err(|_| EvalError::TypeMismatch {
1596                                detail: "trunc(): scale must fit in i32".into(),
1597                            })?
1598                        }
1599                        other => {
1600                            return Err(EvalError::TypeMismatch {
1601                                detail: alloc::format!(
1602                                    "trunc(): scale must be integer, got {:?}",
1603                                    other.data_type()
1604                                ),
1605                            });
1606                        }
1607                    };
1608                    let x = match &args[0] {
1609                        Value::SmallInt(v) => f64::from(*v),
1610                        Value::Int(v) => f64::from(*v),
1611                        Value::BigInt(v) => *v as f64,
1612                        Value::Float(v) => *v,
1613                        Value::Numeric { scaled, scale } => {
1614                            (*scaled as f64) / f64_powi(10.0, i32::from(*scale))
1615                        }
1616                        other => {
1617                            return Err(EvalError::TypeMismatch {
1618                                detail: alloc::format!(
1619                                    "trunc() needs numeric x, got {:?}",
1620                                    other.data_type()
1621                                ),
1622                            });
1623                        }
1624                    };
1625                    let result = if n >= 0 {
1626                        let factor = f64_powi(10.0, n);
1627                        f64_trunc(x * factor) / factor
1628                    } else {
1629                        let factor = f64_powi(10.0, -n);
1630                        f64_trunc(x / factor) * factor
1631                    };
1632                    Ok(Value::Float(result))
1633                }
1634                _ => Err(EvalError::TypeMismatch {
1635                    detail: alloc::format!("trunc() takes 1 or 2 args, got {}", args.len()),
1636                }),
1637            }
1638        }
1639        "round" => {
1640            match args.len() {
1641                1 => match &args[0] {
1642                    Value::Null => Ok(Value::Null),
1643                    Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1644                    Value::Float(x) => Ok(Value::Float(f64_round_half_away(*x))),
1645                    Value::Numeric { scaled, scale } => {
1646                        let factor = pow10_i128(*scale);
1647                        let q = scaled.div_euclid(factor);
1648                        let r = scaled.rem_euclid(factor);
1649                        // Half-up, not half-away: r >= 0, so ties go toward +inf (-2.5 → -2).
1650                        let result = if 2 * r >= factor { q + 1 } else { q };
1651                        Ok(Value::Numeric {
1652                            scaled: result * factor,
1653                            scale: *scale,
1654                        })
1655                    }
1656                    other => Err(EvalError::TypeMismatch {
1657                        detail: alloc::format!(
1658                            "round() needs numeric, got {:?}",
1659                            other.data_type()
1660                        ),
1661                    }),
1662                },
1663                2 => {
1664                    if args.iter().any(|v| matches!(v, Value::Null)) {
1665                        return Ok(Value::Null);
1666                    }
1667                    let n = match &args[1] {
1668                        Value::SmallInt(x) => i32::from(*x),
1669                        Value::Int(x) => *x,
1670                        Value::BigInt(x) => {
1671                            i32::try_from(*x).map_err(|_| EvalError::TypeMismatch {
1672                                detail: "round(): scale must fit in i32".into(),
1673                            })?
1674                        }
1675                        other => {
1676                            return Err(EvalError::TypeMismatch {
1677                                detail: alloc::format!(
1678                                    "round(): scale must be integer, got {:?}",
1679                                    other.data_type()
1680                                ),
1681                            });
1682                        }
1683                    };
1684                    // Convert input to f64 for arithmetic
1685                    // simplicity (PG does NUMERIC math here but
1686                    // SPG's f64 path matches the dominant
1687                    // customer expectation for round(N, scale)
1688                    // patterns).
1689                    let x = match &args[0] {
1690                        Value::SmallInt(v) => f64::from(*v),
1691                        Value::Int(v) => f64::from(*v),
1692                        Value::BigInt(v) => *v as f64,
1693                        Value::Float(v) => *v,
1694                        Value::Numeric { scaled, scale } => {
1695                            (*scaled as f64) / f64_powi(10.0, i32::from(*scale))
1696                        }
1697                        other => {
1698                            return Err(EvalError::TypeMismatch {
1699                                detail: alloc::format!(
1700                                    "round() needs numeric x, got {:?}",
1701                                    other.data_type()
1702                                ),
1703                            });
1704                        }
1705                    };
1706                    // Avoid float precision drift from the
1707                    // 10^(-k) reciprocal — for n<0 work with the
1708                    // positive-exponent form: round(x / 10^|n|) *
1709                    // 10^|n|.
1710                    let result = if n >= 0 {
1711                        let factor = f64_powi(10.0, n);
1712                        f64_round_half_away(x * factor) / factor
1713                    } else {
1714                        let factor = f64_powi(10.0, -n);
1715                        f64_round_half_away(x / factor) * factor
1716                    };
1717                    Ok(Value::Float(result))
1718                }
1719                _ => Err(EvalError::TypeMismatch {
1720                    detail: alloc::format!("round() takes 1 or 2 args, got {}", args.len()),
1721                }),
1722            }
1723        }
1724        "ceil" | "ceiling" => {
1725            if args.len() != 1 {
1726                return Err(EvalError::TypeMismatch {
1727                    detail: alloc::format!("ceil() takes 1 arg, got {}", args.len()),
1728                });
1729            }
1730            match &args[0] {
1731                Value::Null => Ok(Value::Null),
1732                Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1733                Value::Float(x) => Ok(Value::Float(f64_ceil(*x))),
1734                Value::Numeric { scaled, scale } => {
1735                    let factor = pow10_i128(*scale);
1736                    let q = scaled.div_euclid(factor);
1737                    let r = scaled.rem_euclid(factor);
1738                    let result = if r == 0 { q } else { q + 1 };
1739                    Ok(Value::Numeric {
1740                        scaled: result * factor,
1741                        scale: *scale,
1742                    })
1743                }
1744                other => Err(EvalError::TypeMismatch {
1745                    detail: alloc::format!("ceil() needs numeric, got {:?}", other.data_type()),
1746                }),
1747            }
1748        }
1749        "floor" => {
1750            if args.len() != 1 {
1751                return Err(EvalError::TypeMismatch {
1752                    detail: alloc::format!("floor() takes 1 arg, got {}", args.len()),
1753                });
1754            }
1755            match &args[0] {
1756                Value::Null => Ok(Value::Null),
1757                Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1758                Value::Float(x) => Ok(Value::Float(f64_floor(*x))),
1759                Value::Numeric { scaled, scale } => {
1760                    let factor = pow10_i128(*scale);
1761                    let q = scaled.div_euclid(factor);
1762                    // div_euclid rounds toward -infinity which is
1763                    // exactly the floor semantic — perfect for
1764                    // negative values.
1765                    Ok(Value::Numeric {
1766                        scaled: q * factor,
1767                        scale: *scale,
1768                    })
1769                }
1770                other => Err(EvalError::TypeMismatch {
1771                    detail: alloc::format!("floor() needs numeric, got {:?}", other.data_type()),
1772                }),
1773            }
1774        }
1775        "left" => string_left_right(args, true, "left"),
1776        "right" => string_left_right(args, false, "right"),
1777        "strpos" => {
1778            if args.len() != 2 {
1779                return Err(EvalError::TypeMismatch {
1780                    detail: alloc::format!(
1781                        "strpos() takes 2 args (haystack, needle), got {}",
1782                        args.len()
1783                    ),
1784                });
1785            }
1786            if args.iter().any(|v| matches!(v, Value::Null)) {
1787                return Ok(Value::Null);
1788            }
1789            let haystack = value_to_format_text(&args[0]);
1790            let needle = value_to_format_text(&args[1]);
1791            if needle.is_empty() {
1792                return Ok(Value::Int(1));
1793            }
1794            let h_chars: Vec<char> = haystack.chars().collect();
1795            let n_chars: Vec<char> = needle.chars().collect();
1796            if n_chars.len() > h_chars.len() {
1797                return Ok(Value::Int(0));
1798            }
1799            for i in 0..=h_chars.len() - n_chars.len() {
1800                if h_chars[i..i + n_chars.len()] == n_chars[..] {
1801                    return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1802                }
1803            }
1804            Ok(Value::Int(0))
1805        }
1806        "lpad" => string_pad(args, true, "lpad"),
1807        "rpad" => string_pad(args, false, "rpad"),
1808        "repeat" => {
1809            if args.len() != 2 {
1810                return Err(EvalError::TypeMismatch {
1811                    detail: alloc::format!("repeat() takes 2 args, got {}", args.len()),
1812                });
1813            }
1814            if args.iter().any(|v| matches!(v, Value::Null)) {
1815                return Ok(Value::Null);
1816            }
1817            let s = value_to_format_text(&args[0]);
1818            let n = match &args[1] {
1819                Value::SmallInt(x) => i64::from(*x),
1820                Value::Int(x) => i64::from(*x),
1821                Value::BigInt(x) => *x,
1822                other => {
1823                    return Err(EvalError::TypeMismatch {
1824                        detail: alloc::format!(
1825                            "repeat(): n must be integer, got {:?}",
1826                            other.data_type()
1827                        ),
1828                    });
1829                }
1830            };
1831            if n <= 0 {
1832                return Ok(Value::Text(String::new()));
1833            }
1834            // Safety cap so a runaway argument doesn't allocate
1835            // terabytes. PG itself enforces a similar cap via
1836            // work_mem; SPG inherits a defensive 64MiB cap.
1837            const MAX_REPEAT_BYTES: usize = 64 * 1024 * 1024;
1838            let needed =
1839                s.len()
1840                    .checked_mul(n as usize)
1841                    .ok_or_else(|| EvalError::TypeMismatch {
1842                        detail: "repeat(): result size overflows usize".into(),
1843                    })?;
1844            if needed > MAX_REPEAT_BYTES {
1845                return Err(EvalError::TypeMismatch {
1846                    detail: alloc::format!(
1847                        "repeat(): result would exceed {MAX_REPEAT_BYTES} bytes"
1848                    ),
1849                });
1850            }
1851            Ok(Value::Text(s.repeat(n as usize)))
1852        }
1853        "split_part" => {
1854            if args.len() != 3 {
1855                return Err(EvalError::TypeMismatch {
1856                    detail: alloc::format!(
1857                        "split_part() takes 3 args (string, delim, n), got {}",
1858                        args.len()
1859                    ),
1860                });
1861            }
1862            if args.iter().any(|v| matches!(v, Value::Null)) {
1863                return Ok(Value::Null);
1864            }
1865            let s = value_to_format_text(&args[0]);
1866            let delim = value_to_format_text(&args[1]);
1867            if delim.is_empty() {
1868                return Err(EvalError::TypeMismatch {
1869                    detail: "split_part(): delimiter cannot be empty".into(),
1870                });
1871            }
1872            let n = match &args[2] {
1873                Value::SmallInt(x) => i64::from(*x),
1874                Value::Int(x) => i64::from(*x),
1875                Value::BigInt(x) => *x,
1876                other => {
1877                    return Err(EvalError::TypeMismatch {
1878                        detail: alloc::format!(
1879                            "split_part(): n must be integer, got {:?}",
1880                            other.data_type()
1881                        ),
1882                    });
1883                }
1884            };
1885            if n == 0 {
1886                return Err(EvalError::TypeMismatch {
1887                    detail: "split_part(): n must be nonzero (PG: 1-indexed)".into(),
1888                });
1889            }
1890            let parts: alloc::vec::Vec<&str> = s.split(&delim[..]).collect();
1891            let total = parts.len() as i64;
1892            let idx = if n > 0 {
1893                n - 1
1894            } else {
1895                // n=-1 → last (idx = total - 1)
1896                total + n
1897            };
1898            if idx < 0 || idx >= total {
1899                return Ok(Value::Text(String::new()));
1900            }
1901            Ok(Value::Text(parts[idx as usize].to_string()))
1902        }
1903        // PG `translate(s, from, to)` — char-by-char positional
1904        // mapping. Each codepoint in `from` is replaced by the
1905        // codepoint at the same index in `to`. When `from` is
1906        // longer than `to`, the extra `from` codepoints are
1907        // DELETED (not replaced). When `from` has duplicates,
1908        // the FIRST occurrence's mapping wins. NULL → NULL.
1909        "translate" => {
1910            if args.len() != 3 {
1911                return Err(EvalError::TypeMismatch {
1912                    detail: alloc::format!("translate() takes 3 args, got {}", args.len()),
1913                });
1914            }
1915            if args.iter().any(|v| matches!(v, Value::Null)) {
1916                return Ok(Value::Null);
1917            }
1918            let s = value_to_format_text(&args[0]);
1919            let from = value_to_format_text(&args[1]);
1920            let to = value_to_format_text(&args[2]);
1921            let from_chars: Vec<char> = from.chars().collect();
1922            let to_chars: Vec<char> = to.chars().collect();
1923            // Build the codepoint map. First occurrence wins.
1924            let mut map: alloc::collections::BTreeMap<char, Option<char>> =
1925                alloc::collections::BTreeMap::new();
1926            for (i, &fc) in from_chars.iter().enumerate() {
1927                if map.contains_key(&fc) {
1928                    continue;
1929                }
1930                let replacement = to_chars.get(i).copied();
1931                map.insert(fc, replacement);
1932            }
1933            let mut out = String::with_capacity(s.len());
1934            for c in s.chars() {
1935                match map.get(&c) {
1936                    Some(Some(r)) => out.push(*r),
1937                    Some(None) => {} // mapped to "deleted"
1938                    None => out.push(c),
1939                }
1940            }
1941            Ok(Value::Text(out))
1942        }
1943        "replace" => {
1944            if args.len() != 3 {
1945                return Err(EvalError::TypeMismatch {
1946                    detail: alloc::format!(
1947                        "replace() takes 3 args (string, from, to), got {}",
1948                        args.len()
1949                    ),
1950                });
1951            }
1952            if args.iter().any(|v| matches!(v, Value::Null)) {
1953                return Ok(Value::Null);
1954            }
1955            let s = value_to_format_text(&args[0]);
1956            let from = value_to_format_text(&args[1]);
1957            let to = value_to_format_text(&args[2]);
1958            if from.is_empty() {
1959                return Ok(Value::Text(s));
1960            }
1961            // std `String::replace` matches PG semantics exactly:
1962            // non-overlapping, left-to-right, no re-scan of
1963            // inserted text. Sealed test surface verifies the
1964            // edge cases independently.
1965            Ok(Value::Text(s.replace(&from[..], &to)))
1966        }
1967        "trim" | "btrim" => string_trim(args, TrimSide::Both, "trim"),
1968        "ltrim" => string_trim(args, TrimSide::Left, "ltrim"),
1969        "rtrim" => string_trim(args, TrimSide::Right, "rtrim"),
1970        "concat_ws" => {
1971            if args.is_empty() {
1972                return Err(EvalError::TypeMismatch {
1973                    detail: "concat_ws() requires at least 1 arg (the separator)".into(),
1974                });
1975            }
1976            // NULL separator poisons the result.
1977            let sep = match &args[0] {
1978                Value::Null => return Ok(Value::Null),
1979                v => value_to_format_text(v),
1980            };
1981            let mut out = String::new();
1982            let mut first = true;
1983            for v in &args[1..] {
1984                if matches!(v, Value::Null) {
1985                    continue;
1986                }
1987                if first {
1988                    first = false;
1989                } else {
1990                    out.push_str(&sep);
1991                }
1992                out.push_str(&value_to_format_text(v));
1993            }
1994            Ok(Value::Text(out))
1995        }
1996        // v7.17.0 Phase 3.7 — PG regex function family.
1997        "regexp_matches" => regexp_matches(args),
1998        "regexp_replace" => regexp_replace(args),
1999        "regexp_split_to_array" => regexp_split_to_array(args),
2000        // v7.17.0 Phase 3.P0-28 — PG JSON builder family.
2001        // to_json / to_jsonb coerce any value to JSON text (NULL
2002        // becomes the JSON literal 'null', not SQL NULL).
2003        "to_json" | "to_jsonb" => {
2004            if args.len() != 1 {
2005                return Err(EvalError::TypeMismatch {
2006                    detail: alloc::format!("to_json() takes 1 arg, got {}", args.len()),
2007                });
2008            }
2009            // Json input passes through verbatim — PG identity.
2010            if let Value::Json(s) = &args[0] {
2011                return Ok(Value::Json(s.clone()));
2012            }
2013            Ok(Value::Json(crate::json::value_to_json_text(&args[0])))
2014        }
2015        "json_build_object" | "jsonb_build_object" => crate::json::build_object(args),
2016        "json_build_array" | "jsonb_build_array" => crate::json::build_array(args),
2017        "jsonb_set" | "json_set" => crate::json::set(args),
2018        "jsonb_insert" | "json_insert" => crate::json::insert(args),
2019        // v7.17.0 Phase 3.9 — PG `jsonb_path_query` family.
2020        "jsonb_path_query" | "json_path_query" => {
2021            if args.len() != 2 {
2022                return Err(EvalError::TypeMismatch {
2023                    detail: alloc::format!("jsonb_path_query() takes 2 args, got {}", args.len()),
2024                });
2025            }
2026            crate::json::path_query(&args[0], &args[1])
2027        }
2028        "jsonb_path_query_first" | "json_path_query_first" => {
2029            if args.len() != 2 {
2030                return Err(EvalError::TypeMismatch {
2031                    detail: alloc::format!(
2032                        "jsonb_path_query_first() takes 2 args, got {}",
2033                        args.len()
2034                    ),
2035                });
2036            }
2037            crate::json::path_query_first(&args[0], &args[1])
2038        }
2039        "jsonb_path_query_array" | "json_path_query_array" => {
2040            if args.len() != 2 {
2041                return Err(EvalError::TypeMismatch {
2042                    detail: alloc::format!(
2043                        "jsonb_path_query_array() takes 2 args, got {}",
2044                        args.len()
2045                    ),
2046                });
2047            }
2048            crate::json::path_query_array(&args[0], &args[1])
2049        }
2050        // v7.17.0 Phase 7 — INET / CIDR network helpers.
2051        "host" => inet_host(args),
2052        "network" => inet_network(args),
2053        "masklen" => inet_masklen(args),
2054        // v6.4.3 — encode/decode + error_on_null SQL function bundle.
2055        "encode" => encode_text(args),
2056        "decode" => decode_text(args),
2057        "error_on_null" => error_on_null(args),
2058        // v7.12.1 — PG full-text search lexer / tsquery builders.
2059        // mailrs G-CRIT-3 acceptance path: `to_tsvector('english',
2060        // … || ' ' || … || …)` runs end-to-end against a tsvector
2061        // column with Porter stemming + standard english stopwords.
2062        "to_tsvector" => fts_to_tsvector(args, ctx),
2063        // v7.24 (round-16 C) — setweight(tsvector, 'A'..'D'): label
2064        // every lexeme. mailrs's migrate-016 search trigger builds
2065        // its vector as setweight(to_tsvector(…),'A') || ….
2066        "setweight" => fts_setweight(args),
2067        // v7.24 (round-15) — string_to_array(text, delim): inverse
2068        // of array_to_string. PG semantics: NULL text → NULL,
2069        // '' → empty array, NULL delim → one element per char.
2070        "string_to_array" => fn_string_to_array(args),
2071        "plainto_tsquery" => fts_plainto_tsquery(args, ctx),
2072        "phraseto_tsquery" => fts_phraseto_tsquery(args, ctx),
2073        "websearch_to_tsquery" => fts_websearch_to_tsquery(args, ctx),
2074        "to_tsquery" => fts_to_tsquery(args, ctx),
2075        // v7.12.2 — ranking functions. mailrs's fallback search
2076        // query ORDERs BY ts_rank(search_vector, q) DESC.
2077        "ts_rank" => fts_ts_rank(args),
2078        "ts_rank_cd" => fts_ts_rank_cd(args),
2079        // v7.14.0 — PG dump preamble emits
2080        // `SELECT pg_catalog.set_config('search_path', '', false);`
2081        // and friends. SPG is single-schema; accept-as-no-op
2082        // returning either the new value or NULL.
2083        "set_config" => Ok(args.get(1).cloned().unwrap_or(Value::Null)),
2084        "current_setting" => Ok(Value::Text(String::new())),
2085        // PG `pg_catalog.*` discovery / cast helpers commonly
2086        // emitted by ORMs probing the server. Accept-as-no-op
2087        // with sensible defaults so the dump preamble doesn't
2088        // fail. `pg_get_serial_sequence` returns NULL (no
2089        // sequence — SPG has AUTO_INCREMENT instead).
2090        "pg_get_serial_sequence" | "pg_get_constraintdef" | "pg_get_indexdef" => Ok(Value::Null),
2091        "version" => Ok(Value::Text("PostgreSQL 16 (SPG-compat)".into())),
2092        // v7.17.0 Phase 3.P0-30 — session / introspection functions.
2093        // Engine-level dispatch so these compose inside expressions
2094        // (`WHERE schemaname = current_schema()`, `SELECT *,
2095        // database() AS db FROM t`) — the pgwire layer's canned
2096        // shortcuts only catch the bare top-level SELECT shape.
2097        // SPG is single-database + single-schema; the values
2098        // mirror the wire-layer canned defaults.
2099        "current_database" | "database" => Ok(Value::Text("spg".into())),
2100        "current_schema" => Ok(Value::Text("public".into())),
2101        "current_user" | "session_user" | "user" => Ok(Value::Text("admin".into())),
2102        // v7.17.0 Phase 3.P0-31 — `pg_typeof(any)` returns the
2103        // canonical PG lowercase type name. sqlx / SQLAlchemy /
2104        // Diesel emit this during describe; generic ORMs may
2105        // branch on it (`CASE WHEN pg_typeof(x) = 'jsonb' ...`).
2106        // NULL has no resolved value-level type → 'unknown' per
2107        // PG semantics.
2108        "pg_typeof" => {
2109            if args.len() != 1 {
2110                return Err(EvalError::TypeMismatch {
2111                    detail: format!("pg_typeof() takes 1 arg, got {}", args.len()),
2112                });
2113            }
2114            Ok(Value::Text(pg_typeof_name(&args[0]).into()))
2115        }
2116        // v7.17.0 — `nextval` / `currval` / `setval` are handled
2117        // at the top of this match against the SequenceResolver.
2118        // `lastval()` (no-arg session memory) still degrades to
2119        // NULL pending a Phase 1.1b session tracker.
2120        "lastval" => Ok(Value::Null),
2121        // v7.15.0 — pg_trgm: similarity, show_trgm. Match PG
2122        // semantics: similarity returns Jaccard of trigram sets;
2123        // show_trgm returns the trigram set as TEXT[]. NULL on
2124        // any NULL arg.
2125        "similarity" => {
2126            if args.len() != 2 {
2127                return Err(EvalError::TypeMismatch {
2128                    detail: format!("similarity() takes 2 args, got {}", args.len()),
2129                });
2130            }
2131            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2132                return Ok(Value::Null);
2133            }
2134            let a = match &args[0] {
2135                Value::Text(s) => s.as_str(),
2136                other => {
2137                    return Err(EvalError::TypeMismatch {
2138                        detail: format!("similarity() needs text, got {:?}", other.data_type()),
2139                    });
2140                }
2141            };
2142            let b = match &args[1] {
2143                Value::Text(s) => s.as_str(),
2144                other => {
2145                    return Err(EvalError::TypeMismatch {
2146                        detail: format!("similarity() needs text, got {:?}", other.data_type()),
2147                    });
2148                }
2149            };
2150            // PG returns REAL (f32) — we use Float (f64) and let
2151            // coerce_value narrow on assignment to a REAL column.
2152            Ok(Value::Float(spg_storage::trgm::similarity(a, b)))
2153        }
2154        "show_trgm" => {
2155            if args.len() != 1 {
2156                return Err(EvalError::TypeMismatch {
2157                    detail: format!("show_trgm() takes 1 arg, got {}", args.len()),
2158                });
2159            }
2160            if matches!(args[0], Value::Null) {
2161                return Ok(Value::Null);
2162            }
2163            let s = match &args[0] {
2164                Value::Text(s) => s.as_str(),
2165                other => {
2166                    return Err(EvalError::TypeMismatch {
2167                        detail: format!("show_trgm() needs text, got {:?}", other.data_type()),
2168                    });
2169                }
2170            };
2171            // PG returns the trigram set sorted lexicographically.
2172            // `extract_trigrams` already returns a BTreeSet so the
2173            // order is canonical.
2174            let trigrams: Vec<Option<String>> = spg_storage::trgm::extract_trigrams(s)
2175                .into_iter()
2176                .map(Some)
2177                .collect();
2178            Ok(Value::TextArray(trigrams))
2179        }
2180        other => Err(EvalError::TypeMismatch {
2181            detail: format!("unknown function `{other}`"),
2182        }),
2183    }
2184}
2185
2186/// v7.12.2 — `ts_rank([weights,] vec, query [, norm])`. v7.12.2
2187/// supports the canonical `(vec, query)` two-arg form mailrs uses;
2188/// optional weight-array / normalisation arguments error with an
2189/// "unsupported" message rather than silently changing semantics.
2190fn fts_ts_rank(args: &[Value]) -> Result<Value, EvalError> {
2191    let (vec, query) = parse_rank_args("ts_rank", args)?;
2192    match (vec, query) {
2193        (None, _) | (_, None) => Ok(Value::Null),
2194        (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank(&v, &q)))),
2195    }
2196}
2197
2198fn fts_ts_rank_cd(args: &[Value]) -> Result<Value, EvalError> {
2199    let (vec, query) = parse_rank_args("ts_rank_cd", args)?;
2200    match (vec, query) {
2201        (None, _) | (_, None) => Ok(Value::Null),
2202        (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank_cd(&v, &q)))),
2203    }
2204}
2205
2206fn parse_rank_args(
2207    name: &str,
2208    args: &[Value],
2209) -> Result<
2210    (
2211        Option<Vec<spg_storage::TsLexeme>>,
2212        Option<spg_storage::TsQueryAst>,
2213    ),
2214    EvalError,
2215> {
2216    if args.len() != 2 {
2217        return Err(EvalError::TypeMismatch {
2218            detail: format!(
2219                "{name}() takes 2 args in v7.12.2 (weights array + normalisation flag are v7.12.x carve-out), got {}",
2220                args.len()
2221            ),
2222        });
2223    }
2224    let vec = match &args[0] {
2225        Value::Null => None,
2226        Value::TsVector(v) => Some(v.clone()),
2227        other => {
2228            return Err(EvalError::TypeMismatch {
2229                detail: format!(
2230                    "{name}() first arg must be tsvector, got {:?}",
2231                    other.data_type()
2232                ),
2233            });
2234        }
2235    };
2236    let query = match &args[1] {
2237        Value::Null => None,
2238        Value::TsQuery(q) => Some(q.clone()),
2239        other => {
2240            return Err(EvalError::TypeMismatch {
2241                detail: format!(
2242                    "{name}() second arg must be tsquery, got {:?}",
2243                    other.data_type()
2244                ),
2245            });
2246        }
2247    };
2248    Ok((vec, query))
2249}
2250
2251/// v7.12.2 — `tsvector @@ tsquery` match operator. Either
2252/// ordering accepted (PG semantics). NULL on either side → NULL.
2253/// Anything that isn't tsvector/tsquery on either side is a type
2254/// mismatch. Returns BOOL.
2255fn ts_match(l: Value, r: Value) -> Result<Value, EvalError> {
2256    let (vec, query) = match (l, r) {
2257        (Value::Null, _) | (_, Value::Null) => return Ok(Value::Null),
2258        (Value::TsVector(v), Value::TsQuery(q)) => (v, q),
2259        (Value::TsQuery(q), Value::TsVector(v)) => (v, q),
2260        (l, r) => {
2261            return Err(EvalError::TypeMismatch {
2262                detail: format!(
2263                    "@@ requires (tsvector, tsquery), got ({:?}, {:?})",
2264                    l.data_type(),
2265                    r.data_type()
2266                ),
2267            });
2268        }
2269    };
2270    Ok(Value::Bool(crate::fts::ts_query_matches(&vec, &query)))
2271}
2272
2273/// v7.12.1 — `to_tsvector([config,] text)`. With one arg the
2274/// session-resolved `default_text_search_config` is used (defaults
2275/// to `simple` when unset); with two args the first picks the
2276/// config. NULL text → NULL.
2277fn fts_to_tsvector(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2278    let (config, text) = parse_fts_args("to_tsvector", args, ctx)?;
2279    match text {
2280        None => Ok(Value::Null),
2281        Some(t) => Ok(Value::TsVector(crate::fts::to_tsvector(config, &t))),
2282    }
2283}
2284
2285/// v7.24 (round-16 C) — `setweight(tsvector, "char")`. Relabels
2286/// every lexeme with the given PG weight letter (A=3 B=2 C=1 D=0).
2287fn fts_setweight(args: &[Value]) -> Result<Value, EvalError> {
2288    let [vec_arg, weight_arg] = args else {
2289        return Err(EvalError::TypeMismatch {
2290            detail: alloc::format!("setweight expects 2 arguments, got {}", args.len()),
2291        });
2292    };
2293    if matches!(vec_arg, Value::Null) || matches!(weight_arg, Value::Null) {
2294        return Ok(Value::Null);
2295    }
2296    let Value::TsVector(lexemes) = vec_arg else {
2297        return Err(EvalError::TypeMismatch {
2298            detail: alloc::format!(
2299                "setweight expects a tsvector, got {:?}",
2300                vec_arg.data_type()
2301            ),
2302        });
2303    };
2304    let Value::Text(w) = weight_arg else {
2305        return Err(EvalError::TypeMismatch {
2306            detail: alloc::format!(
2307                "setweight expects a weight letter, got {:?}",
2308                weight_arg.data_type()
2309            ),
2310        });
2311    };
2312    let weight = match w.to_ascii_uppercase().as_str() {
2313        "A" => 3,
2314        "B" => 2,
2315        "C" => 1,
2316        "D" => 0,
2317        other => {
2318            return Err(EvalError::TypeMismatch {
2319                detail: alloc::format!("unrecognized weight: {other:?} (expected A, B, C or D)"),
2320            });
2321        }
2322    };
2323    let mut out = lexemes.clone();
2324    for lex in &mut out {
2325        lex.weight = weight;
2326    }
2327    Ok(Value::TsVector(out))
2328}
2329
2330/// v7.24 (round-15) — `string_to_array(text, delimiter)`.
2331fn fn_string_to_array(args: &[Value]) -> Result<Value, EvalError> {
2332    let [text_arg, delim_arg] = args else {
2333        return Err(EvalError::TypeMismatch {
2334            detail: alloc::format!("string_to_array expects 2 arguments, got {}", args.len()),
2335        });
2336    };
2337    let text = match text_arg {
2338        Value::Null => return Ok(Value::Null),
2339        Value::Text(t) => t,
2340        other => {
2341            return Err(EvalError::TypeMismatch {
2342                detail: alloc::format!("string_to_array expects text, got {:?}", other.data_type()),
2343            });
2344        }
2345    };
2346    // PG (9.1+): empty input → empty array, regardless of delimiter.
2347    if text.is_empty() {
2348        return Ok(Value::TextArray(Vec::new()));
2349    }
2350    let parts: Vec<Option<String>> = match delim_arg {
2351        // NULL delimiter → one element per character.
2352        Value::Null => text.chars().map(|c| Some(c.to_string())).collect(),
2353        Value::Text(d) if d.is_empty() => alloc::vec![Some(text.clone())],
2354        Value::Text(d) => text
2355            .split(d.as_str())
2356            .map(|p| Some(p.to_string()))
2357            .collect(),
2358        other => {
2359            return Err(EvalError::TypeMismatch {
2360                detail: alloc::format!(
2361                    "string_to_array delimiter must be text, got {:?}",
2362                    other.data_type()
2363                ),
2364            });
2365        }
2366    };
2367    Ok(Value::TextArray(parts))
2368}
2369
2370fn fts_plainto_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2371    let (config, text) = parse_fts_args("plainto_tsquery", args, ctx)?;
2372    match text {
2373        None => Ok(Value::Null),
2374        Some(t) => Ok(Value::TsQuery(crate::fts::plainto_tsquery(config, &t))),
2375    }
2376}
2377
2378fn fts_phraseto_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2379    let (config, text) = parse_fts_args("phraseto_tsquery", args, ctx)?;
2380    match text {
2381        None => Ok(Value::Null),
2382        Some(t) => Ok(Value::TsQuery(crate::fts::phraseto_tsquery(config, &t))),
2383    }
2384}
2385
2386fn fts_websearch_to_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2387    let (config, text) = parse_fts_args("websearch_to_tsquery", args, ctx)?;
2388    match text {
2389        None => Ok(Value::Null),
2390        Some(t) => Ok(Value::TsQuery(crate::fts::websearch_to_tsquery(config, &t))),
2391    }
2392}
2393
2394fn fts_to_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2395    let (config, text) = parse_fts_args("to_tsquery", args, ctx)?;
2396    match text {
2397        None => Ok(Value::Null),
2398        Some(t) => Ok(Value::TsQuery(crate::fts::to_tsquery(config, &t)?)),
2399    }
2400}
2401
2402/// Parse the `(config, text)` / `(text)` argument pair shared by
2403/// all FTS builders. Returns the resolved config + the text
2404/// payload (None when text is NULL). The one-arg form pulls the
2405/// config from the session's `default_text_search_config`.
2406fn parse_fts_args(
2407    name: &str,
2408    args: &[Value],
2409    ctx: &EvalContext<'_>,
2410) -> Result<(crate::fts::TsConfig, Option<String>), EvalError> {
2411    let (config_arg, text_arg) = match args {
2412        [t] => (None, t),
2413        [c, t] => (Some(c), t),
2414        _ => {
2415            return Err(EvalError::TypeMismatch {
2416                detail: format!("{name}() takes 1 or 2 args, got {}", args.len()),
2417            });
2418        }
2419    };
2420    let config = match config_arg {
2421        None => match ctx.default_text_search_config {
2422            Some(name_str) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
2423                EvalError::TypeMismatch {
2424                    detail: format!(
2425                        "text search config not implemented: {name_str:?} (supported: simple, english)"
2426                    ),
2427                }
2428            })?,
2429            None => crate::fts::TsConfig::Simple,
2430        },
2431        Some(Value::Null) => return Ok((crate::fts::TsConfig::Simple, None)),
2432        Some(Value::Text(name_str)) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
2433            EvalError::TypeMismatch {
2434                detail: format!(
2435                    "text search config not implemented: {name_str:?} (supported: simple, english)"
2436                ),
2437            }
2438        })?,
2439        Some(other) => {
2440            return Err(EvalError::TypeMismatch {
2441                detail: format!(
2442                    "{name}() config arg must be text, got {:?}",
2443                    other.data_type()
2444                ),
2445            });
2446        }
2447    };
2448    let text = match text_arg {
2449        Value::Null => None,
2450        Value::Text(s) => Some(s.clone()),
2451        other => {
2452            return Err(EvalError::TypeMismatch {
2453                detail: format!(
2454                    "{name}() text arg must be text, got {:?}",
2455                    other.data_type()
2456                ),
2457            });
2458        }
2459    };
2460    Ok((config, text))
2461}
2462
2463/// v6.4.3 — `encode(bytes_as_text, format)`. PG works on bytea
2464/// arguments; SPG's value space treats Text as the byte container
2465/// (raw UTF-8 bytes). Supported formats: base64 (PG default),
2466/// base64url (RFC 4648 §5), base32hex (RFC 4648 §7 extended-hex),
2467/// hex.
2468fn encode_text(args: &[Value]) -> Result<Value, EvalError> {
2469    if args.len() != 2 {
2470        return Err(EvalError::TypeMismatch {
2471            detail: format!("encode() takes 2 args, got {}", args.len()),
2472        });
2473    }
2474    if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2475        return Ok(Value::Null);
2476    }
2477    let bytes: &[u8] = match &args[0] {
2478        Value::Text(s) => s.as_bytes(),
2479        other => {
2480            return Err(EvalError::TypeMismatch {
2481                detail: format!("encode() expects text bytes, got {:?}", other.data_type()),
2482            });
2483        }
2484    };
2485    let fmt = match &args[1] {
2486        Value::Text(s) => s.to_ascii_lowercase(),
2487        other => {
2488            return Err(EvalError::TypeMismatch {
2489                detail: format!("encode() format must be text, got {:?}", other.data_type()),
2490            });
2491        }
2492    };
2493    let out = match fmt.as_str() {
2494        "base64" => b64_encode(bytes, B64_STD),
2495        "base64url" => b64_encode(bytes, B64_URL),
2496        "base32hex" => b32hex_encode(bytes),
2497        "hex" => hex_encode(bytes),
2498        other => {
2499            return Err(EvalError::TypeMismatch {
2500                detail: format!("encode(): unknown format `{other}`"),
2501            });
2502        }
2503    };
2504    Ok(Value::Text(out))
2505}
2506
2507/// v6.4.3 — `decode(text, format)`. Inverse of `encode`; returns
2508/// Text containing the raw decoded bytes (caller may CAST to bytea
2509/// equivalent if SPG adds bytea later).
2510fn decode_text(args: &[Value]) -> Result<Value, EvalError> {
2511    if args.len() != 2 {
2512        return Err(EvalError::TypeMismatch {
2513            detail: format!("decode() takes 2 args, got {}", args.len()),
2514        });
2515    }
2516    if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2517        return Ok(Value::Null);
2518    }
2519    let text = match &args[0] {
2520        Value::Text(s) => s.as_str(),
2521        other => {
2522            return Err(EvalError::TypeMismatch {
2523                detail: format!("decode() expects text, got {:?}", other.data_type()),
2524            });
2525        }
2526    };
2527    let fmt = match &args[1] {
2528        Value::Text(s) => s.to_ascii_lowercase(),
2529        other => {
2530            return Err(EvalError::TypeMismatch {
2531                detail: format!("decode() format must be text, got {:?}", other.data_type()),
2532            });
2533        }
2534    };
2535    let bytes = match fmt.as_str() {
2536        "base64" => b64_decode(text, B64_STD)?,
2537        "base64url" => b64_decode(text, B64_URL)?,
2538        "base32hex" => b32hex_decode(text)?,
2539        "hex" => hex_decode(text)?,
2540        other => {
2541            return Err(EvalError::TypeMismatch {
2542                detail: format!("decode(): unknown format `{other}`"),
2543            });
2544        }
2545    };
2546    let s = String::from_utf8(bytes).map_err(|_| EvalError::TypeMismatch {
2547        detail: "decode(): result bytes are not valid UTF-8 (SPG stores raw bytes as Text)".into(),
2548    })?;
2549    Ok(Value::Text(s))
2550}
2551
2552/// v6.4.3 — `error_on_null(v)`. Returns `v` unchanged if non-NULL;
2553/// errors otherwise. Convenience to assert NOT NULL inside an
2554/// expression without wrapping it in COALESCE + raise hacks.
2555fn error_on_null(args: &[Value]) -> Result<Value, EvalError> {
2556    if args.len() != 1 {
2557        return Err(EvalError::TypeMismatch {
2558            detail: format!("error_on_null() takes 1 arg, got {}", args.len()),
2559        });
2560    }
2561    if matches!(args[0], Value::Null) {
2562        return Err(EvalError::TypeMismatch {
2563            detail: "error_on_null(): argument is NULL".into(),
2564        });
2565    }
2566    Ok(args[0].clone())
2567}
2568
2569// ── byte-level encoders ───────────────────────────────────────────
2570
/// Standard base64 alphabet (RFC 4648 §4); selected by the `base64` format name.
const B64_STD: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// URL- and filename-safe base64 alphabet (RFC 4648 §5); selected by `base64url`.
const B64_URL: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
/// Extended-hex base32 alphabet (RFC 4648 §7); used by the `base32hex` codec.
const B32HEX_ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
2574
/// Base64-encodes `bytes` using the 64-symbol alphabet `alpha`.
///
/// Input is consumed in 3-byte groups, each producing four symbols.
/// A trailing group of one or two bytes is zero-extended and the
/// unused symbol positions are filled with `=` padding, so the
/// output length is always a multiple of four.
fn b64_encode(bytes: &[u8], alpha: &[u8; 64]) -> String {
    // Picks the 6-bit field of `group` that starts at bit `shift`.
    let symbol = |group: u32, shift: u32| char::from(alpha[((group >> shift) & 0x3f) as usize]);

    let mut encoded = String::with_capacity((bytes.len() + 2) / 3 * 4);
    for chunk in bytes.chunks(3) {
        let first = u32::from(chunk[0]);
        let second = chunk.get(1).map_or(0, |&b| u32::from(b));
        let third = chunk.get(2).map_or(0, |&b| u32::from(b));
        let group = (first << 16) | (second << 8) | third;

        encoded.push(symbol(group, 18));
        encoded.push(symbol(group, 12));
        encoded.push(if chunk.len() > 1 { symbol(group, 6) } else { '=' });
        encoded.push(if chunk.len() > 2 { symbol(group, 0) } else { '=' });
    }
    encoded
}
2602
2603fn b64_decode(text: &str, alpha: &[u8; 64]) -> Result<Vec<u8>, EvalError> {
2604    let mut lookup = [255u8; 256];
2605    for (i, &c) in alpha.iter().enumerate() {
2606        lookup[c as usize] = i as u8;
2607    }
2608    let mut out = Vec::with_capacity(text.len() * 3 / 4);
2609    let mut buf: u32 = 0;
2610    let mut bits: u32 = 0;
2611    for c in text.bytes() {
2612        if c == b'=' {
2613            break;
2614        }
2615        if c == b'\n' || c == b'\r' || c == b' ' {
2616            continue;
2617        }
2618        let v = lookup[c as usize];
2619        if v == 255 {
2620            return Err(EvalError::TypeMismatch {
2621                detail: format!("decode(base64): invalid char {:?}", c as char),
2622            });
2623        }
2624        buf = (buf << 6) | v as u32;
2625        bits += 6;
2626        if bits >= 8 {
2627            bits -= 8;
2628            out.push(((buf >> bits) & 0xff) as u8);
2629        }
2630    }
2631    Ok(out)
2632}
2633
2634fn b32hex_encode(bytes: &[u8]) -> String {
2635    let mut out = String::with_capacity((bytes.len() * 8 + 4) / 5);
2636    let mut buf: u64 = 0;
2637    let mut bits: u32 = 0;
2638    for &b in bytes {
2639        buf = (buf << 8) | b as u64;
2640        bits += 8;
2641        while bits >= 5 {
2642            bits -= 5;
2643            out.push(B32HEX_ALPHABET[((buf >> bits) & 0x1f) as usize] as char);
2644        }
2645    }
2646    if bits > 0 {
2647        out.push(B32HEX_ALPHABET[((buf << (5 - bits)) & 0x1f) as usize] as char);
2648    }
2649    // Pad to multiple of 8.
2650    while out.len() % 8 != 0 {
2651        out.push('=');
2652    }
2653    out
2654}
2655
2656fn b32hex_decode(text: &str) -> Result<Vec<u8>, EvalError> {
2657    let mut lookup = [255u8; 256];
2658    for (i, &c) in B32HEX_ALPHABET.iter().enumerate() {
2659        lookup[c as usize] = i as u8;
2660        // base32hex is case-insensitive — also map lowercase.
2661        let lower = (c as char).to_ascii_lowercase() as u8;
2662        lookup[lower as usize] = i as u8;
2663    }
2664    let mut out = Vec::with_capacity(text.len() * 5 / 8);
2665    let mut buf: u64 = 0;
2666    let mut bits: u32 = 0;
2667    for c in text.bytes() {
2668        if c == b'=' {
2669            break;
2670        }
2671        if c == b'\n' || c == b'\r' || c == b' ' {
2672            continue;
2673        }
2674        let v = lookup[c as usize];
2675        if v == 255 {
2676            return Err(EvalError::TypeMismatch {
2677                detail: format!("decode(base32hex): invalid char {:?}", c as char),
2678            });
2679        }
2680        buf = (buf << 5) | v as u64;
2681        bits += 5;
2682        if bits >= 8 {
2683            bits -= 8;
2684            out.push(((buf >> bits) & 0xff) as u8);
2685        }
2686    }
2687    Ok(out)
2688}
2689
/// Renders `bytes` as lowercase hexadecimal, two digits per byte
/// (high nibble first).
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        for nibble in [byte >> 4, byte & 0x0f] {
            hex.push(char::from(DIGITS[usize::from(nibble)]));
        }
    }
    hex
}
2699
2700fn hex_decode(text: &str) -> Result<Vec<u8>, EvalError> {
2701    let trimmed = text.trim();
2702    if trimmed.len() % 2 != 0 {
2703        return Err(EvalError::TypeMismatch {
2704            detail: "decode(hex): input length must be even".into(),
2705        });
2706    }
2707    let mut out = Vec::with_capacity(trimmed.len() / 2);
2708    let mut hi: u8 = 0;
2709    for (i, c) in trimmed.bytes().enumerate() {
2710        let v = match c {
2711            b'0'..=b'9' => c - b'0',
2712            b'a'..=b'f' => c - b'a' + 10,
2713            b'A'..=b'F' => c - b'A' + 10,
2714            _ => {
2715                return Err(EvalError::TypeMismatch {
2716                    detail: format!("decode(hex): invalid char {:?}", c as char),
2717                });
2718            }
2719        };
2720        if i % 2 == 0 {
2721            hi = v;
2722        } else {
2723            out.push((hi << 4) | v);
2724        }
2725    }
2726    Ok(out)
2727}
2728
2729/// `date_part(field_text, source)` — function form of `EXTRACT(field FROM
2730/// source)`. Same component dispatch (DATE / TIMESTAMP / INTERVAL) and
2731/// same `BigInt` return shape; PG returns double precision but we keep the
2732/// integer convention so the runner's `query I` shape works unchanged.
2733fn date_part(args: &[Value]) -> Result<Value, EvalError> {
2734    use spg_sql::ast::ExtractField as F;
2735    if args.len() != 2 {
2736        return Err(EvalError::TypeMismatch {
2737            detail: format!("date_part() takes 2 args, got {}", args.len()),
2738        });
2739    }
2740    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
2741        return Ok(Value::Null);
2742    }
2743    let Value::Text(field_name) = &args[0] else {
2744        return Err(EvalError::TypeMismatch {
2745            detail: format!(
2746                "date_part() needs a text field, got {:?}",
2747                args[0].data_type()
2748            ),
2749        });
2750    };
2751    let field = match field_name.to_ascii_lowercase().as_str() {
2752        "year" => F::Year,
2753        "month" => F::Month,
2754        "day" => F::Day,
2755        "hour" => F::Hour,
2756        "minute" => F::Minute,
2757        "second" => F::Second,
2758        "microsecond" | "microseconds" => F::Microsecond,
2759        "epoch" => F::Epoch,
2760        other => {
2761            return Err(EvalError::TypeMismatch {
2762                detail: format!(
2763                    "unknown date_part field {other:?}; \
2764                     supported: year, month, day, hour, minute, second, microsecond"
2765                ),
2766            });
2767        }
2768    };
2769    extract_field(field, &args[1])
2770}
2771
2772/// `age(t1, t2)` — return `t1 - t2` as an INTERVAL. v2.12 produces a
2773/// micros-only interval (no months normalisation) because PG's
2774/// month-justification rule is sensitive to the day-of-month walk and
2775/// adds material complexity for marginal corpus value.
2776///
2777/// `age(t)` (single-arg form) is intentionally unsupported in v2.12:
2778/// the dispatcher errors instead of guessing a clock source. Callers
2779/// who want PG's `age(t)` semantics should write `age(CURRENT_DATE, t)`
2780/// explicitly so the clock reference is visible at the SQL layer.
2781fn age(args: &[Value]) -> Result<Value, EvalError> {
2782    if args.is_empty() || args.len() > 2 {
2783        return Err(EvalError::TypeMismatch {
2784            detail: format!("age() takes 1 or 2 args, got {}", args.len()),
2785        });
2786    }
2787    if args.iter().any(|v| matches!(v, Value::Null)) {
2788        return Ok(Value::Null);
2789    }
2790    // Coerce to TIMESTAMP micros — DATE lifts to midnight; TIMESTAMP
2791    // stays as-is; anything else errors.
2792    let to_micros = |v: &Value| -> Result<i64, EvalError> {
2793        match v {
2794            Value::Timestamp(t) => Ok(*t),
2795            Value::Date(d) => Ok(i64::from(*d) * 86_400_000_000),
2796            other => Err(EvalError::TypeMismatch {
2797                detail: format!("age() needs DATE or TIMESTAMP, got {:?}", other.data_type()),
2798            }),
2799        }
2800    };
2801    if args.len() == 1 {
2802        return Err(EvalError::TypeMismatch {
2803            detail: "single-arg age() is unsupported in v2.12 \
2804                     (use age(CURRENT_DATE, t) explicitly)"
2805                .into(),
2806        });
2807    }
2808    let a = to_micros(&args[0])?;
2809    let b = to_micros(&args[1])?;
2810    let delta = a.checked_sub(b).ok_or(EvalError::TypeMismatch {
2811        detail: "age() subtraction overflows i64 microseconds".into(),
2812    })?;
2813    Ok(Value::Interval {
2814        months: 0,
2815        micros: delta,
2816    })
2817}
2818
2819// `to_char(value, format)` — render a DATE / TIMESTAMP through a PG
2820// format template. Supports the high-traffic placeholders:
2821//   YYYY YY MM Mon Month DD HH24 HH12 MI SS MS US AM PM
2822// Unrecognised characters pass through literally so the template's
2823// punctuation ('-', ':', ' ', '/') needs no escape mechanism.
2824
2825// ─── v7.17.0 Phase 7 — INET / CIDR text helpers ───────────────────────
2826//
2827// SPG stores network address types as Text. The host() / network() /
2828// masklen() helpers parse the textual `addr[/mask]` form and return
2829// the constituent pieces, matching PG's contract for the dominant
2830// customer surface (Django ORM / Rails ORM normalisation).
2831
2832fn inet_host(args: &[Value]) -> Result<Value, EvalError> {
2833    let s = match args {
2834        [Value::Text(s)] => s.clone(),
2835        [Value::Null] => return Ok(Value::Null),
2836        _ => {
2837            return Err(EvalError::TypeMismatch {
2838                detail: alloc::format!("host() takes one TEXT arg, got {} args", args.len()),
2839            });
2840        }
2841    };
2842    let host = s.split('/').next().unwrap_or("").to_string();
2843    Ok(Value::Text(host))
2844}
2845
2846fn inet_network(args: &[Value]) -> Result<Value, EvalError> {
2847    let s = match args {
2848        [Value::Text(s)] => s.clone(),
2849        [Value::Null] => return Ok(Value::Null),
2850        _ => {
2851            return Err(EvalError::TypeMismatch {
2852                detail: alloc::format!("network() takes one TEXT arg, got {} args", args.len()),
2853            });
2854        }
2855    };
2856    // For a `host/mask` form return the masked-network address.
2857    // SPG ships the simple "drop trailing octets per byte" path
2858    // for IPv4; full bit-level masking is out of v7.17 scope.
2859    let mut split = s.splitn(2, '/');
2860    let host = split.next().unwrap_or("").to_string();
2861    let mask: u32 = split.next().and_then(|m| m.parse().ok()).unwrap_or(32);
2862    if !host.contains('.') {
2863        // IPv6 / MACADDR — return the input unmasked.
2864        return Ok(Value::Text(s));
2865    }
2866    let octets: Vec<&str> = host.split('.').collect();
2867    if octets.len() != 4 {
2868        return Ok(Value::Text(s));
2869    }
2870    let keep_bytes = ((mask + 7) / 8) as usize;
2871    let mut out = alloc::string::String::new();
2872    for (i, oct) in octets.iter().enumerate() {
2873        if i > 0 {
2874            out.push('.');
2875        }
2876        if i < keep_bytes {
2877            out.push_str(oct);
2878        } else {
2879            out.push('0');
2880        }
2881    }
2882    out.push('/');
2883    out.push_str(&mask.to_string());
2884    Ok(Value::Text(out))
2885}
2886
2887fn inet_masklen(args: &[Value]) -> Result<Value, EvalError> {
2888    let s = match args {
2889        [Value::Text(s)] => s.clone(),
2890        [Value::Null] => return Ok(Value::Null),
2891        _ => {
2892            return Err(EvalError::TypeMismatch {
2893                detail: alloc::format!("masklen() takes one TEXT arg, got {} args", args.len()),
2894            });
2895        }
2896    };
2897    let mask: i32 = s
2898        .split_once('/')
2899        .and_then(|(_, m)| m.parse().ok())
2900        .unwrap_or(32);
2901    Ok(Value::Int(mask))
2902}
2903
2904// ─── v7.17.0 Phase 3.P0-47 — INET / CIDR containment + overlap ────────
2905//
2906// SPG stores INET / CIDR as Text (Phase 7 design); these helpers parse
2907// the textual `addr[/mask]` form into a (family, bytes, prefix_bits)
2908// triple and implement PG's network-comparison operators on that
2909// representation.
2910//
2911// PG semantics:
2912//   * `<<`  — strictly contained-in (LHS ⊊ RHS)
2913//   * `<<=` — contained-in-or-equal (LHS ⊆ RHS)
2914//   * `>>`, `>>=` — mirrors of the above
2915//   * `&&`  — overlap (either LHS ⊆ RHS or RHS ⊆ LHS)
2916//
2917// NULL on either side → NULL (3VL). Mixed family (v4 vs v6) is never
2918// contained / never overlaps but is not an error — same as PG.
2919
/// Parsed inet network: address bytes (4 for v4, 16 for v6) and the
/// network prefix length in bits. Built by `parse_inet_text`.
struct InetNet {
    /// Address bytes, most significant first. `parse_inet_text` stores an
    /// IPv4 address in `bytes[..4]` and leaves the remaining 12 bytes zero.
    bytes: [u8; 16],
    /// 4 for IPv4, 16 for IPv6.
    family_bytes: u8,
    /// 0..=32 for v4, 0..=128 for v6.
    prefix_bits: u8,
}
2929
2930fn parse_inet_text(s: &str) -> Option<InetNet> {
2931    let mut split = s.splitn(2, '/');
2932    let host = split.next()?;
2933    let mask_str = split.next();
2934    if host.contains(':') {
2935        let bytes = parse_ipv6(host)?;
2936        let prefix_bits = match mask_str {
2937            Some(m) => m.parse::<u8>().ok().filter(|&n| n <= 128)?,
2938            None => 128,
2939        };
2940        let mut out = [0u8; 16];
2941        out.copy_from_slice(&bytes);
2942        Some(InetNet {
2943            bytes: out,
2944            family_bytes: 16,
2945            prefix_bits,
2946        })
2947    } else {
2948        let bytes = parse_ipv4(host)?;
2949        let prefix_bits = match mask_str {
2950            Some(m) => m.parse::<u8>().ok().filter(|&n| n <= 32)?,
2951            None => 32,
2952        };
2953        let mut out = [0u8; 16];
2954        out[..4].copy_from_slice(&bytes);
2955        Some(InetNet {
2956            bytes: out,
2957            family_bytes: 4,
2958            prefix_bits,
2959        })
2960    }
2961}
2962
/// Parse a dotted-quad IPv4 address (`a.b.c.d`) into its four bytes.
///
/// Returns `None` unless the input is exactly four `.`-separated parts,
/// each made only of ASCII digits and no larger than 255. Leading zeros are
/// accepted; signs are not (`u8::from_str` alone would let `+1` through).
fn parse_ipv4(s: &str) -> Option<[u8; 4]> {
    let mut out = [0u8; 4];
    let mut parts = s.split('.');
    for slot in &mut out {
        let part = parts.next()?;
        if part.is_empty() || !part.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        *slot = part.parse().ok()?;
    }
    // A fifth component means this is not a dotted quad.
    if parts.next().is_some() {
        return None;
    }
    Some(out)
}
2974
/// Parse a textual IPv6 address into its 16 bytes (network order).
///
/// Supports the full eight-group form and a single `::` zero-run
/// shorthand. Each group must be 1–4 hexadecimal digits; signs, empty
/// groups and over-long groups are rejected. Embedded dotted-quad tails
/// (`::ffff:1.2.3.4`) are not supported and yield `None`.
fn parse_ipv6(s: &str) -> Option<[u8; 16]> {
    /// One 16-bit group: 1–4 hex digits, nothing else.
    /// (`u16::from_str_radix` alone would accept `+ff` and `00001`.)
    fn parse_group(g: &str) -> Option<u16> {
        let well_formed =
            (1..=4).contains(&g.len()) && g.bytes().all(|b| b.is_ascii_hexdigit());
        if !well_formed {
            return None;
        }
        u16::from_str_radix(g, 16).ok()
    }

    /// Split one side of the address on `:`; an empty side has no groups.
    fn split_groups(part: &str) -> Vec<&str> {
        if part.is_empty() {
            Vec::new()
        } else {
            part.split(':').collect()
        }
    }

    // Split on the first `::`. A second `::` leaves an empty group in the
    // tail, which `parse_group` rejects.
    let (head, tail) = match s.split_once("::") {
        Some((head, tail)) => (head, Some(tail)),
        None => (s, None),
    };
    let head_groups = split_groups(head);
    let tail_groups = tail.map(split_groups).unwrap_or_default();

    // Without `::` we need exactly 8 groups; with `::` at most 7.
    if tail.is_none() {
        if head_groups.len() != 8 {
            return None;
        }
    } else if head_groups.len() + tail_groups.len() > 7 {
        return None;
    }

    let mut words = [0u16; 8];
    for (slot, group) in words.iter_mut().zip(&head_groups) {
        *slot = parse_group(group)?;
    }
    // Tail groups are right-aligned; the gap in between stays zero.
    let tail_start = 8 - tail_groups.len();
    for (slot, group) in words[tail_start..].iter_mut().zip(&tail_groups) {
        *slot = parse_group(group)?;
    }

    let mut out = [0u8; 16];
    for (pair, word) in out.chunks_exact_mut(2).zip(words.iter()) {
        pair.copy_from_slice(&word.to_be_bytes());
    }
    Some(out)
}
3015
3016/// Compare the first `prefix_bits` bits of `a` and `b`. Returns true if
3017/// they match. `prefix_bits` must not exceed the family size.
3018fn network_prefix_eq(a: &InetNet, b: &InetNet, prefix_bits: u8) -> bool {
3019    let full_bytes = (prefix_bits / 8) as usize;
3020    if a.bytes[..full_bytes] != b.bytes[..full_bytes] {
3021        return false;
3022    }
3023    let extra = prefix_bits % 8;
3024    if extra == 0 {
3025        return true;
3026    }
3027    let mask: u8 = 0xff << (8 - extra);
3028    (a.bytes[full_bytes] & mask) == (b.bytes[full_bytes] & mask)
3029}
3030
3031/// True iff network `a` is fully contained in network `b` (a ⊆ b).
3032fn inet_contained_eq(a: &InetNet, b: &InetNet) -> bool {
3033    if a.family_bytes != b.family_bytes {
3034        return false;
3035    }
3036    if a.prefix_bits < b.prefix_bits {
3037        return false;
3038    }
3039    network_prefix_eq(a, b, b.prefix_bits)
3040}
3041
3042/// True iff a and b are exactly the same network (same family + same
3043/// prefix + same masked address).
3044fn inet_networks_equal(a: &InetNet, b: &InetNet) -> bool {
3045    if a.family_bytes != b.family_bytes {
3046        return false;
3047    }
3048    if a.prefix_bits != b.prefix_bits {
3049        return false;
3050    }
3051    network_prefix_eq(a, b, a.prefix_bits)
3052}
3053
3054fn inet_op_bool_result(op: BinOp, l: &Value, r: &Value) -> Result<Value, EvalError> {
3055    if matches!(l, Value::Null) || matches!(r, Value::Null) {
3056        return Ok(Value::Null);
3057    }
3058    let (lt, rt) = match (l, r) {
3059        (Value::Text(a), Value::Text(b)) => (a, b),
3060        _ => {
3061            return Err(EvalError::TypeMismatch {
3062                detail: format!(
3063                    "inet operator requires TEXT/INET operands, got {:?} and {:?}",
3064                    l.data_type(),
3065                    r.data_type()
3066                ),
3067            });
3068        }
3069    };
3070    let a = parse_inet_text(lt).ok_or_else(|| EvalError::TypeMismatch {
3071        detail: format!("invalid inet text: {:?}", lt),
3072    })?;
3073    let b = parse_inet_text(rt).ok_or_else(|| EvalError::TypeMismatch {
3074        detail: format!("invalid inet text: {:?}", rt),
3075    })?;
3076    let result = match op {
3077        BinOp::InetContainedByEq => inet_contained_eq(&a, &b),
3078        BinOp::InetContainedBy => inet_contained_eq(&a, &b) && !inet_networks_equal(&a, &b),
3079        BinOp::InetContainsEq => inet_contained_eq(&b, &a),
3080        BinOp::InetContains => inet_contained_eq(&b, &a) && !inet_networks_equal(&a, &b),
3081        BinOp::InetOverlap => inet_contained_eq(&a, &b) || inet_contained_eq(&b, &a),
3082        _ => unreachable!("inet_op_bool_result called with non-inet op"),
3083    };
3084    Ok(Value::Bool(result))
3085}
3086
3087// ─── v7.17.0 Phase 3.7 — minimal POSIX-ERE-shaped regex matcher ───────
3088//
3089// SPG-engine is `#![no_std]` and has no external regex dependency, so
3090// this module hand-implements the subset of PG's regex needed by the
3091// dominant customer patterns. Supported syntax:
3092//
3093//   * literal characters (with `\.`, `\*`, `\+`, `\?`, `\(`, `\)`,
3094//     `\[`, `\]`, `\\`, `\^`, `\$`, `\|` escapes)
3095//   * `.` — any single character
3096//   * `*`, `+`, `?` — greedy quantifiers
3097//   * character classes: `[abc]`, `[^abc]`, `[a-z0-9_]`
3098//   * shortcut classes: `\d` `\D` `\w` `\W` `\s` `\S`
3099//   * anchors `^` `$`
3100//   * non-capturing groups `(...)`
3101//   * alternation `|`
3102//
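// NOT supported in v7.17. The parser does not reject these; unless noted
// below they are read as ordinary literals / plain groups, so results
// differ from PG: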
3104//   * backreferences `\1`
3105//   * lookaround `(?=…)` `(?<=…)`
3106//   * named captures
3107//   * inline flag groups `(?i)`
3108//   * lazy quantifiers `*?` `+?` `??` — patterns containing `?` after
3109//     a quantifier are accepted but interpreted as the greedy form
3110//     (this is the v7.17 stop-gap; customers needing lazy semantics
3111//     should preprocess the pattern)
3112//   * counted repetition `{n,m}`
3113//
3114// The matcher uses a backtracking NFA-shaped walk; performance is fine
3115// for the small strings PG regex functions usually operate on.
3116
/// Node of the parsed regex tree built by `re_compile` and walked by
/// `re_match_at`. Parenthesised groups add no node of their own: the
/// parser returns the group's inner alternation directly.
#[derive(Debug, Clone)]
enum ReNode {
    /// Single literal character. Patterns and subjects are both handled as
    /// `char` sequences, so a literal may be any Unicode scalar value.
    Literal(char),
    /// Any single character except `'\n'`.
    AnyChar,
    /// Character class: the listed members, plus a flag that inverts the
    /// membership test when set.
    Class {
        members: Vec<ClassMember>,
        negated: bool,
    },
    /// Anchor start: matches (zero-width) only at position 0.
    Start,
    /// Anchor end: matches (zero-width) only at the end of the input.
    End,
    /// Greedy quantifier: `inner` repeated at least `min` times and at most
    /// `max` times (`None` = unbounded).
    Quant {
        inner: Box<ReNode>,
        min: usize,
        max: Option<usize>,
    },
    /// Concatenation of sub-nodes, matched left to right.
    Concat(Vec<ReNode>),
    /// Alternation; branches are tried in order.
    Alt(Vec<ReNode>),
}
3144
/// One entry of a bracketed character class.
#[derive(Debug, Clone)]
enum ClassMember {
    /// Exactly this character.
    Single(char),
    /// Any character between the two bounds, both inclusive.
    Range(char, char),
}
3150
3151fn re_compile(pat: &str) -> Result<ReNode, EvalError> {
3152    let chars: Vec<char> = pat.chars().collect();
3153    let mut p = 0;
3154    let n = re_parse_alt(&chars, &mut p)?;
3155    if p != chars.len() {
3156        return Err(EvalError::TypeMismatch {
3157            detail: alloc::format!("regex compile: trailing chars at pos {p} in {pat:?}"),
3158        });
3159    }
3160    Ok(n)
3161}
3162
3163fn re_parse_alt(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3164    let mut branches = alloc::vec![re_parse_concat(chars, p)?];
3165    while *p < chars.len() && chars[*p] == '|' {
3166        *p += 1;
3167        branches.push(re_parse_concat(chars, p)?);
3168    }
3169    if branches.len() == 1 {
3170        Ok(branches.pop().unwrap())
3171    } else {
3172        Ok(ReNode::Alt(branches))
3173    }
3174}
3175
3176fn re_parse_concat(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3177    let mut items: Vec<ReNode> = Vec::new();
3178    while *p < chars.len() {
3179        let c = chars[*p];
3180        if c == '|' || c == ')' {
3181            break;
3182        }
3183        let atom = re_parse_atom(chars, p)?;
3184        // Optional quantifier suffix.
3185        let quantified = if *p < chars.len() {
3186            match chars[*p] {
3187                '*' => {
3188                    *p += 1;
3189                    // v7.17 stop-gap: tolerate `*?` lazy quantifier
3190                    // by treating it as greedy. Skip the trailing
3191                    // `?` if present.
3192                    if *p < chars.len() && chars[*p] == '?' {
3193                        *p += 1;
3194                    }
3195                    ReNode::Quant {
3196                        inner: Box::new(atom),
3197                        min: 0,
3198                        max: None,
3199                    }
3200                }
3201                '+' => {
3202                    *p += 1;
3203                    if *p < chars.len() && chars[*p] == '?' {
3204                        *p += 1;
3205                    }
3206                    ReNode::Quant {
3207                        inner: Box::new(atom),
3208                        min: 1,
3209                        max: None,
3210                    }
3211                }
3212                '?' => {
3213                    *p += 1;
3214                    ReNode::Quant {
3215                        inner: Box::new(atom),
3216                        min: 0,
3217                        max: Some(1),
3218                    }
3219                }
3220                _ => atom,
3221            }
3222        } else {
3223            atom
3224        };
3225        items.push(quantified);
3226    }
3227    if items.len() == 1 {
3228        Ok(items.pop().unwrap())
3229    } else {
3230        Ok(ReNode::Concat(items))
3231    }
3232}
3233
3234fn re_parse_atom(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3235    let c = chars[*p];
3236    match c {
3237        '(' => {
3238            *p += 1;
3239            let inner = re_parse_alt(chars, p)?;
3240            if *p >= chars.len() || chars[*p] != ')' {
3241                return Err(EvalError::TypeMismatch {
3242                    detail: "regex compile: unmatched '('".into(),
3243                });
3244            }
3245            *p += 1;
3246            Ok(inner)
3247        }
3248        '[' => {
3249            *p += 1;
3250            let mut negated = false;
3251            if *p < chars.len() && chars[*p] == '^' {
3252                negated = true;
3253                *p += 1;
3254            }
3255            let mut members: Vec<ClassMember> = Vec::new();
3256            while *p < chars.len() && chars[*p] != ']' {
3257                let start = chars[*p];
3258                *p += 1;
3259                if *p + 1 < chars.len() && chars[*p] == '-' && chars[*p + 1] != ']' {
3260                    let end = chars[*p + 1];
3261                    *p += 2;
3262                    members.push(ClassMember::Range(start, end));
3263                } else {
3264                    members.push(ClassMember::Single(start));
3265                }
3266            }
3267            if *p >= chars.len() {
3268                return Err(EvalError::TypeMismatch {
3269                    detail: "regex compile: unmatched '['".into(),
3270                });
3271            }
3272            *p += 1; // consume ]
3273            Ok(ReNode::Class { members, negated })
3274        }
3275        '.' => {
3276            *p += 1;
3277            Ok(ReNode::AnyChar)
3278        }
3279        '^' => {
3280            *p += 1;
3281            Ok(ReNode::Start)
3282        }
3283        '$' => {
3284            *p += 1;
3285            Ok(ReNode::End)
3286        }
3287        '\\' => {
3288            *p += 1;
3289            if *p >= chars.len() {
3290                return Err(EvalError::TypeMismatch {
3291                    detail: "regex compile: dangling backslash".into(),
3292                });
3293            }
3294            let esc = chars[*p];
3295            *p += 1;
3296            match esc {
3297                'd' => Ok(ReNode::Class {
3298                    members: alloc::vec![ClassMember::Range('0', '9')],
3299                    negated: false,
3300                }),
3301                'D' => Ok(ReNode::Class {
3302                    members: alloc::vec![ClassMember::Range('0', '9')],
3303                    negated: true,
3304                }),
3305                'w' => Ok(ReNode::Class {
3306                    members: alloc::vec![
3307                        ClassMember::Range('a', 'z'),
3308                        ClassMember::Range('A', 'Z'),
3309                        ClassMember::Range('0', '9'),
3310                        ClassMember::Single('_'),
3311                    ],
3312                    negated: false,
3313                }),
3314                'W' => Ok(ReNode::Class {
3315                    members: alloc::vec![
3316                        ClassMember::Range('a', 'z'),
3317                        ClassMember::Range('A', 'Z'),
3318                        ClassMember::Range('0', '9'),
3319                        ClassMember::Single('_'),
3320                    ],
3321                    negated: true,
3322                }),
3323                's' => Ok(ReNode::Class {
3324                    members: alloc::vec![
3325                        ClassMember::Single(' '),
3326                        ClassMember::Single('\t'),
3327                        ClassMember::Single('\n'),
3328                        ClassMember::Single('\r'),
3329                    ],
3330                    negated: false,
3331                }),
3332                'S' => Ok(ReNode::Class {
3333                    members: alloc::vec![
3334                        ClassMember::Single(' '),
3335                        ClassMember::Single('\t'),
3336                        ClassMember::Single('\n'),
3337                        ClassMember::Single('\r'),
3338                    ],
3339                    negated: true,
3340                }),
3341                other => Ok(ReNode::Literal(other)),
3342            }
3343        }
3344        other => {
3345            *p += 1;
3346            Ok(ReNode::Literal(other))
3347        }
3348    }
3349}
3350
3351fn class_matches(member: &ClassMember, c: char) -> bool {
3352    match member {
3353        ClassMember::Single(s) => *s == c,
3354        ClassMember::Range(a, b) => c >= *a && c <= *b,
3355    }
3356}
3357
3358/// Try to match `node` starting at `pos` in `s`. Returns Some(end)
3359/// of the matched span (exclusive), or None if no match. Greedy
3360/// backtracking: each quantifier tries the longest viable repeat
3361/// and shrinks if the tail doesn't fit.
3362fn re_match_at(node: &ReNode, s: &[char], pos: usize) -> Option<usize> {
3363    match node {
3364        ReNode::Literal(c) => {
3365            if s.get(pos).copied() == Some(*c) {
3366                Some(pos + 1)
3367            } else {
3368                None
3369            }
3370        }
3371        ReNode::AnyChar => {
3372            if pos < s.len() && s[pos] != '\n' {
3373                Some(pos + 1)
3374            } else {
3375                None
3376            }
3377        }
3378        ReNode::Class { members, negated } => {
3379            let c = *s.get(pos)?;
3380            let hit = members.iter().any(|m| class_matches(m, c));
3381            if hit ^ negated { Some(pos + 1) } else { None }
3382        }
3383        ReNode::Start => {
3384            if pos == 0 {
3385                Some(pos)
3386            } else {
3387                None
3388            }
3389        }
3390        ReNode::End => {
3391            if pos == s.len() {
3392                Some(pos)
3393            } else {
3394                None
3395            }
3396        }
3397        ReNode::Concat(items) => {
3398            let mut p = pos;
3399            for it in items {
3400                p = re_match_at(it, s, p)?;
3401            }
3402            Some(p)
3403        }
3404        ReNode::Alt(branches) => {
3405            for b in branches {
3406                if let Some(p) = re_match_at(b, s, pos) {
3407                    return Some(p);
3408                }
3409            }
3410            None
3411        }
3412        ReNode::Quant { inner, min, max } => {
3413            // Greedy: gather as many matches as possible, then
3414            // shrink. v7.17 stop-gap doesn't continue the outer
3415            // tail match (we're at a leaf in concat already), so
3416            // we just return the longest match.
3417            let mut count = 0usize;
3418            let mut p = pos;
3419            loop {
3420                if let Some(cap) = max {
3421                    if count >= *cap {
3422                        break;
3423                    }
3424                }
3425                match re_match_at(inner, s, p) {
3426                    Some(np) if np > p => {
3427                        p = np;
3428                        count += 1;
3429                    }
3430                    _ => break,
3431                }
3432            }
3433            if count < *min {
3434                return None;
3435            }
3436            Some(p)
3437        }
3438    }
3439}
3440
3441/// Find the first match of `node` in `s`, starting at or after
3442/// `from`. Returns the (start, end) char positions of the match.
3443fn re_find(node: &ReNode, s: &[char], from: usize) -> Option<(usize, usize)> {
3444    let mut start = from;
3445    loop {
3446        if let Some(end) = re_match_at(node, s, start) {
3447            return Some((start, end));
3448        }
3449        if start >= s.len() {
3450            return None;
3451        }
3452        start += 1;
3453    }
3454}
3455
3456/// v7.17.0 Phase 3.7 — `regexp_matches(s, pat)` returns the FIRST
3457/// match as a single-element TEXT[]. (PG returns one row per match
3458/// across all captures; SPG simplifies to first-match-only TEXT[].
3459/// The `g` flag form `regexp_matches(s, pat, 'g')` falls through
3460/// to all-matches concatenation as a flat array.)
3461fn regexp_matches(args: &[Value]) -> Result<Value, EvalError> {
3462    let (text, pat, all_matches) = match args.len() {
3463        2 => (text_arg(&args[0])?, text_arg(&args[1])?, false),
3464        3 => {
3465            let flags = text_arg(&args[2])?.unwrap_or_default();
3466            (
3467                text_arg(&args[0])?,
3468                text_arg(&args[1])?,
3469                flags.contains('g'),
3470            )
3471        }
3472        n => {
3473            return Err(EvalError::TypeMismatch {
3474                detail: alloc::format!("regexp_matches() takes 2 or 3 args, got {n}"),
3475            });
3476        }
3477    };
3478    let Some(text) = text else {
3479        return Ok(Value::Null);
3480    };
3481    let Some(pat) = pat else {
3482        return Ok(Value::Null);
3483    };
3484    let node = re_compile(&pat)?;
3485    let chars: Vec<char> = text.chars().collect();
3486    let mut out: Vec<Option<String>> = Vec::new();
3487    let mut from = 0usize;
3488    while let Some((s_pos, e_pos)) = re_find(&node, &chars, from) {
3489        out.push(Some(chars[s_pos..e_pos].iter().collect()));
3490        if !all_matches {
3491            break;
3492        }
3493        // Advance past the match; if zero-width, step one.
3494        from = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3495        if from > chars.len() {
3496            break;
3497        }
3498    }
3499    Ok(Value::TextArray(out))
3500}
3501
3502/// v7.17.0 Phase 3.7 — `regexp_replace(s, pat, repl[, flags])`.
3503/// `flags` containing `g` replaces all matches; absent flag
3504/// replaces only the first match (PG default).
3505fn regexp_replace(args: &[Value]) -> Result<Value, EvalError> {
3506    let (text, pat, repl, flags) = match args.len() {
3507        3 => (
3508            text_arg(&args[0])?,
3509            text_arg(&args[1])?,
3510            text_arg(&args[2])?,
3511            String::new(),
3512        ),
3513        4 => (
3514            text_arg(&args[0])?,
3515            text_arg(&args[1])?,
3516            text_arg(&args[2])?,
3517            text_arg(&args[3])?.unwrap_or_default(),
3518        ),
3519        n => {
3520            return Err(EvalError::TypeMismatch {
3521                detail: alloc::format!("regexp_replace() takes 3 or 4 args, got {n}"),
3522            });
3523        }
3524    };
3525    let Some(text) = text else {
3526        return Ok(Value::Null);
3527    };
3528    let Some(pat) = pat else {
3529        return Ok(Value::Null);
3530    };
3531    let Some(repl) = repl else {
3532        return Ok(Value::Null);
3533    };
3534    let global = flags.contains('g');
3535    let node = re_compile(&pat)?;
3536    let chars: Vec<char> = text.chars().collect();
3537    let mut out = String::with_capacity(text.len());
3538    let mut from = 0usize;
3539    loop {
3540        match re_find(&node, &chars, from) {
3541            Some((s_pos, e_pos)) => {
3542                out.extend(chars[from..s_pos].iter());
3543                out.push_str(&repl);
3544                let step = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3545                from = step;
3546                if !global {
3547                    if from <= chars.len() {
3548                        out.extend(chars[from..].iter());
3549                    }
3550                    return Ok(Value::Text(out));
3551                }
3552                if from > chars.len() {
3553                    break;
3554                }
3555            }
3556            None => {
3557                out.extend(chars[from..].iter());
3558                break;
3559            }
3560        }
3561    }
3562    Ok(Value::Text(out))
3563}
3564
3565/// v7.17.0 Phase 3.7 — `regexp_split_to_array(s, pat)`. Returns
3566/// TEXT[] of the pieces between matches.
3567fn regexp_split_to_array(args: &[Value]) -> Result<Value, EvalError> {
3568    if args.len() != 2 {
3569        return Err(EvalError::TypeMismatch {
3570            detail: alloc::format!("regexp_split_to_array() takes 2 args, got {}", args.len()),
3571        });
3572    }
3573    let text = text_arg(&args[0])?;
3574    let pat = text_arg(&args[1])?;
3575    let Some(text) = text else {
3576        return Ok(Value::Null);
3577    };
3578    let Some(pat) = pat else {
3579        return Ok(Value::Null);
3580    };
3581    let node = re_compile(&pat)?;
3582    let chars: Vec<char> = text.chars().collect();
3583    let mut out: Vec<Option<String>> = Vec::new();
3584    let mut piece_start = 0usize;
3585    let mut from = 0usize;
3586    loop {
3587        match re_find(&node, &chars, from) {
3588            Some((s_pos, e_pos)) => {
3589                let piece: String = chars[piece_start..s_pos].iter().collect();
3590                out.push(Some(piece));
3591                let step = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3592                from = step;
3593                piece_start = step;
3594                if from > chars.len() {
3595                    break;
3596                }
3597            }
3598            None => {
3599                let tail: String = chars[piece_start..].iter().collect();
3600                out.push(Some(tail));
3601                break;
3602            }
3603        }
3604    }
3605    Ok(Value::TextArray(out))
3606}
3607
3608/// Helper: coerce a Value to an Option<String> for regex args. NULL
3609/// propagates as None (caller short-circuits to Value::Null).
3610fn text_arg(v: &Value) -> Result<Option<String>, EvalError> {
3611    match v {
3612        Value::Text(s) => Ok(Some(s.clone())),
3613        Value::Null => Ok(None),
3614        other => Err(EvalError::TypeMismatch {
3615            detail: alloc::format!(
3616                "regex function expects TEXT arg, got {:?}",
3617                other.data_type()
3618            ),
3619        }),
3620    }
3621}
3622
/// PG trim family: which side to strip.
///
/// NOTE(review): the consumer of this enum is outside this chunk; the
/// variants presumably map to the leading / trailing / both-ends trim
/// functions — confirm against the caller.
#[derive(Debug, Clone, Copy)]
enum TrimSide {
    Left,
    Right,
    Both,
}
3630
3631/// PG `left(s, n)` / `right(s, n)` shared implementation. Both
3632/// support negative n which means "all but |n| chars from the
3633/// opposite side". n=0 → ''. Codepoint-counted. NULL → NULL.
3634fn string_left_right(args: &[Value], is_left: bool, fn_name: &str) -> Result<Value, EvalError> {
3635    if args.len() != 2 {
3636        return Err(EvalError::TypeMismatch {
3637            detail: alloc::format!("{fn_name}() takes 2 args, got {}", args.len()),
3638        });
3639    }
3640    if args.iter().any(|v| matches!(v, Value::Null)) {
3641        return Ok(Value::Null);
3642    }
3643    let s = value_to_format_text(&args[0]);
3644    let n = match &args[1] {
3645        Value::SmallInt(x) => i64::from(*x),
3646        Value::Int(x) => i64::from(*x),
3647        Value::BigInt(x) => *x,
3648        other => {
3649            return Err(EvalError::TypeMismatch {
3650                detail: alloc::format!(
3651                    "{fn_name}(): n must be integer, got {:?}",
3652                    other.data_type()
3653                ),
3654            });
3655        }
3656    };
3657    let chars: Vec<char> = s.chars().collect();
3658    let len = chars.len() as i64;
3659    if n == 0 {
3660        return Ok(Value::Text(String::new()));
3661    }
3662    let (start, end) = if is_left {
3663        if n > 0 {
3664            (0usize, (n.min(len)) as usize)
3665        } else {
3666            // left(s, -k) → drop last |k| chars; keep [0..len - k]
3667            let drop = (-n).min(len);
3668            (0usize, (len - drop) as usize)
3669        }
3670    } else if n > 0 {
3671        // right(s, k) → keep last k chars; start = max(0, len-k)
3672        let start = (len - n).max(0);
3673        (start as usize, len as usize)
3674    } else {
3675        // right(s, -k) → drop first |k| chars; keep [k..len]
3676        let drop = (-n).min(len);
3677        (drop as usize, len as usize)
3678    };
3679    if start >= end {
3680        return Ok(Value::Text(String::new()));
3681    }
3682    Ok(Value::Text(chars[start..end].iter().collect()))
3683}
3684
3685/// Compare two values for min/max selection. Returns Equal when
3686/// values are equal (including cross-numeric-width), Less when
3687/// a < b, Greater when a > b. NULL handling is upstream.
3688fn value_cmp_for_min_max(a: &Value, b: &Value) -> core::cmp::Ordering {
3689    use core::cmp::Ordering;
3690    // Integer-widen first (covers SmallInt vs Int vs BigInt).
3691    let a_int = match a {
3692        Value::SmallInt(x) => Some(i64::from(*x)),
3693        Value::Int(x) => Some(i64::from(*x)),
3694        Value::BigInt(x) => Some(*x),
3695        _ => None,
3696    };
3697    let b_int = match b {
3698        Value::SmallInt(x) => Some(i64::from(*x)),
3699        Value::Int(x) => Some(i64::from(*x)),
3700        Value::BigInt(x) => Some(*x),
3701        _ => None,
3702    };
3703    if let (Some(av), Some(bv)) = (a_int, b_int) {
3704        return av.cmp(&bv);
3705    }
3706    // Float-widen.
3707    let a_f = value_to_f64(a);
3708    let b_f = value_to_f64(b);
3709    if let (Some(av), Some(bv)) = (a_f, b_f) {
3710        return av.partial_cmp(&bv).unwrap_or(Ordering::Equal);
3711    }
3712    // Text/Text.
3713    match (a, b) {
3714        (Value::Text(av), Value::Text(bv)) => av.cmp(bv),
3715        (Value::Bytes(av), Value::Bytes(bv)) => av.cmp(bv),
3716        _ => Ordering::Equal,
3717    }
3718}
3719
3720fn value_to_f64(v: &Value) -> Option<f64> {
3721    match v {
3722        Value::Float(x) => Some(*x),
3723        Value::SmallInt(x) => Some(f64::from(*x)),
3724        Value::Int(x) => Some(f64::from(*x)),
3725        Value::BigInt(x) => Some(*x as f64),
3726        Value::Numeric { scaled, scale } => {
3727            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3728        }
3729        _ => None,
3730    }
3731}
3732
3733/// PG-style equality for nullif. Handles cross-numeric-width
3734/// comparison (Int vs BigInt vs SmallInt vs Float vs Numeric);
3735/// text matches text exactly; everything else uses derived
3736/// PartialEq.
3737fn values_equal_for_nullif(a: &Value, b: &Value) -> bool {
3738    // Same-type fast path.
3739    if a == b {
3740        return true;
3741    }
3742    // Cross-int widening: SmallInt / Int / BigInt all comparable.
3743    let a_int = match a {
3744        Value::SmallInt(x) => Some(i64::from(*x)),
3745        Value::Int(x) => Some(i64::from(*x)),
3746        Value::BigInt(x) => Some(*x),
3747        _ => None,
3748    };
3749    let b_int = match b {
3750        Value::SmallInt(x) => Some(i64::from(*x)),
3751        Value::Int(x) => Some(i64::from(*x)),
3752        Value::BigInt(x) => Some(*x),
3753        _ => None,
3754    };
3755    if let (Some(a), Some(b)) = (a_int, b_int) {
3756        return a == b;
3757    }
3758    // Float / Numeric: widen to f64.
3759    let a_f = match a {
3760        Value::Float(x) => Some(*x),
3761        Value::SmallInt(x) => Some(f64::from(*x)),
3762        Value::Int(x) => Some(f64::from(*x)),
3763        Value::BigInt(x) => Some(*x as f64),
3764        Value::Numeric { scaled, scale } => {
3765            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3766        }
3767        _ => None,
3768    };
3769    let b_f = match b {
3770        Value::Float(x) => Some(*x),
3771        Value::SmallInt(x) => Some(f64::from(*x)),
3772        Value::Int(x) => Some(f64::from(*x)),
3773        Value::BigInt(x) => Some(*x as f64),
3774        Value::Numeric { scaled, scale } => {
3775            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3776        }
3777        _ => None,
3778    };
3779    if let (Some(a), Some(b)) = (a_f, b_f) {
3780        return a == b;
3781    }
3782    false
3783}
3784
/// no_std-compatible `trunc(x)` for f64 — drop the fractional part,
/// rounding toward zero.
///
/// NaN, ±infinity and any value with |x| >= 2^53 are returned unchanged
/// (at that magnitude an f64 has no fractional bits). Everything else
/// round-trips through `i64`, whose cast truncates toward zero.
fn f64_trunc(x: f64) -> f64 {
    const INTEGRAL_FROM: f64 = 9_007_199_254_740_992.0; // 2^53
    let passthrough = !x.is_finite() || x >= INTEGRAL_FROM || x <= -INTEGRAL_FROM;
    if passthrough {
        x
    } else {
        (x as i64) as f64
    }
}
3798
/// xorshift64 (shift triple 13 / 7 / 17) PRNG state — a process-wide
/// word advanced on every draw. Not cryptographically secure; use a
/// crypto-grade RNG when security matters.
///
/// NOTE(review): the initial value is a compile-time constant, so unless
/// the state is re-seeded somewhere outside this block, every process
/// replays the same sequence — confirm that is acceptable for
/// `gen_random_uuid`.
static PRNG_STATE: core::sync::atomic::AtomicU64 =
    core::sync::atomic::AtomicU64::new(0x2545_F491_4F6C_DD1D);

/// Advance the PRNG and return the raw next 64-bit state.
/// Shared between `random()` and `gen_random_uuid()`.
///
/// The state is advanced with a single atomic read-modify-write
/// (`fetch_update`), so concurrent callers each consume a different
/// predecessor state and therefore each receive a distinct value.
///
/// A zero state (the one fixed point of xorshift) is replaced by the
/// default seed *inside* the update closure. The previous hand-written
/// CAS loop substituted the seed into the CAS "expected" operand, which
/// could never match a stored zero and would have spun forever.
fn prng_next_u64() -> u64 {
    use core::sync::atomic::Ordering;

    const RESEED: u64 = 0x2545_F491_4F6C_DD1D;

    /// One xorshift64 step, with zero-state recovery.
    fn step(state: u64) -> u64 {
        let mut s = if state == 0 { RESEED } else { state };
        s ^= s << 13;
        s ^= s >> 7;
        s ^= s << 17;
        s
    }

    // The closure always returns `Some`, so `Err` is unreachable; both arms
    // carry the state we advanced from, and `step` is pure, so recomputing
    // it yields exactly the value that was stored.
    match PRNG_STATE.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |s| Some(step(s))) {
        Ok(prev) | Err(prev) => step(prev),
    }
}
3828
3829/// Advance the PRNG and return a uniform double in [0, 1).
3830fn prng_next_f64() -> f64 {
3831    // 53 bits of randomness mapped to [0, 1).
3832    let mantissa = prng_next_u64() >> 11;
3833    let denom = (1u64 << 53) as f64;
3834    mantissa as f64 / denom
3835}
3836
3837/// v7.17.0 — generate a RFC 4122 v4 (random) UUID. Layout: 16
3838/// random bytes with the version nibble (high nibble of byte 6)
3839/// pinned to `0100` (= 4) and the variant top bits (high two bits
3840/// of byte 8) pinned to `10` — exactly what PG's
3841/// `gen_random_uuid()` and the historical uuid-ossp
3842/// `uuid_generate_v4()` produce.
3843pub fn gen_random_uuid_bytes() -> [u8; 16] {
3844    let mut out = [0u8; 16];
3845    let hi = prng_next_u64().to_be_bytes();
3846    let lo = prng_next_u64().to_be_bytes();
3847    out[..8].copy_from_slice(&hi);
3848    out[8..].copy_from_slice(&lo);
3849    // Version 4: top nibble of byte 6 must be 0100.
3850    out[6] = (out[6] & 0x0f) | 0x40;
3851    // Variant 1 (RFC 4122): top two bits of byte 8 must be 10.
3852    out[8] = (out[8] & 0x3f) | 0x80;
3853    out
3854}
3855
/// no_std `f64::sqrt(x)` — square root via Newton's (Babylonian) method.
///
/// Special cases:
///   * NaN, ±0 and +infinity are returned unchanged.
///   * Negative inputs (including -infinity) yield NaN. Callers are still
///     expected to reject them beforehand; this only replaces the
///     meaningless value the iteration would otherwise produce.
///   * Subnormal inputs are scaled up by 2^108 (exact), solved, and scaled
///     back by 2^-54. Their exponent field is 0, so the exponent-halving
///     seed would be far too large for the fixed iteration count to
///     converge.
///
/// For normal inputs the seed `2^(exp/2)` is within a factor of two of
/// the true root, and 8 Newton steps are more than enough to reach f64
/// precision from there.
fn f64_sqrt(x: f64) -> f64 {
    if x.is_nan() || x == 0.0 || x == f64::INFINITY {
        return x;
    }
    if x < 0.0 {
        return f64::NAN;
    }
    if x < f64::MIN_POSITIVE {
        // Powers of two, so both scalings are exact.
        let scale_up = f64::from_bits((1023_u64 + 108) << 52); // 2^108
        let scale_down = f64::from_bits((1023_u64 - 54) << 52); // 2^-54
        return f64_sqrt(x * scale_up) * scale_down;
    }
    // Seed: halve the unbiased exponent, zero mantissa. No logarithm needed.
    let exp = ((x.to_bits() >> 52) & 0x7ff) as i64 - 1023;
    let seed_exp = (exp / 2) + 1023;
    let mut guess = f64::from_bits(((seed_exp as u64) & 0x7ff) << 52);
    for _ in 0..8 {
        guess = 0.5 * (guess + x / guess);
    }
    guess
}
3880
3881/// no_std `f64::exp(x)` — e^x via range-reduction + Taylor
3882/// series. Adequate for power(), exp(), and pseudo-random-ish
3883/// scales the engine uses; ~1e-12 relative error in the
3884/// common range.
3885fn f64_exp(x: f64) -> f64 {
3886    if x.is_nan() {
3887        return x;
3888    }
3889    if x > 709.0 {
3890        return f64::INFINITY;
3891    }
3892    if x < -745.0 {
3893        return 0.0;
3894    }
3895    // exp(x) = 2^k * exp(r) where r = x - k*ln(2), |r| <= ln(2)/2.
3896    const LN2: f64 = 0.6931471805599453;
3897    let k = f64_round_half_away(x / LN2) as i32;
3898    let r = x - (k as f64) * LN2;
3899    // Taylor series for exp(r): sum r^n / n!  (rapid for |r|<0.35)
3900    let mut term = 1.0;
3901    let mut sum = 1.0;
3902    for n in 1..=20 {
3903        term *= r / (n as f64);
3904        sum += term;
3905        if term.abs() < 1e-18 {
3906            break;
3907        }
3908    }
3909    // Multiply by 2^k.
3910    f64_powi(2.0, k) * sum
3911}
3912
/// no_std `f64::ln(x)` — natural log via range reduction plus the atanh
/// series.
///
/// Special cases:
///   * NaN and any `x <= 0` yield NaN (callers are expected to reject
///     non-positive input themselves).
///   * `+infinity` yields `+infinity`. Without this guard the halving
///     loop below never terminates, because `inf * 0.5 == inf`.
///   * `1.0` yields exactly `0.0`.
///
/// Otherwise `x` is written as `2^k · m` with `m` in `[1, 2)` and
/// `ln(x) = k·ln2 + ln(m)`, where
/// `ln(m) = 2·(u + u³/3 + u⁵/5 + …)` for `u = (m − 1)/(m + 1)`.
fn f64_ln(x: f64) -> f64 {
    const LN2: f64 = 0.6931471805599453;

    if x.is_nan() || x <= 0.0 {
        return f64::NAN;
    }
    if x == f64::INFINITY {
        return x;
    }
    if x == 1.0 {
        return 0.0;
    }

    // Normalise the mantissa into [1, 2). Scaling by 2 or 0.5 is exact
    // while the mantissa is at or above 1.
    let mut k = 0i32;
    let mut m = x;
    while m >= 2.0 {
        m *= 0.5;
        k += 1;
    }
    while m < 1.0 {
        m *= 2.0;
        k -= 1;
    }

    // u <= 1/3 here, so each successive term shrinks by at least 9x.
    let u = (m - 1.0) / (m + 1.0);
    let u2 = u * u;
    let mut power = u;
    let mut sum = u;
    for i in 1..50 {
        power *= u2;
        let contribution = power / ((2 * i + 1) as f64);
        sum += contribution;
        if contribution.abs() < 1e-18 {
            break;
        }
    }
    2.0 * sum + (k as f64) * LN2
}
3950
/// no_std `f64::powi` substitute — raise an f64 base to an integer
/// exponent with square-and-multiply (binary exponentiation).
///
/// A negative exponent is handled by inverting the base first and then
/// raising `1 / base` to `|exp|`. `exp == 0` returns `1.0` for every
/// base. Intended for the small exponents the rounding / cast code uses.
fn f64_powi(base: f64, exp: i32) -> f64 {
    if exp == 0 {
        return 1.0;
    }
    let mut factor = if exp < 0 { 1.0 / base } else { base };
    // `unsigned_abs` keeps `i32::MIN` well-defined.
    let mut remaining = exp.unsigned_abs();
    let mut acc = 1.0;
    loop {
        if remaining % 2 == 1 {
            acc *= factor;
        }
        remaining /= 2;
        if remaining == 0 {
            return acc;
        }
        factor *= factor;
    }
}
3972
/// no_std-compatible `round(x)` for f64 with the half-away-from-zero
/// rule (PG NUMERIC semantic — NOT banker's rounding).
///
/// The result is derived from the truncated value and the *exact*
/// fractional remainder rather than from `floor(x + 0.5)`: that addition
/// itself rounds, which turned `0.49999999999999994` into `1.0` and
/// bumped odd integers in `[2^52, 2^53)` to the next even number.
///
/// NaN and ±infinity pass through. Any |x| >= 2^52 is already integral
/// and is returned unchanged.
fn f64_round_half_away(x: f64) -> f64 {
    const INTEGRAL_FROM: f64 = 4_503_599_627_370_496.0; // 2^52

    if x.is_nan() || x.is_infinite() {
        return x;
    }
    if x >= INTEGRAL_FROM || x <= -INTEGRAL_FROM {
        return x;
    }
    // In range for i64, so the cast truncates toward zero without
    // saturating, and `x - whole` is exactly representable.
    let whole = (x as i64) as f64;
    let frac = x - whole;
    if frac >= 0.5 {
        whole + 1.0
    } else if frac <= -0.5 {
        whole - 1.0
    } else {
        whole
    }
}
3985
/// no_std-compatible `ceil(x)` for f64 — smallest integral value that is
/// not less than `x`.
///
/// NaN, ±infinity and |x| >= 2^53 are returned unchanged. Otherwise the
/// value is truncated toward zero via `i64`; truncation only undershoots
/// for positive non-integers, which are bumped up by one. Negative
/// fractions therefore move toward zero (`ceil(-1.5)` is `-1`, not `-2`).
fn f64_ceil(x: f64) -> f64 {
    const INTEGRAL_FROM: f64 = 9_007_199_254_740_992.0; // 2^53

    if !x.is_finite() || x >= INTEGRAL_FROM || x <= -INTEGRAL_FROM {
        return x;
    }
    let whole = (x as i64) as f64;
    // `whole < x` holds exactly when x is a positive non-integer.
    if whole < x {
        whole + 1.0
    } else {
        whole
    }
}
4004
/// no_std-compatible `floor(x)` for f64 — largest integral value that is
/// not greater than `x`. SPG's engine is `#![no_std]` and can't call
/// `f64::floor` directly (libm), so the semantic is spelled out here:
///   * NaN / ±infinity pass through.
///   * |x| >= 2^53 is returned unchanged; at that magnitude an f64 has
///     no fractional bits.
///   * Otherwise truncate toward zero via `i64`; truncation only
///     overshoots for negative non-integers, which are stepped down by
///     one (floor toward -infinity, the PG-canonical semantic).
fn f64_floor(x: f64) -> f64 {
    const INTEGRAL_FROM: f64 = 9_007_199_254_740_992.0; // 2^53

    if !x.is_finite() || x >= INTEGRAL_FROM || x <= -INTEGRAL_FROM {
        return x;
    }
    let whole = (x as i64) as f64;
    // `whole > x` holds exactly when x is a negative non-integer.
    if whole > x {
        whole - 1.0
    } else {
        whole
    }
}
4028
4029/// PG `lpad` / `rpad` shared implementation. Length is the
4030/// target codepoint count. When the input is longer than `length`,
4031/// truncate keeping the LEFT side (both lpad and rpad agree with
4032/// PG here). When shorter, pad with `fill` (default SPACE) cycling
4033/// for multi-char fills, on the appropriate side. Empty fill +
4034/// needs padding → returns input verbatim (potentially
4035/// truncated). NULL on any arg → NULL.
4036fn string_pad(args: &[Value], is_left: bool, fn_name: &str) -> Result<Value, EvalError> {
4037    if args.len() != 2 && args.len() != 3 {
4038        return Err(EvalError::TypeMismatch {
4039            detail: alloc::format!("{fn_name}() takes 2 or 3 args, got {}", args.len()),
4040        });
4041    }
4042    if args.iter().any(|v| matches!(v, Value::Null)) {
4043        return Ok(Value::Null);
4044    }
4045    let s = value_to_format_text(&args[0]);
4046    let target = match &args[1] {
4047        Value::SmallInt(x) => i64::from(*x),
4048        Value::Int(x) => i64::from(*x),
4049        Value::BigInt(x) => *x,
4050        other => {
4051            return Err(EvalError::TypeMismatch {
4052                detail: alloc::format!(
4053                    "{fn_name}(): length must be integer, got {:?}",
4054                    other.data_type()
4055                ),
4056            });
4057        }
4058    };
4059    let fill = if args.len() == 3 {
4060        value_to_format_text(&args[2])
4061    } else {
4062        String::from(" ")
4063    };
4064    if target <= 0 {
4065        return Ok(Value::Text(String::new()));
4066    }
4067    let target = target as usize;
4068    let s_chars: Vec<char> = s.chars().collect();
4069    if s_chars.len() >= target {
4070        // Truncate from the right (PG keeps LEFT side for both
4071        // lpad and rpad).
4072        return Ok(Value::Text(s_chars[..target].iter().collect()));
4073    }
4074    if fill.is_empty() {
4075        return Ok(Value::Text(s));
4076    }
4077    let pad_needed = target - s_chars.len();
4078    let fill_chars: Vec<char> = fill.chars().collect();
4079    let mut padding = String::with_capacity(pad_needed * 4);
4080    for i in 0..pad_needed {
4081        padding.push(fill_chars[i % fill_chars.len()]);
4082    }
4083    if is_left {
4084        Ok(Value::Text(padding + &s))
4085    } else {
4086        Ok(Value::Text(s + &padding))
4087    }
4088}
4089
4090/// PG `trim` / `ltrim` / `rtrim` / `btrim` shared implementation.
4091/// Accepts 1 or 2 args; coerces both to text via the standard
4092/// `value_to_format_text` helper; treats the chars arg as a SET
4093/// of UTF-8 codepoints (not a substring). NULL on either arg
4094/// poisons the result.
4095fn string_trim(args: &[Value], side: TrimSide, fn_name: &str) -> Result<Value, EvalError> {
4096    let (input, chars_str) = match args {
4097        [v] => (v.clone(), String::from(" ")),
4098        [v, c] => (v.clone(), {
4099            // NULL chars poisons.
4100            if matches!(c, Value::Null) {
4101                return Ok(Value::Null);
4102            }
4103            value_to_format_text(c)
4104        }),
4105        _ => {
4106            return Err(EvalError::TypeMismatch {
4107                detail: alloc::format!("{fn_name}() takes 1 or 2 args, got {}", args.len()),
4108            });
4109        }
4110    };
4111    if matches!(input, Value::Null) {
4112        return Ok(Value::Null);
4113    }
4114    let s = value_to_format_text(&input);
4115    let charset: alloc::collections::BTreeSet<char> = chars_str.chars().collect();
4116    let chars: Vec<char> = s.chars().collect();
4117    let mut start = 0usize;
4118    let mut end = chars.len();
4119    if matches!(side, TrimSide::Left | TrimSide::Both) {
4120        while start < end && charset.contains(&chars[start]) {
4121            start += 1;
4122        }
4123    }
4124    if matches!(side, TrimSide::Right | TrimSide::Both) {
4125        while end > start && charset.contains(&chars[end - 1]) {
4126            end -= 1;
4127        }
4128    }
4129    Ok(Value::Text(chars[start..end].iter().collect()))
4130}
4131
4132/// v7.17.0 Phase 3.8 — PG `format(fmtstr, args…)` with
4133/// sprintf-style conversion specifiers. Subset covered:
4134///   * `%s` — text rendering of the arg
4135///   * `%I` — quoted SQL identifier (always double-quoted; embedded
4136///     `"` doubled per SQL grammar)
4137///   * `%L` — quoted SQL literal (single-quoted; embedded `'`
4138///     doubled; NULL → literal `NULL`)
4139///   * `%%` — literal `%`
4140///   * `%n$X` — argument position (1-based) before the specifier
4141///     character (e.g. `%2$s` picks the 2nd arg)
4142fn format_string(args: &[Value]) -> Result<Value, EvalError> {
4143    if args.is_empty() {
4144        return Err(EvalError::TypeMismatch {
4145            detail: "format() takes at least 1 arg (format string)".into(),
4146        });
4147    }
4148    let fmt = match &args[0] {
4149        Value::Text(s) => s.clone(),
4150        Value::Null => return Ok(Value::Null),
4151        other => {
4152            return Err(EvalError::TypeMismatch {
4153                detail: format!(
4154                    "format(): first arg must be text, got {:?}",
4155                    other.data_type()
4156                ),
4157            });
4158        }
4159    };
4160    let arg_values = &args[1..];
4161    let mut out = String::new();
4162    let mut chars = fmt.chars().peekable();
4163    // Position cursor — next implicit arg picked when no `n$`
4164    // prefix is given. PG's format uses a 1-based cursor that
4165    // advances on each implicit-position spec.
4166    let mut implicit_cursor: usize = 0;
4167    while let Some(c) = chars.next() {
4168        if c != '%' {
4169            out.push(c);
4170            continue;
4171        }
4172        // Parse optional `n$` position prefix.
4173        let mut explicit_pos: Option<usize> = None;
4174        // Buffer the digits so we can roll back if no `$` follows.
4175        let mut digit_buf = String::new();
4176        while let Some(&d) = chars.peek() {
4177            if d.is_ascii_digit() {
4178                digit_buf.push(d);
4179                chars.next();
4180            } else {
4181                break;
4182            }
4183        }
4184        if !digit_buf.is_empty() && matches!(chars.peek(), Some(&'$')) {
4185            chars.next(); // consume `$`
4186            explicit_pos =
4187                Some(
4188                    digit_buf
4189                        .parse::<usize>()
4190                        .map_err(|_| EvalError::TypeMismatch {
4191                            detail: format!("format(): invalid arg position {digit_buf:?}"),
4192                        })?,
4193                );
4194            digit_buf.clear();
4195        }
4196        // Specifier character.
4197        let spec = match chars.next() {
4198            Some(c) => c,
4199            None => {
4200                return Err(EvalError::TypeMismatch {
4201                    detail: "format(): trailing `%` with no specifier".into(),
4202                });
4203            }
4204        };
4205        // Anything left in digit_buf (no `$`) was actually
4206        // pre-spec digits we now have to emit verbatim. PG would
4207        // treat them as width hint; v7.17 doesn't implement
4208        // width, but we don't want to silently drop the digits.
4209        // Strategy: ignore width for now and emit just the
4210        // converted value.
4211        let _ = digit_buf;
4212        if spec == '%' {
4213            out.push('%');
4214            continue;
4215        }
4216        let arg_index = match explicit_pos {
4217            Some(p) => p.saturating_sub(1),
4218            None => {
4219                let i = implicit_cursor;
4220                implicit_cursor += 1;
4221                i
4222            }
4223        };
4224        let arg = arg_values.get(arg_index).cloned().unwrap_or(Value::Null);
4225        match spec {
4226            's' => match arg {
4227                Value::Null => {} // PG: NULL renders as empty for %s.
4228                v => out.push_str(&value_to_format_text(&v)),
4229            },
4230            'I' => match arg {
4231                Value::Null => {
4232                    return Err(EvalError::TypeMismatch {
4233                        detail: "format(): NULL is not a valid identifier (%I)".into(),
4234                    });
4235                }
4236                v => {
4237                    let s = value_to_format_text(&v);
4238                    out.push('"');
4239                    for ch in s.chars() {
4240                        if ch == '"' {
4241                            out.push('"');
4242                            out.push('"');
4243                        } else {
4244                            out.push(ch);
4245                        }
4246                    }
4247                    out.push('"');
4248                }
4249            },
4250            'L' => match arg {
4251                Value::Null => out.push_str("NULL"),
4252                v => {
4253                    let s = value_to_format_text(&v);
4254                    out.push('\'');
4255                    for ch in s.chars() {
4256                        if ch == '\'' {
4257                            out.push('\'');
4258                            out.push('\'');
4259                        } else {
4260                            out.push(ch);
4261                        }
4262                    }
4263                    out.push('\'');
4264                }
4265            },
4266            other => {
4267                return Err(EvalError::TypeMismatch {
4268                    detail: format!(
4269                        "format(): unknown specifier '%{other}' \
4270                         (v7.17 supports %s %I %L %%)"
4271                    ),
4272                });
4273            }
4274        }
4275    }
4276    Ok(Value::Text(out))
4277}
4278
4279/// Helper: render a Value as text for format()'s %s / %I / %L
4280/// payload. Reuses the regular text-coercion table.
4281/// v7.17.0 Phase 3.P0-31 — map a `Value` to the canonical PG
4282/// type-name string returned by `pg_typeof`. Lowercase, matches
4283/// what real PostgreSQL emits (NOT SPG's UPPERCASE Display shape).
4284fn pg_typeof_name(v: &Value) -> &'static str {
4285    match v {
4286        Value::SmallInt(_) => "smallint",
4287        Value::Int(_) => "integer",
4288        Value::BigInt(_) => "bigint",
4289        Value::Float(_) => "double precision",
4290        Value::Text(_) => "text",
4291        Value::Bool(_) => "boolean",
4292        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => "vector",
4293        Value::Numeric { .. } => "numeric",
4294        Value::Date(_) => "date",
4295        Value::Timestamp(_) => "timestamp without time zone",
4296        Value::Interval { .. } => "interval",
4297        Value::Json(_) => {
4298            // SPG carries JSON and JSONB in the same Value::Json
4299            // variant; without a column ty hint we cannot tell
4300            // them apart at value level. Return "json" as the
4301            // conservative answer (PG's pg_typeof on a literal
4302            // `'{}'::json` returns "json"; the jsonb case is
4303            // covered when an explicit ::jsonb cast lands as
4304            // Value::Json too — see below override at call site).
4305            //
4306            // The eval-arm above for pg_typeof handles the
4307            // disambiguation via Expr-shape probing.
4308            "json"
4309        }
4310        Value::Bytes(_) => "bytea",
4311        Value::TextArray(_) => "text[]",
4312        Value::IntArray(_) => "integer[]",
4313        Value::BigIntArray(_) => "bigint[]",
4314        Value::TsVector(_) => "tsvector",
4315        Value::TsQuery(_) => "tsquery",
4316        Value::Uuid(_) => "uuid",
4317        Value::Null => "unknown",
4318        // Value is #[non_exhaustive]; future variants land here
4319        // until the table is updated.
4320        _ => "unknown",
4321    }
4322}
4323
4324fn value_to_format_text(v: &Value) -> String {
4325    match v {
4326        Value::Text(s) | Value::Json(s) => s.clone(),
4327        Value::SmallInt(n) => n.to_string(),
4328        Value::Int(n) => n.to_string(),
4329        Value::BigInt(n) => n.to_string(),
4330        Value::Float(x) => format!("{x}"),
4331        Value::Bool(b) => {
4332            if *b {
4333                "t".into()
4334            } else {
4335                "f".into()
4336            }
4337        }
4338        Value::Null => String::new(),
4339        other => format!("{other:?}"),
4340    }
4341}
4342
4343fn to_char(args: &[Value]) -> Result<Value, EvalError> {
4344    use core::fmt::Write as _;
4345    if args.len() != 2 {
4346        return Err(EvalError::TypeMismatch {
4347            detail: format!("to_char() takes 2 args, got {}", args.len()),
4348        });
4349    }
4350    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4351        return Ok(Value::Null);
4352    }
4353    let Value::Text(fmt) = &args[1] else {
4354        return Err(EvalError::TypeMismatch {
4355            detail: format!(
4356                "to_char() needs a text format, got {:?}",
4357                args[1].data_type()
4358            ),
4359        });
4360    };
4361    let (days, day_micros) = match &args[0] {
4362        Value::Date(d) => (*d, 0_i64),
4363        Value::Timestamp(t) => {
4364            let days = t.div_euclid(86_400_000_000);
4365            (
4366                i32::try_from(days).unwrap_or(i32::MAX),
4367                t.rem_euclid(86_400_000_000),
4368            )
4369        }
4370        other => {
4371            return Err(EvalError::TypeMismatch {
4372                detail: format!(
4373                    "to_char() needs DATE or TIMESTAMP, got {:?}",
4374                    other.data_type()
4375                ),
4376            });
4377        }
4378    };
4379    let (y, mo, d) = civil_from_days(days);
4380    let secs = day_micros / 1_000_000;
4381    let frac = day_micros % 1_000_000;
4382    // div_euclid keeps every value non-negative — the casts below are
4383    // sign-safe by construction. `secs ∈ [0, 86400)`, `frac ∈ [0,
4384    // 1_000_000)`, so all three quantities fit in u32.
4385    let hh24 = u32::try_from(secs / 3600).unwrap_or(0);
4386    let mi = u32::try_from((secs / 60) % 60).unwrap_or(0);
4387    let ss = u32::try_from(secs % 60).unwrap_or(0);
4388    let hh12 = match hh24 % 12 {
4389        0 => 12,
4390        x => x,
4391    };
4392    let ampm = if hh24 < 12 { "AM" } else { "PM" };
4393    let ms = u32::try_from(frac / 1_000).unwrap_or(0); // millisecond
4394    let us = u32::try_from(frac).unwrap_or(0); // microsecond (0..1_000_000)
4395
4396    let mut out = String::with_capacity(fmt.len() + 8);
4397    let bytes = fmt.as_bytes();
4398    let mut i = 0;
4399    // write! against a String never fails — discard the Result.
4400    while i < bytes.len() {
4401        // Try the longest prefixes first so "YYYY" wins over "YY".
4402        let rest = &bytes[i..];
4403        if rest.starts_with(b"YYYY") {
4404            let _ = write!(out, "{y:04}");
4405            i += 4;
4406        } else if rest.starts_with(b"YY") {
4407            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
4408            let yy = (y.rem_euclid(100)) as u32;
4409            let _ = write!(out, "{yy:02}");
4410            i += 2;
4411        } else if rest.starts_with(b"Month") {
4412            out.push_str(MONTH_FULL[(mo - 1) as usize]);
4413            i += 5;
4414        } else if rest.starts_with(b"Mon") {
4415            out.push_str(MONTH_ABBR[(mo - 1) as usize]);
4416            i += 3;
4417        } else if rest.starts_with(b"MM") {
4418            let _ = write!(out, "{mo:02}");
4419            i += 2;
4420        } else if rest.starts_with(b"DD") {
4421            let _ = write!(out, "{d:02}");
4422            i += 2;
4423        } else if rest.starts_with(b"HH24") {
4424            let _ = write!(out, "{hh24:02}");
4425            i += 4;
4426        } else if rest.starts_with(b"HH12") {
4427            let _ = write!(out, "{hh12:02}");
4428            i += 4;
4429        } else if rest.starts_with(b"MI") {
4430            let _ = write!(out, "{mi:02}");
4431            i += 2;
4432        } else if rest.starts_with(b"SS") {
4433            let _ = write!(out, "{ss:02}");
4434            i += 2;
4435        } else if rest.starts_with(b"MS") {
4436            let _ = write!(out, "{ms:03}");
4437            i += 2;
4438        } else if rest.starts_with(b"US") {
4439            let _ = write!(out, "{us:06}");
4440            i += 2;
4441        } else if rest.starts_with(b"AM") || rest.starts_with(b"PM") {
4442            out.push_str(ampm);
4443            i += 2;
4444        } else {
4445            // Pass any non-placeholder byte through verbatim.
4446            out.push(bytes[i] as char);
4447            i += 1;
4448        }
4449    }
4450    Ok(Value::Text(out))
4451}
4452
/// Full English month names, indexed by `month - 1` (January = 0).
const MONTH_FULL: [&str; 12] = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
];

/// Three-letter English month abbreviations, indexed by `month - 1`.
const MONTH_ABBR: [&str; 12] = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
];
4470
4471/// v7.17.0 Phase 3.P0-29 — MySQL `DATE_FORMAT(t, fmt)`.
4472///
4473/// Format tokens (MySQL 8.0 surface):
4474///   * `%Y` — 4-digit year  `%y` — 2-digit year
4475///   * `%m` — 01-12 month   `%c` — 1-12 month (no zero pad)
4476///   * `%d` — 01-31 day     `%e` — 1-31 day (no zero pad)
4477///   * `%H` — 00-23 hour    `%h` / `%I` — 01-12 hour
4478///   * `%i` — 00-59 MINUTE (NB: `%M` is month name in MySQL — easy
4479///     footgun if we mirror PG's `to_char` tokens by accident)
4480///   * `%s` / `%S` — 00-59 second
4481///   * `%f` — 000000-999999 microseconds (always 6 digits)
4482///   * `%p` — AM / PM
4483///   * `%M` — January-December (full month name)
4484///   * `%b` — Jan-Dec (abbreviated month name)
4485///   * `%%` — literal `%`
4486///
4487/// Unknown `%X` tokens pass through verbatim (MySQL emits the `%`
4488/// then the unknown letter).
4489fn date_format_mysql(args: &[Value]) -> Result<Value, EvalError> {
4490    use core::fmt::Write as _;
4491    if args.len() != 2 {
4492        return Err(EvalError::TypeMismatch {
4493            detail: format!("date_format() takes 2 args, got {}", args.len()),
4494        });
4495    }
4496    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4497        return Ok(Value::Null);
4498    }
4499    let Value::Text(fmt) = &args[1] else {
4500        return Err(EvalError::TypeMismatch {
4501            detail: format!(
4502                "date_format() needs a text format, got {:?}",
4503                args[1].data_type()
4504            ),
4505        });
4506    };
4507    let (days, day_micros) = match &args[0] {
4508        Value::Date(d) => (*d, 0_i64),
4509        Value::Timestamp(t) => {
4510            let days = t.div_euclid(86_400_000_000);
4511            (
4512                i32::try_from(days).unwrap_or(i32::MAX),
4513                t.rem_euclid(86_400_000_000),
4514            )
4515        }
4516        other => {
4517            return Err(EvalError::TypeMismatch {
4518                detail: format!(
4519                    "date_format() needs DATE or TIMESTAMP, got {:?}",
4520                    other.data_type()
4521                ),
4522            });
4523        }
4524    };
4525    let (y, mo, d) = civil_from_days(days);
4526    let secs = day_micros / 1_000_000;
4527    let frac = day_micros % 1_000_000;
4528    let hh24 = u32::try_from(secs / 3600).unwrap_or(0);
4529    let mi = u32::try_from((secs / 60) % 60).unwrap_or(0);
4530    let ss = u32::try_from(secs % 60).unwrap_or(0);
4531    let hh12 = match hh24 % 12 {
4532        0 => 12,
4533        x => x,
4534    };
4535    let ampm = if hh24 < 12 { "AM" } else { "PM" };
4536    let us = u32::try_from(frac).unwrap_or(0);
4537
4538    let mut out = String::with_capacity(fmt.len() + 8);
4539    let bytes = fmt.as_bytes();
4540    let mut i = 0;
4541    while i < bytes.len() {
4542        if bytes[i] != b'%' {
4543            out.push(bytes[i] as char);
4544            i += 1;
4545            continue;
4546        }
4547        if i + 1 >= bytes.len() {
4548            // Trailing `%` with no specifier — emit verbatim.
4549            out.push('%');
4550            i += 1;
4551            continue;
4552        }
4553        let token = bytes[i + 1];
4554        match token {
4555            b'Y' => {
4556                let _ = write!(out, "{y:04}");
4557            }
4558            b'y' => {
4559                #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
4560                let yy = (y.rem_euclid(100)) as u32;
4561                let _ = write!(out, "{yy:02}");
4562            }
4563            b'm' => {
4564                let _ = write!(out, "{mo:02}");
4565            }
4566            b'c' => {
4567                let _ = write!(out, "{mo}");
4568            }
4569            b'd' => {
4570                let _ = write!(out, "{d:02}");
4571            }
4572            b'e' => {
4573                let _ = write!(out, "{d}");
4574            }
4575            b'H' => {
4576                let _ = write!(out, "{hh24:02}");
4577            }
4578            b'h' | b'I' => {
4579                let _ = write!(out, "{hh12:02}");
4580            }
4581            b'i' => {
4582                // MINUTE — distinct from PG's `MI` and from MySQL's
4583                // own `%M` (month name).
4584                let _ = write!(out, "{mi:02}");
4585            }
4586            b's' | b'S' => {
4587                let _ = write!(out, "{ss:02}");
4588            }
4589            b'f' => {
4590                let _ = write!(out, "{us:06}");
4591            }
4592            b'p' => {
4593                out.push_str(ampm);
4594            }
4595            b'M' => {
4596                out.push_str(MONTH_FULL[(mo - 1) as usize]);
4597            }
4598            b'b' => {
4599                out.push_str(MONTH_ABBR[(mo - 1) as usize]);
4600            }
4601            b'%' => {
4602                out.push('%');
4603            }
4604            other => {
4605                // Unknown specifier — MySQL emits the letter
4606                // verbatim (without the `%`).
4607                out.push(other as char);
4608            }
4609        }
4610        i += 2;
4611    }
4612    Ok(Value::Text(out))
4613}
4614
4615/// v7.17.0 Phase 3.P0-29 — `UNIX_TIMESTAMP(t)` returns epoch
4616/// seconds (BIGINT) for a TIMESTAMP / DATE.
4617///
4618/// Bare `UNIX_TIMESTAMP()` (no args) is folded to a BigInt literal
4619/// by clock_replacement_for at the rewrite layer — never reaches
4620/// this arm.
4621fn unix_timestamp_of(args: &[Value]) -> Result<Value, EvalError> {
4622    if args.len() != 1 {
4623        return Err(EvalError::TypeMismatch {
4624            detail: format!("unix_timestamp() takes 0 or 1 arg, got {}", args.len()),
4625        });
4626    }
4627    match &args[0] {
4628        Value::Null => Ok(Value::Null),
4629        Value::Timestamp(t) => Ok(Value::BigInt(t.div_euclid(1_000_000))),
4630        Value::Date(d) => Ok(Value::BigInt(i64::from(*d) * 86_400)),
4631        other => Err(EvalError::TypeMismatch {
4632            detail: format!(
4633                "unix_timestamp() needs DATE or TIMESTAMP, got {:?}",
4634                other.data_type()
4635            ),
4636        }),
4637    }
4638}
4639
4640/// v7.17.0 Phase 3.P0-29 — `FROM_UNIXTIME(n)` returns a TIMESTAMP
4641/// at `n` seconds past the Unix epoch. `FROM_UNIXTIME(n, fmt)`
4642/// applies MySQL date_format on top, returning TEXT.
4643fn from_unixtime(args: &[Value]) -> Result<Value, EvalError> {
4644    if !(1..=2).contains(&args.len()) {
4645        return Err(EvalError::TypeMismatch {
4646            detail: format!("from_unixtime() takes 1 or 2 args, got {}", args.len()),
4647        });
4648    }
4649    if args.iter().any(|v| matches!(v, Value::Null)) {
4650        return Ok(Value::Null);
4651    }
4652    let secs: i64 = match &args[0] {
4653        Value::SmallInt(n) => i64::from(*n),
4654        Value::Int(n) => i64::from(*n),
4655        Value::BigInt(n) => *n,
4656        Value::Float(x) => *x as i64,
4657        Value::Numeric { scaled, scale } => {
4658            let denom = 10_i128.pow(u32::from(*scale));
4659            i64::try_from(scaled.div_euclid(denom)).unwrap_or(i64::MAX)
4660        }
4661        other => {
4662            return Err(EvalError::TypeMismatch {
4663                detail: format!(
4664                    "from_unixtime() needs a numeric epoch second count, got {:?}",
4665                    other.data_type()
4666                ),
4667            });
4668        }
4669    };
4670    let ts = Value::Timestamp(secs.saturating_mul(1_000_000));
4671    if args.len() == 1 {
4672        Ok(ts)
4673    } else {
4674        date_format_mysql(&[ts, args[1].clone()])
4675    }
4676}
4677
4678/// `date_trunc(unit, timestamp)` — round a `TIMESTAMP` down to the
4679/// requested calendar boundary (year / month / day / hour / minute /
4680/// second). Returns the truncated `TIMESTAMP`. NULL on either side
4681/// propagates to NULL.
4682fn date_trunc(args: &[Value]) -> Result<Value, EvalError> {
4683    if args.len() != 2 {
4684        return Err(EvalError::TypeMismatch {
4685            detail: format!("date_trunc() takes 2 args, got {}", args.len()),
4686        });
4687    }
4688    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4689        return Ok(Value::Null);
4690    }
4691    let Value::Text(unit) = &args[0] else {
4692        return Err(EvalError::TypeMismatch {
4693            detail: format!(
4694                "date_trunc() needs a text unit, got {:?}",
4695                args[0].data_type()
4696            ),
4697        });
4698    };
4699    // Both DATE and TIMESTAMP sources are accepted. DATE lifts to
4700    // midnight first; the result is always TIMESTAMP.
4701    let micros = match &args[1] {
4702        Value::Timestamp(t) => *t,
4703        Value::Date(d) => i64::from(*d) * 86_400_000_000,
4704        other => {
4705            return Err(EvalError::TypeMismatch {
4706                detail: format!(
4707                    "date_trunc() needs DATE or TIMESTAMP, got {:?}",
4708                    other.data_type()
4709                ),
4710            });
4711        }
4712    };
4713    let unit_lc = unit.to_ascii_lowercase();
4714    let days = micros.div_euclid(86_400_000_000);
4715    let day_micros = micros.rem_euclid(86_400_000_000);
4716    let day_i32 = i32::try_from(days).unwrap_or(i32::MAX);
4717    let (y, m, _) = civil_from_days(day_i32);
4718    let truncated = match unit_lc.as_str() {
4719        "year" => i64::from(days_from_civil(y, 1, 1)) * 86_400_000_000,
4720        "month" => i64::from(days_from_civil(y, m, 1)) * 86_400_000_000,
4721        "day" => days * 86_400_000_000,
4722        "hour" => days * 86_400_000_000 + (day_micros / 3_600_000_000) * 3_600_000_000,
4723        "minute" => days * 86_400_000_000 + (day_micros / 60_000_000) * 60_000_000,
4724        "second" => days * 86_400_000_000 + (day_micros / 1_000_000) * 1_000_000,
4725        other => {
4726            return Err(EvalError::TypeMismatch {
4727                detail: format!(
4728                    "unknown date_trunc unit {other:?}; \
4729                     supported: year, month, day, hour, minute, second"
4730                ),
4731            });
4732        }
4733    };
4734    Ok(Value::Timestamp(truncated))
4735}
4736
4737/// PG-style `expr::TYPE` coercion. NULL always casts as NULL.
4738pub fn cast_value(v: Value, target: CastTarget) -> Result<Value, EvalError> {
4739    if matches!(v, Value::Null) {
4740        return Ok(Value::Null);
4741    }
4742    match target {
4743        CastTarget::Vector => cast_to_vector(v),
4744        CastTarget::Text => Ok(Value::Text(value_to_text(&v))),
4745        CastTarget::Int => cast_numeric_to_int(v),
4746        CastTarget::BigInt => cast_numeric_to_bigint(v),
4747        CastTarget::Float => cast_numeric_to_float(v),
4748        CastTarget::Bool => cast_to_bool(v),
4749        CastTarget::Date => cast_to_date(v),
4750        // TIMESTAMP and TIMESTAMPTZ have identical runtime
4751        // representation (i64 microseconds UTC).
4752        CastTarget::Timestamp | CastTarget::Timestamptz => cast_to_timestamp(v),
4753        // v7.9.25 — `expr::INTERVAL`. Currently only TEXT → Interval
4754        // is supported (the mailrs idiom: `$1::INTERVAL` where the
4755        // bound param is a string like `'7 days'`).
4756        CastTarget::Interval => cast_to_interval(v),
4757        // v7.9.25 — `::json` / `::jsonb`. Routes Text → Json
4758        // (validation is the producer's responsibility, same as
4759        // the column-INSERT path).
4760        CastTarget::Json | CastTarget::Jsonb => match v {
4761            Value::Json(s) => Ok(Value::Json(s)),
4762            Value::Text(s) => Ok(Value::Json(s)),
4763            other => Err(EvalError::TypeMismatch {
4764                detail: alloc::format!(
4765                    "::json / ::jsonb only accepts TEXT-shape inputs, got {:?}",
4766                    other.data_type()
4767                ),
4768            }),
4769        },
4770        // v7.17.0 Phase 5.3 — `::regtype` / `::regclass`. PG
4771        // semantics: each is a textual catalog-name surfacing as
4772        // a numeric OID at the wire layer that renders back as
4773        // the original name. SPG has no OID space, but pg_dump /
4774        // mailrs / Django code uses the cast purely for textual
4775        // round-trip — feeding `'public.t'::regclass::text` into
4776        // a downstream `format(…)` or string concat. We map to
4777        // that textual contract: Text in → Text out (the schema-
4778        // qualifier `public.` is stripped to match PG's default
4779        // search_path-aware rendering); numeric in → re-cast to
4780        // Text as best-effort; anything else errors.
4781        //
4782        // Pre-3.3 / pre-5.3 (v7.9.26) the cast surfaced a clean
4783        // error; this lifts to accept-and-textify so the dominant
4784        // dump-loader pattern unblocks. SPG-shaped queries that
4785        // genuinely need an OID for runtime joins are still
4786        // documented as unsupported.
4787        CastTarget::RegType | CastTarget::RegClass => match v {
4788            Value::Text(s) => {
4789                // Strip an optional `<schema>.` prefix — PG's
4790                // regclass render drops it when the schema is on
4791                // the search_path; SPG is single-schema so
4792                // dropping is always safe.
4793                let bare = s.rsplit('.').next().unwrap_or(&s).to_string();
4794                Ok(Value::Text(bare))
4795            }
4796            Value::Int(n) => Ok(Value::Text(alloc::format!("{n}"))),
4797            Value::BigInt(n) => Ok(Value::Text(alloc::format!("{n}"))),
4798            other => Err(EvalError::TypeMismatch {
4799                detail: alloc::format!(
4800                    "::regtype / ::regclass accepts TEXT (name) or integer (oid), got {:?}",
4801                    other.data_type()
4802                ),
4803            }),
4804        },
4805        // v7.10.11 — `::TEXT[]`. Decode PG external array form
4806        // when input is Text; pass through unchanged when it is
4807        // already TextArray. Anything else is a type mismatch.
4808        CastTarget::TextArray => match v {
4809            Value::TextArray(items) => Ok(Value::TextArray(items)),
4810            Value::Text(s) => decode_text_array_external(&s).map(Value::TextArray),
4811            other => Err(EvalError::TypeMismatch {
4812                detail: alloc::format!(
4813                    "::TEXT[] only accepts TEXT / TEXT[] inputs, got {:?}",
4814                    other.data_type()
4815                ),
4816            }),
4817        },
4818        // v7.11.13 — `::INT[]` / `::BIGINT[]`. Decode PG external
4819        // form `{1,2,3}` when input is Text; widen TextArray /
4820        // IntArray as appropriate.
4821        CastTarget::IntArray => cast_to_int_array(v),
4822        CastTarget::BigIntArray => cast_to_bigint_array(v),
4823        // v7.12.0 — `::tsvector` / `::tsquery`. Decodes PG external
4824        // form when input is Text; passes through unchanged when the
4825        // input is already the target type. Other inputs are a type
4826        // mismatch. Lexer / Porter stemmer arrive in v7.12.1; the
4827        // external-form cast at v7.12.0 is the path pg_dump and
4828        // direct-literal callers use.
4829        CastTarget::TsVector => match v {
4830            Value::TsVector(items) => Ok(Value::TsVector(items)),
4831            Value::Text(s) => decode_tsvector_external(&s).map(Value::TsVector),
4832            other => Err(EvalError::TypeMismatch {
4833                detail: alloc::format!(
4834                    "::tsvector only accepts TEXT / tsvector inputs, got {:?}",
4835                    other.data_type()
4836                ),
4837            }),
4838        },
4839        CastTarget::TsQuery => match v {
4840            Value::TsQuery(ast) => Ok(Value::TsQuery(ast)),
4841            Value::Text(s) => decode_tsquery_external(&s).map(Value::TsQuery),
4842            other => Err(EvalError::TypeMismatch {
4843                detail: alloc::format!(
4844                    "::tsquery only accepts TEXT / tsquery inputs, got {:?}",
4845                    other.data_type()
4846                ),
4847            }),
4848        },
4849        // v7.17.0 — `::uuid`. Identity for `uuid → uuid`; parse
4850        // text via the shared `parse_uuid_str`. Anything else is a
4851        // type mismatch — PG also rejects e.g. INT → UUID without
4852        // an explicit text bridge.
4853        CastTarget::Uuid => match v {
4854            Value::Uuid(b) => Ok(Value::Uuid(b)),
4855            Value::Text(s) => match spg_storage::parse_uuid_str(&s) {
4856                Some(b) => Ok(Value::Uuid(b)),
4857                None => Err(EvalError::TypeMismatch {
4858                    detail: alloc::format!("invalid input syntax for type uuid: {s:?}"),
4859                }),
4860            },
4861            other => Err(EvalError::TypeMismatch {
4862                detail: alloc::format!(
4863                    "::uuid only accepts TEXT / uuid inputs, got {:?}",
4864                    other.data_type()
4865                ),
4866            }),
4867        },
4868        // v7.18 — `::bytea`. Identity for `Bytes → Bytes`; decode
4869        // Text via the engine's PG-format bytea decoder (`\x`
4870        // hex form + `\NNN` escape form). Anything else is a type
4871        // mismatch — same shape as PG's contract. Closes the
4872        // mailrs D-pre #3 reverse-acceptance gap.
4873        CastTarget::Bytea => match v {
4874            Value::Bytes(b) => Ok(Value::Bytes(b)),
4875            Value::Text(s) => match crate::decode_bytea_literal(&s) {
4876                Ok(b) => Ok(Value::Bytes(b)),
4877                Err(msg) => Err(EvalError::TypeMismatch {
4878                    detail: alloc::format!("invalid input syntax for type bytea: {msg}"),
4879                }),
4880            },
4881            other => Err(EvalError::TypeMismatch {
4882                detail: alloc::format!(
4883                    "::bytea only accepts TEXT / bytea inputs, got {:?}",
4884                    other.data_type()
4885                ),
4886            }),
4887        },
4888    }
4889}
4890
4891fn cast_to_int_array(v: Value) -> Result<Value, EvalError> {
4892    match v {
4893        Value::IntArray(items) => Ok(Value::IntArray(items)),
4894        Value::BigIntArray(items) => {
4895            let mut out: Vec<Option<i32>> = Vec::with_capacity(items.len());
4896            for item in items {
4897                match item {
4898                    None => out.push(None),
4899                    Some(n) => match i32::try_from(n) {
4900                        Ok(x) => out.push(Some(x)),
4901                        Err(_) => {
4902                            return Err(EvalError::TypeMismatch {
4903                                detail: alloc::format!("::INT[] element {n} overflows i32"),
4904                            });
4905                        }
4906                    },
4907                }
4908            }
4909            Ok(Value::IntArray(out))
4910        }
4911        Value::Text(s) => decode_int_array_external(&s).map(Value::IntArray),
4912        Value::TextArray(items) => {
4913            let mut out: Vec<Option<i32>> = Vec::with_capacity(items.len());
4914            for item in items {
4915                match item {
4916                    None => out.push(None),
4917                    Some(s) => match s.parse::<i32>() {
4918                        Ok(n) => out.push(Some(n)),
4919                        Err(_) => {
4920                            return Err(EvalError::TypeMismatch {
4921                                detail: alloc::format!("::INT[] cannot parse {s:?}"),
4922                            });
4923                        }
4924                    },
4925                }
4926            }
4927            Ok(Value::IntArray(out))
4928        }
4929        other => Err(EvalError::TypeMismatch {
4930            detail: alloc::format!("::INT[] does not accept {:?}", other.data_type()),
4931        }),
4932    }
4933}
4934
4935fn cast_to_bigint_array(v: Value) -> Result<Value, EvalError> {
4936    match v {
4937        Value::BigIntArray(items) => Ok(Value::BigIntArray(items)),
4938        Value::IntArray(items) => Ok(Value::BigIntArray(
4939            items.into_iter().map(|x| x.map(i64::from)).collect(),
4940        )),
4941        Value::Text(s) => decode_bigint_array_external(&s).map(Value::BigIntArray),
4942        Value::TextArray(items) => {
4943            let mut out: Vec<Option<i64>> = Vec::with_capacity(items.len());
4944            for item in items {
4945                match item {
4946                    None => out.push(None),
4947                    Some(s) => match s.parse::<i64>() {
4948                        Ok(n) => out.push(Some(n)),
4949                        Err(_) => {
4950                            return Err(EvalError::TypeMismatch {
4951                                detail: alloc::format!("::BIGINT[] cannot parse {s:?}"),
4952                            });
4953                        }
4954                    },
4955                }
4956            }
4957            Ok(Value::BigIntArray(out))
4958        }
4959        other => Err(EvalError::TypeMismatch {
4960            detail: alloc::format!("::BIGINT[] does not accept {:?}", other.data_type()),
4961        }),
4962    }
4963}
4964
4965fn decode_int_array_external(s: &str) -> Result<Vec<Option<i32>>, EvalError> {
4966    let trimmed = s.trim();
4967    let inner = trimmed
4968        .strip_prefix('{')
4969        .and_then(|x| x.strip_suffix('}'))
4970        .ok_or_else(|| EvalError::TypeMismatch {
4971            detail: alloc::format!("INT[] literal {s:?} must be enclosed in '{{...}}'"),
4972        })?;
4973    if inner.trim().is_empty() {
4974        return Ok(Vec::new());
4975    }
4976    inner
4977        .split(',')
4978        .map(|part| {
4979            let p = part.trim();
4980            if p.eq_ignore_ascii_case("NULL") {
4981                Ok(None)
4982            } else {
4983                p.parse::<i32>()
4984                    .map(Some)
4985                    .map_err(|_| EvalError::TypeMismatch {
4986                        detail: alloc::format!("INT[] element {p:?} is not an i32"),
4987                    })
4988            }
4989        })
4990        .collect()
4991}
4992
4993fn decode_bigint_array_external(s: &str) -> Result<Vec<Option<i64>>, EvalError> {
4994    let trimmed = s.trim();
4995    let inner = trimmed
4996        .strip_prefix('{')
4997        .and_then(|x| x.strip_suffix('}'))
4998        .ok_or_else(|| EvalError::TypeMismatch {
4999            detail: alloc::format!("BIGINT[] literal {s:?} must be enclosed in '{{...}}'"),
5000        })?;
5001    if inner.trim().is_empty() {
5002        return Ok(Vec::new());
5003    }
5004    inner
5005        .split(',')
5006        .map(|part| {
5007            let p = part.trim();
5008            if p.eq_ignore_ascii_case("NULL") {
5009                Ok(None)
5010            } else {
5011                p.parse::<i64>()
5012                    .map(Some)
5013                    .map_err(|_| EvalError::TypeMismatch {
5014                        detail: alloc::format!("BIGINT[] element {p:?} is not an i64"),
5015                    })
5016            }
5017        })
5018        .collect()
5019}
5020
5021/// v7.10.11 — same decoder as `decode_text_array_literal` in
5022/// `lib.rs`, but lives here so the eval-time cast path stays
5023/// inside `spg-engine::eval`. Kept in lock-step with the engine
5024/// `coerce_value` decoder by tests.
5025fn decode_text_array_external(s: &str) -> Result<Vec<Option<String>>, EvalError> {
5026    let trimmed = s.trim();
5027    let inner = trimmed
5028        .strip_prefix('{')
5029        .and_then(|x| x.strip_suffix('}'))
5030        .ok_or_else(|| EvalError::TypeMismatch {
5031            detail: alloc::format!("TEXT[] literal {s:?} must be enclosed in '{{...}}'"),
5032        })?;
5033    let mut out: Vec<Option<String>> = Vec::new();
5034    if inner.trim().is_empty() {
5035        return Ok(out);
5036    }
5037    let bytes = inner.as_bytes();
5038    let mut i = 0;
5039    while i <= bytes.len() {
5040        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
5041            i += 1;
5042        }
5043        if i < bytes.len() && bytes[i] == b'"' {
5044            i += 1;
5045            let mut buf = String::new();
5046            while i < bytes.len() && bytes[i] != b'"' {
5047                if bytes[i] == b'\\' && i + 1 < bytes.len() {
5048                    buf.push(bytes[i + 1] as char);
5049                    i += 2;
5050                } else {
5051                    buf.push(bytes[i] as char);
5052                    i += 1;
5053                }
5054            }
5055            if i >= bytes.len() {
5056                return Err(EvalError::TypeMismatch {
5057                    detail: "unterminated quoted element in TEXT[] literal".into(),
5058                });
5059            }
5060            i += 1;
5061            out.push(Some(buf));
5062        } else {
5063            let start = i;
5064            while i < bytes.len() && bytes[i] != b',' {
5065                i += 1;
5066            }
5067            let raw = inner[start..i].trim();
5068            if raw.eq_ignore_ascii_case("NULL") {
5069                out.push(None);
5070            } else {
5071                out.push(Some(raw.to_string()));
5072            }
5073        }
5074        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
5075            i += 1;
5076        }
5077        if i >= bytes.len() {
5078            break;
5079        }
5080        if bytes[i] != b',' {
5081            return Err(EvalError::TypeMismatch {
5082                detail: "expected ',' between TEXT[] elements".into(),
5083            });
5084        }
5085        i += 1;
5086    }
5087    Ok(out)
5088}
5089
5090fn cast_to_interval(v: Value) -> Result<Value, EvalError> {
5091    match v {
5092        Value::Interval { months, micros } => Ok(Value::Interval { months, micros }),
5093        Value::Text(s) => {
5094            let (months, micros) = spg_sql::parser::parse_interval_text(&s).ok_or_else(|| {
5095                EvalError::TypeMismatch {
5096                    detail: alloc::format!("cannot parse {s:?} as INTERVAL"),
5097                }
5098            })?;
5099            Ok(Value::Interval { months, micros })
5100        }
5101        other => Err(EvalError::TypeMismatch {
5102            detail: alloc::format!(
5103                "::INTERVAL only accepts TEXT-shape inputs, got {:?}",
5104                other.data_type()
5105            ),
5106        }),
5107    }
5108}
5109
5110fn cast_to_date(v: Value) -> Result<Value, EvalError> {
5111    match v {
5112        Value::Date(d) => Ok(Value::Date(d)),
5113        // Integer literals carry days since the Unix epoch — used by
5114        // the `CURRENT_DATE` AST rewrite to inject the wall clock.
5115        Value::Int(n) => Ok(Value::Date(n)),
5116        Value::BigInt(n) => {
5117            i32::try_from(n)
5118                .map(Value::Date)
5119                .map_err(|_| EvalError::TypeMismatch {
5120                    detail: "bigint days-since-epoch out of DATE range".into(),
5121                })
5122        }
5123        // Timestamp truncates to its day boundary.
5124        Value::Timestamp(t) => {
5125            let days = t.div_euclid(86_400_000_000);
5126            i32::try_from(days)
5127                .map(Value::Date)
5128                .map_err(|_| EvalError::TypeMismatch {
5129                    detail: "timestamp out of DATE range".into(),
5130                })
5131        }
5132        Value::Text(s) => parse_date_literal(&s)
5133            .map(Value::Date)
5134            .ok_or(EvalError::TypeMismatch {
5135                detail: format!("cannot parse {s:?} as DATE (expected YYYY-MM-DD)"),
5136            }),
5137        other => Err(EvalError::TypeMismatch {
5138            detail: format!("cannot cast {:?} to DATE", other.data_type()),
5139        }),
5140    }
5141}
5142
5143fn cast_to_timestamp(v: Value) -> Result<Value, EvalError> {
5144    match v {
5145        Value::Timestamp(t) => Ok(Value::Timestamp(t)),
5146        // Int / BigInt carry microseconds since the Unix epoch — used
5147        // by the `NOW()` / `CURRENT_TIMESTAMP` AST rewrite to inject
5148        // the wall clock as a plain integer literal.
5149        Value::Int(n) => Ok(Value::Timestamp(i64::from(n))),
5150        Value::BigInt(n) => Ok(Value::Timestamp(n)),
5151        // DATE → TIMESTAMP picks midnight on the date.
5152        Value::Date(d) => Ok(Value::Timestamp(i64::from(d) * 86_400_000_000)),
5153        Value::Text(s) => {
5154            parse_timestamp_literal(&s)
5155                .map(Value::Timestamp)
5156                .ok_or(EvalError::TypeMismatch {
5157                    detail: format!(
5158                        "cannot parse {s:?} as TIMESTAMP \
5159                     (expected YYYY-MM-DD[ HH:MM:SS[.ffffff]])"
5160                    ),
5161                })
5162        }
5163        other => Err(EvalError::TypeMismatch {
5164            detail: format!("cannot cast {:?} to TIMESTAMP", other.data_type()),
5165        }),
5166    }
5167}
5168
5169fn value_to_text(v: &Value) -> String {
5170    match v {
5171        // v7.5.0 — Value is #[non_exhaustive]; any future variant
5172        // without explicit text rendering hits the Debug fallback
5173        // at the end.
5174        Value::SmallInt(n) => format!("{n}"),
5175        Value::Int(n) => format!("{n}"),
5176        Value::BigInt(n) => format!("{n}"),
5177        Value::Float(x) => format!("{x}"),
5178        // v4.9: JSON renders identically to Text — both are raw UTF-8.
5179        Value::Text(s) | Value::Json(s) => s.clone(),
5180        Value::Bool(b) => (if *b { "true" } else { "false" }).into(),
5181        Value::Vector(v) => {
5182            let cells: Vec<String> = v.iter().map(|x| format!("{x}")).collect();
5183            format!("[{}]", cells.join(", "))
5184        }
5185        // v6.0.1: render SQ8 cells dequantised, so SELECT output
5186        // matches the pgvector wire shape clients expect. The
5187        // recall envelope already absorbs the ≤ (max-min)/255/2
5188        // dequantisation error.
5189        Value::Sq8Vector(q) => {
5190            let cells: Vec<String> = spg_storage::quantize::dequantize(q)
5191                .iter()
5192                .map(|x| format!("{x}"))
5193                .collect();
5194            format!("[{}]", cells.join(", "))
5195        }
5196        // v6.0.3: HalfVector cells dequantise bit-exactly to f32
5197        // for SELECT output.
5198        Value::HalfVector(h) => {
5199            let cells: Vec<String> = h.to_f32_vec().iter().map(|x| format!("{x}")).collect();
5200            format!("[{}]", cells.join(", "))
5201        }
5202        Value::Numeric { scaled, scale } => format_numeric(*scaled, *scale),
5203        Value::Date(d) => format_date(*d),
5204        Value::Timestamp(t) => format_timestamp(*t),
5205        Value::Interval { months, micros } => format_interval(*months, *micros),
5206        Value::Null => "NULL".into(),
5207        // v7.10.4 — BYTEA renders as PG hex form.
5208        Value::Bytes(b) => format_bytea_hex(b),
5209        // v7.10.9 — TEXT[] / INT[] / BIGINT[] render PG external form.
5210        Value::TextArray(items) => format_text_array(items),
5211        Value::IntArray(items) => format_int_array(items),
5212        Value::BigIntArray(items) => format_bigint_array(items),
5213        // v7.12.0 — tsvector / tsquery render PG external form.
5214        Value::TsVector(lexs) => format_tsvector(lexs),
5215        Value::TsQuery(ast) => format_tsquery(ast),
5216        // v7.17.0 — UUID renders canonical lowercase 8-4-4-4-12
5217        // hyphenated form (PG `uuid_out`).
5218        Value::Uuid(b) => spg_storage::format_uuid(b),
5219        // v7.17.0 Phase 3.P0-32 — TIME canonical text.
5220        Value::Time(us) => format_time(*us),
5221        // v7.17.0 Phase 3.P0-34 — TIMETZ canonical text.
5222        Value::TimeTz { us, offset_secs } => format_timetz(*us, *offset_secs),
5223        // v7.17.0 Phase 3.P0-33 — YEAR 4-digit zero-padded.
5224        Value::Year(y) => format!("{y:04}"),
5225        // v7.17.0 Phase 3.P0-35 — MONEY en_US locale.
5226        Value::Money(c) => format_money(*c),
5227        // v7.17.0 Phase 3.P0-38 — Range canonical form. Routes
5228        // through the engine's format_range_text to share the
5229        // single renderer with pgwire / sqllogictest.
5230        Value::Range { .. } => crate::format_range_text(v),
5231        // v7.17.0 Phase 3.P0-39 — Hstore canonical PG text form.
5232        Value::Hstore(pairs) => crate::format_hstore_text(pairs),
5233        // v7.17.0 Phase 3.P0-40 — 2D array canonical PG text form.
5234        Value::IntArray2D(rows) => crate::format_int_2d_text_pub(rows),
5235        Value::BigIntArray2D(rows) => crate::format_bigint_2d_text_pub(rows),
5236        Value::TextArray2D(rows) => crate::format_text_2d_text_pub(rows),
5237        // v7.5.0 — #[non_exhaustive] fallback for future Value variants.
5238        _ => format!("{v:?}"),
5239    }
5240}
5241
5242/// Render a `Date` (days since epoch) as `YYYY-MM-DD`. Negative values
5243/// for pre-1970 dates render with a leading `-` on the year.
5244pub fn format_date(days: i32) -> String {
5245    let (y, m, d) = civil_from_days(days);
5246    format!("{y:04}-{m:02}-{d:02}")
5247}
5248
5249/// Render a `Timestamp` (microseconds since epoch) as
5250/// `YYYY-MM-DD HH:MM:SS[.fff...]`. Trailing-zero fractional digits are
5251/// dropped; a whole-second value has no fractional part.
5252/// v7.15.0 — PG-canonical TIMESTAMPTZ wire format. Storage is
5253/// the same i64 microseconds UTC as TIMESTAMP, but the canonical
5254/// PG text output appends the session's UTC-offset suffix (`+00`
5255/// for the default UTC session, the form pg_dump emits). Mailrs
5256/// round-8 acceptance criterion: `SELECT col FROM tstz` should
5257/// round-trip to a literal that re-INSERTs without semantic
5258/// drift.
5259pub fn format_timestamptz(micros: i64) -> String {
5260    let base = format_timestamp(micros);
5261    let mut s = String::with_capacity(base.len() + 3);
5262    s.push_str(&base);
5263    s.push_str("+00");
5264    s
5265}
5266
/// v7.17.0 Phase 3.P0-35 — PG `money` canonical text form, en_US
/// locale: `$N,NNN.CC`, negative → `-$1.23`. Mirrors PG's
/// `cash_out` for `lc_monetary = 'en_US.UTF-8'`.
///
/// `cents` is the amount in hundredths. The magnitude is taken with
/// `unsigned_abs`, so `i64::MIN` renders without overflow.
pub fn format_money(cents: i64) -> String {
    let magnitude = cents.unsigned_abs();
    let whole = (magnitude / 100).to_string();
    let fraction = magnitude % 100;

    // The leftmost group holds 1–3 digits; every later group holds
    // exactly three and is preceded by a comma.
    let lead = match whole.len() % 3 {
        0 => 3,
        partial => partial,
    };
    let (head, tail) = whole.split_at(lead);
    let mut grouped = String::with_capacity(whole.len() + whole.len() / 3);
    grouped.push_str(head);
    for start in (0..tail.len()).step_by(3) {
        grouped.push(',');
        grouped.push_str(&tail[start..start + 3]);
    }

    let sign = if cents < 0 { "-" } else { "" };
    format!("{sign}${grouped}.{fraction:02}")
}
5291
5292/// v7.17.0 Phase 3.P0-34 — PG `TIMETZ` canonical text form
5293/// `HH:MM:SS[.ffffff]±HH[:MM]`. Mirrors PG `timetz_out`. The
5294/// offset uses `±HH` for whole-hour offsets and `±HH:MM` for
5295/// sub-hour offsets (matching PG's "minimal display" rule).
5296pub fn format_timetz(us: i64, offset_secs: i32) -> String {
5297    let time = format_time(us);
5298    let sign = if offset_secs < 0 { '-' } else { '+' };
5299    let abs = offset_secs.unsigned_abs();
5300    let oh = abs / 3600;
5301    let om = (abs % 3600) / 60;
5302    if om == 0 {
5303        format!("{time}{sign}{oh:02}")
5304    } else {
5305        format!("{time}{sign}{oh:02}:{om:02}")
5306    }
5307}
5308
/// v7.17.0 Phase 3.P0-32 — canonical text form of PG `TIME`,
/// `HH:MM:SS[.ffffff]`. Mirrors PG `time_out`.
///
/// `us` is microseconds since midnight. The fractional part is
/// omitted when zero; otherwise it is printed with trailing zeros
/// stripped, so `12:00:00.500000` renders as `12:00:00.5`.
pub fn format_time(us: i64) -> String {
    let whole_secs = us.div_euclid(1_000_000);
    let sub_second = us.rem_euclid(1_000_000);
    let (hours, minutes, seconds) = (whole_secs / 3600, (whole_secs / 60) % 60, whole_secs % 60);
    let mut rendered = format!("{hours:02}:{minutes:02}:{seconds:02}");
    if sub_second != 0 {
        // Zero-pad to six digits first so leading zeros survive, then
        // drop only the trailing ones.
        let padded = format!("{sub_second:06}");
        rendered.push('.');
        rendered.push_str(padded.trim_end_matches('0'));
    }
    rendered
}
5327
5328pub fn format_timestamp(micros: i64) -> String {
5329    const MICROS_PER_DAY: i64 = 86_400_000_000;
5330    // Split into day + intra-day part with proper floor division so
5331    // negative timestamps render right too.
5332    let days = micros.div_euclid(MICROS_PER_DAY);
5333    let day_micros = micros.rem_euclid(MICROS_PER_DAY);
5334    let day_i32 = i32::try_from(days).unwrap_or(i32::MAX);
5335    let (y, m, d) = civil_from_days(day_i32);
5336    let secs = day_micros / 1_000_000;
5337    let frac = day_micros % 1_000_000;
5338    let hh = secs / 3600;
5339    let mm = (secs / 60) % 60;
5340    let ss = secs % 60;
5341    if frac == 0 {
5342        format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}")
5343    } else {
5344        // Strip trailing zeros from the 6-digit fractional component.
5345        let raw = format!("{frac:06}");
5346        let trimmed = raw.trim_end_matches('0');
5347        format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}.{trimmed}")
5348    }
5349}
5350
/// Howard Hinnant's `civil_from_days`: turns a day count relative to
/// the Unix epoch into a proleptic-Gregorian `(year, month, day)`
/// triple. Both directions of this calendar conversion live in
/// `eval.rs` so the engine never reaches for `std` time facilities.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn civil_from_days(days: i32) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle
    // Re-base onto 0000-03-01 so the leap day falls at the end of the
    // (March-based) year.
    let shifted = i64::from(days) + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    // The Euclidean remainder is in [0, 146_097), so the narrowing
    // cast cannot truncate.
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32;
    let year_of_era = (day_of_era.saturating_sub(day_of_era / 1460) + day_of_era / 36524
        - day_of_era / 146_096)
        / 365;
    let day_of_year =
        day_of_era.saturating_sub(365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March … 11 = February).
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year.saturating_sub((153 * march_month + 2) / 5) + 1;
    // January and February belong to the following civil year.
    let (month, year_carry) = if march_month < 10 {
        (march_month + 3, 0)
    } else {
        (march_month - 9, 1)
    };
    let year = i64::from(year_of_era) + era * 400 + year_carry;
    (year as i32, month, day)
}
5372
/// Inverse of `civil_from_days` — converts `(year, month, day)` to the
/// number of days since 1970-01-01 (negative before the epoch).
///
/// Inputs are not validated: a day of `0` is treated like day `1`,
/// and out-of-range months / days simply extend the count. All
/// intermediate arithmetic is done in `i64`, so no input can overflow
/// it; a result that does not fit in `i32` saturates to `i32::MIN` or
/// `i32::MAX` according to its sign.
pub fn days_from_civil(y: i32, m: u32, d: u32) -> i32 {
    // January and February count as months 10 and 11 of the previous
    // March-based year.
    let shifted_year = if m <= 2 {
        i64::from(y) - 1
    } else {
        i64::from(y)
    };
    let era = shifted_year.div_euclid(400);
    let year_of_era = shifted_year.rem_euclid(400);
    let month = i64::from(m);
    let march_month = if m > 2 { month - 3 } else { month + 9 };
    let day_of_year = (153 * march_month + 2) / 5 + i64::from(d.saturating_sub(1));
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    let total = era * 146_097 + day_of_era - 719_468;
    // Saturate toward the side the value actually overflowed on.
    i32::try_from(total).unwrap_or(if total < 0 { i32::MIN } else { i32::MAX })
}
5389
5390/// Parse `YYYY-MM-DD` into a `Date` (days since Unix epoch). Returns
5391/// `None` on shape / numeric failure; the engine surfaces that as a
5392/// `TypeMismatch` with the original text included.
5393pub fn parse_date_literal(s: &str) -> Option<i32> {
5394    let bytes = s.as_bytes();
5395    if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' {
5396        return None;
5397    }
5398    let y: i32 = s[0..4].parse().ok()?;
5399    let m: u32 = s[5..7].parse().ok()?;
5400    let d: u32 = s[8..10].parse().ok()?;
5401    if !(1..=12).contains(&m) || !(1..=31).contains(&d) {
5402        return None;
5403    }
5404    Some(days_from_civil(y, m, d))
5405}
5406
5407/// Parse `YYYY-MM-DD[ HH:MM:SS[.ffffff]]` into a `Timestamp`
5408/// (microseconds since Unix epoch). The time portion is optional;
5409/// missing → midnight. The fractional portion accepts 1–6 digits and
5410/// pads with zeros to microseconds.
5411pub fn parse_timestamp_literal(s: &str) -> Option<i64> {
5412    let trimmed = s.trim();
5413    let (date_part, time_part) = match trimmed.find([' ', 'T']) {
5414        Some(i) => (&trimmed[..i], Some(&trimmed[i + 1..])),
5415        None => (trimmed, None),
5416    };
5417    let days = parse_date_literal(date_part)?;
5418    let (day_micros, tz_offset_micros) = match time_part {
5419        None => (0, 0),
5420        Some(t) => parse_time_of_day_micros(t)?,
5421    };
5422    // PG semantics: a TIMESTAMPTZ literal with an explicit offset
5423    // is normalised to UTC for storage. `'12:00:00+09'` means
5424    // 12:00:00 in a UTC+09 zone → 03:00:00 UTC → subtract the
5425    // positive offset (or add the negative one). Storage is i64
5426    // microseconds UTC for both TIMESTAMP and TIMESTAMPTZ (see
5427    // spg-storage::DataType::Timestamptz docs); the wire-level
5428    // round-trip then re-applies the session timezone on the
5429    // SELECT side when format_timestamp is asked for a TZ-aware
5430    // render.
5431    Some(i64::from(days) * 86_400_000_000 + day_micros - tz_offset_micros)
5432}
5433
5434/// v7.15.0 — Parse `HH:MM:SS[.frac][<tz>]` and return
5435/// `(day_micros, tz_offset_micros)` where `day_micros` is the
5436/// local-clock seconds-of-day in microseconds and
5437/// `tz_offset_micros` is the UTC offset (positive = east of
5438/// UTC, negative = west). Caller subtracts the offset to
5439/// normalise to UTC. PG's recognised TZ shapes after the
5440/// seconds (or frac) part:
5441///   * `+OO[:MM]` / `-OO[:MM]` — numeric offset
5442///   * `+OOMM` / `-OOMM` (no colon, less common but legal)
5443///   * ` UTC` / `UTC` / `Z` — explicit zero offset
5444/// Anything else after the seconds = parse failure (the caller
5445/// surfaces as "cannot parse … as TIMESTAMP").
5446fn parse_time_of_day_micros(t: &str) -> Option<(i64, i64)> {
5447    let t = t.trim();
5448    // Detect & strip optional TZ suffix. Anchor on the first
5449    // `+` / `-` AFTER position 8 (so the leading sign on a
5450    // negative offset can't be mistaken for an `HH:MM:SS-OO`
5451    // boundary if the time itself is somehow malformed).
5452    // ` UTC` and trailing `Z` also count as zero-offset TZ tags.
5453    let (core, tz_micros) = if let Some(rest) = t.strip_suffix('Z') {
5454        (rest, 0i64)
5455    } else if let Some(rest) = t.strip_suffix(" UTC").or_else(|| t.strip_suffix("UTC")) {
5456        (rest, 0i64)
5457    } else if let Some((idx, sign_byte)) = find_offset_sign(t) {
5458        let suffix = &t[idx..];
5459        let micros = parse_tz_offset_suffix(suffix, sign_byte == b'+')?;
5460        (&t[..idx], micros)
5461    } else {
5462        (t, 0i64)
5463    };
5464    let (time, frac_str) = match core.split_once('.') {
5465        Some((a, b)) => (a, Some(b)),
5466        None => (core, None),
5467    };
5468    let bytes = time.as_bytes();
5469    if bytes.len() != 8 || bytes[2] != b':' || bytes[5] != b':' {
5470        return None;
5471    }
5472    let hh: i64 = time[0..2].parse().ok()?;
5473    let mm: i64 = time[3..5].parse().ok()?;
5474    let ss: i64 = time[6..8].parse().ok()?;
5475    if !(0..24).contains(&hh) || !(0..60).contains(&mm) || !(0..60).contains(&ss) {
5476        return None;
5477    }
5478    let frac_micros: i64 = match frac_str {
5479        None => 0,
5480        Some(f) => {
5481            // Pad right with zeros to 6 digits, then truncate extras.
5482            if f.is_empty() || f.len() > 9 {
5483                return None;
5484            }
5485            let mut padded = String::with_capacity(6);
5486            padded.push_str(&f[..f.len().min(6)]);
5487            while padded.len() < 6 {
5488                padded.push('0');
5489            }
5490            padded.parse().ok()?
5491        }
5492    };
5493    Some((
5494        ((hh * 3600 + mm * 60 + ss) * 1_000_000) + frac_micros,
5495        tz_micros,
5496    ))
5497}
5498
/// Locate the TZ-offset sign (`+` or `-`) that follows an
/// `HH:MM:SS[.fff]` time string. Returns the byte index of the first
/// sign found at or after byte 8 together with the sign byte itself,
/// or `None` when there is none (including any input of 8 bytes or
/// fewer). The first 8 bytes (`HH:MM:SS`) are never examined.
fn find_offset_sign(t: &str) -> Option<(usize, u8)> {
    t.bytes()
        .enumerate()
        .skip(8)
        .find(|&(_, byte)| matches!(byte, b'+' | b'-'))
}
5518
/// Parse `+OO`, `+OO:MM`, `+OOMM`, `-OO`, `-OO:MM`, `-OOMM` into a
/// UTC-offset microsecond delta. `suffix` still carries its leading
/// sign character, which is skipped; `is_positive` tells which sign it
/// was and decides the sign of the result.
///
/// Returns `None` for an empty suffix, for any field that is not made
/// purely of ASCII digits (so a second sign or non-ASCII text is
/// rejected instead of panicking on a mid-character slice), for hours
/// above 18 and for minutes above 59.
fn parse_tz_offset_suffix(suffix: &str, is_positive: bool) -> Option<i64> {
    /// Parse a non-empty, digits-only field.
    fn field(text: &str) -> Option<i64> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        text.parse().ok()
    }

    // Drop the sign character; `get` returns `None` instead of
    // panicking when the suffix is empty.
    let body = suffix.get(1..)?;
    let (hh, mm): (i64, i64) = if let Some((h, m)) = body.split_once(':') {
        (field(h)?, field(m)?)
    } else {
        match body.len() {
            2 => (field(body)?, 0),
            // PG's "+0530" form lacks the colon. A 3-char body (`OOM`,
            // e.g. `+053`) is ambiguous and PG doesn't emit it, so it
            // falls through to the rejection below.
            4 => (field(body.get(..2)?)?, field(body.get(2..)?)?),
            _ => return None,
        }
    };
    if !(0..=18).contains(&hh) || !(0..60).contains(&mm) {
        return None;
    }
    let abs = (hh * 3600 + mm * 60) * 1_000_000;
    Some(if is_positive { abs } else { -abs })
}
5550
/// Render an `Interval { months, micros }` in a PG-ish shape modelled
/// on `psql`'s text format. The months part contributes years and
/// months; the microsecond part contributes days and a
/// `HH:MM:SS[.frac]` clock. Zero-valued parts are left out, parts are
/// joined with single spaces, and an all-zero interval renders as `0`.
pub fn format_interval(months: i32, micros: i64) -> String {
    const MICROS_PER_DAY: i64 = 86_400_000_000;

    /// PG uses the singular unit only for exactly `+1`; `-1` and every
    /// other count take the plural.
    fn label(count: i64, singular: &'static str, plural: &'static str) -> &'static str {
        if count == 1 {
            singular
        } else {
            plural
        }
    }

    let mut pieces: Vec<String> = Vec::new();

    let (years, mons) = (months / 12, months % 12);
    if years != 0 {
        pieces.push(format!(
            "{years} {}",
            label(i64::from(years), "year", "years")
        ));
    }
    if mons != 0 {
        pieces.push(format!("{mons} {}", label(i64::from(mons), "mon", "mons")));
    }

    let days = micros / MICROS_PER_DAY;
    let clock = micros % MICROS_PER_DAY;
    if days != 0 {
        pieces.push(format!("{days} {}", label(days, "day", "days")));
    }
    if clock != 0 {
        let sign = if clock < 0 { "-" } else { "" };
        // |clock| is below one day, so taking the magnitude cannot
        // overflow.
        let magnitude = clock.abs();
        let whole_secs = magnitude / 1_000_000;
        let sub_second = magnitude % 1_000_000;
        let (hh, mm, ss) = (whole_secs / 3600, (whole_secs / 60) % 60, whole_secs % 60);
        let mut text = format!("{sign}{hh:02}:{mm:02}:{ss:02}");
        if sub_second != 0 {
            // Six zero-padded digits with trailing zeros removed.
            let padded = format!("{sub_second:06}");
            text.push('.');
            text.push_str(padded.trim_end_matches('0'));
        }
        pieces.push(text);
    }

    if pieces.is_empty() {
        String::from("0")
    } else {
        pieces.join(" ")
    }
}
5604
5605/// Add `months` (signed) to a `(year, month, day)` triple using PG's
5606/// clamp-to-last-day rule (so `'2024-01-31' + 1 month` → `'2024-02-29'`).
5607fn add_months_to_civil(y: i32, m: u32, d: u32, months: i32) -> (i32, u32, u32) {
5608    let total_months = i64::from(y) * 12 + i64::from(m) - 1 + i64::from(months);
5609    let new_year = i32::try_from(total_months.div_euclid(12)).unwrap_or(i32::MAX);
5610    let new_month_zero = total_months.rem_euclid(12);
5611    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
5612    let new_month = (new_month_zero as u32) + 1;
5613    let max_day = days_in_month(new_year, new_month);
5614    (new_year, new_month, d.min(max_day))
5615}
5616
/// Number of days in month `m` (1 = January) of year `y`, using the
/// proleptic Gregorian leap rule. Any month outside 1..=12 gets the
/// 30-day fallback (callers normalise first; this is only defensive).
const fn days_in_month(y: i32, m: u32) -> u32 {
    // Leap year: divisible by 4, and either not a century year or
    // divisible by 400.
    let is_leap = y.rem_euclid(4) == 0 && (y.rem_euclid(100) != 0 || y.rem_euclid(400) == 0);
    match m {
        2 if is_leap => 29,
        2 => 28,
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        // 4 / 6 / 9 / 11, plus anything out of range.
        _ => 30,
    }
}
5633
/// v7.10.9 — render a TEXT[] in PG's external array form
/// (`{a,b,NULL}`). NULL elements use the literal token `NULL`.
/// An element is double-quoted (with `\\` / `\"` escapes) when it is
/// empty, spells `NULL` in any ASCII case, or contains a comma,
/// brace, double quote, backslash or any whitespace character —
/// space, tab, line feed, carriage return, vertical tab or form
/// feed. Leaving such whitespace unquoted would let an array parser
/// strip or misread it. Public so the wire layer can produce the
/// canonical text-mode encoding.
pub fn format_text_array(items: &[Option<String>]) -> String {
    /// Decide whether an element must be wrapped in double quotes.
    fn needs_quote(element: &str) -> bool {
        element.is_empty()
            || element.eq_ignore_ascii_case("NULL")
            || element.chars().any(|c| {
                matches!(
                    c,
                    ',' | '{' | '}' | '"' | '\\' | ' ' | '\t' | '\n' | '\r' | '\x0B' | '\x0C'
                )
            })
    }

    let mut out = String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) if needs_quote(s) => {
                out.push('"');
                for c in s.chars() {
                    // Inside quotes only `"` and `\` need escaping.
                    if c == '"' || c == '\\' {
                        out.push('\\');
                    }
                    out.push(c);
                }
                out.push('"');
            }
            Some(s) => out.push_str(s),
        }
    }
    out.push('}');
    out
}
5671
/// v7.11.14 — render an INT[] in PG's external array form
/// (`{1,2,NULL}`). Elements are comma-separated with no spaces;
/// integers never need quoting, and a NULL element is written as the
/// bare token `NULL`.
pub fn format_int_array(items: &[Option<i32>]) -> String {
    let rendered: Vec<String> = items
        .iter()
        .map(|item| item.map_or_else(|| "NULL".to_string(), |n| n.to_string()))
        .collect();
    format!("{{{}}}", rendered.join(","))
}
5690
/// v7.11.14 — render a BIGINT[] in PG's external array form
/// (`{1,2,NULL}`). Elements are comma-separated with no spaces and a
/// NULL element is written as the bare token `NULL`.
pub fn format_bigint_array(items: &[Option<i64>]) -> String {
    let mut rendered = String::with_capacity(2 + items.len() * 6);
    rendered.push('{');
    // Empty before the first element, a comma before every later one.
    let mut separator = "";
    for item in items {
        rendered.push_str(separator);
        separator = ",";
        match *item {
            Some(n) => rendered.push_str(&n.to_string()),
            None => rendered.push_str("NULL"),
        }
    }
    rendered.push('}');
    rendered
}
5708
5709/// v7.12.0 — render a `tsvector` in PG's external form:
5710/// `'lex':1,2A 'word':3` (single-quoted lexemes, optional
5711/// `:positions`, optional weight letter `A/B/C/D` per position).
5712/// Lexemes already arrive sorted + deduped from the engine. Used
5713/// by the wire layer (OID 3614) and by SELECT-text output.
5714pub fn format_tsvector(lexs: &[TsLexeme]) -> String {
5715    let mut out = String::with_capacity(lexs.len() * 12);
5716    for (i, l) in lexs.iter().enumerate() {
5717        if i > 0 {
5718            out.push(' ');
5719        }
5720        out.push('\'');
5721        for c in l.word.chars() {
5722            if c == '\'' {
5723                out.push('\'');
5724            }
5725            out.push(c);
5726        }
5727        out.push('\'');
5728        if !l.positions.is_empty() {
5729            for (pi, p) in l.positions.iter().enumerate() {
5730                out.push(if pi == 0 { ':' } else { ',' });
5731                out.push_str(&p.to_string());
5732            }
5733            // v7.12.0 — weight is per-lexeme (the v7.12 design
5734            // collapses PG's per-position weight into one letter).
5735            // Emit once after the last position; default `D`
5736            // (weight=0) stays implicit.
5737            match l.weight {
5738                3 => out.push('A'),
5739                2 => out.push('B'),
5740                1 => out.push('C'),
5741                _ => {}
5742            }
5743        }
5744    }
5745    out
5746}
5747
5748/// v7.12.0 — render a `tsquery` in PG's external form. Operator
5749/// precedence: `!` > `&` > `|`. Phrase distance shown as `<N>`.
5750pub fn format_tsquery(ast: &TsQueryAst) -> String {
5751    fn go(ast: &TsQueryAst, parent_prec: u8, out: &mut String) {
5752        // 0 = top, 1 = OR, 2 = AND, 3 = NOT/Phrase, 4 = atom.
5753        let (own_prec, write_self): (u8, &dyn Fn(&mut String)) = match ast {
5754            TsQueryAst::Or(_, _) => (1, &|_| {}),
5755            TsQueryAst::And(_, _) | TsQueryAst::Phrase { .. } => (2, &|_| {}),
5756            TsQueryAst::Not(_) => (3, &|_| {}),
5757            TsQueryAst::Term { .. } => (4, &|_| {}),
5758        };
5759        let need_parens = own_prec < parent_prec;
5760        if need_parens {
5761            out.push('(');
5762        }
5763        match ast {
5764            TsQueryAst::Term { word, .. } => {
5765                out.push('\'');
5766                for c in word.chars() {
5767                    if c == '\'' {
5768                        out.push('\'');
5769                    }
5770                    out.push(c);
5771                }
5772                out.push('\'');
5773            }
5774            TsQueryAst::And(a, b) => {
5775                go(a, own_prec, out);
5776                out.push_str(" & ");
5777                go(b, own_prec, out);
5778            }
5779            TsQueryAst::Or(a, b) => {
5780                go(a, own_prec, out);
5781                out.push_str(" | ");
5782                go(b, own_prec, out);
5783            }
5784            TsQueryAst::Not(x) => {
5785                out.push('!');
5786                go(x, own_prec, out);
5787            }
5788            TsQueryAst::Phrase {
5789                left,
5790                right,
5791                distance,
5792            } => {
5793                go(left, own_prec, out);
5794                out.push_str(&alloc::format!(" <{distance}> "));
5795                go(right, own_prec, out);
5796            }
5797        }
5798        write_self(out);
5799        if need_parens {
5800            out.push(')');
5801        }
5802    }
5803    let mut out = String::new();
5804    go(ast, 0, &mut out);
5805    out
5806}
5807
5808/// v7.12.0 — decode PG external form `'word':1,2A 'other':3` into
5809/// a `Vec<TsLexeme>`. Lexemes are sorted ascending by `word` (with
5810/// duplicates merged on positions) so the output matches the
5811/// engine invariant. Empty input yields an empty vector.
5812///
5813/// v7.12.0 only ships the cast-literal entry. Full `to_tsvector`
5814/// (Unicode word-split + Porter stemming + stopwords) lands in
5815/// v7.12.1.
5816pub fn decode_tsvector_external(s: &str) -> Result<Vec<TsLexeme>, EvalError> {
5817    let mut out: Vec<TsLexeme> = Vec::new();
5818    let mut i = 0;
5819    let bytes = s.as_bytes();
5820    while i < bytes.len() {
5821        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
5822            i += 1;
5823        }
5824        if i >= bytes.len() {
5825            break;
5826        }
5827        // Quoted form `'word'` (with embedded `''` for a literal
5828        // single quote, mirroring PG).
5829        let word = if bytes[i] == b'\'' {
5830            i += 1;
5831            let mut w = String::new();
5832            loop {
5833                if i >= bytes.len() {
5834                    return Err(EvalError::TypeMismatch {
5835                        detail: "tsvector literal: unterminated quoted lexeme".into(),
5836                    });
5837                }
5838                let b = bytes[i];
5839                if b == b'\'' {
5840                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
5841                        w.push('\'');
5842                        i += 2;
5843                    } else {
5844                        i += 1;
5845                        break;
5846                    }
5847                } else {
5848                    w.push(b as char);
5849                    i += 1;
5850                }
5851            }
5852            w
5853        } else {
5854            // Bare form — read until whitespace, ':' or end.
5855            let start = i;
5856            while i < bytes.len() && !bytes[i].is_ascii_whitespace() && bytes[i] != b':' {
5857                i += 1;
5858            }
5859            core::str::from_utf8(&bytes[start..i])
5860                .map_err(|_| EvalError::TypeMismatch {
5861                    detail: "tsvector literal: non-UTF-8 lexeme".into(),
5862                })?
5863                .to_string()
5864        };
5865        if word.is_empty() {
5866            return Err(EvalError::TypeMismatch {
5867                detail: "tsvector literal: empty lexeme".into(),
5868            });
5869        }
5870        // Optional `:pos[,pos][,pos]`. Each position is u16; each
5871        // may carry a trailing weight letter A/B/C/D.
5872        let mut positions: Vec<u16> = Vec::new();
5873        let mut weight: u8 = 0;
5874        if i < bytes.len() && bytes[i] == b':' {
5875            i += 1;
5876            loop {
5877                let start = i;
5878                while i < bytes.len() && bytes[i].is_ascii_digit() {
5879                    i += 1;
5880                }
5881                if start == i {
5882                    return Err(EvalError::TypeMismatch {
5883                        detail: "tsvector literal: expected digit after ':'".into(),
5884                    });
5885                }
5886                let num: u16 = core::str::from_utf8(&bytes[start..i])
5887                    .expect("ascii digits")
5888                    .parse()
5889                    .map_err(|_| EvalError::TypeMismatch {
5890                        detail: alloc::format!(
5891                            "tsvector literal: position {} overflows u16",
5892                            core::str::from_utf8(&bytes[start..i]).unwrap_or("?")
5893                        ),
5894                    })?;
5895                positions.push(num);
5896                if i < bytes.len() {
5897                    let w = bytes[i];
5898                    if matches!(w, b'A' | b'B' | b'C' | b'D') {
5899                        weight = match w {
5900                            b'A' => 3,
5901                            b'B' => 2,
5902                            b'C' => 1,
5903                            _ => 0,
5904                        };
5905                        i += 1;
5906                    }
5907                }
5908                if i < bytes.len() && bytes[i] == b',' {
5909                    i += 1;
5910                    continue;
5911                }
5912                break;
5913            }
5914        }
5915        positions.sort_unstable();
5916        positions.dedup();
5917        // Merge into the output vector — sorted insert by word,
5918        // duplicate words merge positions.
5919        match out.binary_search_by(|l| l.word.as_str().cmp(word.as_str())) {
5920            Ok(idx) => {
5921                for p in positions {
5922                    if !out[idx].positions.contains(&p) {
5923                        out[idx].positions.push(p);
5924                    }
5925                }
5926                out[idx].positions.sort_unstable();
5927                if weight != 0 {
5928                    out[idx].weight = weight;
5929                }
5930            }
5931            Err(idx) => {
5932                out.insert(
5933                    idx,
5934                    TsLexeme {
5935                        word,
5936                        positions,
5937                        weight,
5938                    },
5939                );
5940            }
5941        }
5942    }
5943    Ok(out)
5944}
5945
5946/// v7.12.0 — decode PG external form `'foo' & 'bar' | !'baz'`
5947/// into a `TsQueryAst`. v7.12.0 supports the canonical
5948/// `to_tsquery` surface: single-quoted lexemes, `&` / `|` / `!`,
5949/// parens, and phrase `<N>`. Bare lexemes are accepted too. Full
5950/// `plainto_tsquery` / `websearch_to_tsquery` arrive in v7.12.1.
5951pub fn decode_tsquery_external(s: &str) -> Result<TsQueryAst, EvalError> {
5952    let mut p = TsQueryParser {
5953        bytes: s.as_bytes(),
5954        pos: 0,
5955    };
5956    p.skip_ws();
5957    if p.pos >= p.bytes.len() {
5958        return Err(EvalError::TypeMismatch {
5959            detail: "tsquery literal: empty".into(),
5960        });
5961    }
5962    let ast = p.parse_or()?;
5963    p.skip_ws();
5964    if p.pos < p.bytes.len() {
5965        return Err(EvalError::TypeMismatch {
5966            detail: alloc::format!("tsquery literal: trailing garbage at offset {}", p.pos),
5967        });
5968    }
5969    Ok(ast)
5970}
5971
/// Byte-level cursor used by `decode_tsquery_external` to parse a
/// tsquery literal by recursive descent.
struct TsQueryParser<'a> {
    /// Raw bytes of the literal being parsed.
    bytes: &'a [u8],
    /// Index into `bytes` of the next unread byte.
    pos: usize,
}
5976
5977impl<'a> TsQueryParser<'a> {
5978    fn skip_ws(&mut self) {
5979        while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_whitespace() {
5980            self.pos += 1;
5981        }
5982    }
5983    fn peek(&self) -> Option<u8> {
5984        self.bytes.get(self.pos).copied()
5985    }
5986    fn parse_or(&mut self) -> Result<TsQueryAst, EvalError> {
5987        let mut lhs = self.parse_and()?;
5988        loop {
5989            self.skip_ws();
5990            if self.peek() != Some(b'|') {
5991                return Ok(lhs);
5992            }
5993            self.pos += 1;
5994            let rhs = self.parse_and()?;
5995            lhs = TsQueryAst::Or(Box::new(lhs), Box::new(rhs));
5996        }
5997    }
5998    fn parse_and(&mut self) -> Result<TsQueryAst, EvalError> {
5999        let mut lhs = self.parse_unary()?;
6000        loop {
6001            self.skip_ws();
6002            match self.peek() {
6003                Some(b'&') => {
6004                    self.pos += 1;
6005                    let rhs = self.parse_unary()?;
6006                    lhs = TsQueryAst::And(Box::new(lhs), Box::new(rhs));
6007                }
6008                Some(b'<') => {
6009                    // Phrase distance `<N>`.
6010                    self.pos += 1;
6011                    let start = self.pos;
6012                    while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
6013                        self.pos += 1;
6014                    }
6015                    if start == self.pos || self.peek() != Some(b'>') {
6016                        return Err(EvalError::TypeMismatch {
6017                            detail: "tsquery literal: malformed <N> phrase operator".into(),
6018                        });
6019                    }
6020                    let n: u16 = core::str::from_utf8(&self.bytes[start..self.pos])
6021                        .expect("ascii digits")
6022                        .parse()
6023                        .map_err(|_| EvalError::TypeMismatch {
6024                            detail: "tsquery literal: phrase distance overflows u16".into(),
6025                        })?;
6026                    self.pos += 1; // consume '>'
6027                    let rhs = self.parse_unary()?;
6028                    lhs = TsQueryAst::Phrase {
6029                        left: Box::new(lhs),
6030                        right: Box::new(rhs),
6031                        distance: n,
6032                    };
6033                }
6034                _ => return Ok(lhs),
6035            }
6036        }
6037    }
6038    fn parse_unary(&mut self) -> Result<TsQueryAst, EvalError> {
6039        self.skip_ws();
6040        if self.peek() == Some(b'!') {
6041            self.pos += 1;
6042            let inner = self.parse_unary()?;
6043            return Ok(TsQueryAst::Not(Box::new(inner)));
6044        }
6045        self.parse_atom()
6046    }
6047    fn parse_atom(&mut self) -> Result<TsQueryAst, EvalError> {
6048        self.skip_ws();
6049        match self.peek() {
6050            Some(b'(') => {
6051                self.pos += 1;
6052                let inner = self.parse_or()?;
6053                self.skip_ws();
6054                if self.peek() != Some(b')') {
6055                    return Err(EvalError::TypeMismatch {
6056                        detail: "tsquery literal: missing ')'".into(),
6057                    });
6058                }
6059                self.pos += 1;
6060                Ok(inner)
6061            }
6062            Some(b'\'') => {
6063                self.pos += 1;
6064                let mut w = String::new();
6065                loop {
6066                    match self.peek() {
6067                        None => {
6068                            return Err(EvalError::TypeMismatch {
6069                                detail: "tsquery literal: unterminated quoted lexeme".into(),
6070                            });
6071                        }
6072                        Some(b'\'') => {
6073                            if self.bytes.get(self.pos + 1) == Some(&b'\'') {
6074                                w.push('\'');
6075                                self.pos += 2;
6076                            } else {
6077                                self.pos += 1;
6078                                break;
6079                            }
6080                        }
6081                        Some(b) => {
6082                            w.push(b as char);
6083                            self.pos += 1;
6084                        }
6085                    }
6086                }
6087                // Optional `:WEIGHT_MASK` (digit-mask) — v7.12.0
6088                // accepts but always stores 0 (any).
6089                self.skip_weight_suffix();
6090                Ok(TsQueryAst::Term {
6091                    word: w,
6092                    weight_mask: 0,
6093                })
6094            }
6095            Some(b) if b.is_ascii_alphanumeric() || b == b'_' => {
6096                let start = self.pos;
6097                while self.pos < self.bytes.len() {
6098                    let c = self.bytes[self.pos];
6099                    if c.is_ascii_alphanumeric() || c == b'_' {
6100                        self.pos += 1;
6101                    } else {
6102                        break;
6103                    }
6104                }
6105                let w = core::str::from_utf8(&self.bytes[start..self.pos])
6106                    .map_err(|_| EvalError::TypeMismatch {
6107                        detail: "tsquery literal: non-UTF-8 lexeme".into(),
6108                    })?
6109                    .to_string();
6110                self.skip_weight_suffix();
6111                Ok(TsQueryAst::Term {
6112                    word: w,
6113                    weight_mask: 0,
6114                })
6115            }
6116            Some(b) => Err(EvalError::TypeMismatch {
6117                detail: alloc::format!(
6118                    "tsquery literal: unexpected byte {:?} at offset {}",
6119                    b as char,
6120                    self.pos
6121                ),
6122            }),
6123            None => Err(EvalError::TypeMismatch {
6124                detail: "tsquery literal: expected term".into(),
6125            }),
6126        }
6127    }
6128    fn skip_weight_suffix(&mut self) {
6129        if self.peek() != Some(b':') {
6130            return;
6131        }
6132        self.pos += 1;
6133        while let Some(b) = self.peek() {
6134            if matches!(
6135                b,
6136                b'A' | b'B' | b'C' | b'D' | b'a' | b'b' | b'c' | b'd' | b'*'
6137            ) || b.is_ascii_digit()
6138            {
6139                self.pos += 1;
6140            } else {
6141                break;
6142            }
6143        }
6144    }
6145}
6146
/// v7.10.4 — format a BYTEA payload the way PG's hex output does: a
/// literal `\x` followed by two lowercase hex digits per byte. Public
/// so the wire layer can produce the bytea-as-text representation.
pub fn format_bytea_hex(b: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let nibble = |n: u8| char::from(DIGITS[usize::from(n)]);

    let mut text = String::with_capacity(2 * b.len() + 2);
    text.push('\\');
    text.push('x');
    // High nibble first, then low nibble, for every input byte.
    text.extend(
        b.iter()
            .flat_map(|&byte| [nibble(byte >> 4), nibble(byte & 0x0F)]),
    );
    text
}
6160
/// Produce the decimal text of a fixed-point `Numeric { scaled, scale }`.
///
/// With `scale == 0` the result is just the integer. Otherwise the
/// magnitude's digits are left-padded with zeros so at least one digit
/// precedes the decimal point, exactly `scale` digits follow it, and a
/// leading `-` is emitted when `scaled` is negative.
pub fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return scaled.to_string();
    }
    let frac_len = usize::from(scale);
    let sign = if scaled < 0 { "-" } else { "" };
    // Pad to `scale + 1` digits: guarantees a (possibly "0") integer part.
    let digits = format!(
        "{magnitude:0>width$}",
        magnitude = scaled.unsigned_abs(),
        width = frac_len + 1
    );
    let (int_part, frac_part) = digits.split_at(digits.len() - frac_len);
    format!("{sign}{int_part}.{frac_part}")
}
6192
6193fn cast_numeric_to_int(v: Value) -> Result<Value, EvalError> {
6194    match v {
6195        Value::Int(n) => Ok(Value::Int(n)),
6196        Value::BigInt(n) => i32::try_from(n)
6197            .map(Value::Int)
6198            .map_err(|_| EvalError::TypeMismatch {
6199                detail: format!("bigint {n} does not fit in int"),
6200            }),
6201        #[allow(clippy::cast_possible_truncation)]
6202        Value::Float(x) => Ok(Value::Int(x as i32)),
6203        Value::Text(s) => {
6204            s.trim()
6205                .parse::<i32>()
6206                .map(Value::Int)
6207                .map_err(|_| EvalError::TypeMismatch {
6208                    detail: format!("cannot parse {s:?} as int"),
6209                })
6210        }
6211        Value::Bool(b) => Ok(Value::Int(i32::from(b))),
6212        other => Err(EvalError::TypeMismatch {
6213            detail: format!("cannot cast {:?} to int", other.data_type()),
6214        }),
6215    }
6216}
6217
6218fn cast_numeric_to_bigint(v: Value) -> Result<Value, EvalError> {
6219    match v {
6220        Value::Int(n) => Ok(Value::BigInt(i64::from(n))),
6221        Value::BigInt(n) => Ok(Value::BigInt(n)),
6222        #[allow(clippy::cast_possible_truncation)]
6223        Value::Float(x) => Ok(Value::BigInt(x as i64)),
6224        Value::Text(s) => {
6225            s.trim()
6226                .parse::<i64>()
6227                .map(Value::BigInt)
6228                .map_err(|_| EvalError::TypeMismatch {
6229                    detail: format!("cannot parse {s:?} as bigint"),
6230                })
6231        }
6232        Value::Bool(b) => Ok(Value::BigInt(i64::from(b))),
6233        other => Err(EvalError::TypeMismatch {
6234            detail: format!("cannot cast {:?} to bigint", other.data_type()),
6235        }),
6236    }
6237}
6238
6239fn cast_numeric_to_float(v: Value) -> Result<Value, EvalError> {
6240    match v {
6241        Value::Int(n) => Ok(Value::Float(f64::from(n))),
6242        #[allow(clippy::cast_precision_loss)]
6243        Value::BigInt(n) => Ok(Value::Float(n as f64)),
6244        Value::Float(x) => Ok(Value::Float(x)),
6245        Value::Text(s) => {
6246            s.trim()
6247                .parse::<f64>()
6248                .map(Value::Float)
6249                .map_err(|_| EvalError::TypeMismatch {
6250                    detail: format!("cannot parse {s:?} as float"),
6251                })
6252        }
6253        other => Err(EvalError::TypeMismatch {
6254            detail: format!("cannot cast {:?} to float", other.data_type()),
6255        }),
6256    }
6257}
6258
6259fn cast_to_bool(v: Value) -> Result<Value, EvalError> {
6260    match v {
6261        Value::Bool(b) => Ok(Value::Bool(b)),
6262        Value::Int(n) => Ok(Value::Bool(n != 0)),
6263        Value::BigInt(n) => Ok(Value::Bool(n != 0)),
6264        Value::Text(s) => {
6265            let lo = s.trim().to_ascii_lowercase();
6266            match lo.as_str() {
6267                "true" | "t" | "yes" | "y" | "1" | "on" => Ok(Value::Bool(true)),
6268                "false" | "f" | "no" | "n" | "0" | "off" => Ok(Value::Bool(false)),
6269                _ => Err(EvalError::TypeMismatch {
6270                    detail: format!("cannot parse {s:?} as bool"),
6271                }),
6272            }
6273        }
6274        other => Err(EvalError::TypeMismatch {
6275            detail: format!("cannot cast {:?} to bool", other.data_type()),
6276        }),
6277    }
6278}
6279
6280/// Parse a `Value::Text("[1.0, 2.0, 3.0]")` into a `Value::Vector(..)`. Mirrors
6281/// pgvector's `'[..]'::vector` cast. NULL casts as NULL.
6282pub fn cast_to_vector(v: Value) -> Result<Value, EvalError> {
6283    match v {
6284        Value::Null => Ok(Value::Null),
6285        Value::Vector(v) => Ok(Value::Vector(v)),
6286        Value::Text(s) => parse_vector_text(&s)
6287            .map(Value::Vector)
6288            .ok_or(EvalError::TypeMismatch {
6289                detail: format!("cannot parse {s:?} as a vector literal"),
6290            }),
6291        other => Err(EvalError::TypeMismatch {
6292            detail: format!("::vector requires text input, got {:?}", other.data_type()),
6293        }),
6294    }
6295}
6296
/// Parse a bracketed, comma-separated list such as `"[1.0, 2.0, -3]"`
/// into a `Vec<f32>`. Whitespace around the brackets and around each
/// element is ignored, and `"[]"` gives an empty vector. Returns `None`
/// when the brackets are missing or any element fails to parse as `f32`.
pub fn parse_vector_text(s: &str) -> Option<Vec<f32>> {
    let body = s.trim().strip_prefix('[')?.strip_suffix(']')?.trim();
    if body.is_empty() {
        return Some(Vec::new());
    }
    // Collecting into `Option<Vec<_>>` stops at the first bad element.
    body.split(',')
        .map(|piece| piece.trim().parse::<f32>().ok())
        .collect()
}
6312
6313fn literal_to_value(l: &Literal) -> Value {
6314    match l {
6315        Literal::Integer(n) => {
6316            if let Ok(small) = i32::try_from(*n) {
6317                Value::Int(small)
6318            } else {
6319                Value::BigInt(*n)
6320            }
6321        }
6322        Literal::Float(x) => Value::Float(*x),
6323        Literal::String(s) => Value::Text(s.clone()),
6324        Literal::Vector(v) => Value::Vector(v.clone()),
6325        Literal::TextArray(items) => Value::TextArray(items.clone()),
6326        Literal::IntArray(items) => Value::IntArray(items.clone()),
6327        Literal::BigIntArray(items) => Value::BigIntArray(items.clone()),
6328        Literal::Bool(b) => Value::Bool(*b),
6329        Literal::Null => Value::Null,
6330        Literal::Interval { months, micros, .. } => Value::Interval {
6331            months: *months,
6332            micros: *micros,
6333        },
6334    }
6335}
6336
6337/// v7.17.0 Phase 2.5 — look up the collation of a column reference
6338/// in the current evaluation context. Returns `None` when the
6339/// expression is not a column reference (e.g. literal / function
6340/// call) or the column can't be resolved (caller falls back to
6341/// `Collation::Binary` semantics).
6342pub(crate) fn column_collation(e: &Expr, ctx: &EvalContext<'_>) -> Option<spg_storage::Collation> {
6343    let Expr::Column(c) = e else {
6344        return None;
6345    };
6346    if let Some(q) = &c.qualifier {
6347        let composite = alloc::format!("{q}.{name}", name = c.name);
6348        if let Some(s) = ctx.columns.iter().find(|s| s.name == composite) {
6349            return Some(s.collation);
6350        }
6351    }
6352    if let Some(s) = ctx.columns.iter().find(|s| s.name == c.name) {
6353        return Some(s.collation);
6354    }
6355    // Bare-name fallback for joined schemas (same shape as
6356    // resolve_column): match a single composite ending in
6357    // ".<name>".
6358    let suffix = alloc::format!(".{name}", name = c.name);
6359    let mut matches = ctx.columns.iter().filter(|s| s.name.ends_with(&suffix));
6360    let first = matches.next();
6361    let extra = matches.next();
6362    match (first, extra) {
6363        (Some(s), None) => Some(s.collation),
6364        _ => None,
6365    }
6366}
6367
6368/// v7.17.0 Phase 2.5 — if the comparison op is text-equality and
6369/// either operand references a CaseInsensitive column, return
6370/// ASCII-folded copies of both Text values; otherwise pass
6371/// through. Only Eq / NotEq / Lt / LtEq / Gt / GtEq trigger the
6372/// fold — relational operators on text still honour collation
6373/// the same way (PG semantics). Non-Text values pass through.
6374fn collation_fold_for_compare(
6375    op: BinOp,
6376    lhs: &Expr,
6377    rhs: &Expr,
6378    l: Value,
6379    r: Value,
6380    ctx: &EvalContext<'_>,
6381) -> (Value, Value) {
6382    if !matches!(
6383        op,
6384        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq
6385    ) {
6386        return (l, r);
6387    }
6388    let lhs_col = column_collation(lhs, ctx);
6389    let rhs_col = column_collation(rhs, ctx);
6390    let ci = matches!(lhs_col, Some(spg_storage::Collation::CaseInsensitive))
6391        || matches!(rhs_col, Some(spg_storage::Collation::CaseInsensitive));
6392    if !ci {
6393        return (l, r);
6394    }
6395    let fold = |v: Value| match v {
6396        Value::Text(s) => Value::Text(s.to_ascii_lowercase()),
6397        other => other,
6398    };
6399    (fold(l), fold(r))
6400}
6401
6402fn resolve_column(c: &ColumnName, row: &Row, ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
6403    if let Some(q) = &c.qualifier {
6404        // Multi-table evaluation (joins): the synthesised schema uses
6405        // composite column names "alias.column" so we look that up
6406        // directly. Falls back to the single-table case below if the
6407        // composite isn't present.
6408        let composite = alloc::format!("{q}.{name}", name = c.name);
6409        if let Some(pos) = ctx.columns.iter().position(|s| s.name == composite) {
6410            return Ok(row.values[pos].clone());
6411        }
6412        let expected = ctx.table_alias.ok_or_else(|| EvalError::UnknownQualifier {
6413            qualifier: q.clone(),
6414        })?;
6415        if q != expected {
6416            return Err(EvalError::UnknownQualifier {
6417                qualifier: q.clone(),
6418            });
6419        }
6420    }
6421    if let Some(pos) = ctx.columns.iter().position(|s| s.name == c.name) {
6422        return Ok(row.values[pos].clone());
6423    }
6424    // Bare-name fallback for joined schemas: match any single composite
6425    // column ending in ".<name>"; ambiguity is an error.
6426    let suffix = alloc::format!(".{name}", name = c.name);
6427    let mut matches = ctx
6428        .columns
6429        .iter()
6430        .enumerate()
6431        .filter(|(_, s)| s.name.ends_with(&suffix));
6432    let first = matches.next();
6433    let extra = matches.next();
6434    match (first, extra) {
6435        (Some((pos, _)), None) => Ok(row.values[pos].clone()),
6436        (Some(_), Some(_)) => Err(EvalError::TypeMismatch {
6437            detail: alloc::format!("ambiguous column reference: {}", c.name),
6438        }),
6439        _ => Err(EvalError::ColumnNotFound {
6440            name: c.name.clone(),
6441        }),
6442    }
6443}
6444
6445fn apply_unary(op: UnOp, v: Value) -> Result<Value, EvalError> {
6446    match (op, v) {
6447        (_, Value::Null) => Ok(Value::Null),
6448        (UnOp::Neg, Value::Int(n)) => {
6449            n.checked_neg()
6450                .map(Value::Int)
6451                .ok_or(EvalError::TypeMismatch {
6452                    detail: "integer overflow on unary -".into(),
6453                })
6454        }
6455        (UnOp::Neg, Value::BigInt(n)) => {
6456            n.checked_neg()
6457                .map(Value::BigInt)
6458                .ok_or(EvalError::TypeMismatch {
6459                    detail: "bigint overflow on unary -".into(),
6460                })
6461        }
6462        (UnOp::Neg, Value::Float(x)) => Ok(Value::Float(-x)),
6463        (UnOp::Neg, other) => Err(EvalError::TypeMismatch {
6464            detail: format!("unary - applied to {:?}", other.data_type()),
6465        }),
6466        (UnOp::BitNot, Value::SmallInt(n)) => Ok(Value::Int(!i32::from(n))),
6467        (UnOp::BitNot, Value::Int(n)) => Ok(Value::Int(!n)),
6468        (UnOp::BitNot, Value::BigInt(n)) => Ok(Value::BigInt(!n)),
6469        (UnOp::BitNot, other) => Err(EvalError::TypeMismatch {
6470            detail: format!("cannot apply ~ to {other:?}"),
6471        }),
6472        (UnOp::Not, Value::Bool(b)) => Ok(Value::Bool(!b)),
6473        (UnOp::Not, other) => Err(EvalError::TypeMismatch {
6474            detail: format!("NOT applied to {:?}", other.data_type()),
6475        }),
6476    }
6477}
6478
6479/// v7.9.27b — true when two values are "not distinct" per PG:
6480/// both NULL counts as equal; otherwise reduces to regular Eq.
6481fn values_not_distinct(l: &Value, r: &Value) -> bool {
6482    match (l, r) {
6483        (Value::Null, Value::Null) => true,
6484        (Value::Null, _) | (_, Value::Null) => false,
6485        _ => l == r,
6486    }
6487}
6488
6489fn apply_binary(op: BinOp, l: Value, r: Value) -> Result<Value, EvalError> {
6490    // SQL three-valued logic for AND / OR with NULL is special — handle before
6491    // the general NULL-propagation rule.
6492    if let BinOp::And = op {
6493        return and_3vl(l, r);
6494    }
6495    if let BinOp::Or = op {
6496        return or_3vl(l, r);
6497    }
6498    // v7.9.27b — IS [NOT] DISTINCT FROM. NULL-safe equality:
6499    // `NULL IS NOT DISTINCT FROM NULL` → true. mailrs pg_dump.
6500    if let BinOp::IsNotDistinctFrom = op {
6501        return Ok(Value::Bool(values_not_distinct(&l, &r)));
6502    }
6503    if let BinOp::IsDistinctFrom = op {
6504        return Ok(Value::Bool(!values_not_distinct(&l, &r)));
6505    }
6506    // Everything else: any NULL operand → NULL.
6507    if l.is_null() || r.is_null() {
6508        return Ok(Value::Null);
6509    }
6510    // NUMERIC arithmetic and comparisons run in fixed-point; promote
6511    // integers to a common NUMERIC scale and stay in i128 throughout.
6512    if matches!(l, Value::Numeric { .. }) || matches!(r, Value::Numeric { .. }) {
6513        return apply_binary_numeric(op, l, r);
6514    }
6515    // Date / Timestamp arithmetic. PG semantics:
6516    //   * date + int      → date  (int is days)
6517    //   * int + date      → date
6518    //   * date - int      → date
6519    //   * date - date     → int   (days, signed)
6520    //   * timestamp - timestamp → bigint (microseconds, signed)
6521    // Other date/time math (`timestamp + int`, INTERVAL) lands later.
6522    if let Some(result) = apply_binary_calendar(op, &l, &r)? {
6523        return Ok(result);
6524    }
6525    match op {
6526        BinOp::Add => arith(l, r, i64::checked_add, |a, b| a + b, "+"),
6527        BinOp::Sub => arith(l, r, i64::checked_sub, |a, b| a - b, "-"),
6528        BinOp::Mul => arith(l, r, i64::checked_mul, |a, b| a * b, "*"),
6529        BinOp::Div => div_op(l, r),
6530        BinOp::L2Distance => l2_distance(l, r),
6531        BinOp::InnerProduct => inner_product(l, r),
6532        BinOp::CosineDistance => cosine_distance(l, r),
6533        BinOp::Concat => Ok(text_concat(&l, &r)),
6534        BinOp::BitOr => bitop(l, r, |a, b| a | b, "|"),
6535        BinOp::BitAnd => bitop(l, r, |a, b| a & b, "&"),
6536        BinOp::JsonGet => crate::json::path_get(&l, &r, false),
6537        BinOp::JsonGetText => crate::json::path_get(&l, &r, true),
6538        BinOp::JsonGetPath => crate::json::path_walk(&l, &r, false),
6539        BinOp::JsonGetPathText => crate::json::path_walk(&l, &r, true),
6540        BinOp::JsonContains => crate::json::contains(&l, &r),
6541        // v7.12.2 — `@@` match. NULL on either side → NULL; PG
6542        // accepts both orderings so we normalise.
6543        BinOp::TsMatch => ts_match(l, r),
6544        // v7.17.0 Phase 3.P0-47 — PG INET / CIDR containment + overlap.
6545        BinOp::InetContainedBy
6546        | BinOp::InetContainedByEq
6547        | BinOp::InetContains
6548        | BinOp::InetContainsEq
6549        | BinOp::InetOverlap => inet_op_bool_result(op, &l, &r),
6550        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6551            compare(op, &l, &r)
6552        }
6553        BinOp::And | BinOp::Or | BinOp::IsDistinctFrom | BinOp::IsNotDistinctFrom => {
6554            unreachable!("handled above")
6555        }
6556    }
6557}
6558
6559/// Calendar arithmetic. Returns `Some(value)` when the operand pair
6560/// is a date/time combo this function understands, `None` to let the
6561/// caller fall through to the regular numeric / text paths.
6562fn apply_binary_calendar(op: BinOp, l: &Value, r: &Value) -> Result<Option<Value>, EvalError> {
6563    let int_value = |v: &Value| -> Option<i64> {
6564        match v {
6565            Value::SmallInt(n) => Some(i64::from(*n)),
6566            Value::Int(n) => Some(i64::from(*n)),
6567            Value::BigInt(n) => Some(*n),
6568            _ => None,
6569        }
6570    };
6571    // Most-specific cases first — DATE-DATE / TS-TS subtraction before
6572    // DATE-integer subtraction, otherwise the latter swallows the
6573    // former with an `int_value(Date) = None` no-op fall-through.
6574    match (l, r) {
6575        (Value::Date(a), Value::Date(b)) if op == BinOp::Sub => {
6576            return Ok(Some(Value::BigInt(i64::from(*a) - i64::from(*b))));
6577        }
6578        (Value::Timestamp(a), Value::Timestamp(b)) if op == BinOp::Sub => {
6579            let delta = a.checked_sub(*b).ok_or(EvalError::TypeMismatch {
6580                detail: "TIMESTAMP - TIMESTAMP overflows i64 microseconds".into(),
6581            })?;
6582            return Ok(Some(Value::BigInt(delta)));
6583        }
6584        _ => {}
6585    }
6586    // INTERVAL arithmetic. PG: timestamp ± interval → timestamp,
6587    // date ± interval → date (if interval is pure days/months with no
6588    // sub-day component) else timestamp, interval ± interval → interval.
6589    if let Some(out) = apply_binary_interval(op, l, r)? {
6590        return Ok(Some(out));
6591    }
6592    match (l, r) {
6593        (Value::Date(d), other) if op == BinOp::Add => {
6594            if let Some(n) = int_value(other) {
6595                let days = i64::from(*d).saturating_add(n);
6596                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6597                    detail: "DATE + integer overflows DATE range".into(),
6598                })?;
6599                return Ok(Some(Value::Date(days32)));
6600            }
6601        }
6602        (other, Value::Date(d)) if op == BinOp::Add => {
6603            if let Some(n) = int_value(other) {
6604                let days = i64::from(*d).saturating_add(n);
6605                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6606                    detail: "integer + DATE overflows DATE range".into(),
6607                })?;
6608                return Ok(Some(Value::Date(days32)));
6609            }
6610        }
6611        (Value::Date(d), other) if op == BinOp::Sub => {
6612            if let Some(n) = int_value(other) {
6613                let days = i64::from(*d).saturating_sub(n);
6614                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6615                    detail: "DATE - integer overflows DATE range".into(),
6616                })?;
6617                return Ok(Some(Value::Date(days32)));
6618            }
6619        }
6620        _ => {}
6621    }
6622    Ok(None)
6623}
6624
6625/// INTERVAL-aware binary ops. Recognises:
6626///   timestamp ± interval → timestamp
6627///   date ± interval      → date (if interval is integral days/months only)
6628///                       → timestamp (if interval has sub-day micros)
6629///   interval ± interval  → interval
6630/// Commutative for `+`. Returns `None` for unrecognised operand pairs so
6631/// the caller can fall through.
6632pub(crate) fn apply_binary_interval(
6633    op: BinOp,
6634    l: &Value,
6635    r: &Value,
6636) -> Result<Option<Value>, EvalError> {
6637    // Normalise so the interval (if any) is always on the right for Add;
6638    // Sub stays left-handed because it isn't commutative.
6639    let (lhs, rhs, sign): (&Value, &Value, i64) = match (l, r, op) {
6640        (Value::Interval { .. }, _, BinOp::Add) => (r, l, 1),
6641        (_, Value::Interval { .. }, BinOp::Add) => (l, r, 1),
6642        (_, Value::Interval { .. }, BinOp::Sub) => (l, r, -1),
6643        _ => return Ok(None),
6644    };
6645    let Value::Interval {
6646        months: rhs_months,
6647        micros: rhs_us,
6648    } = rhs
6649    else {
6650        unreachable!("rhs guaranteed to be Interval by the match above");
6651    };
6652    let signed_months = i64::from(*rhs_months) * sign;
6653    let signed_micros = rhs_us.checked_mul(sign).ok_or(EvalError::TypeMismatch {
6654        detail: "INTERVAL micros overflows on negation".into(),
6655    })?;
6656    match lhs {
6657        Value::Timestamp(t) => Ok(Some(Value::Timestamp(add_interval_to_micros(
6658            *t,
6659            signed_months,
6660            signed_micros,
6661        )?))),
6662        Value::Date(d) => {
6663            // Date + interval stays a date when the interval has zero
6664            // sub-day microseconds; otherwise promote to TIMESTAMP at
6665            // midnight of the (months-shifted) date first.
6666            let day_aligned = signed_micros.rem_euclid(86_400_000_000) == 0;
6667            if day_aligned {
6668                let micros_per_day = 86_400_000_000_i64;
6669                let days_delta = signed_micros / micros_per_day;
6670                let shifted = shift_date_by_months(*d, signed_months)?;
6671                let new_days =
6672                    i64::from(shifted)
6673                        .checked_add(days_delta)
6674                        .ok_or(EvalError::TypeMismatch {
6675                            detail: "DATE ± INTERVAL overflows DATE range".into(),
6676                        })?;
6677                let days32 = i32::try_from(new_days).map_err(|_| EvalError::TypeMismatch {
6678                    detail: "DATE ± INTERVAL overflows DATE range".into(),
6679                })?;
6680                Ok(Some(Value::Date(days32)))
6681            } else {
6682                let base =
6683                    i64::from(*d)
6684                        .checked_mul(86_400_000_000)
6685                        .ok_or(EvalError::TypeMismatch {
6686                            detail: "DATE → TIMESTAMP lift overflows for INTERVAL math".into(),
6687                        })?;
6688                Ok(Some(Value::Timestamp(add_interval_to_micros(
6689                    base,
6690                    signed_months,
6691                    signed_micros,
6692                )?)))
6693            }
6694        }
6695        Value::Interval {
6696            months: lhs_months,
6697            micros: lhs_us,
6698        } => {
6699            let new_months = i64::from(*lhs_months)
6700                .checked_add(signed_months)
6701                .and_then(|n| i32::try_from(n).ok())
6702                .ok_or(EvalError::TypeMismatch {
6703                    detail: "INTERVAL ± INTERVAL months overflows i32".into(),
6704                })?;
6705            let new_micros = lhs_us
6706                .checked_add(signed_micros)
6707                .ok_or(EvalError::TypeMismatch {
6708                    detail: "INTERVAL ± INTERVAL micros overflows i64".into(),
6709                })?;
6710            Ok(Some(Value::Interval {
6711                months: new_months,
6712                micros: new_micros,
6713            }))
6714        }
6715        _ => Err(EvalError::TypeMismatch {
6716            detail: format!(
6717                "operator {op:?} not defined for {:?} and INTERVAL",
6718                lhs.data_type()
6719            ),
6720        }),
6721    }
6722}
6723
6724/// Shift a `Date` by a signed number of months using the PG clamp rule.
6725fn shift_date_by_months(d: i32, months: i64) -> Result<i32, EvalError> {
6726    let (y, m, day) = civil_from_days(d);
6727    let months_i32 = i32::try_from(months).map_err(|_| EvalError::TypeMismatch {
6728        detail: "INTERVAL months delta out of i32 range".into(),
6729    })?;
6730    let (ny, nm, nd) = add_months_to_civil(y, m, day, months_i32);
6731    Ok(days_from_civil(ny, nm, nd))
6732}
6733
6734/// Add (months, micros) to a `Timestamp` (microseconds since epoch).
6735/// Months part is applied through civil calendar with clamp-to-last-day;
6736/// micros part is plain i64 addition with overflow guard.
6737fn add_interval_to_micros(t: i64, months: i64, micros: i64) -> Result<i64, EvalError> {
6738    let mut out = t;
6739    if months != 0 {
6740        const MICROS_PER_DAY: i64 = 86_400_000_000;
6741        let days = out.div_euclid(MICROS_PER_DAY);
6742        let day_micros = out.rem_euclid(MICROS_PER_DAY);
6743        let day_i32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6744            detail: "TIMESTAMP day component out of i32 range for INTERVAL months math".into(),
6745        })?;
6746        let shifted_days = shift_date_by_months(day_i32, months)?;
6747        out = i64::from(shifted_days)
6748            .checked_mul(MICROS_PER_DAY)
6749            .and_then(|n| n.checked_add(day_micros))
6750            .ok_or(EvalError::TypeMismatch {
6751                detail: "TIMESTAMP ± INTERVAL months overflows i64 microseconds".into(),
6752            })?;
6753    }
6754    out.checked_add(micros).ok_or(EvalError::TypeMismatch {
6755        detail: "TIMESTAMP ± INTERVAL micros overflows i64".into(),
6756    })
6757}
6758
6759/// Dispatch for any binary op when at least one operand is NUMERIC.
6760/// Other-side integers / floats are promoted to a NUMERIC at a common
6761/// scale; all add / sub / mul / div / compare paths stay in i128.
6762#[allow(clippy::needless_pass_by_value)] // mirrors `apply_binary`'s by-value calling convention
6763fn apply_binary_numeric(op: BinOp, l: Value, r: Value) -> Result<Value, EvalError> {
6764    // Float still wins — Numeric + Float coerces both to f64 and runs
6765    // through the float path. PG demotes Numeric to float in this mix
6766    // too (the documented behaviour for `numeric + double precision`).
6767    let float_path = matches!(l, Value::Float(_)) || matches!(r, Value::Float(_));
6768    if float_path {
6769        let af = as_f64(&l)?;
6770        let bf = as_f64(&r)?;
6771        return match op {
6772            BinOp::Add => Ok(Value::Float(af + bf)),
6773            BinOp::Sub => Ok(Value::Float(af - bf)),
6774            BinOp::Mul => Ok(Value::Float(af * bf)),
6775            BinOp::Div => {
6776                if bf == 0.0 {
6777                    Err(EvalError::DivisionByZero)
6778                } else {
6779                    Ok(Value::Float(af / bf))
6780                }
6781            }
6782            BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6783                let ord = af.partial_cmp(&bf).ok_or(EvalError::TypeMismatch {
6784                    detail: "NaN in NUMERIC/Float comparison".into(),
6785                })?;
6786                Ok(Value::Bool(cmp_to_bool(op, ord)))
6787            }
6788            BinOp::Concat => Ok(text_concat(&l, &r)),
6789            other => Err(EvalError::TypeMismatch {
6790                detail: format!("operator {other:?} not defined for NUMERIC and Float"),
6791            }),
6792        };
6793    }
6794    // Promote integer ↔ numeric to a shared scale (max of both sides).
6795    let (a, sa) = numeric_or_widen(&l).ok_or_else(|| EvalError::TypeMismatch {
6796        detail: format!("NUMERIC op against non-numeric {:?}", l.data_type()),
6797    })?;
6798    let (b, sb) = numeric_or_widen(&r).ok_or_else(|| EvalError::TypeMismatch {
6799        detail: format!("NUMERIC op against non-numeric {:?}", r.data_type()),
6800    })?;
6801    match op {
6802        BinOp::Add | BinOp::Sub => {
6803            let target_scale = sa.max(sb);
6804            let lhs = rescale(a, sa, target_scale).ok_or(EvalError::TypeMismatch {
6805                detail: "NUMERIC overflow on rescale".into(),
6806            })?;
6807            let rhs = rescale(b, sb, target_scale).ok_or(EvalError::TypeMismatch {
6808                detail: "NUMERIC overflow on rescale".into(),
6809            })?;
6810            let r = match op {
6811                BinOp::Add => lhs.checked_add(rhs),
6812                BinOp::Sub => lhs.checked_sub(rhs),
6813                _ => unreachable!(),
6814            }
6815            .ok_or(EvalError::TypeMismatch {
6816                detail: "NUMERIC overflow on +/-".into(),
6817            })?;
6818            Ok(Value::Numeric {
6819                scaled: r,
6820                scale: target_scale,
6821            })
6822        }
6823        BinOp::Mul => {
6824            let scaled = a.checked_mul(b).ok_or(EvalError::TypeMismatch {
6825                detail: "NUMERIC overflow on *".into(),
6826            })?;
6827            Ok(Value::Numeric {
6828                scaled,
6829                scale: sa.saturating_add(sb),
6830            })
6831        }
6832        BinOp::Div => {
6833            if b == 0 {
6834                return Err(EvalError::DivisionByZero);
6835            }
6836            // Result scale: keep the wider operand's scale. Pre-scale
6837            // the numerator so the integer division retains that many
6838            // fractional digits. Round half-away-from-zero.
6839            let target_scale = sa.max(sb);
6840            // Numerator effective scale becomes sa + target_scale; we
6841            // bring it up to (target_scale + sb) so the divisor's scale
6842            // cancels cleanly.
6843            let bump = pow10_i128(target_scale.saturating_add(sb).saturating_sub(sa));
6844            let num = a.checked_mul(bump).ok_or(EvalError::TypeMismatch {
6845                detail: "NUMERIC overflow on / scaling".into(),
6846            })?;
6847            let half = if b >= 0 { b / 2 } else { -(b / 2) };
6848            let adj = if (num >= 0) == (b >= 0) {
6849                num + half
6850            } else {
6851                num - half
6852            };
6853            Ok(Value::Numeric {
6854                scaled: adj / b,
6855                scale: target_scale,
6856            })
6857        }
6858        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6859            let target_scale = sa.max(sb);
6860            let lhs = rescale(a, sa, target_scale).ok_or(EvalError::TypeMismatch {
6861                detail: "NUMERIC overflow on rescale".into(),
6862            })?;
6863            let rhs = rescale(b, sb, target_scale).ok_or(EvalError::TypeMismatch {
6864                detail: "NUMERIC overflow on rescale".into(),
6865            })?;
6866            Ok(Value::Bool(cmp_to_bool(op, lhs.cmp(&rhs))))
6867        }
6868        BinOp::Concat => Ok(text_concat(&l, &r)),
6869        other => Err(EvalError::TypeMismatch {
6870            detail: format!("operator {other:?} not defined for NUMERIC"),
6871        }),
6872    }
6873}
6874
6875/// Express `v` as a `(scaled_i128, scale)` pair. Plain integers come
6876/// back with `scale=0`; NUMERIC keeps its own scale. Anything else
6877/// returns `None` and the caller raises a type error.
6878fn numeric_or_widen(v: &Value) -> Option<(i128, u8)> {
6879    match v {
6880        Value::Numeric { scaled, scale } => Some((*scaled, *scale)),
6881        Value::Int(n) => Some((i128::from(*n), 0)),
6882        Value::SmallInt(n) => Some((i128::from(*n), 0)),
6883        Value::BigInt(n) => Some((i128::from(*n), 0)),
6884        _ => None,
6885    }
6886}
6887
/// Re-express the fixed-point integer `scaled`, which carries `src`
/// fractional digits, with `dst` fractional digits instead.
///
/// - `dst == src`: the value is returned unchanged.
/// - `dst > src`: the value is multiplied by `10^(dst - src)`. Returns
///   `None` when either that power of ten or the product does not fit in
///   an `i128` (previously the power overflowed: a panic in debug builds,
///   a silently wrapped factor in release builds).
/// - `dst < src`: the value is divided by `10^(src - dst)`, rounding half
///   away from zero. This direction never fails.
fn rescale(scaled: i128, src: u8, dst: u8) -> Option<i128> {
    if src == dst {
        return Some(scaled);
    }
    if dst > src {
        let factor = 10_i128.checked_pow(u32::from(dst - src))?;
        return scaled.checked_mul(factor);
    }
    let drop = match 10_i128.checked_pow(u32::from(src - dst)) {
        Some(d) => d,
        // The divisor is at least 10^39, more than twice the magnitude of
        // any i128, so the quotient rounds to zero.
        None => return Some(0),
    };
    let half = drop / 2;
    // Round on the remainder instead of computing `scaled ± half`, which
    // could itself overflow for values close to the i128 limits.
    let quotient = scaled / drop;
    let remainder = scaled % drop;
    let rounded = if remainder >= half {
        quotient + 1
    } else if remainder <= -half {
        quotient - 1
    } else {
        quotient
    };
    Some(rounded)
}
6905
/// `10^p` as an `i128`, usable in `const` contexts.
///
/// An `i128` holds exact powers of ten only up to `10^38`. For `p >= 39`
/// the result saturates at `i128::MAX` rather than overflowing (the old
/// multiply loop panicked in debug builds and wrapped silently in release
/// builds). A caller that feeds the result into `checked_mul` therefore
/// sees the overflow for every multiplicand whose magnitude is 2 or more.
const fn pow10_i128(p: u8) -> i128 {
    // `as` is a lossless u8 → u32 widening here; `u32::from` cannot be
    // called from a `const fn`.
    10_i128.saturating_pow(p as u32)
}
6915
6916const fn cmp_to_bool(op: BinOp, ord: core::cmp::Ordering) -> bool {
6917    use core::cmp::Ordering::{Equal, Greater, Less};
6918    match op {
6919        BinOp::Eq => matches!(ord, Equal),
6920        BinOp::NotEq => !matches!(ord, Equal),
6921        BinOp::Lt => matches!(ord, Less),
6922        BinOp::LtEq => matches!(ord, Less | Equal),
6923        BinOp::Gt => matches!(ord, Greater),
6924        BinOp::GtEq => matches!(ord, Greater | Equal),
6925        _ => false,
6926    }
6927}
6928
// `text_concat` (defined after `tsvector_concat` below) implements SQL `||`
// string concatenation. Operands are coerced to text via the same rule as
// the `::text` cast. NULL propagates (handled above; `text_concat` only
// runs with non-NULL operands).
6932/// v7.24 (round-16 C) — `tsvector || tsvector`. PG semantics: the
6933/// right side's positions shift by the left side's max position;
6934/// lexemes present on both sides merge (positions concatenated,
6935/// the higher weight wins — SPG models weight per lexeme, PG per
6936/// position, so the stronger label is the faithful collapse).
6937fn tsvector_concat(l: &[spg_storage::TsLexeme], r: &[spg_storage::TsLexeme]) -> Value {
6938    let shift = l
6939        .iter()
6940        .flat_map(|x| x.positions.iter().copied())
6941        .max()
6942        .unwrap_or(0);
6943    let mut out: Vec<spg_storage::TsLexeme> = l.to_vec();
6944    for lex in r {
6945        let shifted: Vec<u16> = lex
6946            .positions
6947            .iter()
6948            .map(|p| p.saturating_add(shift))
6949            .collect();
6950        if let Some(existing) = out.iter_mut().find(|x| x.word == lex.word) {
6951            existing.positions.extend(shifted);
6952            existing.positions.sort_unstable();
6953            existing.weight = existing.weight.max(lex.weight);
6954        } else {
6955            out.push(spg_storage::TsLexeme {
6956                word: lex.word.clone(),
6957                positions: shifted,
6958                weight: lex.weight,
6959            });
6960        }
6961    }
6962    out.sort_by(|a, b| a.word.cmp(&b.word));
6963    Value::TsVector(out)
6964}
6965
6966fn text_concat(l: &Value, r: &Value) -> Value {
6967    if let (Value::TsVector(a), Value::TsVector(b)) = (l, r) {
6968        return tsvector_concat(a, b);
6969    }
6970    // v7.11.8 — PG `||` overloads: TEXT[] || TEXT[] = concatenated array;
6971    // TEXT[] || TEXT (or TEXT || TEXT[]) prepends/appends the single
6972    // element. NULL || anything = NULL (PG semantics for arrays;
6973    // text concat treats NULL the same way after value_to_text).
6974    match (l, r) {
6975        (Value::Null, _) | (_, Value::Null) => {
6976            // PG text concat: NULL || x = NULL. Array concat: NULL || x = NULL.
6977            // Keep the legacy text path (value_to_text handles Null as ""),
6978            // but for arrays we surface real NULL to match PG.
6979            if matches!(
6980                l,
6981                Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_) | Value::Bytes(_)
6982            ) || matches!(
6983                r,
6984                Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_) | Value::Bytes(_)
6985            ) {
6986                return Value::Null;
6987            }
6988        }
6989        (Value::TextArray(a), Value::TextArray(b)) => {
6990            let mut out = a.clone();
6991            out.extend(b.iter().cloned());
6992            return Value::TextArray(out);
6993        }
6994        (Value::TextArray(a), Value::Text(s)) => {
6995            let mut out = a.clone();
6996            out.push(Some(s.clone()));
6997            return Value::TextArray(out);
6998        }
6999        (Value::Text(s), Value::TextArray(b)) => {
7000            let mut out: alloc::vec::Vec<Option<alloc::string::String>> =
7001                alloc::vec::Vec::with_capacity(1 + b.len());
7002            out.push(Some(s.clone()));
7003            out.extend(b.iter().cloned());
7004            return Value::TextArray(out);
7005        }
7006        // v7.11.13 — IntArray / BigIntArray `||` overloads. Same
7007        // PG semantics as TEXT[]: array||array concatenates, and
7008        // array||scalar appends/prepends. Mixed Int/BigInt widens
7009        // to BigIntArray.
7010        (Value::IntArray(a), Value::IntArray(b)) => {
7011            let mut out = a.clone();
7012            out.extend(b.iter().copied());
7013            return Value::IntArray(out);
7014        }
7015        (Value::IntArray(a), Value::Int(n)) => {
7016            let mut out = a.clone();
7017            out.push(Some(*n));
7018            return Value::IntArray(out);
7019        }
7020        (Value::IntArray(a), Value::SmallInt(n)) => {
7021            let mut out = a.clone();
7022            out.push(Some(i32::from(*n)));
7023            return Value::IntArray(out);
7024        }
7025        (Value::Int(n), Value::IntArray(b)) => {
7026            let mut out: alloc::vec::Vec<Option<i32>> = alloc::vec::Vec::with_capacity(1 + b.len());
7027            out.push(Some(*n));
7028            out.extend(b.iter().copied());
7029            return Value::IntArray(out);
7030        }
7031        (Value::SmallInt(n), Value::IntArray(b)) => {
7032            let mut out: alloc::vec::Vec<Option<i32>> = alloc::vec::Vec::with_capacity(1 + b.len());
7033            out.push(Some(i32::from(*n)));
7034            out.extend(b.iter().copied());
7035            return Value::IntArray(out);
7036        }
7037        (Value::BigIntArray(a), Value::BigIntArray(b)) => {
7038            let mut out = a.clone();
7039            out.extend(b.iter().copied());
7040            return Value::BigIntArray(out);
7041        }
7042        (Value::BigIntArray(a), Value::IntArray(b)) => {
7043            let mut out = a.clone();
7044            out.extend(b.iter().map(|o| o.map(i64::from)));
7045            return Value::BigIntArray(out);
7046        }
7047        (Value::IntArray(a), Value::BigIntArray(b)) => {
7048            let mut out: alloc::vec::Vec<Option<i64>> =
7049                a.iter().map(|o| o.map(i64::from)).collect();
7050            out.extend(b.iter().copied());
7051            return Value::BigIntArray(out);
7052        }
7053        (Value::BigIntArray(a), Value::BigInt(n)) => {
7054            let mut out = a.clone();
7055            out.push(Some(*n));
7056            return Value::BigIntArray(out);
7057        }
7058        (Value::BigIntArray(a), Value::Int(n)) => {
7059            let mut out = a.clone();
7060            out.push(Some(i64::from(*n)));
7061            return Value::BigIntArray(out);
7062        }
7063        (Value::BigIntArray(a), Value::SmallInt(n)) => {
7064            let mut out = a.clone();
7065            out.push(Some(i64::from(*n)));
7066            return Value::BigIntArray(out);
7067        }
7068        (Value::BigInt(n), Value::BigIntArray(b)) => {
7069            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
7070            out.push(Some(*n));
7071            out.extend(b.iter().copied());
7072            return Value::BigIntArray(out);
7073        }
7074        (Value::Int(n), Value::BigIntArray(b)) => {
7075            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
7076            out.push(Some(i64::from(*n)));
7077            out.extend(b.iter().copied());
7078            return Value::BigIntArray(out);
7079        }
7080        (Value::SmallInt(n), Value::BigIntArray(b)) => {
7081            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
7082            out.push(Some(i64::from(*n)));
7083            out.extend(b.iter().copied());
7084            return Value::BigIntArray(out);
7085        }
7086        // v7.11.15 — BYTEA `||` is byte concatenation.
7087        (Value::Bytes(a), Value::Bytes(b)) => {
7088            let mut out = a.clone();
7089            out.extend_from_slice(b);
7090            return Value::Bytes(out);
7091        }
7092        _ => {}
7093    }
7094    let a = value_to_text(l);
7095    let b = value_to_text(r);
7096    Value::Text(a + &b)
7097}
7098
7099/// pgvector inner-product `<#>`. Returns the *negative* dot product so
7100/// smaller still means more similar — same convention as pgvector.
7101fn inner_product(l: Value, r: Value) -> Result<Value, EvalError> {
7102    let (a, b) = unwrap_vec_pair(l, r, "<#>")?;
7103    let mut dot: f64 = 0.0;
7104    for (x, y) in a.iter().zip(b.iter()) {
7105        dot += f64::from(*x) * f64::from(*y);
7106    }
7107    Ok(Value::Float(-dot))
7108}
7109
7110/// pgvector cosine distance `<=>` — `1 - (a·b) / (‖a‖ ‖b‖)`. A zero-norm
7111/// operand produces NaN (matches pgvector).
7112fn cosine_distance(l: Value, r: Value) -> Result<Value, EvalError> {
7113    let (a, b) = unwrap_vec_pair(l, r, "<=>")?;
7114    let mut dot: f64 = 0.0;
7115    let mut na: f64 = 0.0;
7116    let mut nb: f64 = 0.0;
7117    for (x, y) in a.iter().zip(b.iter()) {
7118        let xf = f64::from(*x);
7119        let yf = f64::from(*y);
7120        dot += xf * yf;
7121        na += xf * xf;
7122        nb += yf * yf;
7123    }
7124    let denom = sqrt_newton(na) * sqrt_newton(nb);
7125    if denom == 0.0 {
7126        return Ok(Value::Float(f64::NAN));
7127    }
7128    Ok(Value::Float(1.0 - dot / denom))
7129}
7130
7131fn unwrap_vec_pair(l: Value, r: Value, op: &str) -> Result<(Vec<f32>, Vec<f32>), EvalError> {
7132    // v6.0.1: SQ8 cells coming through the SQL evaluator are
7133    // dequantised to f32 here so the existing scalar distance
7134    // arithmetic stays intact. HNSW kNN search continues to use
7135    // the asymmetric ADC variant inside `cell_to_query_metric_
7136    // distance` — this path only runs when a vector expression
7137    // lands in the evaluator (full-scan ORDER BY, SELECT
7138    // projection of `v <-> $1`, etc.).
7139    let to_f32 = |v: Value| -> Option<Vec<f32>> {
7140        match v {
7141            Value::Vector(a) => Some(a),
7142            Value::Sq8Vector(q) => Some(spg_storage::quantize::dequantize(&q)),
7143            // v6.0.3: bit-exact dequant for halfvec cells.
7144            Value::HalfVector(h) => Some(h.to_f32_vec()),
7145            _ => None,
7146        }
7147    };
7148    let l_ty = l.data_type();
7149    let r_ty = r.data_type();
7150    match (to_f32(l), to_f32(r)) {
7151        (Some(a), Some(b)) => {
7152            if a.len() != b.len() {
7153                return Err(EvalError::TypeMismatch {
7154                    detail: format!("vector dim mismatch in {op}: {} vs {}", a.len(), b.len()),
7155                });
7156            }
7157            Ok((a, b))
7158        }
7159        _ => Err(EvalError::TypeMismatch {
7160            detail: format!("{op} requires two vectors, got {l_ty:?} and {r_ty:?}"),
7161        }),
7162    }
7163}
7164
// Numeric arithmetic with widening — this describes `arith`, which is
// defined after `bitop` below:
// - both `Int` → `Int`, or `BigInt` when the result does not fit in 32 bits
// - `Int` op `BigInt` (either side) → `BigInt`
// - any `Float` involved → `Float`
7169/// Bitwise integer op (`|` / `&`). PG defines these for integer
7170/// types only — SmallInt widens to Int, Int x BigInt widens to
7171/// BigInt, anything else is a type error (mailrs embed round-12).
7172fn bitop(
7173    l: Value,
7174    r: Value,
7175    f: impl Fn(i64, i64) -> i64,
7176    op_name: &str,
7177) -> Result<Value, EvalError> {
7178    let widen = |v: Value| -> Value {
7179        match v {
7180            Value::SmallInt(n) => Value::Int(i32::from(n)),
7181            other => other,
7182        }
7183    };
7184    match (widen(l), widen(r)) {
7185        (Value::Int(a), Value::Int(b)) => {
7186            let result = f(i64::from(a), i64::from(b));
7187            // Two i32 inputs can't overflow i32 under | / &.
7188            Ok(Value::Int(result as i32))
7189        }
7190        (Value::Int(a), Value::BigInt(b)) | (Value::BigInt(b), Value::Int(a)) => {
7191            Ok(Value::BigInt(f(i64::from(a), b)))
7192        }
7193        (Value::BigInt(a), Value::BigInt(b)) => Ok(Value::BigInt(f(a, b))),
7194        (a, b) => Err(EvalError::TypeMismatch {
7195            detail: format!("cannot apply {op_name} to {a:?} and {b:?}"),
7196        }),
7197    }
7198}
7199
7200fn arith(
7201    l: Value,
7202    r: Value,
7203    int_op: impl Fn(i64, i64) -> Option<i64>,
7204    float_op: impl Fn(f64, f64) -> f64,
7205    op_name: &str,
7206) -> Result<Value, EvalError> {
7207    // Widen SmallInt to Int up front so the rest of the arithmetic
7208    // table only deals with Int / BigInt / Float pairs.
7209    let widen = |v: Value| -> Value {
7210        match v {
7211            Value::SmallInt(n) => Value::Int(i32::from(n)),
7212            other => other,
7213        }
7214    };
7215    let l = widen(l);
7216    let r = widen(r);
7217    match (l, r) {
7218        (Value::Int(a), Value::Int(b)) => {
7219            let result = int_op(i64::from(a), i64::from(b)).ok_or(EvalError::TypeMismatch {
7220                detail: format!("integer overflow on {op_name}"),
7221            })?;
7222            if let Ok(small) = i32::try_from(result) {
7223                Ok(Value::Int(small))
7224            } else {
7225                Ok(Value::BigInt(result))
7226            }
7227        }
7228        (Value::Int(a), Value::BigInt(b)) | (Value::BigInt(b), Value::Int(a)) => {
7229            let result = int_op(i64::from(a), b).ok_or(EvalError::TypeMismatch {
7230                detail: format!("bigint overflow on {op_name}"),
7231            })?;
7232            Ok(Value::BigInt(result))
7233        }
7234        (Value::BigInt(a), Value::BigInt(b)) => {
7235            let result = int_op(a, b).ok_or(EvalError::TypeMismatch {
7236                detail: format!("bigint overflow on {op_name}"),
7237            })?;
7238            Ok(Value::BigInt(result))
7239        }
7240        (a, b)
7241            if a.data_type() == Some(DataType::Float) || b.data_type() == Some(DataType::Float) =>
7242        {
7243            let af = as_f64(&a)?;
7244            let bf = as_f64(&b)?;
7245            Ok(Value::Float(float_op(af, bf)))
7246        }
7247        (a, b) => Err(EvalError::TypeMismatch {
7248            detail: format!(
7249                "{op_name} applied to non-numeric: {:?} vs {:?}",
7250                a.data_type(),
7251                b.data_type()
7252            ),
7253        }),
7254    }
7255}
7256
7257/// L2 (Euclidean) distance between two vectors of equal dimension.
7258/// Returned as `Value::Float(d)` so it composes with the existing
7259/// comparison / sort plumbing. Mismatched dims or non-vector operands
7260/// raise `TypeMismatch`.
7261#[allow(clippy::many_single_char_names)] // l, r, a, b, d are the natural names
7262fn l2_distance(l: Value, r: Value) -> Result<Value, EvalError> {
7263    // v6.0.1: route both operands through `unwrap_vec_pair` so SQ8
7264    // cells dequantise on the way in. Sub-f64 precision loss is
7265    // negligible vs the dequantisation noise the SQ8 path already
7266    // ships with.
7267    let (a, b) = unwrap_vec_pair(l, r, "<->")?;
7268    let mut sum: f64 = 0.0;
7269    for (x, y) in a.iter().zip(b.iter()) {
7270        let d = f64::from(*x) - f64::from(*y);
7271        sum += d * d;
7272    }
7273    Ok(Value::Float(sqrt_newton(sum)))
7274}
7275
/// Self-built `sqrt` for `f64` — `std::f64::sqrt` lives in `std`, which the
/// engine's `no_std` constraint disallows.
///
/// Newton-Raphson, seeded from the bit pattern of `x` and iterated until the
/// estimate stops changing. The previous version started from `g = x` and
/// ran exactly 10 rounds; each early round only halves the estimate, so
/// inputs far from 1 never converged (`x = 1e8` came back as roughly 9.8e4
/// instead of 1e4).
///
/// Returns `0.0` for zero and negative input, NaN for NaN, and `+inf` for
/// `+inf`.
fn sqrt_newton(x: f64) -> f64 {
    if x <= 0.0 {
        return 0.0;
    }
    if x.is_nan() || x == f64::INFINITY {
        return x;
    }
    // Halving the raw bits halves the biased exponent; adding back half of
    // the exponent bias (the bits of 1.0, shifted right once) restores it.
    // For normal inputs the seed is within a few percent of the true root.
    const HALF_BIAS: u64 = 0x3FF0_0000_0000_0000 >> 1;
    let mut g = f64::from_bits((x.to_bits() >> 1) + HALF_BIAS);
    // A handful of rounds suffices for normal inputs; the cap also covers
    // subnormals, where the seed is coarser, and guarantees termination if
    // the last bit flips between two neighbouring values.
    for _ in 0..64 {
        let next = 0.5 * (g + x / g);
        if next == g {
            break;
        }
        g = next;
    }
    g
}
7292
7293fn div_op(l: Value, r: Value) -> Result<Value, EvalError> {
7294    let any_float = matches!(l.data_type(), Some(DataType::Float))
7295        || matches!(r.data_type(), Some(DataType::Float));
7296    if any_float {
7297        let a = as_f64(&l)?;
7298        let b = as_f64(&r)?;
7299        if b == 0.0 {
7300            return Err(EvalError::DivisionByZero);
7301        }
7302        return Ok(Value::Float(a / b));
7303    }
7304    arith(
7305        l,
7306        r,
7307        |a, b| {
7308            if b == 0 { None } else { Some(a / b) }
7309        },
7310        |a, b| a / b,
7311        "/",
7312    )
7313    .map_err(|e| match e {
7314        // The closure returns None on b == 0; translate that into the dedicated
7315        // DivisionByZero variant instead of "integer overflow on /".
7316        EvalError::TypeMismatch { detail } if detail.contains('/') => EvalError::DivisionByZero,
7317        other => other,
7318    })
7319}
7320
7321fn as_f64(v: &Value) -> Result<f64, EvalError> {
7322    match v {
7323        Value::SmallInt(n) => Ok(f64::from(*n)),
7324        Value::Int(n) => Ok(f64::from(*n)),
7325        #[allow(clippy::cast_precision_loss)]
7326        Value::BigInt(n) => Ok(*n as f64),
7327        Value::Float(x) => Ok(*x),
7328        #[allow(clippy::cast_precision_loss)]
7329        Value::Numeric { scaled, scale } => {
7330            let mut div = 1.0_f64;
7331            for _ in 0..*scale {
7332                div *= 10.0;
7333            }
7334            Ok((*scaled as f64) / div)
7335        }
7336        other => Err(EvalError::TypeMismatch {
7337            detail: format!("cannot convert {:?} to FLOAT", other.data_type()),
7338        }),
7339    }
7340}
7341
7342fn compare(op: BinOp, l: &Value, r: &Value) -> Result<Value, EvalError> {
7343    let ord = match (l, r) {
7344        (Value::Int(a), Value::Int(b)) => i64::from(*a).cmp(&i64::from(*b)),
7345        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7346        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
7347        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
7348        (a, b)
7349            if matches!(a.data_type(), Some(DataType::Float))
7350                || matches!(b.data_type(), Some(DataType::Float)) =>
7351        {
7352            let af = as_f64(a)?;
7353            let bf = as_f64(b)?;
7354            af.partial_cmp(&bf).ok_or(EvalError::TypeMismatch {
7355                detail: "NaN in comparison".into(),
7356            })?
7357        }
7358        (Value::Text(a), Value::Text(b)) => a.cmp(b),
7359        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
7360        // Date / Timestamp compare on their integer storage repr.
7361        // Cross-domain (Date vs Timestamp) lifts the Date to the
7362        // matching midnight TIMESTAMP first.
7363        (Value::Date(a), Value::Date(b)) => a.cmp(b),
7364        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
7365        (Value::Date(a), Value::Timestamp(b)) => (i64::from(*a) * 86_400_000_000).cmp(b),
7366        (Value::Timestamp(a), Value::Date(b)) => a.cmp(&(i64::from(*b) * 86_400_000_000)),
7367        // PG-style implicit coercion: comparing a DATE / TIMESTAMP
7368        // column against a text literal lifts the literal into the
7369        // matching domain (e.g. `day >= '2024-01-01'`).
7370        (Value::Date(a), Value::Text(b)) => {
7371            let bd = parse_date_literal(b).ok_or_else(|| EvalError::TypeMismatch {
7372                detail: format!("cannot parse {b:?} as DATE for comparison"),
7373            })?;
7374            a.cmp(&bd)
7375        }
7376        (Value::Text(a), Value::Date(b)) => {
7377            let ad = parse_date_literal(a).ok_or_else(|| EvalError::TypeMismatch {
7378                detail: format!("cannot parse {a:?} as DATE for comparison"),
7379            })?;
7380            ad.cmp(b)
7381        }
7382        (Value::Timestamp(a), Value::Text(b)) => {
7383            let bt = parse_timestamp_literal(b).ok_or_else(|| EvalError::TypeMismatch {
7384                detail: format!("cannot parse {b:?} as TIMESTAMP for comparison"),
7385            })?;
7386            a.cmp(&bt)
7387        }
7388        (Value::Text(a), Value::Timestamp(b)) => {
7389            let at = parse_timestamp_literal(a).ok_or_else(|| EvalError::TypeMismatch {
7390                detail: format!("cannot parse {a:?} as TIMESTAMP for comparison"),
7391            })?;
7392            at.cmp(b)
7393        }
7394        // v7.17.0 — UUID byte-wise comparison; both sides UUID.
7395        (Value::Uuid(a), Value::Uuid(b)) => a.cmp(b),
7396        // v7.17.0 — PG promotes a `text` literal compared against a
7397        // `uuid` column into uuid (unknown-type literal inference).
7398        // Without this, `WHERE id = '550e...'` falls through to the
7399        // generic TypeMismatch — the application's literal becomes
7400        // an error rather than a comparison.
7401        (Value::Uuid(a), Value::Text(b)) => {
7402            let bu = spg_storage::parse_uuid_str(b).ok_or_else(|| EvalError::TypeMismatch {
7403                detail: format!("invalid input syntax for type uuid: {b:?}"),
7404            })?;
7405            a.cmp(&bu)
7406        }
7407        (Value::Text(a), Value::Uuid(b)) => {
7408            let au = spg_storage::parse_uuid_str(a).ok_or_else(|| EvalError::TypeMismatch {
7409                detail: format!("invalid input syntax for type uuid: {a:?}"),
7410            })?;
7411            au.cmp(b)
7412        }
7413        (a, b) => {
7414            return Err(EvalError::TypeMismatch {
7415                detail: format!(
7416                    "comparison between {:?} and {:?}",
7417                    a.data_type(),
7418                    b.data_type()
7419                ),
7420            });
7421        }
7422    };
7423    let result = match op {
7424        BinOp::Eq => ord.is_eq(),
7425        BinOp::NotEq => !ord.is_eq(),
7426        BinOp::Lt => ord.is_lt(),
7427        BinOp::LtEq => ord.is_le(),
7428        BinOp::Gt => ord.is_gt(),
7429        BinOp::GtEq => ord.is_ge(),
7430        BinOp::And
7431        | BinOp::Or
7432        | BinOp::BitOr
7433        | BinOp::BitAnd
7434        | BinOp::Add
7435        | BinOp::Sub
7436        | BinOp::Mul
7437        | BinOp::Div
7438        | BinOp::L2Distance
7439        | BinOp::InnerProduct
7440        | BinOp::CosineDistance
7441        | BinOp::Concat
7442        | BinOp::JsonGet
7443        | BinOp::JsonGetText
7444        | BinOp::JsonGetPath
7445        | BinOp::JsonGetPathText
7446        | BinOp::JsonContains
7447        | BinOp::TsMatch
7448        | BinOp::IsDistinctFrom
7449        | BinOp::IsNotDistinctFrom
7450        | BinOp::InetContainedBy
7451        | BinOp::InetContainedByEq
7452        | BinOp::InetContains
7453        | BinOp::InetContainsEq
7454        | BinOp::InetOverlap => {
7455            unreachable!("compare() only called with comparison ops")
7456        }
7457    };
7458    Ok(Value::Bool(result))
7459}
7460
7461// SQL three-valued AND / OR.
7462fn and_3vl(l: Value, r: Value) -> Result<Value, EvalError> {
7463    match (l, r) {
7464        (Value::Bool(false), _) | (_, Value::Bool(false)) => Ok(Value::Bool(false)),
7465        (Value::Bool(true), Value::Bool(true)) => Ok(Value::Bool(true)),
7466        (Value::Null, _) | (_, Value::Null) => Ok(Value::Null),
7467        (a, b) => Err(EvalError::TypeMismatch {
7468            detail: format!(
7469                "AND on non-boolean: {:?} and {:?}",
7470                a.data_type(),
7471                b.data_type()
7472            ),
7473        }),
7474    }
7475}
7476
7477fn or_3vl(l: Value, r: Value) -> Result<Value, EvalError> {
7478    match (l, r) {
7479        (Value::Bool(true), _) | (_, Value::Bool(true)) => Ok(Value::Bool(true)),
7480        (Value::Bool(false), Value::Bool(false)) => Ok(Value::Bool(false)),
7481        (Value::Null, _) | (_, Value::Null) => Ok(Value::Null),
7482        (a, b) => Err(EvalError::TypeMismatch {
7483            detail: format!(
7484                "OR on non-boolean: {:?} and {:?}",
7485                a.data_type(),
7486                b.data_type()
7487            ),
7488        }),
7489    }
7490}
7491
7492#[cfg(test)]
7493mod tests {
7494    use super::*;
7495    use alloc::vec;
7496    use spg_storage::{ColumnSchema, Row};
7497
7498    fn col(name: &str, ty: DataType) -> ColumnSchema {
7499        ColumnSchema::new(name, ty, true)
7500    }
7501
7502    fn ctx<'a>(cols: &'a [ColumnSchema], alias: Option<&'a str>) -> EvalContext<'a> {
7503        EvalContext::new(cols, alias)
7504    }
7505
7506    fn lit(n: i64) -> Expr {
7507        Expr::Literal(Literal::Integer(n))
7508    }
7509
7510    fn null() -> Expr {
7511        Expr::Literal(Literal::Null)
7512    }
7513
7514    fn col_ref(name: &str) -> Expr {
7515        Expr::Column(ColumnName {
7516            qualifier: None,
7517            name: name.into(),
7518        })
7519    }
7520
7521    #[test]
7522    fn literal_evaluates_to_value() {
7523        let r = Row::new(vec![]);
7524        let cs: [ColumnSchema; 0] = [];
7525        let c = ctx(&cs, None);
7526        assert_eq!(eval_expr(&lit(42), &r, &c).unwrap(), Value::Int(42));
7527        assert_eq!(
7528            eval_expr(&Expr::Literal(Literal::Float(1.5)), &r, &c).unwrap(),
7529            Value::Float(1.5)
7530        );
7531        assert_eq!(eval_expr(&null(), &r, &c).unwrap(), Value::Null);
7532    }
7533
7534    #[test]
7535    fn column_lookup_unqualified() {
7536        let cs = vec![col("a", DataType::Int), col("b", DataType::Text)];
7537        let r = Row::new(vec![Value::Int(7), Value::Text("hi".into())]);
7538        let c = ctx(&cs, None);
7539        assert_eq!(eval_expr(&col_ref("a"), &r, &c).unwrap(), Value::Int(7));
7540        assert_eq!(
7541            eval_expr(&col_ref("b"), &r, &c).unwrap(),
7542            Value::Text("hi".into())
7543        );
7544    }
7545
7546    #[test]
7547    fn column_not_found_errors() {
7548        let cs = vec![col("a", DataType::Int)];
7549        let r = Row::new(vec![Value::Int(0)]);
7550        let c = ctx(&cs, None);
7551        let err = eval_expr(&col_ref("ghost"), &r, &c).unwrap_err();
7552        assert!(matches!(err, EvalError::ColumnNotFound { ref name } if name == "ghost"));
7553    }
7554
7555    #[test]
7556    fn qualified_column_matches_alias() {
7557        let cs = vec![col("a", DataType::Int)];
7558        let r = Row::new(vec![Value::Int(5)]);
7559        let c = ctx(&cs, Some("u"));
7560        let qualified = Expr::Column(ColumnName {
7561            qualifier: Some("u".into()),
7562            name: "a".into(),
7563        });
7564        assert_eq!(eval_expr(&qualified, &r, &c).unwrap(), Value::Int(5));
7565    }
7566
7567    #[test]
7568    fn qualified_column_unknown_alias_errors() {
7569        let cs = vec![col("a", DataType::Int)];
7570        let r = Row::new(vec![Value::Int(5)]);
7571        let c = ctx(&cs, Some("u"));
7572        let wrong = Expr::Column(ColumnName {
7573            qualifier: Some("x".into()),
7574            name: "a".into(),
7575        });
7576        assert!(matches!(
7577            eval_expr(&wrong, &r, &c).unwrap_err(),
7578            EvalError::UnknownQualifier { .. }
7579        ));
7580    }
7581
7582    #[test]
7583    fn arithmetic_with_widening() {
7584        let r = Row::new(vec![]);
7585        let cs: [ColumnSchema; 0] = [];
7586        let c = ctx(&cs, None);
7587        let e = Expr::Binary {
7588            lhs: alloc::boxed::Box::new(lit(2)),
7589            op: BinOp::Add,
7590            rhs: alloc::boxed::Box::new(Expr::Literal(Literal::Float(0.5))),
7591        };
7592        assert_eq!(eval_expr(&e, &r, &c).unwrap(), Value::Float(2.5));
7593    }
7594
7595    #[test]
7596    fn division_by_zero_errors() {
7597        let r = Row::new(vec![]);
7598        let cs: [ColumnSchema; 0] = [];
7599        let c = ctx(&cs, None);
7600        let e = Expr::Binary {
7601            lhs: alloc::boxed::Box::new(lit(1)),
7602            op: BinOp::Div,
7603            rhs: alloc::boxed::Box::new(lit(0)),
7604        };
7605        assert_eq!(
7606            eval_expr(&e, &r, &c).unwrap_err(),
7607            EvalError::DivisionByZero
7608        );
7609    }
7610
7611    #[test]
7612    fn comparison_returns_bool() {
7613        let r = Row::new(vec![]);
7614        let cs: [ColumnSchema; 0] = [];
7615        let c = ctx(&cs, None);
7616        let e = Expr::Binary {
7617            lhs: alloc::boxed::Box::new(lit(1)),
7618            op: BinOp::Lt,
7619            rhs: alloc::boxed::Box::new(lit(2)),
7620        };
7621        assert_eq!(eval_expr(&e, &r, &c).unwrap(), Value::Bool(true));
7622    }
7623
7624    #[test]
7625    fn null_propagates_through_arithmetic() {
7626        let r = Row::new(vec![]);
7627        let cs: [ColumnSchema; 0] = [];
7628        let c = ctx(&cs, None);
7629        let e = Expr::Binary {
7630            lhs: alloc::boxed::Box::new(lit(1)),
7631            op: BinOp::Add,
7632            rhs: alloc::boxed::Box::new(null()),
7633        };
7634        assert_eq!(eval_expr(&e, &r, &c).unwrap(), Value::Null);
7635    }
7636
7637    #[test]
7638    fn and_three_valued_logic() {
7639        let r = Row::new(vec![]);
7640        let cs: [ColumnSchema; 0] = [];
7641        let c = ctx(&cs, None);
7642        let tt = |a: bool, b_null: bool| Expr::Binary {
7643            lhs: alloc::boxed::Box::new(Expr::Literal(Literal::Bool(a))),
7644            op: BinOp::And,
7645            rhs: alloc::boxed::Box::new(if b_null {
7646                null()
7647            } else {
7648                Expr::Literal(Literal::Bool(true))
7649            }),
7650        };
7651        // FALSE AND NULL → FALSE
7652        assert_eq!(
7653            eval_expr(&tt(false, true), &r, &c).unwrap(),
7654            Value::Bool(false)
7655        );
7656        // TRUE AND NULL → NULL
7657        assert_eq!(eval_expr(&tt(true, true), &r, &c).unwrap(), Value::Null);
7658        // TRUE AND TRUE → TRUE
7659        assert_eq!(
7660            eval_expr(&tt(true, false), &r, &c).unwrap(),
7661            Value::Bool(true)
7662        );
7663    }
7664
7665    #[test]
7666    fn or_three_valued_logic() {
7667        let r = Row::new(vec![]);
7668        let cs: [ColumnSchema; 0] = [];
7669        let c = ctx(&cs, None);
7670        let or_with_null = |a: bool| Expr::Binary {
7671            lhs: alloc::boxed::Box::new(Expr::Literal(Literal::Bool(a))),
7672            op: BinOp::Or,
7673            rhs: alloc::boxed::Box::new(null()),
7674        };
7675        // TRUE OR NULL → TRUE
7676        assert_eq!(
7677            eval_expr(&or_with_null(true), &r, &c).unwrap(),
7678            Value::Bool(true)
7679        );
7680        // FALSE OR NULL → NULL
7681        assert_eq!(
7682            eval_expr(&or_with_null(false), &r, &c).unwrap(),
7683            Value::Null
7684        );
7685    }
7686
7687    #[test]
7688    fn not_on_null_is_null() {
7689        let r = Row::new(vec![]);
7690        let cs: [ColumnSchema; 0] = [];
7691        let c = ctx(&cs, None);
7692        let e = Expr::Unary {
7693            op: UnOp::Not,
7694            expr: alloc::boxed::Box::new(null()),
7695        };
7696        assert_eq!(eval_expr(&e, &r, &c).unwrap(), Value::Null);
7697    }
7698
7699    #[test]
7700    fn text_comparison_lexicographic() {
7701        let r = Row::new(vec![]);
7702        let cs: [ColumnSchema; 0] = [];
7703        let c = ctx(&cs, None);
7704        let e = Expr::Binary {
7705            lhs: alloc::boxed::Box::new(Expr::Literal(Literal::String("apple".into()))),
7706            op: BinOp::Lt,
7707            rhs: alloc::boxed::Box::new(Expr::Literal(Literal::String("banana".into()))),
7708        };
7709        assert_eq!(eval_expr(&e, &r, &c).unwrap(), Value::Bool(true));
7710    }
7711
7712    #[test]
7713    fn interval_format_basics() {
7714        assert_eq!(format_interval(0, 0), "0");
7715        assert_eq!(format_interval(0, 86_400_000_000), "1 day");
7716        assert_eq!(format_interval(0, -86_400_000_000), "-1 days");
7717        assert_eq!(format_interval(0, 3_600_000_000), "01:00:00");
7718        assert_eq!(
7719            format_interval(0, 86_400_000_000 + 9_000_000),
7720            "1 day 00:00:09"
7721        );
7722        assert_eq!(format_interval(14, 0), "1 year 2 mons");
7723        assert_eq!(format_interval(-1, 0), "-1 mons");
7724    }
7725
7726    #[test]
7727    fn interval_add_to_timestamp_micros_part() {
7728        // 2024-01-01 00:00:00 + INTERVAL '1 hour' = 2024-01-01 01:00:00
7729        let ts = i64::from(days_from_civil(2024, 1, 1)) * 86_400_000_000;
7730        let r = add_interval_to_micros(ts, 0, 3_600_000_000).unwrap();
7731        let expected = ts + 3_600_000_000;
7732        assert_eq!(r, expected);
7733    }
7734
7735    #[test]
7736    fn interval_clamp_month_end() {
7737        // 2024-01-31 + 1 month = 2024-02-29 (leap year).
7738        let d = days_from_civil(2024, 1, 31);
7739        let shifted = shift_date_by_months(d, 1).unwrap();
7740        let (y, m, day) = civil_from_days(shifted);
7741        assert_eq!((y, m, day), (2024, 2, 29));
7742        // 2023-01-31 + 1 month = 2023-02-28 (non-leap).
7743        let d = days_from_civil(2023, 1, 31);
7744        let shifted = shift_date_by_months(d, 1).unwrap();
7745        let (y, m, day) = civil_from_days(shifted);
7746        assert_eq!((y, m, day), (2023, 2, 28));
7747        // 2024-03-31 - 1 month = 2024-02-29.
7748        let d = days_from_civil(2024, 3, 31);
7749        let shifted = shift_date_by_months(d, -1).unwrap();
7750        let (y, m, day) = civil_from_days(shifted);
7751        assert_eq!((y, m, day), (2024, 2, 29));
7752    }
7753
7754    #[test]
7755    fn interval_date_plus_pure_days_stays_date() {
7756        // DATE + INTERVAL '7 days' must stay DATE.
7757        let d = days_from_civil(2024, 6, 1);
7758        let lhs = Value::Date(d);
7759        let rhs = Value::Interval {
7760            months: 0,
7761            micros: 7 * 86_400_000_000,
7762        };
7763        let v = apply_binary_interval(BinOp::Add, &lhs, &rhs)
7764            .unwrap()
7765            .unwrap();
7766        let expected = days_from_civil(2024, 6, 8);
7767        assert_eq!(v, Value::Date(expected));
7768    }
7769
7770    #[test]
7771    fn interval_date_plus_sub_day_lifts_to_timestamp() {
7772        // DATE + INTERVAL '1 hour' must lift to TIMESTAMP.
7773        let d = days_from_civil(2024, 6, 1);
7774        let lhs = Value::Date(d);
7775        let rhs = Value::Interval {
7776            months: 0,
7777            micros: 3_600_000_000,
7778        };
7779        let v = apply_binary_interval(BinOp::Add, &lhs, &rhs)
7780            .unwrap()
7781            .unwrap();
7782        let expected = i64::from(d) * 86_400_000_000 + 3_600_000_000;
7783        assert_eq!(v, Value::Timestamp(expected));
7784    }
7785}