Skip to main content

spg_engine/
eval.rs

1//! Expression evaluator. Given a parsed `Expr`, a `Row`, and the row's column
2//! schema, produce a `Value`. v0.4 implements:
3//!
4//! - literals
5//! - column lookups (bare and qualified `t.col`)
6//! - unary minus / NOT
7//! - binary arithmetic, comparison, AND, OR
8//! - numeric widening (`Int → BigInt → Float`) at evaluation time
9//! - SQL three-valued logic for NULL:
10//!     * any arithmetic / comparison op with a NULL operand → NULL
11//!     * `TRUE OR NULL` → TRUE, `FALSE OR NULL` → NULL,
12//!     * `FALSE AND NULL` → FALSE, `TRUE AND NULL` → NULL,
13//!     * `NOT NULL` → NULL
14//!
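//! The original v0.4 cut deliberately left out function calls, string
//! concatenation, IS NULL / IS NOT NULL, BETWEEN, IN, etc. Some of these have
//! since landed in this file — `eval_expr` now handles function calls and
//! IS [NOT] NULL, along with CAST, LIKE, EXTRACT, arrays, ANY/ALL and CASE.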
17
18use alloc::boxed::Box;
19use alloc::format;
20use alloc::string::{String, ToString};
21use alloc::vec::Vec;
22
23use spg_sql::ast::{BinOp, CastTarget, ColumnName, Expr, Literal, UnOp};
24use spg_storage::{ColumnSchema, DataType, Row, TsLexeme, TsQueryAst, Value};
25
/// Resolution context for evaluating a single row. `table_alias` is the alias
/// (or table name) callers should accept as the qualifier on a column ref —
/// e.g. `FROM users AS u` makes `u.name` valid and rejects `other.name`.
///
/// Every field is a borrow (or an `Option` of one) tied to `'a`; the context
/// owns no data of its own. `EvalContext::new` fills in only `columns` and
/// `table_alias`; the remaining fields start empty / `None` and are attached
/// through the `with_*` builder methods.
#[derive(Clone)]
#[allow(missing_debug_implementations)] // sequence_resolver is a dyn Fn — no Debug
pub struct EvalContext<'a> {
    /// Column schema of the row being evaluated; handed to `resolve_column`
    /// (via the context) to look up column references.
    pub columns: &'a [ColumnSchema],
    /// Qualifier accepted on `t.col` references (see the type-level docs).
    /// NOTE(review): `None` presumably means "no qualifier is in scope" —
    /// confirm against `resolve_column`.
    pub table_alias: Option<&'a str>,
    /// v6.1.1 — bound parameters for `$N` placeholders inside the
    /// expression tree. Empty for simple queries; populated by the
    /// prepared-statement Execute path with Bind values converted
    /// to `Value`. Index N (1-based per PG) hits `params[N-1]`.
    pub params: &'a [Value],
    /// v7.12.1 — session text-search config (from `SET
    /// default_text_search_config = '<name>'`). Resolved when the
    /// engine builds an `EvalContext` and consumed by the FTS
    /// function dispatcher when `to_tsvector(text)` /
    /// `plainto_tsquery(text)` etc are called without an explicit
    /// config arg. `None` falls through to `simple`.
    pub default_text_search_config: Option<&'a str>,
    /// v7.17.0 Phase 1.1 — `nextval` / `currval` / `setval`
    /// resolver. The engine builds this around a `&mut Catalog`
    /// so apply_function can mutate sequence state without
    /// eval owning a catalog reference. When `None`, sequence
    /// functions return an error (read-only contexts).
    pub sequence_resolver: Option<&'a SequenceResolver<'a>>,
}
53
/// v7.17.0 — sequence-mutating callback used by `apply_function`
/// for `nextval` / `currval` / `setval`. Implemented by the
/// engine to thread `&mut Catalog` access through an immutable
/// `&EvalContext`.
///
/// The callback receives the requested [`SequenceOp`] and returns the
/// resulting sequence value; `apply_function` wraps that `i64` in
/// `Value::BigInt` and propagates an `Err` unchanged.
///
/// NOTE(review): the bound is `Fn`, not `FnMut`, so the engine-side closure
/// presumably reaches the catalog through some form of interior mutability —
/// confirm at the construction site.
pub type SequenceResolver<'a> = dyn Fn(SequenceOp) -> Result<i64, EvalError> + 'a;
59
/// v7.17.0 — what an expression asks the sequence resolver to do.
///
/// `apply_function` builds one of these per `nextval` / `currval` /
/// `setval` call and passes it to the resolver callback.
#[derive(Debug, Clone)]
pub enum SequenceOp {
    /// `nextval(name)` — carries the sequence name.
    Next(String),
    /// `currval(name)` — carries the sequence name.
    Curr(String),
    /// `setval(name, value[, is_called])`. `apply_function` passes
    /// `is_called = true` when the third argument is omitted.
    Set { name: String, value: i64, is_called: bool },
}
71
72impl<'a> EvalContext<'a> {
73    pub const fn new(columns: &'a [ColumnSchema], table_alias: Option<&'a str>) -> Self {
74        Self {
75            columns,
76            table_alias,
77            params: &[],
78            default_text_search_config: None,
79            sequence_resolver: None,
80        }
81    }
82
83    /// v7.17.0 — attach a sequence resolver. The engine wraps a
84    /// `&mut Catalog` in a closure that performs the requested
85    /// SequenceOp.
86    #[must_use]
87    pub const fn with_sequence_resolver(mut self, resolver: &'a SequenceResolver<'a>) -> Self {
88        self.sequence_resolver = Some(resolver);
89        self
90    }
91
92    /// v6.1.1 — attach a parameter buffer for `$N` placeholder
93    /// resolution. The slice must outlive the context; callers
94    /// construct it from the prepared statement's Bind values.
95    #[must_use]
96    pub const fn with_params(mut self, params: &'a [Value]) -> Self {
97        self.params = params;
98        self
99    }
100
101    /// v7.12.1 — attach the session's
102    /// `default_text_search_config`. Used by the FTS function
103    /// dispatcher when no explicit config arg is given.
104    #[must_use]
105    pub const fn with_default_text_search_config(mut self, cfg: Option<&'a str>) -> Self {
106        self.default_text_search_config = cfg;
107        self
108    }
109}
110
/// Everything expression evaluation can fail with. The `Display` impl
/// renders each variant as a single human-readable line.
#[derive(Debug, Clone, PartialEq)]
pub enum EvalError {
    /// A column reference named `name` could not be resolved.
    ColumnNotFound { name: String },
    /// A `qualifier.col` reference used a table qualifier that is not
    /// accepted in the current context.
    UnknownQualifier { qualifier: String },
    /// Division by zero.
    DivisionByZero,
    /// Catch-all for operand-type, arity and "should never reach row eval"
    /// failures; `detail` carries the specifics.
    TypeMismatch { detail: String },
    /// v6.1.1 — `$N` reference past the number of bound parameters.
    /// Either the client sent too few in Bind, or the SQL has a
    /// placeholder the prepared statement didn't account for.
    /// `n` is the placeholder number, `bound` the number of parameters
    /// actually available.
    PlaceholderOutOfRange { n: u16, bound: u16 },
}
131
132impl core::fmt::Display for EvalError {
133    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
134        match self {
135            Self::ColumnNotFound { name } => write!(f, "column not found: {name}"),
136            Self::UnknownQualifier { qualifier } => {
137                write!(f, "unknown table qualifier: {qualifier}")
138            }
139            Self::DivisionByZero => f.write_str("division by zero"),
140            Self::TypeMismatch { detail } => write!(f, "type mismatch: {detail}"),
141            Self::PlaceholderOutOfRange { n, bound } => write!(
142                f,
143                "parameter ${n} referenced but only {bound} bound by client"
144            ),
145        }
146    }
147}
148
149pub fn eval_expr(expr: &Expr, row: &Row, ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
150    match expr {
151        Expr::Literal(l) => Ok(literal_to_value(l)),
152        Expr::Column(c) => resolve_column(c, row, ctx),
153        Expr::Placeholder(n) => {
154            let idx = usize::from(*n).saturating_sub(1);
155            ctx.params
156                .get(idx)
157                .cloned()
158                .ok_or_else(|| EvalError::PlaceholderOutOfRange {
159                    n: *n,
160                    bound: u16::try_from(ctx.params.len()).unwrap_or(u16::MAX),
161                })
162        }
163        Expr::Unary { op, expr } => {
164            let v = eval_expr(expr, row, ctx)?;
165            apply_unary(*op, v)
166        }
167        Expr::Binary { lhs, op, rhs } => {
168            let l = eval_expr(lhs, row, ctx)?;
169            let r = eval_expr(rhs, row, ctx)?;
170            // v7.17.0 Phase 2.5 — collation-aware text comparison.
171            // When either operand of a comparison op references a
172            // column declared `COLLATE "case_insensitive"` (or any
173            // MySQL `_ci` collation), case-fold both sides before
174            // the byte-wise compare so `WHERE name = 'foo'` matches
175            // stored `'Foo'`. Non-Text values fall straight through
176            // — the helper is a no-op outside Text-Text equality
177            // and inequality.
178            let (l, r) = collation_fold_for_compare(*op, lhs, rhs, l, r, ctx);
179            apply_binary(*op, l, r)
180        }
181        Expr::Cast { expr, target } => {
182            let v = eval_expr(expr, row, ctx)?;
183            cast_value(v, *target)
184        }
185        Expr::IsNull { expr, negated } => {
186            let v = eval_expr(expr, row, ctx)?;
187            let is_null = matches!(v, Value::Null);
188            Ok(Value::Bool(if *negated { !is_null } else { is_null }))
189        }
190        Expr::FunctionCall { name, args } => {
191            let evaluated: Result<Vec<Value>, _> =
192                args.iter().map(|a| eval_expr(a, row, ctx)).collect();
193            apply_function(name, &evaluated?, ctx)
194        }
195        Expr::Like {
196            expr,
197            pattern,
198            negated,
199        } => {
200            let v = eval_expr(expr, row, ctx)?;
201            let p = eval_expr(pattern, row, ctx)?;
202            // NULL on either side propagates to NULL — same as PG.
203            let (text, pat) = match (v, p) {
204                (Value::Null, _) | (_, Value::Null) => return Ok(Value::Null),
205                (Value::Text(a), Value::Text(b)) => (a, b),
206                (Value::Text(_), other) | (other, _) => {
207                    return Err(EvalError::TypeMismatch {
208                        detail: format!("LIKE requires text operands, got {:?}", other.data_type()),
209                    });
210                }
211            };
212            let m = like_match(&text, &pat);
213            Ok(Value::Bool(if *negated { !m } else { m }))
214        }
215        Expr::Extract { field, source } => {
216            let v = eval_expr(source, row, ctx)?;
217            extract_field(*field, &v)
218        }
219        // v4.10: subquery nodes should have been resolved into
220        // Literal / Binary-Eq-OR chains by Engine::resolve_select_subqueries
221        // before the row loop. Anything reaching here is a bug.
222        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {
223            Err(EvalError::TypeMismatch {
224                detail: "subquery reached row eval — engine resolver bug".into(),
225            })
226        }
227        // v4.12: window functions should have been rewritten into
228        // synthetic __win_N column references by
229        // exec_select_with_window before row eval. Anything
230        // reaching here is similarly a bug.
231        Expr::WindowFunction { .. } => Err(EvalError::TypeMismatch {
232            detail: "window function reached row eval — engine rewrite bug".into(),
233        }),
234        // v7.10.10 — `ARRAY[expr, expr, …]` constructor.
235        // v7.11.13 — element-type detection: all integers →
236        // IntArray (or BigIntArray when widening), any Text →
237        // TextArray. Non-TEXT non-integer elements (Bool, Float)
238        // stringify into TextArray as the safe default.
239        Expr::Array(items) => {
240            let mut materialised: Vec<Value> = Vec::with_capacity(items.len());
241            for elem in items {
242                materialised.push(eval_expr(elem, row, ctx)?);
243            }
244            let mut has_text = false;
245            let mut has_bigint = false;
246            let mut has_int = false;
247            for v in &materialised {
248                match v {
249                    Value::Null => {}
250                    Value::Int(_) | Value::SmallInt(_) => has_int = true,
251                    Value::BigInt(_) => has_bigint = true,
252                    Value::Text(_) | Value::Json(_) => has_text = true,
253                    _ => has_text = true,
254                }
255            }
256            if has_text || (!has_int && !has_bigint) {
257                let out: Vec<Option<String>> = materialised
258                    .into_iter()
259                    .map(|v| match v {
260                        Value::Null => None,
261                        Value::Text(s) | Value::Json(s) => Some(s),
262                        other => Some(value_to_text_for_array(&other)),
263                    })
264                    .collect();
265                return Ok(Value::TextArray(out));
266            }
267            if has_bigint {
268                let out: Vec<Option<i64>> = materialised
269                    .into_iter()
270                    .map(|v| match v {
271                        Value::Null => None,
272                        Value::Int(n) => Some(i64::from(n)),
273                        Value::SmallInt(n) => Some(i64::from(n)),
274                        Value::BigInt(n) => Some(n),
275                        _ => unreachable!(),
276                    })
277                    .collect();
278                return Ok(Value::BigIntArray(out));
279            }
280            let out: Vec<Option<i32>> = materialised
281                .into_iter()
282                .map(|v| match v {
283                    Value::Null => None,
284                    Value::Int(n) => Some(n),
285                    Value::SmallInt(n) => Some(i32::from(n)),
286                    _ => unreachable!(),
287                })
288                .collect();
289            Ok(Value::IntArray(out))
290        }
291        // v7.10.12 — `arr[i]` PG-style 1-based indexing.
292        // Out-of-range indices (including i ≤ 0) return NULL.
293        Expr::ArraySubscript { target, index } => {
294            let target_v = eval_expr(target, row, ctx)?;
295            let idx_v = eval_expr(index, row, ctx)?;
296            if matches!(target_v, Value::Null) || matches!(idx_v, Value::Null) {
297                return Ok(Value::Null);
298            }
299            let i: i64 = match idx_v {
300                Value::Int(n) => i64::from(n),
301                Value::BigInt(n) => n,
302                Value::SmallInt(n) => i64::from(n),
303                other => {
304                    return Err(EvalError::TypeMismatch {
305                        detail: format!(
306                            "array subscript must be integer, got {:?}",
307                            other.data_type()
308                        ),
309                    });
310                }
311            };
312            if i < 1 {
313                return Ok(Value::Null);
314            }
315            let pos = (i - 1) as usize;
316            match target_v {
317                Value::TextArray(items) => match items.get(pos) {
318                    Some(Some(s)) => Ok(Value::Text(s.clone())),
319                    Some(None) | None => Ok(Value::Null),
320                },
321                Value::IntArray(items) => match items.get(pos) {
322                    Some(Some(n)) => Ok(Value::Int(*n)),
323                    Some(None) | None => Ok(Value::Null),
324                },
325                Value::BigIntArray(items) => match items.get(pos) {
326                    Some(Some(n)) => Ok(Value::BigInt(*n)),
327                    Some(None) | None => Ok(Value::Null),
328                },
329                other => Err(EvalError::TypeMismatch {
330                    detail: format!(
331                        "subscript target must be an array, got {:?}",
332                        other.data_type()
333                    ),
334                }),
335            }
336        }
337        // v7.10.12 — `x op ANY(arr)` / `x op ALL(arr)`. PG
338        // 3VL: ANY → true if any element compares-true; NULL if
339        // no true but some NULL; false otherwise. ALL: false if
340        // any compares-false; NULL if no false but some NULL;
341        // true otherwise.
342        Expr::AnyAll {
343            expr,
344            op,
345            array,
346            is_any,
347        } => {
348            let lhs = eval_expr(expr, row, ctx)?;
349            let arr = eval_expr(array, row, ctx)?;
350            if matches!(arr, Value::Null) {
351                return Ok(Value::Null);
352            }
353            let elems: Vec<Option<Value>> = match arr {
354                Value::TextArray(items) => items.into_iter().map(|o| o.map(Value::Text)).collect(),
355                Value::IntArray(items) => items.into_iter().map(|o| o.map(Value::Int)).collect(),
356                Value::BigIntArray(items) => {
357                    items.into_iter().map(|o| o.map(Value::BigInt)).collect()
358                }
359                other => {
360                    return Err(EvalError::TypeMismatch {
361                        detail: format!(
362                            "ANY/ALL right-hand side must be an array, got {:?}",
363                            other.data_type()
364                        ),
365                    });
366                }
367            };
368            let mut saw_null = matches!(lhs, Value::Null);
369            let mut saw_match = false;
370            let mut saw_mismatch = false;
371            for elem in elems {
372                let elem_v = match elem {
373                    Some(v) => v,
374                    None => {
375                        saw_null = true;
376                        continue;
377                    }
378                };
379                if matches!(lhs, Value::Null) {
380                    saw_null = true;
381                    continue;
382                }
383                match apply_binary(*op, lhs.clone(), elem_v) {
384                    Ok(Value::Bool(true)) => saw_match = true,
385                    Ok(Value::Bool(false)) => saw_mismatch = true,
386                    Ok(Value::Null) => saw_null = true,
387                    Ok(other) => {
388                        return Err(EvalError::TypeMismatch {
389                            detail: format!(
390                                "ANY/ALL comparison didn't return Bool: {:?}",
391                                other.data_type()
392                            ),
393                        });
394                    }
395                    Err(e) => return Err(e),
396                }
397            }
398            let result = if *is_any {
399                if saw_match {
400                    Value::Bool(true)
401                } else if saw_null {
402                    Value::Null
403                } else {
404                    Value::Bool(false)
405                }
406            } else if saw_mismatch {
407                Value::Bool(false)
408            } else if saw_null {
409                Value::Null
410            } else {
411                Value::Bool(true)
412            };
413            Ok(result)
414        }
415        // v7.13.0 — CASE WHEN … END (mailrs round-5 G9).
416        // Short-circuit on the first matching branch. Searched form
417        // (operand=None) treats each branch's WHEN as a Bool
418        // predicate. Simple form (operand=Some) compares with =.
419        // ELSE on no match; NULL if no ELSE.
420        Expr::Case {
421            operand,
422            branches,
423            else_branch,
424        } => {
425            let operand_value = match operand {
426                Some(o) => Some(eval_expr(o, row, ctx)?),
427                None => None,
428            };
429            for (when_expr, then_expr) in branches {
430                let when_value = eval_expr(when_expr, row, ctx)?;
431                let matched = match &operand_value {
432                    None => matches!(when_value, Value::Bool(true)),
433                    Some(op_v) => matches!(
434                        apply_binary(spg_sql::ast::BinOp::Eq, op_v.clone(), when_value)?,
435                        Value::Bool(true)
436                    ),
437                };
438                if matched {
439                    return eval_expr(then_expr, row, ctx);
440                }
441            }
442            match else_branch {
443                Some(e) => eval_expr(e, row, ctx),
444                None => Ok(Value::Null),
445            }
446        }
447    }
448}
449
450/// v7.10.10 — best-effort text rendering for non-TEXT array
451/// elements (numbers, bools, etc.). The PG rule is that
452/// `ARRAY[1, 2]` is `int[]`, but SPG's v7.10 only models TEXT[],
453/// so we widen by stringifying. NUMERIC formatting goes through
454/// the existing canonical helpers to stay consistent with
455/// `format_numeric` / `format_date` etc.
456fn value_to_text_for_array(v: &Value) -> String {
457    match v {
458        Value::Text(s) | Value::Json(s) => s.clone(),
459        Value::Int(n) => n.to_string(),
460        Value::BigInt(n) => n.to_string(),
461        Value::SmallInt(n) => n.to_string(),
462        Value::Bool(b) => {
463            if *b {
464                "true".into()
465            } else {
466                "false".into()
467            }
468        }
469        Value::Float(x) => format!("{x}"),
470        Value::Date(d) => format_date(*d),
471        Value::Timestamp(t) => format_timestamp(*t),
472        Value::Numeric { scaled, scale } => format_numeric(*scaled, *scale),
473        _ => format!("{v:?}"),
474    }
475}
476
477/// Pull an integer component (year / month / ... / microsecond) out
478/// of a `DATE` or `TIMESTAMP`. Returns NULL on a NULL source, errors
479/// when the source isn't a calendar type.
480fn extract_field(field: spg_sql::ast::ExtractField, v: &Value) -> Result<Value, EvalError> {
481    use spg_sql::ast::ExtractField as F;
482    if matches!(v, Value::Null) {
483        return Ok(Value::Null);
484    }
485    // INTERVAL has its own decomposition — `YEAR` / `MONTH` come from
486    // the months part, the rest from the microseconds part. PG matches
487    // this convention (months is normalised modulo 12 for MONTH).
488    if let Value::Interval { months, micros } = *v {
489        let years = months / 12;
490        let mons = months % 12;
491        let secs_total = micros / 1_000_000;
492        let frac = micros % 1_000_000;
493        let result = match field {
494            F::Year => i64::from(years),
495            F::Month => i64::from(mons),
496            F::Day => micros / 86_400_000_000,
497            F::Hour => (secs_total / 3600) % 24,
498            F::Minute => (secs_total / 60) % 60,
499            F::Second => secs_total % 60,
500            F::Microsecond => (secs_total % 60) * 1_000_000 + frac,
501            // total seconds in the interval (months count as 30 days,
502            // PG's justify_interval convention).
503            F::Epoch => i64::from(months) * 30 * 86_400 + secs_total,
504        };
505        return Ok(Value::BigInt(result));
506    }
507    let (days, day_micros) = match *v {
508        Value::Date(d) => (d, 0_i64),
509        Value::Timestamp(t) => {
510            let days = t.div_euclid(86_400_000_000);
511            let day_micros = t.rem_euclid(86_400_000_000);
512            (i32::try_from(days).unwrap_or(i32::MAX), day_micros)
513        }
514        _ => {
515            return Err(EvalError::TypeMismatch {
516                detail: format!(
517                    "EXTRACT requires DATE / TIMESTAMP / INTERVAL, got {:?}",
518                    v.data_type()
519                ),
520            });
521        }
522    };
523    let (y, m, d) = civil_components(days);
524    let secs = day_micros / 1_000_000;
525    let hh = secs / 3600;
526    let mm = (secs / 60) % 60;
527    let ss = secs % 60;
528    let frac = day_micros % 1_000_000;
529    let result = match field {
530        F::Year => i64::from(y),
531        F::Month => i64::from(m),
532        F::Day => i64::from(d),
533        F::Hour => hh,
534        F::Minute => mm,
535        F::Second => ss,
536        F::Microsecond => ss * 1_000_000 + frac,
537        // seconds since the unix epoch (truncated; PG returns
538        // numeric with fraction — mailrs casts ::BIGINT anyway).
539        F::Epoch => i64::from(days) * 86_400 + secs,
540    };
541    Ok(Value::BigInt(result))
542}
543
544/// Internal wrapper around the file-private `civil_from_days` so the
545/// public surface area doesn't change. Returns `(year, month, day)`.
546fn civil_components(days: i32) -> (i32, u32, u32) {
547    civil_from_days(days)
548}
549
550/// SQL `LIKE` matcher. Wildcards are `%` (any run, possibly empty) and `_`
551/// (exactly one char). `\` escapes the next pattern char so `\%` matches a
552/// literal `%`. Matches the whole input — no implicit anchoring needed
553/// since SQL `LIKE` is always full-string.
554fn like_match(text: &str, pattern: &str) -> bool {
555    let text: Vec<char> = text.chars().collect();
556    let pat: Vec<char> = pattern.chars().collect();
557    like_match_inner(&text, 0, &pat, 0)
558}
559
560fn like_match_inner(text: &[char], mut ti: usize, pat: &[char], mut pi: usize) -> bool {
561    while pi < pat.len() {
562        match pat[pi] {
563            '%' => {
564                // Collapse consecutive `%` and try every possible split.
565                while pi < pat.len() && pat[pi] == '%' {
566                    pi += 1;
567                }
568                if pi == pat.len() {
569                    return true;
570                }
571                for k in ti..=text.len() {
572                    if like_match_inner(text, k, pat, pi) {
573                        return true;
574                    }
575                }
576                return false;
577            }
578            '_' => {
579                if ti >= text.len() {
580                    return false;
581                }
582                ti += 1;
583                pi += 1;
584            }
585            '\\' if pi + 1 < pat.len() => {
586                let want = pat[pi + 1];
587                if ti >= text.len() || text[ti] != want {
588                    return false;
589                }
590                ti += 1;
591                pi += 2;
592            }
593            c => {
594                if ti >= text.len() || text[ti] != c {
595                    return false;
596                }
597                ti += 1;
598                pi += 1;
599            }
600        }
601    }
602    ti == text.len()
603}
604
605/// Dispatch on lowercased function name. v1.4 implements only a handful of
606/// scalar functions; aggregates land in v1.5 alongside GROUP BY.
607fn apply_function(name: &str, args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
608    match name.to_ascii_lowercase().as_str() {
609        // v7.17.0 Phase 1.1 — SEQUENCE accessor functions.
610        "nextval" => {
611            if args.len() != 1 {
612                return Err(EvalError::TypeMismatch {
613                    detail: format!("nextval() takes 1 arg, got {}", args.len()),
614                });
615            }
616            let seq_name = match &args[0] {
617                Value::Text(s) => s.clone(),
618                Value::Null => return Ok(Value::Null),
619                other => {
620                    return Err(EvalError::TypeMismatch {
621                        detail: format!(
622                            "nextval() argument must be TEXT, got {:?}",
623                            other.data_type()
624                        ),
625                    });
626                }
627            };
628            let resolver = ctx
629                .sequence_resolver
630                .ok_or_else(|| EvalError::TypeMismatch {
631                    detail: "nextval() requires a sequence resolver (read-only context)".into(),
632                })?;
633            let v = resolver(SequenceOp::Next(seq_name))?;
634            Ok(Value::BigInt(v))
635        }
636        "currval" => {
637            if args.len() != 1 {
638                return Err(EvalError::TypeMismatch {
639                    detail: format!("currval() takes 1 arg, got {}", args.len()),
640                });
641            }
642            let seq_name = match &args[0] {
643                Value::Text(s) => s.clone(),
644                Value::Null => return Ok(Value::Null),
645                other => {
646                    return Err(EvalError::TypeMismatch {
647                        detail: format!(
648                            "currval() argument must be TEXT, got {:?}",
649                            other.data_type()
650                        ),
651                    });
652                }
653            };
654            let resolver = ctx
655                .sequence_resolver
656                .ok_or_else(|| EvalError::TypeMismatch {
657                    detail: "currval() requires a sequence resolver (read-only context)".into(),
658                })?;
659            let v = resolver(SequenceOp::Curr(seq_name))?;
660            Ok(Value::BigInt(v))
661        }
662        "setval" => {
663            if args.len() != 2 && args.len() != 3 {
664                return Err(EvalError::TypeMismatch {
665                    detail: format!("setval() takes 2 or 3 args, got {}", args.len()),
666                });
667            }
668            let seq_name = match &args[0] {
669                Value::Text(s) => s.clone(),
670                Value::Null => return Ok(Value::Null),
671                other => {
672                    return Err(EvalError::TypeMismatch {
673                        detail: format!(
674                            "setval() name argument must be TEXT, got {:?}",
675                            other.data_type()
676                        ),
677                    });
678                }
679            };
680            let value = match &args[1] {
681                Value::SmallInt(n) => i64::from(*n),
682                Value::Int(n) => i64::from(*n),
683                Value::BigInt(n) => *n,
684                Value::Null => return Ok(Value::Null),
685                other => {
686                    return Err(EvalError::TypeMismatch {
687                        detail: format!(
688                            "setval() value argument must be integer, got {:?}",
689                            other.data_type()
690                        ),
691                    });
692                }
693            };
694            let is_called = if args.len() == 3 {
695                match &args[2] {
696                    Value::Bool(b) => *b,
697                    Value::Null => return Ok(Value::Null),
698                    other => {
699                        return Err(EvalError::TypeMismatch {
700                            detail: format!(
701                                "setval() is_called argument must be BOOL, got {:?}",
702                                other.data_type()
703                            ),
704                        });
705                    }
706                }
707            } else {
708                true
709            };
710            let resolver = ctx
711                .sequence_resolver
712                .ok_or_else(|| EvalError::TypeMismatch {
713                    detail: "setval() requires a sequence resolver (read-only context)".into(),
714                })?;
715            let v = resolver(SequenceOp::Set {
716                name: seq_name,
717                value,
718                is_called,
719            })?;
720            Ok(Value::BigInt(v))
721        }
722        // v7.22 (round-13) — char_length / character_length are the
723        // SQL-standard spellings PG accepts everywhere; pg_dump
724        // CHECK predicates carry them verbatim.
725        "length" | "char_length" | "character_length" => {
726            if args.len() != 1 {
727                return Err(EvalError::TypeMismatch {
728                    detail: format!("length() takes 1 arg, got {}", args.len()),
729                });
730            }
731            match &args[0] {
732                Value::Null => Ok(Value::Null),
733                Value::Text(s) => {
734                    let n = i32::try_from(s.chars().count()).unwrap_or(i32::MAX);
735                    Ok(Value::Int(n))
736                }
737                // v7.10.4 — PG semantics: length(bytea) returns
738                // byte count (= octet_length). Without this branch
739                // mailrs's INSERT … SELECT length(body) … against a
740                // BYTEA column would type-mismatch.
741                Value::Bytes(b) => {
742                    let n = i32::try_from(b.len()).unwrap_or(i32::MAX);
743                    Ok(Value::Int(n))
744                }
745                other => Err(EvalError::TypeMismatch {
746                    detail: format!("length() needs text or bytea, got {:?}", other.data_type()),
747                }),
748            }
749        }
750        // v7.10.4 — `OCTET_LENGTH(x)` returns byte count for both
751        // TEXT (UTF-8 byte length) and BYTEA. PG-spec name; aliases
752        // to length() for bytea by design.
753        "octet_length" => {
754            if args.len() != 1 {
755                return Err(EvalError::TypeMismatch {
756                    detail: format!("octet_length() takes 1 arg, got {}", args.len()),
757                });
758            }
759            match &args[0] {
760                Value::Null => Ok(Value::Null),
761                Value::Text(s) => {
762                    let n = i32::try_from(s.len()).unwrap_or(i32::MAX);
763                    Ok(Value::Int(n))
764                }
765                Value::Bytes(b) => {
766                    let n = i32::try_from(b.len()).unwrap_or(i32::MAX);
767                    Ok(Value::Int(n))
768                }
769                other => Err(EvalError::TypeMismatch {
770                    detail: format!(
771                        "octet_length() needs text or bytea, got {:?}",
772                        other.data_type()
773                    ),
774                }),
775            }
776        }
777        // v7.11.6 — `array_length(arr, dim)` returns the element
778        // count of `arr` along dimension `dim`. v7.11 only models
779        // single-dimension arrays so dim must be 1 (otherwise NULL,
780        // matching PG semantics for unsupported dimensions). NULL
781        // array → NULL. v7.11 TEXT[] only; non-array operand is
782        // a type mismatch.
783        "array_length" => {
784            if args.len() != 2 {
785                return Err(EvalError::TypeMismatch {
786                    detail: format!("array_length() takes 2 args, got {}", args.len()),
787                });
788            }
789            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
790                return Ok(Value::Null);
791            }
792            let len = match &args[0] {
793                Value::TextArray(items) => items.len(),
794                Value::IntArray(items) => items.len(),
795                Value::BigIntArray(items) => items.len(),
796                _ => {
797                    return Err(EvalError::TypeMismatch {
798                        detail: format!(
799                            "array_length() first arg must be an array, got {:?}",
800                            args[0].data_type()
801                        ),
802                    });
803                }
804            };
805            let dim: i64 = match args[1] {
806                Value::Int(n) => i64::from(n),
807                Value::BigInt(n) => n,
808                Value::SmallInt(n) => i64::from(n),
809                _ => {
810                    return Err(EvalError::TypeMismatch {
811                        detail: format!(
812                            "array_length() second arg must be integer, got {:?}",
813                            args[1].data_type()
814                        ),
815                    });
816                }
817            };
818            if dim != 1 {
819                return Ok(Value::Null);
820            }
821            let n = i32::try_from(len).unwrap_or(i32::MAX);
822            Ok(Value::Int(n))
823        }
824        // v7.11.6 — `array_position(arr, val)` returns 1-based
825        // index of the first element of `arr` equal to `val`, or
826        // NULL if not found. PG NULL semantics: NULL array → NULL;
827        // NULL val never matches (returns NULL if absent).
828        "array_position" => {
829            if args.len() != 2 {
830                return Err(EvalError::TypeMismatch {
831                    detail: format!("array_position() takes 2 args, got {}", args.len()),
832                });
833            }
834            if matches!(args[0], Value::Null) {
835                return Ok(Value::Null);
836            }
837            if matches!(args[1], Value::Null) {
838                return Ok(Value::Null);
839            }
840            match (&args[0], &args[1]) {
841                (Value::TextArray(items), Value::Text(needle)) => {
842                    for (idx, item) in items.iter().enumerate() {
843                        if let Some(s) = item
844                            && s == needle
845                        {
846                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
847                        }
848                    }
849                    Ok(Value::Null)
850                }
851                (Value::IntArray(items), needle_v)
852                    if matches!(
853                        needle_v,
854                        Value::Int(_) | Value::SmallInt(_) | Value::BigInt(_)
855                    ) =>
856                {
857                    let needle: i64 = match *needle_v {
858                        Value::Int(n) => i64::from(n),
859                        Value::SmallInt(n) => i64::from(n),
860                        Value::BigInt(n) => n,
861                        _ => unreachable!(),
862                    };
863                    for (idx, item) in items.iter().enumerate() {
864                        if let Some(n) = item
865                            && i64::from(*n) == needle
866                        {
867                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
868                        }
869                    }
870                    Ok(Value::Null)
871                }
872                (Value::BigIntArray(items), needle_v)
873                    if matches!(
874                        needle_v,
875                        Value::Int(_) | Value::SmallInt(_) | Value::BigInt(_)
876                    ) =>
877                {
878                    let needle: i64 = match *needle_v {
879                        Value::Int(n) => i64::from(n),
880                        Value::SmallInt(n) => i64::from(n),
881                        Value::BigInt(n) => n,
882                        _ => unreachable!(),
883                    };
884                    for (idx, item) in items.iter().enumerate() {
885                        if let Some(n) = item
886                            && *n == needle
887                        {
888                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
889                        }
890                    }
891                    Ok(Value::Null)
892                }
893                (
894                    arr @ (Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_)),
895                    other,
896                ) => Err(EvalError::TypeMismatch {
897                    detail: format!(
898                        "array_position() needle type {:?} doesn't match array {:?}",
899                        other.data_type(),
900                        arr.data_type()
901                    ),
902                }),
903                (other, _) => Err(EvalError::TypeMismatch {
904                    detail: format!(
905                        "array_position() first arg must be an array, got {:?}",
906                        other.data_type()
907                    ),
908                }),
909            }
910        }
911        // v7.11.15 — `substring(s, start)` / `substring(s, start, length)`
912        // for both TEXT and BYTEA. PG semantics: `start` is 1-based;
913        // values ≤ 0 clamp into the string (i.e. effective start is
914        // adjusted so the window still begins at index 1 — but
915        // `length` is reduced by the clipped prefix). A NULL arg
916        // makes the result NULL. Out-of-range windows return an
917        // empty value, not NULL.
918        "substring" | "substr" => {
919            if !matches!(args.len(), 2 | 3) {
920                return Err(EvalError::TypeMismatch {
921                    detail: format!("substring() takes 2 or 3 args, got {}", args.len()),
922                });
923            }
924            if args.iter().any(|a| matches!(a, Value::Null)) {
925                return Ok(Value::Null);
926            }
927            let start: i64 = match args[1] {
928                Value::Int(n) => i64::from(n),
929                Value::BigInt(n) => n,
930                Value::SmallInt(n) => i64::from(n),
931                _ => {
932                    return Err(EvalError::TypeMismatch {
933                        detail: format!(
934                            "substring() start must be integer, got {:?}",
935                            args[1].data_type()
936                        ),
937                    });
938                }
939            };
940            let length: Option<i64> = if args.len() == 3 {
941                match args[2] {
942                    Value::Int(n) => Some(i64::from(n)),
943                    Value::BigInt(n) => Some(n),
944                    Value::SmallInt(n) => Some(i64::from(n)),
945                    _ => {
946                        return Err(EvalError::TypeMismatch {
947                            detail: format!(
948                                "substring() length must be integer, got {:?}",
949                                args[2].data_type()
950                            ),
951                        });
952                    }
953                }
954            } else {
955                None
956            };
957            // PG: when length is given, end = start + length; if
958            // end < start the result is empty. Clip start to 1.
959            let (effective_start, effective_length): (i64, Option<i64>) = match length {
960                Some(len) => {
961                    let end = start.saturating_add(len);
962                    if end <= 1 || len < 0 {
963                        return Ok(match &args[0] {
964                            Value::Text(_) => Value::Text(String::new()),
965                            Value::Bytes(_) => Value::Bytes(Vec::new()),
966                            other => {
967                                return Err(EvalError::TypeMismatch {
968                                    detail: format!(
969                                        "substring() needs text or bytea, got {:?}",
970                                        other.data_type()
971                                    ),
972                                });
973                            }
974                        });
975                    }
976                    let eff_start = start.max(1);
977                    let eff_len = end - eff_start;
978                    (eff_start, Some(eff_len.max(0)))
979                }
980                None => (start.max(1), None),
981            };
982            match &args[0] {
983                Value::Text(s) => {
984                    // PG counts in characters (codepoints) for TEXT.
985                    let chars: Vec<char> = s.chars().collect();
986                    let skip = (effective_start - 1) as usize;
987                    if skip >= chars.len() {
988                        return Ok(Value::Text(String::new()));
989                    }
990                    let take = match effective_length {
991                        Some(n) => (n as usize).min(chars.len() - skip),
992                        None => chars.len() - skip,
993                    };
994                    Ok(Value::Text(chars[skip..skip + take].iter().collect()))
995                }
996                Value::Bytes(b) => {
997                    let skip = (effective_start - 1) as usize;
998                    if skip >= b.len() {
999                        return Ok(Value::Bytes(Vec::new()));
1000                    }
1001                    let take = match effective_length {
1002                        Some(n) => (n as usize).min(b.len() - skip),
1003                        None => b.len() - skip,
1004                    };
1005                    Ok(Value::Bytes(b[skip..skip + take].to_vec()))
1006                }
1007                other => Err(EvalError::TypeMismatch {
1008                    detail: format!(
1009                        "substring() needs text or bytea, got {:?}",
1010                        other.data_type()
1011                    ),
1012                }),
1013            }
1014        }
1015        // v7.11.15 — `position(needle, haystack)`. PG semantics:
1016        // 1-based byte/char index of first occurrence, or 0 if
1017        // absent. NULL on either operand → NULL. Empty needle
1018        // returns 1 (PG convention). Works on TEXT (char positions)
1019        // and BYTEA (byte positions). (The PG-spec syntax `position(
1020        // needle IN haystack)` is not parsed in v7.11; clients must
1021        // call the function-call form.)
1022        "position" => {
1023            if args.len() != 2 {
1024                return Err(EvalError::TypeMismatch {
1025                    detail: format!("position() takes 2 args, got {}", args.len()),
1026                });
1027            }
1028            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
1029                return Ok(Value::Null);
1030            }
1031            match (&args[0], &args[1]) {
1032                (Value::Text(needle), Value::Text(haystack)) => {
1033                    if needle.is_empty() {
1034                        return Ok(Value::Int(1));
1035                    }
1036                    // Char-based position (PG uses character count).
1037                    let h_chars: Vec<char> = haystack.chars().collect();
1038                    let n_chars: Vec<char> = needle.chars().collect();
1039                    if n_chars.len() > h_chars.len() {
1040                        return Ok(Value::Int(0));
1041                    }
1042                    for i in 0..=h_chars.len() - n_chars.len() {
1043                        if h_chars[i..i + n_chars.len()] == n_chars[..] {
1044                            return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1045                        }
1046                    }
1047                    Ok(Value::Int(0))
1048                }
1049                (Value::Bytes(needle), Value::Bytes(haystack)) => {
1050                    if needle.is_empty() {
1051                        return Ok(Value::Int(1));
1052                    }
1053                    if needle.len() > haystack.len() {
1054                        return Ok(Value::Int(0));
1055                    }
1056                    for i in 0..=haystack.len() - needle.len() {
1057                        if &haystack[i..i + needle.len()] == needle.as_slice() {
1058                            return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1059                        }
1060                    }
1061                    Ok(Value::Int(0))
1062                }
1063                (a, b) => Err(EvalError::TypeMismatch {
1064                    detail: format!(
1065                        "position() operands must both be text or both bytea, got {:?} and {:?}",
1066                        a.data_type(),
1067                        b.data_type()
1068                    ),
1069                }),
1070            }
1071        }
1072        "upper" => {
1073            if args.len() != 1 {
1074                return Err(EvalError::TypeMismatch {
1075                    detail: format!("upper() takes 1 arg, got {}", args.len()),
1076                });
1077            }
1078            match &args[0] {
1079                Value::Null => Ok(Value::Null),
1080                Value::Text(s) => Ok(Value::Text(s.to_uppercase())),
1081                other => Err(EvalError::TypeMismatch {
1082                    detail: format!("upper() needs text, got {:?}", other.data_type()),
1083                }),
1084            }
1085        }
1086        "lower" => {
1087            if args.len() != 1 {
1088                return Err(EvalError::TypeMismatch {
1089                    detail: format!("lower() takes 1 arg, got {}", args.len()),
1090                });
1091            }
1092            match &args[0] {
1093                Value::Null => Ok(Value::Null),
1094                Value::Text(s) => Ok(Value::Text(s.to_lowercase())),
1095                other => Err(EvalError::TypeMismatch {
1096                    detail: format!("lower() needs text, got {:?}", other.data_type()),
1097                }),
1098            }
1099        }
1100        "abs" => {
1101            if args.len() != 1 {
1102                return Err(EvalError::TypeMismatch {
1103                    detail: format!("abs() takes 1 arg, got {}", args.len()),
1104                });
1105            }
1106            match &args[0] {
1107                Value::Null => Ok(Value::Null),
1108                Value::Int(n) => Ok(Value::Int(n.wrapping_abs())),
1109                Value::BigInt(n) => Ok(Value::BigInt(n.wrapping_abs())),
1110                Value::Float(x) => Ok(Value::Float(x.abs())),
1111                other => Err(EvalError::TypeMismatch {
1112                    detail: format!("abs() needs numeric, got {:?}", other.data_type()),
1113                }),
1114            }
1115        }
1116        "coalesce" => {
1117            for a in args {
1118                if !matches!(a, Value::Null) {
1119                    return Ok(a.clone());
1120                }
1121            }
1122            Ok(Value::Null)
1123        }
1124        "date_trunc" => date_trunc(args),
1125        "date_part" => date_part(args),
1126        "age" => age(args),
1127        "to_char" => to_char(args),
1128        // v7.17.0 Phase 3.P0-29 — MySQL time aliases. WordPress,
1129        // Laravel, mysql-connector-python emit these constantly.
1130        // `unix_timestamp()` (bare) is folded by clock_replacement_for
1131        // into a BigInt literal — this arm only handles the 1-arg
1132        // form (TIMESTAMP / DATE → epoch seconds).
1133        "date_format" => date_format_mysql(args),
1134        "unix_timestamp" => unix_timestamp_of(args),
1135        "from_unixtime" => from_unixtime(args),
1136        // v7.17.0 Phase 3.8 — PG `format(fmt, args…)` sprintf-style.
1137        // Conversion specifiers: `%s` (literal string from arg),
1138        // `%I` (quoted identifier), `%L` (quoted SQL literal),
1139        // `%%` (literal `%`). `%n$X` argument-position prefix
1140        // (1-based). NULL arg → empty string for %s; NULL for %I
1141        // is an error in PG; NULL for %L renders as the SQL
1142        // literal `NULL`. Args missing for a position → error.
1143        "format" => format_string(args),
1144        // PG `concat(args...)` — variadic; coerces every arg to
1145        // its text representation; NULL arguments are silently
1146        // skipped (the canonical PG semantic — `concat()` is the
1147        // NULL-tolerant counterpart to the `||` operator which
1148        // propagates NULL).
1149        //
1150        // Reference:
1151        //   https://www.postgresql.org/docs/current/functions-string.html
1152        //   "Concatenates the text representations of all the
1153        //   arguments. NULL arguments are ignored."
1154        //
1155        // Edge cases:
1156        //   * `concat()` (no args) → ''
1157        //   * Every arg NULL → '' (NEVER returns NULL — distinct
1158        //     from `||` and from `array_agg`)
1159        //   * Bool → PG single-char form 't' / 'f'
1160        //   * SmallInt / Int / BigInt / Float / Numeric / Date /
1161        //     Timestamp / Json / Bytes → their canonical text
1162        //     rendering (shared with `format()`'s %s specifier
1163        //     via `value_to_format_text`).
1164        "concat" => {
1165            let mut out = String::new();
1166            for v in args {
1167                if matches!(v, Value::Null) {
1168                    continue;
1169                }
1170                out.push_str(&value_to_format_text(v));
1171            }
1172            Ok(Value::Text(out))
1173        }
1174        // PG `concat_ws(sep, val1 [, val2 ...])` — like concat but
1175        // with a separator inserted between each pair of NON-NULL
1176        // arguments. Critical semantic subtleties:
1177        //   * NULL separator → NULL result (the sep position is
1178        //     mandatory and poison-prone; this is the ONLY way
1179        //     concat_ws can return NULL).
1180        //   * NULL data args silently SKIPPED — the separator is
1181        //     NOT inserted around them. `concat_ws(',', 'a', NULL,
1182        //     'b')` → `'a,b'`, not `'a,,b'`.
1183        //   * Empty-string data args are KEPT (separator placed
1184        //     around them). `concat_ws(',', 'a', '', 'b')` →
1185        //     `'a,,b'`. Distinction with NULL matters for code
1186        //     like `concat_ws(', ', first_name, middle_name,
1187        //     last_name)`.
1188        //   * 0 args → arity error (sep is mandatory).
1189        //   * Only sep (no data) → '' (NOT NULL — distinct from
1190        //     the all-NULL data case which also returns '').
1191        //
1192        // Reference:
1193        //   https://www.postgresql.org/docs/current/functions-string.html
1194        // PG `trim` / `ltrim` / `rtrim` / `btrim`.
1195        //
1196        // Semantic anchors (PG-canonical):
1197        //   * Default chars set is the ASCII SPACE only (NOT the
1198        //     POSIX whitespace class — tab / newline / form-feed
1199        //     stay put unless explicitly listed in `chars`).
1200        //   * `chars` arg is a UTF-8 codepoint SET — any char in
1201        //     the set is stripped, not the substring.
1202        //   * `trim(s)` == `btrim(s)` == strip both ends.
1203        //   * `ltrim(s, c)` / `rtrim(s, c)` strip only the named
1204        //     side; inner occurrences are preserved.
1205        //   * NULL on EITHER arg → NULL result.
1206        //   * Non-text input is coerced via `value_to_format_text`
1207        //     so trim(42) returns '42'.
1208        //
1209        // Reference:
1210        //   https://www.postgresql.org/docs/current/functions-string.html
1211        // PG `replace(string, from, to)` — substring substitution
1212        // for every (non-overlapping, greedy left-to-right)
1213        // occurrence. Empty `from` passes input through unchanged
1214        // (PG behavior — avoids infinite loop). Inserted text is
1215        // NOT re-scanned for new matches (so `replace('a', 'a',
1216        // 'aa')` terminates at `'aa'`, not blows up). NULL on any
1217        // arg poisons.
1218        // PG `split_part(string, delimiter, n)` — split on delim,
1219        // return the n-th field (1-indexed). Negative n counts
1220        // from the end (PG 14+). Out-of-range n → '' (NOT NULL).
1221        // n = 0 → error. Empty delimiter → error. NULL on any
1222        // arg → NULL.
1223        // PG `repeat(string, n)` — duplicate the input N times.
1224        // n=0 → ''; n<0 → '' (PG does NOT error on negative);
1225        // NULL on any arg → NULL.
        // PG `lpad(string, length [, fill])` / `rpad(...)`.
        // length is the target CODEPOINT count. When the input is
        // longer than `length`, both lpad and rpad truncate from
        // the right (i.e. both keep the LEFT-most `length`
        // codepoints), per PG-verified behavior. When the input is
        // shorter, it is padded using `fill` (default SPACE),
        // cycling through the fill for multi-char fills.
        // length<=0 → ''. Empty fill + needs padding → returns
        // input verbatim (potentially truncated). NULL on any
        // arg → NULL.
1235        // PG `strpos(string, substring)` — same as position()
1236        // but with reversed arg order. PG convention is
1237        // strpos(haystack, needle); position(needle, haystack).
1238        // Both are 1-indexed; 0 = not found; codepoint-counted.
1239        // PG `left(string, n)` / `right(string, n)` — head/tail
1240        // substring helpers. Negative n means "all but last/first
1241        // |n| chars" — slice from the OPPOSITE side. n=0 → ''.
1242        // Codepoint-counted. NULL on any arg → NULL.
1243        // PG `floor(x)` — largest integer <= x.
1244        //   * Negative floats floor TOWARD -infinity, NOT toward 0.
1245        //   * Integer types passthrough unchanged.
1246        //   * NULL → NULL.
1247        // PG `ceil(x)` / `ceiling(x)` — smallest integer >= x.
1248        //   * Negative floats round TOWARD zero (toward +inf):
1249        //     ceil(-1.5) → -1, NOT -2.
1250        //   * Integer types passthrough unchanged.
1251        //   * NULL → NULL.
1252        // PG `round(x)` / `round(x, scale)` — half-away-from-zero
1253        // rounding (NUMERIC semantic).
1254        //   * round(0.5) → 1; round(-0.5) → -1; round(2.5) → 3.
1255        //   * Two-arg form rounds to N decimal places (n>0) or to
1256        //     nearest 10^|n| (n<0).
1257        //   * Integer types passthrough unchanged.
1258        //   * NULL on any arg → NULL.
1259        // PG `trunc(x)` / `trunc(x, scale)` — truncate TOWARD zero.
1260        //   * Distinct from floor() which rounds toward -inf:
1261        //     trunc(-1.7)→-1; floor(-1.7)→-2.
1262        //   * Distinct from round() which rounds half-away:
1263        //     trunc(1.5)→1; round(1.5)→2.
1264        //   * Two-arg form truncates to N decimal places (or 10^|n|
1265        //     for negative n).
1266        //   * Integer types passthrough unchanged.
1267        //   * NULL on any arg → NULL.
1268        // PG `nullif(a, b)` — returns NULL if a = b, else a.
1269        // Canonical use cases:
1270        //   * Divide-by-zero protection: `x / nullif(y, 0)`
1271        //   * Empty-string normalisation: `nullif(field, '')`
        // Edge: nullif(NULL, NULL) returns NULL. nullif(NULL, x)
        // returns NULL. nullif(x, NULL) returns x (the comparison
        // `x = NULL` is never true, so the "equal" branch is not
        // taken and the first argument is returned).
1275        // PG `greatest(...)` / `least(...)` — variadic max/min.
1276        // NULL args silently skipped (PG-canonical). All-NULL → NULL.
1277        // Cross-type widening for numeric comparisons.
1278        // PG `mod(y, x)` — modulo. Result sign follows dividend.
1279        //   * mod(7, 3) = 1
1280        //   * mod(-7, 3) = -1
1281        //   * mod(7, -3) = 1
1282        //   * mod(-7, -3) = -1
1283        // Division by zero → error. NULL on any arg → NULL.
1284        // PG `power(x, y)` / `pow(x, y)` — x^y.
1285        // Integer exponent → exact via repeated multiplication
1286        // (no precision loss). Fractional exponent → exp(y*ln(x))
1287        // via the no_std exp/ln series helpers.
1288        // x=0 with negative y → error (1/0). NULL → NULL.
1289        // PG `sqrt(x)` — square root. Negative input → error.
1290        // PG `sign(x)` — -1 / 0 / 1.
1291        // PG `random()` — uniform float in [0, 1). Per-row /
1292        // per-call: each evaluation returns a different value
1293        // even within the same statement. Backed by a xorshift64*
1294        // PRNG with a process-static seed; not cryptographically
1295        // secure (use a cryptographic source for security tokens).
1296        "random" => {
1297            if !args.is_empty() {
1298                return Err(EvalError::TypeMismatch {
1299                    detail: alloc::format!("random() takes 0 args, got {}", args.len()),
1300                });
1301            }
1302            Ok(Value::Float(prng_next_f64()))
1303        }
1304        // v7.17.0 — PG `gen_random_uuid()` (built-in, no extension)
1305        // and the historical uuid-ossp `uuid_generate_v4()` alias.
1306        // Both produce a RFC 4122 v4 (random) UUID. This is the
1307        // function Django / Rails / Hibernate emit in `id UUID
1308        // PRIMARY KEY DEFAULT gen_random_uuid()`, the modern
1309        // default PK pattern.
1310        "gen_random_uuid" | "uuid_generate_v4" => {
1311            if !args.is_empty() {
1312                return Err(EvalError::TypeMismatch {
1313                    detail: alloc::format!("{name}() takes 0 args, got {}", args.len()),
1314                });
1315            }
1316            Ok(Value::Uuid(gen_random_uuid_bytes()))
1317        }
1318        "sign" => {
1319            if args.len() != 1 {
1320                return Err(EvalError::TypeMismatch {
1321                    detail: alloc::format!("sign() takes 1 arg, got {}", args.len()),
1322                });
1323            }
1324            match &args[0] {
1325                Value::Null => Ok(Value::Null),
1326                Value::SmallInt(n) => Ok(Value::SmallInt(n.signum())),
1327                Value::Int(n) => Ok(Value::Int(n.signum())),
1328                Value::BigInt(n) => Ok(Value::BigInt(n.signum())),
1329                Value::Float(x) => {
1330                    let s = if *x > 0.0 {
1331                        1.0
1332                    } else if *x < 0.0 {
1333                        -1.0
1334                    } else {
1335                        0.0
1336                    };
1337                    Ok(Value::Float(s))
1338                }
1339                Value::Numeric { scaled, scale } => {
1340                    let s = scaled.signum();
1341                    Ok(Value::Numeric {
1342                        scaled: s * pow10_i128(*scale),
1343                        scale: *scale,
1344                    })
1345                }
1346                other => Err(EvalError::TypeMismatch {
1347                    detail: alloc::format!("sign() needs numeric, got {:?}", other.data_type()),
1348                }),
1349            }
1350        }
1351        "sqrt" => {
1352            if args.len() != 1 {
1353                return Err(EvalError::TypeMismatch {
1354                    detail: alloc::format!("sqrt() takes 1 arg, got {}", args.len()),
1355                });
1356            }
1357            match &args[0] {
1358                Value::Null => Ok(Value::Null),
1359                v => {
1360                    let x = value_to_f64(v).ok_or_else(|| EvalError::TypeMismatch {
1361                        detail: alloc::format!("sqrt() needs numeric, got {:?}", v.data_type()),
1362                    })?;
1363                    if x < 0.0 {
1364                        return Err(EvalError::TypeMismatch {
1365                            detail: "sqrt(): negative input outside real domain".into(),
1366                        });
1367                    }
1368                    if x == 0.0 {
1369                        return Ok(Value::Float(0.0));
1370                    }
1371                    Ok(Value::Float(f64_sqrt(x)))
1372                }
1373            }
1374        }
1375        "power" | "pow" => {
1376            if args.len() != 2 {
1377                return Err(EvalError::TypeMismatch {
1378                    detail: alloc::format!("power() takes 2 args, got {}", args.len()),
1379                });
1380            }
1381            if args.iter().any(|v| matches!(v, Value::Null)) {
1382                return Ok(Value::Null);
1383            }
1384            let x = value_to_f64(&args[0]).ok_or_else(|| EvalError::TypeMismatch {
1385                detail: "power() needs numeric x".into(),
1386            })?;
1387            let y = value_to_f64(&args[1]).ok_or_else(|| EvalError::TypeMismatch {
1388                detail: "power() needs numeric y".into(),
1389            })?;
1390            // Integer-exponent fast path.
1391            let y_int = y as i32;
1392            if (y_int as f64) == y && y.abs() < 1024.0 {
1393                let result = f64_powi(x, y_int);
1394                return Ok(Value::Float(result));
1395            }
1396            // Fractional exponent — only defined for x >= 0 in real
1397            // arithmetic. Negative x raised to fractional power is
1398            // complex; reject cleanly.
1399            if x < 0.0 {
1400                return Err(EvalError::TypeMismatch {
1401                    detail: "power(): negative base with fractional exponent yields complex result"
1402                        .into(),
1403                });
1404            }
1405            if x == 0.0 && y < 0.0 {
1406                return Err(EvalError::TypeMismatch {
1407                    detail: "power(): 0 raised to negative power is undefined".into(),
1408                });
1409            }
1410            if x == 0.0 {
1411                return Ok(Value::Float(0.0));
1412            }
1413            Ok(Value::Float(f64_exp(y * f64_ln(x))))
1414        }
1415        "mod" => {
1416            if args.len() != 2 {
1417                return Err(EvalError::TypeMismatch {
1418                    detail: alloc::format!("mod() takes 2 args, got {}", args.len()),
1419                });
1420            }
1421            if args.iter().any(|v| matches!(v, Value::Null)) {
1422                return Ok(Value::Null);
1423            }
1424            let to_i64 = |v: &Value| -> Result<i64, EvalError> {
1425                match v {
1426                    Value::SmallInt(x) => Ok(i64::from(*x)),
1427                    Value::Int(x) => Ok(i64::from(*x)),
1428                    Value::BigInt(x) => Ok(*x),
1429                    other => Err(EvalError::TypeMismatch {
1430                        detail: alloc::format!("mod() needs integer, got {:?}", other.data_type()),
1431                    }),
1432                }
1433            };
1434            let y = to_i64(&args[0])?;
1435            let x = to_i64(&args[1])?;
1436            if x == 0 {
1437                return Err(EvalError::TypeMismatch {
1438                    detail: "mod(): division by zero".into(),
1439                });
1440            }
1441            // Rust's `%` operator on signed integers follows the
1442            // dividend's sign — same as PG.
1443            let result = y % x;
1444            // Pick the narrowest type that holds the result.
1445            if let Ok(small) = i16::try_from(result) {
1446                if matches!(args[0], Value::SmallInt(_)) && matches!(args[1], Value::SmallInt(_)) {
1447                    return Ok(Value::SmallInt(small));
1448                }
1449            }
1450            if let Ok(int_) = i32::try_from(result) {
1451                if !matches!(args[0], Value::BigInt(_)) && !matches!(args[1], Value::BigInt(_)) {
1452                    return Ok(Value::Int(int_));
1453                }
1454            }
1455            Ok(Value::BigInt(result))
1456        }
1457        "greatest" | "least" => {
1458            if args.is_empty() {
1459                return Err(EvalError::TypeMismatch {
1460                    detail: alloc::format!(
1461                        "{lc}() takes at least 1 arg",
1462                        lc = if name.eq_ignore_ascii_case("greatest") {
1463                            "greatest"
1464                        } else {
1465                            "least"
1466                        }
1467                    ),
1468                });
1469            }
1470            let non_null: alloc::vec::Vec<&Value> =
1471                args.iter().filter(|v| !matches!(v, Value::Null)).collect();
1472            if non_null.is_empty() {
1473                return Ok(Value::Null);
1474            }
1475            let is_greatest = name.eq_ignore_ascii_case("greatest");
1476            let mut best = non_null[0].clone();
1477            for v in &non_null[1..] {
1478                let ord = value_cmp_for_min_max(&best, v);
1479                let take = if is_greatest {
1480                    ord == core::cmp::Ordering::Less
1481                } else {
1482                    ord == core::cmp::Ordering::Greater
1483                };
1484                if take {
1485                    best = (*v).clone();
1486                }
1487            }
1488            Ok(best)
1489        }
1490        // MySQL `ifnull(a, b)` — alias for coalesce(a, b).
1491        // Used by every ORM with a MySQL target (Hibernate /
1492        // Laravel / Sequelize).
1493        "ifnull" => {
1494            if args.len() != 2 {
1495                return Err(EvalError::TypeMismatch {
1496                    detail: alloc::format!("ifnull() takes 2 args, got {}", args.len()),
1497                });
1498            }
1499            for v in args {
1500                if !matches!(v, Value::Null) {
1501                    return Ok(v.clone());
1502                }
1503            }
1504            Ok(Value::Null)
1505        }
1506        // MySQL `if(cond, then, else)` — alias for CASE WHEN.
1507        // NULL condition → else branch (MySQL semantic).
1508        // Integer condition: nonzero is true.
1509        "if" => {
1510            if args.len() != 3 {
1511                return Err(EvalError::TypeMismatch {
1512                    detail: alloc::format!(
1513                        "if() takes 3 args (cond, then, else), got {}",
1514                        args.len()
1515                    ),
1516                });
1517            }
1518            let truthy = match &args[0] {
1519                Value::Null => false,
1520                Value::Bool(b) => *b,
1521                Value::SmallInt(n) => *n != 0,
1522                Value::Int(n) => *n != 0,
1523                Value::BigInt(n) => *n != 0,
1524                Value::Float(x) => *x != 0.0,
1525                Value::Text(s) => !s.is_empty() && s != "0",
1526                _ => true,
1527            };
1528            if truthy {
1529                Ok(args[1].clone())
1530            } else {
1531                Ok(args[2].clone())
1532            }
1533        }
1534        "nullif" => {
1535            if args.len() != 2 {
1536                return Err(EvalError::TypeMismatch {
1537                    detail: alloc::format!("nullif() takes 2 args, got {}", args.len()),
1538                });
1539            }
1540            match (&args[0], &args[1]) {
1541                (Value::Null, _) => Ok(Value::Null),
1542                (a, Value::Null) => Ok(a.clone()),
1543                (a, b) => {
1544                    // Use value_cmp (already defined as Ord-like
1545                    // function in lib.rs) — but it's not accessible
1546                    // here. Fall back to direct equality.
1547                    if values_equal_for_nullif(a, b) {
1548                        Ok(Value::Null)
1549                    } else {
1550                        Ok(a.clone())
1551                    }
1552                }
1553            }
1554        }
1555        "trunc" => {
1556            match args.len() {
1557                1 => match &args[0] {
1558                    Value::Null => Ok(Value::Null),
1559                    Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1560                    Value::Float(x) => Ok(Value::Float(f64_trunc(*x))),
1561                    Value::Numeric { scaled, scale } => {
1562                        let factor = pow10_i128(*scale);
1563                        // Truncate toward zero — sign-preserving division.
1564                        let q = scaled / factor;
1565                        Ok(Value::Numeric {
1566                            scaled: q * factor,
1567                            scale: *scale,
1568                        })
1569                    }
1570                    other => Err(EvalError::TypeMismatch {
1571                        detail: alloc::format!(
1572                            "trunc() needs numeric, got {:?}",
1573                            other.data_type()
1574                        ),
1575                    }),
1576                },
1577                2 => {
1578                    if args.iter().any(|v| matches!(v, Value::Null)) {
1579                        return Ok(Value::Null);
1580                    }
1581                    let n = match &args[1] {
1582                        Value::SmallInt(x) => i32::from(*x),
1583                        Value::Int(x) => *x,
1584                        Value::BigInt(x) => {
1585                            i32::try_from(*x).map_err(|_| EvalError::TypeMismatch {
1586                                detail: "trunc(): scale must fit in i32".into(),
1587                            })?
1588                        }
1589                        other => {
1590                            return Err(EvalError::TypeMismatch {
1591                                detail: alloc::format!(
1592                                    "trunc(): scale must be integer, got {:?}",
1593                                    other.data_type()
1594                                ),
1595                            });
1596                        }
1597                    };
1598                    let x = match &args[0] {
1599                        Value::SmallInt(v) => f64::from(*v),
1600                        Value::Int(v) => f64::from(*v),
1601                        Value::BigInt(v) => *v as f64,
1602                        Value::Float(v) => *v,
1603                        Value::Numeric { scaled, scale } => {
1604                            (*scaled as f64) / f64_powi(10.0, i32::from(*scale))
1605                        }
1606                        other => {
1607                            return Err(EvalError::TypeMismatch {
1608                                detail: alloc::format!(
1609                                    "trunc() needs numeric x, got {:?}",
1610                                    other.data_type()
1611                                ),
1612                            });
1613                        }
1614                    };
1615                    let result = if n >= 0 {
1616                        let factor = f64_powi(10.0, n);
1617                        f64_trunc(x * factor) / factor
1618                    } else {
1619                        let factor = f64_powi(10.0, -n);
1620                        f64_trunc(x / factor) * factor
1621                    };
1622                    Ok(Value::Float(result))
1623                }
1624                _ => Err(EvalError::TypeMismatch {
1625                    detail: alloc::format!("trunc() takes 1 or 2 args, got {}", args.len()),
1626                }),
1627            }
1628        }
1629        "round" => {
1630            match args.len() {
1631                1 => match &args[0] {
1632                    Value::Null => Ok(Value::Null),
1633                    Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1634                    Value::Float(x) => Ok(Value::Float(f64_round_half_away(*x))),
1635                    Value::Numeric { scaled, scale } => {
1636                        let factor = pow10_i128(*scale);
1637                        let q = scaled.div_euclid(factor);
1638                        let r = scaled.rem_euclid(factor);
1639                        // Half-away-from-zero: if 2*r >= factor → round up.
1640                        let result = if 2 * r >= factor { q + 1 } else { q };
1641                        Ok(Value::Numeric {
1642                            scaled: result * factor,
1643                            scale: *scale,
1644                        })
1645                    }
1646                    other => Err(EvalError::TypeMismatch {
1647                        detail: alloc::format!(
1648                            "round() needs numeric, got {:?}",
1649                            other.data_type()
1650                        ),
1651                    }),
1652                },
1653                2 => {
1654                    if args.iter().any(|v| matches!(v, Value::Null)) {
1655                        return Ok(Value::Null);
1656                    }
1657                    let n = match &args[1] {
1658                        Value::SmallInt(x) => i32::from(*x),
1659                        Value::Int(x) => *x,
1660                        Value::BigInt(x) => {
1661                            i32::try_from(*x).map_err(|_| EvalError::TypeMismatch {
1662                                detail: "round(): scale must fit in i32".into(),
1663                            })?
1664                        }
1665                        other => {
1666                            return Err(EvalError::TypeMismatch {
1667                                detail: alloc::format!(
1668                                    "round(): scale must be integer, got {:?}",
1669                                    other.data_type()
1670                                ),
1671                            });
1672                        }
1673                    };
1674                    // Convert input to f64 for arithmetic
1675                    // simplicity (PG does NUMERIC math here but
1676                    // SPG's f64 path matches the dominant
1677                    // customer expectation for round(N, scale)
1678                    // patterns).
1679                    let x = match &args[0] {
1680                        Value::SmallInt(v) => f64::from(*v),
1681                        Value::Int(v) => f64::from(*v),
1682                        Value::BigInt(v) => *v as f64,
1683                        Value::Float(v) => *v,
1684                        Value::Numeric { scaled, scale } => {
1685                            (*scaled as f64) / f64_powi(10.0, i32::from(*scale))
1686                        }
1687                        other => {
1688                            return Err(EvalError::TypeMismatch {
1689                                detail: alloc::format!(
1690                                    "round() needs numeric x, got {:?}",
1691                                    other.data_type()
1692                                ),
1693                            });
1694                        }
1695                    };
1696                    // Avoid float precision drift from the
1697                    // 10^(-k) reciprocal — for n<0 work with the
1698                    // positive-exponent form: round(x / 10^|n|) *
1699                    // 10^|n|.
1700                    let result = if n >= 0 {
1701                        let factor = f64_powi(10.0, n);
1702                        f64_round_half_away(x * factor) / factor
1703                    } else {
1704                        let factor = f64_powi(10.0, -n);
1705                        f64_round_half_away(x / factor) * factor
1706                    };
1707                    Ok(Value::Float(result))
1708                }
1709                _ => Err(EvalError::TypeMismatch {
1710                    detail: alloc::format!("round() takes 1 or 2 args, got {}", args.len()),
1711                }),
1712            }
1713        }
1714        "ceil" | "ceiling" => {
1715            if args.len() != 1 {
1716                return Err(EvalError::TypeMismatch {
1717                    detail: alloc::format!("ceil() takes 1 arg, got {}", args.len()),
1718                });
1719            }
1720            match &args[0] {
1721                Value::Null => Ok(Value::Null),
1722                Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1723                Value::Float(x) => Ok(Value::Float(f64_ceil(*x))),
1724                Value::Numeric { scaled, scale } => {
1725                    let factor = pow10_i128(*scale);
1726                    let q = scaled.div_euclid(factor);
1727                    let r = scaled.rem_euclid(factor);
1728                    let result = if r == 0 { q } else { q + 1 };
1729                    Ok(Value::Numeric {
1730                        scaled: result * factor,
1731                        scale: *scale,
1732                    })
1733                }
1734                other => Err(EvalError::TypeMismatch {
1735                    detail: alloc::format!("ceil() needs numeric, got {:?}", other.data_type()),
1736                }),
1737            }
1738        }
1739        "floor" => {
1740            if args.len() != 1 {
1741                return Err(EvalError::TypeMismatch {
1742                    detail: alloc::format!("floor() takes 1 arg, got {}", args.len()),
1743                });
1744            }
1745            match &args[0] {
1746                Value::Null => Ok(Value::Null),
1747                Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1748                Value::Float(x) => Ok(Value::Float(f64_floor(*x))),
1749                Value::Numeric { scaled, scale } => {
1750                    let factor = pow10_i128(*scale);
1751                    let q = scaled.div_euclid(factor);
1752                    // div_euclid rounds toward -infinity which is
1753                    // exactly the floor semantic — perfect for
1754                    // negative values.
1755                    Ok(Value::Numeric {
1756                        scaled: q * factor,
1757                        scale: *scale,
1758                    })
1759                }
1760                other => Err(EvalError::TypeMismatch {
1761                    detail: alloc::format!("floor() needs numeric, got {:?}", other.data_type()),
1762                }),
1763            }
1764        }
1765        "left" => string_left_right(args, true, "left"),
1766        "right" => string_left_right(args, false, "right"),
1767        "strpos" => {
1768            if args.len() != 2 {
1769                return Err(EvalError::TypeMismatch {
1770                    detail: alloc::format!(
1771                        "strpos() takes 2 args (haystack, needle), got {}",
1772                        args.len()
1773                    ),
1774                });
1775            }
1776            if args.iter().any(|v| matches!(v, Value::Null)) {
1777                return Ok(Value::Null);
1778            }
1779            let haystack = value_to_format_text(&args[0]);
1780            let needle = value_to_format_text(&args[1]);
1781            if needle.is_empty() {
1782                return Ok(Value::Int(1));
1783            }
1784            let h_chars: Vec<char> = haystack.chars().collect();
1785            let n_chars: Vec<char> = needle.chars().collect();
1786            if n_chars.len() > h_chars.len() {
1787                return Ok(Value::Int(0));
1788            }
1789            for i in 0..=h_chars.len() - n_chars.len() {
1790                if h_chars[i..i + n_chars.len()] == n_chars[..] {
1791                    return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1792                }
1793            }
1794            Ok(Value::Int(0))
1795        }
1796        "lpad" => string_pad(args, true, "lpad"),
1797        "rpad" => string_pad(args, false, "rpad"),
1798        "repeat" => {
1799            if args.len() != 2 {
1800                return Err(EvalError::TypeMismatch {
1801                    detail: alloc::format!("repeat() takes 2 args, got {}", args.len()),
1802                });
1803            }
1804            if args.iter().any(|v| matches!(v, Value::Null)) {
1805                return Ok(Value::Null);
1806            }
1807            let s = value_to_format_text(&args[0]);
1808            let n = match &args[1] {
1809                Value::SmallInt(x) => i64::from(*x),
1810                Value::Int(x) => i64::from(*x),
1811                Value::BigInt(x) => *x,
1812                other => {
1813                    return Err(EvalError::TypeMismatch {
1814                        detail: alloc::format!(
1815                            "repeat(): n must be integer, got {:?}",
1816                            other.data_type()
1817                        ),
1818                    });
1819                }
1820            };
1821            if n <= 0 {
1822                return Ok(Value::Text(String::new()));
1823            }
1824            // Safety cap so a runaway argument doesn't allocate
1825            // terabytes. PG itself enforces a similar cap via
1826            // work_mem; SPG inherits a defensive 64MiB cap.
1827            const MAX_REPEAT_BYTES: usize = 64 * 1024 * 1024;
1828            let needed =
1829                s.len()
1830                    .checked_mul(n as usize)
1831                    .ok_or_else(|| EvalError::TypeMismatch {
1832                        detail: "repeat(): result size overflows usize".into(),
1833                    })?;
1834            if needed > MAX_REPEAT_BYTES {
1835                return Err(EvalError::TypeMismatch {
1836                    detail: alloc::format!(
1837                        "repeat(): result would exceed {MAX_REPEAT_BYTES} bytes"
1838                    ),
1839                });
1840            }
1841            Ok(Value::Text(s.repeat(n as usize)))
1842        }
1843        "split_part" => {
1844            if args.len() != 3 {
1845                return Err(EvalError::TypeMismatch {
1846                    detail: alloc::format!(
1847                        "split_part() takes 3 args (string, delim, n), got {}",
1848                        args.len()
1849                    ),
1850                });
1851            }
1852            if args.iter().any(|v| matches!(v, Value::Null)) {
1853                return Ok(Value::Null);
1854            }
1855            let s = value_to_format_text(&args[0]);
1856            let delim = value_to_format_text(&args[1]);
1857            if delim.is_empty() {
1858                return Err(EvalError::TypeMismatch {
1859                    detail: "split_part(): delimiter cannot be empty".into(),
1860                });
1861            }
1862            let n = match &args[2] {
1863                Value::SmallInt(x) => i64::from(*x),
1864                Value::Int(x) => i64::from(*x),
1865                Value::BigInt(x) => *x,
1866                other => {
1867                    return Err(EvalError::TypeMismatch {
1868                        detail: alloc::format!(
1869                            "split_part(): n must be integer, got {:?}",
1870                            other.data_type()
1871                        ),
1872                    });
1873                }
1874            };
1875            if n == 0 {
1876                return Err(EvalError::TypeMismatch {
1877                    detail: "split_part(): n must be nonzero (PG: 1-indexed)".into(),
1878                });
1879            }
1880            let parts: alloc::vec::Vec<&str> = s.split(&delim[..]).collect();
1881            let total = parts.len() as i64;
1882            let idx = if n > 0 {
1883                n - 1
1884            } else {
1885                // n=-1 → last (idx = total - 1)
1886                total + n
1887            };
1888            if idx < 0 || idx >= total {
1889                return Ok(Value::Text(String::new()));
1890            }
1891            Ok(Value::Text(parts[idx as usize].to_string()))
1892        }
1893        // PG `translate(s, from, to)` — char-by-char positional
1894        // mapping. Each codepoint in `from` is replaced by the
1895        // codepoint at the same index in `to`. When `from` is
1896        // longer than `to`, the extra `from` codepoints are
1897        // DELETED (not replaced). When `from` has duplicates,
1898        // the FIRST occurrence's mapping wins. NULL → NULL.
1899        "translate" => {
1900            if args.len() != 3 {
1901                return Err(EvalError::TypeMismatch {
1902                    detail: alloc::format!("translate() takes 3 args, got {}", args.len()),
1903                });
1904            }
1905            if args.iter().any(|v| matches!(v, Value::Null)) {
1906                return Ok(Value::Null);
1907            }
1908            let s = value_to_format_text(&args[0]);
1909            let from = value_to_format_text(&args[1]);
1910            let to = value_to_format_text(&args[2]);
1911            let from_chars: Vec<char> = from.chars().collect();
1912            let to_chars: Vec<char> = to.chars().collect();
1913            // Build the codepoint map. First occurrence wins.
1914            let mut map: alloc::collections::BTreeMap<char, Option<char>> =
1915                alloc::collections::BTreeMap::new();
1916            for (i, &fc) in from_chars.iter().enumerate() {
1917                if map.contains_key(&fc) {
1918                    continue;
1919                }
1920                let replacement = to_chars.get(i).copied();
1921                map.insert(fc, replacement);
1922            }
1923            let mut out = String::with_capacity(s.len());
1924            for c in s.chars() {
1925                match map.get(&c) {
1926                    Some(Some(r)) => out.push(*r),
1927                    Some(None) => {} // mapped to "deleted"
1928                    None => out.push(c),
1929                }
1930            }
1931            Ok(Value::Text(out))
1932        }
1933        "replace" => {
1934            if args.len() != 3 {
1935                return Err(EvalError::TypeMismatch {
1936                    detail: alloc::format!(
1937                        "replace() takes 3 args (string, from, to), got {}",
1938                        args.len()
1939                    ),
1940                });
1941            }
1942            if args.iter().any(|v| matches!(v, Value::Null)) {
1943                return Ok(Value::Null);
1944            }
1945            let s = value_to_format_text(&args[0]);
1946            let from = value_to_format_text(&args[1]);
1947            let to = value_to_format_text(&args[2]);
1948            if from.is_empty() {
1949                return Ok(Value::Text(s));
1950            }
1951            // std `String::replace` matches PG semantics exactly:
1952            // non-overlapping, left-to-right, no re-scan of
1953            // inserted text. Sealed test surface verifies the
1954            // edge cases independently.
1955            Ok(Value::Text(s.replace(&from[..], &to)))
1956        }
1957        "trim" | "btrim" => string_trim(args, TrimSide::Both, "trim"),
1958        "ltrim" => string_trim(args, TrimSide::Left, "ltrim"),
1959        "rtrim" => string_trim(args, TrimSide::Right, "rtrim"),
1960        "concat_ws" => {
1961            if args.is_empty() {
1962                return Err(EvalError::TypeMismatch {
1963                    detail: "concat_ws() requires at least 1 arg (the separator)".into(),
1964                });
1965            }
1966            // NULL separator poisons the result.
1967            let sep = match &args[0] {
1968                Value::Null => return Ok(Value::Null),
1969                v => value_to_format_text(v),
1970            };
1971            let mut out = String::new();
1972            let mut first = true;
1973            for v in &args[1..] {
1974                if matches!(v, Value::Null) {
1975                    continue;
1976                }
1977                if first {
1978                    first = false;
1979                } else {
1980                    out.push_str(&sep);
1981                }
1982                out.push_str(&value_to_format_text(v));
1983            }
1984            Ok(Value::Text(out))
1985        }
1986        // v7.17.0 Phase 3.7 — PG regex function family.
1987        "regexp_matches" => regexp_matches(args),
1988        "regexp_replace" => regexp_replace(args),
1989        "regexp_split_to_array" => regexp_split_to_array(args),
1990        // v7.17.0 Phase 3.P0-28 — PG JSON builder family.
1991        // to_json / to_jsonb coerce any value to JSON text (NULL
1992        // becomes the JSON literal 'null', not SQL NULL).
1993        "to_json" | "to_jsonb" => {
1994            if args.len() != 1 {
1995                return Err(EvalError::TypeMismatch {
1996                    detail: alloc::format!("to_json() takes 1 arg, got {}", args.len()),
1997                });
1998            }
1999            // Json input passes through verbatim — PG identity.
2000            if let Value::Json(s) = &args[0] {
2001                return Ok(Value::Json(s.clone()));
2002            }
2003            Ok(Value::Json(crate::json::value_to_json_text(&args[0])))
2004        }
2005        "json_build_object" | "jsonb_build_object" => crate::json::build_object(args),
2006        "json_build_array" | "jsonb_build_array" => crate::json::build_array(args),
2007        "jsonb_set" | "json_set" => crate::json::set(args),
2008        "jsonb_insert" | "json_insert" => crate::json::insert(args),
2009        // v7.17.0 Phase 3.9 — PG `jsonb_path_query` family.
2010        "jsonb_path_query" | "json_path_query" => {
2011            if args.len() != 2 {
2012                return Err(EvalError::TypeMismatch {
2013                    detail: alloc::format!("jsonb_path_query() takes 2 args, got {}", args.len()),
2014                });
2015            }
2016            crate::json::path_query(&args[0], &args[1])
2017        }
2018        "jsonb_path_query_first" | "json_path_query_first" => {
2019            if args.len() != 2 {
2020                return Err(EvalError::TypeMismatch {
2021                    detail: alloc::format!(
2022                        "jsonb_path_query_first() takes 2 args, got {}",
2023                        args.len()
2024                    ),
2025                });
2026            }
2027            crate::json::path_query_first(&args[0], &args[1])
2028        }
2029        "jsonb_path_query_array" | "json_path_query_array" => {
2030            if args.len() != 2 {
2031                return Err(EvalError::TypeMismatch {
2032                    detail: alloc::format!(
2033                        "jsonb_path_query_array() takes 2 args, got {}",
2034                        args.len()
2035                    ),
2036                });
2037            }
2038            crate::json::path_query_array(&args[0], &args[1])
2039        }
2040        // v7.17.0 Phase 7 — INET / CIDR network helpers.
2041        "host" => inet_host(args),
2042        "network" => inet_network(args),
2043        "masklen" => inet_masklen(args),
2044        // v6.4.3 — encode/decode + error_on_null SQL function bundle.
2045        "encode" => encode_text(args),
2046        "decode" => decode_text(args),
2047        "error_on_null" => error_on_null(args),
2048        // v7.12.1 — PG full-text search lexer / tsquery builders.
2049        // mailrs G-CRIT-3 acceptance path: `to_tsvector('english',
2050        // … || ' ' || … || …)` runs end-to-end against a tsvector
2051        // column with Porter stemming + standard english stopwords.
2052        "to_tsvector" => fts_to_tsvector(args, ctx),
2053        "plainto_tsquery" => fts_plainto_tsquery(args, ctx),
2054        "phraseto_tsquery" => fts_phraseto_tsquery(args, ctx),
2055        "websearch_to_tsquery" => fts_websearch_to_tsquery(args, ctx),
2056        "to_tsquery" => fts_to_tsquery(args, ctx),
2057        // v7.12.2 — ranking functions. mailrs's fallback search
2058        // query ORDERs BY ts_rank(search_vector, q) DESC.
2059        "ts_rank" => fts_ts_rank(args),
2060        "ts_rank_cd" => fts_ts_rank_cd(args),
2061        // v7.14.0 — PG dump preamble emits
2062        // `SELECT pg_catalog.set_config('search_path', '', false);`
2063        // and friends. SPG is single-schema; accept-as-no-op
2064        // returning either the new value or NULL.
2065        "set_config" => Ok(args.get(1).cloned().unwrap_or(Value::Null)),
2066        "current_setting" => Ok(Value::Text(String::new())),
2067        // PG `pg_catalog.*` discovery / cast helpers commonly
2068        // emitted by ORMs probing the server. Accept-as-no-op
2069        // with sensible defaults so the dump preamble doesn't
2070        // fail. `pg_get_serial_sequence` returns NULL (no
2071        // sequence — SPG has AUTO_INCREMENT instead).
2072        "pg_get_serial_sequence" | "pg_get_constraintdef" | "pg_get_indexdef" => Ok(Value::Null),
2073        "version" => Ok(Value::Text("PostgreSQL 16 (SPG-compat)".into())),
2074        // v7.17.0 Phase 3.P0-30 — session / introspection functions.
2075        // Engine-level dispatch so these compose inside expressions
2076        // (`WHERE schemaname = current_schema()`, `SELECT *,
2077        // database() AS db FROM t`) — the pgwire layer's canned
2078        // shortcuts only catch the bare top-level SELECT shape.
2079        // SPG is single-database + single-schema; the values
2080        // mirror the wire-layer canned defaults.
2081        "current_database" | "database" => Ok(Value::Text("spg".into())),
2082        "current_schema" => Ok(Value::Text("public".into())),
2083        "current_user" | "session_user" | "user" => Ok(Value::Text("admin".into())),
2084        // v7.17.0 Phase 3.P0-31 — `pg_typeof(any)` returns the
2085        // canonical PG lowercase type name. sqlx / SQLAlchemy /
2086        // Diesel emit this during describe; generic ORMs may
2087        // branch on it (`CASE WHEN pg_typeof(x) = 'jsonb' ...`).
2088        // NULL has no resolved value-level type → 'unknown' per
2089        // PG semantics.
2090        "pg_typeof" => {
2091            if args.len() != 1 {
2092                return Err(EvalError::TypeMismatch {
2093                    detail: format!("pg_typeof() takes 1 arg, got {}", args.len()),
2094                });
2095            }
2096            Ok(Value::Text(pg_typeof_name(&args[0]).into()))
2097        }
2098        // v7.17.0 — `nextval` / `currval` / `setval` are handled
2099        // at the top of this match against the SequenceResolver.
2100        // `lastval()` (no-arg session memory) still degrades to
2101        // NULL pending a Phase 1.1b session tracker.
2102        "lastval" => Ok(Value::Null),
2103        // v7.15.0 — pg_trgm: similarity, show_trgm. Match PG
2104        // semantics: similarity returns Jaccard of trigram sets;
2105        // show_trgm returns the trigram set as TEXT[]. NULL on
2106        // any NULL arg.
2107        "similarity" => {
2108            if args.len() != 2 {
2109                return Err(EvalError::TypeMismatch {
2110                    detail: format!("similarity() takes 2 args, got {}", args.len()),
2111                });
2112            }
2113            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2114                return Ok(Value::Null);
2115            }
2116            let a = match &args[0] {
2117                Value::Text(s) => s.as_str(),
2118                other => {
2119                    return Err(EvalError::TypeMismatch {
2120                        detail: format!("similarity() needs text, got {:?}", other.data_type()),
2121                    });
2122                }
2123            };
2124            let b = match &args[1] {
2125                Value::Text(s) => s.as_str(),
2126                other => {
2127                    return Err(EvalError::TypeMismatch {
2128                        detail: format!("similarity() needs text, got {:?}", other.data_type()),
2129                    });
2130                }
2131            };
2132            // PG returns REAL (f32) — we use Float (f64) and let
2133            // coerce_value narrow on assignment to a REAL column.
2134            Ok(Value::Float(spg_storage::trgm::similarity(a, b)))
2135        }
2136        "show_trgm" => {
2137            if args.len() != 1 {
2138                return Err(EvalError::TypeMismatch {
2139                    detail: format!("show_trgm() takes 1 arg, got {}", args.len()),
2140                });
2141            }
2142            if matches!(args[0], Value::Null) {
2143                return Ok(Value::Null);
2144            }
2145            let s = match &args[0] {
2146                Value::Text(s) => s.as_str(),
2147                other => {
2148                    return Err(EvalError::TypeMismatch {
2149                        detail: format!("show_trgm() needs text, got {:?}", other.data_type()),
2150                    });
2151                }
2152            };
2153            // PG returns the trigram set sorted lexicographically.
2154            // `extract_trigrams` already returns a BTreeSet so the
2155            // order is canonical.
2156            let trigrams: Vec<Option<String>> = spg_storage::trgm::extract_trigrams(s)
2157                .into_iter()
2158                .map(Some)
2159                .collect();
2160            Ok(Value::TextArray(trigrams))
2161        }
2162        other => Err(EvalError::TypeMismatch {
2163            detail: format!("unknown function `{other}`"),
2164        }),
2165    }
2166}
2167
2168/// v7.12.2 — `ts_rank([weights,] vec, query [, norm])`. v7.12.2
2169/// supports the canonical `(vec, query)` two-arg form mailrs uses;
2170/// optional weight-array / normalisation arguments error with an
2171/// "unsupported" message rather than silently changing semantics.
2172fn fts_ts_rank(args: &[Value]) -> Result<Value, EvalError> {
2173    let (vec, query) = parse_rank_args("ts_rank", args)?;
2174    match (vec, query) {
2175        (None, _) | (_, None) => Ok(Value::Null),
2176        (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank(&v, &q)))),
2177    }
2178}
2179
2180fn fts_ts_rank_cd(args: &[Value]) -> Result<Value, EvalError> {
2181    let (vec, query) = parse_rank_args("ts_rank_cd", args)?;
2182    match (vec, query) {
2183        (None, _) | (_, None) => Ok(Value::Null),
2184        (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank_cd(&v, &q)))),
2185    }
2186}
2187
2188fn parse_rank_args(
2189    name: &str,
2190    args: &[Value],
2191) -> Result<
2192    (
2193        Option<Vec<spg_storage::TsLexeme>>,
2194        Option<spg_storage::TsQueryAst>,
2195    ),
2196    EvalError,
2197> {
2198    if args.len() != 2 {
2199        return Err(EvalError::TypeMismatch {
2200            detail: format!(
2201                "{name}() takes 2 args in v7.12.2 (weights array + normalisation flag are v7.12.x carve-out), got {}",
2202                args.len()
2203            ),
2204        });
2205    }
2206    let vec = match &args[0] {
2207        Value::Null => None,
2208        Value::TsVector(v) => Some(v.clone()),
2209        other => {
2210            return Err(EvalError::TypeMismatch {
2211                detail: format!(
2212                    "{name}() first arg must be tsvector, got {:?}",
2213                    other.data_type()
2214                ),
2215            });
2216        }
2217    };
2218    let query = match &args[1] {
2219        Value::Null => None,
2220        Value::TsQuery(q) => Some(q.clone()),
2221        other => {
2222            return Err(EvalError::TypeMismatch {
2223                detail: format!(
2224                    "{name}() second arg must be tsquery, got {:?}",
2225                    other.data_type()
2226                ),
2227            });
2228        }
2229    };
2230    Ok((vec, query))
2231}
2232
2233/// v7.12.2 — `tsvector @@ tsquery` match operator. Either
2234/// ordering accepted (PG semantics). NULL on either side → NULL.
2235/// Anything that isn't tsvector/tsquery on either side is a type
2236/// mismatch. Returns BOOL.
2237fn ts_match(l: Value, r: Value) -> Result<Value, EvalError> {
2238    let (vec, query) = match (l, r) {
2239        (Value::Null, _) | (_, Value::Null) => return Ok(Value::Null),
2240        (Value::TsVector(v), Value::TsQuery(q)) => (v, q),
2241        (Value::TsQuery(q), Value::TsVector(v)) => (v, q),
2242        (l, r) => {
2243            return Err(EvalError::TypeMismatch {
2244                detail: format!(
2245                    "@@ requires (tsvector, tsquery), got ({:?}, {:?})",
2246                    l.data_type(),
2247                    r.data_type()
2248                ),
2249            });
2250        }
2251    };
2252    Ok(Value::Bool(crate::fts::ts_query_matches(&vec, &query)))
2253}
2254
2255/// v7.12.1 — `to_tsvector([config,] text)`. With one arg the
2256/// session-resolved `default_text_search_config` is used (defaults
2257/// to `simple` when unset); with two args the first picks the
2258/// config. NULL text → NULL.
2259fn fts_to_tsvector(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2260    let (config, text) = parse_fts_args("to_tsvector", args, ctx)?;
2261    match text {
2262        None => Ok(Value::Null),
2263        Some(t) => Ok(Value::TsVector(crate::fts::to_tsvector(config, &t))),
2264    }
2265}
2266
2267fn fts_plainto_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2268    let (config, text) = parse_fts_args("plainto_tsquery", args, ctx)?;
2269    match text {
2270        None => Ok(Value::Null),
2271        Some(t) => Ok(Value::TsQuery(crate::fts::plainto_tsquery(config, &t))),
2272    }
2273}
2274
2275fn fts_phraseto_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2276    let (config, text) = parse_fts_args("phraseto_tsquery", args, ctx)?;
2277    match text {
2278        None => Ok(Value::Null),
2279        Some(t) => Ok(Value::TsQuery(crate::fts::phraseto_tsquery(config, &t))),
2280    }
2281}
2282
2283fn fts_websearch_to_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2284    let (config, text) = parse_fts_args("websearch_to_tsquery", args, ctx)?;
2285    match text {
2286        None => Ok(Value::Null),
2287        Some(t) => Ok(Value::TsQuery(crate::fts::websearch_to_tsquery(config, &t))),
2288    }
2289}
2290
2291fn fts_to_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2292    let (config, text) = parse_fts_args("to_tsquery", args, ctx)?;
2293    match text {
2294        None => Ok(Value::Null),
2295        Some(t) => Ok(Value::TsQuery(crate::fts::to_tsquery(config, &t)?)),
2296    }
2297}
2298
2299/// Parse the `(config, text)` / `(text)` argument pair shared by
2300/// all FTS builders. Returns the resolved config + the text
2301/// payload (None when text is NULL). The one-arg form pulls the
2302/// config from the session's `default_text_search_config`.
2303fn parse_fts_args(
2304    name: &str,
2305    args: &[Value],
2306    ctx: &EvalContext<'_>,
2307) -> Result<(crate::fts::TsConfig, Option<String>), EvalError> {
2308    let (config_arg, text_arg) = match args {
2309        [t] => (None, t),
2310        [c, t] => (Some(c), t),
2311        _ => {
2312            return Err(EvalError::TypeMismatch {
2313                detail: format!("{name}() takes 1 or 2 args, got {}", args.len()),
2314            });
2315        }
2316    };
2317    let config = match config_arg {
2318        None => match ctx.default_text_search_config {
2319            Some(name_str) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
2320                EvalError::TypeMismatch {
2321                    detail: format!(
2322                        "text search config not implemented: {name_str:?} (supported: simple, english)"
2323                    ),
2324                }
2325            })?,
2326            None => crate::fts::TsConfig::Simple,
2327        },
2328        Some(Value::Null) => return Ok((crate::fts::TsConfig::Simple, None)),
2329        Some(Value::Text(name_str)) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
2330            EvalError::TypeMismatch {
2331                detail: format!(
2332                    "text search config not implemented: {name_str:?} (supported: simple, english)"
2333                ),
2334            }
2335        })?,
2336        Some(other) => {
2337            return Err(EvalError::TypeMismatch {
2338                detail: format!(
2339                    "{name}() config arg must be text, got {:?}",
2340                    other.data_type()
2341                ),
2342            });
2343        }
2344    };
2345    let text = match text_arg {
2346        Value::Null => None,
2347        Value::Text(s) => Some(s.clone()),
2348        other => {
2349            return Err(EvalError::TypeMismatch {
2350                detail: format!(
2351                    "{name}() text arg must be text, got {:?}",
2352                    other.data_type()
2353                ),
2354            });
2355        }
2356    };
2357    Ok((config, text))
2358}
2359
2360/// v6.4.3 — `encode(bytes_as_text, format)`. PG works on bytea
2361/// arguments; SPG's value space treats Text as the byte container
2362/// (raw UTF-8 bytes). Supported formats: base64 (PG default),
2363/// base64url (RFC 4648 §5), base32hex (RFC 4648 §7 extended-hex),
2364/// hex.
2365fn encode_text(args: &[Value]) -> Result<Value, EvalError> {
2366    if args.len() != 2 {
2367        return Err(EvalError::TypeMismatch {
2368            detail: format!("encode() takes 2 args, got {}", args.len()),
2369        });
2370    }
2371    if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2372        return Ok(Value::Null);
2373    }
2374    let bytes: &[u8] = match &args[0] {
2375        Value::Text(s) => s.as_bytes(),
2376        other => {
2377            return Err(EvalError::TypeMismatch {
2378                detail: format!("encode() expects text bytes, got {:?}", other.data_type()),
2379            });
2380        }
2381    };
2382    let fmt = match &args[1] {
2383        Value::Text(s) => s.to_ascii_lowercase(),
2384        other => {
2385            return Err(EvalError::TypeMismatch {
2386                detail: format!("encode() format must be text, got {:?}", other.data_type()),
2387            });
2388        }
2389    };
2390    let out = match fmt.as_str() {
2391        "base64" => b64_encode(bytes, B64_STD),
2392        "base64url" => b64_encode(bytes, B64_URL),
2393        "base32hex" => b32hex_encode(bytes),
2394        "hex" => hex_encode(bytes),
2395        other => {
2396            return Err(EvalError::TypeMismatch {
2397                detail: format!("encode(): unknown format `{other}`"),
2398            });
2399        }
2400    };
2401    Ok(Value::Text(out))
2402}
2403
2404/// v6.4.3 — `decode(text, format)`. Inverse of `encode`; returns
2405/// Text containing the raw decoded bytes (caller may CAST to bytea
2406/// equivalent if SPG adds bytea later).
2407fn decode_text(args: &[Value]) -> Result<Value, EvalError> {
2408    if args.len() != 2 {
2409        return Err(EvalError::TypeMismatch {
2410            detail: format!("decode() takes 2 args, got {}", args.len()),
2411        });
2412    }
2413    if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2414        return Ok(Value::Null);
2415    }
2416    let text = match &args[0] {
2417        Value::Text(s) => s.as_str(),
2418        other => {
2419            return Err(EvalError::TypeMismatch {
2420                detail: format!("decode() expects text, got {:?}", other.data_type()),
2421            });
2422        }
2423    };
2424    let fmt = match &args[1] {
2425        Value::Text(s) => s.to_ascii_lowercase(),
2426        other => {
2427            return Err(EvalError::TypeMismatch {
2428                detail: format!("decode() format must be text, got {:?}", other.data_type()),
2429            });
2430        }
2431    };
2432    let bytes = match fmt.as_str() {
2433        "base64" => b64_decode(text, B64_STD)?,
2434        "base64url" => b64_decode(text, B64_URL)?,
2435        "base32hex" => b32hex_decode(text)?,
2436        "hex" => hex_decode(text)?,
2437        other => {
2438            return Err(EvalError::TypeMismatch {
2439                detail: format!("decode(): unknown format `{other}`"),
2440            });
2441        }
2442    };
2443    let s = String::from_utf8(bytes).map_err(|_| EvalError::TypeMismatch {
2444        detail: "decode(): result bytes are not valid UTF-8 (SPG stores raw bytes as Text)".into(),
2445    })?;
2446    Ok(Value::Text(s))
2447}
2448
2449/// v6.4.3 — `error_on_null(v)`. Returns `v` unchanged if non-NULL;
2450/// errors otherwise. Convenience to assert NOT NULL inside an
2451/// expression without wrapping it in COALESCE + raise hacks.
2452fn error_on_null(args: &[Value]) -> Result<Value, EvalError> {
2453    if args.len() != 1 {
2454        return Err(EvalError::TypeMismatch {
2455            detail: format!("error_on_null() takes 1 arg, got {}", args.len()),
2456        });
2457    }
2458    if matches!(args[0], Value::Null) {
2459        return Err(EvalError::TypeMismatch {
2460            detail: "error_on_null(): argument is NULL".into(),
2461        });
2462    }
2463    Ok(args[0].clone())
2464}
2465
2466// ── byte-level encoders ───────────────────────────────────────────
2467
/// Standard base64 alphabet, indexed by 6-bit value; the last two
/// symbols are `+` and `/`.
const B64_STD: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// URL-safe base64 alphabet: identical to `B64_STD` except that the
/// last two symbols are `-` and `_`.
const B64_URL: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
/// base32hex alphabet, indexed by 5-bit value: digits `0`-`9`
/// followed by uppercase `A`-`V`.
const B32HEX_ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
2471
/// Base64-encode `bytes` using the 64-symbol table `alpha`. Output is
/// always `=`-padded to a multiple of four characters.
fn b64_encode(bytes: &[u8], alpha: &[u8; 64]) -> String {
    let mut encoded = String::with_capacity((bytes.len() + 2) / 3 * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top of a 24-bit word; bytes
        // missing from a short final group read as zero.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (pos, &byte)| acc | (u32::from(byte) << (16 - 8 * pos)));
        let symbol = |shift: u32| char::from(alpha[((word >> shift) & 0x3f) as usize]);
        encoded.push(symbol(18));
        encoded.push(symbol(12));
        // The third and fourth symbols only carry data when the group
        // has a second and third byte respectively.
        encoded.push(if group.len() > 1 { symbol(6) } else { '=' });
        encoded.push(if group.len() > 2 { symbol(0) } else { '=' });
    }
    encoded
}
2499
2500fn b64_decode(text: &str, alpha: &[u8; 64]) -> Result<Vec<u8>, EvalError> {
2501    let mut lookup = [255u8; 256];
2502    for (i, &c) in alpha.iter().enumerate() {
2503        lookup[c as usize] = i as u8;
2504    }
2505    let mut out = Vec::with_capacity(text.len() * 3 / 4);
2506    let mut buf: u32 = 0;
2507    let mut bits: u32 = 0;
2508    for c in text.bytes() {
2509        if c == b'=' {
2510            break;
2511        }
2512        if c == b'\n' || c == b'\r' || c == b' ' {
2513            continue;
2514        }
2515        let v = lookup[c as usize];
2516        if v == 255 {
2517            return Err(EvalError::TypeMismatch {
2518                detail: format!("decode(base64): invalid char {:?}", c as char),
2519            });
2520        }
2521        buf = (buf << 6) | v as u32;
2522        bits += 6;
2523        if bits >= 8 {
2524            bits -= 8;
2525            out.push(((buf >> bits) & 0xff) as u8);
2526        }
2527    }
2528    Ok(out)
2529}
2530
2531fn b32hex_encode(bytes: &[u8]) -> String {
2532    let mut out = String::with_capacity((bytes.len() * 8 + 4) / 5);
2533    let mut buf: u64 = 0;
2534    let mut bits: u32 = 0;
2535    for &b in bytes {
2536        buf = (buf << 8) | b as u64;
2537        bits += 8;
2538        while bits >= 5 {
2539            bits -= 5;
2540            out.push(B32HEX_ALPHABET[((buf >> bits) & 0x1f) as usize] as char);
2541        }
2542    }
2543    if bits > 0 {
2544        out.push(B32HEX_ALPHABET[((buf << (5 - bits)) & 0x1f) as usize] as char);
2545    }
2546    // Pad to multiple of 8.
2547    while out.len() % 8 != 0 {
2548        out.push('=');
2549    }
2550    out
2551}
2552
2553fn b32hex_decode(text: &str) -> Result<Vec<u8>, EvalError> {
2554    let mut lookup = [255u8; 256];
2555    for (i, &c) in B32HEX_ALPHABET.iter().enumerate() {
2556        lookup[c as usize] = i as u8;
2557        // base32hex is case-insensitive — also map lowercase.
2558        let lower = (c as char).to_ascii_lowercase() as u8;
2559        lookup[lower as usize] = i as u8;
2560    }
2561    let mut out = Vec::with_capacity(text.len() * 5 / 8);
2562    let mut buf: u64 = 0;
2563    let mut bits: u32 = 0;
2564    for c in text.bytes() {
2565        if c == b'=' {
2566            break;
2567        }
2568        if c == b'\n' || c == b'\r' || c == b' ' {
2569            continue;
2570        }
2571        let v = lookup[c as usize];
2572        if v == 255 {
2573            return Err(EvalError::TypeMismatch {
2574                detail: format!("decode(base32hex): invalid char {:?}", c as char),
2575            });
2576        }
2577        buf = (buf << 5) | v as u64;
2578        bits += 5;
2579        if bits >= 8 {
2580            bits -= 8;
2581            out.push(((buf >> bits) & 0xff) as u8);
2582        }
2583    }
2584    Ok(out)
2585}
2586
/// Render `bytes` as lowercase hexadecimal, two digits per byte with
/// the high nibble first.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut rendered = String::with_capacity(bytes.len() * 2);
    rendered.extend(bytes.iter().flat_map(|&byte| {
        [byte >> 4, byte & 0x0f].map(|nibble| char::from(DIGITS[usize::from(nibble)]))
    }));
    rendered
}
2596
2597fn hex_decode(text: &str) -> Result<Vec<u8>, EvalError> {
2598    let trimmed = text.trim();
2599    if trimmed.len() % 2 != 0 {
2600        return Err(EvalError::TypeMismatch {
2601            detail: "decode(hex): input length must be even".into(),
2602        });
2603    }
2604    let mut out = Vec::with_capacity(trimmed.len() / 2);
2605    let mut hi: u8 = 0;
2606    for (i, c) in trimmed.bytes().enumerate() {
2607        let v = match c {
2608            b'0'..=b'9' => c - b'0',
2609            b'a'..=b'f' => c - b'a' + 10,
2610            b'A'..=b'F' => c - b'A' + 10,
2611            _ => {
2612                return Err(EvalError::TypeMismatch {
2613                    detail: format!("decode(hex): invalid char {:?}", c as char),
2614                });
2615            }
2616        };
2617        if i % 2 == 0 {
2618            hi = v;
2619        } else {
2620            out.push((hi << 4) | v);
2621        }
2622    }
2623    Ok(out)
2624}
2625
2626/// `date_part(field_text, source)` — function form of `EXTRACT(field FROM
2627/// source)`. Same component dispatch (DATE / TIMESTAMP / INTERVAL) and
2628/// same `BigInt` return shape; PG returns double precision but we keep the
2629/// integer convention so the runner's `query I` shape works unchanged.
2630fn date_part(args: &[Value]) -> Result<Value, EvalError> {
2631    use spg_sql::ast::ExtractField as F;
2632    if args.len() != 2 {
2633        return Err(EvalError::TypeMismatch {
2634            detail: format!("date_part() takes 2 args, got {}", args.len()),
2635        });
2636    }
2637    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
2638        return Ok(Value::Null);
2639    }
2640    let Value::Text(field_name) = &args[0] else {
2641        return Err(EvalError::TypeMismatch {
2642            detail: format!(
2643                "date_part() needs a text field, got {:?}",
2644                args[0].data_type()
2645            ),
2646        });
2647    };
2648    let field = match field_name.to_ascii_lowercase().as_str() {
2649        "year" => F::Year,
2650        "month" => F::Month,
2651        "day" => F::Day,
2652        "hour" => F::Hour,
2653        "minute" => F::Minute,
2654        "second" => F::Second,
2655        "microsecond" | "microseconds" => F::Microsecond,
2656        "epoch" => F::Epoch,
2657        other => {
2658            return Err(EvalError::TypeMismatch {
2659                detail: format!(
2660                    "unknown date_part field {other:?}; \
2661                     supported: year, month, day, hour, minute, second, microsecond"
2662                ),
2663            });
2664        }
2665    };
2666    extract_field(field, &args[1])
2667}
2668
2669/// `age(t1, t2)` — return `t1 - t2` as an INTERVAL. v2.12 produces a
2670/// micros-only interval (no months normalisation) because PG's
2671/// month-justification rule is sensitive to the day-of-month walk and
2672/// adds material complexity for marginal corpus value.
2673///
2674/// `age(t)` (single-arg form) is intentionally unsupported in v2.12:
2675/// the dispatcher errors instead of guessing a clock source. Callers
2676/// who want PG's `age(t)` semantics should write `age(CURRENT_DATE, t)`
2677/// explicitly so the clock reference is visible at the SQL layer.
2678fn age(args: &[Value]) -> Result<Value, EvalError> {
2679    if args.is_empty() || args.len() > 2 {
2680        return Err(EvalError::TypeMismatch {
2681            detail: format!("age() takes 1 or 2 args, got {}", args.len()),
2682        });
2683    }
2684    if args.iter().any(|v| matches!(v, Value::Null)) {
2685        return Ok(Value::Null);
2686    }
2687    // Coerce to TIMESTAMP micros — DATE lifts to midnight; TIMESTAMP
2688    // stays as-is; anything else errors.
2689    let to_micros = |v: &Value| -> Result<i64, EvalError> {
2690        match v {
2691            Value::Timestamp(t) => Ok(*t),
2692            Value::Date(d) => Ok(i64::from(*d) * 86_400_000_000),
2693            other => Err(EvalError::TypeMismatch {
2694                detail: format!("age() needs DATE or TIMESTAMP, got {:?}", other.data_type()),
2695            }),
2696        }
2697    };
2698    if args.len() == 1 {
2699        return Err(EvalError::TypeMismatch {
2700            detail: "single-arg age() is unsupported in v2.12 \
2701                     (use age(CURRENT_DATE, t) explicitly)"
2702                .into(),
2703        });
2704    }
2705    let a = to_micros(&args[0])?;
2706    let b = to_micros(&args[1])?;
2707    let delta = a.checked_sub(b).ok_or(EvalError::TypeMismatch {
2708        detail: "age() subtraction overflows i64 microseconds".into(),
2709    })?;
2710    Ok(Value::Interval {
2711        months: 0,
2712        micros: delta,
2713    })
2714}
2715
2716// `to_char(value, format)` — render a DATE / TIMESTAMP through a PG
2717// format template. Supports the high-traffic placeholders:
2718//   YYYY YY MM Mon Month DD HH24 HH12 MI SS MS US AM PM
2719// Unrecognised characters pass through literally so the template's
2720// punctuation ('-', ':', ' ', '/') needs no escape mechanism.
2721
2722// ─── v7.17.0 Phase 7 — INET / CIDR text helpers ───────────────────────
2723//
2724// SPG stores network address types as Text. The host() / network() /
2725// masklen() helpers parse the textual `addr[/mask]` form and return
2726// the constituent pieces, matching PG's contract for the dominant
2727// customer surface (Django ORM / Rails ORM normalisation).
2728
2729fn inet_host(args: &[Value]) -> Result<Value, EvalError> {
2730    let s = match args {
2731        [Value::Text(s)] => s.clone(),
2732        [Value::Null] => return Ok(Value::Null),
2733        _ => {
2734            return Err(EvalError::TypeMismatch {
2735                detail: alloc::format!("host() takes one TEXT arg, got {} args", args.len()),
2736            });
2737        }
2738    };
2739    let host = s.split('/').next().unwrap_or("").to_string();
2740    Ok(Value::Text(host))
2741}
2742
2743fn inet_network(args: &[Value]) -> Result<Value, EvalError> {
2744    let s = match args {
2745        [Value::Text(s)] => s.clone(),
2746        [Value::Null] => return Ok(Value::Null),
2747        _ => {
2748            return Err(EvalError::TypeMismatch {
2749                detail: alloc::format!("network() takes one TEXT arg, got {} args", args.len()),
2750            });
2751        }
2752    };
2753    // For a `host/mask` form return the masked-network address.
2754    // SPG ships the simple "drop trailing octets per byte" path
2755    // for IPv4; full bit-level masking is out of v7.17 scope.
2756    let mut split = s.splitn(2, '/');
2757    let host = split.next().unwrap_or("").to_string();
2758    let mask: u32 = split.next().and_then(|m| m.parse().ok()).unwrap_or(32);
2759    if !host.contains('.') {
2760        // IPv6 / MACADDR — return the input unmasked.
2761        return Ok(Value::Text(s));
2762    }
2763    let octets: Vec<&str> = host.split('.').collect();
2764    if octets.len() != 4 {
2765        return Ok(Value::Text(s));
2766    }
2767    let keep_bytes = ((mask + 7) / 8) as usize;
2768    let mut out = alloc::string::String::new();
2769    for (i, oct) in octets.iter().enumerate() {
2770        if i > 0 {
2771            out.push('.');
2772        }
2773        if i < keep_bytes {
2774            out.push_str(oct);
2775        } else {
2776            out.push('0');
2777        }
2778    }
2779    out.push('/');
2780    out.push_str(&mask.to_string());
2781    Ok(Value::Text(out))
2782}
2783
2784fn inet_masklen(args: &[Value]) -> Result<Value, EvalError> {
2785    let s = match args {
2786        [Value::Text(s)] => s.clone(),
2787        [Value::Null] => return Ok(Value::Null),
2788        _ => {
2789            return Err(EvalError::TypeMismatch {
2790                detail: alloc::format!("masklen() takes one TEXT arg, got {} args", args.len()),
2791            });
2792        }
2793    };
2794    let mask: i32 = s
2795        .split_once('/')
2796        .and_then(|(_, m)| m.parse().ok())
2797        .unwrap_or(32);
2798    Ok(Value::Int(mask))
2799}
2800
2801// ─── v7.17.0 Phase 3.P0-47 — INET / CIDR containment + overlap ────────
2802//
2803// SPG stores INET / CIDR as Text (Phase 7 design); these helpers parse
2804// the textual `addr[/mask]` form into a (family, bytes, prefix_bits)
2805// triple and implement PG's network-comparison operators on that
2806// representation.
2807//
2808// PG semantics:
2809//   * `<<`  — strictly contained-in (LHS ⊊ RHS)
2810//   * `<<=` — contained-in-or-equal (LHS ⊆ RHS)
2811//   * `>>`, `>>=` — mirrors of the above
2812//   * `&&`  — overlap (either LHS ⊆ RHS or RHS ⊆ LHS)
2813//
2814// NULL on either side → NULL (3VL). Mixed family (v4 vs v6) is never
2815// contained / never overlaps but is not an error — same as PG.
2816
/// Parsed inet network: address bytes (4 for v4, 16 for v6) and the
/// network prefix length in bits. Built by `parse_inet_text`.
struct InetNet {
    /// Address bytes in the order they appear in the text form. An
    /// IPv4 address occupies only the first four bytes; the remaining
    /// twelve stay zero.
    bytes: [u8; 16],
    /// 4 for IPv4, 16 for IPv6.
    family_bytes: u8,
    /// 0..=32 for v4, 0..=128 for v6.
    prefix_bits: u8,
}
2826
2827fn parse_inet_text(s: &str) -> Option<InetNet> {
2828    let mut split = s.splitn(2, '/');
2829    let host = split.next()?;
2830    let mask_str = split.next();
2831    if host.contains(':') {
2832        let bytes = parse_ipv6(host)?;
2833        let prefix_bits = match mask_str {
2834            Some(m) => m.parse::<u8>().ok().filter(|&n| n <= 128)?,
2835            None => 128,
2836        };
2837        let mut out = [0u8; 16];
2838        out.copy_from_slice(&bytes);
2839        Some(InetNet {
2840            bytes: out,
2841            family_bytes: 16,
2842            prefix_bits,
2843        })
2844    } else {
2845        let bytes = parse_ipv4(host)?;
2846        let prefix_bits = match mask_str {
2847            Some(m) => m.parse::<u8>().ok().filter(|&n| n <= 32)?,
2848            None => 32,
2849        };
2850        let mut out = [0u8; 16];
2851        out[..4].copy_from_slice(&bytes);
2852        Some(InetNet {
2853            bytes: out,
2854            family_bytes: 4,
2855            prefix_bits,
2856        })
2857    }
2858}
2859
/// Parse a dotted-quad IPv4 address. Returns `None` unless the text
/// has exactly four dot-separated parts that each parse as a `u8`.
fn parse_ipv4(s: &str) -> Option<[u8; 4]> {
    let mut octets = [0u8; 4];
    let mut parts = s.split('.');
    for slot in octets.iter_mut() {
        *slot = parts.next()?.parse().ok()?;
    }
    // A fifth component means the address is malformed.
    if parts.next().is_some() {
        return None;
    }
    Some(octets)
}
2871
/// Parse a textual IPv6 address into 16 bytes, each 16-bit group
/// written high byte first.
///
/// At most one `::` shorthand is honoured (the first one found).
/// Without `::` exactly eight colon-separated hex groups are
/// required; with `::` at most seven explicit groups may appear and
/// the gap is zero-filled. Returns `None` on any shape or digit error.
fn parse_ipv6(s: &str) -> Option<[u8; 16]> {
    // Colon-separated groups of one side of the address; an empty side
    // contributes no groups at all.
    fn groups(part: &str) -> Vec<&str> {
        if part.is_empty() {
            Vec::new()
        } else {
            part.split(':').collect()
        }
    }

    let (leading, trailing, compressed) = match s.split_once("::") {
        Some((before, after)) => (groups(before), groups(after), true),
        None => (groups(s), Vec::new(), false),
    };
    let shape_ok = if compressed {
        leading.len() + trailing.len() <= 7
    } else {
        leading.len() == 8
    };
    if !shape_ok {
        return None;
    }

    // Leading groups fill from slot 0; trailing groups are right-aligned.
    let trailing_base = 8 - trailing.len();
    let placed = leading
        .iter()
        .enumerate()
        .map(|(i, g)| (i, *g))
        .chain(
            trailing
                .iter()
                .enumerate()
                .map(|(i, g)| (trailing_base + i, *g)),
        );

    let mut out = [0u8; 16];
    for (slot, group) in placed {
        let word = u16::from_str_radix(group, 16).ok()?;
        out[2 * slot..2 * slot + 2].copy_from_slice(&word.to_be_bytes());
    }
    Some(out)
}
2912
2913/// Compare the first `prefix_bits` bits of `a` and `b`. Returns true if
2914/// they match. `prefix_bits` must not exceed the family size.
2915fn network_prefix_eq(a: &InetNet, b: &InetNet, prefix_bits: u8) -> bool {
2916    let full_bytes = (prefix_bits / 8) as usize;
2917    if a.bytes[..full_bytes] != b.bytes[..full_bytes] {
2918        return false;
2919    }
2920    let extra = prefix_bits % 8;
2921    if extra == 0 {
2922        return true;
2923    }
2924    let mask: u8 = 0xff << (8 - extra);
2925    (a.bytes[full_bytes] & mask) == (b.bytes[full_bytes] & mask)
2926}
2927
2928/// True iff network `a` is fully contained in network `b` (a ⊆ b).
2929fn inet_contained_eq(a: &InetNet, b: &InetNet) -> bool {
2930    if a.family_bytes != b.family_bytes {
2931        return false;
2932    }
2933    if a.prefix_bits < b.prefix_bits {
2934        return false;
2935    }
2936    network_prefix_eq(a, b, b.prefix_bits)
2937}
2938
2939/// True iff a and b are exactly the same network (same family + same
2940/// prefix + same masked address).
2941fn inet_networks_equal(a: &InetNet, b: &InetNet) -> bool {
2942    if a.family_bytes != b.family_bytes {
2943        return false;
2944    }
2945    if a.prefix_bits != b.prefix_bits {
2946        return false;
2947    }
2948    network_prefix_eq(a, b, a.prefix_bits)
2949}
2950
2951fn inet_op_bool_result(op: BinOp, l: &Value, r: &Value) -> Result<Value, EvalError> {
2952    if matches!(l, Value::Null) || matches!(r, Value::Null) {
2953        return Ok(Value::Null);
2954    }
2955    let (lt, rt) = match (l, r) {
2956        (Value::Text(a), Value::Text(b)) => (a, b),
2957        _ => {
2958            return Err(EvalError::TypeMismatch {
2959                detail: format!(
2960                    "inet operator requires TEXT/INET operands, got {:?} and {:?}",
2961                    l.data_type(),
2962                    r.data_type()
2963                ),
2964            });
2965        }
2966    };
2967    let a = parse_inet_text(lt).ok_or_else(|| EvalError::TypeMismatch {
2968        detail: format!("invalid inet text: {:?}", lt),
2969    })?;
2970    let b = parse_inet_text(rt).ok_or_else(|| EvalError::TypeMismatch {
2971        detail: format!("invalid inet text: {:?}", rt),
2972    })?;
2973    let result = match op {
2974        BinOp::InetContainedByEq => inet_contained_eq(&a, &b),
2975        BinOp::InetContainedBy => inet_contained_eq(&a, &b) && !inet_networks_equal(&a, &b),
2976        BinOp::InetContainsEq => inet_contained_eq(&b, &a),
2977        BinOp::InetContains => inet_contained_eq(&b, &a) && !inet_networks_equal(&a, &b),
2978        BinOp::InetOverlap => inet_contained_eq(&a, &b) || inet_contained_eq(&b, &a),
2979        _ => unreachable!("inet_op_bool_result called with non-inet op"),
2980    };
2981    Ok(Value::Bool(result))
2982}
2983
2984// ─── v7.17.0 Phase 3.7 — minimal POSIX-ERE-shaped regex matcher ───────
2985//
2986// SPG-engine is `#![no_std]` and has no external regex dependency, so
2987// this module hand-implements the subset of PG's regex needed by the
2988// dominant customer patterns. Supported syntax:
2989//
2990//   * literal characters (with `\.`, `\*`, `\+`, `\?`, `\(`, `\)`,
2991//     `\[`, `\]`, `\\`, `\^`, `\$`, `\|` escapes)
2992//   * `.` — any single character
2993//   * `*`, `+`, `?` — greedy quantifiers
2994//   * character classes: `[abc]`, `[^abc]`, `[a-z0-9_]`
2995//   * shortcut classes: `\d` `\D` `\w` `\W` `\s` `\S`
2996//   * anchors `^` `$`
2997//   * non-capturing groups `(...)`
2998//   * alternation `|`
2999//
3000// NOT supported in v7.17 (errors clearly):
3001//   * backreferences `\1`
3002//   * lookaround `(?=…)` `(?<=…)`
3003//   * named captures
3004//   * inline flag groups `(?i)`
3005//   * lazy quantifiers `*?` `+?` `??` — patterns containing `?` after
3006//     a quantifier are accepted but interpreted as the greedy form
3007//     (this is the v7.17 stop-gap; customers needing lazy semantics
3008//     should preprocess the pattern)
3009//   * counted repetition `{n,m}`
3010//
3011// The matcher uses a backtracking NFA-shaped walk; performance is fine
3012// for the small strings PG regex functions usually operate on.
3013
3014#[derive(Debug, Clone)]
3015enum ReNode {
3016    /// Single literal byte. ASCII fast-path; non-ASCII falls through
3017    /// to Any since the engine doesn't decode UTF-8 here.
3018    Literal(char),
3019    /// Any single character.
3020    AnyChar,
3021    /// Character class: (positive members list, negated flag).
3022    Class {
3023        members: Vec<ClassMember>,
3024        negated: bool,
3025    },
3026    /// Anchor start.
3027    Start,
3028    /// Anchor end.
3029    End,
3030    /// Greedy quantifier.
3031    Quant {
3032        inner: Box<ReNode>,
3033        min: usize,
3034        max: Option<usize>,
3035    },
3036    /// Concatenation of sub-nodes.
3037    Concat(Vec<ReNode>),
3038    /// Alternation.
3039    Alt(Vec<ReNode>),
3040}
3041
/// One entry of a character class.
#[derive(Debug, Clone)]
enum ClassMember {
    /// Exactly this character.
    Single(char),
    /// Any character from the first bound to the second, both inclusive.
    Range(char, char),
}
3047
3048fn re_compile(pat: &str) -> Result<ReNode, EvalError> {
3049    let chars: Vec<char> = pat.chars().collect();
3050    let mut p = 0;
3051    let n = re_parse_alt(&chars, &mut p)?;
3052    if p != chars.len() {
3053        return Err(EvalError::TypeMismatch {
3054            detail: alloc::format!("regex compile: trailing chars at pos {p} in {pat:?}"),
3055        });
3056    }
3057    Ok(n)
3058}
3059
3060fn re_parse_alt(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3061    let mut branches = alloc::vec![re_parse_concat(chars, p)?];
3062    while *p < chars.len() && chars[*p] == '|' {
3063        *p += 1;
3064        branches.push(re_parse_concat(chars, p)?);
3065    }
3066    if branches.len() == 1 {
3067        Ok(branches.pop().unwrap())
3068    } else {
3069        Ok(ReNode::Alt(branches))
3070    }
3071}
3072
3073fn re_parse_concat(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3074    let mut items: Vec<ReNode> = Vec::new();
3075    while *p < chars.len() {
3076        let c = chars[*p];
3077        if c == '|' || c == ')' {
3078            break;
3079        }
3080        let atom = re_parse_atom(chars, p)?;
3081        // Optional quantifier suffix.
3082        let quantified = if *p < chars.len() {
3083            match chars[*p] {
3084                '*' => {
3085                    *p += 1;
3086                    // v7.17 stop-gap: tolerate `*?` lazy quantifier
3087                    // by treating it as greedy. Skip the trailing
3088                    // `?` if present.
3089                    if *p < chars.len() && chars[*p] == '?' {
3090                        *p += 1;
3091                    }
3092                    ReNode::Quant {
3093                        inner: Box::new(atom),
3094                        min: 0,
3095                        max: None,
3096                    }
3097                }
3098                '+' => {
3099                    *p += 1;
3100                    if *p < chars.len() && chars[*p] == '?' {
3101                        *p += 1;
3102                    }
3103                    ReNode::Quant {
3104                        inner: Box::new(atom),
3105                        min: 1,
3106                        max: None,
3107                    }
3108                }
3109                '?' => {
3110                    *p += 1;
3111                    ReNode::Quant {
3112                        inner: Box::new(atom),
3113                        min: 0,
3114                        max: Some(1),
3115                    }
3116                }
3117                _ => atom,
3118            }
3119        } else {
3120            atom
3121        };
3122        items.push(quantified);
3123    }
3124    if items.len() == 1 {
3125        Ok(items.pop().unwrap())
3126    } else {
3127        Ok(ReNode::Concat(items))
3128    }
3129}
3130
3131fn re_parse_atom(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3132    let c = chars[*p];
3133    match c {
3134        '(' => {
3135            *p += 1;
3136            let inner = re_parse_alt(chars, p)?;
3137            if *p >= chars.len() || chars[*p] != ')' {
3138                return Err(EvalError::TypeMismatch {
3139                    detail: "regex compile: unmatched '('".into(),
3140                });
3141            }
3142            *p += 1;
3143            Ok(inner)
3144        }
3145        '[' => {
3146            *p += 1;
3147            let mut negated = false;
3148            if *p < chars.len() && chars[*p] == '^' {
3149                negated = true;
3150                *p += 1;
3151            }
3152            let mut members: Vec<ClassMember> = Vec::new();
3153            while *p < chars.len() && chars[*p] != ']' {
3154                let start = chars[*p];
3155                *p += 1;
3156                if *p + 1 < chars.len() && chars[*p] == '-' && chars[*p + 1] != ']' {
3157                    let end = chars[*p + 1];
3158                    *p += 2;
3159                    members.push(ClassMember::Range(start, end));
3160                } else {
3161                    members.push(ClassMember::Single(start));
3162                }
3163            }
3164            if *p >= chars.len() {
3165                return Err(EvalError::TypeMismatch {
3166                    detail: "regex compile: unmatched '['".into(),
3167                });
3168            }
3169            *p += 1; // consume ]
3170            Ok(ReNode::Class { members, negated })
3171        }
3172        '.' => {
3173            *p += 1;
3174            Ok(ReNode::AnyChar)
3175        }
3176        '^' => {
3177            *p += 1;
3178            Ok(ReNode::Start)
3179        }
3180        '$' => {
3181            *p += 1;
3182            Ok(ReNode::End)
3183        }
3184        '\\' => {
3185            *p += 1;
3186            if *p >= chars.len() {
3187                return Err(EvalError::TypeMismatch {
3188                    detail: "regex compile: dangling backslash".into(),
3189                });
3190            }
3191            let esc = chars[*p];
3192            *p += 1;
3193            match esc {
3194                'd' => Ok(ReNode::Class {
3195                    members: alloc::vec![ClassMember::Range('0', '9')],
3196                    negated: false,
3197                }),
3198                'D' => Ok(ReNode::Class {
3199                    members: alloc::vec![ClassMember::Range('0', '9')],
3200                    negated: true,
3201                }),
3202                'w' => Ok(ReNode::Class {
3203                    members: alloc::vec![
3204                        ClassMember::Range('a', 'z'),
3205                        ClassMember::Range('A', 'Z'),
3206                        ClassMember::Range('0', '9'),
3207                        ClassMember::Single('_'),
3208                    ],
3209                    negated: false,
3210                }),
3211                'W' => Ok(ReNode::Class {
3212                    members: alloc::vec![
3213                        ClassMember::Range('a', 'z'),
3214                        ClassMember::Range('A', 'Z'),
3215                        ClassMember::Range('0', '9'),
3216                        ClassMember::Single('_'),
3217                    ],
3218                    negated: true,
3219                }),
3220                's' => Ok(ReNode::Class {
3221                    members: alloc::vec![
3222                        ClassMember::Single(' '),
3223                        ClassMember::Single('\t'),
3224                        ClassMember::Single('\n'),
3225                        ClassMember::Single('\r'),
3226                    ],
3227                    negated: false,
3228                }),
3229                'S' => Ok(ReNode::Class {
3230                    members: alloc::vec![
3231                        ClassMember::Single(' '),
3232                        ClassMember::Single('\t'),
3233                        ClassMember::Single('\n'),
3234                        ClassMember::Single('\r'),
3235                    ],
3236                    negated: true,
3237                }),
3238                other => Ok(ReNode::Literal(other)),
3239            }
3240        }
3241        other => {
3242            *p += 1;
3243            Ok(ReNode::Literal(other))
3244        }
3245    }
3246}
3247
3248fn class_matches(member: &ClassMember, c: char) -> bool {
3249    match member {
3250        ClassMember::Single(s) => *s == c,
3251        ClassMember::Range(a, b) => c >= *a && c <= *b,
3252    }
3253}
3254
3255/// Try to match `node` starting at `pos` in `s`. Returns Some(end)
3256/// of the matched span (exclusive), or None if no match. Greedy
3257/// backtracking: each quantifier tries the longest viable repeat
3258/// and shrinks if the tail doesn't fit.
3259fn re_match_at(node: &ReNode, s: &[char], pos: usize) -> Option<usize> {
3260    match node {
3261        ReNode::Literal(c) => {
3262            if s.get(pos).copied() == Some(*c) {
3263                Some(pos + 1)
3264            } else {
3265                None
3266            }
3267        }
3268        ReNode::AnyChar => {
3269            if pos < s.len() && s[pos] != '\n' {
3270                Some(pos + 1)
3271            } else {
3272                None
3273            }
3274        }
3275        ReNode::Class { members, negated } => {
3276            let c = *s.get(pos)?;
3277            let hit = members.iter().any(|m| class_matches(m, c));
3278            if hit ^ negated { Some(pos + 1) } else { None }
3279        }
3280        ReNode::Start => {
3281            if pos == 0 {
3282                Some(pos)
3283            } else {
3284                None
3285            }
3286        }
3287        ReNode::End => {
3288            if pos == s.len() {
3289                Some(pos)
3290            } else {
3291                None
3292            }
3293        }
3294        ReNode::Concat(items) => {
3295            let mut p = pos;
3296            for it in items {
3297                p = re_match_at(it, s, p)?;
3298            }
3299            Some(p)
3300        }
3301        ReNode::Alt(branches) => {
3302            for b in branches {
3303                if let Some(p) = re_match_at(b, s, pos) {
3304                    return Some(p);
3305                }
3306            }
3307            None
3308        }
3309        ReNode::Quant { inner, min, max } => {
3310            // Greedy: gather as many matches as possible, then
3311            // shrink. v7.17 stop-gap doesn't continue the outer
3312            // tail match (we're at a leaf in concat already), so
3313            // we just return the longest match.
3314            let mut count = 0usize;
3315            let mut p = pos;
3316            loop {
3317                if let Some(cap) = max {
3318                    if count >= *cap {
3319                        break;
3320                    }
3321                }
3322                match re_match_at(inner, s, p) {
3323                    Some(np) if np > p => {
3324                        p = np;
3325                        count += 1;
3326                    }
3327                    _ => break,
3328                }
3329            }
3330            if count < *min {
3331                return None;
3332            }
3333            Some(p)
3334        }
3335    }
3336}
3337
3338/// Find the first match of `node` in `s`, starting at or after
3339/// `from`. Returns the (start, end) char positions of the match.
3340fn re_find(node: &ReNode, s: &[char], from: usize) -> Option<(usize, usize)> {
3341    let mut start = from;
3342    loop {
3343        if let Some(end) = re_match_at(node, s, start) {
3344            return Some((start, end));
3345        }
3346        if start >= s.len() {
3347            return None;
3348        }
3349        start += 1;
3350    }
3351}
3352
3353/// v7.17.0 Phase 3.7 — `regexp_matches(s, pat)` returns the FIRST
3354/// match as a single-element TEXT[]. (PG returns one row per match
3355/// across all captures; SPG simplifies to first-match-only TEXT[].
3356/// The `g` flag form `regexp_matches(s, pat, 'g')` falls through
3357/// to all-matches concatenation as a flat array.)
3358fn regexp_matches(args: &[Value]) -> Result<Value, EvalError> {
3359    let (text, pat, all_matches) = match args.len() {
3360        2 => (text_arg(&args[0])?, text_arg(&args[1])?, false),
3361        3 => {
3362            let flags = text_arg(&args[2])?.unwrap_or_default();
3363            (
3364                text_arg(&args[0])?,
3365                text_arg(&args[1])?,
3366                flags.contains('g'),
3367            )
3368        }
3369        n => {
3370            return Err(EvalError::TypeMismatch {
3371                detail: alloc::format!("regexp_matches() takes 2 or 3 args, got {n}"),
3372            });
3373        }
3374    };
3375    let Some(text) = text else {
3376        return Ok(Value::Null);
3377    };
3378    let Some(pat) = pat else {
3379        return Ok(Value::Null);
3380    };
3381    let node = re_compile(&pat)?;
3382    let chars: Vec<char> = text.chars().collect();
3383    let mut out: Vec<Option<String>> = Vec::new();
3384    let mut from = 0usize;
3385    while let Some((s_pos, e_pos)) = re_find(&node, &chars, from) {
3386        out.push(Some(chars[s_pos..e_pos].iter().collect()));
3387        if !all_matches {
3388            break;
3389        }
3390        // Advance past the match; if zero-width, step one.
3391        from = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3392        if from > chars.len() {
3393            break;
3394        }
3395    }
3396    Ok(Value::TextArray(out))
3397}
3398
3399/// v7.17.0 Phase 3.7 — `regexp_replace(s, pat, repl[, flags])`.
3400/// `flags` containing `g` replaces all matches; absent flag
3401/// replaces only the first match (PG default).
3402fn regexp_replace(args: &[Value]) -> Result<Value, EvalError> {
3403    let (text, pat, repl, flags) = match args.len() {
3404        3 => (
3405            text_arg(&args[0])?,
3406            text_arg(&args[1])?,
3407            text_arg(&args[2])?,
3408            String::new(),
3409        ),
3410        4 => (
3411            text_arg(&args[0])?,
3412            text_arg(&args[1])?,
3413            text_arg(&args[2])?,
3414            text_arg(&args[3])?.unwrap_or_default(),
3415        ),
3416        n => {
3417            return Err(EvalError::TypeMismatch {
3418                detail: alloc::format!("regexp_replace() takes 3 or 4 args, got {n}"),
3419            });
3420        }
3421    };
3422    let Some(text) = text else {
3423        return Ok(Value::Null);
3424    };
3425    let Some(pat) = pat else {
3426        return Ok(Value::Null);
3427    };
3428    let Some(repl) = repl else {
3429        return Ok(Value::Null);
3430    };
3431    let global = flags.contains('g');
3432    let node = re_compile(&pat)?;
3433    let chars: Vec<char> = text.chars().collect();
3434    let mut out = String::with_capacity(text.len());
3435    let mut from = 0usize;
3436    loop {
3437        match re_find(&node, &chars, from) {
3438            Some((s_pos, e_pos)) => {
3439                out.extend(chars[from..s_pos].iter());
3440                out.push_str(&repl);
3441                let step = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3442                from = step;
3443                if !global {
3444                    if from <= chars.len() {
3445                        out.extend(chars[from..].iter());
3446                    }
3447                    return Ok(Value::Text(out));
3448                }
3449                if from > chars.len() {
3450                    break;
3451                }
3452            }
3453            None => {
3454                out.extend(chars[from..].iter());
3455                break;
3456            }
3457        }
3458    }
3459    Ok(Value::Text(out))
3460}
3461
3462/// v7.17.0 Phase 3.7 — `regexp_split_to_array(s, pat)`. Returns
3463/// TEXT[] of the pieces between matches.
3464fn regexp_split_to_array(args: &[Value]) -> Result<Value, EvalError> {
3465    if args.len() != 2 {
3466        return Err(EvalError::TypeMismatch {
3467            detail: alloc::format!("regexp_split_to_array() takes 2 args, got {}", args.len()),
3468        });
3469    }
3470    let text = text_arg(&args[0])?;
3471    let pat = text_arg(&args[1])?;
3472    let Some(text) = text else {
3473        return Ok(Value::Null);
3474    };
3475    let Some(pat) = pat else {
3476        return Ok(Value::Null);
3477    };
3478    let node = re_compile(&pat)?;
3479    let chars: Vec<char> = text.chars().collect();
3480    let mut out: Vec<Option<String>> = Vec::new();
3481    let mut piece_start = 0usize;
3482    let mut from = 0usize;
3483    loop {
3484        match re_find(&node, &chars, from) {
3485            Some((s_pos, e_pos)) => {
3486                let piece: String = chars[piece_start..s_pos].iter().collect();
3487                out.push(Some(piece));
3488                let step = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3489                from = step;
3490                piece_start = step;
3491                if from > chars.len() {
3492                    break;
3493                }
3494            }
3495            None => {
3496                let tail: String = chars[piece_start..].iter().collect();
3497                out.push(Some(tail));
3498                break;
3499            }
3500        }
3501    }
3502    Ok(Value::TextArray(out))
3503}
3504
3505/// Helper: coerce a Value to an Option<String> for regex args. NULL
3506/// propagates as None (caller short-circuits to Value::Null).
3507fn text_arg(v: &Value) -> Result<Option<String>, EvalError> {
3508    match v {
3509        Value::Text(s) => Ok(Some(s.clone())),
3510        Value::Null => Ok(None),
3511        other => Err(EvalError::TypeMismatch {
3512            detail: alloc::format!(
3513                "regex function expects TEXT arg, got {:?}",
3514                other.data_type()
3515            ),
3516        }),
3517    }
3518}
3519
/// Which end(s) of a string the PG trim family strips.
#[derive(Debug, Clone, Copy)]
enum TrimSide {
    /// Strip from the start only.
    Left,
    /// Strip from the end only.
    Right,
    /// Strip from both ends.
    Both,
}
3527
3528/// PG `left(s, n)` / `right(s, n)` shared implementation. Both
3529/// support negative n which means "all but |n| chars from the
3530/// opposite side". n=0 → ''. Codepoint-counted. NULL → NULL.
3531fn string_left_right(args: &[Value], is_left: bool, fn_name: &str) -> Result<Value, EvalError> {
3532    if args.len() != 2 {
3533        return Err(EvalError::TypeMismatch {
3534            detail: alloc::format!("{fn_name}() takes 2 args, got {}", args.len()),
3535        });
3536    }
3537    if args.iter().any(|v| matches!(v, Value::Null)) {
3538        return Ok(Value::Null);
3539    }
3540    let s = value_to_format_text(&args[0]);
3541    let n = match &args[1] {
3542        Value::SmallInt(x) => i64::from(*x),
3543        Value::Int(x) => i64::from(*x),
3544        Value::BigInt(x) => *x,
3545        other => {
3546            return Err(EvalError::TypeMismatch {
3547                detail: alloc::format!(
3548                    "{fn_name}(): n must be integer, got {:?}",
3549                    other.data_type()
3550                ),
3551            });
3552        }
3553    };
3554    let chars: Vec<char> = s.chars().collect();
3555    let len = chars.len() as i64;
3556    if n == 0 {
3557        return Ok(Value::Text(String::new()));
3558    }
3559    let (start, end) = if is_left {
3560        if n > 0 {
3561            (0usize, (n.min(len)) as usize)
3562        } else {
3563            // left(s, -k) → drop last |k| chars; keep [0..len - k]
3564            let drop = (-n).min(len);
3565            (0usize, (len - drop) as usize)
3566        }
3567    } else if n > 0 {
3568        // right(s, k) → keep last k chars; start = max(0, len-k)
3569        let start = (len - n).max(0);
3570        (start as usize, len as usize)
3571    } else {
3572        // right(s, -k) → drop first |k| chars; keep [k..len]
3573        let drop = (-n).min(len);
3574        (drop as usize, len as usize)
3575    };
3576    if start >= end {
3577        return Ok(Value::Text(String::new()));
3578    }
3579    Ok(Value::Text(chars[start..end].iter().collect()))
3580}
3581
3582/// Compare two values for min/max selection. Returns Equal when
3583/// values are equal (including cross-numeric-width), Less when
3584/// a < b, Greater when a > b. NULL handling is upstream.
3585fn value_cmp_for_min_max(a: &Value, b: &Value) -> core::cmp::Ordering {
3586    use core::cmp::Ordering;
3587    // Integer-widen first (covers SmallInt vs Int vs BigInt).
3588    let a_int = match a {
3589        Value::SmallInt(x) => Some(i64::from(*x)),
3590        Value::Int(x) => Some(i64::from(*x)),
3591        Value::BigInt(x) => Some(*x),
3592        _ => None,
3593    };
3594    let b_int = match b {
3595        Value::SmallInt(x) => Some(i64::from(*x)),
3596        Value::Int(x) => Some(i64::from(*x)),
3597        Value::BigInt(x) => Some(*x),
3598        _ => None,
3599    };
3600    if let (Some(av), Some(bv)) = (a_int, b_int) {
3601        return av.cmp(&bv);
3602    }
3603    // Float-widen.
3604    let a_f = value_to_f64(a);
3605    let b_f = value_to_f64(b);
3606    if let (Some(av), Some(bv)) = (a_f, b_f) {
3607        return av.partial_cmp(&bv).unwrap_or(Ordering::Equal);
3608    }
3609    // Text/Text.
3610    match (a, b) {
3611        (Value::Text(av), Value::Text(bv)) => av.cmp(bv),
3612        (Value::Bytes(av), Value::Bytes(bv)) => av.cmp(bv),
3613        _ => Ordering::Equal,
3614    }
3615}
3616
3617fn value_to_f64(v: &Value) -> Option<f64> {
3618    match v {
3619        Value::Float(x) => Some(*x),
3620        Value::SmallInt(x) => Some(f64::from(*x)),
3621        Value::Int(x) => Some(f64::from(*x)),
3622        Value::BigInt(x) => Some(*x as f64),
3623        Value::Numeric { scaled, scale } => {
3624            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3625        }
3626        _ => None,
3627    }
3628}
3629
3630/// PG-style equality for nullif. Handles cross-numeric-width
3631/// comparison (Int vs BigInt vs SmallInt vs Float vs Numeric);
3632/// text matches text exactly; everything else uses derived
3633/// PartialEq.
3634fn values_equal_for_nullif(a: &Value, b: &Value) -> bool {
3635    // Same-type fast path.
3636    if a == b {
3637        return true;
3638    }
3639    // Cross-int widening: SmallInt / Int / BigInt all comparable.
3640    let a_int = match a {
3641        Value::SmallInt(x) => Some(i64::from(*x)),
3642        Value::Int(x) => Some(i64::from(*x)),
3643        Value::BigInt(x) => Some(*x),
3644        _ => None,
3645    };
3646    let b_int = match b {
3647        Value::SmallInt(x) => Some(i64::from(*x)),
3648        Value::Int(x) => Some(i64::from(*x)),
3649        Value::BigInt(x) => Some(*x),
3650        _ => None,
3651    };
3652    if let (Some(a), Some(b)) = (a_int, b_int) {
3653        return a == b;
3654    }
3655    // Float / Numeric: widen to f64.
3656    let a_f = match a {
3657        Value::Float(x) => Some(*x),
3658        Value::SmallInt(x) => Some(f64::from(*x)),
3659        Value::Int(x) => Some(f64::from(*x)),
3660        Value::BigInt(x) => Some(*x as f64),
3661        Value::Numeric { scaled, scale } => {
3662            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3663        }
3664        _ => None,
3665    };
3666    let b_f = match b {
3667        Value::Float(x) => Some(*x),
3668        Value::SmallInt(x) => Some(f64::from(*x)),
3669        Value::Int(x) => Some(f64::from(*x)),
3670        Value::BigInt(x) => Some(*x as f64),
3671        Value::Numeric { scaled, scale } => {
3672            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3673        }
3674        _ => None,
3675    };
3676    if let (Some(a), Some(b)) = (a_f, b_f) {
3677        return a == b;
3678    }
3679    false
3680}
3681
/// no_std-compatible `trunc(x)` for f64 — rounds toward zero.
///
/// NaN, the infinities and any value with |x| >= 2^53 are returned as-is;
/// at that magnitude an f64 has no fractional part left. Everything else
/// goes through an `i64` round-trip, which drops the fraction.
fn f64_trunc(x: f64) -> f64 {
    // 2^53
    const INTEGRAL_FROM: f64 = 9_007_199_254_740_992.0;

    let already_integral =
        x.is_nan() || x.is_infinite() || x >= INTEGRAL_FROM || x <= -INTEGRAL_FROM;
    if already_integral {
        x
    } else {
        (x as i64) as f64
    }
}
3695
/// State of the process-wide xorshift64 generator (shift triple 13, 7,
/// 17), advanced by every `prng_next_u64` call. The initial value is a
/// fixed non-zero constant, so the sequence is the same in every process.
///
/// Not cryptographically secure. Note that `gen_random_uuid_bytes` draws
/// from this same generator, so its UUIDs are predictable as well; a
/// real crypto-RNG is needed when security matters.
static PRNG_STATE: core::sync::atomic::AtomicU64 =
    core::sync::atomic::AtomicU64::new(0x2545_F491_4F6C_DD1D);
3702
3703/// Advance the PRNG and return the raw next 64-bit state.
3704/// Shared between `random()` and `gen_random_uuid()`. The CAS
3705/// loop guarantees concurrent callers each see a distinct value
3706/// — important for `gen_random_uuid` collision freedom under
3707/// concurrent INSERTs.
3708fn prng_next_u64() -> u64 {
3709    use core::sync::atomic::Ordering;
3710    let mut x = PRNG_STATE.load(Ordering::Relaxed);
3711    loop {
3712        if x == 0 {
3713            x = 0x2545_F491_4F6C_DD1D;
3714        }
3715        let mut next = x;
3716        next ^= next << 13;
3717        next ^= next >> 7;
3718        next ^= next << 17;
3719        match PRNG_STATE.compare_exchange_weak(x, next, Ordering::Relaxed, Ordering::Relaxed) {
3720            Ok(_) => return next,
3721            Err(seen) => x = seen,
3722        }
3723    }
3724}
3725
3726/// Advance the PRNG and return a uniform double in [0, 1).
3727fn prng_next_f64() -> f64 {
3728    // 53 bits of randomness mapped to [0, 1).
3729    let mantissa = prng_next_u64() >> 11;
3730    let denom = (1u64 << 53) as f64;
3731    mantissa as f64 / denom
3732}
3733
3734/// v7.17.0 — generate a RFC 4122 v4 (random) UUID. Layout: 16
3735/// random bytes with the version nibble (high nibble of byte 6)
3736/// pinned to `0100` (= 4) and the variant top bits (high two bits
3737/// of byte 8) pinned to `10` — exactly what PG's
3738/// `gen_random_uuid()` and the historical uuid-ossp
3739/// `uuid_generate_v4()` produce.
3740pub fn gen_random_uuid_bytes() -> [u8; 16] {
3741    let mut out = [0u8; 16];
3742    let hi = prng_next_u64().to_be_bytes();
3743    let lo = prng_next_u64().to_be_bytes();
3744    out[..8].copy_from_slice(&hi);
3745    out[8..].copy_from_slice(&lo);
3746    // Version 4: top nibble of byte 6 must be 0100.
3747    out[6] = (out[6] & 0x0f) | 0x40;
3748    // Variant 1 (RFC 4122): top two bits of byte 8 must be 10.
3749    out[8] = (out[8] & 0x3f) | 0x80;
3750    out
3751}
3752
/// no_std `f64::sqrt(x)` — square root via Newton's method
/// (Babylonian iteration).
///
/// Special cases:
///   * NaN, ±0.0 and +infinity are returned unchanged.
///   * Any negative input (including -infinity) yields NaN.
///   * Subnormal inputs are rescaled into the normal range first,
///     because the exponent-halving seed below is only a good
///     starting point for normal numbers.
///
/// Perfect squares come out exact: once the iterate reaches the
/// true root it is a fixed point of the update.
fn f64_sqrt(x: f64) -> f64 {
    if x.is_nan() || x == 0.0 || x == f64::INFINITY {
        return x;
    }
    if x < 0.0 {
        return f64::NAN;
    }
    let exp_field = (x.to_bits() >> 52) & 0x7ff;
    if exp_field == 0 {
        // Subnormal: multiply by 2^108 (exact) so the value becomes
        // normal, take the root, then undo with 2^-54 (also exact,
        // the result is a normal number).
        let scale_up = f64::from_bits((1023_u64 + 108) << 52);
        let scale_down = f64::from_bits((1023_u64 - 54) << 52);
        return f64_sqrt(x * scale_up) * scale_down;
    }
    // Seed: 2^(e/2), i.e. halve the unbiased exponent. That lands
    // within a factor of two of the true root without a logarithm.
    let unbiased = exp_field as i64 - 1023;
    let seed_exp = unbiased / 2 + 1023;
    let mut guess = f64::from_bits((seed_exp as u64) << 52);
    // From a seed that close, quadratic convergence reaches full
    // f64 precision in about six steps; eight leaves headroom.
    for _ in 0..8 {
        guess = 0.5 * (guess + x / guess);
    }
    guess
}
3777
/// no_std `f64::exp(x)` — e^x via range reduction plus a Taylor
/// series. Adequate for power(), exp() and the scales the engine
/// uses; roughly 1e-12 relative error in the common range.
///
/// Method: write `x = k·ln2 + r` with `k` the nearest integer to
/// `x / ln2`, so `|r| <= ln2 / 2`; then `e^x = 2^k · e^r`.
///
/// Special cases: NaN passes through; inputs above 710 return
/// +infinity and inputs below -746 return 0. Between those guards
/// the result overflows or underflows naturally, so values such as
/// `exp(709.5)` (finite, about 1.3e308) are not clipped to infinity.
fn f64_exp(x: f64) -> f64 {
    /// Exact power of two for an exponent in the normal range
    /// (-1022..=1023), built directly from the bit pattern.
    fn pow2(e: i32) -> f64 {
        f64::from_bits(((e + 1023) as u64) << 52)
    }

    if x.is_nan() {
        return x;
    }
    if x > 710.0 {
        return f64::INFINITY;
    }
    if x < -746.0 {
        return 0.0;
    }
    const LN2: f64 = 0.6931471805599453;
    // Round half away from zero. |x / LN2| stays below ~1100 here,
    // so truncating after the ±0.5 shift is exact.
    let quotient = x / LN2;
    let k = if quotient >= 0.0 {
        (quotient + 0.5) as i32
    } else {
        (quotient - 0.5) as i32
    };
    let r = x - f64::from(k) * LN2;
    // Taylor series for exp(r): sum of r^n / n!, fast for |r| < 0.35.
    let mut term = 1.0_f64;
    let mut sum = 1.0_f64;
    for n in 1..=20_i32 {
        term *= r / f64::from(n);
        sum += term;
        if term.abs() < 1e-18 {
            break;
        }
    }
    // Apply 2^k in two halves: k reaches 1024 (and -1076), where
    // 2^k itself is not a finite/normal f64 even though 2^k · sum
    // may be. Each half is an exact normal power of two.
    let half = k / 2;
    sum * pow2(half) * pow2(k - half)
}
3809
/// no_std `f64::ln(x)` — natural logarithm via range reduction
/// plus the atanh series.
///
/// Special cases: NaN and any `x <= 0.0` return NaN (callers are
/// expected to reject non-positive input themselves), `ln(1) = 0`
/// exactly, and `ln(+infinity) = +infinity`. The infinity case
/// must be handled up front: halving infinity never drops below
/// 2.0, so the reduction loop would otherwise never terminate.
fn f64_ln(x: f64) -> f64 {
    if x.is_nan() || x <= 0.0 {
        return f64::NAN;
    }
    if x == f64::INFINITY {
        return x;
    }
    if x == 1.0 {
        return 0.0;
    }
    // Decompose x = 2^k * m with m in [1.0, 2.0), so that
    // ln(x) = k*ln(2) + ln(m). Scaling by 2 or 0.5 is exact.
    const LN2: f64 = 0.6931471805599453;
    let mut k = 0i32;
    let mut m = x;
    while m >= 2.0 {
        m *= 0.5;
        k += 1;
    }
    while m < 1.0 {
        m *= 2.0;
        k -= 1;
    }
    // atanh series with u = (m-1)/(m+1):
    //   ln(m) = 2 * (u + u^3/3 + u^5/5 + ...)
    // For m in [1, 2) we have 0 <= u < 1/3, so terms shrink by at
    // least a factor of 9 each step.
    let u = (m - 1.0) / (m + 1.0);
    let u_sq = u * u;
    let mut power = u;
    let mut series = u;
    for i in 1..50 {
        power *= u_sq;
        let contribution = power / (2 * i + 1) as f64;
        series += contribution;
        if contribution.abs() < 1e-18 {
            break;
        }
    }
    2.0 * series + (k as f64) * LN2
}
3847
/// no_std stand-in for `f64::powi` — raise `base` to an integer
/// exponent using binary square-and-multiply. A negative exponent
/// is handled by starting from the reciprocal `1.0 / base`. An
/// exponent of zero always yields 1.0.
fn f64_powi(base: f64, exp: i32) -> f64 {
    if exp == 0 {
        return 1.0;
    }
    let mut factor = if exp < 0 { 1.0 / base } else { base };
    // `unsigned_abs` keeps `i32::MIN` representable.
    let mut remaining = exp.unsigned_abs();
    let mut acc = 1.0;
    loop {
        if remaining & 1 != 0 {
            acc *= factor;
        }
        remaining >>= 1;
        if remaining == 0 {
            break acc;
        }
        factor *= factor;
    }
}
3869
/// no_std-compatible `round(x)` for f64 with the
/// half-away-from-zero rule (PG NUMERIC semantic — NOT banker's
/// rounding). NaN and ±infinity pass through unchanged.
///
/// The result is derived from the truncated value and the exact
/// fractional remainder rather than from `floor(x + 0.5)`: adding
/// 0.5 rounds in floating point, which turns 0.49999999999999994
/// into 1.0 and bumps odd integers at or above 2^52 to the next
/// even integer.
fn f64_round_half_away(x: f64) -> f64 {
    if x.is_nan() || x.is_infinite() {
        return x;
    }
    // From 2^52 upward every representable f64 is already an
    // integer, so there is nothing to round.
    if x >= 4_503_599_627_370_496.0 || x <= -4_503_599_627_370_496.0 {
        return x;
    }
    // Truncate toward zero; |x| < 2^52 fits an i64 comfortably and
    // the remainder `x - whole` is exact, lying strictly inside
    // (-1, 1) with the sign of x.
    let whole = (x as i64) as f64;
    let frac = x - whole;
    if frac >= 0.5 {
        whole + 1.0
    } else if frac <= -0.5 {
        whole - 1.0
    } else {
        whole
    }
}
3882
/// no_std-compatible `ceil(x)` for f64: the smallest integral
/// value not less than `x`.
///   * NaN / ±infinity pass through.
///   * Magnitudes of 2^53 and above are already integral and are
///     returned as-is.
///   * Positive fractions step up by one; negative fractions
///     simply truncate toward zero (ceil(-1.5) → -1, NOT -2).
fn f64_ceil(x: f64) -> f64 {
    const INTEGRAL_FROM: f64 = 9_007_199_254_740_992.0;
    let in_range = x.is_finite() && x > -INTEGRAL_FROM && x < INTEGRAL_FROM;
    if !in_range {
        return x;
    }
    // `as i64` truncates toward zero.
    let toward_zero = (x as i64) as f64;
    let has_fraction = x != toward_zero;
    match (x > 0.0, has_fraction) {
        (true, true) => toward_zero + 1.0,
        _ => toward_zero,
    }
}
3901
/// no_std-compatible `floor(x)` for f64: the largest integral
/// value not greater than `x`. The engine is `#![no_std]`, so
/// `f64::floor` (libm) is not available and the rounding is done
/// by hand:
///   * NaN / ±infinity pass through.
///   * Magnitudes of 2^53 and above are already integral (the
///     mantissa cannot hold a fraction there) and are returned
///     unchanged.
///   * Negative non-integers step one further down, toward
///     -infinity — the PG-canonical behaviour.
fn f64_floor(x: f64) -> f64 {
    const INTEGRAL_FROM: f64 = 9_007_199_254_740_992.0;
    let in_range = x.is_finite() && x > -INTEGRAL_FROM && x < INTEGRAL_FROM;
    if !in_range {
        return x;
    }
    // `as i64` truncates toward zero.
    let toward_zero = (x as i64) as f64;
    let has_fraction = x != toward_zero;
    match (x < 0.0, has_fraction) {
        (true, true) => toward_zero - 1.0,
        _ => toward_zero,
    }
}
3925
3926/// PG `lpad` / `rpad` shared implementation. Length is the
3927/// target codepoint count. When the input is longer than `length`,
3928/// truncate keeping the LEFT side (both lpad and rpad agree with
3929/// PG here). When shorter, pad with `fill` (default SPACE) cycling
3930/// for multi-char fills, on the appropriate side. Empty fill +
3931/// needs padding → returns input verbatim (potentially
3932/// truncated). NULL on any arg → NULL.
3933fn string_pad(args: &[Value], is_left: bool, fn_name: &str) -> Result<Value, EvalError> {
3934    if args.len() != 2 && args.len() != 3 {
3935        return Err(EvalError::TypeMismatch {
3936            detail: alloc::format!("{fn_name}() takes 2 or 3 args, got {}", args.len()),
3937        });
3938    }
3939    if args.iter().any(|v| matches!(v, Value::Null)) {
3940        return Ok(Value::Null);
3941    }
3942    let s = value_to_format_text(&args[0]);
3943    let target = match &args[1] {
3944        Value::SmallInt(x) => i64::from(*x),
3945        Value::Int(x) => i64::from(*x),
3946        Value::BigInt(x) => *x,
3947        other => {
3948            return Err(EvalError::TypeMismatch {
3949                detail: alloc::format!(
3950                    "{fn_name}(): length must be integer, got {:?}",
3951                    other.data_type()
3952                ),
3953            });
3954        }
3955    };
3956    let fill = if args.len() == 3 {
3957        value_to_format_text(&args[2])
3958    } else {
3959        String::from(" ")
3960    };
3961    if target <= 0 {
3962        return Ok(Value::Text(String::new()));
3963    }
3964    let target = target as usize;
3965    let s_chars: Vec<char> = s.chars().collect();
3966    if s_chars.len() >= target {
3967        // Truncate from the right (PG keeps LEFT side for both
3968        // lpad and rpad).
3969        return Ok(Value::Text(s_chars[..target].iter().collect()));
3970    }
3971    if fill.is_empty() {
3972        return Ok(Value::Text(s));
3973    }
3974    let pad_needed = target - s_chars.len();
3975    let fill_chars: Vec<char> = fill.chars().collect();
3976    let mut padding = String::with_capacity(pad_needed * 4);
3977    for i in 0..pad_needed {
3978        padding.push(fill_chars[i % fill_chars.len()]);
3979    }
3980    if is_left {
3981        Ok(Value::Text(padding + &s))
3982    } else {
3983        Ok(Value::Text(s + &padding))
3984    }
3985}
3986
3987/// PG `trim` / `ltrim` / `rtrim` / `btrim` shared implementation.
3988/// Accepts 1 or 2 args; coerces both to text via the standard
3989/// `value_to_format_text` helper; treats the chars arg as a SET
3990/// of UTF-8 codepoints (not a substring). NULL on either arg
3991/// poisons the result.
3992fn string_trim(args: &[Value], side: TrimSide, fn_name: &str) -> Result<Value, EvalError> {
3993    let (input, chars_str) = match args {
3994        [v] => (v.clone(), String::from(" ")),
3995        [v, c] => (v.clone(), {
3996            // NULL chars poisons.
3997            if matches!(c, Value::Null) {
3998                return Ok(Value::Null);
3999            }
4000            value_to_format_text(c)
4001        }),
4002        _ => {
4003            return Err(EvalError::TypeMismatch {
4004                detail: alloc::format!("{fn_name}() takes 1 or 2 args, got {}", args.len()),
4005            });
4006        }
4007    };
4008    if matches!(input, Value::Null) {
4009        return Ok(Value::Null);
4010    }
4011    let s = value_to_format_text(&input);
4012    let charset: alloc::collections::BTreeSet<char> = chars_str.chars().collect();
4013    let chars: Vec<char> = s.chars().collect();
4014    let mut start = 0usize;
4015    let mut end = chars.len();
4016    if matches!(side, TrimSide::Left | TrimSide::Both) {
4017        while start < end && charset.contains(&chars[start]) {
4018            start += 1;
4019        }
4020    }
4021    if matches!(side, TrimSide::Right | TrimSide::Both) {
4022        while end > start && charset.contains(&chars[end - 1]) {
4023            end -= 1;
4024        }
4025    }
4026    Ok(Value::Text(chars[start..end].iter().collect()))
4027}
4028
4029/// v7.17.0 Phase 3.8 — PG `format(fmtstr, args…)` with
4030/// sprintf-style conversion specifiers. Subset covered:
4031///   * `%s` — text rendering of the arg
4032///   * `%I` — quoted SQL identifier (always double-quoted; embedded
4033///     `"` doubled per SQL grammar)
4034///   * `%L` — quoted SQL literal (single-quoted; embedded `'`
4035///     doubled; NULL → literal `NULL`)
4036///   * `%%` — literal `%`
4037///   * `%n$X` — argument position (1-based) before the specifier
4038///     character (e.g. `%2$s` picks the 2nd arg)
4039fn format_string(args: &[Value]) -> Result<Value, EvalError> {
4040    if args.is_empty() {
4041        return Err(EvalError::TypeMismatch {
4042            detail: "format() takes at least 1 arg (format string)".into(),
4043        });
4044    }
4045    let fmt = match &args[0] {
4046        Value::Text(s) => s.clone(),
4047        Value::Null => return Ok(Value::Null),
4048        other => {
4049            return Err(EvalError::TypeMismatch {
4050                detail: format!(
4051                    "format(): first arg must be text, got {:?}",
4052                    other.data_type()
4053                ),
4054            });
4055        }
4056    };
4057    let arg_values = &args[1..];
4058    let mut out = String::new();
4059    let mut chars = fmt.chars().peekable();
4060    // Position cursor — next implicit arg picked when no `n$`
4061    // prefix is given. PG's format uses a 1-based cursor that
4062    // advances on each implicit-position spec.
4063    let mut implicit_cursor: usize = 0;
4064    while let Some(c) = chars.next() {
4065        if c != '%' {
4066            out.push(c);
4067            continue;
4068        }
4069        // Parse optional `n$` position prefix.
4070        let mut explicit_pos: Option<usize> = None;
4071        // Buffer the digits so we can roll back if no `$` follows.
4072        let mut digit_buf = String::new();
4073        while let Some(&d) = chars.peek() {
4074            if d.is_ascii_digit() {
4075                digit_buf.push(d);
4076                chars.next();
4077            } else {
4078                break;
4079            }
4080        }
4081        if !digit_buf.is_empty() && matches!(chars.peek(), Some(&'$')) {
4082            chars.next(); // consume `$`
4083            explicit_pos =
4084                Some(
4085                    digit_buf
4086                        .parse::<usize>()
4087                        .map_err(|_| EvalError::TypeMismatch {
4088                            detail: format!("format(): invalid arg position {digit_buf:?}"),
4089                        })?,
4090                );
4091            digit_buf.clear();
4092        }
4093        // Specifier character.
4094        let spec = match chars.next() {
4095            Some(c) => c,
4096            None => {
4097                return Err(EvalError::TypeMismatch {
4098                    detail: "format(): trailing `%` with no specifier".into(),
4099                });
4100            }
4101        };
4102        // Anything left in digit_buf (no `$`) was actually
4103        // pre-spec digits we now have to emit verbatim. PG would
4104        // treat them as width hint; v7.17 doesn't implement
4105        // width, but we don't want to silently drop the digits.
4106        // Strategy: ignore width for now and emit just the
4107        // converted value.
4108        let _ = digit_buf;
4109        if spec == '%' {
4110            out.push('%');
4111            continue;
4112        }
4113        let arg_index = match explicit_pos {
4114            Some(p) => p.saturating_sub(1),
4115            None => {
4116                let i = implicit_cursor;
4117                implicit_cursor += 1;
4118                i
4119            }
4120        };
4121        let arg = arg_values.get(arg_index).cloned().unwrap_or(Value::Null);
4122        match spec {
4123            's' => match arg {
4124                Value::Null => {} // PG: NULL renders as empty for %s.
4125                v => out.push_str(&value_to_format_text(&v)),
4126            },
4127            'I' => match arg {
4128                Value::Null => {
4129                    return Err(EvalError::TypeMismatch {
4130                        detail: "format(): NULL is not a valid identifier (%I)".into(),
4131                    });
4132                }
4133                v => {
4134                    let s = value_to_format_text(&v);
4135                    out.push('"');
4136                    for ch in s.chars() {
4137                        if ch == '"' {
4138                            out.push('"');
4139                            out.push('"');
4140                        } else {
4141                            out.push(ch);
4142                        }
4143                    }
4144                    out.push('"');
4145                }
4146            },
4147            'L' => match arg {
4148                Value::Null => out.push_str("NULL"),
4149                v => {
4150                    let s = value_to_format_text(&v);
4151                    out.push('\'');
4152                    for ch in s.chars() {
4153                        if ch == '\'' {
4154                            out.push('\'');
4155                            out.push('\'');
4156                        } else {
4157                            out.push(ch);
4158                        }
4159                    }
4160                    out.push('\'');
4161                }
4162            },
4163            other => {
4164                return Err(EvalError::TypeMismatch {
4165                    detail: format!(
4166                        "format(): unknown specifier '%{other}' \
4167                         (v7.17 supports %s %I %L %%)"
4168                    ),
4169                });
4170            }
4171        }
4172    }
4173    Ok(Value::Text(out))
4174}
4175
4176/// Helper: render a Value as text for format()'s %s / %I / %L
4177/// payload. Reuses the regular text-coercion table.
4178/// v7.17.0 Phase 3.P0-31 — map a `Value` to the canonical PG
4179/// type-name string returned by `pg_typeof`. Lowercase, matches
4180/// what real PostgreSQL emits (NOT SPG's UPPERCASE Display shape).
4181fn pg_typeof_name(v: &Value) -> &'static str {
4182    match v {
4183        Value::SmallInt(_) => "smallint",
4184        Value::Int(_) => "integer",
4185        Value::BigInt(_) => "bigint",
4186        Value::Float(_) => "double precision",
4187        Value::Text(_) => "text",
4188        Value::Bool(_) => "boolean",
4189        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => "vector",
4190        Value::Numeric { .. } => "numeric",
4191        Value::Date(_) => "date",
4192        Value::Timestamp(_) => "timestamp without time zone",
4193        Value::Interval { .. } => "interval",
4194        Value::Json(_) => {
4195            // SPG carries JSON and JSONB in the same Value::Json
4196            // variant; without a column ty hint we cannot tell
4197            // them apart at value level. Return "json" as the
4198            // conservative answer (PG's pg_typeof on a literal
4199            // `'{}'::json` returns "json"; the jsonb case is
4200            // covered when an explicit ::jsonb cast lands as
4201            // Value::Json too — see below override at call site).
4202            //
4203            // The eval-arm above for pg_typeof handles the
4204            // disambiguation via Expr-shape probing.
4205            "json"
4206        }
4207        Value::Bytes(_) => "bytea",
4208        Value::TextArray(_) => "text[]",
4209        Value::IntArray(_) => "integer[]",
4210        Value::BigIntArray(_) => "bigint[]",
4211        Value::TsVector(_) => "tsvector",
4212        Value::TsQuery(_) => "tsquery",
4213        Value::Uuid(_) => "uuid",
4214        Value::Null => "unknown",
4215        // Value is #[non_exhaustive]; future variants land here
4216        // until the table is updated.
4217        _ => "unknown",
4218    }
4219}
4220
4221fn value_to_format_text(v: &Value) -> String {
4222    match v {
4223        Value::Text(s) | Value::Json(s) => s.clone(),
4224        Value::SmallInt(n) => n.to_string(),
4225        Value::Int(n) => n.to_string(),
4226        Value::BigInt(n) => n.to_string(),
4227        Value::Float(x) => format!("{x}"),
4228        Value::Bool(b) => {
4229            if *b {
4230                "t".into()
4231            } else {
4232                "f".into()
4233            }
4234        }
4235        Value::Null => String::new(),
4236        other => format!("{other:?}"),
4237    }
4238}
4239
4240fn to_char(args: &[Value]) -> Result<Value, EvalError> {
4241    use core::fmt::Write as _;
4242    if args.len() != 2 {
4243        return Err(EvalError::TypeMismatch {
4244            detail: format!("to_char() takes 2 args, got {}", args.len()),
4245        });
4246    }
4247    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4248        return Ok(Value::Null);
4249    }
4250    let Value::Text(fmt) = &args[1] else {
4251        return Err(EvalError::TypeMismatch {
4252            detail: format!(
4253                "to_char() needs a text format, got {:?}",
4254                args[1].data_type()
4255            ),
4256        });
4257    };
4258    let (days, day_micros) = match &args[0] {
4259        Value::Date(d) => (*d, 0_i64),
4260        Value::Timestamp(t) => {
4261            let days = t.div_euclid(86_400_000_000);
4262            (
4263                i32::try_from(days).unwrap_or(i32::MAX),
4264                t.rem_euclid(86_400_000_000),
4265            )
4266        }
4267        other => {
4268            return Err(EvalError::TypeMismatch {
4269                detail: format!(
4270                    "to_char() needs DATE or TIMESTAMP, got {:?}",
4271                    other.data_type()
4272                ),
4273            });
4274        }
4275    };
4276    let (y, mo, d) = civil_from_days(days);
4277    let secs = day_micros / 1_000_000;
4278    let frac = day_micros % 1_000_000;
4279    // div_euclid keeps every value non-negative — the casts below are
4280    // sign-safe by construction. `secs ∈ [0, 86400)`, `frac ∈ [0,
4281    // 1_000_000)`, so all three quantities fit in u32.
4282    let hh24 = u32::try_from(secs / 3600).unwrap_or(0);
4283    let mi = u32::try_from((secs / 60) % 60).unwrap_or(0);
4284    let ss = u32::try_from(secs % 60).unwrap_or(0);
4285    let hh12 = match hh24 % 12 {
4286        0 => 12,
4287        x => x,
4288    };
4289    let ampm = if hh24 < 12 { "AM" } else { "PM" };
4290    let ms = u32::try_from(frac / 1_000).unwrap_or(0); // millisecond
4291    let us = u32::try_from(frac).unwrap_or(0); // microsecond (0..1_000_000)
4292
4293    let mut out = String::with_capacity(fmt.len() + 8);
4294    let bytes = fmt.as_bytes();
4295    let mut i = 0;
4296    // write! against a String never fails — discard the Result.
4297    while i < bytes.len() {
4298        // Try the longest prefixes first so "YYYY" wins over "YY".
4299        let rest = &bytes[i..];
4300        if rest.starts_with(b"YYYY") {
4301            let _ = write!(out, "{y:04}");
4302            i += 4;
4303        } else if rest.starts_with(b"YY") {
4304            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
4305            let yy = (y.rem_euclid(100)) as u32;
4306            let _ = write!(out, "{yy:02}");
4307            i += 2;
4308        } else if rest.starts_with(b"Month") {
4309            out.push_str(MONTH_FULL[(mo - 1) as usize]);
4310            i += 5;
4311        } else if rest.starts_with(b"Mon") {
4312            out.push_str(MONTH_ABBR[(mo - 1) as usize]);
4313            i += 3;
4314        } else if rest.starts_with(b"MM") {
4315            let _ = write!(out, "{mo:02}");
4316            i += 2;
4317        } else if rest.starts_with(b"DD") {
4318            let _ = write!(out, "{d:02}");
4319            i += 2;
4320        } else if rest.starts_with(b"HH24") {
4321            let _ = write!(out, "{hh24:02}");
4322            i += 4;
4323        } else if rest.starts_with(b"HH12") {
4324            let _ = write!(out, "{hh12:02}");
4325            i += 4;
4326        } else if rest.starts_with(b"MI") {
4327            let _ = write!(out, "{mi:02}");
4328            i += 2;
4329        } else if rest.starts_with(b"SS") {
4330            let _ = write!(out, "{ss:02}");
4331            i += 2;
4332        } else if rest.starts_with(b"MS") {
4333            let _ = write!(out, "{ms:03}");
4334            i += 2;
4335        } else if rest.starts_with(b"US") {
4336            let _ = write!(out, "{us:06}");
4337            i += 2;
4338        } else if rest.starts_with(b"AM") || rest.starts_with(b"PM") {
4339            out.push_str(ampm);
4340            i += 2;
4341        } else {
4342            // Pass any non-placeholder byte through verbatim.
4343            out.push(bytes[i] as char);
4344            i += 1;
4345        }
4346    }
4347    Ok(Value::Text(out))
4348}
4349
/// English month names, indexed by `month - 1` (January = 0).
const MONTH_FULL: [&str; 12] = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
];

/// Three-letter English month abbreviations, indexed by
/// `month - 1` (Jan = 0); parallel to `MONTH_FULL`.
const MONTH_ABBR: [&str; 12] = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
];
4367
4368/// v7.17.0 Phase 3.P0-29 — MySQL `DATE_FORMAT(t, fmt)`.
4369///
4370/// Format tokens (MySQL 8.0 surface):
4371///   * `%Y` — 4-digit year  `%y` — 2-digit year
4372///   * `%m` — 01-12 month   `%c` — 1-12 month (no zero pad)
4373///   * `%d` — 01-31 day     `%e` — 1-31 day (no zero pad)
4374///   * `%H` — 00-23 hour    `%h` / `%I` — 01-12 hour
4375///   * `%i` — 00-59 MINUTE (NB: `%M` is month name in MySQL — easy
4376///     footgun if we mirror PG's `to_char` tokens by accident)
4377///   * `%s` / `%S` — 00-59 second
4378///   * `%f` — 000000-999999 microseconds (always 6 digits)
4379///   * `%p` — AM / PM
4380///   * `%M` — January-December (full month name)
4381///   * `%b` — Jan-Dec (abbreviated month name)
4382///   * `%%` — literal `%`
4383///
4384/// Unknown `%X` tokens pass through verbatim (MySQL emits the `%`
4385/// then the unknown letter).
4386fn date_format_mysql(args: &[Value]) -> Result<Value, EvalError> {
4387    use core::fmt::Write as _;
4388    if args.len() != 2 {
4389        return Err(EvalError::TypeMismatch {
4390            detail: format!("date_format() takes 2 args, got {}", args.len()),
4391        });
4392    }
4393    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4394        return Ok(Value::Null);
4395    }
4396    let Value::Text(fmt) = &args[1] else {
4397        return Err(EvalError::TypeMismatch {
4398            detail: format!(
4399                "date_format() needs a text format, got {:?}",
4400                args[1].data_type()
4401            ),
4402        });
4403    };
4404    let (days, day_micros) = match &args[0] {
4405        Value::Date(d) => (*d, 0_i64),
4406        Value::Timestamp(t) => {
4407            let days = t.div_euclid(86_400_000_000);
4408            (
4409                i32::try_from(days).unwrap_or(i32::MAX),
4410                t.rem_euclid(86_400_000_000),
4411            )
4412        }
4413        other => {
4414            return Err(EvalError::TypeMismatch {
4415                detail: format!(
4416                    "date_format() needs DATE or TIMESTAMP, got {:?}",
4417                    other.data_type()
4418                ),
4419            });
4420        }
4421    };
4422    let (y, mo, d) = civil_from_days(days);
4423    let secs = day_micros / 1_000_000;
4424    let frac = day_micros % 1_000_000;
4425    let hh24 = u32::try_from(secs / 3600).unwrap_or(0);
4426    let mi = u32::try_from((secs / 60) % 60).unwrap_or(0);
4427    let ss = u32::try_from(secs % 60).unwrap_or(0);
4428    let hh12 = match hh24 % 12 {
4429        0 => 12,
4430        x => x,
4431    };
4432    let ampm = if hh24 < 12 { "AM" } else { "PM" };
4433    let us = u32::try_from(frac).unwrap_or(0);
4434
4435    let mut out = String::with_capacity(fmt.len() + 8);
4436    let bytes = fmt.as_bytes();
4437    let mut i = 0;
4438    while i < bytes.len() {
4439        if bytes[i] != b'%' {
4440            out.push(bytes[i] as char);
4441            i += 1;
4442            continue;
4443        }
4444        if i + 1 >= bytes.len() {
4445            // Trailing `%` with no specifier — emit verbatim.
4446            out.push('%');
4447            i += 1;
4448            continue;
4449        }
4450        let token = bytes[i + 1];
4451        match token {
4452            b'Y' => {
4453                let _ = write!(out, "{y:04}");
4454            }
4455            b'y' => {
4456                #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
4457                let yy = (y.rem_euclid(100)) as u32;
4458                let _ = write!(out, "{yy:02}");
4459            }
4460            b'm' => {
4461                let _ = write!(out, "{mo:02}");
4462            }
4463            b'c' => {
4464                let _ = write!(out, "{mo}");
4465            }
4466            b'd' => {
4467                let _ = write!(out, "{d:02}");
4468            }
4469            b'e' => {
4470                let _ = write!(out, "{d}");
4471            }
4472            b'H' => {
4473                let _ = write!(out, "{hh24:02}");
4474            }
4475            b'h' | b'I' => {
4476                let _ = write!(out, "{hh12:02}");
4477            }
4478            b'i' => {
4479                // MINUTE — distinct from PG's `MI` and from MySQL's
4480                // own `%M` (month name).
4481                let _ = write!(out, "{mi:02}");
4482            }
4483            b's' | b'S' => {
4484                let _ = write!(out, "{ss:02}");
4485            }
4486            b'f' => {
4487                let _ = write!(out, "{us:06}");
4488            }
4489            b'p' => {
4490                out.push_str(ampm);
4491            }
4492            b'M' => {
4493                out.push_str(MONTH_FULL[(mo - 1) as usize]);
4494            }
4495            b'b' => {
4496                out.push_str(MONTH_ABBR[(mo - 1) as usize]);
4497            }
4498            b'%' => {
4499                out.push('%');
4500            }
4501            other => {
4502                // Unknown specifier — MySQL emits the letter
4503                // verbatim (without the `%`).
4504                out.push(other as char);
4505            }
4506        }
4507        i += 2;
4508    }
4509    Ok(Value::Text(out))
4510}
4511
4512/// v7.17.0 Phase 3.P0-29 — `UNIX_TIMESTAMP(t)` returns epoch
4513/// seconds (BIGINT) for a TIMESTAMP / DATE.
4514///
4515/// Bare `UNIX_TIMESTAMP()` (no args) is folded to a BigInt literal
4516/// by clock_replacement_for at the rewrite layer — never reaches
4517/// this arm.
4518fn unix_timestamp_of(args: &[Value]) -> Result<Value, EvalError> {
4519    if args.len() != 1 {
4520        return Err(EvalError::TypeMismatch {
4521            detail: format!("unix_timestamp() takes 0 or 1 arg, got {}", args.len()),
4522        });
4523    }
4524    match &args[0] {
4525        Value::Null => Ok(Value::Null),
4526        Value::Timestamp(t) => Ok(Value::BigInt(t.div_euclid(1_000_000))),
4527        Value::Date(d) => Ok(Value::BigInt(i64::from(*d) * 86_400)),
4528        other => Err(EvalError::TypeMismatch {
4529            detail: format!(
4530                "unix_timestamp() needs DATE or TIMESTAMP, got {:?}",
4531                other.data_type()
4532            ),
4533        }),
4534    }
4535}
4536
4537/// v7.17.0 Phase 3.P0-29 — `FROM_UNIXTIME(n)` returns a TIMESTAMP
4538/// at `n` seconds past the Unix epoch. `FROM_UNIXTIME(n, fmt)`
4539/// applies MySQL date_format on top, returning TEXT.
4540fn from_unixtime(args: &[Value]) -> Result<Value, EvalError> {
4541    if !(1..=2).contains(&args.len()) {
4542        return Err(EvalError::TypeMismatch {
4543            detail: format!("from_unixtime() takes 1 or 2 args, got {}", args.len()),
4544        });
4545    }
4546    if args.iter().any(|v| matches!(v, Value::Null)) {
4547        return Ok(Value::Null);
4548    }
4549    let secs: i64 = match &args[0] {
4550        Value::SmallInt(n) => i64::from(*n),
4551        Value::Int(n) => i64::from(*n),
4552        Value::BigInt(n) => *n,
4553        Value::Float(x) => *x as i64,
4554        Value::Numeric { scaled, scale } => {
4555            let denom = 10_i128.pow(u32::from(*scale));
4556            i64::try_from(scaled.div_euclid(denom)).unwrap_or(i64::MAX)
4557        }
4558        other => {
4559            return Err(EvalError::TypeMismatch {
4560                detail: format!(
4561                    "from_unixtime() needs a numeric epoch second count, got {:?}",
4562                    other.data_type()
4563                ),
4564            });
4565        }
4566    };
4567    let ts = Value::Timestamp(secs.saturating_mul(1_000_000));
4568    if args.len() == 1 {
4569        Ok(ts)
4570    } else {
4571        date_format_mysql(&[ts, args[1].clone()])
4572    }
4573}
4574
4575/// `date_trunc(unit, timestamp)` — round a `TIMESTAMP` down to the
4576/// requested calendar boundary (year / month / day / hour / minute /
4577/// second). Returns the truncated `TIMESTAMP`. NULL on either side
4578/// propagates to NULL.
4579fn date_trunc(args: &[Value]) -> Result<Value, EvalError> {
4580    if args.len() != 2 {
4581        return Err(EvalError::TypeMismatch {
4582            detail: format!("date_trunc() takes 2 args, got {}", args.len()),
4583        });
4584    }
4585    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4586        return Ok(Value::Null);
4587    }
4588    let Value::Text(unit) = &args[0] else {
4589        return Err(EvalError::TypeMismatch {
4590            detail: format!(
4591                "date_trunc() needs a text unit, got {:?}",
4592                args[0].data_type()
4593            ),
4594        });
4595    };
4596    // Both DATE and TIMESTAMP sources are accepted. DATE lifts to
4597    // midnight first; the result is always TIMESTAMP.
4598    let micros = match &args[1] {
4599        Value::Timestamp(t) => *t,
4600        Value::Date(d) => i64::from(*d) * 86_400_000_000,
4601        other => {
4602            return Err(EvalError::TypeMismatch {
4603                detail: format!(
4604                    "date_trunc() needs DATE or TIMESTAMP, got {:?}",
4605                    other.data_type()
4606                ),
4607            });
4608        }
4609    };
4610    let unit_lc = unit.to_ascii_lowercase();
4611    let days = micros.div_euclid(86_400_000_000);
4612    let day_micros = micros.rem_euclid(86_400_000_000);
4613    let day_i32 = i32::try_from(days).unwrap_or(i32::MAX);
4614    let (y, m, _) = civil_from_days(day_i32);
4615    let truncated = match unit_lc.as_str() {
4616        "year" => i64::from(days_from_civil(y, 1, 1)) * 86_400_000_000,
4617        "month" => i64::from(days_from_civil(y, m, 1)) * 86_400_000_000,
4618        "day" => days * 86_400_000_000,
4619        "hour" => days * 86_400_000_000 + (day_micros / 3_600_000_000) * 3_600_000_000,
4620        "minute" => days * 86_400_000_000 + (day_micros / 60_000_000) * 60_000_000,
4621        "second" => days * 86_400_000_000 + (day_micros / 1_000_000) * 1_000_000,
4622        other => {
4623            return Err(EvalError::TypeMismatch {
4624                detail: format!(
4625                    "unknown date_trunc unit {other:?}; \
4626                     supported: year, month, day, hour, minute, second"
4627                ),
4628            });
4629        }
4630    };
4631    Ok(Value::Timestamp(truncated))
4632}
4633
4634/// PG-style `expr::TYPE` coercion. NULL always casts as NULL.
4635pub fn cast_value(v: Value, target: CastTarget) -> Result<Value, EvalError> {
4636    if matches!(v, Value::Null) {
4637        return Ok(Value::Null);
4638    }
4639    match target {
4640        CastTarget::Vector => cast_to_vector(v),
4641        CastTarget::Text => Ok(Value::Text(value_to_text(&v))),
4642        CastTarget::Int => cast_numeric_to_int(v),
4643        CastTarget::BigInt => cast_numeric_to_bigint(v),
4644        CastTarget::Float => cast_numeric_to_float(v),
4645        CastTarget::Bool => cast_to_bool(v),
4646        CastTarget::Date => cast_to_date(v),
4647        // TIMESTAMP and TIMESTAMPTZ have identical runtime
4648        // representation (i64 microseconds UTC).
4649        CastTarget::Timestamp | CastTarget::Timestamptz => cast_to_timestamp(v),
4650        // v7.9.25 — `expr::INTERVAL`. Currently only TEXT → Interval
4651        // is supported (the mailrs idiom: `$1::INTERVAL` where the
4652        // bound param is a string like `'7 days'`).
4653        CastTarget::Interval => cast_to_interval(v),
4654        // v7.9.25 — `::json` / `::jsonb`. Routes Text → Json
4655        // (validation is the producer's responsibility, same as
4656        // the column-INSERT path).
4657        CastTarget::Json | CastTarget::Jsonb => match v {
4658            Value::Json(s) => Ok(Value::Json(s)),
4659            Value::Text(s) => Ok(Value::Json(s)),
4660            other => Err(EvalError::TypeMismatch {
4661                detail: alloc::format!(
4662                    "::json / ::jsonb only accepts TEXT-shape inputs, got {:?}",
4663                    other.data_type()
4664                ),
4665            }),
4666        },
4667        // v7.17.0 Phase 5.3 — `::regtype` / `::regclass`. PG
4668        // semantics: each is a textual catalog-name surfacing as
4669        // a numeric OID at the wire layer that renders back as
4670        // the original name. SPG has no OID space, but pg_dump /
4671        // mailrs / Django code uses the cast purely for textual
4672        // round-trip — feeding `'public.t'::regclass::text` into
4673        // a downstream `format(…)` or string concat. We map to
4674        // that textual contract: Text in → Text out (the schema-
4675        // qualifier `public.` is stripped to match PG's default
4676        // search_path-aware rendering); numeric in → re-cast to
4677        // Text as best-effort; anything else errors.
4678        //
4679        // Pre-3.3 / pre-5.3 (v7.9.26) the cast surfaced a clean
4680        // error; this lifts to accept-and-textify so the dominant
4681        // dump-loader pattern unblocks. SPG-shaped queries that
4682        // genuinely need an OID for runtime joins are still
4683        // documented as unsupported.
4684        CastTarget::RegType | CastTarget::RegClass => match v {
4685            Value::Text(s) => {
4686                // Strip an optional `<schema>.` prefix — PG's
4687                // regclass render drops it when the schema is on
4688                // the search_path; SPG is single-schema so
4689                // dropping is always safe.
4690                let bare = s.rsplit('.').next().unwrap_or(&s).to_string();
4691                Ok(Value::Text(bare))
4692            }
4693            Value::Int(n) => Ok(Value::Text(alloc::format!("{n}"))),
4694            Value::BigInt(n) => Ok(Value::Text(alloc::format!("{n}"))),
4695            other => Err(EvalError::TypeMismatch {
4696                detail: alloc::format!(
4697                    "::regtype / ::regclass accepts TEXT (name) or integer (oid), got {:?}",
4698                    other.data_type()
4699                ),
4700            }),
4701        },
4702        // v7.10.11 — `::TEXT[]`. Decode PG external array form
4703        // when input is Text; pass through unchanged when it is
4704        // already TextArray. Anything else is a type mismatch.
4705        CastTarget::TextArray => match v {
4706            Value::TextArray(items) => Ok(Value::TextArray(items)),
4707            Value::Text(s) => decode_text_array_external(&s).map(Value::TextArray),
4708            other => Err(EvalError::TypeMismatch {
4709                detail: alloc::format!(
4710                    "::TEXT[] only accepts TEXT / TEXT[] inputs, got {:?}",
4711                    other.data_type()
4712                ),
4713            }),
4714        },
4715        // v7.11.13 — `::INT[]` / `::BIGINT[]`. Decode PG external
4716        // form `{1,2,3}` when input is Text; widen TextArray /
4717        // IntArray as appropriate.
4718        CastTarget::IntArray => cast_to_int_array(v),
4719        CastTarget::BigIntArray => cast_to_bigint_array(v),
4720        // v7.12.0 — `::tsvector` / `::tsquery`. Decodes PG external
4721        // form when input is Text; passes through unchanged when the
4722        // input is already the target type. Other inputs are a type
4723        // mismatch. Lexer / Porter stemmer arrive in v7.12.1; the
4724        // external-form cast at v7.12.0 is the path pg_dump and
4725        // direct-literal callers use.
4726        CastTarget::TsVector => match v {
4727            Value::TsVector(items) => Ok(Value::TsVector(items)),
4728            Value::Text(s) => decode_tsvector_external(&s).map(Value::TsVector),
4729            other => Err(EvalError::TypeMismatch {
4730                detail: alloc::format!(
4731                    "::tsvector only accepts TEXT / tsvector inputs, got {:?}",
4732                    other.data_type()
4733                ),
4734            }),
4735        },
4736        CastTarget::TsQuery => match v {
4737            Value::TsQuery(ast) => Ok(Value::TsQuery(ast)),
4738            Value::Text(s) => decode_tsquery_external(&s).map(Value::TsQuery),
4739            other => Err(EvalError::TypeMismatch {
4740                detail: alloc::format!(
4741                    "::tsquery only accepts TEXT / tsquery inputs, got {:?}",
4742                    other.data_type()
4743                ),
4744            }),
4745        },
4746        // v7.17.0 — `::uuid`. Identity for `uuid → uuid`; parse
4747        // text via the shared `parse_uuid_str`. Anything else is a
4748        // type mismatch — PG also rejects e.g. INT → UUID without
4749        // an explicit text bridge.
4750        CastTarget::Uuid => match v {
4751            Value::Uuid(b) => Ok(Value::Uuid(b)),
4752            Value::Text(s) => match spg_storage::parse_uuid_str(&s) {
4753                Some(b) => Ok(Value::Uuid(b)),
4754                None => Err(EvalError::TypeMismatch {
4755                    detail: alloc::format!("invalid input syntax for type uuid: {s:?}"),
4756                }),
4757            },
4758            other => Err(EvalError::TypeMismatch {
4759                detail: alloc::format!(
4760                    "::uuid only accepts TEXT / uuid inputs, got {:?}",
4761                    other.data_type()
4762                ),
4763            }),
4764        },
4765        // v7.18 — `::bytea`. Identity for `Bytes → Bytes`; decode
4766        // Text via the engine's PG-format bytea decoder (`\x`
4767        // hex form + `\NNN` escape form). Anything else is a type
4768        // mismatch — same shape as PG's contract. Closes the
4769        // mailrs D-pre #3 reverse-acceptance gap.
4770        CastTarget::Bytea => match v {
4771            Value::Bytes(b) => Ok(Value::Bytes(b)),
4772            Value::Text(s) => match crate::decode_bytea_literal(&s) {
4773                Ok(b) => Ok(Value::Bytes(b)),
4774                Err(msg) => Err(EvalError::TypeMismatch {
4775                    detail: alloc::format!("invalid input syntax for type bytea: {msg}"),
4776                }),
4777            },
4778            other => Err(EvalError::TypeMismatch {
4779                detail: alloc::format!(
4780                    "::bytea only accepts TEXT / bytea inputs, got {:?}",
4781                    other.data_type()
4782                ),
4783            }),
4784        },
4785    }
4786}
4787
4788fn cast_to_int_array(v: Value) -> Result<Value, EvalError> {
4789    match v {
4790        Value::IntArray(items) => Ok(Value::IntArray(items)),
4791        Value::BigIntArray(items) => {
4792            let mut out: Vec<Option<i32>> = Vec::with_capacity(items.len());
4793            for item in items {
4794                match item {
4795                    None => out.push(None),
4796                    Some(n) => match i32::try_from(n) {
4797                        Ok(x) => out.push(Some(x)),
4798                        Err(_) => {
4799                            return Err(EvalError::TypeMismatch {
4800                                detail: alloc::format!("::INT[] element {n} overflows i32"),
4801                            });
4802                        }
4803                    },
4804                }
4805            }
4806            Ok(Value::IntArray(out))
4807        }
4808        Value::Text(s) => decode_int_array_external(&s).map(Value::IntArray),
4809        Value::TextArray(items) => {
4810            let mut out: Vec<Option<i32>> = Vec::with_capacity(items.len());
4811            for item in items {
4812                match item {
4813                    None => out.push(None),
4814                    Some(s) => match s.parse::<i32>() {
4815                        Ok(n) => out.push(Some(n)),
4816                        Err(_) => {
4817                            return Err(EvalError::TypeMismatch {
4818                                detail: alloc::format!("::INT[] cannot parse {s:?}"),
4819                            });
4820                        }
4821                    },
4822                }
4823            }
4824            Ok(Value::IntArray(out))
4825        }
4826        other => Err(EvalError::TypeMismatch {
4827            detail: alloc::format!("::INT[] does not accept {:?}", other.data_type()),
4828        }),
4829    }
4830}
4831
4832fn cast_to_bigint_array(v: Value) -> Result<Value, EvalError> {
4833    match v {
4834        Value::BigIntArray(items) => Ok(Value::BigIntArray(items)),
4835        Value::IntArray(items) => Ok(Value::BigIntArray(
4836            items.into_iter().map(|x| x.map(i64::from)).collect(),
4837        )),
4838        Value::Text(s) => decode_bigint_array_external(&s).map(Value::BigIntArray),
4839        Value::TextArray(items) => {
4840            let mut out: Vec<Option<i64>> = Vec::with_capacity(items.len());
4841            for item in items {
4842                match item {
4843                    None => out.push(None),
4844                    Some(s) => match s.parse::<i64>() {
4845                        Ok(n) => out.push(Some(n)),
4846                        Err(_) => {
4847                            return Err(EvalError::TypeMismatch {
4848                                detail: alloc::format!("::BIGINT[] cannot parse {s:?}"),
4849                            });
4850                        }
4851                    },
4852                }
4853            }
4854            Ok(Value::BigIntArray(out))
4855        }
4856        other => Err(EvalError::TypeMismatch {
4857            detail: alloc::format!("::BIGINT[] does not accept {:?}", other.data_type()),
4858        }),
4859    }
4860}
4861
4862fn decode_int_array_external(s: &str) -> Result<Vec<Option<i32>>, EvalError> {
4863    let trimmed = s.trim();
4864    let inner = trimmed
4865        .strip_prefix('{')
4866        .and_then(|x| x.strip_suffix('}'))
4867        .ok_or_else(|| EvalError::TypeMismatch {
4868            detail: alloc::format!("INT[] literal {s:?} must be enclosed in '{{...}}'"),
4869        })?;
4870    if inner.trim().is_empty() {
4871        return Ok(Vec::new());
4872    }
4873    inner
4874        .split(',')
4875        .map(|part| {
4876            let p = part.trim();
4877            if p.eq_ignore_ascii_case("NULL") {
4878                Ok(None)
4879            } else {
4880                p.parse::<i32>()
4881                    .map(Some)
4882                    .map_err(|_| EvalError::TypeMismatch {
4883                        detail: alloc::format!("INT[] element {p:?} is not an i32"),
4884                    })
4885            }
4886        })
4887        .collect()
4888}
4889
4890fn decode_bigint_array_external(s: &str) -> Result<Vec<Option<i64>>, EvalError> {
4891    let trimmed = s.trim();
4892    let inner = trimmed
4893        .strip_prefix('{')
4894        .and_then(|x| x.strip_suffix('}'))
4895        .ok_or_else(|| EvalError::TypeMismatch {
4896            detail: alloc::format!("BIGINT[] literal {s:?} must be enclosed in '{{...}}'"),
4897        })?;
4898    if inner.trim().is_empty() {
4899        return Ok(Vec::new());
4900    }
4901    inner
4902        .split(',')
4903        .map(|part| {
4904            let p = part.trim();
4905            if p.eq_ignore_ascii_case("NULL") {
4906                Ok(None)
4907            } else {
4908                p.parse::<i64>()
4909                    .map(Some)
4910                    .map_err(|_| EvalError::TypeMismatch {
4911                        detail: alloc::format!("BIGINT[] element {p:?} is not an i64"),
4912                    })
4913            }
4914        })
4915        .collect()
4916}
4917
4918/// v7.10.11 — same decoder as `decode_text_array_literal` in
4919/// `lib.rs`, but lives here so the eval-time cast path stays
4920/// inside `spg-engine::eval`. Kept in lock-step with the engine
4921/// `coerce_value` decoder by tests.
4922fn decode_text_array_external(s: &str) -> Result<Vec<Option<String>>, EvalError> {
4923    let trimmed = s.trim();
4924    let inner = trimmed
4925        .strip_prefix('{')
4926        .and_then(|x| x.strip_suffix('}'))
4927        .ok_or_else(|| EvalError::TypeMismatch {
4928            detail: alloc::format!("TEXT[] literal {s:?} must be enclosed in '{{...}}'"),
4929        })?;
4930    let mut out: Vec<Option<String>> = Vec::new();
4931    if inner.trim().is_empty() {
4932        return Ok(out);
4933    }
4934    let bytes = inner.as_bytes();
4935    let mut i = 0;
4936    while i <= bytes.len() {
4937        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
4938            i += 1;
4939        }
4940        if i < bytes.len() && bytes[i] == b'"' {
4941            i += 1;
4942            let mut buf = String::new();
4943            while i < bytes.len() && bytes[i] != b'"' {
4944                if bytes[i] == b'\\' && i + 1 < bytes.len() {
4945                    buf.push(bytes[i + 1] as char);
4946                    i += 2;
4947                } else {
4948                    buf.push(bytes[i] as char);
4949                    i += 1;
4950                }
4951            }
4952            if i >= bytes.len() {
4953                return Err(EvalError::TypeMismatch {
4954                    detail: "unterminated quoted element in TEXT[] literal".into(),
4955                });
4956            }
4957            i += 1;
4958            out.push(Some(buf));
4959        } else {
4960            let start = i;
4961            while i < bytes.len() && bytes[i] != b',' {
4962                i += 1;
4963            }
4964            let raw = inner[start..i].trim();
4965            if raw.eq_ignore_ascii_case("NULL") {
4966                out.push(None);
4967            } else {
4968                out.push(Some(raw.to_string()));
4969            }
4970        }
4971        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
4972            i += 1;
4973        }
4974        if i >= bytes.len() {
4975            break;
4976        }
4977        if bytes[i] != b',' {
4978            return Err(EvalError::TypeMismatch {
4979                detail: "expected ',' between TEXT[] elements".into(),
4980            });
4981        }
4982        i += 1;
4983    }
4984    Ok(out)
4985}
4986
4987fn cast_to_interval(v: Value) -> Result<Value, EvalError> {
4988    match v {
4989        Value::Interval { months, micros } => Ok(Value::Interval { months, micros }),
4990        Value::Text(s) => {
4991            let (months, micros) = spg_sql::parser::parse_interval_text(&s).ok_or_else(|| {
4992                EvalError::TypeMismatch {
4993                    detail: alloc::format!("cannot parse {s:?} as INTERVAL"),
4994                }
4995            })?;
4996            Ok(Value::Interval { months, micros })
4997        }
4998        other => Err(EvalError::TypeMismatch {
4999            detail: alloc::format!(
5000                "::INTERVAL only accepts TEXT-shape inputs, got {:?}",
5001                other.data_type()
5002            ),
5003        }),
5004    }
5005}
5006
5007fn cast_to_date(v: Value) -> Result<Value, EvalError> {
5008    match v {
5009        Value::Date(d) => Ok(Value::Date(d)),
5010        // Integer literals carry days since the Unix epoch — used by
5011        // the `CURRENT_DATE` AST rewrite to inject the wall clock.
5012        Value::Int(n) => Ok(Value::Date(n)),
5013        Value::BigInt(n) => {
5014            i32::try_from(n)
5015                .map(Value::Date)
5016                .map_err(|_| EvalError::TypeMismatch {
5017                    detail: "bigint days-since-epoch out of DATE range".into(),
5018                })
5019        }
5020        // Timestamp truncates to its day boundary.
5021        Value::Timestamp(t) => {
5022            let days = t.div_euclid(86_400_000_000);
5023            i32::try_from(days)
5024                .map(Value::Date)
5025                .map_err(|_| EvalError::TypeMismatch {
5026                    detail: "timestamp out of DATE range".into(),
5027                })
5028        }
5029        Value::Text(s) => parse_date_literal(&s)
5030            .map(Value::Date)
5031            .ok_or(EvalError::TypeMismatch {
5032                detail: format!("cannot parse {s:?} as DATE (expected YYYY-MM-DD)"),
5033            }),
5034        other => Err(EvalError::TypeMismatch {
5035            detail: format!("cannot cast {:?} to DATE", other.data_type()),
5036        }),
5037    }
5038}
5039
5040fn cast_to_timestamp(v: Value) -> Result<Value, EvalError> {
5041    match v {
5042        Value::Timestamp(t) => Ok(Value::Timestamp(t)),
5043        // Int / BigInt carry microseconds since the Unix epoch — used
5044        // by the `NOW()` / `CURRENT_TIMESTAMP` AST rewrite to inject
5045        // the wall clock as a plain integer literal.
5046        Value::Int(n) => Ok(Value::Timestamp(i64::from(n))),
5047        Value::BigInt(n) => Ok(Value::Timestamp(n)),
5048        // DATE → TIMESTAMP picks midnight on the date.
5049        Value::Date(d) => Ok(Value::Timestamp(i64::from(d) * 86_400_000_000)),
5050        Value::Text(s) => {
5051            parse_timestamp_literal(&s)
5052                .map(Value::Timestamp)
5053                .ok_or(EvalError::TypeMismatch {
5054                    detail: format!(
5055                        "cannot parse {s:?} as TIMESTAMP \
5056                     (expected YYYY-MM-DD[ HH:MM:SS[.ffffff]])"
5057                    ),
5058                })
5059        }
5060        other => Err(EvalError::TypeMismatch {
5061            detail: format!("cannot cast {:?} to TIMESTAMP", other.data_type()),
5062        }),
5063    }
5064}
5065
5066fn value_to_text(v: &Value) -> String {
5067    match v {
5068        // v7.5.0 — Value is #[non_exhaustive]; any future variant
5069        // without explicit text rendering hits the Debug fallback
5070        // at the end.
5071        Value::SmallInt(n) => format!("{n}"),
5072        Value::Int(n) => format!("{n}"),
5073        Value::BigInt(n) => format!("{n}"),
5074        Value::Float(x) => format!("{x}"),
5075        // v4.9: JSON renders identically to Text — both are raw UTF-8.
5076        Value::Text(s) | Value::Json(s) => s.clone(),
5077        Value::Bool(b) => (if *b { "true" } else { "false" }).into(),
5078        Value::Vector(v) => {
5079            let cells: Vec<String> = v.iter().map(|x| format!("{x}")).collect();
5080            format!("[{}]", cells.join(", "))
5081        }
5082        // v6.0.1: render SQ8 cells dequantised, so SELECT output
5083        // matches the pgvector wire shape clients expect. The
5084        // recall envelope already absorbs the ≤ (max-min)/255/2
5085        // dequantisation error.
5086        Value::Sq8Vector(q) => {
5087            let cells: Vec<String> = spg_storage::quantize::dequantize(q)
5088                .iter()
5089                .map(|x| format!("{x}"))
5090                .collect();
5091            format!("[{}]", cells.join(", "))
5092        }
5093        // v6.0.3: HalfVector cells dequantise bit-exactly to f32
5094        // for SELECT output.
5095        Value::HalfVector(h) => {
5096            let cells: Vec<String> = h.to_f32_vec().iter().map(|x| format!("{x}")).collect();
5097            format!("[{}]", cells.join(", "))
5098        }
5099        Value::Numeric { scaled, scale } => format_numeric(*scaled, *scale),
5100        Value::Date(d) => format_date(*d),
5101        Value::Timestamp(t) => format_timestamp(*t),
5102        Value::Interval { months, micros } => format_interval(*months, *micros),
5103        Value::Null => "NULL".into(),
5104        // v7.10.4 — BYTEA renders as PG hex form.
5105        Value::Bytes(b) => format_bytea_hex(b),
5106        // v7.10.9 — TEXT[] / INT[] / BIGINT[] render PG external form.
5107        Value::TextArray(items) => format_text_array(items),
5108        Value::IntArray(items) => format_int_array(items),
5109        Value::BigIntArray(items) => format_bigint_array(items),
5110        // v7.12.0 — tsvector / tsquery render PG external form.
5111        Value::TsVector(lexs) => format_tsvector(lexs),
5112        Value::TsQuery(ast) => format_tsquery(ast),
5113        // v7.17.0 — UUID renders canonical lowercase 8-4-4-4-12
5114        // hyphenated form (PG `uuid_out`).
5115        Value::Uuid(b) => spg_storage::format_uuid(b),
5116        // v7.17.0 Phase 3.P0-32 — TIME canonical text.
5117        Value::Time(us) => format_time(*us),
5118        // v7.17.0 Phase 3.P0-34 — TIMETZ canonical text.
5119        Value::TimeTz { us, offset_secs } => format_timetz(*us, *offset_secs),
5120        // v7.17.0 Phase 3.P0-33 — YEAR 4-digit zero-padded.
5121        Value::Year(y) => format!("{y:04}"),
5122        // v7.17.0 Phase 3.P0-35 — MONEY en_US locale.
5123        Value::Money(c) => format_money(*c),
5124        // v7.17.0 Phase 3.P0-38 — Range canonical form. Routes
5125        // through the engine's format_range_text to share the
5126        // single renderer with pgwire / sqllogictest.
5127        Value::Range { .. } => crate::format_range_text(v),
5128        // v7.17.0 Phase 3.P0-39 — Hstore canonical PG text form.
5129        Value::Hstore(pairs) => crate::format_hstore_text(pairs),
5130        // v7.17.0 Phase 3.P0-40 — 2D array canonical PG text form.
5131        Value::IntArray2D(rows) => crate::format_int_2d_text_pub(rows),
5132        Value::BigIntArray2D(rows) => crate::format_bigint_2d_text_pub(rows),
5133        Value::TextArray2D(rows) => crate::format_text_2d_text_pub(rows),
5134        // v7.5.0 — #[non_exhaustive] fallback for future Value variants.
5135        _ => format!("{v:?}"),
5136    }
5137}
5138
5139/// Render a `Date` (days since epoch) as `YYYY-MM-DD`. Negative values
5140/// for pre-1970 dates render with a leading `-` on the year.
5141pub fn format_date(days: i32) -> String {
5142    let (y, m, d) = civil_from_days(days);
5143    format!("{y:04}-{m:02}-{d:02}")
5144}
5145
5146/// Render a `Timestamp` (microseconds since epoch) as
5147/// `YYYY-MM-DD HH:MM:SS[.fff...]`. Trailing-zero fractional digits are
5148/// dropped; a whole-second value has no fractional part.
5149/// v7.15.0 — PG-canonical TIMESTAMPTZ wire format. Storage is
5150/// the same i64 microseconds UTC as TIMESTAMP, but the canonical
5151/// PG text output appends the session's UTC-offset suffix (`+00`
5152/// for the default UTC session, the form pg_dump emits). Mailrs
5153/// round-8 acceptance criterion: `SELECT col FROM tstz` should
5154/// round-trip to a literal that re-INSERTs without semantic
5155/// drift.
5156pub fn format_timestamptz(micros: i64) -> String {
5157    let base = format_timestamp(micros);
5158    let mut s = String::with_capacity(base.len() + 3);
5159    s.push_str(&base);
5160    s.push_str("+00");
5161    s
5162}
5163
/// v7.17.0 Phase 3.P0-35 — PG `money` canonical text form for the
/// en_US locale: `$N,NNN.CC`, with negatives as `-$1.23`. Mirrors
/// PG's `cash_out` for `lc_monetary = 'en_US.UTF-8'`.
///
/// `cents` is the amount in hundredths; the sign is taken from it and
/// the magnitude is formatted unsigned, so `i64::MIN` is handled too.
pub fn format_money(cents: i64) -> String {
    let magnitude = cents.unsigned_abs();
    let whole = (magnitude / 100).to_string();
    let fraction = magnitude % 100;

    // The leftmost group holds 1–3 digits; every later group is
    // exactly three digits and is preceded by a comma.
    let lead = match whole.len() % 3 {
        0 => 3,
        partial => partial,
    };
    let (first, mut rest) = whole.split_at(lead);
    let mut grouped = String::with_capacity(whole.len() + whole.len() / 3);
    grouped.push_str(first);
    while !rest.is_empty() {
        let (group, tail) = rest.split_at(3);
        grouped.push(',');
        grouped.push_str(group);
        rest = tail;
    }

    let sign = if cents < 0 { "-" } else { "" };
    format!("{sign}${grouped}.{fraction:02}")
}
5188
5189/// v7.17.0 Phase 3.P0-34 — PG `TIMETZ` canonical text form
5190/// `HH:MM:SS[.ffffff]±HH[:MM]`. Mirrors PG `timetz_out`. The
5191/// offset uses `±HH` for whole-hour offsets and `±HH:MM` for
5192/// sub-hour offsets (matching PG's "minimal display" rule).
5193pub fn format_timetz(us: i64, offset_secs: i32) -> String {
5194    let time = format_time(us);
5195    let sign = if offset_secs < 0 { '-' } else { '+' };
5196    let abs = offset_secs.unsigned_abs();
5197    let oh = abs / 3600;
5198    let om = (abs % 3600) / 60;
5199    if om == 0 {
5200        format!("{time}{sign}{oh:02}")
5201    } else {
5202        format!("{time}{sign}{oh:02}:{om:02}")
5203    }
5204}
5205
/// v7.17.0 Phase 3.P0-32 — PG `TIME` canonical text form
/// `HH:MM:SS[.ffffff]`, mirroring PG `time_out`. `us` is a
/// microsecond count; the fractional part is omitted when zero and
/// otherwise printed without trailing zeros, so `12:00:00.500000`
/// renders as `12:00:00.5`.
pub fn format_time(us: i64) -> String {
    let whole_secs = us.div_euclid(1_000_000);
    let sub_micros = us.rem_euclid(1_000_000);
    let mut rendered = format!(
        "{:02}:{:02}:{:02}",
        whole_secs / 3600,
        (whole_secs / 60) % 60,
        whole_secs % 60
    );
    if sub_micros != 0 {
        // Six zero-padded digits, then shed the trailing zeros.
        let digits = format!("{sub_micros:06}");
        rendered.push('.');
        rendered.push_str(digits.trim_end_matches('0'));
    }
    rendered
}
5224
5225pub fn format_timestamp(micros: i64) -> String {
5226    const MICROS_PER_DAY: i64 = 86_400_000_000;
5227    // Split into day + intra-day part with proper floor division so
5228    // negative timestamps render right too.
5229    let days = micros.div_euclid(MICROS_PER_DAY);
5230    let day_micros = micros.rem_euclid(MICROS_PER_DAY);
5231    let day_i32 = i32::try_from(days).unwrap_or(i32::MAX);
5232    let (y, m, d) = civil_from_days(day_i32);
5233    let secs = day_micros / 1_000_000;
5234    let frac = day_micros % 1_000_000;
5235    let hh = secs / 3600;
5236    let mm = (secs / 60) % 60;
5237    let ss = secs % 60;
5238    if frac == 0 {
5239        format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}")
5240    } else {
5241        // Strip trailing zeros from the 6-digit fractional component.
5242        let raw = format!("{frac:06}");
5243        let trimmed = raw.trim_end_matches('0');
5244        format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}.{trimmed}")
5245    }
5246}
5247
/// Howard Hinnant's `civil_from_days`: turn a day count relative to
/// 1970-01-01 into a proleptic-Gregorian `(year, month, day)` triple.
/// Both directions of the calendar conversion live in `eval.rs` so the
/// engine never reaches for `std` time facilities.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn civil_from_days(days: i32) -> (i32, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle
    // Shift the origin from 1970-01-01 to 0000-03-01 so eras start on
    // a March 1st and the leap day falls at the end of the year.
    let shifted = i64::from(days) + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    // Day within the era, in [0, 146_097): comfortably inside u32, as
    // is every other intermediate below.
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32;
    let year_of_era = (day_of_era.saturating_sub(day_of_era / 1460) + day_of_era / 36524
        - day_of_era / 146_096)
        / 365;
    let march_year = i64::from(year_of_era) + era * 400;
    // Day within the March-based year.
    let day_of_year =
        day_of_era.saturating_sub(365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March … 11 = February).
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year.saturating_sub((153 * march_month + 2) / 5) + 1;
    let month = if march_month < 10 {
        march_month + 3
    } else {
        march_month - 9
    };
    // January and February belong to the following civil year.
    let year = if month <= 2 { march_year + 1 } else { march_year };
    (year as i32, month, day)
}
5269
/// Inverse of `civil_from_days` — converts a proleptic-Gregorian
/// (year, month, day) to days since 1970-01-01.
///
/// `d == 0` is treated as day 1. A result that does not fit in `i32`
/// clamps to the bound on the side it overflowed (`i32::MIN` for far
/// past, `i32::MAX` for far future). Months outside `1..=12` and days
/// past the end of the month are not validated here — they simply
/// extend the day count — so callers parsing user input should
/// range-check first.
pub fn days_from_civil(y: i32, m: u32, d: u32) -> i32 {
    // All arithmetic is done in i64 so that no intermediate can
    // overflow, whatever `m` / `d` a caller passes.
    // Years run March → February so the leap day is the last day.
    let y_adj = if m <= 2 {
        i64::from(y) - 1
    } else {
        i64::from(y)
    };
    let era = y_adj.div_euclid(400);
    let yoe = y_adj.rem_euclid(400); // year of era, [0, 399]
    // Month index counted from March (0 = March … 11 = February).
    let mp = if m > 2 {
        i64::from(m) - 3
    } else {
        i64::from(m) + 9
    };
    let doy = (153 * mp + 2) / 5 + i64::from(d.saturating_sub(1));
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    let total = era * 146_097 + doe - 719_468;
    i32::try_from(total).unwrap_or(if total < 0 { i32::MIN } else { i32::MAX })
}
5286
5287/// Parse `YYYY-MM-DD` into a `Date` (days since Unix epoch). Returns
5288/// `None` on shape / numeric failure; the engine surfaces that as a
5289/// `TypeMismatch` with the original text included.
5290pub fn parse_date_literal(s: &str) -> Option<i32> {
5291    let bytes = s.as_bytes();
5292    if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' {
5293        return None;
5294    }
5295    let y: i32 = s[0..4].parse().ok()?;
5296    let m: u32 = s[5..7].parse().ok()?;
5297    let d: u32 = s[8..10].parse().ok()?;
5298    if !(1..=12).contains(&m) || !(1..=31).contains(&d) {
5299        return None;
5300    }
5301    Some(days_from_civil(y, m, d))
5302}
5303
5304/// Parse `YYYY-MM-DD[ HH:MM:SS[.ffffff]]` into a `Timestamp`
5305/// (microseconds since Unix epoch). The time portion is optional;
5306/// missing → midnight. The fractional portion accepts 1–6 digits and
5307/// pads with zeros to microseconds.
5308pub fn parse_timestamp_literal(s: &str) -> Option<i64> {
5309    let trimmed = s.trim();
5310    let (date_part, time_part) = match trimmed.find([' ', 'T']) {
5311        Some(i) => (&trimmed[..i], Some(&trimmed[i + 1..])),
5312        None => (trimmed, None),
5313    };
5314    let days = parse_date_literal(date_part)?;
5315    let (day_micros, tz_offset_micros) = match time_part {
5316        None => (0, 0),
5317        Some(t) => parse_time_of_day_micros(t)?,
5318    };
5319    // PG semantics: a TIMESTAMPTZ literal with an explicit offset
5320    // is normalised to UTC for storage. `'12:00:00+09'` means
5321    // 12:00:00 in a UTC+09 zone → 03:00:00 UTC → subtract the
5322    // positive offset (or add the negative one). Storage is i64
5323    // microseconds UTC for both TIMESTAMP and TIMESTAMPTZ (see
5324    // spg-storage::DataType::Timestamptz docs); the wire-level
5325    // round-trip then re-applies the session timezone on the
5326    // SELECT side when format_timestamp is asked for a TZ-aware
5327    // render.
5328    Some(i64::from(days) * 86_400_000_000 + day_micros - tz_offset_micros)
5329}
5330
5331/// v7.15.0 — Parse `HH:MM:SS[.frac][<tz>]` and return
5332/// `(day_micros, tz_offset_micros)` where `day_micros` is the
5333/// local-clock seconds-of-day in microseconds and
5334/// `tz_offset_micros` is the UTC offset (positive = east of
5335/// UTC, negative = west). Caller subtracts the offset to
5336/// normalise to UTC. PG's recognised TZ shapes after the
5337/// seconds (or frac) part:
5338///   * `+OO[:MM]` / `-OO[:MM]` — numeric offset
5339///   * `+OOMM` / `-OOMM` (no colon, less common but legal)
5340///   * ` UTC` / `UTC` / `Z` — explicit zero offset
5341/// Anything else after the seconds = parse failure (the caller
5342/// surfaces as "cannot parse … as TIMESTAMP").
5343fn parse_time_of_day_micros(t: &str) -> Option<(i64, i64)> {
5344    let t = t.trim();
5345    // Detect & strip optional TZ suffix. Anchor on the first
5346    // `+` / `-` AFTER position 8 (so the leading sign on a
5347    // negative offset can't be mistaken for an `HH:MM:SS-OO`
5348    // boundary if the time itself is somehow malformed).
5349    // ` UTC` and trailing `Z` also count as zero-offset TZ tags.
5350    let (core, tz_micros) = if let Some(rest) = t.strip_suffix('Z') {
5351        (rest, 0i64)
5352    } else if let Some(rest) = t.strip_suffix(" UTC").or_else(|| t.strip_suffix("UTC")) {
5353        (rest, 0i64)
5354    } else if let Some((idx, sign_byte)) = find_offset_sign(t) {
5355        let suffix = &t[idx..];
5356        let micros = parse_tz_offset_suffix(suffix, sign_byte == b'+')?;
5357        (&t[..idx], micros)
5358    } else {
5359        (t, 0i64)
5360    };
5361    let (time, frac_str) = match core.split_once('.') {
5362        Some((a, b)) => (a, Some(b)),
5363        None => (core, None),
5364    };
5365    let bytes = time.as_bytes();
5366    if bytes.len() != 8 || bytes[2] != b':' || bytes[5] != b':' {
5367        return None;
5368    }
5369    let hh: i64 = time[0..2].parse().ok()?;
5370    let mm: i64 = time[3..5].parse().ok()?;
5371    let ss: i64 = time[6..8].parse().ok()?;
5372    if !(0..24).contains(&hh) || !(0..60).contains(&mm) || !(0..60).contains(&ss) {
5373        return None;
5374    }
5375    let frac_micros: i64 = match frac_str {
5376        None => 0,
5377        Some(f) => {
5378            // Pad right with zeros to 6 digits, then truncate extras.
5379            if f.is_empty() || f.len() > 9 {
5380                return None;
5381            }
5382            let mut padded = String::with_capacity(6);
5383            padded.push_str(&f[..f.len().min(6)]);
5384            while padded.len() < 6 {
5385                padded.push('0');
5386            }
5387            padded.parse().ok()?
5388        }
5389    };
5390    Some((
5391        ((hh * 3600 + mm * 60 + ss) * 1_000_000) + frac_micros,
5392        tz_micros,
5393    ))
5394}
5395
/// Locate the sign byte (`+` or `-`) that introduces a numeric TZ
/// offset after an `HH:MM:SS[.fff]` time string.
///
/// The first 8 bytes (the `HH:MM:SS` part) are never inspected, so
/// inputs of 8 bytes or fewer always yield `None`. Otherwise the
/// result is the byte index and value of the first `+` / `-` found
/// from index 8 onwards, or `None` when there is none.
fn find_offset_sign(t: &str) -> Option<(usize, u8)> {
    t.bytes()
        .enumerate()
        .skip(8)
        .find(|&(_, byte)| matches!(byte, b'+' | b'-'))
}
5415
/// Parse `+OO`, `+OO:MM`, `+OOMM`, `-OO`, `-OO:MM`, `-OOMM` into
/// a UTC-offset microsecond delta.
///
/// `suffix` must start with the one-byte sign; that byte is skipped
/// without being inspected and `is_positive` decides the sign of the
/// result. Hours and minutes must be ASCII digits only (so a second
/// sign such as `++5` is rejected), hours must lie in `0..=18` and
/// minutes in `0..60`. Returns `None` for anything else, including
/// empty or non-ASCII input — this function never panics.
fn parse_tz_offset_suffix(suffix: &str, is_positive: bool) -> Option<i64> {
    // Non-empty, digits-only check. Besides rejecting signs that
    // `str::parse::<i64>` would accept, it guarantees the fixed-width
    // slicing below lands on char boundaries.
    fn all_digits(part: &str) -> bool {
        !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit())
    }

    // suffix starts with `+` or `-`; strip it. `get` (rather than
    // indexing) turns an empty suffix into `None` instead of a panic.
    let body = suffix.get(1..)?;
    let (hh, mm): (i64, i64) = if let Some((h, m)) = body.split_once(':') {
        if !all_digits(h) || !all_digits(m) {
            return None;
        }
        (h.parse().ok()?, m.parse().ok()?)
    } else {
        if !all_digits(body) {
            return None;
        }
        match body.len() {
            2 => (body.parse().ok()?, 0),
            4 => (body[..2].parse().ok()?, body[2..].parse().ok()?),
            // PG's "+0530" form lacks the colon; a 3-char body is
            // `OOM`, which is ambiguous (`+053` ?). PG doesn't emit
            // that; reject it along with every other length.
            _ => return None,
        }
    };
    if !(0..=18).contains(&hh) || !(0..60).contains(&mm) {
        return None;
    }
    let abs = (hh * 3600 + mm * 60) * 1_000_000;
    Some(if is_positive { abs } else { -abs })
}
5447
/// Format an `Interval { months, micros }` roughly the way `psql`
/// prints one. The months part contributes `N year(s)` / `N mon(s)`;
/// the microsecond part contributes `N day(s)` followed by a signed
/// `HH:MM:SS[.frac]` clock with trailing fractional zeros removed.
/// Zero-valued components are left out, and an interval that is zero
/// in every component is rendered as `0`.
pub fn format_interval(months: i32, micros: i64) -> String {
    const MICROS_PER_SEC: i64 = 1_000_000;
    const MICROS_PER_DAY: i64 = 86_400_000_000;

    // `<count> <noun>`: the singular noun is used only for exactly
    // `+1`; `-1` and every other count take the plural.
    fn labelled(count: i64, singular: &str, plural: &str) -> String {
        let noun = if count == 1 { singular } else { plural };
        format!("{count} {noun}")
    }

    let (years, mons) = (i64::from(months / 12), i64::from(months % 12));
    let (days, clock) = (micros / MICROS_PER_DAY, micros % MICROS_PER_DAY);

    let mut pieces: Vec<String> = Vec::new();
    if years != 0 {
        pieces.push(labelled(years, "year", "years"));
    }
    if mons != 0 {
        pieces.push(labelled(mons, "mon", "mons"));
    }
    if days != 0 {
        pieces.push(labelled(days, "day", "days"));
    }
    if clock != 0 {
        // `clock` is a remainder modulo one day, so negating it
        // cannot overflow.
        let sign = if clock < 0 { "-" } else { "" };
        let magnitude = if clock < 0 { -clock } else { clock };
        let whole_secs = magnitude / MICROS_PER_SEC;
        let sub_micros = magnitude % MICROS_PER_SEC;
        let (hh, mm, ss) = (whole_secs / 3600, (whole_secs / 60) % 60, whole_secs % 60);
        let mut clock_text = format!("{sign}{hh:02}:{mm:02}:{ss:02}");
        if sub_micros != 0 {
            let six_digits = format!("{sub_micros:06}");
            clock_text.push('.');
            clock_text.push_str(six_digits.trim_end_matches('0'));
        }
        pieces.push(clock_text);
    }

    if pieces.is_empty() {
        String::from("0")
    } else {
        pieces.join(" ")
    }
}
5501
5502/// Add `months` (signed) to a `(year, month, day)` triple using PG's
5503/// clamp-to-last-day rule (so `'2024-01-31' + 1 month` → `'2024-02-29'`).
5504fn add_months_to_civil(y: i32, m: u32, d: u32, months: i32) -> (i32, u32, u32) {
5505    let total_months = i64::from(y) * 12 + i64::from(m) - 1 + i64::from(months);
5506    let new_year = i32::try_from(total_months.div_euclid(12)).unwrap_or(i32::MAX);
5507    let new_month_zero = total_months.rem_euclid(12);
5508    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
5509    let new_month = (new_month_zero as u32) + 1;
5510    let max_day = days_in_month(new_year, new_month);
5511    (new_year, new_month, d.min(max_day))
5512}
5513
/// Number of days in month `m` (1 = January) of year `y`, using the
/// proleptic Gregorian leap rule. Any `m` outside 1..=12 falls back
/// to 30 (callers normalise first; this is purely defensive).
const fn days_in_month(y: i32, m: u32) -> u32 {
    if m == 2 {
        // Leap year: divisible by 4, except centuries that are not
        // divisible by 400. `rem_euclid` keeps negative years correct.
        let is_leap =
            y.rem_euclid(4) == 0 && (y.rem_euclid(100) != 0 || y.rem_euclid(400) == 0);
        if is_leap {
            29
        } else {
            28
        }
    } else if matches!(m, 1 | 3 | 5 | 7 | 8 | 10 | 12) {
        31
    } else {
        // 4 / 6 / 9 / 11, plus the out-of-range fallback.
        30
    }
}
5530
/// v7.10.9 — render a TEXT[] in PG's external array form
/// (`{a,b,NULL}`). Elements containing whitespace, commas,
/// quotes, backslashes, or braces get double-quoted with `\\` / `\"`
/// escapes; so do empty strings and any spelling of the word `NULL`
/// (so they cannot be confused with a NULL element).
/// NULL elements use the literal token `NULL`. Public so the
/// wire layer can produce the canonical text-mode encoding.
///
/// "Whitespace" covers space, tab, newline, carriage return, vertical
/// tab and form feed, so an element such as `"a\nb"` is quoted rather
/// than emitted bare.
pub fn format_text_array(items: &[Option<String>]) -> String {
    // Characters that force an element to be double-quoted.
    fn forces_quoting(c: char) -> bool {
        matches!(
            c,
            ',' | '{' | '}' | '"' | '\\' | ' ' | '\t' | '\n' | '\r' | '\x0B' | '\x0C'
        )
    }

    let mut out = String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars().any(forces_quoting);
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        // Inside quotes only `"` and `\` need a
                        // backslash escape.
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
5568
/// v7.11.14 — produce PG's external array text for an INT[]
/// (`{1,2,NULL}`). Elements are comma-separated with no quoting
/// (integers never need it); a `None` element is written as the
/// literal token `NULL`.
pub fn format_int_array(items: &[Option<i32>]) -> String {
    let mut text = String::with_capacity(2 + items.len() * 4);
    text.push('{');
    let mut first = true;
    for slot in items {
        if !first {
            text.push(',');
        }
        first = false;
        if let Some(n) = slot {
            text.push_str(&n.to_string());
        } else {
            text.push_str("NULL");
        }
    }
    text.push('}');
    text
}
5587
/// v7.11.14 — produce PG's external array text for a BIGINT[]
/// (`{1,2,NULL}`). Elements are comma-separated; a `None` element is
/// written as the literal token `NULL`.
pub fn format_bigint_array(items: &[Option<i64>]) -> String {
    let render = |slot: &Option<i64>| -> String {
        slot.map_or_else(|| String::from("NULL"), |n| n.to_string())
    };

    let mut text = String::with_capacity(2 + items.len() * 6);
    text.push('{');
    let mut remaining = items.iter();
    // First element has no leading comma; every later one does.
    if let Some(head) = remaining.next() {
        text.push_str(&render(head));
    }
    for slot in remaining {
        text.push(',');
        text.push_str(&render(slot));
    }
    text.push('}');
    text
}
5605
5606/// v7.12.0 — render a `tsvector` in PG's external form:
5607/// `'lex':1,2A 'word':3` (single-quoted lexemes, optional
5608/// `:positions`, optional weight letter `A/B/C/D` per position).
5609/// Lexemes already arrive sorted + deduped from the engine. Used
5610/// by the wire layer (OID 3614) and by SELECT-text output.
5611pub fn format_tsvector(lexs: &[TsLexeme]) -> String {
5612    let mut out = String::with_capacity(lexs.len() * 12);
5613    for (i, l) in lexs.iter().enumerate() {
5614        if i > 0 {
5615            out.push(' ');
5616        }
5617        out.push('\'');
5618        for c in l.word.chars() {
5619            if c == '\'' {
5620                out.push('\'');
5621            }
5622            out.push(c);
5623        }
5624        out.push('\'');
5625        if !l.positions.is_empty() {
5626            for (pi, p) in l.positions.iter().enumerate() {
5627                out.push(if pi == 0 { ':' } else { ',' });
5628                out.push_str(&p.to_string());
5629            }
5630            // v7.12.0 — weight is per-lexeme (the v7.12 design
5631            // collapses PG's per-position weight into one letter).
5632            // Emit once after the last position; default `D`
5633            // (weight=0) stays implicit.
5634            match l.weight {
5635                3 => out.push('A'),
5636                2 => out.push('B'),
5637                1 => out.push('C'),
5638                _ => {}
5639            }
5640        }
5641    }
5642    out
5643}
5644
5645/// v7.12.0 — render a `tsquery` in PG's external form. Operator
5646/// precedence: `!` > `&` > `|`. Phrase distance shown as `<N>`.
5647pub fn format_tsquery(ast: &TsQueryAst) -> String {
5648    fn go(ast: &TsQueryAst, parent_prec: u8, out: &mut String) {
5649        // 0 = top, 1 = OR, 2 = AND, 3 = NOT/Phrase, 4 = atom.
5650        let (own_prec, write_self): (u8, &dyn Fn(&mut String)) = match ast {
5651            TsQueryAst::Or(_, _) => (1, &|_| {}),
5652            TsQueryAst::And(_, _) | TsQueryAst::Phrase { .. } => (2, &|_| {}),
5653            TsQueryAst::Not(_) => (3, &|_| {}),
5654            TsQueryAst::Term { .. } => (4, &|_| {}),
5655        };
5656        let need_parens = own_prec < parent_prec;
5657        if need_parens {
5658            out.push('(');
5659        }
5660        match ast {
5661            TsQueryAst::Term { word, .. } => {
5662                out.push('\'');
5663                for c in word.chars() {
5664                    if c == '\'' {
5665                        out.push('\'');
5666                    }
5667                    out.push(c);
5668                }
5669                out.push('\'');
5670            }
5671            TsQueryAst::And(a, b) => {
5672                go(a, own_prec, out);
5673                out.push_str(" & ");
5674                go(b, own_prec, out);
5675            }
5676            TsQueryAst::Or(a, b) => {
5677                go(a, own_prec, out);
5678                out.push_str(" | ");
5679                go(b, own_prec, out);
5680            }
5681            TsQueryAst::Not(x) => {
5682                out.push('!');
5683                go(x, own_prec, out);
5684            }
5685            TsQueryAst::Phrase {
5686                left,
5687                right,
5688                distance,
5689            } => {
5690                go(left, own_prec, out);
5691                out.push_str(&alloc::format!(" <{distance}> "));
5692                go(right, own_prec, out);
5693            }
5694        }
5695        write_self(out);
5696        if need_parens {
5697            out.push(')');
5698        }
5699    }
5700    let mut out = String::new();
5701    go(ast, 0, &mut out);
5702    out
5703}
5704
5705/// v7.12.0 — decode PG external form `'word':1,2A 'other':3` into
5706/// a `Vec<TsLexeme>`. Lexemes are sorted ascending by `word` (with
5707/// duplicates merged on positions) so the output matches the
5708/// engine invariant. Empty input yields an empty vector.
5709///
5710/// v7.12.0 only ships the cast-literal entry. Full `to_tsvector`
5711/// (Unicode word-split + Porter stemming + stopwords) lands in
5712/// v7.12.1.
5713pub fn decode_tsvector_external(s: &str) -> Result<Vec<TsLexeme>, EvalError> {
5714    let mut out: Vec<TsLexeme> = Vec::new();
5715    let mut i = 0;
5716    let bytes = s.as_bytes();
5717    while i < bytes.len() {
5718        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
5719            i += 1;
5720        }
5721        if i >= bytes.len() {
5722            break;
5723        }
5724        // Quoted form `'word'` (with embedded `''` for a literal
5725        // single quote, mirroring PG).
5726        let word = if bytes[i] == b'\'' {
5727            i += 1;
5728            let mut w = String::new();
5729            loop {
5730                if i >= bytes.len() {
5731                    return Err(EvalError::TypeMismatch {
5732                        detail: "tsvector literal: unterminated quoted lexeme".into(),
5733                    });
5734                }
5735                let b = bytes[i];
5736                if b == b'\'' {
5737                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
5738                        w.push('\'');
5739                        i += 2;
5740                    } else {
5741                        i += 1;
5742                        break;
5743                    }
5744                } else {
5745                    w.push(b as char);
5746                    i += 1;
5747                }
5748            }
5749            w
5750        } else {
5751            // Bare form — read until whitespace, ':' or end.
5752            let start = i;
5753            while i < bytes.len() && !bytes[i].is_ascii_whitespace() && bytes[i] != b':' {
5754                i += 1;
5755            }
5756            core::str::from_utf8(&bytes[start..i])
5757                .map_err(|_| EvalError::TypeMismatch {
5758                    detail: "tsvector literal: non-UTF-8 lexeme".into(),
5759                })?
5760                .to_string()
5761        };
5762        if word.is_empty() {
5763            return Err(EvalError::TypeMismatch {
5764                detail: "tsvector literal: empty lexeme".into(),
5765            });
5766        }
5767        // Optional `:pos[,pos][,pos]`. Each position is u16; each
5768        // may carry a trailing weight letter A/B/C/D.
5769        let mut positions: Vec<u16> = Vec::new();
5770        let mut weight: u8 = 0;
5771        if i < bytes.len() && bytes[i] == b':' {
5772            i += 1;
5773            loop {
5774                let start = i;
5775                while i < bytes.len() && bytes[i].is_ascii_digit() {
5776                    i += 1;
5777                }
5778                if start == i {
5779                    return Err(EvalError::TypeMismatch {
5780                        detail: "tsvector literal: expected digit after ':'".into(),
5781                    });
5782                }
5783                let num: u16 = core::str::from_utf8(&bytes[start..i])
5784                    .expect("ascii digits")
5785                    .parse()
5786                    .map_err(|_| EvalError::TypeMismatch {
5787                        detail: alloc::format!(
5788                            "tsvector literal: position {} overflows u16",
5789                            core::str::from_utf8(&bytes[start..i]).unwrap_or("?")
5790                        ),
5791                    })?;
5792                positions.push(num);
5793                if i < bytes.len() {
5794                    let w = bytes[i];
5795                    if matches!(w, b'A' | b'B' | b'C' | b'D') {
5796                        weight = match w {
5797                            b'A' => 3,
5798                            b'B' => 2,
5799                            b'C' => 1,
5800                            _ => 0,
5801                        };
5802                        i += 1;
5803                    }
5804                }
5805                if i < bytes.len() && bytes[i] == b',' {
5806                    i += 1;
5807                    continue;
5808                }
5809                break;
5810            }
5811        }
5812        positions.sort_unstable();
5813        positions.dedup();
5814        // Merge into the output vector — sorted insert by word,
5815        // duplicate words merge positions.
5816        match out.binary_search_by(|l| l.word.as_str().cmp(word.as_str())) {
5817            Ok(idx) => {
5818                for p in positions {
5819                    if !out[idx].positions.contains(&p) {
5820                        out[idx].positions.push(p);
5821                    }
5822                }
5823                out[idx].positions.sort_unstable();
5824                if weight != 0 {
5825                    out[idx].weight = weight;
5826                }
5827            }
5828            Err(idx) => {
5829                out.insert(
5830                    idx,
5831                    TsLexeme {
5832                        word,
5833                        positions,
5834                        weight,
5835                    },
5836                );
5837            }
5838        }
5839    }
5840    Ok(out)
5841}
5842
5843/// v7.12.0 — decode PG external form `'foo' & 'bar' | !'baz'`
5844/// into a `TsQueryAst`. v7.12.0 supports the canonical
5845/// `to_tsquery` surface: single-quoted lexemes, `&` / `|` / `!`,
5846/// parens, and phrase `<N>`. Bare lexemes are accepted too. Full
5847/// `plainto_tsquery` / `websearch_to_tsquery` arrive in v7.12.1.
5848pub fn decode_tsquery_external(s: &str) -> Result<TsQueryAst, EvalError> {
5849    let mut p = TsQueryParser {
5850        bytes: s.as_bytes(),
5851        pos: 0,
5852    };
5853    p.skip_ws();
5854    if p.pos >= p.bytes.len() {
5855        return Err(EvalError::TypeMismatch {
5856            detail: "tsquery literal: empty".into(),
5857        });
5858    }
5859    let ast = p.parse_or()?;
5860    p.skip_ws();
5861    if p.pos < p.bytes.len() {
5862        return Err(EvalError::TypeMismatch {
5863            detail: alloc::format!("tsquery literal: trailing garbage at offset {}", p.pos),
5864        });
5865    }
5866    Ok(ast)
5867}
5868
/// Recursive-descent parser state for a tsquery literal: the input
/// as raw bytes plus a cursor. Built by `decode_tsquery_external`
/// from `str::as_bytes` with the cursor at 0.
struct TsQueryParser<'a> {
    /// The literal being parsed.
    bytes: &'a [u8],
    /// Index into `bytes` of the next unread byte.
    pos: usize,
}
5873
5874impl<'a> TsQueryParser<'a> {
5875    fn skip_ws(&mut self) {
5876        while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_whitespace() {
5877            self.pos += 1;
5878        }
5879    }
5880    fn peek(&self) -> Option<u8> {
5881        self.bytes.get(self.pos).copied()
5882    }
5883    fn parse_or(&mut self) -> Result<TsQueryAst, EvalError> {
5884        let mut lhs = self.parse_and()?;
5885        loop {
5886            self.skip_ws();
5887            if self.peek() != Some(b'|') {
5888                return Ok(lhs);
5889            }
5890            self.pos += 1;
5891            let rhs = self.parse_and()?;
5892            lhs = TsQueryAst::Or(Box::new(lhs), Box::new(rhs));
5893        }
5894    }
5895    fn parse_and(&mut self) -> Result<TsQueryAst, EvalError> {
5896        let mut lhs = self.parse_unary()?;
5897        loop {
5898            self.skip_ws();
5899            match self.peek() {
5900                Some(b'&') => {
5901                    self.pos += 1;
5902                    let rhs = self.parse_unary()?;
5903                    lhs = TsQueryAst::And(Box::new(lhs), Box::new(rhs));
5904                }
5905                Some(b'<') => {
5906                    // Phrase distance `<N>`.
5907                    self.pos += 1;
5908                    let start = self.pos;
5909                    while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
5910                        self.pos += 1;
5911                    }
5912                    if start == self.pos || self.peek() != Some(b'>') {
5913                        return Err(EvalError::TypeMismatch {
5914                            detail: "tsquery literal: malformed <N> phrase operator".into(),
5915                        });
5916                    }
5917                    let n: u16 = core::str::from_utf8(&self.bytes[start..self.pos])
5918                        .expect("ascii digits")
5919                        .parse()
5920                        .map_err(|_| EvalError::TypeMismatch {
5921                            detail: "tsquery literal: phrase distance overflows u16".into(),
5922                        })?;
5923                    self.pos += 1; // consume '>'
5924                    let rhs = self.parse_unary()?;
5925                    lhs = TsQueryAst::Phrase {
5926                        left: Box::new(lhs),
5927                        right: Box::new(rhs),
5928                        distance: n,
5929                    };
5930                }
5931                _ => return Ok(lhs),
5932            }
5933        }
5934    }
5935    fn parse_unary(&mut self) -> Result<TsQueryAst, EvalError> {
5936        self.skip_ws();
5937        if self.peek() == Some(b'!') {
5938            self.pos += 1;
5939            let inner = self.parse_unary()?;
5940            return Ok(TsQueryAst::Not(Box::new(inner)));
5941        }
5942        self.parse_atom()
5943    }
5944    fn parse_atom(&mut self) -> Result<TsQueryAst, EvalError> {
5945        self.skip_ws();
5946        match self.peek() {
5947            Some(b'(') => {
5948                self.pos += 1;
5949                let inner = self.parse_or()?;
5950                self.skip_ws();
5951                if self.peek() != Some(b')') {
5952                    return Err(EvalError::TypeMismatch {
5953                        detail: "tsquery literal: missing ')'".into(),
5954                    });
5955                }
5956                self.pos += 1;
5957                Ok(inner)
5958            }
5959            Some(b'\'') => {
5960                self.pos += 1;
5961                let mut w = String::new();
5962                loop {
5963                    match self.peek() {
5964                        None => {
5965                            return Err(EvalError::TypeMismatch {
5966                                detail: "tsquery literal: unterminated quoted lexeme".into(),
5967                            });
5968                        }
5969                        Some(b'\'') => {
5970                            if self.bytes.get(self.pos + 1) == Some(&b'\'') {
5971                                w.push('\'');
5972                                self.pos += 2;
5973                            } else {
5974                                self.pos += 1;
5975                                break;
5976                            }
5977                        }
5978                        Some(b) => {
5979                            w.push(b as char);
5980                            self.pos += 1;
5981                        }
5982                    }
5983                }
5984                // Optional `:WEIGHT_MASK` (digit-mask) — v7.12.0
5985                // accepts but always stores 0 (any).
5986                self.skip_weight_suffix();
5987                Ok(TsQueryAst::Term {
5988                    word: w,
5989                    weight_mask: 0,
5990                })
5991            }
5992            Some(b) if b.is_ascii_alphanumeric() || b == b'_' => {
5993                let start = self.pos;
5994                while self.pos < self.bytes.len() {
5995                    let c = self.bytes[self.pos];
5996                    if c.is_ascii_alphanumeric() || c == b'_' {
5997                        self.pos += 1;
5998                    } else {
5999                        break;
6000                    }
6001                }
6002                let w = core::str::from_utf8(&self.bytes[start..self.pos])
6003                    .map_err(|_| EvalError::TypeMismatch {
6004                        detail: "tsquery literal: non-UTF-8 lexeme".into(),
6005                    })?
6006                    .to_string();
6007                self.skip_weight_suffix();
6008                Ok(TsQueryAst::Term {
6009                    word: w,
6010                    weight_mask: 0,
6011                })
6012            }
6013            Some(b) => Err(EvalError::TypeMismatch {
6014                detail: alloc::format!(
6015                    "tsquery literal: unexpected byte {:?} at offset {}",
6016                    b as char,
6017                    self.pos
6018                ),
6019            }),
6020            None => Err(EvalError::TypeMismatch {
6021                detail: "tsquery literal: expected term".into(),
6022            }),
6023        }
6024    }
6025    fn skip_weight_suffix(&mut self) {
6026        if self.peek() != Some(b':') {
6027            return;
6028        }
6029        self.pos += 1;
6030        while let Some(b) = self.peek() {
6031            if matches!(
6032                b,
6033                b'A' | b'B' | b'C' | b'D' | b'a' | b'b' | b'c' | b'd' | b'*'
6034            ) || b.is_ascii_digit()
6035            {
6036                self.pos += 1;
6037            } else {
6038                break;
6039            }
6040        }
6041    }
6042}
6043
/// v7.10.4 — encode a BYTEA payload in PG's hex output format: the
/// two characters `\x` followed by two lowercase hex digits per byte
/// (high nibble first). Public so the wire layer can emit the
/// canonical bytea-as-text representation.
pub fn format_bytea_hex(b: &[u8]) -> String {
    const DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];
    let mut text = String::with_capacity(2 + 2 * b.len());
    text.push('\\');
    text.push('x');
    for &octet in b {
        text.push(DIGITS[usize::from(octet >> 4)]);
        text.push(DIGITS[usize::from(octet & 0x0F)]);
    }
    text
}
6057
/// Format a `Numeric { scaled, scale }` as decimal text. With
/// `scale == 0` this is just the integer. Otherwise the digits of
/// `|scaled|` are split so that exactly `scale` of them follow the
/// decimal point, left-padding with zeros as needed and always
/// keeping at least one digit (possibly `0`) before the point. A
/// negative `scaled` gets a leading `-`.
pub fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return format!("{scaled}");
    }
    let frac_len = usize::from(scale);
    // Zero-pad the magnitude to at least `scale + 1` digits so there
    // is always one integer digit to the left of the split point.
    let digits = format!("{:0>width$}", scaled.unsigned_abs(), width = frac_len + 1);
    let (int_part, frac_part) = digits.split_at(digits.len() - frac_len);

    let mut text = String::with_capacity(digits.len() + 2);
    if scaled < 0 {
        text.push('-');
    }
    text.push_str(int_part);
    text.push('.');
    text.push_str(frac_part);
    text
}
6089
6090fn cast_numeric_to_int(v: Value) -> Result<Value, EvalError> {
6091    match v {
6092        Value::Int(n) => Ok(Value::Int(n)),
6093        Value::BigInt(n) => i32::try_from(n)
6094            .map(Value::Int)
6095            .map_err(|_| EvalError::TypeMismatch {
6096                detail: format!("bigint {n} does not fit in int"),
6097            }),
6098        #[allow(clippy::cast_possible_truncation)]
6099        Value::Float(x) => Ok(Value::Int(x as i32)),
6100        Value::Text(s) => {
6101            s.trim()
6102                .parse::<i32>()
6103                .map(Value::Int)
6104                .map_err(|_| EvalError::TypeMismatch {
6105                    detail: format!("cannot parse {s:?} as int"),
6106                })
6107        }
6108        Value::Bool(b) => Ok(Value::Int(i32::from(b))),
6109        other => Err(EvalError::TypeMismatch {
6110            detail: format!("cannot cast {:?} to int", other.data_type()),
6111        }),
6112    }
6113}
6114
6115fn cast_numeric_to_bigint(v: Value) -> Result<Value, EvalError> {
6116    match v {
6117        Value::Int(n) => Ok(Value::BigInt(i64::from(n))),
6118        Value::BigInt(n) => Ok(Value::BigInt(n)),
6119        #[allow(clippy::cast_possible_truncation)]
6120        Value::Float(x) => Ok(Value::BigInt(x as i64)),
6121        Value::Text(s) => {
6122            s.trim()
6123                .parse::<i64>()
6124                .map(Value::BigInt)
6125                .map_err(|_| EvalError::TypeMismatch {
6126                    detail: format!("cannot parse {s:?} as bigint"),
6127                })
6128        }
6129        Value::Bool(b) => Ok(Value::BigInt(i64::from(b))),
6130        other => Err(EvalError::TypeMismatch {
6131            detail: format!("cannot cast {:?} to bigint", other.data_type()),
6132        }),
6133    }
6134}
6135
6136fn cast_numeric_to_float(v: Value) -> Result<Value, EvalError> {
6137    match v {
6138        Value::Int(n) => Ok(Value::Float(f64::from(n))),
6139        #[allow(clippy::cast_precision_loss)]
6140        Value::BigInt(n) => Ok(Value::Float(n as f64)),
6141        Value::Float(x) => Ok(Value::Float(x)),
6142        Value::Text(s) => {
6143            s.trim()
6144                .parse::<f64>()
6145                .map(Value::Float)
6146                .map_err(|_| EvalError::TypeMismatch {
6147                    detail: format!("cannot parse {s:?} as float"),
6148                })
6149        }
6150        other => Err(EvalError::TypeMismatch {
6151            detail: format!("cannot cast {:?} to float", other.data_type()),
6152        }),
6153    }
6154}
6155
6156fn cast_to_bool(v: Value) -> Result<Value, EvalError> {
6157    match v {
6158        Value::Bool(b) => Ok(Value::Bool(b)),
6159        Value::Int(n) => Ok(Value::Bool(n != 0)),
6160        Value::BigInt(n) => Ok(Value::Bool(n != 0)),
6161        Value::Text(s) => {
6162            let lo = s.trim().to_ascii_lowercase();
6163            match lo.as_str() {
6164                "true" | "t" | "yes" | "y" | "1" | "on" => Ok(Value::Bool(true)),
6165                "false" | "f" | "no" | "n" | "0" | "off" => Ok(Value::Bool(false)),
6166                _ => Err(EvalError::TypeMismatch {
6167                    detail: format!("cannot parse {s:?} as bool"),
6168                }),
6169            }
6170        }
6171        other => Err(EvalError::TypeMismatch {
6172            detail: format!("cannot cast {:?} to bool", other.data_type()),
6173        }),
6174    }
6175}
6176
6177/// Parse a `Value::Text("[1.0, 2.0, 3.0]")` into a `Value::Vector(..)`. Mirrors
6178/// pgvector's `'[..]'::vector` cast. NULL casts as NULL.
6179pub fn cast_to_vector(v: Value) -> Result<Value, EvalError> {
6180    match v {
6181        Value::Null => Ok(Value::Null),
6182        Value::Vector(v) => Ok(Value::Vector(v)),
6183        Value::Text(s) => parse_vector_text(&s)
6184            .map(Value::Vector)
6185            .ok_or(EvalError::TypeMismatch {
6186                detail: format!("cannot parse {s:?} as a vector literal"),
6187            }),
6188        other => Err(EvalError::TypeMismatch {
6189            detail: format!("::vector requires text input, got {:?}", other.data_type()),
6190        }),
6191    }
6192}
6193
/// Parse `"[1.0, 2.0, -3]"` into `Vec<f32>`.
///
/// Surrounding whitespace is ignored, both around the brackets and
/// around each comma-separated element. `"[]"` yields an empty vector.
///
/// Returns `None` on malformed input: missing brackets, an empty or
/// non-numeric element, or an element that is not a finite `f32`.
/// That last case covers `NaN`, `inf`, and literals such as `1e40`
/// that overflow to infinity; a vector holding such values would
/// poison every distance computation performed on it.
pub fn parse_vector_text(s: &str) -> Option<Vec<f32>> {
    let inner = s.trim().strip_prefix('[')?.strip_suffix(']')?.trim();
    if inner.is_empty() {
        return Some(Vec::new());
    }
    // Collecting into `Option<Vec<_>>` stops at the first bad element.
    inner
        .split(',')
        .map(|part| {
            part.trim()
                .parse::<f32>()
                .ok()
                .filter(|elem| elem.is_finite())
        })
        .collect()
}
6209
6210fn literal_to_value(l: &Literal) -> Value {
6211    match l {
6212        Literal::Integer(n) => {
6213            if let Ok(small) = i32::try_from(*n) {
6214                Value::Int(small)
6215            } else {
6216                Value::BigInt(*n)
6217            }
6218        }
6219        Literal::Float(x) => Value::Float(*x),
6220        Literal::String(s) => Value::Text(s.clone()),
6221        Literal::Vector(v) => Value::Vector(v.clone()),
6222        Literal::TextArray(items) => Value::TextArray(items.clone()),
6223        Literal::IntArray(items) => Value::IntArray(items.clone()),
6224        Literal::BigIntArray(items) => Value::BigIntArray(items.clone()),
6225        Literal::Bool(b) => Value::Bool(*b),
6226        Literal::Null => Value::Null,
6227        Literal::Interval { months, micros, .. } => Value::Interval {
6228            months: *months,
6229            micros: *micros,
6230        },
6231    }
6232}
6233
6234/// v7.17.0 Phase 2.5 — look up the collation of a column reference
6235/// in the current evaluation context. Returns `None` when the
6236/// expression is not a column reference (e.g. literal / function
6237/// call) or the column can't be resolved (caller falls back to
6238/// `Collation::Binary` semantics).
6239pub(crate) fn column_collation(e: &Expr, ctx: &EvalContext<'_>) -> Option<spg_storage::Collation> {
6240    let Expr::Column(c) = e else {
6241        return None;
6242    };
6243    if let Some(q) = &c.qualifier {
6244        let composite = alloc::format!("{q}.{name}", name = c.name);
6245        if let Some(s) = ctx.columns.iter().find(|s| s.name == composite) {
6246            return Some(s.collation);
6247        }
6248    }
6249    if let Some(s) = ctx.columns.iter().find(|s| s.name == c.name) {
6250        return Some(s.collation);
6251    }
6252    // Bare-name fallback for joined schemas (same shape as
6253    // resolve_column): match a single composite ending in
6254    // ".<name>".
6255    let suffix = alloc::format!(".{name}", name = c.name);
6256    let mut matches = ctx.columns.iter().filter(|s| s.name.ends_with(&suffix));
6257    let first = matches.next();
6258    let extra = matches.next();
6259    match (first, extra) {
6260        (Some(s), None) => Some(s.collation),
6261        _ => None,
6262    }
6263}
6264
6265/// v7.17.0 Phase 2.5 — if the comparison op is text-equality and
6266/// either operand references a CaseInsensitive column, return
6267/// ASCII-folded copies of both Text values; otherwise pass
6268/// through. Only Eq / NotEq / Lt / LtEq / Gt / GtEq trigger the
6269/// fold — relational operators on text still honour collation
6270/// the same way (PG semantics). Non-Text values pass through.
6271fn collation_fold_for_compare(
6272    op: BinOp,
6273    lhs: &Expr,
6274    rhs: &Expr,
6275    l: Value,
6276    r: Value,
6277    ctx: &EvalContext<'_>,
6278) -> (Value, Value) {
6279    if !matches!(
6280        op,
6281        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq
6282    ) {
6283        return (l, r);
6284    }
6285    let lhs_col = column_collation(lhs, ctx);
6286    let rhs_col = column_collation(rhs, ctx);
6287    let ci = matches!(lhs_col, Some(spg_storage::Collation::CaseInsensitive))
6288        || matches!(rhs_col, Some(spg_storage::Collation::CaseInsensitive));
6289    if !ci {
6290        return (l, r);
6291    }
6292    let fold = |v: Value| match v {
6293        Value::Text(s) => Value::Text(s.to_ascii_lowercase()),
6294        other => other,
6295    };
6296    (fold(l), fold(r))
6297}
6298
6299fn resolve_column(c: &ColumnName, row: &Row, ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
6300    if let Some(q) = &c.qualifier {
6301        // Multi-table evaluation (joins): the synthesised schema uses
6302        // composite column names "alias.column" so we look that up
6303        // directly. Falls back to the single-table case below if the
6304        // composite isn't present.
6305        let composite = alloc::format!("{q}.{name}", name = c.name);
6306        if let Some(pos) = ctx.columns.iter().position(|s| s.name == composite) {
6307            return Ok(row.values[pos].clone());
6308        }
6309        let expected = ctx.table_alias.ok_or_else(|| EvalError::UnknownQualifier {
6310            qualifier: q.clone(),
6311        })?;
6312        if q != expected {
6313            return Err(EvalError::UnknownQualifier {
6314                qualifier: q.clone(),
6315            });
6316        }
6317    }
6318    if let Some(pos) = ctx.columns.iter().position(|s| s.name == c.name) {
6319        return Ok(row.values[pos].clone());
6320    }
6321    // Bare-name fallback for joined schemas: match any single composite
6322    // column ending in ".<name>"; ambiguity is an error.
6323    let suffix = alloc::format!(".{name}", name = c.name);
6324    let mut matches = ctx
6325        .columns
6326        .iter()
6327        .enumerate()
6328        .filter(|(_, s)| s.name.ends_with(&suffix));
6329    let first = matches.next();
6330    let extra = matches.next();
6331    match (first, extra) {
6332        (Some((pos, _)), None) => Ok(row.values[pos].clone()),
6333        (Some(_), Some(_)) => Err(EvalError::TypeMismatch {
6334            detail: alloc::format!("ambiguous column reference: {}", c.name),
6335        }),
6336        _ => Err(EvalError::ColumnNotFound {
6337            name: c.name.clone(),
6338        }),
6339    }
6340}
6341
6342fn apply_unary(op: UnOp, v: Value) -> Result<Value, EvalError> {
6343    match (op, v) {
6344        (_, Value::Null) => Ok(Value::Null),
6345        (UnOp::Neg, Value::Int(n)) => {
6346            n.checked_neg()
6347                .map(Value::Int)
6348                .ok_or(EvalError::TypeMismatch {
6349                    detail: "integer overflow on unary -".into(),
6350                })
6351        }
6352        (UnOp::Neg, Value::BigInt(n)) => {
6353            n.checked_neg()
6354                .map(Value::BigInt)
6355                .ok_or(EvalError::TypeMismatch {
6356                    detail: "bigint overflow on unary -".into(),
6357                })
6358        }
6359        (UnOp::Neg, Value::Float(x)) => Ok(Value::Float(-x)),
6360        (UnOp::Neg, other) => Err(EvalError::TypeMismatch {
6361            detail: format!("unary - applied to {:?}", other.data_type()),
6362        }),
6363        (UnOp::BitNot, Value::SmallInt(n)) => Ok(Value::Int(!i32::from(n))),
6364        (UnOp::BitNot, Value::Int(n)) => Ok(Value::Int(!n)),
6365        (UnOp::BitNot, Value::BigInt(n)) => Ok(Value::BigInt(!n)),
6366        (UnOp::BitNot, other) => Err(EvalError::TypeMismatch {
6367            detail: format!("cannot apply ~ to {other:?}"),
6368        }),
6369        (UnOp::Not, Value::Bool(b)) => Ok(Value::Bool(!b)),
6370        (UnOp::Not, other) => Err(EvalError::TypeMismatch {
6371            detail: format!("NOT applied to {:?}", other.data_type()),
6372        }),
6373    }
6374}
6375
6376/// v7.9.27b — true when two values are "not distinct" per PG:
6377/// both NULL counts as equal; otherwise reduces to regular Eq.
6378fn values_not_distinct(l: &Value, r: &Value) -> bool {
6379    match (l, r) {
6380        (Value::Null, Value::Null) => true,
6381        (Value::Null, _) | (_, Value::Null) => false,
6382        _ => l == r,
6383    }
6384}
6385
6386fn apply_binary(op: BinOp, l: Value, r: Value) -> Result<Value, EvalError> {
6387    // SQL three-valued logic for AND / OR with NULL is special — handle before
6388    // the general NULL-propagation rule.
6389    if let BinOp::And = op {
6390        return and_3vl(l, r);
6391    }
6392    if let BinOp::Or = op {
6393        return or_3vl(l, r);
6394    }
6395    // v7.9.27b — IS [NOT] DISTINCT FROM. NULL-safe equality:
6396    // `NULL IS NOT DISTINCT FROM NULL` → true. mailrs pg_dump.
6397    if let BinOp::IsNotDistinctFrom = op {
6398        return Ok(Value::Bool(values_not_distinct(&l, &r)));
6399    }
6400    if let BinOp::IsDistinctFrom = op {
6401        return Ok(Value::Bool(!values_not_distinct(&l, &r)));
6402    }
6403    // Everything else: any NULL operand → NULL.
6404    if l.is_null() || r.is_null() {
6405        return Ok(Value::Null);
6406    }
6407    // NUMERIC arithmetic and comparisons run in fixed-point; promote
6408    // integers to a common NUMERIC scale and stay in i128 throughout.
6409    if matches!(l, Value::Numeric { .. }) || matches!(r, Value::Numeric { .. }) {
6410        return apply_binary_numeric(op, l, r);
6411    }
6412    // Date / Timestamp arithmetic. PG semantics:
6413    //   * date + int      → date  (int is days)
6414    //   * int + date      → date
6415    //   * date - int      → date
6416    //   * date - date     → int   (days, signed)
6417    //   * timestamp - timestamp → bigint (microseconds, signed)
6418    // Other date/time math (`timestamp + int`, INTERVAL) lands later.
6419    if let Some(result) = apply_binary_calendar(op, &l, &r)? {
6420        return Ok(result);
6421    }
6422    match op {
6423        BinOp::Add => arith(l, r, i64::checked_add, |a, b| a + b, "+"),
6424        BinOp::Sub => arith(l, r, i64::checked_sub, |a, b| a - b, "-"),
6425        BinOp::Mul => arith(l, r, i64::checked_mul, |a, b| a * b, "*"),
6426        BinOp::Div => div_op(l, r),
6427        BinOp::L2Distance => l2_distance(l, r),
6428        BinOp::InnerProduct => inner_product(l, r),
6429        BinOp::CosineDistance => cosine_distance(l, r),
6430        BinOp::Concat => Ok(text_concat(&l, &r)),
6431        BinOp::BitOr => bitop(l, r, |a, b| a | b, "|"),
6432        BinOp::BitAnd => bitop(l, r, |a, b| a & b, "&"),
6433        BinOp::JsonGet => crate::json::path_get(&l, &r, false),
6434        BinOp::JsonGetText => crate::json::path_get(&l, &r, true),
6435        BinOp::JsonGetPath => crate::json::path_walk(&l, &r, false),
6436        BinOp::JsonGetPathText => crate::json::path_walk(&l, &r, true),
6437        BinOp::JsonContains => crate::json::contains(&l, &r),
6438        // v7.12.2 — `@@` match. NULL on either side → NULL; PG
6439        // accepts both orderings so we normalise.
6440        BinOp::TsMatch => ts_match(l, r),
6441        // v7.17.0 Phase 3.P0-47 — PG INET / CIDR containment + overlap.
6442        BinOp::InetContainedBy
6443        | BinOp::InetContainedByEq
6444        | BinOp::InetContains
6445        | BinOp::InetContainsEq
6446        | BinOp::InetOverlap => inet_op_bool_result(op, &l, &r),
6447        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6448            compare(op, &l, &r)
6449        }
6450        BinOp::And | BinOp::Or | BinOp::IsDistinctFrom | BinOp::IsNotDistinctFrom => {
6451            unreachable!("handled above")
6452        }
6453    }
6454}
6455
6456/// Calendar arithmetic. Returns `Some(value)` when the operand pair
6457/// is a date/time combo this function understands, `None` to let the
6458/// caller fall through to the regular numeric / text paths.
6459fn apply_binary_calendar(op: BinOp, l: &Value, r: &Value) -> Result<Option<Value>, EvalError> {
6460    let int_value = |v: &Value| -> Option<i64> {
6461        match v {
6462            Value::SmallInt(n) => Some(i64::from(*n)),
6463            Value::Int(n) => Some(i64::from(*n)),
6464            Value::BigInt(n) => Some(*n),
6465            _ => None,
6466        }
6467    };
6468    // Most-specific cases first — DATE-DATE / TS-TS subtraction before
6469    // DATE-integer subtraction, otherwise the latter swallows the
6470    // former with an `int_value(Date) = None` no-op fall-through.
6471    match (l, r) {
6472        (Value::Date(a), Value::Date(b)) if op == BinOp::Sub => {
6473            return Ok(Some(Value::BigInt(i64::from(*a) - i64::from(*b))));
6474        }
6475        (Value::Timestamp(a), Value::Timestamp(b)) if op == BinOp::Sub => {
6476            let delta = a.checked_sub(*b).ok_or(EvalError::TypeMismatch {
6477                detail: "TIMESTAMP - TIMESTAMP overflows i64 microseconds".into(),
6478            })?;
6479            return Ok(Some(Value::BigInt(delta)));
6480        }
6481        _ => {}
6482    }
6483    // INTERVAL arithmetic. PG: timestamp ± interval → timestamp,
6484    // date ± interval → date (if interval is pure days/months with no
6485    // sub-day component) else timestamp, interval ± interval → interval.
6486    if let Some(out) = apply_binary_interval(op, l, r)? {
6487        return Ok(Some(out));
6488    }
6489    match (l, r) {
6490        (Value::Date(d), other) if op == BinOp::Add => {
6491            if let Some(n) = int_value(other) {
6492                let days = i64::from(*d).saturating_add(n);
6493                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6494                    detail: "DATE + integer overflows DATE range".into(),
6495                })?;
6496                return Ok(Some(Value::Date(days32)));
6497            }
6498        }
6499        (other, Value::Date(d)) if op == BinOp::Add => {
6500            if let Some(n) = int_value(other) {
6501                let days = i64::from(*d).saturating_add(n);
6502                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6503                    detail: "integer + DATE overflows DATE range".into(),
6504                })?;
6505                return Ok(Some(Value::Date(days32)));
6506            }
6507        }
6508        (Value::Date(d), other) if op == BinOp::Sub => {
6509            if let Some(n) = int_value(other) {
6510                let days = i64::from(*d).saturating_sub(n);
6511                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6512                    detail: "DATE - integer overflows DATE range".into(),
6513                })?;
6514                return Ok(Some(Value::Date(days32)));
6515            }
6516        }
6517        _ => {}
6518    }
6519    Ok(None)
6520}
6521
6522/// INTERVAL-aware binary ops. Recognises:
6523///   timestamp ± interval → timestamp
6524///   date ± interval      → date (if interval is integral days/months only)
6525///                       → timestamp (if interval has sub-day micros)
6526///   interval ± interval  → interval
6527/// Commutative for `+`. Returns `None` for unrecognised operand pairs so
6528/// the caller can fall through.
6529pub(crate) fn apply_binary_interval(
6530    op: BinOp,
6531    l: &Value,
6532    r: &Value,
6533) -> Result<Option<Value>, EvalError> {
6534    // Normalise so the interval (if any) is always on the right for Add;
6535    // Sub stays left-handed because it isn't commutative.
6536    let (lhs, rhs, sign): (&Value, &Value, i64) = match (l, r, op) {
6537        (Value::Interval { .. }, _, BinOp::Add) => (r, l, 1),
6538        (_, Value::Interval { .. }, BinOp::Add) => (l, r, 1),
6539        (_, Value::Interval { .. }, BinOp::Sub) => (l, r, -1),
6540        _ => return Ok(None),
6541    };
6542    let Value::Interval {
6543        months: rhs_months,
6544        micros: rhs_us,
6545    } = rhs
6546    else {
6547        unreachable!("rhs guaranteed to be Interval by the match above");
6548    };
6549    let signed_months = i64::from(*rhs_months) * sign;
6550    let signed_micros = rhs_us.checked_mul(sign).ok_or(EvalError::TypeMismatch {
6551        detail: "INTERVAL micros overflows on negation".into(),
6552    })?;
6553    match lhs {
6554        Value::Timestamp(t) => Ok(Some(Value::Timestamp(add_interval_to_micros(
6555            *t,
6556            signed_months,
6557            signed_micros,
6558        )?))),
6559        Value::Date(d) => {
6560            // Date + interval stays a date when the interval has zero
6561            // sub-day microseconds; otherwise promote to TIMESTAMP at
6562            // midnight of the (months-shifted) date first.
6563            let day_aligned = signed_micros.rem_euclid(86_400_000_000) == 0;
6564            if day_aligned {
6565                let micros_per_day = 86_400_000_000_i64;
6566                let days_delta = signed_micros / micros_per_day;
6567                let shifted = shift_date_by_months(*d, signed_months)?;
6568                let new_days =
6569                    i64::from(shifted)
6570                        .checked_add(days_delta)
6571                        .ok_or(EvalError::TypeMismatch {
6572                            detail: "DATE ± INTERVAL overflows DATE range".into(),
6573                        })?;
6574                let days32 = i32::try_from(new_days).map_err(|_| EvalError::TypeMismatch {
6575                    detail: "DATE ± INTERVAL overflows DATE range".into(),
6576                })?;
6577                Ok(Some(Value::Date(days32)))
6578            } else {
6579                let base =
6580                    i64::from(*d)
6581                        .checked_mul(86_400_000_000)
6582                        .ok_or(EvalError::TypeMismatch {
6583                            detail: "DATE → TIMESTAMP lift overflows for INTERVAL math".into(),
6584                        })?;
6585                Ok(Some(Value::Timestamp(add_interval_to_micros(
6586                    base,
6587                    signed_months,
6588                    signed_micros,
6589                )?)))
6590            }
6591        }
6592        Value::Interval {
6593            months: lhs_months,
6594            micros: lhs_us,
6595        } => {
6596            let new_months = i64::from(*lhs_months)
6597                .checked_add(signed_months)
6598                .and_then(|n| i32::try_from(n).ok())
6599                .ok_or(EvalError::TypeMismatch {
6600                    detail: "INTERVAL ± INTERVAL months overflows i32".into(),
6601                })?;
6602            let new_micros = lhs_us
6603                .checked_add(signed_micros)
6604                .ok_or(EvalError::TypeMismatch {
6605                    detail: "INTERVAL ± INTERVAL micros overflows i64".into(),
6606                })?;
6607            Ok(Some(Value::Interval {
6608                months: new_months,
6609                micros: new_micros,
6610            }))
6611        }
6612        _ => Err(EvalError::TypeMismatch {
6613            detail: format!(
6614                "operator {op:?} not defined for {:?} and INTERVAL",
6615                lhs.data_type()
6616            ),
6617        }),
6618    }
6619}
6620
6621/// Shift a `Date` by a signed number of months using the PG clamp rule.
6622fn shift_date_by_months(d: i32, months: i64) -> Result<i32, EvalError> {
6623    let (y, m, day) = civil_from_days(d);
6624    let months_i32 = i32::try_from(months).map_err(|_| EvalError::TypeMismatch {
6625        detail: "INTERVAL months delta out of i32 range".into(),
6626    })?;
6627    let (ny, nm, nd) = add_months_to_civil(y, m, day, months_i32);
6628    Ok(days_from_civil(ny, nm, nd))
6629}
6630
6631/// Add (months, micros) to a `Timestamp` (microseconds since epoch).
6632/// Months part is applied through civil calendar with clamp-to-last-day;
6633/// micros part is plain i64 addition with overflow guard.
6634fn add_interval_to_micros(t: i64, months: i64, micros: i64) -> Result<i64, EvalError> {
6635    let mut out = t;
6636    if months != 0 {
6637        const MICROS_PER_DAY: i64 = 86_400_000_000;
6638        let days = out.div_euclid(MICROS_PER_DAY);
6639        let day_micros = out.rem_euclid(MICROS_PER_DAY);
6640        let day_i32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6641            detail: "TIMESTAMP day component out of i32 range for INTERVAL months math".into(),
6642        })?;
6643        let shifted_days = shift_date_by_months(day_i32, months)?;
6644        out = i64::from(shifted_days)
6645            .checked_mul(MICROS_PER_DAY)
6646            .and_then(|n| n.checked_add(day_micros))
6647            .ok_or(EvalError::TypeMismatch {
6648                detail: "TIMESTAMP ± INTERVAL months overflows i64 microseconds".into(),
6649            })?;
6650    }
6651    out.checked_add(micros).ok_or(EvalError::TypeMismatch {
6652        detail: "TIMESTAMP ± INTERVAL micros overflows i64".into(),
6653    })
6654}
6655
6656/// Dispatch for any binary op when at least one operand is NUMERIC.
6657/// Other-side integers / floats are promoted to a NUMERIC at a common
6658/// scale; all add / sub / mul / div / compare paths stay in i128.
6659#[allow(clippy::needless_pass_by_value)] // mirrors `apply_binary`'s by-value calling convention
6660fn apply_binary_numeric(op: BinOp, l: Value, r: Value) -> Result<Value, EvalError> {
6661    // Float still wins — Numeric + Float coerces both to f64 and runs
6662    // through the float path. PG demotes Numeric to float in this mix
6663    // too (the documented behaviour for `numeric + double precision`).
6664    let float_path = matches!(l, Value::Float(_)) || matches!(r, Value::Float(_));
6665    if float_path {
6666        let af = as_f64(&l)?;
6667        let bf = as_f64(&r)?;
6668        return match op {
6669            BinOp::Add => Ok(Value::Float(af + bf)),
6670            BinOp::Sub => Ok(Value::Float(af - bf)),
6671            BinOp::Mul => Ok(Value::Float(af * bf)),
6672            BinOp::Div => {
6673                if bf == 0.0 {
6674                    Err(EvalError::DivisionByZero)
6675                } else {
6676                    Ok(Value::Float(af / bf))
6677                }
6678            }
6679            BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6680                let ord = af.partial_cmp(&bf).ok_or(EvalError::TypeMismatch {
6681                    detail: "NaN in NUMERIC/Float comparison".into(),
6682                })?;
6683                Ok(Value::Bool(cmp_to_bool(op, ord)))
6684            }
6685            BinOp::Concat => Ok(text_concat(&l, &r)),
6686            other => Err(EvalError::TypeMismatch {
6687                detail: format!("operator {other:?} not defined for NUMERIC and Float"),
6688            }),
6689        };
6690    }
6691    // Promote integer ↔ numeric to a shared scale (max of both sides).
6692    let (a, sa) = numeric_or_widen(&l).ok_or_else(|| EvalError::TypeMismatch {
6693        detail: format!("NUMERIC op against non-numeric {:?}", l.data_type()),
6694    })?;
6695    let (b, sb) = numeric_or_widen(&r).ok_or_else(|| EvalError::TypeMismatch {
6696        detail: format!("NUMERIC op against non-numeric {:?}", r.data_type()),
6697    })?;
6698    match op {
6699        BinOp::Add | BinOp::Sub => {
6700            let target_scale = sa.max(sb);
6701            let lhs = rescale(a, sa, target_scale).ok_or(EvalError::TypeMismatch {
6702                detail: "NUMERIC overflow on rescale".into(),
6703            })?;
6704            let rhs = rescale(b, sb, target_scale).ok_or(EvalError::TypeMismatch {
6705                detail: "NUMERIC overflow on rescale".into(),
6706            })?;
6707            let r = match op {
6708                BinOp::Add => lhs.checked_add(rhs),
6709                BinOp::Sub => lhs.checked_sub(rhs),
6710                _ => unreachable!(),
6711            }
6712            .ok_or(EvalError::TypeMismatch {
6713                detail: "NUMERIC overflow on +/-".into(),
6714            })?;
6715            Ok(Value::Numeric {
6716                scaled: r,
6717                scale: target_scale,
6718            })
6719        }
6720        BinOp::Mul => {
6721            let scaled = a.checked_mul(b).ok_or(EvalError::TypeMismatch {
6722                detail: "NUMERIC overflow on *".into(),
6723            })?;
6724            Ok(Value::Numeric {
6725                scaled,
6726                scale: sa.saturating_add(sb),
6727            })
6728        }
6729        BinOp::Div => {
6730            if b == 0 {
6731                return Err(EvalError::DivisionByZero);
6732            }
6733            // Result scale: keep the wider operand's scale. Pre-scale
6734            // the numerator so the integer division retains that many
6735            // fractional digits. Round half-away-from-zero.
6736            let target_scale = sa.max(sb);
6737            // Numerator effective scale becomes sa + target_scale; we
6738            // bring it up to (target_scale + sb) so the divisor's scale
6739            // cancels cleanly.
6740            let bump = pow10_i128(target_scale.saturating_add(sb).saturating_sub(sa));
6741            let num = a.checked_mul(bump).ok_or(EvalError::TypeMismatch {
6742                detail: "NUMERIC overflow on / scaling".into(),
6743            })?;
6744            let half = if b >= 0 { b / 2 } else { -(b / 2) };
6745            let adj = if (num >= 0) == (b >= 0) {
6746                num + half
6747            } else {
6748                num - half
6749            };
6750            Ok(Value::Numeric {
6751                scaled: adj / b,
6752                scale: target_scale,
6753            })
6754        }
6755        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6756            let target_scale = sa.max(sb);
6757            let lhs = rescale(a, sa, target_scale).ok_or(EvalError::TypeMismatch {
6758                detail: "NUMERIC overflow on rescale".into(),
6759            })?;
6760            let rhs = rescale(b, sb, target_scale).ok_or(EvalError::TypeMismatch {
6761                detail: "NUMERIC overflow on rescale".into(),
6762            })?;
6763            Ok(Value::Bool(cmp_to_bool(op, lhs.cmp(&rhs))))
6764        }
6765        BinOp::Concat => Ok(text_concat(&l, &r)),
6766        other => Err(EvalError::TypeMismatch {
6767            detail: format!("operator {other:?} not defined for NUMERIC"),
6768        }),
6769    }
6770}
6771
6772/// Express `v` as a `(scaled_i128, scale)` pair. Plain integers come
6773/// back with `scale=0`; NUMERIC keeps its own scale. Anything else
6774/// returns `None` and the caller raises a type error.
6775fn numeric_or_widen(v: &Value) -> Option<(i128, u8)> {
6776    match v {
6777        Value::Numeric { scaled, scale } => Some((*scaled, *scale)),
6778        Value::Int(n) => Some((i128::from(*n), 0)),
6779        Value::SmallInt(n) => Some((i128::from(*n), 0)),
6780        Value::BigInt(n) => Some((i128::from(*n), 0)),
6781        _ => None,
6782    }
6783}
6784
/// Convert the fixed-point value `scaled` from scale `src` to scale
/// `dst`.
///
/// * Raising the scale multiplies by `10^(dst - src)` and returns
///   `None` when the power of ten or the product does not fit in
///   `i128`.
/// * Lowering the scale divides by `10^(src - dst)`, rounding half away
///   from zero. This direction cannot fail: if the divisor itself
///   exceeds `i128`, the value is smaller than half of it and rounds
///   to `0`.
/// * Zero, and equal scales, are returned unchanged.
fn rescale(scaled: i128, src: u8, dst: u8) -> Option<i128> {
    use core::cmp::Ordering;

    if scaled == 0 {
        return Some(0);
    }
    match dst.cmp(&src) {
        Ordering::Equal => Some(scaled),
        Ordering::Greater => 10_i128
            .checked_pow(u32::from(dst - src))
            .and_then(|factor| scaled.checked_mul(factor)),
        Ordering::Less => {
            let Some(divisor) = 10_i128.checked_pow(u32::from(src - dst)) else {
                // 10^39 and above dwarf every i128, so the result is 0.
                return Some(0);
            };
            // Work from quotient and remainder so that values next to
            // the i128 limits cannot overflow while rounding.
            let quotient = scaled / divisor;
            let remainder = scaled % divisor;
            if remainder.unsigned_abs() < divisor.unsigned_abs() / 2 {
                Some(quotient)
            } else if scaled < 0 {
                Some(quotient - 1)
            } else {
                Some(quotient + 1)
            }
        }
    }
}
6802
/// `10^p` as an `i128`. The result only fits for `p <= 38`; larger
/// exponents overflow the multiplication.
#[allow(clippy::cast_lossless)] // `u32::from` is not callable in a `const fn`
const fn pow10_i128(p: u8) -> i128 {
    10_i128.pow(p as u32)
}
6812
6813const fn cmp_to_bool(op: BinOp, ord: core::cmp::Ordering) -> bool {
6814    use core::cmp::Ordering::{Equal, Greater, Less};
6815    match op {
6816        BinOp::Eq => matches!(ord, Equal),
6817        BinOp::NotEq => !matches!(ord, Equal),
6818        BinOp::Lt => matches!(ord, Less),
6819        BinOp::LtEq => matches!(ord, Less | Equal),
6820        BinOp::Gt => matches!(ord, Greater),
6821        BinOp::GtEq => matches!(ord, Greater | Equal),
6822        _ => false,
6823    }
6824}
6825
6826/// SQL `||` string concatenation. Operands are coerced to text via the same
6827/// rule as `::text` cast. NULL propagates (handled above; this function only
6828/// runs with non-NULL operands).
6829fn text_concat(l: &Value, r: &Value) -> Value {
6830    // v7.11.8 — PG `||` overloads: TEXT[] || TEXT[] = concatenated array;
6831    // TEXT[] || TEXT (or TEXT || TEXT[]) prepends/appends the single
6832    // element. NULL || anything = NULL (PG semantics for arrays;
6833    // text concat treats NULL the same way after value_to_text).
6834    match (l, r) {
6835        (Value::Null, _) | (_, Value::Null) => {
6836            // PG text concat: NULL || x = NULL. Array concat: NULL || x = NULL.
6837            // Keep the legacy text path (value_to_text handles Null as ""),
6838            // but for arrays we surface real NULL to match PG.
6839            if matches!(
6840                l,
6841                Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_) | Value::Bytes(_)
6842            ) || matches!(
6843                r,
6844                Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_) | Value::Bytes(_)
6845            ) {
6846                return Value::Null;
6847            }
6848        }
6849        (Value::TextArray(a), Value::TextArray(b)) => {
6850            let mut out = a.clone();
6851            out.extend(b.iter().cloned());
6852            return Value::TextArray(out);
6853        }
6854        (Value::TextArray(a), Value::Text(s)) => {
6855            let mut out = a.clone();
6856            out.push(Some(s.clone()));
6857            return Value::TextArray(out);
6858        }
6859        (Value::Text(s), Value::TextArray(b)) => {
6860            let mut out: alloc::vec::Vec<Option<alloc::string::String>> =
6861                alloc::vec::Vec::with_capacity(1 + b.len());
6862            out.push(Some(s.clone()));
6863            out.extend(b.iter().cloned());
6864            return Value::TextArray(out);
6865        }
6866        // v7.11.13 — IntArray / BigIntArray `||` overloads. Same
6867        // PG semantics as TEXT[]: array||array concatenates, and
6868        // array||scalar appends/prepends. Mixed Int/BigInt widens
6869        // to BigIntArray.
6870        (Value::IntArray(a), Value::IntArray(b)) => {
6871            let mut out = a.clone();
6872            out.extend(b.iter().copied());
6873            return Value::IntArray(out);
6874        }
6875        (Value::IntArray(a), Value::Int(n)) => {
6876            let mut out = a.clone();
6877            out.push(Some(*n));
6878            return Value::IntArray(out);
6879        }
6880        (Value::IntArray(a), Value::SmallInt(n)) => {
6881            let mut out = a.clone();
6882            out.push(Some(i32::from(*n)));
6883            return Value::IntArray(out);
6884        }
6885        (Value::Int(n), Value::IntArray(b)) => {
6886            let mut out: alloc::vec::Vec<Option<i32>> = alloc::vec::Vec::with_capacity(1 + b.len());
6887            out.push(Some(*n));
6888            out.extend(b.iter().copied());
6889            return Value::IntArray(out);
6890        }
6891        (Value::SmallInt(n), Value::IntArray(b)) => {
6892            let mut out: alloc::vec::Vec<Option<i32>> = alloc::vec::Vec::with_capacity(1 + b.len());
6893            out.push(Some(i32::from(*n)));
6894            out.extend(b.iter().copied());
6895            return Value::IntArray(out);
6896        }
6897        (Value::BigIntArray(a), Value::BigIntArray(b)) => {
6898            let mut out = a.clone();
6899            out.extend(b.iter().copied());
6900            return Value::BigIntArray(out);
6901        }
6902        (Value::BigIntArray(a), Value::IntArray(b)) => {
6903            let mut out = a.clone();
6904            out.extend(b.iter().map(|o| o.map(i64::from)));
6905            return Value::BigIntArray(out);
6906        }
6907        (Value::IntArray(a), Value::BigIntArray(b)) => {
6908            let mut out: alloc::vec::Vec<Option<i64>> =
6909                a.iter().map(|o| o.map(i64::from)).collect();
6910            out.extend(b.iter().copied());
6911            return Value::BigIntArray(out);
6912        }
6913        (Value::BigIntArray(a), Value::BigInt(n)) => {
6914            let mut out = a.clone();
6915            out.push(Some(*n));
6916            return Value::BigIntArray(out);
6917        }
6918        (Value::BigIntArray(a), Value::Int(n)) => {
6919            let mut out = a.clone();
6920            out.push(Some(i64::from(*n)));
6921            return Value::BigIntArray(out);
6922        }
6923        (Value::BigIntArray(a), Value::SmallInt(n)) => {
6924            let mut out = a.clone();
6925            out.push(Some(i64::from(*n)));
6926            return Value::BigIntArray(out);
6927        }
6928        (Value::BigInt(n), Value::BigIntArray(b)) => {
6929            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
6930            out.push(Some(*n));
6931            out.extend(b.iter().copied());
6932            return Value::BigIntArray(out);
6933        }
6934        (Value::Int(n), Value::BigIntArray(b)) => {
6935            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
6936            out.push(Some(i64::from(*n)));
6937            out.extend(b.iter().copied());
6938            return Value::BigIntArray(out);
6939        }
6940        (Value::SmallInt(n), Value::BigIntArray(b)) => {
6941            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
6942            out.push(Some(i64::from(*n)));
6943            out.extend(b.iter().copied());
6944            return Value::BigIntArray(out);
6945        }
6946        // v7.11.15 — BYTEA `||` is byte concatenation.
6947        (Value::Bytes(a), Value::Bytes(b)) => {
6948            let mut out = a.clone();
6949            out.extend_from_slice(b);
6950            return Value::Bytes(out);
6951        }
6952        _ => {}
6953    }
6954    let a = value_to_text(l);
6955    let b = value_to_text(r);
6956    Value::Text(a + &b)
6957}
6958
6959/// pgvector inner-product `<#>`. Returns the *negative* dot product so
6960/// smaller still means more similar — same convention as pgvector.
6961fn inner_product(l: Value, r: Value) -> Result<Value, EvalError> {
6962    let (a, b) = unwrap_vec_pair(l, r, "<#>")?;
6963    let mut dot: f64 = 0.0;
6964    for (x, y) in a.iter().zip(b.iter()) {
6965        dot += f64::from(*x) * f64::from(*y);
6966    }
6967    Ok(Value::Float(-dot))
6968}
6969
6970/// pgvector cosine distance `<=>` — `1 - (a·b) / (‖a‖ ‖b‖)`. A zero-norm
6971/// operand produces NaN (matches pgvector).
6972fn cosine_distance(l: Value, r: Value) -> Result<Value, EvalError> {
6973    let (a, b) = unwrap_vec_pair(l, r, "<=>")?;
6974    let mut dot: f64 = 0.0;
6975    let mut na: f64 = 0.0;
6976    let mut nb: f64 = 0.0;
6977    for (x, y) in a.iter().zip(b.iter()) {
6978        let xf = f64::from(*x);
6979        let yf = f64::from(*y);
6980        dot += xf * yf;
6981        na += xf * xf;
6982        nb += yf * yf;
6983    }
6984    let denom = sqrt_newton(na) * sqrt_newton(nb);
6985    if denom == 0.0 {
6986        return Ok(Value::Float(f64::NAN));
6987    }
6988    Ok(Value::Float(1.0 - dot / denom))
6989}
6990
6991fn unwrap_vec_pair(l: Value, r: Value, op: &str) -> Result<(Vec<f32>, Vec<f32>), EvalError> {
6992    // v6.0.1: SQ8 cells coming through the SQL evaluator are
6993    // dequantised to f32 here so the existing scalar distance
6994    // arithmetic stays intact. HNSW kNN search continues to use
6995    // the asymmetric ADC variant inside `cell_to_query_metric_
6996    // distance` — this path only runs when a vector expression
6997    // lands in the evaluator (full-scan ORDER BY, SELECT
6998    // projection of `v <-> $1`, etc.).
6999    let to_f32 = |v: Value| -> Option<Vec<f32>> {
7000        match v {
7001            Value::Vector(a) => Some(a),
7002            Value::Sq8Vector(q) => Some(spg_storage::quantize::dequantize(&q)),
7003            // v6.0.3: bit-exact dequant for halfvec cells.
7004            Value::HalfVector(h) => Some(h.to_f32_vec()),
7005            _ => None,
7006        }
7007    };
7008    let l_ty = l.data_type();
7009    let r_ty = r.data_type();
7010    match (to_f32(l), to_f32(r)) {
7011        (Some(a), Some(b)) => {
7012            if a.len() != b.len() {
7013                return Err(EvalError::TypeMismatch {
7014                    detail: format!("vector dim mismatch in {op}: {} vs {}", a.len(), b.len()),
7015                });
7016            }
7017            Ok((a, b))
7018        }
7019        _ => Err(EvalError::TypeMismatch {
7020            detail: format!("{op} requires two vectors, got {l_ty:?} and {r_ty:?}"),
7021        }),
7022    }
7023}
7024
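// Numeric arithmetic with widening — these rules describe `arith`
// (defined after `bitop`), not the bitwise helper directly below:
// - both `Int` → `Int`, promoted to `BigInt` when the result overflows `i32`
// - `Int` op `BigInt` (either side) → `BigInt`
// - any `Float` involved → `Float`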
7029/// Bitwise integer op (`|` / `&`). PG defines these for integer
7030/// types only — SmallInt widens to Int, Int x BigInt widens to
7031/// BigInt, anything else is a type error (mailrs embed round-12).
7032fn bitop(
7033    l: Value,
7034    r: Value,
7035    f: impl Fn(i64, i64) -> i64,
7036    op_name: &str,
7037) -> Result<Value, EvalError> {
7038    let widen = |v: Value| -> Value {
7039        match v {
7040            Value::SmallInt(n) => Value::Int(i32::from(n)),
7041            other => other,
7042        }
7043    };
7044    match (widen(l), widen(r)) {
7045        (Value::Int(a), Value::Int(b)) => {
7046            let result = f(i64::from(a), i64::from(b));
7047            // Two i32 inputs can't overflow i32 under | / &.
7048            Ok(Value::Int(result as i32))
7049        }
7050        (Value::Int(a), Value::BigInt(b)) | (Value::BigInt(b), Value::Int(a)) => {
7051            Ok(Value::BigInt(f(i64::from(a), b)))
7052        }
7053        (Value::BigInt(a), Value::BigInt(b)) => Ok(Value::BigInt(f(a, b))),
7054        (a, b) => Err(EvalError::TypeMismatch {
7055            detail: format!("cannot apply {op_name} to {a:?} and {b:?}"),
7056        }),
7057    }
7058}
7059
7060fn arith(
7061    l: Value,
7062    r: Value,
7063    int_op: impl Fn(i64, i64) -> Option<i64>,
7064    float_op: impl Fn(f64, f64) -> f64,
7065    op_name: &str,
7066) -> Result<Value, EvalError> {
7067    // Widen SmallInt to Int up front so the rest of the arithmetic
7068    // table only deals with Int / BigInt / Float pairs.
7069    let widen = |v: Value| -> Value {
7070        match v {
7071            Value::SmallInt(n) => Value::Int(i32::from(n)),
7072            other => other,
7073        }
7074    };
7075    let l = widen(l);
7076    let r = widen(r);
7077    match (l, r) {
7078        (Value::Int(a), Value::Int(b)) => {
7079            let result = int_op(i64::from(a), i64::from(b)).ok_or(EvalError::TypeMismatch {
7080                detail: format!("integer overflow on {op_name}"),
7081            })?;
7082            if let Ok(small) = i32::try_from(result) {
7083                Ok(Value::Int(small))
7084            } else {
7085                Ok(Value::BigInt(result))
7086            }
7087        }
7088        (Value::Int(a), Value::BigInt(b)) | (Value::BigInt(b), Value::Int(a)) => {
7089            let result = int_op(i64::from(a), b).ok_or(EvalError::TypeMismatch {
7090                detail: format!("bigint overflow on {op_name}"),
7091            })?;
7092            Ok(Value::BigInt(result))
7093        }
7094        (Value::BigInt(a), Value::BigInt(b)) => {
7095            let result = int_op(a, b).ok_or(EvalError::TypeMismatch {
7096                detail: format!("bigint overflow on {op_name}"),
7097            })?;
7098            Ok(Value::BigInt(result))
7099        }
7100        (a, b)
7101            if a.data_type() == Some(DataType::Float) || b.data_type() == Some(DataType::Float) =>
7102        {
7103            let af = as_f64(&a)?;
7104            let bf = as_f64(&b)?;
7105            Ok(Value::Float(float_op(af, bf)))
7106        }
7107        (a, b) => Err(EvalError::TypeMismatch {
7108            detail: format!(
7109                "{op_name} applied to non-numeric: {:?} vs {:?}",
7110                a.data_type(),
7111                b.data_type()
7112            ),
7113        }),
7114    }
7115}
7116
7117/// L2 (Euclidean) distance between two vectors of equal dimension.
7118/// Returned as `Value::Float(d)` so it composes with the existing
7119/// comparison / sort plumbing. Mismatched dims or non-vector operands
7120/// raise `TypeMismatch`.
7121#[allow(clippy::many_single_char_names)] // l, r, a, b, d are the natural names
7122fn l2_distance(l: Value, r: Value) -> Result<Value, EvalError> {
7123    // v6.0.1: route both operands through `unwrap_vec_pair` so SQ8
7124    // cells dequantise on the way in. Sub-f64 precision loss is
7125    // negligible vs the dequantisation noise the SQ8 path already
7126    // ships with.
7127    let (a, b) = unwrap_vec_pair(l, r, "<->")?;
7128    let mut sum: f64 = 0.0;
7129    for (x, y) in a.iter().zip(b.iter()) {
7130        let d = f64::from(*x) - f64::from(*y);
7131        sum += d * d;
7132    }
7133    Ok(Value::Float(sqrt_newton(sum)))
7134}
7135
/// Self-built `sqrt` for `f64` — `std::f64::sqrt` lives in `std`, which the
/// engine's `no_std` constraint disallows.
///
/// Newton-Raphson, iterated until the estimate stops decreasing. A fixed
/// iteration count seeded with `g = x` only halves the guess per round while
/// it is far from the root, which is badly wrong for inputs much larger or
/// smaller than 1 (e.g. `1e12` or `1e-12`). The seed used here halves the
/// exponent bit pattern so it starts within a few percent of the root for
/// normal inputs, and a handful of rounds then suffice.
///
/// Special cases: zero and negative inputs return `0.0` (callers pass sums
/// of squares, which are never negative), NaN returns NaN, and `+∞` returns
/// `+∞`.
fn sqrt_newton(x: f64) -> f64 {
    if x <= 0.0 {
        return 0.0;
    }
    if x.is_nan() || x.is_infinite() {
        return x;
    }
    // Shifting the bit pattern right halves the biased exponent; adding half
    // the exponent bias back (1023 << 51) re-centres it. The result is a
    // strictly positive, finite first guess.
    let seed = f64::from_bits((x.to_bits() >> 1) + 0x1FF8_0000_0000_0000);
    // After one Newton step the estimate sits at or above the true root
    // (AM-GM), and from there the sequence can only decrease.
    let mut g = 0.5 * (seed + x / seed);
    loop {
        let next = 0.5 * (g + x / g);
        if next >= g {
            // No further progress: converged to floating-point precision.
            return g;
        }
        g = next;
    }
}
7152
7153fn div_op(l: Value, r: Value) -> Result<Value, EvalError> {
7154    let any_float = matches!(l.data_type(), Some(DataType::Float))
7155        || matches!(r.data_type(), Some(DataType::Float));
7156    if any_float {
7157        let a = as_f64(&l)?;
7158        let b = as_f64(&r)?;
7159        if b == 0.0 {
7160            return Err(EvalError::DivisionByZero);
7161        }
7162        return Ok(Value::Float(a / b));
7163    }
7164    arith(
7165        l,
7166        r,
7167        |a, b| {
7168            if b == 0 { None } else { Some(a / b) }
7169        },
7170        |a, b| a / b,
7171        "/",
7172    )
7173    .map_err(|e| match e {
7174        // The closure returns None on b == 0; translate that into the dedicated
7175        // DivisionByZero variant instead of "integer overflow on /".
7176        EvalError::TypeMismatch { detail } if detail.contains('/') => EvalError::DivisionByZero,
7177        other => other,
7178    })
7179}
7180
7181fn as_f64(v: &Value) -> Result<f64, EvalError> {
7182    match v {
7183        Value::SmallInt(n) => Ok(f64::from(*n)),
7184        Value::Int(n) => Ok(f64::from(*n)),
7185        #[allow(clippy::cast_precision_loss)]
7186        Value::BigInt(n) => Ok(*n as f64),
7187        Value::Float(x) => Ok(*x),
7188        #[allow(clippy::cast_precision_loss)]
7189        Value::Numeric { scaled, scale } => {
7190            let mut div = 1.0_f64;
7191            for _ in 0..*scale {
7192                div *= 10.0;
7193            }
7194            Ok((*scaled as f64) / div)
7195        }
7196        other => Err(EvalError::TypeMismatch {
7197            detail: format!("cannot convert {:?} to FLOAT", other.data_type()),
7198        }),
7199    }
7200}
7201
7202fn compare(op: BinOp, l: &Value, r: &Value) -> Result<Value, EvalError> {
7203    let ord = match (l, r) {
7204        (Value::Int(a), Value::Int(b)) => i64::from(*a).cmp(&i64::from(*b)),
7205        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7206        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
7207        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
7208        (a, b)
7209            if matches!(a.data_type(), Some(DataType::Float))
7210                || matches!(b.data_type(), Some(DataType::Float)) =>
7211        {
7212            let af = as_f64(a)?;
7213            let bf = as_f64(b)?;
7214            af.partial_cmp(&bf).ok_or(EvalError::TypeMismatch {
7215                detail: "NaN in comparison".into(),
7216            })?
7217        }
7218        (Value::Text(a), Value::Text(b)) => a.cmp(b),
7219        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
7220        // Date / Timestamp compare on their integer storage repr.
7221        // Cross-domain (Date vs Timestamp) lifts the Date to the
7222        // matching midnight TIMESTAMP first.
7223        (Value::Date(a), Value::Date(b)) => a.cmp(b),
7224        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
7225        (Value::Date(a), Value::Timestamp(b)) => (i64::from(*a) * 86_400_000_000).cmp(b),
7226        (Value::Timestamp(a), Value::Date(b)) => a.cmp(&(i64::from(*b) * 86_400_000_000)),
7227        // PG-style implicit coercion: comparing a DATE / TIMESTAMP
7228        // column against a text literal lifts the literal into the
7229        // matching domain (e.g. `day >= '2024-01-01'`).
7230        (Value::Date(a), Value::Text(b)) => {
7231            let bd = parse_date_literal(b).ok_or_else(|| EvalError::TypeMismatch {
7232                detail: format!("cannot parse {b:?} as DATE for comparison"),
7233            })?;
7234            a.cmp(&bd)
7235        }
7236        (Value::Text(a), Value::Date(b)) => {
7237            let ad = parse_date_literal(a).ok_or_else(|| EvalError::TypeMismatch {
7238                detail: format!("cannot parse {a:?} as DATE for comparison"),
7239            })?;
7240            ad.cmp(b)
7241        }
7242        (Value::Timestamp(a), Value::Text(b)) => {
7243            let bt = parse_timestamp_literal(b).ok_or_else(|| EvalError::TypeMismatch {
7244                detail: format!("cannot parse {b:?} as TIMESTAMP for comparison"),
7245            })?;
7246            a.cmp(&bt)
7247        }
7248        (Value::Text(a), Value::Timestamp(b)) => {
7249            let at = parse_timestamp_literal(a).ok_or_else(|| EvalError::TypeMismatch {
7250                detail: format!("cannot parse {a:?} as TIMESTAMP for comparison"),
7251            })?;
7252            at.cmp(b)
7253        }
7254        // v7.17.0 — UUID byte-wise comparison; both sides UUID.
7255        (Value::Uuid(a), Value::Uuid(b)) => a.cmp(b),
7256        // v7.17.0 — PG promotes a `text` literal compared against a
7257        // `uuid` column into uuid (unknown-type literal inference).
7258        // Without this, `WHERE id = '550e...'` falls through to the
7259        // generic TypeMismatch — the application's literal becomes
7260        // an error rather than a comparison.
7261        (Value::Uuid(a), Value::Text(b)) => {
7262            let bu = spg_storage::parse_uuid_str(b).ok_or_else(|| EvalError::TypeMismatch {
7263                detail: format!("invalid input syntax for type uuid: {b:?}"),
7264            })?;
7265            a.cmp(&bu)
7266        }
7267        (Value::Text(a), Value::Uuid(b)) => {
7268            let au = spg_storage::parse_uuid_str(a).ok_or_else(|| EvalError::TypeMismatch {
7269                detail: format!("invalid input syntax for type uuid: {a:?}"),
7270            })?;
7271            au.cmp(b)
7272        }
7273        (a, b) => {
7274            return Err(EvalError::TypeMismatch {
7275                detail: format!(
7276                    "comparison between {:?} and {:?}",
7277                    a.data_type(),
7278                    b.data_type()
7279                ),
7280            });
7281        }
7282    };
7283    let result = match op {
7284        BinOp::Eq => ord.is_eq(),
7285        BinOp::NotEq => !ord.is_eq(),
7286        BinOp::Lt => ord.is_lt(),
7287        BinOp::LtEq => ord.is_le(),
7288        BinOp::Gt => ord.is_gt(),
7289        BinOp::GtEq => ord.is_ge(),
7290        BinOp::And
7291        | BinOp::Or
7292        | BinOp::BitOr
7293        | BinOp::BitAnd
7294        | BinOp::Add
7295        | BinOp::Sub
7296        | BinOp::Mul
7297        | BinOp::Div
7298        | BinOp::L2Distance
7299        | BinOp::InnerProduct
7300        | BinOp::CosineDistance
7301        | BinOp::Concat
7302        | BinOp::JsonGet
7303        | BinOp::JsonGetText
7304        | BinOp::JsonGetPath
7305        | BinOp::JsonGetPathText
7306        | BinOp::JsonContains
7307        | BinOp::TsMatch
7308        | BinOp::IsDistinctFrom
7309        | BinOp::IsNotDistinctFrom
7310        | BinOp::InetContainedBy
7311        | BinOp::InetContainedByEq
7312        | BinOp::InetContains
7313        | BinOp::InetContainsEq
7314        | BinOp::InetOverlap => {
7315            unreachable!("compare() only called with comparison ops")
7316        }
7317    };
7318    Ok(Value::Bool(result))
7319}
7320
7321// SQL three-valued AND / OR.
7322fn and_3vl(l: Value, r: Value) -> Result<Value, EvalError> {
7323    match (l, r) {
7324        (Value::Bool(false), _) | (_, Value::Bool(false)) => Ok(Value::Bool(false)),
7325        (Value::Bool(true), Value::Bool(true)) => Ok(Value::Bool(true)),
7326        (Value::Null, _) | (_, Value::Null) => Ok(Value::Null),
7327        (a, b) => Err(EvalError::TypeMismatch {
7328            detail: format!(
7329                "AND on non-boolean: {:?} and {:?}",
7330                a.data_type(),
7331                b.data_type()
7332            ),
7333        }),
7334    }
7335}
7336
7337fn or_3vl(l: Value, r: Value) -> Result<Value, EvalError> {
7338    match (l, r) {
7339        (Value::Bool(true), _) | (_, Value::Bool(true)) => Ok(Value::Bool(true)),
7340        (Value::Bool(false), Value::Bool(false)) => Ok(Value::Bool(false)),
7341        (Value::Null, _) | (_, Value::Null) => Ok(Value::Null),
7342        (a, b) => Err(EvalError::TypeMismatch {
7343            detail: format!(
7344                "OR on non-boolean: {:?} and {:?}",
7345                a.data_type(),
7346                b.data_type()
7347            ),
7348        }),
7349    }
7350}
7351
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;
    use spg_storage::{ColumnSchema, Row};

    // ---- fixtures -------------------------------------------------------

    /// Column fixture built with `ColumnSchema::new(name, ty, true)`.
    fn col(name: &str, ty: DataType) -> ColumnSchema {
        ColumnSchema::new(name, ty, true)
    }

    /// Evaluation context over `cols` with an optional table alias.
    fn ctx<'a>(cols: &'a [ColumnSchema], alias: Option<&'a str>) -> EvalContext<'a> {
        EvalContext::new(cols, alias)
    }

    /// Integer literal expression.
    fn lit(n: i64) -> Expr {
        Expr::Literal(Literal::Integer(n))
    }

    /// NULL literal expression.
    fn null() -> Expr {
        Expr::Literal(Literal::Null)
    }

    /// Boolean literal expression.
    fn boolean(b: bool) -> Expr {
        Expr::Literal(Literal::Bool(b))
    }

    /// Column reference, optionally qualified.
    fn column(qualifier: Option<&str>, name: &str) -> Expr {
        Expr::Column(ColumnName {
            qualifier: qualifier.map(Into::into),
            name: name.into(),
        })
    }

    /// Unqualified column reference.
    fn col_ref(name: &str) -> Expr {
        column(None, name)
    }

    /// Binary expression `lhs op rhs`.
    fn binary(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
        Expr::Binary {
            lhs: alloc::boxed::Box::new(lhs),
            op,
            rhs: alloc::boxed::Box::new(rhs),
        }
    }

    /// Evaluate `e` against an empty row, with no columns and no alias.
    fn eval_const(e: &Expr) -> Result<Value, EvalError> {
        let row = Row::new(vec![]);
        let cols: [ColumnSchema; 0] = [];
        eval_expr(e, &row, &ctx(&cols, None))
    }

    // ---- literals and column lookup ------------------------------------

    #[test]
    fn literal_evaluates_to_value() {
        assert_eq!(eval_const(&lit(42)).unwrap(), Value::Int(42));
        assert_eq!(
            eval_const(&Expr::Literal(Literal::Float(1.5))).unwrap(),
            Value::Float(1.5)
        );
        assert_eq!(eval_const(&null()).unwrap(), Value::Null);
    }

    #[test]
    fn column_lookup_unqualified() {
        let schema = vec![col("a", DataType::Int), col("b", DataType::Text)];
        let row = Row::new(vec![Value::Int(7), Value::Text("hi".into())]);
        let context = ctx(&schema, None);
        assert_eq!(
            eval_expr(&col_ref("a"), &row, &context).unwrap(),
            Value::Int(7)
        );
        assert_eq!(
            eval_expr(&col_ref("b"), &row, &context).unwrap(),
            Value::Text("hi".into())
        );
    }

    #[test]
    fn column_not_found_errors() {
        let schema = vec![col("a", DataType::Int)];
        let row = Row::new(vec![Value::Int(0)]);
        let context = ctx(&schema, None);
        let err = eval_expr(&col_ref("ghost"), &row, &context).unwrap_err();
        assert!(matches!(err, EvalError::ColumnNotFound { ref name } if name == "ghost"));
    }

    #[test]
    fn qualified_column_matches_alias() {
        let schema = vec![col("a", DataType::Int)];
        let row = Row::new(vec![Value::Int(5)]);
        let context = ctx(&schema, Some("u"));
        let qualified = column(Some("u"), "a");
        assert_eq!(eval_expr(&qualified, &row, &context).unwrap(), Value::Int(5));
    }

    #[test]
    fn qualified_column_unknown_alias_errors() {
        let schema = vec![col("a", DataType::Int)];
        let row = Row::new(vec![Value::Int(5)]);
        let context = ctx(&schema, Some("u"));
        let wrong = column(Some("x"), "a");
        assert!(matches!(
            eval_expr(&wrong, &row, &context).unwrap_err(),
            EvalError::UnknownQualifier { .. }
        ));
    }

    // ---- arithmetic and comparison -------------------------------------

    #[test]
    fn arithmetic_with_widening() {
        let sum = binary(lit(2), BinOp::Add, Expr::Literal(Literal::Float(0.5)));
        assert_eq!(eval_const(&sum).unwrap(), Value::Float(2.5));
    }

    #[test]
    fn division_by_zero_errors() {
        let quotient = binary(lit(1), BinOp::Div, lit(0));
        assert_eq!(
            eval_const(&quotient).unwrap_err(),
            EvalError::DivisionByZero
        );
    }

    #[test]
    fn comparison_returns_bool() {
        let less = binary(lit(1), BinOp::Lt, lit(2));
        assert_eq!(eval_const(&less).unwrap(), Value::Bool(true));
    }

    #[test]
    fn null_propagates_through_arithmetic() {
        let sum = binary(lit(1), BinOp::Add, null());
        assert_eq!(eval_const(&sum).unwrap(), Value::Null);
    }

    // ---- three-valued logic --------------------------------------------

    #[test]
    fn and_three_valued_logic() {
        let and = |lhs: bool, rhs: Expr| binary(boolean(lhs), BinOp::And, rhs);
        // FALSE AND NULL → FALSE
        assert_eq!(
            eval_const(&and(false, null())).unwrap(),
            Value::Bool(false)
        );
        // TRUE AND NULL → NULL
        assert_eq!(eval_const(&and(true, null())).unwrap(), Value::Null);
        // TRUE AND TRUE → TRUE
        assert_eq!(
            eval_const(&and(true, boolean(true))).unwrap(),
            Value::Bool(true)
        );
    }

    #[test]
    fn or_three_valued_logic() {
        let or_with_null = |lhs: bool| binary(boolean(lhs), BinOp::Or, null());
        // TRUE OR NULL → TRUE
        assert_eq!(
            eval_const(&or_with_null(true)).unwrap(),
            Value::Bool(true)
        );
        // FALSE OR NULL → NULL
        assert_eq!(eval_const(&or_with_null(false)).unwrap(), Value::Null);
    }

    #[test]
    fn not_on_null_is_null() {
        let negated = Expr::Unary {
            op: UnOp::Not,
            expr: alloc::boxed::Box::new(null()),
        };
        assert_eq!(eval_const(&negated).unwrap(), Value::Null);
    }

    #[test]
    fn text_comparison_lexicographic() {
        let less = binary(
            Expr::Literal(Literal::String("apple".into())),
            BinOp::Lt,
            Expr::Literal(Literal::String("banana".into())),
        );
        assert_eq!(eval_const(&less).unwrap(), Value::Bool(true));
    }

    // ---- intervals -----------------------------------------------------

    #[test]
    fn interval_format_basics() {
        assert_eq!(format_interval(0, 0), "0");
        assert_eq!(format_interval(0, 86_400_000_000), "1 day");
        assert_eq!(format_interval(0, -86_400_000_000), "-1 days");
        assert_eq!(format_interval(0, 3_600_000_000), "01:00:00");
        assert_eq!(
            format_interval(0, 86_400_000_000 + 9_000_000),
            "1 day 00:00:09"
        );
        assert_eq!(format_interval(14, 0), "1 year 2 mons");
        assert_eq!(format_interval(-1, 0), "-1 mons");
    }

    #[test]
    fn interval_add_to_timestamp_micros_part() {
        // 2024-01-01 00:00:00 + INTERVAL '1 hour' = 2024-01-01 01:00:00
        let midnight = i64::from(days_from_civil(2024, 1, 1)) * 86_400_000_000;
        let shifted = add_interval_to_micros(midnight, 0, 3_600_000_000).unwrap();
        assert_eq!(shifted, midnight + 3_600_000_000);
    }

    #[test]
    fn interval_clamp_month_end() {
        // (start date, month delta, expected date)
        let cases = [
            // 2024-01-31 + 1 month = 2024-02-29 (leap year).
            ((2024, 1, 31), 1, (2024, 2, 29)),
            // 2023-01-31 + 1 month = 2023-02-28 (non-leap).
            ((2023, 1, 31), 1, (2023, 2, 28)),
            // 2024-03-31 - 1 month = 2024-02-29.
            ((2024, 3, 31), -1, (2024, 2, 29)),
        ];
        for ((y, m, d), delta, expected) in cases {
            let start = days_from_civil(y, m, d);
            let shifted = shift_date_by_months(start, delta).unwrap();
            assert_eq!(civil_from_days(shifted), expected);
        }
    }

    #[test]
    fn interval_date_plus_pure_days_stays_date() {
        // DATE + INTERVAL '7 days' must stay DATE.
        let start = Value::Date(days_from_civil(2024, 6, 1));
        let week = Value::Interval {
            months: 0,
            micros: 7 * 86_400_000_000,
        };
        let sum = apply_binary_interval(BinOp::Add, &start, &week)
            .unwrap()
            .unwrap();
        assert_eq!(sum, Value::Date(days_from_civil(2024, 6, 8)));
    }

    #[test]
    fn interval_date_plus_sub_day_lifts_to_timestamp() {
        // DATE + INTERVAL '1 hour' must lift to TIMESTAMP.
        let day = days_from_civil(2024, 6, 1);
        let start = Value::Date(day);
        let hour = Value::Interval {
            months: 0,
            micros: 3_600_000_000,
        };
        let sum = apply_binary_interval(BinOp::Add, &start, &hour)
            .unwrap()
            .unwrap();
        let expected = i64::from(day) * 86_400_000_000 + 3_600_000_000;
        assert_eq!(sum, Value::Timestamp(expected));
    }
}