Skip to main content

spg_engine/
eval.rs

//! Expression evaluator. Given a parsed `Expr`, a `Row`, and the row's column
//! schema, produce a `Value`. v0.4 implements:
//!
//! - literals
//! - column lookups (bare and qualified `t.col`)
//! - unary minus / NOT
//! - binary arithmetic, comparison, AND, OR
//! - numeric widening (`Int → BigInt → Float`) at evaluation time
//! - SQL three-valued logic for NULL:
//!     * any arithmetic / comparison op with a NULL operand → NULL
//!     * `TRUE OR NULL` → TRUE, `FALSE OR NULL` → NULL,
//!     * `FALSE AND NULL` → FALSE, `TRUE AND NULL` → NULL,
//!     * `NOT NULL` → NULL
//!
//! v0.4 deliberately did *not* implement function calls, string
//! concatenation, IS NULL / IS NOT NULL, BETWEEN, IN, etc. Later versions
//! extended `eval_expr` with function calls, `IS [NOT] NULL`, casts, `LIKE`,
//! `EXTRACT`, array constructors and subscripts, `ANY` / `ALL` and `CASE`.
17
18use alloc::boxed::Box;
19use alloc::format;
20use alloc::string::{String, ToString};
21use alloc::vec::Vec;
22
23use spg_sql::ast::{BinOp, CastTarget, ColumnName, Expr, Literal, UnOp};
24use spg_storage::{ColumnSchema, DataType, Row, TsLexeme, TsQueryAst, Value};
25
26/// Resolution context for evaluating a single row. `table_alias` is the alias
27/// (or table name) callers should accept as the qualifier on a column ref —
28/// e.g. `FROM users AS u` makes `u.name` valid and rejects `other.name`.
29#[derive(Clone)]
30#[allow(missing_debug_implementations)] // sequence_resolver is a dyn Fn — no Debug
31pub struct EvalContext<'a> {
32    pub columns: &'a [ColumnSchema],
33    pub table_alias: Option<&'a str>,
34    /// v6.1.1 — bound parameters for `$N` placeholders inside the
35    /// expression tree. Empty for simple queries; populated by the
36    /// prepared-statement Execute path with Bind values converted
37    /// to `Value`. Index N (1-based per PG) hits `params[N-1]`.
38    pub params: &'a [Value],
39    /// v7.12.1 — session text-search config (from `SET
40    /// default_text_search_config = '<name>'`). Resolved when the
41    /// engine builds an `EvalContext` and consumed by the FTS
42    /// function dispatcher when `to_tsvector(text)` /
43    /// `plainto_tsquery(text)` etc are called without an explicit
44    /// config arg. `None` falls through to `simple`.
45    pub default_text_search_config: Option<&'a str>,
46    /// v7.17.0 Phase 1.1 — `nextval` / `currval` / `setval`
47    /// resolver. The engine builds this around a `&mut Catalog`
48    /// so apply_function can mutate sequence state without
49    /// eval owning a catalog reference. When `None`, sequence
50    /// functions return an error (read-only contexts).
51    pub sequence_resolver: Option<&'a SequenceResolver<'a>>,
52}
53
54/// v7.17.0 — sequence-mutating callback used by `apply_function`
55/// for `nextval` / `currval` / `setval`. Implemented by the
56/// engine to thread `&mut Catalog` access through an immutable
57/// `&EvalContext`.
58pub type SequenceResolver<'a> = dyn Fn(SequenceOp) -> Result<i64, EvalError> + 'a;
59
/// v7.17.0 — a sequence operation requested while evaluating an expression.
///
/// Each variant corresponds to one SQL sequence function and carries the
/// sequence name taken from the function's first argument.
#[derive(Debug, Clone)]
pub enum SequenceOp {
    /// `nextval(name)`.
    Next(String),
    /// `currval(name)`.
    Curr(String),
    /// `setval(name, value [, is_called])`.
    Set {
        /// Name of the sequence to modify.
        name: String,
        /// Value passed as the second argument.
        value: i64,
        /// Third argument of `setval`; the two-argument form passes `true`.
        is_called: bool,
    },
}
71
72impl<'a> EvalContext<'a> {
73    pub const fn new(columns: &'a [ColumnSchema], table_alias: Option<&'a str>) -> Self {
74        Self {
75            columns,
76            table_alias,
77            params: &[],
78            default_text_search_config: None,
79            sequence_resolver: None,
80        }
81    }
82
83    /// v7.17.0 — attach a sequence resolver. The engine wraps a
84    /// `&mut Catalog` in a closure that performs the requested
85    /// SequenceOp.
86    #[must_use]
87    pub const fn with_sequence_resolver(mut self, resolver: &'a SequenceResolver<'a>) -> Self {
88        self.sequence_resolver = Some(resolver);
89        self
90    }
91
92    /// v6.1.1 — attach a parameter buffer for `$N` placeholder
93    /// resolution. The slice must outlive the context; callers
94    /// construct it from the prepared statement's Bind values.
95    #[must_use]
96    pub const fn with_params(mut self, params: &'a [Value]) -> Self {
97        self.params = params;
98        self
99    }
100
101    /// v7.12.1 — attach the session's
102    /// `default_text_search_config`. Used by the FTS function
103    /// dispatcher when no explicit config arg is given.
104    #[must_use]
105    pub const fn with_default_text_search_config(mut self, cfg: Option<&'a str>) -> Self {
106        self.default_text_search_config = cfg;
107        self
108    }
109}
110
/// Failures that expression evaluation can report.
#[derive(Debug, Clone, PartialEq)]
pub enum EvalError {
    /// Reported as "column not found"; `name` is the column that was
    /// looked up.
    ColumnNotFound { name: String },
    /// Reported as "unknown table qualifier"; `qualifier` is the rejected
    /// qualifier.
    UnknownQualifier { qualifier: String },
    /// Reported as "division by zero".
    DivisionByZero,
    /// An operand, argument or expression node had an unusable type;
    /// `detail` carries the human-readable explanation.
    TypeMismatch { detail: String },
    /// v6.1.1 — a `$N` reference beyond the number of bound parameters:
    /// either the client sent too few values in Bind, or the SQL contains a
    /// placeholder the prepared statement did not account for.
    PlaceholderOutOfRange {
        /// The placeholder number as written (`$n`).
        n: u16,
        /// How many parameters were actually bound.
        bound: u16,
    },
}
131
132impl core::fmt::Display for EvalError {
133    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
134        match self {
135            Self::ColumnNotFound { name } => write!(f, "column not found: {name}"),
136            Self::UnknownQualifier { qualifier } => {
137                write!(f, "unknown table qualifier: {qualifier}")
138            }
139            Self::DivisionByZero => f.write_str("division by zero"),
140            Self::TypeMismatch { detail } => write!(f, "type mismatch: {detail}"),
141            Self::PlaceholderOutOfRange { n, bound } => write!(
142                f,
143                "parameter ${n} referenced but only {bound} bound by client"
144            ),
145        }
146    }
147}
148
149pub fn eval_expr(expr: &Expr, row: &Row, ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
150    match expr {
151        Expr::Literal(l) => Ok(literal_to_value(l)),
152        Expr::Column(c) => resolve_column(c, row, ctx),
153        Expr::Placeholder(n) => {
154            let idx = usize::from(*n).saturating_sub(1);
155            ctx.params
156                .get(idx)
157                .cloned()
158                .ok_or_else(|| EvalError::PlaceholderOutOfRange {
159                    n: *n,
160                    bound: u16::try_from(ctx.params.len()).unwrap_or(u16::MAX),
161                })
162        }
163        Expr::Unary { op, expr } => {
164            let v = eval_expr(expr, row, ctx)?;
165            apply_unary(*op, v)
166        }
167        Expr::Binary { lhs, op, rhs } => {
168            let l = eval_expr(lhs, row, ctx)?;
169            let r = eval_expr(rhs, row, ctx)?;
170            // v7.17.0 Phase 2.5 — collation-aware text comparison.
171            // When either operand of a comparison op references a
172            // column declared `COLLATE "case_insensitive"` (or any
173            // MySQL `_ci` collation), case-fold both sides before
174            // the byte-wise compare so `WHERE name = 'foo'` matches
175            // stored `'Foo'`. Non-Text values fall straight through
176            // — the helper is a no-op outside Text-Text equality
177            // and inequality.
178            let (l, r) = collation_fold_for_compare(*op, lhs, rhs, l, r, ctx);
179            apply_binary(*op, l, r)
180        }
181        Expr::Cast { expr, target } => {
182            let v = eval_expr(expr, row, ctx)?;
183            cast_value(v, *target)
184        }
185        Expr::IsNull { expr, negated } => {
186            let v = eval_expr(expr, row, ctx)?;
187            let is_null = matches!(v, Value::Null);
188            Ok(Value::Bool(if *negated { !is_null } else { is_null }))
189        }
190        Expr::FunctionCall { name, args } => {
191            let evaluated: Result<Vec<Value>, _> =
192                args.iter().map(|a| eval_expr(a, row, ctx)).collect();
193            apply_function(name, &evaluated?, ctx)
194        }
195        Expr::Like {
196            expr,
197            pattern,
198            negated,
199        } => {
200            let v = eval_expr(expr, row, ctx)?;
201            let p = eval_expr(pattern, row, ctx)?;
202            // NULL on either side propagates to NULL — same as PG.
203            let (text, pat) = match (v, p) {
204                (Value::Null, _) | (_, Value::Null) => return Ok(Value::Null),
205                (Value::Text(a), Value::Text(b)) => (a, b),
206                (Value::Text(_), other) | (other, _) => {
207                    return Err(EvalError::TypeMismatch {
208                        detail: format!("LIKE requires text operands, got {:?}", other.data_type()),
209                    });
210                }
211            };
212            let m = like_match(&text, &pat);
213            Ok(Value::Bool(if *negated { !m } else { m }))
214        }
215        Expr::Extract { field, source } => {
216            let v = eval_expr(source, row, ctx)?;
217            extract_field(*field, &v)
218        }
219        // v4.10: subquery nodes should have been resolved into
220        // Literal / Binary-Eq-OR chains by Engine::resolve_select_subqueries
221        // before the row loop. Anything reaching here is a bug.
222        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {
223            Err(EvalError::TypeMismatch {
224                detail: "subquery reached row eval — engine resolver bug".into(),
225            })
226        }
227        // v4.12: window functions should have been rewritten into
228        // synthetic __win_N column references by
229        // exec_select_with_window before row eval. Anything
230        // reaching here is similarly a bug.
231        Expr::WindowFunction { .. } => Err(EvalError::TypeMismatch {
232            detail: "window function reached row eval — engine rewrite bug".into(),
233        }),
234        // v7.10.10 — `ARRAY[expr, expr, …]` constructor.
235        // v7.11.13 — element-type detection: all integers →
236        // IntArray (or BigIntArray when widening), any Text →
237        // TextArray. Non-TEXT non-integer elements (Bool, Float)
238        // stringify into TextArray as the safe default.
239        Expr::Array(items) => {
240            let mut materialised: Vec<Value> = Vec::with_capacity(items.len());
241            for elem in items {
242                materialised.push(eval_expr(elem, row, ctx)?);
243            }
244            let mut has_text = false;
245            let mut has_bigint = false;
246            let mut has_int = false;
247            for v in &materialised {
248                match v {
249                    Value::Null => {}
250                    Value::Int(_) | Value::SmallInt(_) => has_int = true,
251                    Value::BigInt(_) => has_bigint = true,
252                    Value::Text(_) | Value::Json(_) => has_text = true,
253                    _ => has_text = true,
254                }
255            }
256            if has_text || (!has_int && !has_bigint) {
257                let out: Vec<Option<String>> = materialised
258                    .into_iter()
259                    .map(|v| match v {
260                        Value::Null => None,
261                        Value::Text(s) | Value::Json(s) => Some(s),
262                        other => Some(value_to_text_for_array(&other)),
263                    })
264                    .collect();
265                return Ok(Value::TextArray(out));
266            }
267            if has_bigint {
268                let out: Vec<Option<i64>> = materialised
269                    .into_iter()
270                    .map(|v| match v {
271                        Value::Null => None,
272                        Value::Int(n) => Some(i64::from(n)),
273                        Value::SmallInt(n) => Some(i64::from(n)),
274                        Value::BigInt(n) => Some(n),
275                        _ => unreachable!(),
276                    })
277                    .collect();
278                return Ok(Value::BigIntArray(out));
279            }
280            let out: Vec<Option<i32>> = materialised
281                .into_iter()
282                .map(|v| match v {
283                    Value::Null => None,
284                    Value::Int(n) => Some(n),
285                    Value::SmallInt(n) => Some(i32::from(n)),
286                    _ => unreachable!(),
287                })
288                .collect();
289            Ok(Value::IntArray(out))
290        }
291        // v7.10.12 — `arr[i]` PG-style 1-based indexing.
292        // Out-of-range indices (including i ≤ 0) return NULL.
293        Expr::ArraySubscript { target, index } => {
294            let target_v = eval_expr(target, row, ctx)?;
295            let idx_v = eval_expr(index, row, ctx)?;
296            if matches!(target_v, Value::Null) || matches!(idx_v, Value::Null) {
297                return Ok(Value::Null);
298            }
299            let i: i64 = match idx_v {
300                Value::Int(n) => i64::from(n),
301                Value::BigInt(n) => n,
302                Value::SmallInt(n) => i64::from(n),
303                other => {
304                    return Err(EvalError::TypeMismatch {
305                        detail: format!(
306                            "array subscript must be integer, got {:?}",
307                            other.data_type()
308                        ),
309                    });
310                }
311            };
312            if i < 1 {
313                return Ok(Value::Null);
314            }
315            let pos = (i - 1) as usize;
316            match target_v {
317                Value::TextArray(items) => match items.get(pos) {
318                    Some(Some(s)) => Ok(Value::Text(s.clone())),
319                    Some(None) | None => Ok(Value::Null),
320                },
321                Value::IntArray(items) => match items.get(pos) {
322                    Some(Some(n)) => Ok(Value::Int(*n)),
323                    Some(None) | None => Ok(Value::Null),
324                },
325                Value::BigIntArray(items) => match items.get(pos) {
326                    Some(Some(n)) => Ok(Value::BigInt(*n)),
327                    Some(None) | None => Ok(Value::Null),
328                },
329                other => Err(EvalError::TypeMismatch {
330                    detail: format!(
331                        "subscript target must be an array, got {:?}",
332                        other.data_type()
333                    ),
334                }),
335            }
336        }
337        // v7.10.12 — `x op ANY(arr)` / `x op ALL(arr)`. PG
338        // 3VL: ANY → true if any element compares-true; NULL if
339        // no true but some NULL; false otherwise. ALL: false if
340        // any compares-false; NULL if no false but some NULL;
341        // true otherwise.
342        Expr::AnyAll {
343            expr,
344            op,
345            array,
346            is_any,
347        } => {
348            let lhs = eval_expr(expr, row, ctx)?;
349            let arr = eval_expr(array, row, ctx)?;
350            if matches!(arr, Value::Null) {
351                return Ok(Value::Null);
352            }
353            let elems: Vec<Option<Value>> = match arr {
354                Value::TextArray(items) => items.into_iter().map(|o| o.map(Value::Text)).collect(),
355                Value::IntArray(items) => items.into_iter().map(|o| o.map(Value::Int)).collect(),
356                Value::BigIntArray(items) => {
357                    items.into_iter().map(|o| o.map(Value::BigInt)).collect()
358                }
359                other => {
360                    return Err(EvalError::TypeMismatch {
361                        detail: format!(
362                            "ANY/ALL right-hand side must be an array, got {:?}",
363                            other.data_type()
364                        ),
365                    });
366                }
367            };
368            let mut saw_null = matches!(lhs, Value::Null);
369            let mut saw_match = false;
370            let mut saw_mismatch = false;
371            for elem in elems {
372                let elem_v = match elem {
373                    Some(v) => v,
374                    None => {
375                        saw_null = true;
376                        continue;
377                    }
378                };
379                if matches!(lhs, Value::Null) {
380                    saw_null = true;
381                    continue;
382                }
383                match apply_binary(*op, lhs.clone(), elem_v) {
384                    Ok(Value::Bool(true)) => saw_match = true,
385                    Ok(Value::Bool(false)) => saw_mismatch = true,
386                    Ok(Value::Null) => saw_null = true,
387                    Ok(other) => {
388                        return Err(EvalError::TypeMismatch {
389                            detail: format!(
390                                "ANY/ALL comparison didn't return Bool: {:?}",
391                                other.data_type()
392                            ),
393                        });
394                    }
395                    Err(e) => return Err(e),
396                }
397            }
398            let result = if *is_any {
399                if saw_match {
400                    Value::Bool(true)
401                } else if saw_null {
402                    Value::Null
403                } else {
404                    Value::Bool(false)
405                }
406            } else if saw_mismatch {
407                Value::Bool(false)
408            } else if saw_null {
409                Value::Null
410            } else {
411                Value::Bool(true)
412            };
413            Ok(result)
414        }
415        // v7.13.0 — CASE WHEN … END (mailrs round-5 G9).
416        // Short-circuit on the first matching branch. Searched form
417        // (operand=None) treats each branch's WHEN as a Bool
418        // predicate. Simple form (operand=Some) compares with =.
419        // ELSE on no match; NULL if no ELSE.
420        Expr::Case {
421            operand,
422            branches,
423            else_branch,
424        } => {
425            let operand_value = match operand {
426                Some(o) => Some(eval_expr(o, row, ctx)?),
427                None => None,
428            };
429            for (when_expr, then_expr) in branches {
430                let when_value = eval_expr(when_expr, row, ctx)?;
431                let matched = match &operand_value {
432                    None => matches!(when_value, Value::Bool(true)),
433                    Some(op_v) => matches!(
434                        apply_binary(spg_sql::ast::BinOp::Eq, op_v.clone(), when_value)?,
435                        Value::Bool(true)
436                    ),
437                };
438                if matched {
439                    return eval_expr(then_expr, row, ctx);
440                }
441            }
442            match else_branch {
443                Some(e) => eval_expr(e, row, ctx),
444                None => Ok(Value::Null),
445            }
446        }
447    }
448}
449
450/// v7.10.10 — best-effort text rendering for non-TEXT array
451/// elements (numbers, bools, etc.). The PG rule is that
452/// `ARRAY[1, 2]` is `int[]`, but SPG's v7.10 only models TEXT[],
453/// so we widen by stringifying. NUMERIC formatting goes through
454/// the existing canonical helpers to stay consistent with
455/// `format_numeric` / `format_date` etc.
456fn value_to_text_for_array(v: &Value) -> String {
457    match v {
458        Value::Text(s) | Value::Json(s) => s.clone(),
459        Value::Int(n) => n.to_string(),
460        Value::BigInt(n) => n.to_string(),
461        Value::SmallInt(n) => n.to_string(),
462        Value::Bool(b) => {
463            if *b {
464                "true".into()
465            } else {
466                "false".into()
467            }
468        }
469        Value::Float(x) => format!("{x}"),
470        Value::Date(d) => format_date(*d),
471        Value::Timestamp(t) => format_timestamp(*t),
472        Value::Numeric { scaled, scale } => format_numeric(*scaled, *scale),
473        _ => format!("{v:?}"),
474    }
475}
476
477/// Pull an integer component (year / month / ... / microsecond) out
478/// of a `DATE` or `TIMESTAMP`. Returns NULL on a NULL source, errors
479/// when the source isn't a calendar type.
480fn extract_field(field: spg_sql::ast::ExtractField, v: &Value) -> Result<Value, EvalError> {
481    use spg_sql::ast::ExtractField as F;
482    if matches!(v, Value::Null) {
483        return Ok(Value::Null);
484    }
485    // INTERVAL has its own decomposition — `YEAR` / `MONTH` come from
486    // the months part, the rest from the microseconds part. PG matches
487    // this convention (months is normalised modulo 12 for MONTH).
488    if let Value::Interval { months, micros } = *v {
489        let years = months / 12;
490        let mons = months % 12;
491        let secs_total = micros / 1_000_000;
492        let frac = micros % 1_000_000;
493        let result = match field {
494            F::Year => i64::from(years),
495            F::Month => i64::from(mons),
496            F::Day => micros / 86_400_000_000,
497            F::Hour => (secs_total / 3600) % 24,
498            F::Minute => (secs_total / 60) % 60,
499            F::Second => secs_total % 60,
500            F::Microsecond => (secs_total % 60) * 1_000_000 + frac,
501        };
502        return Ok(Value::BigInt(result));
503    }
504    let (days, day_micros) = match *v {
505        Value::Date(d) => (d, 0_i64),
506        Value::Timestamp(t) => {
507            let days = t.div_euclid(86_400_000_000);
508            let day_micros = t.rem_euclid(86_400_000_000);
509            (i32::try_from(days).unwrap_or(i32::MAX), day_micros)
510        }
511        _ => {
512            return Err(EvalError::TypeMismatch {
513                detail: format!(
514                    "EXTRACT requires DATE / TIMESTAMP / INTERVAL, got {:?}",
515                    v.data_type()
516                ),
517            });
518        }
519    };
520    let (y, m, d) = civil_components(days);
521    let secs = day_micros / 1_000_000;
522    let hh = secs / 3600;
523    let mm = (secs / 60) % 60;
524    let ss = secs % 60;
525    let frac = day_micros % 1_000_000;
526    let result = match field {
527        F::Year => i64::from(y),
528        F::Month => i64::from(m),
529        F::Day => i64::from(d),
530        F::Hour => hh,
531        F::Minute => mm,
532        F::Second => ss,
533        F::Microsecond => ss * 1_000_000 + frac,
534    };
535    Ok(Value::BigInt(result))
536}
537
538/// Internal wrapper around the file-private `civil_from_days` so the
539/// public surface area doesn't change. Returns `(year, month, day)`.
540fn civil_components(days: i32) -> (i32, u32, u32) {
541    civil_from_days(days)
542}
543
544/// SQL `LIKE` matcher. Wildcards are `%` (any run, possibly empty) and `_`
545/// (exactly one char). `\` escapes the next pattern char so `\%` matches a
546/// literal `%`. Matches the whole input — no implicit anchoring needed
547/// since SQL `LIKE` is always full-string.
548fn like_match(text: &str, pattern: &str) -> bool {
549    let text: Vec<char> = text.chars().collect();
550    let pat: Vec<char> = pattern.chars().collect();
551    like_match_inner(&text, 0, &pat, 0)
552}
553
/// Matches `text[ti..]` against the `LIKE` pattern `pat[pi..]`; the pattern
/// must account for the whole remaining text.
///
/// Pattern tokens:
/// - `%` matches any run of characters, possibly empty;
/// - `_` matches exactly one character;
/// - `\x` matches the literal character `x`;
/// - any other character — including a lone trailing `\` — matches itself.
///
/// The scan moves forward once and remembers only the most recent `%`. On a
/// mismatch that `%` absorbs one more character and matching resumes right
/// after it. Earlier `%`s never need revisiting: whatever they could absorb,
/// the latest one can absorb as well. This bounds the work by
/// `text.len() * pat.len()`, whereas trying every split recursively at each
/// `%` is exponential for patterns such as `%a%a%a…%b`.
fn like_match_inner(text: &[char], mut ti: usize, pat: &[char], mut pi: usize) -> bool {
    // (pattern index just past the latest `%`, text index where the
    // remainder of the pattern was last tried).
    let mut resume: Option<(usize, usize)> = None;

    while ti < text.len() {
        // How many pattern chars the current token spans, if it matches
        // `text[ti]`; `None` on a mismatch or an exhausted pattern.
        let consumed: Option<usize> = match pat.get(pi).copied() {
            Some('%') => {
                pi += 1;
                resume = Some((pi, ti));
                continue;
            }
            Some('_') => Some(1),
            Some('\\') if pi + 1 < pat.len() => {
                if text[ti] == pat[pi + 1] {
                    Some(2)
                } else {
                    None
                }
            }
            Some(c) => {
                if text[ti] == c {
                    Some(1)
                } else {
                    None
                }
            }
            None => None,
        };

        match (consumed, resume) {
            (Some(width), _) => {
                ti += 1;
                pi += width;
            }
            // Let the latest `%` swallow one more character and retry the
            // rest of the pattern from there.
            (None, Some((after_wild, tried_at))) => {
                let retry_at = tried_at + 1;
                resume = Some((after_wild, retry_at));
                ti = retry_at;
                pi = after_wild;
            }
            (None, None) => return false,
        }
    }

    // Text exhausted: the match succeeds only if the pattern is exhausted
    // too, or if nothing but `%` (which matches the empty run) remains.
    if pi >= pat.len() {
        return ti == text.len();
    }
    pat[pi..].iter().all(|&c| c == '%')
}
598
599/// Dispatch on lowercased function name. v1.4 implements only a handful of
600/// scalar functions; aggregates land in v1.5 alongside GROUP BY.
601fn apply_function(name: &str, args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
602    match name.to_ascii_lowercase().as_str() {
603        // v7.17.0 Phase 1.1 — SEQUENCE accessor functions.
604        "nextval" => {
605            if args.len() != 1 {
606                return Err(EvalError::TypeMismatch {
607                    detail: format!("nextval() takes 1 arg, got {}", args.len()),
608                });
609            }
610            let seq_name = match &args[0] {
611                Value::Text(s) => s.clone(),
612                Value::Null => return Ok(Value::Null),
613                other => {
614                    return Err(EvalError::TypeMismatch {
615                        detail: format!(
616                            "nextval() argument must be TEXT, got {:?}",
617                            other.data_type()
618                        ),
619                    });
620                }
621            };
622            let resolver = ctx
623                .sequence_resolver
624                .ok_or_else(|| EvalError::TypeMismatch {
625                    detail: "nextval() requires a sequence resolver (read-only context)".into(),
626                })?;
627            let v = resolver(SequenceOp::Next(seq_name))?;
628            Ok(Value::BigInt(v))
629        }
630        "currval" => {
631            if args.len() != 1 {
632                return Err(EvalError::TypeMismatch {
633                    detail: format!("currval() takes 1 arg, got {}", args.len()),
634                });
635            }
636            let seq_name = match &args[0] {
637                Value::Text(s) => s.clone(),
638                Value::Null => return Ok(Value::Null),
639                other => {
640                    return Err(EvalError::TypeMismatch {
641                        detail: format!(
642                            "currval() argument must be TEXT, got {:?}",
643                            other.data_type()
644                        ),
645                    });
646                }
647            };
648            let resolver = ctx
649                .sequence_resolver
650                .ok_or_else(|| EvalError::TypeMismatch {
651                    detail: "currval() requires a sequence resolver (read-only context)".into(),
652                })?;
653            let v = resolver(SequenceOp::Curr(seq_name))?;
654            Ok(Value::BigInt(v))
655        }
656        "setval" => {
657            if args.len() != 2 && args.len() != 3 {
658                return Err(EvalError::TypeMismatch {
659                    detail: format!("setval() takes 2 or 3 args, got {}", args.len()),
660                });
661            }
662            let seq_name = match &args[0] {
663                Value::Text(s) => s.clone(),
664                Value::Null => return Ok(Value::Null),
665                other => {
666                    return Err(EvalError::TypeMismatch {
667                        detail: format!(
668                            "setval() name argument must be TEXT, got {:?}",
669                            other.data_type()
670                        ),
671                    });
672                }
673            };
674            let value = match &args[1] {
675                Value::SmallInt(n) => i64::from(*n),
676                Value::Int(n) => i64::from(*n),
677                Value::BigInt(n) => *n,
678                Value::Null => return Ok(Value::Null),
679                other => {
680                    return Err(EvalError::TypeMismatch {
681                        detail: format!(
682                            "setval() value argument must be integer, got {:?}",
683                            other.data_type()
684                        ),
685                    });
686                }
687            };
688            let is_called = if args.len() == 3 {
689                match &args[2] {
690                    Value::Bool(b) => *b,
691                    Value::Null => return Ok(Value::Null),
692                    other => {
693                        return Err(EvalError::TypeMismatch {
694                            detail: format!(
695                                "setval() is_called argument must be BOOL, got {:?}",
696                                other.data_type()
697                            ),
698                        });
699                    }
700                }
701            } else {
702                true
703            };
704            let resolver = ctx
705                .sequence_resolver
706                .ok_or_else(|| EvalError::TypeMismatch {
707                    detail: "setval() requires a sequence resolver (read-only context)".into(),
708                })?;
709            let v = resolver(SequenceOp::Set {
710                name: seq_name,
711                value,
712                is_called,
713            })?;
714            Ok(Value::BigInt(v))
715        }
716        "length" => {
717            if args.len() != 1 {
718                return Err(EvalError::TypeMismatch {
719                    detail: format!("length() takes 1 arg, got {}", args.len()),
720                });
721            }
722            match &args[0] {
723                Value::Null => Ok(Value::Null),
724                Value::Text(s) => {
725                    let n = i32::try_from(s.chars().count()).unwrap_or(i32::MAX);
726                    Ok(Value::Int(n))
727                }
728                // v7.10.4 — PG semantics: length(bytea) returns
729                // byte count (= octet_length). Without this branch
730                // mailrs's INSERT … SELECT length(body) … against a
731                // BYTEA column would type-mismatch.
732                Value::Bytes(b) => {
733                    let n = i32::try_from(b.len()).unwrap_or(i32::MAX);
734                    Ok(Value::Int(n))
735                }
736                other => Err(EvalError::TypeMismatch {
737                    detail: format!("length() needs text or bytea, got {:?}", other.data_type()),
738                }),
739            }
740        }
741        // v7.10.4 — `OCTET_LENGTH(x)` returns byte count for both
742        // TEXT (UTF-8 byte length) and BYTEA. PG-spec name; aliases
743        // to length() for bytea by design.
744        "octet_length" => {
745            if args.len() != 1 {
746                return Err(EvalError::TypeMismatch {
747                    detail: format!("octet_length() takes 1 arg, got {}", args.len()),
748                });
749            }
750            match &args[0] {
751                Value::Null => Ok(Value::Null),
752                Value::Text(s) => {
753                    let n = i32::try_from(s.len()).unwrap_or(i32::MAX);
754                    Ok(Value::Int(n))
755                }
756                Value::Bytes(b) => {
757                    let n = i32::try_from(b.len()).unwrap_or(i32::MAX);
758                    Ok(Value::Int(n))
759                }
760                other => Err(EvalError::TypeMismatch {
761                    detail: format!(
762                        "octet_length() needs text or bytea, got {:?}",
763                        other.data_type()
764                    ),
765                }),
766            }
767        }
768        // v7.11.6 — `array_length(arr, dim)` returns the element
769        // count of `arr` along dimension `dim`. v7.11 only models
770        // single-dimension arrays so dim must be 1 (otherwise NULL,
771        // matching PG semantics for unsupported dimensions). NULL
772        // array or dim → NULL. Text, int and bigint arrays are
773        // accepted; a non-array operand is a type mismatch.
774        "array_length" => {
775            if args.len() != 2 {
776                return Err(EvalError::TypeMismatch {
777                    detail: format!("array_length() takes 2 args, got {}", args.len()),
778                });
779            }
780            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
781                return Ok(Value::Null);
782            }
783            let len = match &args[0] {
784                Value::TextArray(items) => items.len(),
785                Value::IntArray(items) => items.len(),
786                Value::BigIntArray(items) => items.len(),
787                _ => {
788                    return Err(EvalError::TypeMismatch {
789                        detail: format!(
790                            "array_length() first arg must be an array, got {:?}",
791                            args[0].data_type()
792                        ),
793                    });
794                }
795            };
796            let dim: i64 = match args[1] {
797                Value::Int(n) => i64::from(n),
798                Value::BigInt(n) => n,
799                Value::SmallInt(n) => i64::from(n),
800                _ => {
801                    return Err(EvalError::TypeMismatch {
802                        detail: format!(
803                            "array_length() second arg must be integer, got {:?}",
804                            args[1].data_type()
805                        ),
806                    });
807                }
808            };
809            if dim != 1 {
810                return Ok(Value::Null);
811            }
812            let n = i32::try_from(len).unwrap_or(i32::MAX);
813            Ok(Value::Int(n))
814        }
815        // v7.11.6 — `array_position(arr, val)` returns 1-based
816        // index of the first element of `arr` equal to `val`, or
817        // NULL if not found. NULL handling: NULL array → NULL;
818        // NULL val → NULL (unlike PG, NULL elements never match).
819        "array_position" => {
820            if args.len() != 2 {
821                return Err(EvalError::TypeMismatch {
822                    detail: format!("array_position() takes 2 args, got {}", args.len()),
823                });
824            }
825            if matches!(args[0], Value::Null) {
826                return Ok(Value::Null);
827            }
828            if matches!(args[1], Value::Null) {
829                return Ok(Value::Null);
830            }
831            match (&args[0], &args[1]) {
832                (Value::TextArray(items), Value::Text(needle)) => {
833                    for (idx, item) in items.iter().enumerate() {
834                        if let Some(s) = item
835                            && s == needle
836                        {
837                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
838                        }
839                    }
840                    Ok(Value::Null)
841                }
842                (Value::IntArray(items), needle_v)
843                    if matches!(
844                        needle_v,
845                        Value::Int(_) | Value::SmallInt(_) | Value::BigInt(_)
846                    ) =>
847                {
848                    let needle: i64 = match *needle_v {
849                        Value::Int(n) => i64::from(n),
850                        Value::SmallInt(n) => i64::from(n),
851                        Value::BigInt(n) => n,
852                        _ => unreachable!(),
853                    };
854                    for (idx, item) in items.iter().enumerate() {
855                        if let Some(n) = item
856                            && i64::from(*n) == needle
857                        {
858                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
859                        }
860                    }
861                    Ok(Value::Null)
862                }
863                (Value::BigIntArray(items), needle_v)
864                    if matches!(
865                        needle_v,
866                        Value::Int(_) | Value::SmallInt(_) | Value::BigInt(_)
867                    ) =>
868                {
869                    let needle: i64 = match *needle_v {
870                        Value::Int(n) => i64::from(n),
871                        Value::SmallInt(n) => i64::from(n),
872                        Value::BigInt(n) => n,
873                        _ => unreachable!(),
874                    };
875                    for (idx, item) in items.iter().enumerate() {
876                        if let Some(n) = item
877                            && *n == needle
878                        {
879                            return Ok(Value::Int(i32::try_from(idx + 1).unwrap_or(i32::MAX)));
880                        }
881                    }
882                    Ok(Value::Null)
883                }
884                (
885                    arr @ (Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_)),
886                    other,
887                ) => Err(EvalError::TypeMismatch {
888                    detail: format!(
889                        "array_position() needle type {:?} doesn't match array {:?}",
890                        other.data_type(),
891                        arr.data_type()
892                    ),
893                }),
894                (other, _) => Err(EvalError::TypeMismatch {
895                    detail: format!(
896                        "array_position() first arg must be an array, got {:?}",
897                        other.data_type()
898                    ),
899                }),
900            }
901        }
902        // v7.11.15 — `substring(s, start)` / `substring(s, start, length)`
903        // for both TEXT and BYTEA. PG semantics: `start` is 1-based;
904        // values ≤ 0 clamp into the string (i.e. effective start is
905        // adjusted so the window still begins at index 1 — but
906        // `length` is reduced by the clipped prefix). A NULL arg
907        // makes the result NULL. Out-of-range windows return an
908        // empty value, not NULL.
909        "substring" | "substr" => {
910            if !matches!(args.len(), 2 | 3) {
911                return Err(EvalError::TypeMismatch {
912                    detail: format!("substring() takes 2 or 3 args, got {}", args.len()),
913                });
914            }
915            if args.iter().any(|a| matches!(a, Value::Null)) {
916                return Ok(Value::Null);
917            }
918            let start: i64 = match args[1] {
919                Value::Int(n) => i64::from(n),
920                Value::BigInt(n) => n,
921                Value::SmallInt(n) => i64::from(n),
922                _ => {
923                    return Err(EvalError::TypeMismatch {
924                        detail: format!(
925                            "substring() start must be integer, got {:?}",
926                            args[1].data_type()
927                        ),
928                    });
929                }
930            };
931            let length: Option<i64> = if args.len() == 3 {
932                match args[2] {
933                    Value::Int(n) => Some(i64::from(n)),
934                    Value::BigInt(n) => Some(n),
935                    Value::SmallInt(n) => Some(i64::from(n)),
936                    _ => {
937                        return Err(EvalError::TypeMismatch {
938                            detail: format!(
939                                "substring() length must be integer, got {:?}",
940                                args[2].data_type()
941                            ),
942                        });
943                    }
944                }
945            } else {
946                None
947            };
948            // When length is given, end = start + length; a negative
949            // length or end ≤ 1 yields empty. Clip start to 1.
950            let (effective_start, effective_length): (i64, Option<i64>) = match length {
951                Some(len) => {
952                    let end = start.saturating_add(len);
953                    if end <= 1 || len < 0 {
954                        return Ok(match &args[0] {
955                            Value::Text(_) => Value::Text(String::new()),
956                            Value::Bytes(_) => Value::Bytes(Vec::new()),
957                            other => {
958                                return Err(EvalError::TypeMismatch {
959                                    detail: format!(
960                                        "substring() needs text or bytea, got {:?}",
961                                        other.data_type()
962                                    ),
963                                });
964                            }
965                        });
966                    }
967                    let eff_start = start.max(1);
968                    let eff_len = end - eff_start;
969                    (eff_start, Some(eff_len.max(0)))
970                }
971                None => (start.max(1), None),
972            };
973            match &args[0] {
974                Value::Text(s) => {
975                    // PG counts in characters (codepoints) for TEXT.
976                    let chars: Vec<char> = s.chars().collect();
977                    let skip = (effective_start - 1) as usize;
978                    if skip >= chars.len() {
979                        return Ok(Value::Text(String::new()));
980                    }
981                    let take = match effective_length {
982                        Some(n) => (n as usize).min(chars.len() - skip),
983                        None => chars.len() - skip,
984                    };
985                    Ok(Value::Text(chars[skip..skip + take].iter().collect()))
986                }
987                Value::Bytes(b) => {
988                    let skip = (effective_start - 1) as usize;
989                    if skip >= b.len() {
990                        return Ok(Value::Bytes(Vec::new()));
991                    }
992                    let take = match effective_length {
993                        Some(n) => (n as usize).min(b.len() - skip),
994                        None => b.len() - skip,
995                    };
996                    Ok(Value::Bytes(b[skip..skip + take].to_vec()))
997                }
998                other => Err(EvalError::TypeMismatch {
999                    detail: format!(
1000                        "substring() needs text or bytea, got {:?}",
1001                        other.data_type()
1002                    ),
1003                }),
1004            }
1005        }
1006        // v7.11.15 — `position(needle, haystack)`. PG semantics:
1007        // 1-based byte/char index of first occurrence, or 0 if
1008        // absent. NULL on either operand → NULL. Empty needle
1009        // returns 1 (PG convention). Works on TEXT (char positions)
1010        // and BYTEA (byte positions). (The PG-spec syntax `position(
1011        // needle IN haystack)` is not parsed in v7.11; clients must
1012        // call the function-call form.)
1013        "position" => {
1014            if args.len() != 2 {
1015                return Err(EvalError::TypeMismatch {
1016                    detail: format!("position() takes 2 args, got {}", args.len()),
1017                });
1018            }
1019            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
1020                return Ok(Value::Null);
1021            }
1022            match (&args[0], &args[1]) {
1023                (Value::Text(needle), Value::Text(haystack)) => {
1024                    if needle.is_empty() {
1025                        return Ok(Value::Int(1));
1026                    }
1027                    // Char-based position (PG uses character count).
1028                    let h_chars: Vec<char> = haystack.chars().collect();
1029                    let n_chars: Vec<char> = needle.chars().collect();
1030                    if n_chars.len() > h_chars.len() {
1031                        return Ok(Value::Int(0));
1032                    }
1033                    for i in 0..=h_chars.len() - n_chars.len() {
1034                        if h_chars[i..i + n_chars.len()] == n_chars[..] {
1035                            return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1036                        }
1037                    }
1038                    Ok(Value::Int(0))
1039                }
1040                (Value::Bytes(needle), Value::Bytes(haystack)) => {
1041                    if needle.is_empty() {
1042                        return Ok(Value::Int(1));
1043                    }
1044                    if needle.len() > haystack.len() {
1045                        return Ok(Value::Int(0));
1046                    }
1047                    for i in 0..=haystack.len() - needle.len() {
1048                        if &haystack[i..i + needle.len()] == needle.as_slice() {
1049                            return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1050                        }
1051                    }
1052                    Ok(Value::Int(0))
1053                }
1054                (a, b) => Err(EvalError::TypeMismatch {
1055                    detail: format!(
1056                        "position() operands must both be text or both bytea, got {:?} and {:?}",
1057                        a.data_type(),
1058                        b.data_type()
1059                    ),
1060                }),
1061            }
1062        }
1063        "upper" => {
1064            if args.len() != 1 {
1065                return Err(EvalError::TypeMismatch {
1066                    detail: format!("upper() takes 1 arg, got {}", args.len()),
1067                });
1068            }
1069            match &args[0] {
1070                Value::Null => Ok(Value::Null),
1071                Value::Text(s) => Ok(Value::Text(s.to_uppercase())),
1072                other => Err(EvalError::TypeMismatch {
1073                    detail: format!("upper() needs text, got {:?}", other.data_type()),
1074                }),
1075            }
1076        }
1077        "lower" => {
1078            if args.len() != 1 {
1079                return Err(EvalError::TypeMismatch {
1080                    detail: format!("lower() takes 1 arg, got {}", args.len()),
1081                });
1082            }
1083            match &args[0] {
1084                Value::Null => Ok(Value::Null),
1085                Value::Text(s) => Ok(Value::Text(s.to_lowercase())),
1086                other => Err(EvalError::TypeMismatch {
1087                    detail: format!("lower() needs text, got {:?}", other.data_type()),
1088                }),
1089            }
1090        }
1091        "abs" => {
1092            if args.len() != 1 {
1093                return Err(EvalError::TypeMismatch {
1094                    detail: format!("abs() takes 1 arg, got {}", args.len()),
1095                });
1096            }
1097            match &args[0] {
1098                Value::Null => Ok(Value::Null),
1099                Value::Int(n) => Ok(Value::Int(n.wrapping_abs())),
1100                Value::BigInt(n) => Ok(Value::BigInt(n.wrapping_abs())),
1101                Value::Float(x) => Ok(Value::Float(x.abs())),
1102                other => Err(EvalError::TypeMismatch {
1103                    detail: format!("abs() needs numeric, got {:?}", other.data_type()),
1104                }),
1105            }
1106        }
1107        "coalesce" => {
1108            for a in args {
1109                if !matches!(a, Value::Null) {
1110                    return Ok(a.clone());
1111                }
1112            }
1113            Ok(Value::Null)
1114        }
1115        "date_trunc" => date_trunc(args),
1116        "date_part" => date_part(args),
1117        "age" => age(args),
1118        "to_char" => to_char(args),
1119        // v7.17.0 Phase 3.P0-29 — MySQL time aliases. WordPress,
1120        // Laravel, mysql-connector-python emit these constantly.
1121        // `unix_timestamp()` (bare) is folded by clock_replacement_for
1122        // into a BigInt literal — this arm only handles the 1-arg
1123        // form (TIMESTAMP / DATE → epoch seconds).
1124        "date_format" => date_format_mysql(args),
1125        "unix_timestamp" => unix_timestamp_of(args),
1126        "from_unixtime" => from_unixtime(args),
1127        // v7.17.0 Phase 3.8 — PG `format(fmt, args…)` sprintf-style.
1128        // Conversion specifiers: `%s` (literal string from arg),
1129        // `%I` (quoted identifier), `%L` (quoted SQL literal),
1130        // `%%` (literal `%`). `%n$X` argument-position prefix
1131        // (1-based). NULL arg → empty string for %s; NULL for %I
1132        // is an error in PG; NULL for %L renders as the SQL
1133        // literal `NULL`. Args missing for a position → error.
1134        "format" => format_string(args),
1135        // PG `concat(args...)` — variadic; coerces every arg to
1136        // its text representation; NULL arguments are silently
1137        // skipped (the canonical PG semantic — `concat()` is the
1138        // NULL-tolerant counterpart to the `||` operator which
1139        // propagates NULL).
1140        //
1141        // Reference:
1142        //   https://www.postgresql.org/docs/current/functions-string.html
1143        //   "Concatenates the text representations of all the
1144        //   arguments. NULL arguments are ignored."
1145        //
1146        // Edge cases:
1147        //   * `concat()` (no args) → ''
1148        //   * Every arg NULL → '' (NEVER returns NULL — distinct
1149        //     from `||` and from `array_agg`)
1150        //   * Bool → PG single-char form 't' / 'f'
1151        //   * SmallInt / Int / BigInt / Float / Numeric / Date /
1152        //     Timestamp / Json / Bytes → their canonical text
1153        //     rendering (shared with `format()`'s %s specifier
1154        //     via `value_to_format_text`).
1155        "concat" => {
1156            let mut out = String::new();
1157            for v in args {
1158                if matches!(v, Value::Null) {
1159                    continue;
1160                }
1161                out.push_str(&value_to_format_text(v));
1162            }
1163            Ok(Value::Text(out))
1164        }
1165        // PG `concat_ws(sep, val1 [, val2 ...])` — like concat but
1166        // with a separator inserted between each pair of NON-NULL
1167        // arguments. Critical semantic subtleties:
1168        //   * NULL separator → NULL result (the sep position is
1169        //     mandatory and poison-prone; this is the ONLY way
1170        //     concat_ws can return NULL).
1171        //   * NULL data args silently SKIPPED — the separator is
1172        //     NOT inserted around them. `concat_ws(',', 'a', NULL,
1173        //     'b')` → `'a,b'`, not `'a,,b'`.
1174        //   * Empty-string data args are KEPT (separator placed
1175        //     around them). `concat_ws(',', 'a', '', 'b')` →
1176        //     `'a,,b'`. Distinction with NULL matters for code
1177        //     like `concat_ws(', ', first_name, middle_name,
1178        //     last_name)`.
1179        //   * 0 args → arity error (sep is mandatory).
1180        //   * Only sep (no data) → '' (NOT NULL — same result as
1181        //     the all-NULL data case, which also returns '').
1182        //
1183        // Reference:
1184        //   https://www.postgresql.org/docs/current/functions-string.html
1185        // PG `trim` / `ltrim` / `rtrim` / `btrim`.
1186        //
1187        // Semantic anchors (PG-canonical):
1188        //   * Default chars set is the ASCII SPACE only (NOT the
1189        //     POSIX whitespace class — tab / newline / form-feed
1190        //     stay put unless explicitly listed in `chars`).
1191        //   * `chars` arg is a UTF-8 codepoint SET — any char in
1192        //     the set is stripped, not the substring.
1193        //   * `trim(s)` == `btrim(s)` == strip both ends.
1194        //   * `ltrim(s, c)` / `rtrim(s, c)` strip only the named
1195        //     side; inner occurrences are preserved.
1196        //   * NULL on EITHER arg → NULL result.
1197        //   * Non-text input is coerced via `value_to_format_text`
1198        //     so trim(42) returns '42'.
1199        //
1200        // Reference:
1201        //   https://www.postgresql.org/docs/current/functions-string.html
1202        // PG `replace(string, from, to)` — substring substitution
1203        // for every (non-overlapping, greedy left-to-right)
1204        // occurrence. Empty `from` passes input through unchanged
1205        // (PG behavior — avoids infinite loop). Inserted text is
1206        // NOT re-scanned for new matches (so `replace('a', 'a',
1207        // 'aa')` terminates at `'aa'`, not blows up). NULL on any
1208        // arg poisons.
1209        // PG `split_part(string, delimiter, n)` — split on delim,
1210        // return the n-th field (1-indexed). Negative n counts
1211        // from the end (PG 14+). Out-of-range n → '' (NOT NULL).
1212        // n = 0 → error. Empty delimiter → error. NULL on any
1213        // arg → NULL.
1214        // PG `repeat(string, n)` — duplicate the input N times.
1215        // n=0 → ''; n<0 → '' (PG does NOT error on negative);
1216        // NULL on any arg → NULL.
1217        // PG `lpad(string, length [, fill])` / `rpad(...)`.
1218        // length is the target CODEPOINT count. When the input is
1219        // longer it is truncated; lpad and rpad both keep the LEFT
1220        // side, i.e. both truncate from the right (per PG-verified
1221        // behavior). Padding when shorter, using
1222        // `fill` (default SPACE) cycling for multi-char fills.
1223        // length<=0 → ''. Empty fill + needs padding → returns
1224        // input verbatim (potentially truncated). NULL on any
1225        // arg → NULL.
1226        // PG `strpos(string, substring)` — same as position()
1227        // but with reversed arg order. PG convention is
1228        // strpos(haystack, needle); position(needle, haystack).
1229        // Both are 1-indexed; 0 = not found; codepoint-counted.
1230        // PG `left(string, n)` / `right(string, n)` — head/tail
1231        // substring helpers. Negative n means "all but last/first
1232        // |n| chars" — slice from the OPPOSITE side. n=0 → ''.
1233        // Codepoint-counted. NULL on any arg → NULL.
1234        // PG `floor(x)` — largest integer <= x.
1235        //   * Negative floats floor TOWARD -infinity, NOT toward 0.
1236        //   * Integer types passthrough unchanged.
1237        //   * NULL → NULL.
1238        // PG `ceil(x)` / `ceiling(x)` — smallest integer >= x.
1239        //   * Negative floats round TOWARD zero (toward +inf):
1240        //     ceil(-1.5) → -1, NOT -2.
1241        //   * Integer types passthrough unchanged.
1242        //   * NULL → NULL.
1243        // PG `round(x)` / `round(x, scale)` — half-away-from-zero
1244        // rounding (NUMERIC semantic).
1245        //   * round(0.5) → 1; round(-0.5) → -1; round(2.5) → 3.
1246        //   * Two-arg form rounds to N decimal places (n>0) or to
1247        //     nearest 10^|n| (n<0).
1248        //   * Integer types passthrough unchanged.
1249        //   * NULL on any arg → NULL.
1250        // PG `trunc(x)` / `trunc(x, scale)` — truncate TOWARD zero.
1251        //   * Distinct from floor() which rounds toward -inf:
1252        //     trunc(-1.7)→-1; floor(-1.7)→-2.
1253        //   * Distinct from round() which rounds half-away:
1254        //     trunc(1.5)→1; round(1.5)→2.
1255        //   * Two-arg form truncates to N decimal places (or 10^|n|
1256        //     for negative n).
1257        //   * Integer types passthrough unchanged.
1258        //   * NULL on any arg → NULL.
1259        // PG `nullif(a, b)` — returns NULL if a = b, else a.
1260        // Canonical use cases:
1261        //   * Divide-by-zero protection: `x / nullif(y, 0)`
1262        //   * Empty-string normalisation: `nullif(field, '')`
1263        // Edge: nullif(NULL, NULL) returns NULL. nullif(NULL, x)
1264        // returns NULL. nullif(x, NULL) returns x (`x = NULL` is
1265        // never true, so the first argument passes through).
1266        // PG `greatest(...)` / `least(...)` — variadic max/min.
1267        // NULL args silently skipped (PG-canonical). All-NULL → NULL.
1268        // Cross-type widening for numeric comparisons.
1269        // PG `mod(y, x)` — modulo. Result sign follows dividend.
1270        //   * mod(7, 3) = 1
1271        //   * mod(-7, 3) = -1
1272        //   * mod(7, -3) = 1
1273        //   * mod(-7, -3) = -1
1274        // Division by zero → error. NULL on any arg → NULL.
1275        // PG `power(x, y)` / `pow(x, y)` — x^y.
1276        // Integer exponent → exact via repeated multiplication
1277        // (no precision loss). Fractional exponent → exp(y*ln(x))
1278        // via the no_std exp/ln series helpers.
1279        // x=0 with negative y → error (1/0). NULL → NULL.
1280        // PG `sqrt(x)` — square root. Negative input → error.
1281        // PG `sign(x)` — -1 / 0 / 1.
1282        // PG `random()` — uniform float in [0, 1). Per-row /
1283        // per-call: each evaluation returns a different value
1284        // even within the same statement. Backed by a xorshift64*
1285        // PRNG with a process-static seed; not cryptographically
1286        // secure (use a cryptographic source for security tokens).
1287        "random" => {
1288            if !args.is_empty() {
1289                return Err(EvalError::TypeMismatch {
1290                    detail: alloc::format!("random() takes 0 args, got {}", args.len()),
1291                });
1292            }
1293            Ok(Value::Float(prng_next_f64()))
1294        }
1295        // v7.17.0 — PG `gen_random_uuid()` (built-in, no extension)
1296        // and the historical uuid-ossp `uuid_generate_v4()` alias.
1297        // Both produce an RFC 4122 v4 (random) UUID. This is the
1298        // function Django / Rails / Hibernate emit in `id UUID
1299        // PRIMARY KEY DEFAULT gen_random_uuid()`, the modern
1300        // default PK pattern.
1301        "gen_random_uuid" | "uuid_generate_v4" => {
1302            if !args.is_empty() {
1303                return Err(EvalError::TypeMismatch {
1304                    detail: alloc::format!("{name}() takes 0 args, got {}", args.len()),
1305                });
1306            }
1307            Ok(Value::Uuid(gen_random_uuid_bytes()))
1308        }
1309        "sign" => {
1310            if args.len() != 1 {
1311                return Err(EvalError::TypeMismatch {
1312                    detail: alloc::format!("sign() takes 1 arg, got {}", args.len()),
1313                });
1314            }
1315            match &args[0] {
1316                Value::Null => Ok(Value::Null),
1317                Value::SmallInt(n) => Ok(Value::SmallInt(n.signum())),
1318                Value::Int(n) => Ok(Value::Int(n.signum())),
1319                Value::BigInt(n) => Ok(Value::BigInt(n.signum())),
1320                Value::Float(x) => {
1321                    let s = if *x > 0.0 {
1322                        1.0
1323                    } else if *x < 0.0 {
1324                        -1.0
1325                    } else {
1326                        0.0
1327                    };
1328                    Ok(Value::Float(s))
1329                }
1330                Value::Numeric { scaled, scale } => {
1331                    let s = scaled.signum();
1332                    Ok(Value::Numeric {
1333                        scaled: s * pow10_i128(*scale),
1334                        scale: *scale,
1335                    })
1336                }
1337                other => Err(EvalError::TypeMismatch {
1338                    detail: alloc::format!("sign() needs numeric, got {:?}", other.data_type()),
1339                }),
1340            }
1341        }
1342        "sqrt" => {
1343            if args.len() != 1 {
1344                return Err(EvalError::TypeMismatch {
1345                    detail: alloc::format!("sqrt() takes 1 arg, got {}", args.len()),
1346                });
1347            }
1348            match &args[0] {
1349                Value::Null => Ok(Value::Null),
1350                v => {
1351                    let x = value_to_f64(v).ok_or_else(|| EvalError::TypeMismatch {
1352                        detail: alloc::format!("sqrt() needs numeric, got {:?}", v.data_type()),
1353                    })?;
1354                    if x < 0.0 {
1355                        return Err(EvalError::TypeMismatch {
1356                            detail: "sqrt(): negative input outside real domain".into(),
1357                        });
1358                    }
1359                    if x == 0.0 {
1360                        return Ok(Value::Float(0.0));
1361                    }
1362                    Ok(Value::Float(f64_sqrt(x)))
1363                }
1364            }
1365        }
1366        "power" | "pow" => {
1367            if args.len() != 2 {
1368                return Err(EvalError::TypeMismatch {
1369                    detail: alloc::format!("power() takes 2 args, got {}", args.len()),
1370                });
1371            }
1372            if args.iter().any(|v| matches!(v, Value::Null)) {
1373                return Ok(Value::Null);
1374            }
1375            let x = value_to_f64(&args[0]).ok_or_else(|| EvalError::TypeMismatch {
1376                detail: "power() needs numeric x".into(),
1377            })?;
1378            let y = value_to_f64(&args[1]).ok_or_else(|| EvalError::TypeMismatch {
1379                detail: "power() needs numeric y".into(),
1380            })?;
1381            // Integer-exponent fast path.
1382            let y_int = y as i32;
1383            if (y_int as f64) == y && y.abs() < 1024.0 {
1384                let result = f64_powi(x, y_int);
1385                return Ok(Value::Float(result));
1386            }
1387            // Fractional exponent — only defined for x >= 0 in real
1388            // arithmetic. Negative x raised to fractional power is
1389            // complex; reject cleanly.
1390            if x < 0.0 {
1391                return Err(EvalError::TypeMismatch {
1392                    detail: "power(): negative base with fractional exponent yields complex result"
1393                        .into(),
1394                });
1395            }
1396            if x == 0.0 && y < 0.0 {
1397                return Err(EvalError::TypeMismatch {
1398                    detail: "power(): 0 raised to negative power is undefined".into(),
1399                });
1400            }
1401            if x == 0.0 {
1402                return Ok(Value::Float(0.0));
1403            }
1404            Ok(Value::Float(f64_exp(y * f64_ln(x))))
1405        }
1406        "mod" => {
1407            if args.len() != 2 {
1408                return Err(EvalError::TypeMismatch {
1409                    detail: alloc::format!("mod() takes 2 args, got {}", args.len()),
1410                });
1411            }
1412            if args.iter().any(|v| matches!(v, Value::Null)) {
1413                return Ok(Value::Null);
1414            }
1415            let to_i64 = |v: &Value| -> Result<i64, EvalError> {
1416                match v {
1417                    Value::SmallInt(x) => Ok(i64::from(*x)),
1418                    Value::Int(x) => Ok(i64::from(*x)),
1419                    Value::BigInt(x) => Ok(*x),
1420                    other => Err(EvalError::TypeMismatch {
1421                        detail: alloc::format!("mod() needs integer, got {:?}", other.data_type()),
1422                    }),
1423                }
1424            };
1425            let y = to_i64(&args[0])?;
1426            let x = to_i64(&args[1])?;
1427            if x == 0 {
1428                return Err(EvalError::TypeMismatch {
1429                    detail: "mod(): division by zero".into(),
1430                });
1431            }
1432            // Rust's `%` operator on signed integers follows the
1433            // dividend's sign — same as PG.
1434            let result = y % x;
1435            // Pick the narrowest type that holds the result.
1436            if let Ok(small) = i16::try_from(result) {
1437                if matches!(args[0], Value::SmallInt(_)) && matches!(args[1], Value::SmallInt(_)) {
1438                    return Ok(Value::SmallInt(small));
1439                }
1440            }
1441            if let Ok(int_) = i32::try_from(result) {
1442                if !matches!(args[0], Value::BigInt(_)) && !matches!(args[1], Value::BigInt(_)) {
1443                    return Ok(Value::Int(int_));
1444                }
1445            }
1446            Ok(Value::BigInt(result))
1447        }
1448        "greatest" | "least" => {
1449            if args.is_empty() {
1450                return Err(EvalError::TypeMismatch {
1451                    detail: alloc::format!(
1452                        "{lc}() takes at least 1 arg",
1453                        lc = if name.eq_ignore_ascii_case("greatest") {
1454                            "greatest"
1455                        } else {
1456                            "least"
1457                        }
1458                    ),
1459                });
1460            }
1461            let non_null: alloc::vec::Vec<&Value> =
1462                args.iter().filter(|v| !matches!(v, Value::Null)).collect();
1463            if non_null.is_empty() {
1464                return Ok(Value::Null);
1465            }
1466            let is_greatest = name.eq_ignore_ascii_case("greatest");
1467            let mut best = non_null[0].clone();
1468            for v in &non_null[1..] {
1469                let ord = value_cmp_for_min_max(&best, v);
1470                let take = if is_greatest {
1471                    ord == core::cmp::Ordering::Less
1472                } else {
1473                    ord == core::cmp::Ordering::Greater
1474                };
1475                if take {
1476                    best = (*v).clone();
1477                }
1478            }
1479            Ok(best)
1480        }
1481        // MySQL `ifnull(a, b)` — alias for coalesce(a, b).
1482        // Used by every ORM with a MySQL target (Hibernate /
1483        // Laravel / Sequelize).
1484        "ifnull" => {
1485            if args.len() != 2 {
1486                return Err(EvalError::TypeMismatch {
1487                    detail: alloc::format!("ifnull() takes 2 args, got {}", args.len()),
1488                });
1489            }
1490            for v in args {
1491                if !matches!(v, Value::Null) {
1492                    return Ok(v.clone());
1493                }
1494            }
1495            Ok(Value::Null)
1496        }
1497        // MySQL `if(cond, then, else)` — alias for CASE WHEN.
1498        // NULL condition → else branch (MySQL semantic).
1499        // Integer condition: nonzero is true.
1500        "if" => {
1501            if args.len() != 3 {
1502                return Err(EvalError::TypeMismatch {
1503                    detail: alloc::format!(
1504                        "if() takes 3 args (cond, then, else), got {}",
1505                        args.len()
1506                    ),
1507                });
1508            }
1509            let truthy = match &args[0] {
1510                Value::Null => false,
1511                Value::Bool(b) => *b,
1512                Value::SmallInt(n) => *n != 0,
1513                Value::Int(n) => *n != 0,
1514                Value::BigInt(n) => *n != 0,
1515                Value::Float(x) => *x != 0.0,
1516                Value::Text(s) => !s.is_empty() && s != "0",
1517                _ => true,
1518            };
1519            if truthy {
1520                Ok(args[1].clone())
1521            } else {
1522                Ok(args[2].clone())
1523            }
1524        }
1525        "nullif" => {
1526            if args.len() != 2 {
1527                return Err(EvalError::TypeMismatch {
1528                    detail: alloc::format!("nullif() takes 2 args, got {}", args.len()),
1529                });
1530            }
1531            match (&args[0], &args[1]) {
1532                (Value::Null, _) => Ok(Value::Null),
1533                (a, Value::Null) => Ok(a.clone()),
1534                (a, b) => {
1535                    // Use value_cmp (already defined as Ord-like
1536                    // function in lib.rs) — but it's not accessible
1537                    // here. Fall back to direct equality.
1538                    if values_equal_for_nullif(a, b) {
1539                        Ok(Value::Null)
1540                    } else {
1541                        Ok(a.clone())
1542                    }
1543                }
1544            }
1545        }
1546        "trunc" => {
1547            match args.len() {
1548                1 => match &args[0] {
1549                    Value::Null => Ok(Value::Null),
1550                    Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1551                    Value::Float(x) => Ok(Value::Float(f64_trunc(*x))),
1552                    Value::Numeric { scaled, scale } => {
1553                        let factor = pow10_i128(*scale);
1554                        // Truncate toward zero — sign-preserving division.
1555                        let q = scaled / factor;
1556                        Ok(Value::Numeric {
1557                            scaled: q * factor,
1558                            scale: *scale,
1559                        })
1560                    }
1561                    other => Err(EvalError::TypeMismatch {
1562                        detail: alloc::format!(
1563                            "trunc() needs numeric, got {:?}",
1564                            other.data_type()
1565                        ),
1566                    }),
1567                },
1568                2 => {
1569                    if args.iter().any(|v| matches!(v, Value::Null)) {
1570                        return Ok(Value::Null);
1571                    }
1572                    let n = match &args[1] {
1573                        Value::SmallInt(x) => i32::from(*x),
1574                        Value::Int(x) => *x,
1575                        Value::BigInt(x) => {
1576                            i32::try_from(*x).map_err(|_| EvalError::TypeMismatch {
1577                                detail: "trunc(): scale must fit in i32".into(),
1578                            })?
1579                        }
1580                        other => {
1581                            return Err(EvalError::TypeMismatch {
1582                                detail: alloc::format!(
1583                                    "trunc(): scale must be integer, got {:?}",
1584                                    other.data_type()
1585                                ),
1586                            });
1587                        }
1588                    };
1589                    let x = match &args[0] {
1590                        Value::SmallInt(v) => f64::from(*v),
1591                        Value::Int(v) => f64::from(*v),
1592                        Value::BigInt(v) => *v as f64,
1593                        Value::Float(v) => *v,
1594                        Value::Numeric { scaled, scale } => {
1595                            (*scaled as f64) / f64_powi(10.0, i32::from(*scale))
1596                        }
1597                        other => {
1598                            return Err(EvalError::TypeMismatch {
1599                                detail: alloc::format!(
1600                                    "trunc() needs numeric x, got {:?}",
1601                                    other.data_type()
1602                                ),
1603                            });
1604                        }
1605                    };
1606                    let result = if n >= 0 {
1607                        let factor = f64_powi(10.0, n);
1608                        f64_trunc(x * factor) / factor
1609                    } else {
1610                        let factor = f64_powi(10.0, -n);
1611                        f64_trunc(x / factor) * factor
1612                    };
1613                    Ok(Value::Float(result))
1614                }
1615                _ => Err(EvalError::TypeMismatch {
1616                    detail: alloc::format!("trunc() takes 1 or 2 args, got {}", args.len()),
1617                }),
1618            }
1619        }
1620        "round" => {
1621            match args.len() {
1622                1 => match &args[0] {
1623                    Value::Null => Ok(Value::Null),
1624                    Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1625                    Value::Float(x) => Ok(Value::Float(f64_round_half_away(*x))),
1626                    Value::Numeric { scaled, scale } => {
1627                        let factor = pow10_i128(*scale);
1628                        let q = scaled.div_euclid(factor);
1629                        let r = scaled.rem_euclid(factor);
1630                        // Half-away-from-zero: if 2*r >= factor → round up.
1631                        let result = if 2 * r >= factor { q + 1 } else { q };
1632                        Ok(Value::Numeric {
1633                            scaled: result * factor,
1634                            scale: *scale,
1635                        })
1636                    }
1637                    other => Err(EvalError::TypeMismatch {
1638                        detail: alloc::format!(
1639                            "round() needs numeric, got {:?}",
1640                            other.data_type()
1641                        ),
1642                    }),
1643                },
1644                2 => {
1645                    if args.iter().any(|v| matches!(v, Value::Null)) {
1646                        return Ok(Value::Null);
1647                    }
1648                    let n = match &args[1] {
1649                        Value::SmallInt(x) => i32::from(*x),
1650                        Value::Int(x) => *x,
1651                        Value::BigInt(x) => {
1652                            i32::try_from(*x).map_err(|_| EvalError::TypeMismatch {
1653                                detail: "round(): scale must fit in i32".into(),
1654                            })?
1655                        }
1656                        other => {
1657                            return Err(EvalError::TypeMismatch {
1658                                detail: alloc::format!(
1659                                    "round(): scale must be integer, got {:?}",
1660                                    other.data_type()
1661                                ),
1662                            });
1663                        }
1664                    };
1665                    // Convert input to f64 for arithmetic
1666                    // simplicity (PG does NUMERIC math here but
1667                    // SPG's f64 path matches the dominant
1668                    // customer expectation for round(N, scale)
1669                    // patterns).
1670                    let x = match &args[0] {
1671                        Value::SmallInt(v) => f64::from(*v),
1672                        Value::Int(v) => f64::from(*v),
1673                        Value::BigInt(v) => *v as f64,
1674                        Value::Float(v) => *v,
1675                        Value::Numeric { scaled, scale } => {
1676                            (*scaled as f64) / f64_powi(10.0, i32::from(*scale))
1677                        }
1678                        other => {
1679                            return Err(EvalError::TypeMismatch {
1680                                detail: alloc::format!(
1681                                    "round() needs numeric x, got {:?}",
1682                                    other.data_type()
1683                                ),
1684                            });
1685                        }
1686                    };
1687                    // Avoid float precision drift from the
1688                    // 10^(-k) reciprocal — for n<0 work with the
1689                    // positive-exponent form: round(x / 10^|n|) *
1690                    // 10^|n|.
1691                    let result = if n >= 0 {
1692                        let factor = f64_powi(10.0, n);
1693                        f64_round_half_away(x * factor) / factor
1694                    } else {
1695                        let factor = f64_powi(10.0, -n);
1696                        f64_round_half_away(x / factor) * factor
1697                    };
1698                    Ok(Value::Float(result))
1699                }
1700                _ => Err(EvalError::TypeMismatch {
1701                    detail: alloc::format!("round() takes 1 or 2 args, got {}", args.len()),
1702                }),
1703            }
1704        }
1705        "ceil" | "ceiling" => {
1706            if args.len() != 1 {
1707                return Err(EvalError::TypeMismatch {
1708                    detail: alloc::format!("ceil() takes 1 arg, got {}", args.len()),
1709                });
1710            }
1711            match &args[0] {
1712                Value::Null => Ok(Value::Null),
1713                Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1714                Value::Float(x) => Ok(Value::Float(f64_ceil(*x))),
1715                Value::Numeric { scaled, scale } => {
1716                    let factor = pow10_i128(*scale);
1717                    let q = scaled.div_euclid(factor);
1718                    let r = scaled.rem_euclid(factor);
1719                    let result = if r == 0 { q } else { q + 1 };
1720                    Ok(Value::Numeric {
1721                        scaled: result * factor,
1722                        scale: *scale,
1723                    })
1724                }
1725                other => Err(EvalError::TypeMismatch {
1726                    detail: alloc::format!("ceil() needs numeric, got {:?}", other.data_type()),
1727                }),
1728            }
1729        }
1730        "floor" => {
1731            if args.len() != 1 {
1732                return Err(EvalError::TypeMismatch {
1733                    detail: alloc::format!("floor() takes 1 arg, got {}", args.len()),
1734                });
1735            }
1736            match &args[0] {
1737                Value::Null => Ok(Value::Null),
1738                Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_) => Ok(args[0].clone()),
1739                Value::Float(x) => Ok(Value::Float(f64_floor(*x))),
1740                Value::Numeric { scaled, scale } => {
1741                    let factor = pow10_i128(*scale);
1742                    let q = scaled.div_euclid(factor);
1743                    // div_euclid rounds toward -infinity which is
1744                    // exactly the floor semantic — perfect for
1745                    // negative values.
1746                    Ok(Value::Numeric {
1747                        scaled: q * factor,
1748                        scale: *scale,
1749                    })
1750                }
1751                other => Err(EvalError::TypeMismatch {
1752                    detail: alloc::format!("floor() needs numeric, got {:?}", other.data_type()),
1753                }),
1754            }
1755        }
1756        "left" => string_left_right(args, true, "left"),
1757        "right" => string_left_right(args, false, "right"),
1758        "strpos" => {
1759            if args.len() != 2 {
1760                return Err(EvalError::TypeMismatch {
1761                    detail: alloc::format!(
1762                        "strpos() takes 2 args (haystack, needle), got {}",
1763                        args.len()
1764                    ),
1765                });
1766            }
1767            if args.iter().any(|v| matches!(v, Value::Null)) {
1768                return Ok(Value::Null);
1769            }
1770            let haystack = value_to_format_text(&args[0]);
1771            let needle = value_to_format_text(&args[1]);
1772            if needle.is_empty() {
1773                return Ok(Value::Int(1));
1774            }
1775            let h_chars: Vec<char> = haystack.chars().collect();
1776            let n_chars: Vec<char> = needle.chars().collect();
1777            if n_chars.len() > h_chars.len() {
1778                return Ok(Value::Int(0));
1779            }
1780            for i in 0..=h_chars.len() - n_chars.len() {
1781                if h_chars[i..i + n_chars.len()] == n_chars[..] {
1782                    return Ok(Value::Int(i32::try_from(i + 1).unwrap_or(i32::MAX)));
1783                }
1784            }
1785            Ok(Value::Int(0))
1786        }
1787        "lpad" => string_pad(args, true, "lpad"),
1788        "rpad" => string_pad(args, false, "rpad"),
1789        "repeat" => {
1790            if args.len() != 2 {
1791                return Err(EvalError::TypeMismatch {
1792                    detail: alloc::format!("repeat() takes 2 args, got {}", args.len()),
1793                });
1794            }
1795            if args.iter().any(|v| matches!(v, Value::Null)) {
1796                return Ok(Value::Null);
1797            }
1798            let s = value_to_format_text(&args[0]);
1799            let n = match &args[1] {
1800                Value::SmallInt(x) => i64::from(*x),
1801                Value::Int(x) => i64::from(*x),
1802                Value::BigInt(x) => *x,
1803                other => {
1804                    return Err(EvalError::TypeMismatch {
1805                        detail: alloc::format!(
1806                            "repeat(): n must be integer, got {:?}",
1807                            other.data_type()
1808                        ),
1809                    });
1810                }
1811            };
1812            if n <= 0 {
1813                return Ok(Value::Text(String::new()));
1814            }
1815            // Safety cap so a runaway argument doesn't allocate
1816            // terabytes. PG itself enforces a similar cap via
1817            // work_mem; SPG inherits a defensive 64MiB cap.
1818            const MAX_REPEAT_BYTES: usize = 64 * 1024 * 1024;
1819            let needed =
1820                s.len()
1821                    .checked_mul(n as usize)
1822                    .ok_or_else(|| EvalError::TypeMismatch {
1823                        detail: "repeat(): result size overflows usize".into(),
1824                    })?;
1825            if needed > MAX_REPEAT_BYTES {
1826                return Err(EvalError::TypeMismatch {
1827                    detail: alloc::format!(
1828                        "repeat(): result would exceed {MAX_REPEAT_BYTES} bytes"
1829                    ),
1830                });
1831            }
1832            Ok(Value::Text(s.repeat(n as usize)))
1833        }
1834        "split_part" => {
1835            if args.len() != 3 {
1836                return Err(EvalError::TypeMismatch {
1837                    detail: alloc::format!(
1838                        "split_part() takes 3 args (string, delim, n), got {}",
1839                        args.len()
1840                    ),
1841                });
1842            }
1843            if args.iter().any(|v| matches!(v, Value::Null)) {
1844                return Ok(Value::Null);
1845            }
1846            let s = value_to_format_text(&args[0]);
1847            let delim = value_to_format_text(&args[1]);
1848            if delim.is_empty() {
1849                return Err(EvalError::TypeMismatch {
1850                    detail: "split_part(): delimiter cannot be empty".into(),
1851                });
1852            }
1853            let n = match &args[2] {
1854                Value::SmallInt(x) => i64::from(*x),
1855                Value::Int(x) => i64::from(*x),
1856                Value::BigInt(x) => *x,
1857                other => {
1858                    return Err(EvalError::TypeMismatch {
1859                        detail: alloc::format!(
1860                            "split_part(): n must be integer, got {:?}",
1861                            other.data_type()
1862                        ),
1863                    });
1864                }
1865            };
1866            if n == 0 {
1867                return Err(EvalError::TypeMismatch {
1868                    detail: "split_part(): n must be nonzero (PG: 1-indexed)".into(),
1869                });
1870            }
1871            let parts: alloc::vec::Vec<&str> = s.split(&delim[..]).collect();
1872            let total = parts.len() as i64;
1873            let idx = if n > 0 {
1874                n - 1
1875            } else {
1876                // n=-1 → last (idx = total - 1)
1877                total + n
1878            };
1879            if idx < 0 || idx >= total {
1880                return Ok(Value::Text(String::new()));
1881            }
1882            Ok(Value::Text(parts[idx as usize].to_string()))
1883        }
1884        // PG `translate(s, from, to)` — char-by-char positional
1885        // mapping. Each codepoint in `from` is replaced by the
1886        // codepoint at the same index in `to`. When `from` is
1887        // longer than `to`, the extra `from` codepoints are
1888        // DELETED (not replaced). When `from` has duplicates,
1889        // the FIRST occurrence's mapping wins. NULL → NULL.
1890        "translate" => {
1891            if args.len() != 3 {
1892                return Err(EvalError::TypeMismatch {
1893                    detail: alloc::format!("translate() takes 3 args, got {}", args.len()),
1894                });
1895            }
1896            if args.iter().any(|v| matches!(v, Value::Null)) {
1897                return Ok(Value::Null);
1898            }
1899            let s = value_to_format_text(&args[0]);
1900            let from = value_to_format_text(&args[1]);
1901            let to = value_to_format_text(&args[2]);
1902            let from_chars: Vec<char> = from.chars().collect();
1903            let to_chars: Vec<char> = to.chars().collect();
1904            // Build the codepoint map. First occurrence wins.
1905            let mut map: alloc::collections::BTreeMap<char, Option<char>> =
1906                alloc::collections::BTreeMap::new();
1907            for (i, &fc) in from_chars.iter().enumerate() {
1908                if map.contains_key(&fc) {
1909                    continue;
1910                }
1911                let replacement = to_chars.get(i).copied();
1912                map.insert(fc, replacement);
1913            }
1914            let mut out = String::with_capacity(s.len());
1915            for c in s.chars() {
1916                match map.get(&c) {
1917                    Some(Some(r)) => out.push(*r),
1918                    Some(None) => {} // mapped to "deleted"
1919                    None => out.push(c),
1920                }
1921            }
1922            Ok(Value::Text(out))
1923        }
1924        "replace" => {
1925            if args.len() != 3 {
1926                return Err(EvalError::TypeMismatch {
1927                    detail: alloc::format!(
1928                        "replace() takes 3 args (string, from, to), got {}",
1929                        args.len()
1930                    ),
1931                });
1932            }
1933            if args.iter().any(|v| matches!(v, Value::Null)) {
1934                return Ok(Value::Null);
1935            }
1936            let s = value_to_format_text(&args[0]);
1937            let from = value_to_format_text(&args[1]);
1938            let to = value_to_format_text(&args[2]);
1939            if from.is_empty() {
1940                return Ok(Value::Text(s));
1941            }
1942            // std `String::replace` matches PG semantics exactly:
1943            // non-overlapping, left-to-right, no re-scan of
1944            // inserted text. Sealed test surface verifies the
1945            // edge cases independently.
1946            Ok(Value::Text(s.replace(&from[..], &to)))
1947        }
1948        "trim" | "btrim" => string_trim(args, TrimSide::Both, "trim"),
1949        "ltrim" => string_trim(args, TrimSide::Left, "ltrim"),
1950        "rtrim" => string_trim(args, TrimSide::Right, "rtrim"),
1951        "concat_ws" => {
1952            if args.is_empty() {
1953                return Err(EvalError::TypeMismatch {
1954                    detail: "concat_ws() requires at least 1 arg (the separator)".into(),
1955                });
1956            }
1957            // NULL separator poisons the result.
1958            let sep = match &args[0] {
1959                Value::Null => return Ok(Value::Null),
1960                v => value_to_format_text(v),
1961            };
1962            let mut out = String::new();
1963            let mut first = true;
1964            for v in &args[1..] {
1965                if matches!(v, Value::Null) {
1966                    continue;
1967                }
1968                if first {
1969                    first = false;
1970                } else {
1971                    out.push_str(&sep);
1972                }
1973                out.push_str(&value_to_format_text(v));
1974            }
1975            Ok(Value::Text(out))
1976        }
1977        // v7.17.0 Phase 3.7 — PG regex function family.
1978        "regexp_matches" => regexp_matches(args),
1979        "regexp_replace" => regexp_replace(args),
1980        "regexp_split_to_array" => regexp_split_to_array(args),
1981        // v7.17.0 Phase 3.P0-28 — PG JSON builder family.
1982        // to_json / to_jsonb coerce any value to JSON text (NULL
1983        // becomes the JSON literal 'null', not SQL NULL).
1984        "to_json" | "to_jsonb" => {
1985            if args.len() != 1 {
1986                return Err(EvalError::TypeMismatch {
1987                    detail: alloc::format!("to_json() takes 1 arg, got {}", args.len()),
1988                });
1989            }
1990            // Json input passes through verbatim — PG identity.
1991            if let Value::Json(s) = &args[0] {
1992                return Ok(Value::Json(s.clone()));
1993            }
1994            Ok(Value::Json(crate::json::value_to_json_text(&args[0])))
1995        }
1996        "json_build_object" | "jsonb_build_object" => crate::json::build_object(args),
1997        "json_build_array" | "jsonb_build_array" => crate::json::build_array(args),
1998        "jsonb_set" | "json_set" => crate::json::set(args),
1999        "jsonb_insert" | "json_insert" => crate::json::insert(args),
2000        // v7.17.0 Phase 3.9 — PG `jsonb_path_query` family.
2001        "jsonb_path_query" | "json_path_query" => {
2002            if args.len() != 2 {
2003                return Err(EvalError::TypeMismatch {
2004                    detail: alloc::format!("jsonb_path_query() takes 2 args, got {}", args.len()),
2005                });
2006            }
2007            crate::json::path_query(&args[0], &args[1])
2008        }
2009        "jsonb_path_query_first" | "json_path_query_first" => {
2010            if args.len() != 2 {
2011                return Err(EvalError::TypeMismatch {
2012                    detail: alloc::format!(
2013                        "jsonb_path_query_first() takes 2 args, got {}",
2014                        args.len()
2015                    ),
2016                });
2017            }
2018            crate::json::path_query_first(&args[0], &args[1])
2019        }
2020        "jsonb_path_query_array" | "json_path_query_array" => {
2021            if args.len() != 2 {
2022                return Err(EvalError::TypeMismatch {
2023                    detail: alloc::format!(
2024                        "jsonb_path_query_array() takes 2 args, got {}",
2025                        args.len()
2026                    ),
2027                });
2028            }
2029            crate::json::path_query_array(&args[0], &args[1])
2030        }
2031        // v7.17.0 Phase 7 — INET / CIDR network helpers.
2032        "host" => inet_host(args),
2033        "network" => inet_network(args),
2034        "masklen" => inet_masklen(args),
2035        // v6.4.3 — encode/decode + error_on_null SQL function bundle.
2036        "encode" => encode_text(args),
2037        "decode" => decode_text(args),
2038        "error_on_null" => error_on_null(args),
2039        // v7.12.1 — PG full-text search lexer / tsquery builders.
2040        // mailrs G-CRIT-3 acceptance path: `to_tsvector('english',
2041        // … || ' ' || … || …)` runs end-to-end against a tsvector
2042        // column with Porter stemming + standard english stopwords.
2043        "to_tsvector" => fts_to_tsvector(args, ctx),
2044        "plainto_tsquery" => fts_plainto_tsquery(args, ctx),
2045        "phraseto_tsquery" => fts_phraseto_tsquery(args, ctx),
2046        "websearch_to_tsquery" => fts_websearch_to_tsquery(args, ctx),
2047        "to_tsquery" => fts_to_tsquery(args, ctx),
2048        // v7.12.2 — ranking functions. mailrs's fallback search
2049        // query ORDERs BY ts_rank(search_vector, q) DESC.
2050        "ts_rank" => fts_ts_rank(args),
2051        "ts_rank_cd" => fts_ts_rank_cd(args),
2052        // v7.14.0 — PG dump preamble emits
2053        // `SELECT pg_catalog.set_config('search_path', '', false);`
2054        // and friends. SPG is single-schema; accept-as-no-op
2055        // returning either the new value or NULL.
2056        "set_config" => Ok(args.get(1).cloned().unwrap_or(Value::Null)),
2057        "current_setting" => Ok(Value::Text(String::new())),
2058        // PG `pg_catalog.*` discovery / cast helpers commonly
2059        // emitted by ORMs probing the server. Accept-as-no-op
2060        // with sensible defaults so the dump preamble doesn't
2061        // fail. `pg_get_serial_sequence` returns NULL (no
2062        // sequence — SPG has AUTO_INCREMENT instead).
2063        "pg_get_serial_sequence" | "pg_get_constraintdef" | "pg_get_indexdef" => Ok(Value::Null),
2064        "version" => Ok(Value::Text("PostgreSQL 16 (SPG-compat)".into())),
2065        // v7.17.0 Phase 3.P0-30 — session / introspection functions.
2066        // Engine-level dispatch so these compose inside expressions
2067        // (`WHERE schemaname = current_schema()`, `SELECT *,
2068        // database() AS db FROM t`) — the pgwire layer's canned
2069        // shortcuts only catch the bare top-level SELECT shape.
2070        // SPG is single-database + single-schema; the values
2071        // mirror the wire-layer canned defaults.
2072        "current_database" | "database" => Ok(Value::Text("spg".into())),
2073        "current_schema" => Ok(Value::Text("public".into())),
2074        "current_user" | "session_user" | "user" => Ok(Value::Text("admin".into())),
2075        // v7.17.0 Phase 3.P0-31 — `pg_typeof(any)` returns the
2076        // canonical PG lowercase type name. sqlx / SQLAlchemy /
2077        // Diesel emit this during describe; generic ORMs may
2078        // branch on it (`CASE WHEN pg_typeof(x) = 'jsonb' ...`).
2079        // NULL has no resolved value-level type → 'unknown' per
2080        // PG semantics.
2081        "pg_typeof" => {
2082            if args.len() != 1 {
2083                return Err(EvalError::TypeMismatch {
2084                    detail: format!("pg_typeof() takes 1 arg, got {}", args.len()),
2085                });
2086            }
2087            Ok(Value::Text(pg_typeof_name(&args[0]).into()))
2088        }
2089        // v7.17.0 — `nextval` / `currval` / `setval` are handled
2090        // at the top of this match against the SequenceResolver.
2091        // `lastval()` (no-arg session memory) still degrades to
2092        // NULL pending a Phase 1.1b session tracker.
2093        "lastval" => Ok(Value::Null),
2094        // v7.15.0 — pg_trgm: similarity, show_trgm. Match PG
2095        // semantics: similarity returns Jaccard of trigram sets;
2096        // show_trgm returns the trigram set as TEXT[]. NULL on
2097        // any NULL arg.
2098        "similarity" => {
2099            if args.len() != 2 {
2100                return Err(EvalError::TypeMismatch {
2101                    detail: format!("similarity() takes 2 args, got {}", args.len()),
2102                });
2103            }
2104            if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2105                return Ok(Value::Null);
2106            }
2107            let a = match &args[0] {
2108                Value::Text(s) => s.as_str(),
2109                other => {
2110                    return Err(EvalError::TypeMismatch {
2111                        detail: format!("similarity() needs text, got {:?}", other.data_type()),
2112                    });
2113                }
2114            };
2115            let b = match &args[1] {
2116                Value::Text(s) => s.as_str(),
2117                other => {
2118                    return Err(EvalError::TypeMismatch {
2119                        detail: format!("similarity() needs text, got {:?}", other.data_type()),
2120                    });
2121                }
2122            };
2123            // PG returns REAL (f32) — we use Float (f64) and let
2124            // coerce_value narrow on assignment to a REAL column.
2125            Ok(Value::Float(spg_storage::trgm::similarity(a, b)))
2126        }
2127        "show_trgm" => {
2128            if args.len() != 1 {
2129                return Err(EvalError::TypeMismatch {
2130                    detail: format!("show_trgm() takes 1 arg, got {}", args.len()),
2131                });
2132            }
2133            if matches!(args[0], Value::Null) {
2134                return Ok(Value::Null);
2135            }
2136            let s = match &args[0] {
2137                Value::Text(s) => s.as_str(),
2138                other => {
2139                    return Err(EvalError::TypeMismatch {
2140                        detail: format!("show_trgm() needs text, got {:?}", other.data_type()),
2141                    });
2142                }
2143            };
2144            // PG returns the trigram set sorted lexicographically.
2145            // `extract_trigrams` already returns a BTreeSet so the
2146            // order is canonical.
2147            let trigrams: Vec<Option<String>> = spg_storage::trgm::extract_trigrams(s)
2148                .into_iter()
2149                .map(Some)
2150                .collect();
2151            Ok(Value::TextArray(trigrams))
2152        }
2153        other => Err(EvalError::TypeMismatch {
2154            detail: format!("unknown function `{other}`"),
2155        }),
2156    }
2157}
2158
2159/// v7.12.2 — `ts_rank([weights,] vec, query [, norm])`. v7.12.2
2160/// supports the canonical `(vec, query)` two-arg form mailrs uses;
2161/// optional weight-array / normalisation arguments error with an
2162/// "unsupported" message rather than silently changing semantics.
2163fn fts_ts_rank(args: &[Value]) -> Result<Value, EvalError> {
2164    let (vec, query) = parse_rank_args("ts_rank", args)?;
2165    match (vec, query) {
2166        (None, _) | (_, None) => Ok(Value::Null),
2167        (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank(&v, &q)))),
2168    }
2169}
2170
2171fn fts_ts_rank_cd(args: &[Value]) -> Result<Value, EvalError> {
2172    let (vec, query) = parse_rank_args("ts_rank_cd", args)?;
2173    match (vec, query) {
2174        (None, _) | (_, None) => Ok(Value::Null),
2175        (Some(v), Some(q)) => Ok(Value::Float(f64::from(crate::fts::ts_rank_cd(&v, &q)))),
2176    }
2177}
2178
2179fn parse_rank_args(
2180    name: &str,
2181    args: &[Value],
2182) -> Result<
2183    (
2184        Option<Vec<spg_storage::TsLexeme>>,
2185        Option<spg_storage::TsQueryAst>,
2186    ),
2187    EvalError,
2188> {
2189    if args.len() != 2 {
2190        return Err(EvalError::TypeMismatch {
2191            detail: format!(
2192                "{name}() takes 2 args in v7.12.2 (weights array + normalisation flag are v7.12.x carve-out), got {}",
2193                args.len()
2194            ),
2195        });
2196    }
2197    let vec = match &args[0] {
2198        Value::Null => None,
2199        Value::TsVector(v) => Some(v.clone()),
2200        other => {
2201            return Err(EvalError::TypeMismatch {
2202                detail: format!(
2203                    "{name}() first arg must be tsvector, got {:?}",
2204                    other.data_type()
2205                ),
2206            });
2207        }
2208    };
2209    let query = match &args[1] {
2210        Value::Null => None,
2211        Value::TsQuery(q) => Some(q.clone()),
2212        other => {
2213            return Err(EvalError::TypeMismatch {
2214                detail: format!(
2215                    "{name}() second arg must be tsquery, got {:?}",
2216                    other.data_type()
2217                ),
2218            });
2219        }
2220    };
2221    Ok((vec, query))
2222}
2223
2224/// v7.12.2 — `tsvector @@ tsquery` match operator. Either
2225/// ordering accepted (PG semantics). NULL on either side → NULL.
2226/// Anything that isn't tsvector/tsquery on either side is a type
2227/// mismatch. Returns BOOL.
2228fn ts_match(l: Value, r: Value) -> Result<Value, EvalError> {
2229    let (vec, query) = match (l, r) {
2230        (Value::Null, _) | (_, Value::Null) => return Ok(Value::Null),
2231        (Value::TsVector(v), Value::TsQuery(q)) => (v, q),
2232        (Value::TsQuery(q), Value::TsVector(v)) => (v, q),
2233        (l, r) => {
2234            return Err(EvalError::TypeMismatch {
2235                detail: format!(
2236                    "@@ requires (tsvector, tsquery), got ({:?}, {:?})",
2237                    l.data_type(),
2238                    r.data_type()
2239                ),
2240            });
2241        }
2242    };
2243    Ok(Value::Bool(crate::fts::ts_query_matches(&vec, &query)))
2244}
2245
2246/// v7.12.1 — `to_tsvector([config,] text)`. With one arg the
2247/// session-resolved `default_text_search_config` is used (defaults
2248/// to `simple` when unset); with two args the first picks the
2249/// config. NULL text → NULL.
2250fn fts_to_tsvector(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2251    let (config, text) = parse_fts_args("to_tsvector", args, ctx)?;
2252    match text {
2253        None => Ok(Value::Null),
2254        Some(t) => Ok(Value::TsVector(crate::fts::to_tsvector(config, &t))),
2255    }
2256}
2257
2258fn fts_plainto_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2259    let (config, text) = parse_fts_args("plainto_tsquery", args, ctx)?;
2260    match text {
2261        None => Ok(Value::Null),
2262        Some(t) => Ok(Value::TsQuery(crate::fts::plainto_tsquery(config, &t))),
2263    }
2264}
2265
2266fn fts_phraseto_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2267    let (config, text) = parse_fts_args("phraseto_tsquery", args, ctx)?;
2268    match text {
2269        None => Ok(Value::Null),
2270        Some(t) => Ok(Value::TsQuery(crate::fts::phraseto_tsquery(config, &t))),
2271    }
2272}
2273
2274fn fts_websearch_to_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2275    let (config, text) = parse_fts_args("websearch_to_tsquery", args, ctx)?;
2276    match text {
2277        None => Ok(Value::Null),
2278        Some(t) => Ok(Value::TsQuery(crate::fts::websearch_to_tsquery(config, &t))),
2279    }
2280}
2281
2282fn fts_to_tsquery(args: &[Value], ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
2283    let (config, text) = parse_fts_args("to_tsquery", args, ctx)?;
2284    match text {
2285        None => Ok(Value::Null),
2286        Some(t) => Ok(Value::TsQuery(crate::fts::to_tsquery(config, &t)?)),
2287    }
2288}
2289
2290/// Parse the `(config, text)` / `(text)` argument pair shared by
2291/// all FTS builders. Returns the resolved config + the text
2292/// payload (None when text is NULL). The one-arg form pulls the
2293/// config from the session's `default_text_search_config`.
2294fn parse_fts_args(
2295    name: &str,
2296    args: &[Value],
2297    ctx: &EvalContext<'_>,
2298) -> Result<(crate::fts::TsConfig, Option<String>), EvalError> {
2299    let (config_arg, text_arg) = match args {
2300        [t] => (None, t),
2301        [c, t] => (Some(c), t),
2302        _ => {
2303            return Err(EvalError::TypeMismatch {
2304                detail: format!("{name}() takes 1 or 2 args, got {}", args.len()),
2305            });
2306        }
2307    };
2308    let config = match config_arg {
2309        None => match ctx.default_text_search_config {
2310            Some(name_str) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
2311                EvalError::TypeMismatch {
2312                    detail: format!(
2313                        "text search config not implemented: {name_str:?} (supported: simple, english)"
2314                    ),
2315                }
2316            })?,
2317            None => crate::fts::TsConfig::Simple,
2318        },
2319        Some(Value::Null) => return Ok((crate::fts::TsConfig::Simple, None)),
2320        Some(Value::Text(name_str)) => crate::fts::TsConfig::from_name(name_str).ok_or_else(|| {
2321            EvalError::TypeMismatch {
2322                detail: format!(
2323                    "text search config not implemented: {name_str:?} (supported: simple, english)"
2324                ),
2325            }
2326        })?,
2327        Some(other) => {
2328            return Err(EvalError::TypeMismatch {
2329                detail: format!(
2330                    "{name}() config arg must be text, got {:?}",
2331                    other.data_type()
2332                ),
2333            });
2334        }
2335    };
2336    let text = match text_arg {
2337        Value::Null => None,
2338        Value::Text(s) => Some(s.clone()),
2339        other => {
2340            return Err(EvalError::TypeMismatch {
2341                detail: format!(
2342                    "{name}() text arg must be text, got {:?}",
2343                    other.data_type()
2344                ),
2345            });
2346        }
2347    };
2348    Ok((config, text))
2349}
2350
2351/// v6.4.3 — `encode(bytes_as_text, format)`. PG works on bytea
2352/// arguments; SPG's value space treats Text as the byte container
2353/// (raw UTF-8 bytes). Supported formats: base64 (PG default),
2354/// base64url (RFC 4648 §5), base32hex (RFC 4648 §7 extended-hex),
2355/// hex.
2356fn encode_text(args: &[Value]) -> Result<Value, EvalError> {
2357    if args.len() != 2 {
2358        return Err(EvalError::TypeMismatch {
2359            detail: format!("encode() takes 2 args, got {}", args.len()),
2360        });
2361    }
2362    if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2363        return Ok(Value::Null);
2364    }
2365    let bytes: &[u8] = match &args[0] {
2366        Value::Text(s) => s.as_bytes(),
2367        other => {
2368            return Err(EvalError::TypeMismatch {
2369                detail: format!("encode() expects text bytes, got {:?}", other.data_type()),
2370            });
2371        }
2372    };
2373    let fmt = match &args[1] {
2374        Value::Text(s) => s.to_ascii_lowercase(),
2375        other => {
2376            return Err(EvalError::TypeMismatch {
2377                detail: format!("encode() format must be text, got {:?}", other.data_type()),
2378            });
2379        }
2380    };
2381    let out = match fmt.as_str() {
2382        "base64" => b64_encode(bytes, B64_STD),
2383        "base64url" => b64_encode(bytes, B64_URL),
2384        "base32hex" => b32hex_encode(bytes),
2385        "hex" => hex_encode(bytes),
2386        other => {
2387            return Err(EvalError::TypeMismatch {
2388                detail: format!("encode(): unknown format `{other}`"),
2389            });
2390        }
2391    };
2392    Ok(Value::Text(out))
2393}
2394
2395/// v6.4.3 — `decode(text, format)`. Inverse of `encode`; returns
2396/// Text containing the raw decoded bytes (caller may CAST to bytea
2397/// equivalent if SPG adds bytea later).
2398fn decode_text(args: &[Value]) -> Result<Value, EvalError> {
2399    if args.len() != 2 {
2400        return Err(EvalError::TypeMismatch {
2401            detail: format!("decode() takes 2 args, got {}", args.len()),
2402        });
2403    }
2404    if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) {
2405        return Ok(Value::Null);
2406    }
2407    let text = match &args[0] {
2408        Value::Text(s) => s.as_str(),
2409        other => {
2410            return Err(EvalError::TypeMismatch {
2411                detail: format!("decode() expects text, got {:?}", other.data_type()),
2412            });
2413        }
2414    };
2415    let fmt = match &args[1] {
2416        Value::Text(s) => s.to_ascii_lowercase(),
2417        other => {
2418            return Err(EvalError::TypeMismatch {
2419                detail: format!("decode() format must be text, got {:?}", other.data_type()),
2420            });
2421        }
2422    };
2423    let bytes = match fmt.as_str() {
2424        "base64" => b64_decode(text, B64_STD)?,
2425        "base64url" => b64_decode(text, B64_URL)?,
2426        "base32hex" => b32hex_decode(text)?,
2427        "hex" => hex_decode(text)?,
2428        other => {
2429            return Err(EvalError::TypeMismatch {
2430                detail: format!("decode(): unknown format `{other}`"),
2431            });
2432        }
2433    };
2434    let s = String::from_utf8(bytes).map_err(|_| EvalError::TypeMismatch {
2435        detail: "decode(): result bytes are not valid UTF-8 (SPG stores raw bytes as Text)".into(),
2436    })?;
2437    Ok(Value::Text(s))
2438}
2439
2440/// v6.4.3 — `error_on_null(v)`. Returns `v` unchanged if non-NULL;
2441/// errors otherwise. Convenience to assert NOT NULL inside an
2442/// expression without wrapping it in COALESCE + raise hacks.
2443fn error_on_null(args: &[Value]) -> Result<Value, EvalError> {
2444    if args.len() != 1 {
2445        return Err(EvalError::TypeMismatch {
2446            detail: format!("error_on_null() takes 1 arg, got {}", args.len()),
2447        });
2448    }
2449    if matches!(args[0], Value::Null) {
2450        return Err(EvalError::TypeMismatch {
2451            detail: "error_on_null(): argument is NULL".into(),
2452        });
2453    }
2454    Ok(args[0].clone())
2455}
2456
2457// ── byte-level encoders ───────────────────────────────────────────
2458
/// Standard base64 alphabet: `A-Z`, `a-z`, `0-9`, then `+` and `/`.
/// Index `i` holds the character that encodes the 6-bit value `i`.
const B64_STD: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// URL-safe base64 alphabet: identical to `B64_STD` except the last two
/// characters are `-` and `_`.
const B64_URL: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
/// base32hex alphabet: `0-9` followed by `A-V`. Index `i` holds the
/// character that encodes the 5-bit value `i`.
const B32HEX_ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
2462
/// Base64-encode `bytes` using the 64-character table `alpha`.
///
/// Input is consumed in groups of three bytes, each producing four
/// output characters. A final group of one or two bytes is zero-filled
/// and the output is padded with `==` or `=` respectively, so the
/// result length is always a multiple of four.
fn b64_encode(bytes: &[u8], alpha: &[u8; 64]) -> String {
    let mut encoded = String::with_capacity((bytes.len() + 2) / 3 * 4);
    // Pick the alphabet character for the 6-bit field starting at `shift`.
    let symbol = |group: u32, shift: u32| alpha[((group >> shift) & 0x3f) as usize] as char;

    for chunk in bytes.chunks(3) {
        let first = u32::from(chunk[0]);
        let second = chunk.get(1).copied().map_or(0, u32::from);
        let third = chunk.get(2).copied().map_or(0, u32::from);
        let group = (first << 16) | (second << 8) | third;

        // The first two characters are always present.
        encoded.push(symbol(group, 18));
        encoded.push(symbol(group, 12));
        match chunk.len() {
            1 => encoded.push_str("=="),
            2 => {
                encoded.push(symbol(group, 6));
                encoded.push('=');
            }
            _ => {
                encoded.push(symbol(group, 6));
                encoded.push(symbol(group, 0));
            }
        }
    }
    encoded
}
2490
2491fn b64_decode(text: &str, alpha: &[u8; 64]) -> Result<Vec<u8>, EvalError> {
2492    let mut lookup = [255u8; 256];
2493    for (i, &c) in alpha.iter().enumerate() {
2494        lookup[c as usize] = i as u8;
2495    }
2496    let mut out = Vec::with_capacity(text.len() * 3 / 4);
2497    let mut buf: u32 = 0;
2498    let mut bits: u32 = 0;
2499    for c in text.bytes() {
2500        if c == b'=' {
2501            break;
2502        }
2503        if c == b'\n' || c == b'\r' || c == b' ' {
2504            continue;
2505        }
2506        let v = lookup[c as usize];
2507        if v == 255 {
2508            return Err(EvalError::TypeMismatch {
2509                detail: format!("decode(base64): invalid char {:?}", c as char),
2510            });
2511        }
2512        buf = (buf << 6) | v as u32;
2513        bits += 6;
2514        if bits >= 8 {
2515            bits -= 8;
2516            out.push(((buf >> bits) & 0xff) as u8);
2517        }
2518    }
2519    Ok(out)
2520}
2521
2522fn b32hex_encode(bytes: &[u8]) -> String {
2523    let mut out = String::with_capacity((bytes.len() * 8 + 4) / 5);
2524    let mut buf: u64 = 0;
2525    let mut bits: u32 = 0;
2526    for &b in bytes {
2527        buf = (buf << 8) | b as u64;
2528        bits += 8;
2529        while bits >= 5 {
2530            bits -= 5;
2531            out.push(B32HEX_ALPHABET[((buf >> bits) & 0x1f) as usize] as char);
2532        }
2533    }
2534    if bits > 0 {
2535        out.push(B32HEX_ALPHABET[((buf << (5 - bits)) & 0x1f) as usize] as char);
2536    }
2537    // Pad to multiple of 8.
2538    while out.len() % 8 != 0 {
2539        out.push('=');
2540    }
2541    out
2542}
2543
2544fn b32hex_decode(text: &str) -> Result<Vec<u8>, EvalError> {
2545    let mut lookup = [255u8; 256];
2546    for (i, &c) in B32HEX_ALPHABET.iter().enumerate() {
2547        lookup[c as usize] = i as u8;
2548        // base32hex is case-insensitive — also map lowercase.
2549        let lower = (c as char).to_ascii_lowercase() as u8;
2550        lookup[lower as usize] = i as u8;
2551    }
2552    let mut out = Vec::with_capacity(text.len() * 5 / 8);
2553    let mut buf: u64 = 0;
2554    let mut bits: u32 = 0;
2555    for c in text.bytes() {
2556        if c == b'=' {
2557            break;
2558        }
2559        if c == b'\n' || c == b'\r' || c == b' ' {
2560            continue;
2561        }
2562        let v = lookup[c as usize];
2563        if v == 255 {
2564            return Err(EvalError::TypeMismatch {
2565                detail: format!("decode(base32hex): invalid char {:?}", c as char),
2566            });
2567        }
2568        buf = (buf << 5) | v as u64;
2569        bits += 5;
2570        if bits >= 8 {
2571            bits -= 8;
2572            out.push(((buf >> bits) & 0xff) as u8);
2573        }
2574    }
2575    Ok(out)
2576}
2577
/// Render `bytes` as lowercase hexadecimal, two characters per byte
/// (high nibble first).
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut hex = String::with_capacity(bytes.len() * 2);
    hex.extend(
        bytes
            .iter()
            .flat_map(|&byte| [byte >> 4, byte & 0x0f])
            .map(|nibble| char::from(DIGITS[usize::from(nibble)])),
    );
    hex
}
2587
2588fn hex_decode(text: &str) -> Result<Vec<u8>, EvalError> {
2589    let trimmed = text.trim();
2590    if trimmed.len() % 2 != 0 {
2591        return Err(EvalError::TypeMismatch {
2592            detail: "decode(hex): input length must be even".into(),
2593        });
2594    }
2595    let mut out = Vec::with_capacity(trimmed.len() / 2);
2596    let mut hi: u8 = 0;
2597    for (i, c) in trimmed.bytes().enumerate() {
2598        let v = match c {
2599            b'0'..=b'9' => c - b'0',
2600            b'a'..=b'f' => c - b'a' + 10,
2601            b'A'..=b'F' => c - b'A' + 10,
2602            _ => {
2603                return Err(EvalError::TypeMismatch {
2604                    detail: format!("decode(hex): invalid char {:?}", c as char),
2605                });
2606            }
2607        };
2608        if i % 2 == 0 {
2609            hi = v;
2610        } else {
2611            out.push((hi << 4) | v);
2612        }
2613    }
2614    Ok(out)
2615}
2616
2617/// `date_part(field_text, source)` — function form of `EXTRACT(field FROM
2618/// source)`. Same component dispatch (DATE / TIMESTAMP / INTERVAL) and
2619/// same `BigInt` return shape; PG returns double precision but we keep the
2620/// integer convention so the runner's `query I` shape works unchanged.
2621fn date_part(args: &[Value]) -> Result<Value, EvalError> {
2622    use spg_sql::ast::ExtractField as F;
2623    if args.len() != 2 {
2624        return Err(EvalError::TypeMismatch {
2625            detail: format!("date_part() takes 2 args, got {}", args.len()),
2626        });
2627    }
2628    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
2629        return Ok(Value::Null);
2630    }
2631    let Value::Text(field_name) = &args[0] else {
2632        return Err(EvalError::TypeMismatch {
2633            detail: format!(
2634                "date_part() needs a text field, got {:?}",
2635                args[0].data_type()
2636            ),
2637        });
2638    };
2639    let field = match field_name.to_ascii_lowercase().as_str() {
2640        "year" => F::Year,
2641        "month" => F::Month,
2642        "day" => F::Day,
2643        "hour" => F::Hour,
2644        "minute" => F::Minute,
2645        "second" => F::Second,
2646        "microsecond" | "microseconds" => F::Microsecond,
2647        other => {
2648            return Err(EvalError::TypeMismatch {
2649                detail: format!(
2650                    "unknown date_part field {other:?}; \
2651                     supported: year, month, day, hour, minute, second, microsecond"
2652                ),
2653            });
2654        }
2655    };
2656    extract_field(field, &args[1])
2657}
2658
2659/// `age(t1, t2)` — return `t1 - t2` as an INTERVAL. v2.12 produces a
2660/// micros-only interval (no months normalisation) because PG's
2661/// month-justification rule is sensitive to the day-of-month walk and
2662/// adds material complexity for marginal corpus value.
2663///
2664/// `age(t)` (single-arg form) is intentionally unsupported in v2.12:
2665/// the dispatcher errors instead of guessing a clock source. Callers
2666/// who want PG's `age(t)` semantics should write `age(CURRENT_DATE, t)`
2667/// explicitly so the clock reference is visible at the SQL layer.
2668fn age(args: &[Value]) -> Result<Value, EvalError> {
2669    if args.is_empty() || args.len() > 2 {
2670        return Err(EvalError::TypeMismatch {
2671            detail: format!("age() takes 1 or 2 args, got {}", args.len()),
2672        });
2673    }
2674    if args.iter().any(|v| matches!(v, Value::Null)) {
2675        return Ok(Value::Null);
2676    }
2677    // Coerce to TIMESTAMP micros — DATE lifts to midnight; TIMESTAMP
2678    // stays as-is; anything else errors.
2679    let to_micros = |v: &Value| -> Result<i64, EvalError> {
2680        match v {
2681            Value::Timestamp(t) => Ok(*t),
2682            Value::Date(d) => Ok(i64::from(*d) * 86_400_000_000),
2683            other => Err(EvalError::TypeMismatch {
2684                detail: format!("age() needs DATE or TIMESTAMP, got {:?}", other.data_type()),
2685            }),
2686        }
2687    };
2688    if args.len() == 1 {
2689        return Err(EvalError::TypeMismatch {
2690            detail: "single-arg age() is unsupported in v2.12 \
2691                     (use age(CURRENT_DATE, t) explicitly)"
2692                .into(),
2693        });
2694    }
2695    let a = to_micros(&args[0])?;
2696    let b = to_micros(&args[1])?;
2697    let delta = a.checked_sub(b).ok_or(EvalError::TypeMismatch {
2698        detail: "age() subtraction overflows i64 microseconds".into(),
2699    })?;
2700    Ok(Value::Interval {
2701        months: 0,
2702        micros: delta,
2703    })
2704}
2705
2706// `to_char(value, format)` — render a DATE / TIMESTAMP through a PG
2707// format template. Supports the high-traffic placeholders:
2708//   YYYY YY MM Mon Month DD HH24 HH12 MI SS MS US AM PM
2709// Unrecognised characters pass through literally so the template's
2710// punctuation ('-', ':', ' ', '/') needs no escape mechanism.
2711
2712// ─── v7.17.0 Phase 7 — INET / CIDR text helpers ───────────────────────
2713//
2714// SPG stores network address types as Text. The host() / network() /
2715// masklen() helpers parse the textual `addr[/mask]` form and return
2716// the constituent pieces, matching PG's contract for the dominant
2717// customer surface (Django ORM / Rails ORM normalisation).
2718
2719fn inet_host(args: &[Value]) -> Result<Value, EvalError> {
2720    let s = match args {
2721        [Value::Text(s)] => s.clone(),
2722        [Value::Null] => return Ok(Value::Null),
2723        _ => {
2724            return Err(EvalError::TypeMismatch {
2725                detail: alloc::format!("host() takes one TEXT arg, got {} args", args.len()),
2726            });
2727        }
2728    };
2729    let host = s.split('/').next().unwrap_or("").to_string();
2730    Ok(Value::Text(host))
2731}
2732
2733fn inet_network(args: &[Value]) -> Result<Value, EvalError> {
2734    let s = match args {
2735        [Value::Text(s)] => s.clone(),
2736        [Value::Null] => return Ok(Value::Null),
2737        _ => {
2738            return Err(EvalError::TypeMismatch {
2739                detail: alloc::format!("network() takes one TEXT arg, got {} args", args.len()),
2740            });
2741        }
2742    };
2743    // For a `host/mask` form return the masked-network address.
2744    // SPG ships the simple "drop trailing octets per byte" path
2745    // for IPv4; full bit-level masking is out of v7.17 scope.
2746    let mut split = s.splitn(2, '/');
2747    let host = split.next().unwrap_or("").to_string();
2748    let mask: u32 = split.next().and_then(|m| m.parse().ok()).unwrap_or(32);
2749    if !host.contains('.') {
2750        // IPv6 / MACADDR — return the input unmasked.
2751        return Ok(Value::Text(s));
2752    }
2753    let octets: Vec<&str> = host.split('.').collect();
2754    if octets.len() != 4 {
2755        return Ok(Value::Text(s));
2756    }
2757    let keep_bytes = ((mask + 7) / 8) as usize;
2758    let mut out = alloc::string::String::new();
2759    for (i, oct) in octets.iter().enumerate() {
2760        if i > 0 {
2761            out.push('.');
2762        }
2763        if i < keep_bytes {
2764            out.push_str(oct);
2765        } else {
2766            out.push('0');
2767        }
2768    }
2769    out.push('/');
2770    out.push_str(&mask.to_string());
2771    Ok(Value::Text(out))
2772}
2773
2774fn inet_masklen(args: &[Value]) -> Result<Value, EvalError> {
2775    let s = match args {
2776        [Value::Text(s)] => s.clone(),
2777        [Value::Null] => return Ok(Value::Null),
2778        _ => {
2779            return Err(EvalError::TypeMismatch {
2780                detail: alloc::format!("masklen() takes one TEXT arg, got {} args", args.len()),
2781            });
2782        }
2783    };
2784    let mask: i32 = s
2785        .split_once('/')
2786        .and_then(|(_, m)| m.parse().ok())
2787        .unwrap_or(32);
2788    Ok(Value::Int(mask))
2789}
2790
2791// ─── v7.17.0 Phase 3.P0-47 — INET / CIDR containment + overlap ────────
2792//
2793// SPG stores INET / CIDR as Text (Phase 7 design); these helpers parse
2794// the textual `addr[/mask]` form into a (family, bytes, prefix_bits)
2795// triple and implement PG's network-comparison operators on that
2796// representation.
2797//
2798// PG semantics:
2799//   * `<<`  — strictly contained-in (LHS ⊊ RHS)
2800//   * `<<=` — contained-in-or-equal (LHS ⊆ RHS)
2801//   * `>>`, `>>=` — mirrors of the above
2802//   * `&&`  — overlap (either LHS ⊆ RHS or RHS ⊆ LHS)
2803//
2804// NULL on either side → NULL (3VL). Mixed family (v4 vs v6) is never
2805// contained / never overlaps but is not an error — same as PG.
2806
/// Parsed inet network: address bytes (4 for v4, 16 for v6) and the
/// network prefix length in bits. Built by `parse_inet_text` and
/// consumed by the containment / equality helpers below.
struct InetNet {
    /// Address bytes, most significant first. An IPv4 address occupies
    /// the first four bytes and the remaining twelve are zero.
    bytes: [u8; 16],
    /// 4 for IPv4, 16 for IPv6.
    family_bytes: u8,
    /// 0..=32 for v4, 0..=128 for v6.
    prefix_bits: u8,
}
2816
2817fn parse_inet_text(s: &str) -> Option<InetNet> {
2818    let mut split = s.splitn(2, '/');
2819    let host = split.next()?;
2820    let mask_str = split.next();
2821    if host.contains(':') {
2822        let bytes = parse_ipv6(host)?;
2823        let prefix_bits = match mask_str {
2824            Some(m) => m.parse::<u8>().ok().filter(|&n| n <= 128)?,
2825            None => 128,
2826        };
2827        let mut out = [0u8; 16];
2828        out.copy_from_slice(&bytes);
2829        Some(InetNet {
2830            bytes: out,
2831            family_bytes: 16,
2832            prefix_bits,
2833        })
2834    } else {
2835        let bytes = parse_ipv4(host)?;
2836        let prefix_bits = match mask_str {
2837            Some(m) => m.parse::<u8>().ok().filter(|&n| n <= 32)?,
2838            None => 32,
2839        };
2840        let mut out = [0u8; 16];
2841        out[..4].copy_from_slice(&bytes);
2842        Some(InetNet {
2843            bytes: out,
2844            family_bytes: 4,
2845            prefix_bits,
2846        })
2847    }
2848}
2849
/// Parse a dotted-quad IPv4 address (`a.b.c.d`) into its four bytes.
///
/// Exactly four dot-separated parts are required. Each part must be a
/// non-empty run of ASCII digits whose value fits in a `u8`. The
/// explicit digit check matters because `str::parse::<u8>` on its own
/// accepts a leading `+`, which would let `+1.2.3.4` through.
///
/// Returns `None` for anything else.
fn parse_ipv4(s: &str) -> Option<[u8; 4]> {
    let mut parts = s.split('.');
    let mut out = [0u8; 4];
    for slot in &mut out {
        let part = parts.next()?;
        if part.is_empty() || !part.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        *slot = part.parse::<u8>().ok()?;
    }
    // A fifth part means the input was not a dotted quad.
    if parts.next().is_some() {
        return None;
    }
    Some(out)
}
2861
/// Parse a textual IPv6 address into 16 bytes (most significant first).
///
/// Supports the full eight-group form and a single `::` zero-run
/// shorthand. Each group must be one to four hexadecimal digits; the
/// explicit check matters because `u16::from_str_radix` on its own
/// accepts a leading `+` and any number of leading zeros, which would
/// let `+1::` or `::00001` through. Embedded dotted-quad IPv4 tails are
/// not supported.
///
/// Returns `None` for anything else.
fn parse_ipv6(s: &str) -> Option<[u8; 16]> {
    /// One colon-separated group: 1–4 hex digits and nothing else.
    fn parse_group(group: &str) -> Option<u16> {
        if group.is_empty() || group.len() > 4 || !group.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        u16::from_str_radix(group, 16).ok()
    }

    /// Number of colon-separated groups in `part` (zero when empty).
    fn group_count(part: &str) -> usize {
        if part.is_empty() {
            0
        } else {
            part.split(':').count()
        }
    }

    // Split on the `::` shorthand at most once (first occurrence). A
    // second `::` leaves an empty group in the tail, which is rejected
    // by `parse_group`.
    let (head, tail) = match s.split_once("::") {
        Some((head, tail)) => (head, Some(tail)),
        None => (s, None),
    };
    let head_len = group_count(head);
    let tail_len = tail.map_or(0, group_count);

    // Without `::` we need exactly 8 groups; with `::` we need ≤ 7.
    match tail {
        None if head_len != 8 => return None,
        Some(_) if head_len + tail_len > 7 => return None,
        _ => {}
    }

    let mut words = [0u16; 8];
    if head_len > 0 {
        for (slot, group) in words.iter_mut().zip(head.split(':')) {
            *slot = parse_group(group)?;
        }
    }
    if let Some(tail) = tail {
        if tail_len > 0 {
            // Tail groups are right-aligned; the gap in between stays zero.
            for (slot, group) in words[8 - tail_len..].iter_mut().zip(tail.split(':')) {
                *slot = parse_group(group)?;
            }
        }
    }

    let mut out = [0u8; 16];
    for (pair, word) in out.chunks_exact_mut(2).zip(words.iter()) {
        pair.copy_from_slice(&word.to_be_bytes());
    }
    Some(out)
}
2902
2903/// Compare the first `prefix_bits` bits of `a` and `b`. Returns true if
2904/// they match. `prefix_bits` must not exceed the family size.
2905fn network_prefix_eq(a: &InetNet, b: &InetNet, prefix_bits: u8) -> bool {
2906    let full_bytes = (prefix_bits / 8) as usize;
2907    if a.bytes[..full_bytes] != b.bytes[..full_bytes] {
2908        return false;
2909    }
2910    let extra = prefix_bits % 8;
2911    if extra == 0 {
2912        return true;
2913    }
2914    let mask: u8 = 0xff << (8 - extra);
2915    (a.bytes[full_bytes] & mask) == (b.bytes[full_bytes] & mask)
2916}
2917
2918/// True iff network `a` is fully contained in network `b` (a ⊆ b).
2919fn inet_contained_eq(a: &InetNet, b: &InetNet) -> bool {
2920    if a.family_bytes != b.family_bytes {
2921        return false;
2922    }
2923    if a.prefix_bits < b.prefix_bits {
2924        return false;
2925    }
2926    network_prefix_eq(a, b, b.prefix_bits)
2927}
2928
2929/// True iff a and b are exactly the same network (same family + same
2930/// prefix + same masked address).
2931fn inet_networks_equal(a: &InetNet, b: &InetNet) -> bool {
2932    if a.family_bytes != b.family_bytes {
2933        return false;
2934    }
2935    if a.prefix_bits != b.prefix_bits {
2936        return false;
2937    }
2938    network_prefix_eq(a, b, a.prefix_bits)
2939}
2940
2941fn inet_op_bool_result(op: BinOp, l: &Value, r: &Value) -> Result<Value, EvalError> {
2942    if matches!(l, Value::Null) || matches!(r, Value::Null) {
2943        return Ok(Value::Null);
2944    }
2945    let (lt, rt) = match (l, r) {
2946        (Value::Text(a), Value::Text(b)) => (a, b),
2947        _ => {
2948            return Err(EvalError::TypeMismatch {
2949                detail: format!(
2950                    "inet operator requires TEXT/INET operands, got {:?} and {:?}",
2951                    l.data_type(),
2952                    r.data_type()
2953                ),
2954            });
2955        }
2956    };
2957    let a = parse_inet_text(lt).ok_or_else(|| EvalError::TypeMismatch {
2958        detail: format!("invalid inet text: {:?}", lt),
2959    })?;
2960    let b = parse_inet_text(rt).ok_or_else(|| EvalError::TypeMismatch {
2961        detail: format!("invalid inet text: {:?}", rt),
2962    })?;
2963    let result = match op {
2964        BinOp::InetContainedByEq => inet_contained_eq(&a, &b),
2965        BinOp::InetContainedBy => inet_contained_eq(&a, &b) && !inet_networks_equal(&a, &b),
2966        BinOp::InetContainsEq => inet_contained_eq(&b, &a),
2967        BinOp::InetContains => inet_contained_eq(&b, &a) && !inet_networks_equal(&a, &b),
2968        BinOp::InetOverlap => inet_contained_eq(&a, &b) || inet_contained_eq(&b, &a),
2969        _ => unreachable!("inet_op_bool_result called with non-inet op"),
2970    };
2971    Ok(Value::Bool(result))
2972}
2973
2974// ─── v7.17.0 Phase 3.7 — minimal POSIX-ERE-shaped regex matcher ───────
2975//
2976// SPG-engine is `#![no_std]` and has no external regex dependency, so
2977// this module hand-implements the subset of PG's regex needed by the
2978// dominant customer patterns. Supported syntax:
2979//
2980//   * literal characters (with `\.`, `\*`, `\+`, `\?`, `\(`, `\)`,
2981//     `\[`, `\]`, `\\`, `\^`, `\$`, `\|` escapes)
2982//   * `.` — any single character
2983//   * `*`, `+`, `?` — greedy quantifiers
2984//   * character classes: `[abc]`, `[^abc]`, `[a-z0-9_]`
2985//   * shortcut classes: `\d` `\D` `\w` `\W` `\s` `\S`
2986//   * anchors `^` `$`
2987//   * non-capturing groups `(...)`
2988//   * alternation `|`
2989//
// NOT supported in v7.17 (not every item below is rejected at compile time):
2991//   * backreferences `\1`
2992//   * lookaround `(?=…)` `(?<=…)`
2993//   * named captures
2994//   * inline flag groups `(?i)`
2995//   * lazy quantifiers `*?` `+?` `??` — patterns containing `?` after
2996//     a quantifier are accepted but interpreted as the greedy form
2997//     (this is the v7.17 stop-gap; customers needing lazy semantics
2998//     should preprocess the pattern)
2999//   * counted repetition `{n,m}`
3000//
3001// The matcher uses a backtracking NFA-shaped walk; performance is fine
3002// for the small strings PG regex functions usually operate on.
3003
3004#[derive(Debug, Clone)]
3005enum ReNode {
3006    /// Single literal byte. ASCII fast-path; non-ASCII falls through
3007    /// to Any since the engine doesn't decode UTF-8 here.
3008    Literal(char),
3009    /// Any single character.
3010    AnyChar,
3011    /// Character class: (positive members list, negated flag).
3012    Class {
3013        members: Vec<ClassMember>,
3014        negated: bool,
3015    },
3016    /// Anchor start.
3017    Start,
3018    /// Anchor end.
3019    End,
3020    /// Greedy quantifier.
3021    Quant {
3022        inner: Box<ReNode>,
3023        min: usize,
3024        max: Option<usize>,
3025    },
3026    /// Concatenation of sub-nodes.
3027    Concat(Vec<ReNode>),
3028    /// Alternation.
3029    Alt(Vec<ReNode>),
3030}
3031
/// One entry of a character class.
#[derive(Debug, Clone)]
enum ClassMember {
    /// A single listed character.
    Single(char),
    /// An inclusive `lo-hi` character range.
    Range(char, char),
}
3037
3038fn re_compile(pat: &str) -> Result<ReNode, EvalError> {
3039    let chars: Vec<char> = pat.chars().collect();
3040    let mut p = 0;
3041    let n = re_parse_alt(&chars, &mut p)?;
3042    if p != chars.len() {
3043        return Err(EvalError::TypeMismatch {
3044            detail: alloc::format!("regex compile: trailing chars at pos {p} in {pat:?}"),
3045        });
3046    }
3047    Ok(n)
3048}
3049
3050fn re_parse_alt(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3051    let mut branches = alloc::vec![re_parse_concat(chars, p)?];
3052    while *p < chars.len() && chars[*p] == '|' {
3053        *p += 1;
3054        branches.push(re_parse_concat(chars, p)?);
3055    }
3056    if branches.len() == 1 {
3057        Ok(branches.pop().unwrap())
3058    } else {
3059        Ok(ReNode::Alt(branches))
3060    }
3061}
3062
3063fn re_parse_concat(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3064    let mut items: Vec<ReNode> = Vec::new();
3065    while *p < chars.len() {
3066        let c = chars[*p];
3067        if c == '|' || c == ')' {
3068            break;
3069        }
3070        let atom = re_parse_atom(chars, p)?;
3071        // Optional quantifier suffix.
3072        let quantified = if *p < chars.len() {
3073            match chars[*p] {
3074                '*' => {
3075                    *p += 1;
3076                    // v7.17 stop-gap: tolerate `*?` lazy quantifier
3077                    // by treating it as greedy. Skip the trailing
3078                    // `?` if present.
3079                    if *p < chars.len() && chars[*p] == '?' {
3080                        *p += 1;
3081                    }
3082                    ReNode::Quant {
3083                        inner: Box::new(atom),
3084                        min: 0,
3085                        max: None,
3086                    }
3087                }
3088                '+' => {
3089                    *p += 1;
3090                    if *p < chars.len() && chars[*p] == '?' {
3091                        *p += 1;
3092                    }
3093                    ReNode::Quant {
3094                        inner: Box::new(atom),
3095                        min: 1,
3096                        max: None,
3097                    }
3098                }
3099                '?' => {
3100                    *p += 1;
3101                    ReNode::Quant {
3102                        inner: Box::new(atom),
3103                        min: 0,
3104                        max: Some(1),
3105                    }
3106                }
3107                _ => atom,
3108            }
3109        } else {
3110            atom
3111        };
3112        items.push(quantified);
3113    }
3114    if items.len() == 1 {
3115        Ok(items.pop().unwrap())
3116    } else {
3117        Ok(ReNode::Concat(items))
3118    }
3119}
3120
3121fn re_parse_atom(chars: &[char], p: &mut usize) -> Result<ReNode, EvalError> {
3122    let c = chars[*p];
3123    match c {
3124        '(' => {
3125            *p += 1;
3126            let inner = re_parse_alt(chars, p)?;
3127            if *p >= chars.len() || chars[*p] != ')' {
3128                return Err(EvalError::TypeMismatch {
3129                    detail: "regex compile: unmatched '('".into(),
3130                });
3131            }
3132            *p += 1;
3133            Ok(inner)
3134        }
3135        '[' => {
3136            *p += 1;
3137            let mut negated = false;
3138            if *p < chars.len() && chars[*p] == '^' {
3139                negated = true;
3140                *p += 1;
3141            }
3142            let mut members: Vec<ClassMember> = Vec::new();
3143            while *p < chars.len() && chars[*p] != ']' {
3144                let start = chars[*p];
3145                *p += 1;
3146                if *p + 1 < chars.len() && chars[*p] == '-' && chars[*p + 1] != ']' {
3147                    let end = chars[*p + 1];
3148                    *p += 2;
3149                    members.push(ClassMember::Range(start, end));
3150                } else {
3151                    members.push(ClassMember::Single(start));
3152                }
3153            }
3154            if *p >= chars.len() {
3155                return Err(EvalError::TypeMismatch {
3156                    detail: "regex compile: unmatched '['".into(),
3157                });
3158            }
3159            *p += 1; // consume ]
3160            Ok(ReNode::Class { members, negated })
3161        }
3162        '.' => {
3163            *p += 1;
3164            Ok(ReNode::AnyChar)
3165        }
3166        '^' => {
3167            *p += 1;
3168            Ok(ReNode::Start)
3169        }
3170        '$' => {
3171            *p += 1;
3172            Ok(ReNode::End)
3173        }
3174        '\\' => {
3175            *p += 1;
3176            if *p >= chars.len() {
3177                return Err(EvalError::TypeMismatch {
3178                    detail: "regex compile: dangling backslash".into(),
3179                });
3180            }
3181            let esc = chars[*p];
3182            *p += 1;
3183            match esc {
3184                'd' => Ok(ReNode::Class {
3185                    members: alloc::vec![ClassMember::Range('0', '9')],
3186                    negated: false,
3187                }),
3188                'D' => Ok(ReNode::Class {
3189                    members: alloc::vec![ClassMember::Range('0', '9')],
3190                    negated: true,
3191                }),
3192                'w' => Ok(ReNode::Class {
3193                    members: alloc::vec![
3194                        ClassMember::Range('a', 'z'),
3195                        ClassMember::Range('A', 'Z'),
3196                        ClassMember::Range('0', '9'),
3197                        ClassMember::Single('_'),
3198                    ],
3199                    negated: false,
3200                }),
3201                'W' => Ok(ReNode::Class {
3202                    members: alloc::vec![
3203                        ClassMember::Range('a', 'z'),
3204                        ClassMember::Range('A', 'Z'),
3205                        ClassMember::Range('0', '9'),
3206                        ClassMember::Single('_'),
3207                    ],
3208                    negated: true,
3209                }),
3210                's' => Ok(ReNode::Class {
3211                    members: alloc::vec![
3212                        ClassMember::Single(' '),
3213                        ClassMember::Single('\t'),
3214                        ClassMember::Single('\n'),
3215                        ClassMember::Single('\r'),
3216                    ],
3217                    negated: false,
3218                }),
3219                'S' => Ok(ReNode::Class {
3220                    members: alloc::vec![
3221                        ClassMember::Single(' '),
3222                        ClassMember::Single('\t'),
3223                        ClassMember::Single('\n'),
3224                        ClassMember::Single('\r'),
3225                    ],
3226                    negated: true,
3227                }),
3228                other => Ok(ReNode::Literal(other)),
3229            }
3230        }
3231        other => {
3232            *p += 1;
3233            Ok(ReNode::Literal(other))
3234        }
3235    }
3236}
3237
3238fn class_matches(member: &ClassMember, c: char) -> bool {
3239    match member {
3240        ClassMember::Single(s) => *s == c,
3241        ClassMember::Range(a, b) => c >= *a && c <= *b,
3242    }
3243}
3244
3245/// Try to match `node` starting at `pos` in `s`. Returns Some(end)
3246/// of the matched span (exclusive), or None if no match. Greedy
3247/// backtracking: each quantifier tries the longest viable repeat
3248/// and shrinks if the tail doesn't fit.
3249fn re_match_at(node: &ReNode, s: &[char], pos: usize) -> Option<usize> {
3250    match node {
3251        ReNode::Literal(c) => {
3252            if s.get(pos).copied() == Some(*c) {
3253                Some(pos + 1)
3254            } else {
3255                None
3256            }
3257        }
3258        ReNode::AnyChar => {
3259            if pos < s.len() && s[pos] != '\n' {
3260                Some(pos + 1)
3261            } else {
3262                None
3263            }
3264        }
3265        ReNode::Class { members, negated } => {
3266            let c = *s.get(pos)?;
3267            let hit = members.iter().any(|m| class_matches(m, c));
3268            if hit ^ negated { Some(pos + 1) } else { None }
3269        }
3270        ReNode::Start => {
3271            if pos == 0 {
3272                Some(pos)
3273            } else {
3274                None
3275            }
3276        }
3277        ReNode::End => {
3278            if pos == s.len() {
3279                Some(pos)
3280            } else {
3281                None
3282            }
3283        }
3284        ReNode::Concat(items) => {
3285            let mut p = pos;
3286            for it in items {
3287                p = re_match_at(it, s, p)?;
3288            }
3289            Some(p)
3290        }
3291        ReNode::Alt(branches) => {
3292            for b in branches {
3293                if let Some(p) = re_match_at(b, s, pos) {
3294                    return Some(p);
3295                }
3296            }
3297            None
3298        }
3299        ReNode::Quant { inner, min, max } => {
3300            // Greedy: gather as many matches as possible, then
3301            // shrink. v7.17 stop-gap doesn't continue the outer
3302            // tail match (we're at a leaf in concat already), so
3303            // we just return the longest match.
3304            let mut count = 0usize;
3305            let mut p = pos;
3306            loop {
3307                if let Some(cap) = max {
3308                    if count >= *cap {
3309                        break;
3310                    }
3311                }
3312                match re_match_at(inner, s, p) {
3313                    Some(np) if np > p => {
3314                        p = np;
3315                        count += 1;
3316                    }
3317                    _ => break,
3318                }
3319            }
3320            if count < *min {
3321                return None;
3322            }
3323            Some(p)
3324        }
3325    }
3326}
3327
3328/// Find the first match of `node` in `s`, starting at or after
3329/// `from`. Returns the (start, end) char positions of the match.
3330fn re_find(node: &ReNode, s: &[char], from: usize) -> Option<(usize, usize)> {
3331    let mut start = from;
3332    loop {
3333        if let Some(end) = re_match_at(node, s, start) {
3334            return Some((start, end));
3335        }
3336        if start >= s.len() {
3337            return None;
3338        }
3339        start += 1;
3340    }
3341}
3342
3343/// v7.17.0 Phase 3.7 — `regexp_matches(s, pat)` returns the FIRST
3344/// match as a single-element TEXT[]. (PG returns one row per match
3345/// across all captures; SPG simplifies to first-match-only TEXT[].
3346/// The `g` flag form `regexp_matches(s, pat, 'g')` falls through
3347/// to all-matches concatenation as a flat array.)
3348fn regexp_matches(args: &[Value]) -> Result<Value, EvalError> {
3349    let (text, pat, all_matches) = match args.len() {
3350        2 => (text_arg(&args[0])?, text_arg(&args[1])?, false),
3351        3 => {
3352            let flags = text_arg(&args[2])?.unwrap_or_default();
3353            (
3354                text_arg(&args[0])?,
3355                text_arg(&args[1])?,
3356                flags.contains('g'),
3357            )
3358        }
3359        n => {
3360            return Err(EvalError::TypeMismatch {
3361                detail: alloc::format!("regexp_matches() takes 2 or 3 args, got {n}"),
3362            });
3363        }
3364    };
3365    let Some(text) = text else {
3366        return Ok(Value::Null);
3367    };
3368    let Some(pat) = pat else {
3369        return Ok(Value::Null);
3370    };
3371    let node = re_compile(&pat)?;
3372    let chars: Vec<char> = text.chars().collect();
3373    let mut out: Vec<Option<String>> = Vec::new();
3374    let mut from = 0usize;
3375    while let Some((s_pos, e_pos)) = re_find(&node, &chars, from) {
3376        out.push(Some(chars[s_pos..e_pos].iter().collect()));
3377        if !all_matches {
3378            break;
3379        }
3380        // Advance past the match; if zero-width, step one.
3381        from = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3382        if from > chars.len() {
3383            break;
3384        }
3385    }
3386    Ok(Value::TextArray(out))
3387}
3388
3389/// v7.17.0 Phase 3.7 — `regexp_replace(s, pat, repl[, flags])`.
3390/// `flags` containing `g` replaces all matches; absent flag
3391/// replaces only the first match (PG default).
3392fn regexp_replace(args: &[Value]) -> Result<Value, EvalError> {
3393    let (text, pat, repl, flags) = match args.len() {
3394        3 => (
3395            text_arg(&args[0])?,
3396            text_arg(&args[1])?,
3397            text_arg(&args[2])?,
3398            String::new(),
3399        ),
3400        4 => (
3401            text_arg(&args[0])?,
3402            text_arg(&args[1])?,
3403            text_arg(&args[2])?,
3404            text_arg(&args[3])?.unwrap_or_default(),
3405        ),
3406        n => {
3407            return Err(EvalError::TypeMismatch {
3408                detail: alloc::format!("regexp_replace() takes 3 or 4 args, got {n}"),
3409            });
3410        }
3411    };
3412    let Some(text) = text else {
3413        return Ok(Value::Null);
3414    };
3415    let Some(pat) = pat else {
3416        return Ok(Value::Null);
3417    };
3418    let Some(repl) = repl else {
3419        return Ok(Value::Null);
3420    };
3421    let global = flags.contains('g');
3422    let node = re_compile(&pat)?;
3423    let chars: Vec<char> = text.chars().collect();
3424    let mut out = String::with_capacity(text.len());
3425    let mut from = 0usize;
3426    loop {
3427        match re_find(&node, &chars, from) {
3428            Some((s_pos, e_pos)) => {
3429                out.extend(chars[from..s_pos].iter());
3430                out.push_str(&repl);
3431                let step = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3432                from = step;
3433                if !global {
3434                    if from <= chars.len() {
3435                        out.extend(chars[from..].iter());
3436                    }
3437                    return Ok(Value::Text(out));
3438                }
3439                if from > chars.len() {
3440                    break;
3441                }
3442            }
3443            None => {
3444                out.extend(chars[from..].iter());
3445                break;
3446            }
3447        }
3448    }
3449    Ok(Value::Text(out))
3450}
3451
3452/// v7.17.0 Phase 3.7 — `regexp_split_to_array(s, pat)`. Returns
3453/// TEXT[] of the pieces between matches.
3454fn regexp_split_to_array(args: &[Value]) -> Result<Value, EvalError> {
3455    if args.len() != 2 {
3456        return Err(EvalError::TypeMismatch {
3457            detail: alloc::format!("regexp_split_to_array() takes 2 args, got {}", args.len()),
3458        });
3459    }
3460    let text = text_arg(&args[0])?;
3461    let pat = text_arg(&args[1])?;
3462    let Some(text) = text else {
3463        return Ok(Value::Null);
3464    };
3465    let Some(pat) = pat else {
3466        return Ok(Value::Null);
3467    };
3468    let node = re_compile(&pat)?;
3469    let chars: Vec<char> = text.chars().collect();
3470    let mut out: Vec<Option<String>> = Vec::new();
3471    let mut piece_start = 0usize;
3472    let mut from = 0usize;
3473    loop {
3474        match re_find(&node, &chars, from) {
3475            Some((s_pos, e_pos)) => {
3476                let piece: String = chars[piece_start..s_pos].iter().collect();
3477                out.push(Some(piece));
3478                let step = if e_pos > s_pos { e_pos } else { e_pos + 1 };
3479                from = step;
3480                piece_start = step;
3481                if from > chars.len() {
3482                    break;
3483                }
3484            }
3485            None => {
3486                let tail: String = chars[piece_start..].iter().collect();
3487                out.push(Some(tail));
3488                break;
3489            }
3490        }
3491    }
3492    Ok(Value::TextArray(out))
3493}
3494
3495/// Helper: coerce a Value to an Option<String> for regex args. NULL
3496/// propagates as None (caller short-circuits to Value::Null).
3497fn text_arg(v: &Value) -> Result<Option<String>, EvalError> {
3498    match v {
3499        Value::Text(s) => Ok(Some(s.clone())),
3500        Value::Null => Ok(None),
3501        other => Err(EvalError::TypeMismatch {
3502            detail: alloc::format!(
3503                "regex function expects TEXT arg, got {:?}",
3504                other.data_type()
3505            ),
3506        }),
3507    }
3508}
3509
/// Which side of the string a PG trim-family function strips.
#[derive(Debug, Clone, Copy)]
enum TrimSide {
    /// Strip from the left side.
    Left,
    /// Strip from the right side.
    Right,
    /// Strip from both sides.
    Both,
}
3517
3518/// PG `left(s, n)` / `right(s, n)` shared implementation. Both
3519/// support negative n which means "all but |n| chars from the
3520/// opposite side". n=0 → ''. Codepoint-counted. NULL → NULL.
3521fn string_left_right(args: &[Value], is_left: bool, fn_name: &str) -> Result<Value, EvalError> {
3522    if args.len() != 2 {
3523        return Err(EvalError::TypeMismatch {
3524            detail: alloc::format!("{fn_name}() takes 2 args, got {}", args.len()),
3525        });
3526    }
3527    if args.iter().any(|v| matches!(v, Value::Null)) {
3528        return Ok(Value::Null);
3529    }
3530    let s = value_to_format_text(&args[0]);
3531    let n = match &args[1] {
3532        Value::SmallInt(x) => i64::from(*x),
3533        Value::Int(x) => i64::from(*x),
3534        Value::BigInt(x) => *x,
3535        other => {
3536            return Err(EvalError::TypeMismatch {
3537                detail: alloc::format!(
3538                    "{fn_name}(): n must be integer, got {:?}",
3539                    other.data_type()
3540                ),
3541            });
3542        }
3543    };
3544    let chars: Vec<char> = s.chars().collect();
3545    let len = chars.len() as i64;
3546    if n == 0 {
3547        return Ok(Value::Text(String::new()));
3548    }
3549    let (start, end) = if is_left {
3550        if n > 0 {
3551            (0usize, (n.min(len)) as usize)
3552        } else {
3553            // left(s, -k) → drop last |k| chars; keep [0..len - k]
3554            let drop = (-n).min(len);
3555            (0usize, (len - drop) as usize)
3556        }
3557    } else if n > 0 {
3558        // right(s, k) → keep last k chars; start = max(0, len-k)
3559        let start = (len - n).max(0);
3560        (start as usize, len as usize)
3561    } else {
3562        // right(s, -k) → drop first |k| chars; keep [k..len]
3563        let drop = (-n).min(len);
3564        (drop as usize, len as usize)
3565    };
3566    if start >= end {
3567        return Ok(Value::Text(String::new()));
3568    }
3569    Ok(Value::Text(chars[start..end].iter().collect()))
3570}
3571
3572/// Compare two values for min/max selection. Returns Equal when
3573/// values are equal (including cross-numeric-width), Less when
3574/// a < b, Greater when a > b. NULL handling is upstream.
3575fn value_cmp_for_min_max(a: &Value, b: &Value) -> core::cmp::Ordering {
3576    use core::cmp::Ordering;
3577    // Integer-widen first (covers SmallInt vs Int vs BigInt).
3578    let a_int = match a {
3579        Value::SmallInt(x) => Some(i64::from(*x)),
3580        Value::Int(x) => Some(i64::from(*x)),
3581        Value::BigInt(x) => Some(*x),
3582        _ => None,
3583    };
3584    let b_int = match b {
3585        Value::SmallInt(x) => Some(i64::from(*x)),
3586        Value::Int(x) => Some(i64::from(*x)),
3587        Value::BigInt(x) => Some(*x),
3588        _ => None,
3589    };
3590    if let (Some(av), Some(bv)) = (a_int, b_int) {
3591        return av.cmp(&bv);
3592    }
3593    // Float-widen.
3594    let a_f = value_to_f64(a);
3595    let b_f = value_to_f64(b);
3596    if let (Some(av), Some(bv)) = (a_f, b_f) {
3597        return av.partial_cmp(&bv).unwrap_or(Ordering::Equal);
3598    }
3599    // Text/Text.
3600    match (a, b) {
3601        (Value::Text(av), Value::Text(bv)) => av.cmp(bv),
3602        (Value::Bytes(av), Value::Bytes(bv)) => av.cmp(bv),
3603        _ => Ordering::Equal,
3604    }
3605}
3606
3607fn value_to_f64(v: &Value) -> Option<f64> {
3608    match v {
3609        Value::Float(x) => Some(*x),
3610        Value::SmallInt(x) => Some(f64::from(*x)),
3611        Value::Int(x) => Some(f64::from(*x)),
3612        Value::BigInt(x) => Some(*x as f64),
3613        Value::Numeric { scaled, scale } => {
3614            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3615        }
3616        _ => None,
3617    }
3618}
3619
3620/// PG-style equality for nullif. Handles cross-numeric-width
3621/// comparison (Int vs BigInt vs SmallInt vs Float vs Numeric);
3622/// text matches text exactly; everything else uses derived
3623/// PartialEq.
3624fn values_equal_for_nullif(a: &Value, b: &Value) -> bool {
3625    // Same-type fast path.
3626    if a == b {
3627        return true;
3628    }
3629    // Cross-int widening: SmallInt / Int / BigInt all comparable.
3630    let a_int = match a {
3631        Value::SmallInt(x) => Some(i64::from(*x)),
3632        Value::Int(x) => Some(i64::from(*x)),
3633        Value::BigInt(x) => Some(*x),
3634        _ => None,
3635    };
3636    let b_int = match b {
3637        Value::SmallInt(x) => Some(i64::from(*x)),
3638        Value::Int(x) => Some(i64::from(*x)),
3639        Value::BigInt(x) => Some(*x),
3640        _ => None,
3641    };
3642    if let (Some(a), Some(b)) = (a_int, b_int) {
3643        return a == b;
3644    }
3645    // Float / Numeric: widen to f64.
3646    let a_f = match a {
3647        Value::Float(x) => Some(*x),
3648        Value::SmallInt(x) => Some(f64::from(*x)),
3649        Value::Int(x) => Some(f64::from(*x)),
3650        Value::BigInt(x) => Some(*x as f64),
3651        Value::Numeric { scaled, scale } => {
3652            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3653        }
3654        _ => None,
3655    };
3656    let b_f = match b {
3657        Value::Float(x) => Some(*x),
3658        Value::SmallInt(x) => Some(f64::from(*x)),
3659        Value::Int(x) => Some(f64::from(*x)),
3660        Value::BigInt(x) => Some(*x as f64),
3661        Value::Numeric { scaled, scale } => {
3662            Some((*scaled as f64) / f64_powi(10.0, i32::from(*scale)))
3663        }
3664        _ => None,
3665    };
3666    if let (Some(a), Some(b)) = (a_f, b_f) {
3667        return a == b;
3668    }
3669    false
3670}
3671
/// no_std-compatible `trunc(x)` for f64 — round toward zero.
///
/// NaN, infinities, and magnitudes of 2^53 or more are returned
/// unchanged (such values are already integral). Otherwise the value is
/// truncated through an `i64` round-trip. The sign of zero is preserved,
/// so `f64_trunc(-0.5)` is `-0.0`, as with IEEE `trunc`.
fn f64_trunc(x: f64) -> f64 {
    const EXACT_INT_LIMIT: f64 = 9_007_199_254_740_992.0; // 2^53
    if x.is_nan() || x.is_infinite() || x >= EXACT_INT_LIMIT || x <= -EXACT_INT_LIMIT {
        return x;
    }
    let truncated = (x as i64) as f64;
    // The integer round-trip cannot represent -0; restore it.
    if truncated == 0.0 && x.is_sign_negative() {
        -0.0
    } else {
        truncated
    }
}
3685
/// Process-wide state of the engine's xorshift-style PRNG. It starts
/// from a fixed constant and is advanced by `prng_next_u64`.
///
/// Not cryptographically secure. Note that `gen_random_uuid_bytes`
/// draws from this same state, so its output is not suitable where
/// unpredictability matters either.
static PRNG_STATE: core::sync::atomic::AtomicU64 =
    core::sync::atomic::AtomicU64::new(0x2545_F491_4F6C_DD1D);
3692
3693/// Advance the PRNG and return the raw next 64-bit state.
3694/// Shared between `random()` and `gen_random_uuid()`. The CAS
3695/// loop guarantees concurrent callers each see a distinct value
3696/// — important for `gen_random_uuid` collision freedom under
3697/// concurrent INSERTs.
3698fn prng_next_u64() -> u64 {
3699    use core::sync::atomic::Ordering;
3700    let mut x = PRNG_STATE.load(Ordering::Relaxed);
3701    loop {
3702        if x == 0 {
3703            x = 0x2545_F491_4F6C_DD1D;
3704        }
3705        let mut next = x;
3706        next ^= next << 13;
3707        next ^= next >> 7;
3708        next ^= next << 17;
3709        match PRNG_STATE.compare_exchange_weak(x, next, Ordering::Relaxed, Ordering::Relaxed) {
3710            Ok(_) => return next,
3711            Err(seen) => x = seen,
3712        }
3713    }
3714}
3715
3716/// Advance the PRNG and return a uniform double in [0, 1).
3717fn prng_next_f64() -> f64 {
3718    // 53 bits of randomness mapped to [0, 1).
3719    let mantissa = prng_next_u64() >> 11;
3720    let denom = (1u64 << 53) as f64;
3721    mantissa as f64 / denom
3722}
3723
3724/// v7.17.0 — generate a RFC 4122 v4 (random) UUID. Layout: 16
3725/// random bytes with the version nibble (high nibble of byte 6)
3726/// pinned to `0100` (= 4) and the variant top bits (high two bits
3727/// of byte 8) pinned to `10` — exactly what PG's
3728/// `gen_random_uuid()` and the historical uuid-ossp
3729/// `uuid_generate_v4()` produce.
3730pub fn gen_random_uuid_bytes() -> [u8; 16] {
3731    let mut out = [0u8; 16];
3732    let hi = prng_next_u64().to_be_bytes();
3733    let lo = prng_next_u64().to_be_bytes();
3734    out[..8].copy_from_slice(&hi);
3735    out[8..].copy_from_slice(&lo);
3736    // Version 4: top nibble of byte 6 must be 0100.
3737    out[6] = (out[6] & 0x0f) | 0x40;
3738    // Variant 1 (RFC 4122): top two bits of byte 8 must be 10.
3739    out[8] = (out[8] & 0x3f) | 0x80;
3740    out
3741}
3742
/// no_std `f64::sqrt(x)` — square root via Newton's (Babylonian)
/// iteration.
///
/// Special cases follow IEEE `sqrt`: NaN and ±0 are returned unchanged,
/// `+∞` gives `+∞`, and any negative input (including `-∞`) gives NaN.
/// Subnormal inputs are scaled into the normal range first so the
/// exponent-based seed is accurate for them too.
fn f64_sqrt(x: f64) -> f64 {
    if x.is_nan() || x == 0.0 {
        return x;
    }
    if x < 0.0 {
        return f64::NAN;
    }
    if x.is_infinite() {
        return x;
    }
    if x < f64::MIN_POSITIVE {
        // sqrt(x) = sqrt(x * 2^108) * 2^-54; both scalings are exact.
        let scale_up = f64::from_bits((1023u64 + 108) << 52);
        let scale_down = f64::from_bits((1023u64 - 54) << 52);
        return f64_sqrt(x * scale_up) * scale_down;
    }
    // Seed: 2^(e/2) where e is x's unbiased exponent. That is within a
    // factor of two of the true root, so no logarithm is needed.
    let exp = ((x.to_bits() >> 52) & 0x7ff) as i64 - 1023;
    let seed_exp = (exp / 2 + 1023) as u64;
    let mut guess = f64::from_bits(seed_exp << 52);
    // Convergence is quadratic from such a seed; 8 rounds leave margin.
    for _ in 0..8 {
        guess = 0.5 * (guess + x / guess);
    }
    guess
}
3767
3768/// no_std `f64::exp(x)` — e^x via range-reduction + Taylor
3769/// series. Adequate for power(), exp(), and pseudo-random-ish
3770/// scales the engine uses; ~1e-12 relative error in the
3771/// common range.
3772fn f64_exp(x: f64) -> f64 {
3773    if x.is_nan() {
3774        return x;
3775    }
3776    if x > 709.0 {
3777        return f64::INFINITY;
3778    }
3779    if x < -745.0 {
3780        return 0.0;
3781    }
3782    // exp(x) = 2^k * exp(r) where r = x - k*ln(2), |r| <= ln(2)/2.
3783    const LN2: f64 = 0.6931471805599453;
3784    let k = f64_round_half_away(x / LN2) as i32;
3785    let r = x - (k as f64) * LN2;
3786    // Taylor series for exp(r): sum r^n / n!  (rapid for |r|<0.35)
3787    let mut term = 1.0;
3788    let mut sum = 1.0;
3789    for n in 1..=20 {
3790        term *= r / (n as f64);
3791        sum += term;
3792        if term.abs() < 1e-18 {
3793            break;
3794        }
3795    }
3796    // Multiply by 2^k.
3797    f64_powi(2.0, k) * sum
3798}
3799
/// no_std `f64::ln(x)` — natural log via range reduction plus an
/// atanh series.
///
/// * NaN, zero and negative inputs return NaN (callers are expected
///   to pass positive values).
/// * `+inf` returns `+inf`; without this guard the halving loop
///   below would never terminate.
/// * `1.0` returns exactly `0.0`.
fn f64_ln(x: f64) -> f64 {
    const LN2: f64 = 0.6931471805599453;

    if x.is_nan() || x <= 0.0 {
        return f64::NAN;
    }
    if x.is_infinite() {
        return x;
    }
    if x == 1.0 {
        return 0.0;
    }
    // Write x = 2^k * m with m in [1.0, 2.0), so that
    // ln(x) = k*ln(2) + ln(m). Scaling by powers of two is exact.
    let mut k = 0i32;
    let mut m = x;
    while m >= 2.0 {
        m *= 0.5;
        k += 1;
    }
    while m < 1.0 {
        m *= 2.0;
        k -= 1;
    }
    // atanh series with u = (m-1)/(m+1), so 0 <= u < 1/3:
    // ln(m) = 2*(u + u^3/3 + u^5/5 + ...).
    let u = (m - 1.0) / (m + 1.0);
    let u2 = u * u;
    let mut term = u;
    let mut sum = u;
    for step in 1..50 {
        term *= u2;
        let denom = (2 * step + 1) as f64;
        sum += term / denom;
        if (term / denom).abs() < 1e-18 {
            break;
        }
    }
    2.0 * sum + (k as f64) * LN2
}
3837
/// no_std stand-in for `f64::powi` — raise `base` to an integer
/// power by binary exponentiation (square-and-multiply). A negative
/// exponent is handled by working with `1.0 / base`; an exponent of
/// zero yields `1.0` for every base.
fn f64_powi(base: f64, exp: i32) -> f64 {
    if exp == 0 {
        return 1.0;
    }
    let mut factor = if exp < 0 { 1.0 / base } else { base };
    // unsigned_abs keeps i32::MIN representable.
    let mut remaining = exp.unsigned_abs();
    let mut acc = 1.0;
    loop {
        if remaining % 2 == 1 {
            acc *= factor;
        }
        remaining /= 2;
        if remaining == 0 {
            break acc;
        }
        // Only square while more bits remain, so the last squaring
        // (which could overflow needlessly) is never performed.
        factor *= factor;
    }
}
3859
/// no_std-compatible `round(x)` for f64 with the half-away-from-zero
/// rule (PG NUMERIC semantic — NOT banker's rounding).
///
/// NaN and infinities are returned unchanged. The result is built
/// from the truncated value and the exact fractional remainder
/// rather than `floor(x + 0.5)`: adding 0.5 first rounds in floating
/// point, which turns 0.49999999999999994 into 1.0 and bumps odd
/// integers in [2^52, 2^53) to the next even integer.
fn f64_round_half_away(x: f64) -> f64 {
    if x.is_nan() || x.is_infinite() {
        return x;
    }
    // From 2^52 upward every representable f64 is already an integer.
    if x.abs() >= 4_503_599_627_370_496.0 {
        return x;
    }
    // |x| < 2^52, so the i64 round-trip truncates toward zero exactly
    // and the subtraction below is exact.
    let whole = (x as i64) as f64;
    let frac = x - whole;
    if frac >= 0.5 {
        whole + 1.0
    } else if frac <= -0.5 {
        whole - 1.0
    } else {
        whole
    }
}
3872
/// no_std-compatible `ceil(x)` for f64: the smallest integer value
/// not below `x`.
///
///   * NaN / Inf passthrough.
///   * Magnitudes of 2^53 and above are returned as-is.
///   * Positive non-integers step up by one; negative non-integers
///     simply truncate toward zero (ceil(-1.5) → -1, NOT -2).
fn f64_ceil(x: f64) -> f64 {
    const TWO_POW_53: f64 = 9_007_199_254_740_992.0;

    if !x.is_finite() {
        return x;
    }
    if !(-TWO_POW_53 < x && x < TWO_POW_53) {
        return x;
    }
    // Truncation toward zero via the i64 round-trip.
    let toward_zero = (x as i64) as f64;
    if x <= 0.0 || x == toward_zero {
        toward_zero
    } else {
        toward_zero + 1.0
    }
}
3891
/// no_std-compatible `floor(x)` for f64: the largest integer value
/// not above `x`. The engine is `#![no_std]`, so `f64::floor` (libm)
/// is not available and the semantic is spelled out by hand:
///   * NaN / Inf passthrough.
///   * Magnitudes of 2^53 and above are already integer-valued and
///     are returned as-is.
///   * Negative non-integers step down by one, i.e. toward
///     -infinity (the PG-canonical semantic); everything else is
///     plain truncation.
fn f64_floor(x: f64) -> f64 {
    const TWO_POW_53: f64 = 9_007_199_254_740_992.0;

    if !x.is_finite() {
        return x;
    }
    if !(-TWO_POW_53 < x && x < TWO_POW_53) {
        return x;
    }
    // Truncation toward zero via the i64 round-trip.
    let toward_zero = (x as i64) as f64;
    if x >= 0.0 || x == toward_zero {
        toward_zero
    } else {
        toward_zero - 1.0
    }
}
3915
3916/// PG `lpad` / `rpad` shared implementation. Length is the
3917/// target codepoint count. When the input is longer than `length`,
3918/// truncate keeping the LEFT side (both lpad and rpad agree with
3919/// PG here). When shorter, pad with `fill` (default SPACE) cycling
3920/// for multi-char fills, on the appropriate side. Empty fill +
3921/// needs padding → returns input verbatim (potentially
3922/// truncated). NULL on any arg → NULL.
3923fn string_pad(args: &[Value], is_left: bool, fn_name: &str) -> Result<Value, EvalError> {
3924    if args.len() != 2 && args.len() != 3 {
3925        return Err(EvalError::TypeMismatch {
3926            detail: alloc::format!("{fn_name}() takes 2 or 3 args, got {}", args.len()),
3927        });
3928    }
3929    if args.iter().any(|v| matches!(v, Value::Null)) {
3930        return Ok(Value::Null);
3931    }
3932    let s = value_to_format_text(&args[0]);
3933    let target = match &args[1] {
3934        Value::SmallInt(x) => i64::from(*x),
3935        Value::Int(x) => i64::from(*x),
3936        Value::BigInt(x) => *x,
3937        other => {
3938            return Err(EvalError::TypeMismatch {
3939                detail: alloc::format!(
3940                    "{fn_name}(): length must be integer, got {:?}",
3941                    other.data_type()
3942                ),
3943            });
3944        }
3945    };
3946    let fill = if args.len() == 3 {
3947        value_to_format_text(&args[2])
3948    } else {
3949        String::from(" ")
3950    };
3951    if target <= 0 {
3952        return Ok(Value::Text(String::new()));
3953    }
3954    let target = target as usize;
3955    let s_chars: Vec<char> = s.chars().collect();
3956    if s_chars.len() >= target {
3957        // Truncate from the right (PG keeps LEFT side for both
3958        // lpad and rpad).
3959        return Ok(Value::Text(s_chars[..target].iter().collect()));
3960    }
3961    if fill.is_empty() {
3962        return Ok(Value::Text(s));
3963    }
3964    let pad_needed = target - s_chars.len();
3965    let fill_chars: Vec<char> = fill.chars().collect();
3966    let mut padding = String::with_capacity(pad_needed * 4);
3967    for i in 0..pad_needed {
3968        padding.push(fill_chars[i % fill_chars.len()]);
3969    }
3970    if is_left {
3971        Ok(Value::Text(padding + &s))
3972    } else {
3973        Ok(Value::Text(s + &padding))
3974    }
3975}
3976
3977/// PG `trim` / `ltrim` / `rtrim` / `btrim` shared implementation.
3978/// Accepts 1 or 2 args; coerces both to text via the standard
3979/// `value_to_format_text` helper; treats the chars arg as a SET
3980/// of UTF-8 codepoints (not a substring). NULL on either arg
3981/// poisons the result.
3982fn string_trim(args: &[Value], side: TrimSide, fn_name: &str) -> Result<Value, EvalError> {
3983    let (input, chars_str) = match args {
3984        [v] => (v.clone(), String::from(" ")),
3985        [v, c] => (v.clone(), {
3986            // NULL chars poisons.
3987            if matches!(c, Value::Null) {
3988                return Ok(Value::Null);
3989            }
3990            value_to_format_text(c)
3991        }),
3992        _ => {
3993            return Err(EvalError::TypeMismatch {
3994                detail: alloc::format!("{fn_name}() takes 1 or 2 args, got {}", args.len()),
3995            });
3996        }
3997    };
3998    if matches!(input, Value::Null) {
3999        return Ok(Value::Null);
4000    }
4001    let s = value_to_format_text(&input);
4002    let charset: alloc::collections::BTreeSet<char> = chars_str.chars().collect();
4003    let chars: Vec<char> = s.chars().collect();
4004    let mut start = 0usize;
4005    let mut end = chars.len();
4006    if matches!(side, TrimSide::Left | TrimSide::Both) {
4007        while start < end && charset.contains(&chars[start]) {
4008            start += 1;
4009        }
4010    }
4011    if matches!(side, TrimSide::Right | TrimSide::Both) {
4012        while end > start && charset.contains(&chars[end - 1]) {
4013            end -= 1;
4014        }
4015    }
4016    Ok(Value::Text(chars[start..end].iter().collect()))
4017}
4018
4019/// v7.17.0 Phase 3.8 — PG `format(fmtstr, args…)` with
4020/// sprintf-style conversion specifiers. Subset covered:
4021///   * `%s` — text rendering of the arg
4022///   * `%I` — quoted SQL identifier (always double-quoted; embedded
4023///     `"` doubled per SQL grammar)
4024///   * `%L` — quoted SQL literal (single-quoted; embedded `'`
4025///     doubled; NULL → literal `NULL`)
4026///   * `%%` — literal `%`
4027///   * `%n$X` — argument position (1-based) before the specifier
4028///     character (e.g. `%2$s` picks the 2nd arg)
4029fn format_string(args: &[Value]) -> Result<Value, EvalError> {
4030    if args.is_empty() {
4031        return Err(EvalError::TypeMismatch {
4032            detail: "format() takes at least 1 arg (format string)".into(),
4033        });
4034    }
4035    let fmt = match &args[0] {
4036        Value::Text(s) => s.clone(),
4037        Value::Null => return Ok(Value::Null),
4038        other => {
4039            return Err(EvalError::TypeMismatch {
4040                detail: format!(
4041                    "format(): first arg must be text, got {:?}",
4042                    other.data_type()
4043                ),
4044            });
4045        }
4046    };
4047    let arg_values = &args[1..];
4048    let mut out = String::new();
4049    let mut chars = fmt.chars().peekable();
4050    // Position cursor — next implicit arg picked when no `n$`
4051    // prefix is given. PG's format uses a 1-based cursor that
4052    // advances on each implicit-position spec.
4053    let mut implicit_cursor: usize = 0;
4054    while let Some(c) = chars.next() {
4055        if c != '%' {
4056            out.push(c);
4057            continue;
4058        }
4059        // Parse optional `n$` position prefix.
4060        let mut explicit_pos: Option<usize> = None;
4061        // Buffer the digits so we can roll back if no `$` follows.
4062        let mut digit_buf = String::new();
4063        while let Some(&d) = chars.peek() {
4064            if d.is_ascii_digit() {
4065                digit_buf.push(d);
4066                chars.next();
4067            } else {
4068                break;
4069            }
4070        }
4071        if !digit_buf.is_empty() && matches!(chars.peek(), Some(&'$')) {
4072            chars.next(); // consume `$`
4073            explicit_pos =
4074                Some(
4075                    digit_buf
4076                        .parse::<usize>()
4077                        .map_err(|_| EvalError::TypeMismatch {
4078                            detail: format!("format(): invalid arg position {digit_buf:?}"),
4079                        })?,
4080                );
4081            digit_buf.clear();
4082        }
4083        // Specifier character.
4084        let spec = match chars.next() {
4085            Some(c) => c,
4086            None => {
4087                return Err(EvalError::TypeMismatch {
4088                    detail: "format(): trailing `%` with no specifier".into(),
4089                });
4090            }
4091        };
4092        // Anything left in digit_buf (no `$`) was actually
4093        // pre-spec digits we now have to emit verbatim. PG would
4094        // treat them as width hint; v7.17 doesn't implement
4095        // width, but we don't want to silently drop the digits.
4096        // Strategy: ignore width for now and emit just the
4097        // converted value.
4098        let _ = digit_buf;
4099        if spec == '%' {
4100            out.push('%');
4101            continue;
4102        }
4103        let arg_index = match explicit_pos {
4104            Some(p) => p.saturating_sub(1),
4105            None => {
4106                let i = implicit_cursor;
4107                implicit_cursor += 1;
4108                i
4109            }
4110        };
4111        let arg = arg_values.get(arg_index).cloned().unwrap_or(Value::Null);
4112        match spec {
4113            's' => match arg {
4114                Value::Null => {} // PG: NULL renders as empty for %s.
4115                v => out.push_str(&value_to_format_text(&v)),
4116            },
4117            'I' => match arg {
4118                Value::Null => {
4119                    return Err(EvalError::TypeMismatch {
4120                        detail: "format(): NULL is not a valid identifier (%I)".into(),
4121                    });
4122                }
4123                v => {
4124                    let s = value_to_format_text(&v);
4125                    out.push('"');
4126                    for ch in s.chars() {
4127                        if ch == '"' {
4128                            out.push('"');
4129                            out.push('"');
4130                        } else {
4131                            out.push(ch);
4132                        }
4133                    }
4134                    out.push('"');
4135                }
4136            },
4137            'L' => match arg {
4138                Value::Null => out.push_str("NULL"),
4139                v => {
4140                    let s = value_to_format_text(&v);
4141                    out.push('\'');
4142                    for ch in s.chars() {
4143                        if ch == '\'' {
4144                            out.push('\'');
4145                            out.push('\'');
4146                        } else {
4147                            out.push(ch);
4148                        }
4149                    }
4150                    out.push('\'');
4151                }
4152            },
4153            other => {
4154                return Err(EvalError::TypeMismatch {
4155                    detail: format!(
4156                        "format(): unknown specifier '%{other}' \
4157                         (v7.17 supports %s %I %L %%)"
4158                    ),
4159                });
4160            }
4161        }
4162    }
4163    Ok(Value::Text(out))
4164}
4165
4166/// Helper: render a Value as text for format()'s %s / %I / %L
4167/// payload. Reuses the regular text-coercion table.
4168/// v7.17.0 Phase 3.P0-31 — map a `Value` to the canonical PG
4169/// type-name string returned by `pg_typeof`. Lowercase, matches
4170/// what real PostgreSQL emits (NOT SPG's UPPERCASE Display shape).
4171fn pg_typeof_name(v: &Value) -> &'static str {
4172    match v {
4173        Value::SmallInt(_) => "smallint",
4174        Value::Int(_) => "integer",
4175        Value::BigInt(_) => "bigint",
4176        Value::Float(_) => "double precision",
4177        Value::Text(_) => "text",
4178        Value::Bool(_) => "boolean",
4179        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => "vector",
4180        Value::Numeric { .. } => "numeric",
4181        Value::Date(_) => "date",
4182        Value::Timestamp(_) => "timestamp without time zone",
4183        Value::Interval { .. } => "interval",
4184        Value::Json(_) => {
4185            // SPG carries JSON and JSONB in the same Value::Json
4186            // variant; without a column ty hint we cannot tell
4187            // them apart at value level. Return "json" as the
4188            // conservative answer (PG's pg_typeof on a literal
4189            // `'{}'::json` returns "json"; the jsonb case is
4190            // covered when an explicit ::jsonb cast lands as
4191            // Value::Json too — see below override at call site).
4192            //
4193            // The eval-arm above for pg_typeof handles the
4194            // disambiguation via Expr-shape probing.
4195            "json"
4196        }
4197        Value::Bytes(_) => "bytea",
4198        Value::TextArray(_) => "text[]",
4199        Value::IntArray(_) => "integer[]",
4200        Value::BigIntArray(_) => "bigint[]",
4201        Value::TsVector(_) => "tsvector",
4202        Value::TsQuery(_) => "tsquery",
4203        Value::Uuid(_) => "uuid",
4204        Value::Null => "unknown",
4205        // Value is #[non_exhaustive]; future variants land here
4206        // until the table is updated.
4207        _ => "unknown",
4208    }
4209}
4210
4211fn value_to_format_text(v: &Value) -> String {
4212    match v {
4213        Value::Text(s) | Value::Json(s) => s.clone(),
4214        Value::SmallInt(n) => n.to_string(),
4215        Value::Int(n) => n.to_string(),
4216        Value::BigInt(n) => n.to_string(),
4217        Value::Float(x) => format!("{x}"),
4218        Value::Bool(b) => {
4219            if *b {
4220                "t".into()
4221            } else {
4222                "f".into()
4223            }
4224        }
4225        Value::Null => String::new(),
4226        other => format!("{other:?}"),
4227    }
4228}
4229
4230fn to_char(args: &[Value]) -> Result<Value, EvalError> {
4231    use core::fmt::Write as _;
4232    if args.len() != 2 {
4233        return Err(EvalError::TypeMismatch {
4234            detail: format!("to_char() takes 2 args, got {}", args.len()),
4235        });
4236    }
4237    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4238        return Ok(Value::Null);
4239    }
4240    let Value::Text(fmt) = &args[1] else {
4241        return Err(EvalError::TypeMismatch {
4242            detail: format!(
4243                "to_char() needs a text format, got {:?}",
4244                args[1].data_type()
4245            ),
4246        });
4247    };
4248    let (days, day_micros) = match &args[0] {
4249        Value::Date(d) => (*d, 0_i64),
4250        Value::Timestamp(t) => {
4251            let days = t.div_euclid(86_400_000_000);
4252            (
4253                i32::try_from(days).unwrap_or(i32::MAX),
4254                t.rem_euclid(86_400_000_000),
4255            )
4256        }
4257        other => {
4258            return Err(EvalError::TypeMismatch {
4259                detail: format!(
4260                    "to_char() needs DATE or TIMESTAMP, got {:?}",
4261                    other.data_type()
4262                ),
4263            });
4264        }
4265    };
4266    let (y, mo, d) = civil_from_days(days);
4267    let secs = day_micros / 1_000_000;
4268    let frac = day_micros % 1_000_000;
4269    // div_euclid keeps every value non-negative — the casts below are
4270    // sign-safe by construction. `secs ∈ [0, 86400)`, `frac ∈ [0,
4271    // 1_000_000)`, so all three quantities fit in u32.
4272    let hh24 = u32::try_from(secs / 3600).unwrap_or(0);
4273    let mi = u32::try_from((secs / 60) % 60).unwrap_or(0);
4274    let ss = u32::try_from(secs % 60).unwrap_or(0);
4275    let hh12 = match hh24 % 12 {
4276        0 => 12,
4277        x => x,
4278    };
4279    let ampm = if hh24 < 12 { "AM" } else { "PM" };
4280    let ms = u32::try_from(frac / 1_000).unwrap_or(0); // millisecond
4281    let us = u32::try_from(frac).unwrap_or(0); // microsecond (0..1_000_000)
4282
4283    let mut out = String::with_capacity(fmt.len() + 8);
4284    let bytes = fmt.as_bytes();
4285    let mut i = 0;
4286    // write! against a String never fails — discard the Result.
4287    while i < bytes.len() {
4288        // Try the longest prefixes first so "YYYY" wins over "YY".
4289        let rest = &bytes[i..];
4290        if rest.starts_with(b"YYYY") {
4291            let _ = write!(out, "{y:04}");
4292            i += 4;
4293        } else if rest.starts_with(b"YY") {
4294            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
4295            let yy = (y.rem_euclid(100)) as u32;
4296            let _ = write!(out, "{yy:02}");
4297            i += 2;
4298        } else if rest.starts_with(b"Month") {
4299            out.push_str(MONTH_FULL[(mo - 1) as usize]);
4300            i += 5;
4301        } else if rest.starts_with(b"Mon") {
4302            out.push_str(MONTH_ABBR[(mo - 1) as usize]);
4303            i += 3;
4304        } else if rest.starts_with(b"MM") {
4305            let _ = write!(out, "{mo:02}");
4306            i += 2;
4307        } else if rest.starts_with(b"DD") {
4308            let _ = write!(out, "{d:02}");
4309            i += 2;
4310        } else if rest.starts_with(b"HH24") {
4311            let _ = write!(out, "{hh24:02}");
4312            i += 4;
4313        } else if rest.starts_with(b"HH12") {
4314            let _ = write!(out, "{hh12:02}");
4315            i += 4;
4316        } else if rest.starts_with(b"MI") {
4317            let _ = write!(out, "{mi:02}");
4318            i += 2;
4319        } else if rest.starts_with(b"SS") {
4320            let _ = write!(out, "{ss:02}");
4321            i += 2;
4322        } else if rest.starts_with(b"MS") {
4323            let _ = write!(out, "{ms:03}");
4324            i += 2;
4325        } else if rest.starts_with(b"US") {
4326            let _ = write!(out, "{us:06}");
4327            i += 2;
4328        } else if rest.starts_with(b"AM") || rest.starts_with(b"PM") {
4329            out.push_str(ampm);
4330            i += 2;
4331        } else {
4332            // Pass any non-placeholder byte through verbatim.
4333            out.push(bytes[i] as char);
4334            i += 1;
4335        }
4336    }
4337    Ok(Value::Text(out))
4338}
4339
/// Full English month names, indexed by `month - 1`. Used by the
/// `Month` pattern of `to_char` and the `%M` token of `date_format`.
const MONTH_FULL: [&str; 12] = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
];
/// Three-letter English month abbreviations, indexed by `month - 1`.
/// Used by the `Mon` pattern of `to_char` and the `%b` token of
/// `date_format`.
const MONTH_ABBR: [&str; 12] = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
];
4357
4358/// v7.17.0 Phase 3.P0-29 — MySQL `DATE_FORMAT(t, fmt)`.
4359///
4360/// Format tokens (MySQL 8.0 surface):
4361///   * `%Y` — 4-digit year  `%y` — 2-digit year
4362///   * `%m` — 01-12 month   `%c` — 1-12 month (no zero pad)
4363///   * `%d` — 01-31 day     `%e` — 1-31 day (no zero pad)
4364///   * `%H` — 00-23 hour    `%h` / `%I` — 01-12 hour
4365///   * `%i` — 00-59 MINUTE (NB: `%M` is month name in MySQL — easy
4366///     footgun if we mirror PG's `to_char` tokens by accident)
4367///   * `%s` / `%S` — 00-59 second
4368///   * `%f` — 000000-999999 microseconds (always 6 digits)
4369///   * `%p` — AM / PM
4370///   * `%M` — January-December (full month name)
4371///   * `%b` — Jan-Dec (abbreviated month name)
4372///   * `%%` — literal `%`
4373///
4374/// Unknown `%X` tokens pass through verbatim (MySQL emits the `%`
4375/// then the unknown letter).
4376fn date_format_mysql(args: &[Value]) -> Result<Value, EvalError> {
4377    use core::fmt::Write as _;
4378    if args.len() != 2 {
4379        return Err(EvalError::TypeMismatch {
4380            detail: format!("date_format() takes 2 args, got {}", args.len()),
4381        });
4382    }
4383    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4384        return Ok(Value::Null);
4385    }
4386    let Value::Text(fmt) = &args[1] else {
4387        return Err(EvalError::TypeMismatch {
4388            detail: format!(
4389                "date_format() needs a text format, got {:?}",
4390                args[1].data_type()
4391            ),
4392        });
4393    };
4394    let (days, day_micros) = match &args[0] {
4395        Value::Date(d) => (*d, 0_i64),
4396        Value::Timestamp(t) => {
4397            let days = t.div_euclid(86_400_000_000);
4398            (
4399                i32::try_from(days).unwrap_or(i32::MAX),
4400                t.rem_euclid(86_400_000_000),
4401            )
4402        }
4403        other => {
4404            return Err(EvalError::TypeMismatch {
4405                detail: format!(
4406                    "date_format() needs DATE or TIMESTAMP, got {:?}",
4407                    other.data_type()
4408                ),
4409            });
4410        }
4411    };
4412    let (y, mo, d) = civil_from_days(days);
4413    let secs = day_micros / 1_000_000;
4414    let frac = day_micros % 1_000_000;
4415    let hh24 = u32::try_from(secs / 3600).unwrap_or(0);
4416    let mi = u32::try_from((secs / 60) % 60).unwrap_or(0);
4417    let ss = u32::try_from(secs % 60).unwrap_or(0);
4418    let hh12 = match hh24 % 12 {
4419        0 => 12,
4420        x => x,
4421    };
4422    let ampm = if hh24 < 12 { "AM" } else { "PM" };
4423    let us = u32::try_from(frac).unwrap_or(0);
4424
4425    let mut out = String::with_capacity(fmt.len() + 8);
4426    let bytes = fmt.as_bytes();
4427    let mut i = 0;
4428    while i < bytes.len() {
4429        if bytes[i] != b'%' {
4430            out.push(bytes[i] as char);
4431            i += 1;
4432            continue;
4433        }
4434        if i + 1 >= bytes.len() {
4435            // Trailing `%` with no specifier — emit verbatim.
4436            out.push('%');
4437            i += 1;
4438            continue;
4439        }
4440        let token = bytes[i + 1];
4441        match token {
4442            b'Y' => {
4443                let _ = write!(out, "{y:04}");
4444            }
4445            b'y' => {
4446                #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
4447                let yy = (y.rem_euclid(100)) as u32;
4448                let _ = write!(out, "{yy:02}");
4449            }
4450            b'm' => {
4451                let _ = write!(out, "{mo:02}");
4452            }
4453            b'c' => {
4454                let _ = write!(out, "{mo}");
4455            }
4456            b'd' => {
4457                let _ = write!(out, "{d:02}");
4458            }
4459            b'e' => {
4460                let _ = write!(out, "{d}");
4461            }
4462            b'H' => {
4463                let _ = write!(out, "{hh24:02}");
4464            }
4465            b'h' | b'I' => {
4466                let _ = write!(out, "{hh12:02}");
4467            }
4468            b'i' => {
4469                // MINUTE — distinct from PG's `MI` and from MySQL's
4470                // own `%M` (month name).
4471                let _ = write!(out, "{mi:02}");
4472            }
4473            b's' | b'S' => {
4474                let _ = write!(out, "{ss:02}");
4475            }
4476            b'f' => {
4477                let _ = write!(out, "{us:06}");
4478            }
4479            b'p' => {
4480                out.push_str(ampm);
4481            }
4482            b'M' => {
4483                out.push_str(MONTH_FULL[(mo - 1) as usize]);
4484            }
4485            b'b' => {
4486                out.push_str(MONTH_ABBR[(mo - 1) as usize]);
4487            }
4488            b'%' => {
4489                out.push('%');
4490            }
4491            other => {
4492                // Unknown specifier — MySQL emits the letter
4493                // verbatim (without the `%`).
4494                out.push(other as char);
4495            }
4496        }
4497        i += 2;
4498    }
4499    Ok(Value::Text(out))
4500}
4501
4502/// v7.17.0 Phase 3.P0-29 — `UNIX_TIMESTAMP(t)` returns epoch
4503/// seconds (BIGINT) for a TIMESTAMP / DATE.
4504///
4505/// Bare `UNIX_TIMESTAMP()` (no args) is folded to a BigInt literal
4506/// by clock_replacement_for at the rewrite layer — never reaches
4507/// this arm.
4508fn unix_timestamp_of(args: &[Value]) -> Result<Value, EvalError> {
4509    if args.len() != 1 {
4510        return Err(EvalError::TypeMismatch {
4511            detail: format!("unix_timestamp() takes 0 or 1 arg, got {}", args.len()),
4512        });
4513    }
4514    match &args[0] {
4515        Value::Null => Ok(Value::Null),
4516        Value::Timestamp(t) => Ok(Value::BigInt(t.div_euclid(1_000_000))),
4517        Value::Date(d) => Ok(Value::BigInt(i64::from(*d) * 86_400)),
4518        other => Err(EvalError::TypeMismatch {
4519            detail: format!(
4520                "unix_timestamp() needs DATE or TIMESTAMP, got {:?}",
4521                other.data_type()
4522            ),
4523        }),
4524    }
4525}
4526
4527/// v7.17.0 Phase 3.P0-29 — `FROM_UNIXTIME(n)` returns a TIMESTAMP
4528/// at `n` seconds past the Unix epoch. `FROM_UNIXTIME(n, fmt)`
4529/// applies MySQL date_format on top, returning TEXT.
4530fn from_unixtime(args: &[Value]) -> Result<Value, EvalError> {
4531    if !(1..=2).contains(&args.len()) {
4532        return Err(EvalError::TypeMismatch {
4533            detail: format!("from_unixtime() takes 1 or 2 args, got {}", args.len()),
4534        });
4535    }
4536    if args.iter().any(|v| matches!(v, Value::Null)) {
4537        return Ok(Value::Null);
4538    }
4539    let secs: i64 = match &args[0] {
4540        Value::SmallInt(n) => i64::from(*n),
4541        Value::Int(n) => i64::from(*n),
4542        Value::BigInt(n) => *n,
4543        Value::Float(x) => *x as i64,
4544        Value::Numeric { scaled, scale } => {
4545            let denom = 10_i128.pow(u32::from(*scale));
4546            i64::try_from(scaled.div_euclid(denom)).unwrap_or(i64::MAX)
4547        }
4548        other => {
4549            return Err(EvalError::TypeMismatch {
4550                detail: format!(
4551                    "from_unixtime() needs a numeric epoch second count, got {:?}",
4552                    other.data_type()
4553                ),
4554            });
4555        }
4556    };
4557    let ts = Value::Timestamp(secs.saturating_mul(1_000_000));
4558    if args.len() == 1 {
4559        Ok(ts)
4560    } else {
4561        date_format_mysql(&[ts, args[1].clone()])
4562    }
4563}
4564
4565/// `date_trunc(unit, timestamp)` — round a `TIMESTAMP` down to the
4566/// requested calendar boundary (year / month / day / hour / minute /
4567/// second). Returns the truncated `TIMESTAMP`. NULL on either side
4568/// propagates to NULL.
4569fn date_trunc(args: &[Value]) -> Result<Value, EvalError> {
4570    if args.len() != 2 {
4571        return Err(EvalError::TypeMismatch {
4572            detail: format!("date_trunc() takes 2 args, got {}", args.len()),
4573        });
4574    }
4575    if matches!(&args[0], Value::Null) || matches!(&args[1], Value::Null) {
4576        return Ok(Value::Null);
4577    }
4578    let Value::Text(unit) = &args[0] else {
4579        return Err(EvalError::TypeMismatch {
4580            detail: format!(
4581                "date_trunc() needs a text unit, got {:?}",
4582                args[0].data_type()
4583            ),
4584        });
4585    };
4586    // Both DATE and TIMESTAMP sources are accepted. DATE lifts to
4587    // midnight first; the result is always TIMESTAMP.
4588    let micros = match &args[1] {
4589        Value::Timestamp(t) => *t,
4590        Value::Date(d) => i64::from(*d) * 86_400_000_000,
4591        other => {
4592            return Err(EvalError::TypeMismatch {
4593                detail: format!(
4594                    "date_trunc() needs DATE or TIMESTAMP, got {:?}",
4595                    other.data_type()
4596                ),
4597            });
4598        }
4599    };
4600    let unit_lc = unit.to_ascii_lowercase();
4601    let days = micros.div_euclid(86_400_000_000);
4602    let day_micros = micros.rem_euclid(86_400_000_000);
4603    let day_i32 = i32::try_from(days).unwrap_or(i32::MAX);
4604    let (y, m, _) = civil_from_days(day_i32);
4605    let truncated = match unit_lc.as_str() {
4606        "year" => i64::from(days_from_civil(y, 1, 1)) * 86_400_000_000,
4607        "month" => i64::from(days_from_civil(y, m, 1)) * 86_400_000_000,
4608        "day" => days * 86_400_000_000,
4609        "hour" => days * 86_400_000_000 + (day_micros / 3_600_000_000) * 3_600_000_000,
4610        "minute" => days * 86_400_000_000 + (day_micros / 60_000_000) * 60_000_000,
4611        "second" => days * 86_400_000_000 + (day_micros / 1_000_000) * 1_000_000,
4612        other => {
4613            return Err(EvalError::TypeMismatch {
4614                detail: format!(
4615                    "unknown date_trunc unit {other:?}; \
4616                     supported: year, month, day, hour, minute, second"
4617                ),
4618            });
4619        }
4620    };
4621    Ok(Value::Timestamp(truncated))
4622}
4623
4624/// PG-style `expr::TYPE` coercion. NULL always casts as NULL.
4625pub fn cast_value(v: Value, target: CastTarget) -> Result<Value, EvalError> {
4626    if matches!(v, Value::Null) {
4627        return Ok(Value::Null);
4628    }
4629    match target {
4630        CastTarget::Vector => cast_to_vector(v),
4631        CastTarget::Text => Ok(Value::Text(value_to_text(&v))),
4632        CastTarget::Int => cast_numeric_to_int(v),
4633        CastTarget::BigInt => cast_numeric_to_bigint(v),
4634        CastTarget::Float => cast_numeric_to_float(v),
4635        CastTarget::Bool => cast_to_bool(v),
4636        CastTarget::Date => cast_to_date(v),
4637        // TIMESTAMP and TIMESTAMPTZ have identical runtime
4638        // representation (i64 microseconds UTC).
4639        CastTarget::Timestamp | CastTarget::Timestamptz => cast_to_timestamp(v),
4640        // v7.9.25 — `expr::INTERVAL`. Currently only TEXT → Interval
4641        // is supported (the mailrs idiom: `$1::INTERVAL` where the
4642        // bound param is a string like `'7 days'`).
4643        CastTarget::Interval => cast_to_interval(v),
4644        // v7.9.25 — `::json` / `::jsonb`. Routes Text → Json
4645        // (validation is the producer's responsibility, same as
4646        // the column-INSERT path).
4647        CastTarget::Json | CastTarget::Jsonb => match v {
4648            Value::Json(s) => Ok(Value::Json(s)),
4649            Value::Text(s) => Ok(Value::Json(s)),
4650            other => Err(EvalError::TypeMismatch {
4651                detail: alloc::format!(
4652                    "::json / ::jsonb only accepts TEXT-shape inputs, got {:?}",
4653                    other.data_type()
4654                ),
4655            }),
4656        },
4657        // v7.17.0 Phase 5.3 — `::regtype` / `::regclass`. PG
4658        // semantics: each is a textual catalog-name surfacing as
4659        // a numeric OID at the wire layer that renders back as
4660        // the original name. SPG has no OID space, but pg_dump /
4661        // mailrs / Django code uses the cast purely for textual
4662        // round-trip — feeding `'public.t'::regclass::text` into
4663        // a downstream `format(…)` or string concat. We map to
4664        // that textual contract: Text in → Text out (the schema-
4665        // qualifier `public.` is stripped to match PG's default
4666        // search_path-aware rendering); numeric in → re-cast to
4667        // Text as best-effort; anything else errors.
4668        //
4669        // Pre-3.3 / pre-5.3 (v7.9.26) the cast surfaced a clean
4670        // error; this lifts to accept-and-textify so the dominant
4671        // dump-loader pattern unblocks. SPG-shaped queries that
4672        // genuinely need an OID for runtime joins are still
4673        // documented as unsupported.
4674        CastTarget::RegType | CastTarget::RegClass => match v {
4675            Value::Text(s) => {
4676                // Strip an optional `<schema>.` prefix — PG's
4677                // regclass render drops it when the schema is on
4678                // the search_path; SPG is single-schema so
4679                // dropping is always safe.
4680                let bare = s.rsplit('.').next().unwrap_or(&s).to_string();
4681                Ok(Value::Text(bare))
4682            }
4683            Value::Int(n) => Ok(Value::Text(alloc::format!("{n}"))),
4684            Value::BigInt(n) => Ok(Value::Text(alloc::format!("{n}"))),
4685            other => Err(EvalError::TypeMismatch {
4686                detail: alloc::format!(
4687                    "::regtype / ::regclass accepts TEXT (name) or integer (oid), got {:?}",
4688                    other.data_type()
4689                ),
4690            }),
4691        },
4692        // v7.10.11 — `::TEXT[]`. Decode PG external array form
4693        // when input is Text; pass through unchanged when it is
4694        // already TextArray. Anything else is a type mismatch.
4695        CastTarget::TextArray => match v {
4696            Value::TextArray(items) => Ok(Value::TextArray(items)),
4697            Value::Text(s) => decode_text_array_external(&s).map(Value::TextArray),
4698            other => Err(EvalError::TypeMismatch {
4699                detail: alloc::format!(
4700                    "::TEXT[] only accepts TEXT / TEXT[] inputs, got {:?}",
4701                    other.data_type()
4702                ),
4703            }),
4704        },
4705        // v7.11.13 — `::INT[]` / `::BIGINT[]`. Decode PG external
4706        // form `{1,2,3}` when input is Text; widen TextArray /
4707        // IntArray as appropriate.
4708        CastTarget::IntArray => cast_to_int_array(v),
4709        CastTarget::BigIntArray => cast_to_bigint_array(v),
4710        // v7.12.0 — `::tsvector` / `::tsquery`. Decodes PG external
4711        // form when input is Text; passes through unchanged when the
4712        // input is already the target type. Other inputs are a type
4713        // mismatch. Lexer / Porter stemmer arrive in v7.12.1; the
4714        // external-form cast at v7.12.0 is the path pg_dump and
4715        // direct-literal callers use.
4716        CastTarget::TsVector => match v {
4717            Value::TsVector(items) => Ok(Value::TsVector(items)),
4718            Value::Text(s) => decode_tsvector_external(&s).map(Value::TsVector),
4719            other => Err(EvalError::TypeMismatch {
4720                detail: alloc::format!(
4721                    "::tsvector only accepts TEXT / tsvector inputs, got {:?}",
4722                    other.data_type()
4723                ),
4724            }),
4725        },
4726        CastTarget::TsQuery => match v {
4727            Value::TsQuery(ast) => Ok(Value::TsQuery(ast)),
4728            Value::Text(s) => decode_tsquery_external(&s).map(Value::TsQuery),
4729            other => Err(EvalError::TypeMismatch {
4730                detail: alloc::format!(
4731                    "::tsquery only accepts TEXT / tsquery inputs, got {:?}",
4732                    other.data_type()
4733                ),
4734            }),
4735        },
4736        // v7.17.0 — `::uuid`. Identity for `uuid → uuid`; parse
4737        // text via the shared `parse_uuid_str`. Anything else is a
4738        // type mismatch — PG also rejects e.g. INT → UUID without
4739        // an explicit text bridge.
4740        CastTarget::Uuid => match v {
4741            Value::Uuid(b) => Ok(Value::Uuid(b)),
4742            Value::Text(s) => match spg_storage::parse_uuid_str(&s) {
4743                Some(b) => Ok(Value::Uuid(b)),
4744                None => Err(EvalError::TypeMismatch {
4745                    detail: alloc::format!("invalid input syntax for type uuid: {s:?}"),
4746                }),
4747            },
4748            other => Err(EvalError::TypeMismatch {
4749                detail: alloc::format!(
4750                    "::uuid only accepts TEXT / uuid inputs, got {:?}",
4751                    other.data_type()
4752                ),
4753            }),
4754        },
4755    }
4756}
4757
4758fn cast_to_int_array(v: Value) -> Result<Value, EvalError> {
4759    match v {
4760        Value::IntArray(items) => Ok(Value::IntArray(items)),
4761        Value::BigIntArray(items) => {
4762            let mut out: Vec<Option<i32>> = Vec::with_capacity(items.len());
4763            for item in items {
4764                match item {
4765                    None => out.push(None),
4766                    Some(n) => match i32::try_from(n) {
4767                        Ok(x) => out.push(Some(x)),
4768                        Err(_) => {
4769                            return Err(EvalError::TypeMismatch {
4770                                detail: alloc::format!("::INT[] element {n} overflows i32"),
4771                            });
4772                        }
4773                    },
4774                }
4775            }
4776            Ok(Value::IntArray(out))
4777        }
4778        Value::Text(s) => decode_int_array_external(&s).map(Value::IntArray),
4779        Value::TextArray(items) => {
4780            let mut out: Vec<Option<i32>> = Vec::with_capacity(items.len());
4781            for item in items {
4782                match item {
4783                    None => out.push(None),
4784                    Some(s) => match s.parse::<i32>() {
4785                        Ok(n) => out.push(Some(n)),
4786                        Err(_) => {
4787                            return Err(EvalError::TypeMismatch {
4788                                detail: alloc::format!("::INT[] cannot parse {s:?}"),
4789                            });
4790                        }
4791                    },
4792                }
4793            }
4794            Ok(Value::IntArray(out))
4795        }
4796        other => Err(EvalError::TypeMismatch {
4797            detail: alloc::format!("::INT[] does not accept {:?}", other.data_type()),
4798        }),
4799    }
4800}
4801
4802fn cast_to_bigint_array(v: Value) -> Result<Value, EvalError> {
4803    match v {
4804        Value::BigIntArray(items) => Ok(Value::BigIntArray(items)),
4805        Value::IntArray(items) => Ok(Value::BigIntArray(
4806            items.into_iter().map(|x| x.map(i64::from)).collect(),
4807        )),
4808        Value::Text(s) => decode_bigint_array_external(&s).map(Value::BigIntArray),
4809        Value::TextArray(items) => {
4810            let mut out: Vec<Option<i64>> = Vec::with_capacity(items.len());
4811            for item in items {
4812                match item {
4813                    None => out.push(None),
4814                    Some(s) => match s.parse::<i64>() {
4815                        Ok(n) => out.push(Some(n)),
4816                        Err(_) => {
4817                            return Err(EvalError::TypeMismatch {
4818                                detail: alloc::format!("::BIGINT[] cannot parse {s:?}"),
4819                            });
4820                        }
4821                    },
4822                }
4823            }
4824            Ok(Value::BigIntArray(out))
4825        }
4826        other => Err(EvalError::TypeMismatch {
4827            detail: alloc::format!("::BIGINT[] does not accept {:?}", other.data_type()),
4828        }),
4829    }
4830}
4831
4832fn decode_int_array_external(s: &str) -> Result<Vec<Option<i32>>, EvalError> {
4833    let trimmed = s.trim();
4834    let inner = trimmed
4835        .strip_prefix('{')
4836        .and_then(|x| x.strip_suffix('}'))
4837        .ok_or_else(|| EvalError::TypeMismatch {
4838            detail: alloc::format!("INT[] literal {s:?} must be enclosed in '{{...}}'"),
4839        })?;
4840    if inner.trim().is_empty() {
4841        return Ok(Vec::new());
4842    }
4843    inner
4844        .split(',')
4845        .map(|part| {
4846            let p = part.trim();
4847            if p.eq_ignore_ascii_case("NULL") {
4848                Ok(None)
4849            } else {
4850                p.parse::<i32>()
4851                    .map(Some)
4852                    .map_err(|_| EvalError::TypeMismatch {
4853                        detail: alloc::format!("INT[] element {p:?} is not an i32"),
4854                    })
4855            }
4856        })
4857        .collect()
4858}
4859
4860fn decode_bigint_array_external(s: &str) -> Result<Vec<Option<i64>>, EvalError> {
4861    let trimmed = s.trim();
4862    let inner = trimmed
4863        .strip_prefix('{')
4864        .and_then(|x| x.strip_suffix('}'))
4865        .ok_or_else(|| EvalError::TypeMismatch {
4866            detail: alloc::format!("BIGINT[] literal {s:?} must be enclosed in '{{...}}'"),
4867        })?;
4868    if inner.trim().is_empty() {
4869        return Ok(Vec::new());
4870    }
4871    inner
4872        .split(',')
4873        .map(|part| {
4874            let p = part.trim();
4875            if p.eq_ignore_ascii_case("NULL") {
4876                Ok(None)
4877            } else {
4878                p.parse::<i64>()
4879                    .map(Some)
4880                    .map_err(|_| EvalError::TypeMismatch {
4881                        detail: alloc::format!("BIGINT[] element {p:?} is not an i64"),
4882                    })
4883            }
4884        })
4885        .collect()
4886}
4887
4888/// v7.10.11 — same decoder as `decode_text_array_literal` in
4889/// `lib.rs`, but lives here so the eval-time cast path stays
4890/// inside `spg-engine::eval`. Kept in lock-step with the engine
4891/// `coerce_value` decoder by tests.
4892fn decode_text_array_external(s: &str) -> Result<Vec<Option<String>>, EvalError> {
4893    let trimmed = s.trim();
4894    let inner = trimmed
4895        .strip_prefix('{')
4896        .and_then(|x| x.strip_suffix('}'))
4897        .ok_or_else(|| EvalError::TypeMismatch {
4898            detail: alloc::format!("TEXT[] literal {s:?} must be enclosed in '{{...}}'"),
4899        })?;
4900    let mut out: Vec<Option<String>> = Vec::new();
4901    if inner.trim().is_empty() {
4902        return Ok(out);
4903    }
4904    let bytes = inner.as_bytes();
4905    let mut i = 0;
4906    while i <= bytes.len() {
4907        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
4908            i += 1;
4909        }
4910        if i < bytes.len() && bytes[i] == b'"' {
4911            i += 1;
4912            let mut buf = String::new();
4913            while i < bytes.len() && bytes[i] != b'"' {
4914                if bytes[i] == b'\\' && i + 1 < bytes.len() {
4915                    buf.push(bytes[i + 1] as char);
4916                    i += 2;
4917                } else {
4918                    buf.push(bytes[i] as char);
4919                    i += 1;
4920                }
4921            }
4922            if i >= bytes.len() {
4923                return Err(EvalError::TypeMismatch {
4924                    detail: "unterminated quoted element in TEXT[] literal".into(),
4925                });
4926            }
4927            i += 1;
4928            out.push(Some(buf));
4929        } else {
4930            let start = i;
4931            while i < bytes.len() && bytes[i] != b',' {
4932                i += 1;
4933            }
4934            let raw = inner[start..i].trim();
4935            if raw.eq_ignore_ascii_case("NULL") {
4936                out.push(None);
4937            } else {
4938                out.push(Some(raw.to_string()));
4939            }
4940        }
4941        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
4942            i += 1;
4943        }
4944        if i >= bytes.len() {
4945            break;
4946        }
4947        if bytes[i] != b',' {
4948            return Err(EvalError::TypeMismatch {
4949                detail: "expected ',' between TEXT[] elements".into(),
4950            });
4951        }
4952        i += 1;
4953    }
4954    Ok(out)
4955}
4956
4957fn cast_to_interval(v: Value) -> Result<Value, EvalError> {
4958    match v {
4959        Value::Interval { months, micros } => Ok(Value::Interval { months, micros }),
4960        Value::Text(s) => {
4961            let (months, micros) = spg_sql::parser::parse_interval_text(&s).ok_or_else(|| {
4962                EvalError::TypeMismatch {
4963                    detail: alloc::format!("cannot parse {s:?} as INTERVAL"),
4964                }
4965            })?;
4966            Ok(Value::Interval { months, micros })
4967        }
4968        other => Err(EvalError::TypeMismatch {
4969            detail: alloc::format!(
4970                "::INTERVAL only accepts TEXT-shape inputs, got {:?}",
4971                other.data_type()
4972            ),
4973        }),
4974    }
4975}
4976
4977fn cast_to_date(v: Value) -> Result<Value, EvalError> {
4978    match v {
4979        Value::Date(d) => Ok(Value::Date(d)),
4980        // Integer literals carry days since the Unix epoch — used by
4981        // the `CURRENT_DATE` AST rewrite to inject the wall clock.
4982        Value::Int(n) => Ok(Value::Date(n)),
4983        Value::BigInt(n) => {
4984            i32::try_from(n)
4985                .map(Value::Date)
4986                .map_err(|_| EvalError::TypeMismatch {
4987                    detail: "bigint days-since-epoch out of DATE range".into(),
4988                })
4989        }
4990        // Timestamp truncates to its day boundary.
4991        Value::Timestamp(t) => {
4992            let days = t.div_euclid(86_400_000_000);
4993            i32::try_from(days)
4994                .map(Value::Date)
4995                .map_err(|_| EvalError::TypeMismatch {
4996                    detail: "timestamp out of DATE range".into(),
4997                })
4998        }
4999        Value::Text(s) => parse_date_literal(&s)
5000            .map(Value::Date)
5001            .ok_or(EvalError::TypeMismatch {
5002                detail: format!("cannot parse {s:?} as DATE (expected YYYY-MM-DD)"),
5003            }),
5004        other => Err(EvalError::TypeMismatch {
5005            detail: format!("cannot cast {:?} to DATE", other.data_type()),
5006        }),
5007    }
5008}
5009
5010fn cast_to_timestamp(v: Value) -> Result<Value, EvalError> {
5011    match v {
5012        Value::Timestamp(t) => Ok(Value::Timestamp(t)),
5013        // Int / BigInt carry microseconds since the Unix epoch — used
5014        // by the `NOW()` / `CURRENT_TIMESTAMP` AST rewrite to inject
5015        // the wall clock as a plain integer literal.
5016        Value::Int(n) => Ok(Value::Timestamp(i64::from(n))),
5017        Value::BigInt(n) => Ok(Value::Timestamp(n)),
5018        // DATE → TIMESTAMP picks midnight on the date.
5019        Value::Date(d) => Ok(Value::Timestamp(i64::from(d) * 86_400_000_000)),
5020        Value::Text(s) => {
5021            parse_timestamp_literal(&s)
5022                .map(Value::Timestamp)
5023                .ok_or(EvalError::TypeMismatch {
5024                    detail: format!(
5025                        "cannot parse {s:?} as TIMESTAMP \
5026                     (expected YYYY-MM-DD[ HH:MM:SS[.ffffff]])"
5027                    ),
5028                })
5029        }
5030        other => Err(EvalError::TypeMismatch {
5031            detail: format!("cannot cast {:?} to TIMESTAMP", other.data_type()),
5032        }),
5033    }
5034}
5035
5036fn value_to_text(v: &Value) -> String {
5037    match v {
5038        // v7.5.0 — Value is #[non_exhaustive]; any future variant
5039        // without explicit text rendering hits the Debug fallback
5040        // at the end.
5041        Value::SmallInt(n) => format!("{n}"),
5042        Value::Int(n) => format!("{n}"),
5043        Value::BigInt(n) => format!("{n}"),
5044        Value::Float(x) => format!("{x}"),
5045        // v4.9: JSON renders identically to Text — both are raw UTF-8.
5046        Value::Text(s) | Value::Json(s) => s.clone(),
5047        Value::Bool(b) => (if *b { "true" } else { "false" }).into(),
5048        Value::Vector(v) => {
5049            let cells: Vec<String> = v.iter().map(|x| format!("{x}")).collect();
5050            format!("[{}]", cells.join(", "))
5051        }
5052        // v6.0.1: render SQ8 cells dequantised, so SELECT output
5053        // matches the pgvector wire shape clients expect. The
5054        // recall envelope already absorbs the ≤ (max-min)/255/2
5055        // dequantisation error.
5056        Value::Sq8Vector(q) => {
5057            let cells: Vec<String> = spg_storage::quantize::dequantize(q)
5058                .iter()
5059                .map(|x| format!("{x}"))
5060                .collect();
5061            format!("[{}]", cells.join(", "))
5062        }
5063        // v6.0.3: HalfVector cells dequantise bit-exactly to f32
5064        // for SELECT output.
5065        Value::HalfVector(h) => {
5066            let cells: Vec<String> = h.to_f32_vec().iter().map(|x| format!("{x}")).collect();
5067            format!("[{}]", cells.join(", "))
5068        }
5069        Value::Numeric { scaled, scale } => format_numeric(*scaled, *scale),
5070        Value::Date(d) => format_date(*d),
5071        Value::Timestamp(t) => format_timestamp(*t),
5072        Value::Interval { months, micros } => format_interval(*months, *micros),
5073        Value::Null => "NULL".into(),
5074        // v7.10.4 — BYTEA renders as PG hex form.
5075        Value::Bytes(b) => format_bytea_hex(b),
5076        // v7.10.9 — TEXT[] / INT[] / BIGINT[] render PG external form.
5077        Value::TextArray(items) => format_text_array(items),
5078        Value::IntArray(items) => format_int_array(items),
5079        Value::BigIntArray(items) => format_bigint_array(items),
5080        // v7.12.0 — tsvector / tsquery render PG external form.
5081        Value::TsVector(lexs) => format_tsvector(lexs),
5082        Value::TsQuery(ast) => format_tsquery(ast),
5083        // v7.17.0 — UUID renders canonical lowercase 8-4-4-4-12
5084        // hyphenated form (PG `uuid_out`).
5085        Value::Uuid(b) => spg_storage::format_uuid(b),
5086        // v7.17.0 Phase 3.P0-32 — TIME canonical text.
5087        Value::Time(us) => format_time(*us),
5088        // v7.17.0 Phase 3.P0-34 — TIMETZ canonical text.
5089        Value::TimeTz { us, offset_secs } => format_timetz(*us, *offset_secs),
5090        // v7.17.0 Phase 3.P0-33 — YEAR 4-digit zero-padded.
5091        Value::Year(y) => format!("{y:04}"),
5092        // v7.17.0 Phase 3.P0-35 — MONEY en_US locale.
5093        Value::Money(c) => format_money(*c),
5094        // v7.17.0 Phase 3.P0-38 — Range canonical form. Routes
5095        // through the engine's format_range_text to share the
5096        // single renderer with pgwire / sqllogictest.
5097        Value::Range { .. } => crate::format_range_text(v),
5098        // v7.17.0 Phase 3.P0-39 — Hstore canonical PG text form.
5099        Value::Hstore(pairs) => crate::format_hstore_text(pairs),
5100        // v7.17.0 Phase 3.P0-40 — 2D array canonical PG text form.
5101        Value::IntArray2D(rows) => crate::format_int_2d_text_pub(rows),
5102        Value::BigIntArray2D(rows) => crate::format_bigint_2d_text_pub(rows),
5103        Value::TextArray2D(rows) => crate::format_text_2d_text_pub(rows),
5104        // v7.5.0 — #[non_exhaustive] fallback for future Value variants.
5105        _ => format!("{v:?}"),
5106    }
5107}
5108
5109/// Render a `Date` (days since epoch) as `YYYY-MM-DD`. Negative values
5110/// for pre-1970 dates render with a leading `-` on the year.
5111pub fn format_date(days: i32) -> String {
5112    let (y, m, d) = civil_from_days(days);
5113    format!("{y:04}-{m:02}-{d:02}")
5114}
5115
5116/// Render a `Timestamp` (microseconds since epoch) as
5117/// `YYYY-MM-DD HH:MM:SS[.fff...]`. Trailing-zero fractional digits are
5118/// dropped; a whole-second value has no fractional part.
5119/// v7.15.0 — PG-canonical TIMESTAMPTZ wire format. Storage is
5120/// the same i64 microseconds UTC as TIMESTAMP, but the canonical
5121/// PG text output appends the session's UTC-offset suffix (`+00`
5122/// for the default UTC session, the form pg_dump emits). Mailrs
5123/// round-8 acceptance criterion: `SELECT col FROM tstz` should
5124/// round-trip to a literal that re-INSERTs without semantic
5125/// drift.
5126pub fn format_timestamptz(micros: i64) -> String {
5127    let base = format_timestamp(micros);
5128    let mut s = String::with_capacity(base.len() + 3);
5129    s.push_str(&base);
5130    s.push_str("+00");
5131    s
5132}
5133
/// v7.17.0 Phase 3.P0-35 — PG `money` canonical text form for the
/// en_US locale: `$N,NNN.CC`, with negatives rendered as `-$1.23`.
/// Mirrors PG's `cash_out` for `lc_monetary = 'en_US.UTF-8'`.
///
/// `cents` is the amount in hundredths of a dollar.
pub fn format_money(cents: i64) -> String {
    // `unsigned_abs` keeps `i64::MIN` representable.
    let magnitude = cents.unsigned_abs();
    let digits = (magnitude / 100).to_string();

    // The leading group holds 1–3 digits; every later group holds
    // exactly three and is preceded by a comma.
    let head_len = match digits.len() % 3 {
        0 => 3,
        rem => rem,
    };
    let mut grouped = String::with_capacity(digits.len() + digits.len() / 3);
    grouped.push_str(&digits[..head_len]);
    for start in (head_len..digits.len()).step_by(3) {
        grouped.push(',');
        grouped.push_str(&digits[start..start + 3]);
    }

    let prefix = if cents < 0 { "-$" } else { "$" };
    let fraction = magnitude % 100;
    format!("{prefix}{grouped}.{fraction:02}")
}
5158
5159/// v7.17.0 Phase 3.P0-34 — PG `TIMETZ` canonical text form
5160/// `HH:MM:SS[.ffffff]±HH[:MM]`. Mirrors PG `timetz_out`. The
5161/// offset uses `±HH` for whole-hour offsets and `±HH:MM` for
5162/// sub-hour offsets (matching PG's "minimal display" rule).
5163pub fn format_timetz(us: i64, offset_secs: i32) -> String {
5164    let time = format_time(us);
5165    let sign = if offset_secs < 0 { '-' } else { '+' };
5166    let abs = offset_secs.unsigned_abs();
5167    let oh = abs / 3600;
5168    let om = (abs % 3600) / 60;
5169    if om == 0 {
5170        format!("{time}{sign}{oh:02}")
5171    } else {
5172        format!("{time}{sign}{oh:02}:{om:02}")
5173    }
5174}
5175
/// v7.17.0 Phase 3.P0-32 — PG `TIME` canonical text form
/// `HH:MM:SS[.ffffff]`, mirroring PG `time_out`. `us` is a microsecond
/// count; trailing zeros of the fractional part are stripped, so
/// `12:00:00.500000` renders as `12:00:00.5`, and a whole-second value
/// has no fractional part at all.
pub fn format_time(us: i64) -> String {
    let whole_secs = us.div_euclid(1_000_000);
    let sub_micros = us.rem_euclid(1_000_000);
    let (hours, minutes, seconds) = (whole_secs / 3600, (whole_secs / 60) % 60, whole_secs % 60);

    let mut rendered = format!("{hours:02}:{minutes:02}:{seconds:02}");
    if sub_micros != 0 {
        // Six zero-padded digits, then drop the insignificant tail.
        let digits = format!("{sub_micros:06}");
        rendered.push('.');
        rendered.push_str(digits.trim_end_matches('0'));
    }
    rendered
}
5194
5195pub fn format_timestamp(micros: i64) -> String {
5196    const MICROS_PER_DAY: i64 = 86_400_000_000;
5197    // Split into day + intra-day part with proper floor division so
5198    // negative timestamps render right too.
5199    let days = micros.div_euclid(MICROS_PER_DAY);
5200    let day_micros = micros.rem_euclid(MICROS_PER_DAY);
5201    let day_i32 = i32::try_from(days).unwrap_or(i32::MAX);
5202    let (y, m, d) = civil_from_days(day_i32);
5203    let secs = day_micros / 1_000_000;
5204    let frac = day_micros % 1_000_000;
5205    let hh = secs / 3600;
5206    let mm = (secs / 60) % 60;
5207    let ss = secs % 60;
5208    if frac == 0 {
5209        format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}")
5210    } else {
5211        // Strip trailing zeros from the 6-digit fractional component.
5212        let raw = format!("{frac:06}");
5213        let trimmed = raw.trim_end_matches('0');
5214        format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}.{trimmed}")
5215    }
5216}
5217
/// Howard Hinnant's `civil_from_days` — maps a day count relative to
/// 1970-01-01 onto a proleptic-Gregorian `(year, month, day)` triple.
/// Both directions of the calendar conversion live in `eval.rs` so the
/// engine never has to reach for `std` time facilities.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn civil_from_days(days: i32) -> (i32, u32, u32) {
    // Re-base so that day 0 is 0000-03-01: with years starting in
    // March, the leap day is the last day of a year.
    let shifted = i64::from(days) + 719_468;
    // A 400-year era always spans exactly 146_097 days.
    let era = shifted.div_euclid(146_097);
    // Lies in [0, 146_097), so the `as u32` below cannot truncate; the
    // same holds for every derived quantity.
    let day_of_era = shifted.rem_euclid(146_097) as u32;
    let year_of_era = (day_of_era.saturating_sub(day_of_era / 1460) + day_of_era / 36524
        - day_of_era / 146_096)
        / 365;
    let day_of_year =
        day_of_era.saturating_sub(365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March … 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year.saturating_sub((153 * month_index + 2) / 5) + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    // January and February belong to the next civil year.
    let year = i64::from(year_of_era) + era * 400 + i64::from(month <= 2);
    (year as i32, month, day)
}
5239
/// Inverse of `civil_from_days` — converts a proleptic-Gregorian
/// (year, month, day) to days since 1970-01-01.
///
/// The arguments are not validated. `d == 0` is treated as day 1, and
/// out-of-range months / days are extrapolated arithmetically rather
/// than rejected. All intermediate arithmetic is done in `i64`, so no
/// input can overflow; a result that does not fit in `i32` saturates to
/// `i32::MIN` or `i32::MAX` according to its sign.
pub fn days_from_civil(y: i32, m: u32, d: u32) -> i32 {
    // Years are counted from March so the leap day falls at year end:
    // January and February belong to the previous computational year.
    let y_adj = i64::from(y) - i64::from(m <= 2);
    // A 400-year era always spans exactly 146_097 days.
    let era = y_adj.div_euclid(400);
    let yoe = y_adj.rem_euclid(400);
    // Month index counted from March (0 = March … 11 = February).
    let mp = if m > 2 {
        i64::from(m) - 3
    } else {
        i64::from(m) + 9
    };
    let doy = (153 * mp + 2) / 5 + i64::from(d.saturating_sub(1));
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    let total = era * 146_097 + doe - 719_468;
    i32::try_from(total).unwrap_or(if total < 0 { i32::MIN } else { i32::MAX })
}
5256
5257/// Parse `YYYY-MM-DD` into a `Date` (days since Unix epoch). Returns
5258/// `None` on shape / numeric failure; the engine surfaces that as a
5259/// `TypeMismatch` with the original text included.
5260pub fn parse_date_literal(s: &str) -> Option<i32> {
5261    let bytes = s.as_bytes();
5262    if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' {
5263        return None;
5264    }
5265    let y: i32 = s[0..4].parse().ok()?;
5266    let m: u32 = s[5..7].parse().ok()?;
5267    let d: u32 = s[8..10].parse().ok()?;
5268    if !(1..=12).contains(&m) || !(1..=31).contains(&d) {
5269        return None;
5270    }
5271    Some(days_from_civil(y, m, d))
5272}
5273
5274/// Parse `YYYY-MM-DD[ HH:MM:SS[.ffffff]]` into a `Timestamp`
5275/// (microseconds since Unix epoch). The time portion is optional;
5276/// missing → midnight. The fractional portion accepts 1–6 digits and
5277/// pads with zeros to microseconds.
5278pub fn parse_timestamp_literal(s: &str) -> Option<i64> {
5279    let trimmed = s.trim();
5280    let (date_part, time_part) = match trimmed.find([' ', 'T']) {
5281        Some(i) => (&trimmed[..i], Some(&trimmed[i + 1..])),
5282        None => (trimmed, None),
5283    };
5284    let days = parse_date_literal(date_part)?;
5285    let (day_micros, tz_offset_micros) = match time_part {
5286        None => (0, 0),
5287        Some(t) => parse_time_of_day_micros(t)?,
5288    };
5289    // PG semantics: a TIMESTAMPTZ literal with an explicit offset
5290    // is normalised to UTC for storage. `'12:00:00+09'` means
5291    // 12:00:00 in a UTC+09 zone → 03:00:00 UTC → subtract the
5292    // positive offset (or add the negative one). Storage is i64
5293    // microseconds UTC for both TIMESTAMP and TIMESTAMPTZ (see
5294    // spg-storage::DataType::Timestamptz docs); the wire-level
5295    // round-trip then re-applies the session timezone on the
5296    // SELECT side when format_timestamp is asked for a TZ-aware
5297    // render.
5298    Some(i64::from(days) * 86_400_000_000 + day_micros - tz_offset_micros)
5299}
5300
5301/// v7.15.0 — Parse `HH:MM:SS[.frac][<tz>]` and return
5302/// `(day_micros, tz_offset_micros)` where `day_micros` is the
5303/// local-clock seconds-of-day in microseconds and
5304/// `tz_offset_micros` is the UTC offset (positive = east of
5305/// UTC, negative = west). Caller subtracts the offset to
5306/// normalise to UTC. PG's recognised TZ shapes after the
5307/// seconds (or frac) part:
5308///   * `+OO[:MM]` / `-OO[:MM]` — numeric offset
5309///   * `+OOMM` / `-OOMM` (no colon, less common but legal)
5310///   * ` UTC` / `UTC` / `Z` — explicit zero offset
5311/// Anything else after the seconds = parse failure (the caller
5312/// surfaces as "cannot parse … as TIMESTAMP").
5313fn parse_time_of_day_micros(t: &str) -> Option<(i64, i64)> {
5314    let t = t.trim();
5315    // Detect & strip optional TZ suffix. Anchor on the first
5316    // `+` / `-` AFTER position 8 (so the leading sign on a
5317    // negative offset can't be mistaken for an `HH:MM:SS-OO`
5318    // boundary if the time itself is somehow malformed).
5319    // ` UTC` and trailing `Z` also count as zero-offset TZ tags.
5320    let (core, tz_micros) = if let Some(rest) = t.strip_suffix('Z') {
5321        (rest, 0i64)
5322    } else if let Some(rest) = t.strip_suffix(" UTC").or_else(|| t.strip_suffix("UTC")) {
5323        (rest, 0i64)
5324    } else if let Some((idx, sign_byte)) = find_offset_sign(t) {
5325        let suffix = &t[idx..];
5326        let micros = parse_tz_offset_suffix(suffix, sign_byte == b'+')?;
5327        (&t[..idx], micros)
5328    } else {
5329        (t, 0i64)
5330    };
5331    let (time, frac_str) = match core.split_once('.') {
5332        Some((a, b)) => (a, Some(b)),
5333        None => (core, None),
5334    };
5335    let bytes = time.as_bytes();
5336    if bytes.len() != 8 || bytes[2] != b':' || bytes[5] != b':' {
5337        return None;
5338    }
5339    let hh: i64 = time[0..2].parse().ok()?;
5340    let mm: i64 = time[3..5].parse().ok()?;
5341    let ss: i64 = time[6..8].parse().ok()?;
5342    if !(0..24).contains(&hh) || !(0..60).contains(&mm) || !(0..60).contains(&ss) {
5343        return None;
5344    }
5345    let frac_micros: i64 = match frac_str {
5346        None => 0,
5347        Some(f) => {
5348            // Pad right with zeros to 6 digits, then truncate extras.
5349            if f.is_empty() || f.len() > 9 {
5350                return None;
5351            }
5352            let mut padded = String::with_capacity(6);
5353            padded.push_str(&f[..f.len().min(6)]);
5354            while padded.len() < 6 {
5355                padded.push('0');
5356            }
5357            padded.parse().ok()?
5358        }
5359    };
5360    Some((
5361        ((hh * 3600 + mm * 60 + ss) * 1_000_000) + frac_micros,
5362        tz_micros,
5363    ))
5364}
5365
/// Locate the `+` / `-` that introduces a numeric TZ offset in an
/// `HH:MM:SS[.fff]` time string.
///
/// Only bytes at index 8 and beyond are examined, so nothing inside the
/// leading `HH:MM:SS` can match. Returns the byte index together with the
/// sign byte of the first hit, or `None` when there is no such byte (which
/// includes every input shorter than nine bytes).
fn find_offset_sign(t: &str) -> Option<(usize, u8)> {
    t.bytes()
        .enumerate()
        .skip(8)
        .find(|&(_, byte)| matches!(byte, b'+' | b'-'))
}
5385
/// Parse `+OO`, `+OO:MM`, `+OOMM`, `-OO`, `-OO:MM`, `-OOMM` into a
/// UTC-offset delta in microseconds.
///
/// `suffix` must start with the sign byte; `is_positive` tells which sign
/// it was and decides the sign of the result. Hours must be in `0..=18`
/// and minutes in `0..60`. Every field must consist of ASCII digits only,
/// so inputs such as `++5:30` or `+aé9` return `None` rather than being
/// accepted or panicking on a mid-character slice.
fn parse_tz_offset_suffix(suffix: &str, is_positive: bool) -> Option<i64> {
    /// Parse one unsigned, digits-only field.
    fn field(text: &str) -> Option<i64> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        text.parse().ok()
    }

    // Drop the leading sign byte; `get` keeps this panic-free even for an
    // empty or non-ASCII-leading suffix.
    let body = suffix.get(1..)?;
    let (hh, mm): (i64, i64) = if let Some((h, m)) = body.split_once(':') {
        (field(h)?, field(m)?)
    } else {
        match body.len() {
            2 => (field(body)?, 0),
            // `+OOMM` without the colon. Checked slicing returns `None`
            // when a boundary falls inside a multi-byte character.
            4 => (field(body.get(..2)?)?, field(body.get(2..)?)?),
            // A 3-char body (`+053`) is ambiguous and PG doesn't emit it;
            // every other length is unsupported as well.
            _ => return None,
        }
    };
    if !(0..=18).contains(&hh) || !(0..60).contains(&mm) {
        return None;
    }
    let abs = (hh * 3600 + mm * 60) * 1_000_000;
    Some(if is_positive { abs } else { -abs })
}
5417
/// Render an `Interval { months, micros }` in a PG-ish text shape.
///
/// The months part contributes `N year(s)` and `N mon(s)`; the microsecond
/// part contributes `N day(s)` followed by `[-]HH:MM:SS[.frac]`, where the
/// fraction has its trailing zeros removed. Zero-valued components are
/// left out, components are joined by single spaces, and an all-zero
/// interval renders as `0`.
pub fn format_interval(months: i32, micros: i64) -> String {
    const MICROS_PER_SEC: i64 = 1_000_000;
    const MICROS_PER_DAY: i64 = 86_400_000_000;

    // The singular label is used only for exactly `+1`; `-1` and every
    // other count take the plural.
    fn labelled(count: i64, one: &str, many: &str) -> String {
        let label = if count == 1 { one } else { many };
        format!("{count} {label}")
    }

    let mut pieces: Vec<String> = Vec::new();

    let (whole_years, left_months) = (months / 12, months % 12);
    if whole_years != 0 {
        pieces.push(labelled(i64::from(whole_years), "year", "years"));
    }
    if left_months != 0 {
        pieces.push(labelled(i64::from(left_months), "mon", "mons"));
    }

    let (whole_days, clock) = (micros / MICROS_PER_DAY, micros % MICROS_PER_DAY);
    if whole_days != 0 {
        pieces.push(labelled(whole_days, "day", "days"));
    }

    if clock != 0 {
        // `clock` is a remainder, so its magnitude is below one day and
        // taking the absolute value cannot overflow.
        let sign = if clock < 0 { "-" } else { "" };
        let magnitude = clock.abs();
        let total_secs = magnitude / MICROS_PER_SEC;
        let sub_micros = magnitude % MICROS_PER_SEC;
        let (hh, mm, ss) = (total_secs / 3600, total_secs / 60 % 60, total_secs % 60);

        let mut clock_text = format!("{sign}{hh:02}:{mm:02}:{ss:02}");
        if sub_micros != 0 {
            let six_digits = format!("{sub_micros:06}");
            clock_text.push('.');
            clock_text.push_str(six_digits.trim_end_matches('0'));
        }
        pieces.push(clock_text);
    }

    if pieces.is_empty() {
        String::from("0")
    } else {
        pieces.join(" ")
    }
}
5471
5472/// Add `months` (signed) to a `(year, month, day)` triple using PG's
5473/// clamp-to-last-day rule (so `'2024-01-31' + 1 month` → `'2024-02-29'`).
5474fn add_months_to_civil(y: i32, m: u32, d: u32, months: i32) -> (i32, u32, u32) {
5475    let total_months = i64::from(y) * 12 + i64::from(m) - 1 + i64::from(months);
5476    let new_year = i32::try_from(total_months.div_euclid(12)).unwrap_or(i32::MAX);
5477    let new_month_zero = total_months.rem_euclid(12);
5478    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
5479    let new_month = (new_month_zero as u32) + 1;
5480    let max_day = days_in_month(new_year, new_month);
5481    (new_year, new_month, d.min(max_day))
5482}
5483
/// Number of days in month `m` (1 = January) of year `y`, using the
/// proleptic Gregorian leap rule for February. Any month outside `1..=12`
/// gets the 30-day fallback, same as April / June / September / November
/// (callers normalise first; this is purely defensive).
const fn days_in_month(y: i32, m: u32) -> u32 {
    // Leap year: divisible by 4, except centuries not divisible by 400.
    let leap_year =
        y.rem_euclid(4) == 0 && (y.rem_euclid(100) != 0 || y.rem_euclid(400) == 0);
    match m {
        2 if leap_year => 29,
        2 => 28,
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        _ => 30,
    }
}
5500
/// v7.10.9 — render a TEXT[] in PG's external array form (`{a,b,NULL}`).
///
/// NULL elements are written as the bare token `NULL`. A non-NULL element
/// is double-quoted — with `"` and `\` each preceded by a backslash — when
/// it is empty, spells `NULL` in any ASCII case, or contains a comma, a
/// brace, a double quote, a backslash, or whitespace (space, tab, newline,
/// carriage return, vertical tab, form feed). Everything else is emitted
/// verbatim. Public so the wire layer can produce the canonical text-mode
/// encoding.
pub fn format_text_array(items: &[Option<String>]) -> String {
    let mut out = String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                // The whitespace set covers newline, carriage return,
                // vertical tab and form feed as well as space and tab; an
                // unquoted element with such an edge character would not
                // survive a round trip through an array parser.
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars().any(|c| {
                        matches!(
                            c,
                            ',' | '{'
                                | '}'
                                | '"'
                                | '\\'
                                | ' '
                                | '\t'
                                | '\n'
                                | '\r'
                                | '\x0b'
                                | '\x0c'
                        )
                    });
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
5538
/// v7.11.14 — render an INT[] in PG's external array form (`{1,2,NULL}`).
/// Each element is its decimal text or the bare token `NULL`; elements are
/// comma-separated inside braces and never quoted.
pub fn format_int_array(items: &[Option<i32>]) -> String {
    let rendered: Vec<String> = items
        .iter()
        .map(|slot| match slot {
            Some(n) => n.to_string(),
            None => String::from("NULL"),
        })
        .collect();
    format!("{{{}}}", rendered.join(","))
}
5557
/// v7.11.14 — render a BIGINT[] in PG's external array form
/// (`{1,2,NULL}`): comma-separated decimal values inside braces, with the
/// bare token `NULL` standing in for NULL elements.
pub fn format_bigint_array(items: &[Option<i64>]) -> String {
    let mut text = String::with_capacity(2 + items.len() * 6);
    text.push('{');
    let mut remaining = items.iter();
    // Write the first element bare, then prefix every later one with a
    // comma.
    if let Some(first) = remaining.next() {
        text.push_str(&first.map_or_else(|| String::from("NULL"), |n| n.to_string()));
        for slot in remaining {
            text.push(',');
            text.push_str(&slot.map_or_else(|| String::from("NULL"), |n| n.to_string()));
        }
    }
    text.push('}');
    text
}
5575
5576/// v7.12.0 — render a `tsvector` in PG's external form:
5577/// `'lex':1,2A 'word':3` (single-quoted lexemes, optional
5578/// `:positions`, optional weight letter `A/B/C/D` per position).
5579/// Lexemes already arrive sorted + deduped from the engine. Used
5580/// by the wire layer (OID 3614) and by SELECT-text output.
5581pub fn format_tsvector(lexs: &[TsLexeme]) -> String {
5582    let mut out = String::with_capacity(lexs.len() * 12);
5583    for (i, l) in lexs.iter().enumerate() {
5584        if i > 0 {
5585            out.push(' ');
5586        }
5587        out.push('\'');
5588        for c in l.word.chars() {
5589            if c == '\'' {
5590                out.push('\'');
5591            }
5592            out.push(c);
5593        }
5594        out.push('\'');
5595        if !l.positions.is_empty() {
5596            for (pi, p) in l.positions.iter().enumerate() {
5597                out.push(if pi == 0 { ':' } else { ',' });
5598                out.push_str(&p.to_string());
5599            }
5600            // v7.12.0 — weight is per-lexeme (the v7.12 design
5601            // collapses PG's per-position weight into one letter).
5602            // Emit once after the last position; default `D`
5603            // (weight=0) stays implicit.
5604            match l.weight {
5605                3 => out.push('A'),
5606                2 => out.push('B'),
5607                1 => out.push('C'),
5608                _ => {}
5609            }
5610        }
5611    }
5612    out
5613}
5614
5615/// v7.12.0 — render a `tsquery` in PG's external form. Operator
5616/// precedence: `!` > `&` > `|`. Phrase distance shown as `<N>`.
5617pub fn format_tsquery(ast: &TsQueryAst) -> String {
5618    fn go(ast: &TsQueryAst, parent_prec: u8, out: &mut String) {
5619        // 0 = top, 1 = OR, 2 = AND, 3 = NOT/Phrase, 4 = atom.
5620        let (own_prec, write_self): (u8, &dyn Fn(&mut String)) = match ast {
5621            TsQueryAst::Or(_, _) => (1, &|_| {}),
5622            TsQueryAst::And(_, _) | TsQueryAst::Phrase { .. } => (2, &|_| {}),
5623            TsQueryAst::Not(_) => (3, &|_| {}),
5624            TsQueryAst::Term { .. } => (4, &|_| {}),
5625        };
5626        let need_parens = own_prec < parent_prec;
5627        if need_parens {
5628            out.push('(');
5629        }
5630        match ast {
5631            TsQueryAst::Term { word, .. } => {
5632                out.push('\'');
5633                for c in word.chars() {
5634                    if c == '\'' {
5635                        out.push('\'');
5636                    }
5637                    out.push(c);
5638                }
5639                out.push('\'');
5640            }
5641            TsQueryAst::And(a, b) => {
5642                go(a, own_prec, out);
5643                out.push_str(" & ");
5644                go(b, own_prec, out);
5645            }
5646            TsQueryAst::Or(a, b) => {
5647                go(a, own_prec, out);
5648                out.push_str(" | ");
5649                go(b, own_prec, out);
5650            }
5651            TsQueryAst::Not(x) => {
5652                out.push('!');
5653                go(x, own_prec, out);
5654            }
5655            TsQueryAst::Phrase {
5656                left,
5657                right,
5658                distance,
5659            } => {
5660                go(left, own_prec, out);
5661                out.push_str(&alloc::format!(" <{distance}> "));
5662                go(right, own_prec, out);
5663            }
5664        }
5665        write_self(out);
5666        if need_parens {
5667            out.push(')');
5668        }
5669    }
5670    let mut out = String::new();
5671    go(ast, 0, &mut out);
5672    out
5673}
5674
5675/// v7.12.0 — decode PG external form `'word':1,2A 'other':3` into
5676/// a `Vec<TsLexeme>`. Lexemes are sorted ascending by `word` (with
5677/// duplicates merged on positions) so the output matches the
5678/// engine invariant. Empty input yields an empty vector.
5679///
5680/// v7.12.0 only ships the cast-literal entry. Full `to_tsvector`
5681/// (Unicode word-split + Porter stemming + stopwords) lands in
5682/// v7.12.1.
5683pub fn decode_tsvector_external(s: &str) -> Result<Vec<TsLexeme>, EvalError> {
5684    let mut out: Vec<TsLexeme> = Vec::new();
5685    let mut i = 0;
5686    let bytes = s.as_bytes();
5687    while i < bytes.len() {
5688        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
5689            i += 1;
5690        }
5691        if i >= bytes.len() {
5692            break;
5693        }
5694        // Quoted form `'word'` (with embedded `''` for a literal
5695        // single quote, mirroring PG).
5696        let word = if bytes[i] == b'\'' {
5697            i += 1;
5698            let mut w = String::new();
5699            loop {
5700                if i >= bytes.len() {
5701                    return Err(EvalError::TypeMismatch {
5702                        detail: "tsvector literal: unterminated quoted lexeme".into(),
5703                    });
5704                }
5705                let b = bytes[i];
5706                if b == b'\'' {
5707                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
5708                        w.push('\'');
5709                        i += 2;
5710                    } else {
5711                        i += 1;
5712                        break;
5713                    }
5714                } else {
5715                    w.push(b as char);
5716                    i += 1;
5717                }
5718            }
5719            w
5720        } else {
5721            // Bare form — read until whitespace, ':' or end.
5722            let start = i;
5723            while i < bytes.len() && !bytes[i].is_ascii_whitespace() && bytes[i] != b':' {
5724                i += 1;
5725            }
5726            core::str::from_utf8(&bytes[start..i])
5727                .map_err(|_| EvalError::TypeMismatch {
5728                    detail: "tsvector literal: non-UTF-8 lexeme".into(),
5729                })?
5730                .to_string()
5731        };
5732        if word.is_empty() {
5733            return Err(EvalError::TypeMismatch {
5734                detail: "tsvector literal: empty lexeme".into(),
5735            });
5736        }
5737        // Optional `:pos[,pos][,pos]`. Each position is u16; each
5738        // may carry a trailing weight letter A/B/C/D.
5739        let mut positions: Vec<u16> = Vec::new();
5740        let mut weight: u8 = 0;
5741        if i < bytes.len() && bytes[i] == b':' {
5742            i += 1;
5743            loop {
5744                let start = i;
5745                while i < bytes.len() && bytes[i].is_ascii_digit() {
5746                    i += 1;
5747                }
5748                if start == i {
5749                    return Err(EvalError::TypeMismatch {
5750                        detail: "tsvector literal: expected digit after ':'".into(),
5751                    });
5752                }
5753                let num: u16 = core::str::from_utf8(&bytes[start..i])
5754                    .expect("ascii digits")
5755                    .parse()
5756                    .map_err(|_| EvalError::TypeMismatch {
5757                        detail: alloc::format!(
5758                            "tsvector literal: position {} overflows u16",
5759                            core::str::from_utf8(&bytes[start..i]).unwrap_or("?")
5760                        ),
5761                    })?;
5762                positions.push(num);
5763                if i < bytes.len() {
5764                    let w = bytes[i];
5765                    if matches!(w, b'A' | b'B' | b'C' | b'D') {
5766                        weight = match w {
5767                            b'A' => 3,
5768                            b'B' => 2,
5769                            b'C' => 1,
5770                            _ => 0,
5771                        };
5772                        i += 1;
5773                    }
5774                }
5775                if i < bytes.len() && bytes[i] == b',' {
5776                    i += 1;
5777                    continue;
5778                }
5779                break;
5780            }
5781        }
5782        positions.sort_unstable();
5783        positions.dedup();
5784        // Merge into the output vector — sorted insert by word,
5785        // duplicate words merge positions.
5786        match out.binary_search_by(|l| l.word.as_str().cmp(word.as_str())) {
5787            Ok(idx) => {
5788                for p in positions {
5789                    if !out[idx].positions.contains(&p) {
5790                        out[idx].positions.push(p);
5791                    }
5792                }
5793                out[idx].positions.sort_unstable();
5794                if weight != 0 {
5795                    out[idx].weight = weight;
5796                }
5797            }
5798            Err(idx) => {
5799                out.insert(
5800                    idx,
5801                    TsLexeme {
5802                        word,
5803                        positions,
5804                        weight,
5805                    },
5806                );
5807            }
5808        }
5809    }
5810    Ok(out)
5811}
5812
5813/// v7.12.0 — decode PG external form `'foo' & 'bar' | !'baz'`
5814/// into a `TsQueryAst`. v7.12.0 supports the canonical
5815/// `to_tsquery` surface: single-quoted lexemes, `&` / `|` / `!`,
5816/// parens, and phrase `<N>`. Bare lexemes are accepted too. Full
5817/// `plainto_tsquery` / `websearch_to_tsquery` arrive in v7.12.1.
5818pub fn decode_tsquery_external(s: &str) -> Result<TsQueryAst, EvalError> {
5819    let mut p = TsQueryParser {
5820        bytes: s.as_bytes(),
5821        pos: 0,
5822    };
5823    p.skip_ws();
5824    if p.pos >= p.bytes.len() {
5825        return Err(EvalError::TypeMismatch {
5826            detail: "tsquery literal: empty".into(),
5827        });
5828    }
5829    let ast = p.parse_or()?;
5830    p.skip_ws();
5831    if p.pos < p.bytes.len() {
5832        return Err(EvalError::TypeMismatch {
5833            detail: alloc::format!("tsquery literal: trailing garbage at offset {}", p.pos),
5834        });
5835    }
5836    Ok(ast)
5837}
5838
5839struct TsQueryParser<'a> {
5840    bytes: &'a [u8],
5841    pos: usize,
5842}
5843
5844impl<'a> TsQueryParser<'a> {
5845    fn skip_ws(&mut self) {
5846        while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_whitespace() {
5847            self.pos += 1;
5848        }
5849    }
5850    fn peek(&self) -> Option<u8> {
5851        self.bytes.get(self.pos).copied()
5852    }
5853    fn parse_or(&mut self) -> Result<TsQueryAst, EvalError> {
5854        let mut lhs = self.parse_and()?;
5855        loop {
5856            self.skip_ws();
5857            if self.peek() != Some(b'|') {
5858                return Ok(lhs);
5859            }
5860            self.pos += 1;
5861            let rhs = self.parse_and()?;
5862            lhs = TsQueryAst::Or(Box::new(lhs), Box::new(rhs));
5863        }
5864    }
5865    fn parse_and(&mut self) -> Result<TsQueryAst, EvalError> {
5866        let mut lhs = self.parse_unary()?;
5867        loop {
5868            self.skip_ws();
5869            match self.peek() {
5870                Some(b'&') => {
5871                    self.pos += 1;
5872                    let rhs = self.parse_unary()?;
5873                    lhs = TsQueryAst::And(Box::new(lhs), Box::new(rhs));
5874                }
5875                Some(b'<') => {
5876                    // Phrase distance `<N>`.
5877                    self.pos += 1;
5878                    let start = self.pos;
5879                    while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_digit() {
5880                        self.pos += 1;
5881                    }
5882                    if start == self.pos || self.peek() != Some(b'>') {
5883                        return Err(EvalError::TypeMismatch {
5884                            detail: "tsquery literal: malformed <N> phrase operator".into(),
5885                        });
5886                    }
5887                    let n: u16 = core::str::from_utf8(&self.bytes[start..self.pos])
5888                        .expect("ascii digits")
5889                        .parse()
5890                        .map_err(|_| EvalError::TypeMismatch {
5891                            detail: "tsquery literal: phrase distance overflows u16".into(),
5892                        })?;
5893                    self.pos += 1; // consume '>'
5894                    let rhs = self.parse_unary()?;
5895                    lhs = TsQueryAst::Phrase {
5896                        left: Box::new(lhs),
5897                        right: Box::new(rhs),
5898                        distance: n,
5899                    };
5900                }
5901                _ => return Ok(lhs),
5902            }
5903        }
5904    }
5905    fn parse_unary(&mut self) -> Result<TsQueryAst, EvalError> {
5906        self.skip_ws();
5907        if self.peek() == Some(b'!') {
5908            self.pos += 1;
5909            let inner = self.parse_unary()?;
5910            return Ok(TsQueryAst::Not(Box::new(inner)));
5911        }
5912        self.parse_atom()
5913    }
5914    fn parse_atom(&mut self) -> Result<TsQueryAst, EvalError> {
5915        self.skip_ws();
5916        match self.peek() {
5917            Some(b'(') => {
5918                self.pos += 1;
5919                let inner = self.parse_or()?;
5920                self.skip_ws();
5921                if self.peek() != Some(b')') {
5922                    return Err(EvalError::TypeMismatch {
5923                        detail: "tsquery literal: missing ')'".into(),
5924                    });
5925                }
5926                self.pos += 1;
5927                Ok(inner)
5928            }
5929            Some(b'\'') => {
5930                self.pos += 1;
5931                let mut w = String::new();
5932                loop {
5933                    match self.peek() {
5934                        None => {
5935                            return Err(EvalError::TypeMismatch {
5936                                detail: "tsquery literal: unterminated quoted lexeme".into(),
5937                            });
5938                        }
5939                        Some(b'\'') => {
5940                            if self.bytes.get(self.pos + 1) == Some(&b'\'') {
5941                                w.push('\'');
5942                                self.pos += 2;
5943                            } else {
5944                                self.pos += 1;
5945                                break;
5946                            }
5947                        }
5948                        Some(b) => {
5949                            w.push(b as char);
5950                            self.pos += 1;
5951                        }
5952                    }
5953                }
5954                // Optional `:WEIGHT_MASK` (digit-mask) — v7.12.0
5955                // accepts but always stores 0 (any).
5956                self.skip_weight_suffix();
5957                Ok(TsQueryAst::Term {
5958                    word: w,
5959                    weight_mask: 0,
5960                })
5961            }
5962            Some(b) if b.is_ascii_alphanumeric() || b == b'_' => {
5963                let start = self.pos;
5964                while self.pos < self.bytes.len() {
5965                    let c = self.bytes[self.pos];
5966                    if c.is_ascii_alphanumeric() || c == b'_' {
5967                        self.pos += 1;
5968                    } else {
5969                        break;
5970                    }
5971                }
5972                let w = core::str::from_utf8(&self.bytes[start..self.pos])
5973                    .map_err(|_| EvalError::TypeMismatch {
5974                        detail: "tsquery literal: non-UTF-8 lexeme".into(),
5975                    })?
5976                    .to_string();
5977                self.skip_weight_suffix();
5978                Ok(TsQueryAst::Term {
5979                    word: w,
5980                    weight_mask: 0,
5981                })
5982            }
5983            Some(b) => Err(EvalError::TypeMismatch {
5984                detail: alloc::format!(
5985                    "tsquery literal: unexpected byte {:?} at offset {}",
5986                    b as char,
5987                    self.pos
5988                ),
5989            }),
5990            None => Err(EvalError::TypeMismatch {
5991                detail: "tsquery literal: expected term".into(),
5992            }),
5993        }
5994    }
5995    fn skip_weight_suffix(&mut self) {
5996        if self.peek() != Some(b':') {
5997            return;
5998        }
5999        self.pos += 1;
6000        while let Some(b) = self.peek() {
6001            if matches!(
6002                b,
6003                b'A' | b'B' | b'C' | b'D' | b'a' | b'b' | b'c' | b'd' | b'*'
6004            ) || b.is_ascii_digit()
6005            {
6006                self.pos += 1;
6007            } else {
6008                break;
6009            }
6010        }
6011    }
6012}
6013
/// v7.10.4 — render a BYTEA payload in PG's hex output format: a literal
/// `\x` prefix followed by two lowercase hex digits per byte, high nibble
/// first. Public so the wire layer can emit the canonical bytea-as-text
/// representation.
pub fn format_bytea_hex(b: &[u8]) -> String {
    const DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];
    let mut text = String::with_capacity(2 + 2 * b.len());
    text.push_str("\\x");
    for &octet in b {
        text.push(DIGITS[usize::from(octet >> 4)]);
        text.push(DIGITS[usize::from(octet & 0x0F)]);
    }
    text
}
6027
/// Render a `Numeric { scaled, scale }` as decimal text.
///
/// With `scale == 0` the value prints as a plain integer. Otherwise the
/// last `scale` digits of the magnitude form the fractional part; a
/// magnitude with too few digits is left-padded with zeros behind a
/// leading `0.`. Negative values get a `-` prefix.
pub fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return scaled.to_string();
    }
    let digits = scaled.unsigned_abs().to_string();
    let frac_len = usize::from(scale);
    let sign = if scaled < 0 { "-" } else { "" };
    if digits.len() > frac_len {
        let (whole, frac) = digits.split_at(digits.len() - frac_len);
        format!("{sign}{whole}.{frac}")
    } else {
        // All digits are fractional: zero-fill on the left to exactly
        // `scale` characters.
        format!("{sign}0.{digits:0>frac_len$}")
    }
}
6059
6060fn cast_numeric_to_int(v: Value) -> Result<Value, EvalError> {
6061    match v {
6062        Value::Int(n) => Ok(Value::Int(n)),
6063        Value::BigInt(n) => i32::try_from(n)
6064            .map(Value::Int)
6065            .map_err(|_| EvalError::TypeMismatch {
6066                detail: format!("bigint {n} does not fit in int"),
6067            }),
6068        #[allow(clippy::cast_possible_truncation)]
6069        Value::Float(x) => Ok(Value::Int(x as i32)),
6070        Value::Text(s) => {
6071            s.trim()
6072                .parse::<i32>()
6073                .map(Value::Int)
6074                .map_err(|_| EvalError::TypeMismatch {
6075                    detail: format!("cannot parse {s:?} as int"),
6076                })
6077        }
6078        Value::Bool(b) => Ok(Value::Int(i32::from(b))),
6079        other => Err(EvalError::TypeMismatch {
6080            detail: format!("cannot cast {:?} to int", other.data_type()),
6081        }),
6082    }
6083}
6084
6085fn cast_numeric_to_bigint(v: Value) -> Result<Value, EvalError> {
6086    match v {
6087        Value::Int(n) => Ok(Value::BigInt(i64::from(n))),
6088        Value::BigInt(n) => Ok(Value::BigInt(n)),
6089        #[allow(clippy::cast_possible_truncation)]
6090        Value::Float(x) => Ok(Value::BigInt(x as i64)),
6091        Value::Text(s) => {
6092            s.trim()
6093                .parse::<i64>()
6094                .map(Value::BigInt)
6095                .map_err(|_| EvalError::TypeMismatch {
6096                    detail: format!("cannot parse {s:?} as bigint"),
6097                })
6098        }
6099        Value::Bool(b) => Ok(Value::BigInt(i64::from(b))),
6100        other => Err(EvalError::TypeMismatch {
6101            detail: format!("cannot cast {:?} to bigint", other.data_type()),
6102        }),
6103    }
6104}
6105
6106fn cast_numeric_to_float(v: Value) -> Result<Value, EvalError> {
6107    match v {
6108        Value::Int(n) => Ok(Value::Float(f64::from(n))),
6109        #[allow(clippy::cast_precision_loss)]
6110        Value::BigInt(n) => Ok(Value::Float(n as f64)),
6111        Value::Float(x) => Ok(Value::Float(x)),
6112        Value::Text(s) => {
6113            s.trim()
6114                .parse::<f64>()
6115                .map(Value::Float)
6116                .map_err(|_| EvalError::TypeMismatch {
6117                    detail: format!("cannot parse {s:?} as float"),
6118                })
6119        }
6120        other => Err(EvalError::TypeMismatch {
6121            detail: format!("cannot cast {:?} to float", other.data_type()),
6122        }),
6123    }
6124}
6125
6126fn cast_to_bool(v: Value) -> Result<Value, EvalError> {
6127    match v {
6128        Value::Bool(b) => Ok(Value::Bool(b)),
6129        Value::Int(n) => Ok(Value::Bool(n != 0)),
6130        Value::BigInt(n) => Ok(Value::Bool(n != 0)),
6131        Value::Text(s) => {
6132            let lo = s.trim().to_ascii_lowercase();
6133            match lo.as_str() {
6134                "true" | "t" | "yes" | "y" | "1" | "on" => Ok(Value::Bool(true)),
6135                "false" | "f" | "no" | "n" | "0" | "off" => Ok(Value::Bool(false)),
6136                _ => Err(EvalError::TypeMismatch {
6137                    detail: format!("cannot parse {s:?} as bool"),
6138                }),
6139            }
6140        }
6141        other => Err(EvalError::TypeMismatch {
6142            detail: format!("cannot cast {:?} to bool", other.data_type()),
6143        }),
6144    }
6145}
6146
6147/// Parse a `Value::Text("[1.0, 2.0, 3.0]")` into a `Value::Vector(..)`. Mirrors
6148/// pgvector's `'[..]'::vector` cast. NULL casts as NULL.
6149pub fn cast_to_vector(v: Value) -> Result<Value, EvalError> {
6150    match v {
6151        Value::Null => Ok(Value::Null),
6152        Value::Vector(v) => Ok(Value::Vector(v)),
6153        Value::Text(s) => parse_vector_text(&s)
6154            .map(Value::Vector)
6155            .ok_or(EvalError::TypeMismatch {
6156                detail: format!("cannot parse {s:?} as a vector literal"),
6157            }),
6158        other => Err(EvalError::TypeMismatch {
6159            detail: format!("::vector requires text input, got {:?}", other.data_type()),
6160        }),
6161    }
6162}
6163
/// Parse `"[1.0, 2.0, -3]"` into `Vec<f32>`.
///
/// Surrounding whitespace is ignored, both around the brackets and around
/// each comma-separated component. `"[]"` (optionally with inner
/// whitespace) yields an empty vector.
///
/// Returns `None` on malformed input: missing brackets, an empty or
/// non-numeric component, or a component that is not finite. The finite
/// check is needed because `f32::from_str` accepts `NaN` / `inf` /
/// `infinity` and turns out-of-range literals such as `1e40` into
/// infinity, none of which pgvector accepts as a vector element.
pub fn parse_vector_text(s: &str) -> Option<Vec<f32>> {
    let inner = s.trim().strip_prefix('[')?.strip_suffix(']')?.trim();
    if inner.is_empty() {
        return Some(Vec::new());
    }
    inner
        .split(',')
        .map(|part| {
            part.trim()
                .parse::<f32>()
                .ok()
                .filter(|component| component.is_finite())
        })
        .collect()
}
6179
6180fn literal_to_value(l: &Literal) -> Value {
6181    match l {
6182        Literal::Integer(n) => {
6183            if let Ok(small) = i32::try_from(*n) {
6184                Value::Int(small)
6185            } else {
6186                Value::BigInt(*n)
6187            }
6188        }
6189        Literal::Float(x) => Value::Float(*x),
6190        Literal::String(s) => Value::Text(s.clone()),
6191        Literal::Vector(v) => Value::Vector(v.clone()),
6192        Literal::Bool(b) => Value::Bool(*b),
6193        Literal::Null => Value::Null,
6194        Literal::Interval { months, micros, .. } => Value::Interval {
6195            months: *months,
6196            micros: *micros,
6197        },
6198    }
6199}
6200
6201/// v7.17.0 Phase 2.5 — look up the collation of a column reference
6202/// in the current evaluation context. Returns `None` when the
6203/// expression is not a column reference (e.g. literal / function
6204/// call) or the column can't be resolved (caller falls back to
6205/// `Collation::Binary` semantics).
6206pub(crate) fn column_collation(e: &Expr, ctx: &EvalContext<'_>) -> Option<spg_storage::Collation> {
6207    let Expr::Column(c) = e else {
6208        return None;
6209    };
6210    if let Some(q) = &c.qualifier {
6211        let composite = alloc::format!("{q}.{name}", name = c.name);
6212        if let Some(s) = ctx.columns.iter().find(|s| s.name == composite) {
6213            return Some(s.collation);
6214        }
6215    }
6216    if let Some(s) = ctx.columns.iter().find(|s| s.name == c.name) {
6217        return Some(s.collation);
6218    }
6219    // Bare-name fallback for joined schemas (same shape as
6220    // resolve_column): match a single composite ending in
6221    // ".<name>".
6222    let suffix = alloc::format!(".{name}", name = c.name);
6223    let mut matches = ctx.columns.iter().filter(|s| s.name.ends_with(&suffix));
6224    let first = matches.next();
6225    let extra = matches.next();
6226    match (first, extra) {
6227        (Some(s), None) => Some(s.collation),
6228        _ => None,
6229    }
6230}
6231
6232/// v7.17.0 Phase 2.5 — if the comparison op is text-equality and
6233/// either operand references a CaseInsensitive column, return
6234/// ASCII-folded copies of both Text values; otherwise pass
6235/// through. Only Eq / NotEq / Lt / LtEq / Gt / GtEq trigger the
6236/// fold — relational operators on text still honour collation
6237/// the same way (PG semantics). Non-Text values pass through.
6238fn collation_fold_for_compare(
6239    op: BinOp,
6240    lhs: &Expr,
6241    rhs: &Expr,
6242    l: Value,
6243    r: Value,
6244    ctx: &EvalContext<'_>,
6245) -> (Value, Value) {
6246    if !matches!(
6247        op,
6248        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq
6249    ) {
6250        return (l, r);
6251    }
6252    let lhs_col = column_collation(lhs, ctx);
6253    let rhs_col = column_collation(rhs, ctx);
6254    let ci = matches!(lhs_col, Some(spg_storage::Collation::CaseInsensitive))
6255        || matches!(rhs_col, Some(spg_storage::Collation::CaseInsensitive));
6256    if !ci {
6257        return (l, r);
6258    }
6259    let fold = |v: Value| match v {
6260        Value::Text(s) => Value::Text(s.to_ascii_lowercase()),
6261        other => other,
6262    };
6263    (fold(l), fold(r))
6264}
6265
6266fn resolve_column(c: &ColumnName, row: &Row, ctx: &EvalContext<'_>) -> Result<Value, EvalError> {
6267    if let Some(q) = &c.qualifier {
6268        // Multi-table evaluation (joins): the synthesised schema uses
6269        // composite column names "alias.column" so we look that up
6270        // directly. Falls back to the single-table case below if the
6271        // composite isn't present.
6272        let composite = alloc::format!("{q}.{name}", name = c.name);
6273        if let Some(pos) = ctx.columns.iter().position(|s| s.name == composite) {
6274            return Ok(row.values[pos].clone());
6275        }
6276        let expected = ctx.table_alias.ok_or_else(|| EvalError::UnknownQualifier {
6277            qualifier: q.clone(),
6278        })?;
6279        if q != expected {
6280            return Err(EvalError::UnknownQualifier {
6281                qualifier: q.clone(),
6282            });
6283        }
6284    }
6285    if let Some(pos) = ctx.columns.iter().position(|s| s.name == c.name) {
6286        return Ok(row.values[pos].clone());
6287    }
6288    // Bare-name fallback for joined schemas: match any single composite
6289    // column ending in ".<name>"; ambiguity is an error.
6290    let suffix = alloc::format!(".{name}", name = c.name);
6291    let mut matches = ctx
6292        .columns
6293        .iter()
6294        .enumerate()
6295        .filter(|(_, s)| s.name.ends_with(&suffix));
6296    let first = matches.next();
6297    let extra = matches.next();
6298    match (first, extra) {
6299        (Some((pos, _)), None) => Ok(row.values[pos].clone()),
6300        (Some(_), Some(_)) => Err(EvalError::TypeMismatch {
6301            detail: alloc::format!("ambiguous column reference: {}", c.name),
6302        }),
6303        _ => Err(EvalError::ColumnNotFound {
6304            name: c.name.clone(),
6305        }),
6306    }
6307}
6308
6309fn apply_unary(op: UnOp, v: Value) -> Result<Value, EvalError> {
6310    match (op, v) {
6311        (_, Value::Null) => Ok(Value::Null),
6312        (UnOp::Neg, Value::Int(n)) => {
6313            n.checked_neg()
6314                .map(Value::Int)
6315                .ok_or(EvalError::TypeMismatch {
6316                    detail: "integer overflow on unary -".into(),
6317                })
6318        }
6319        (UnOp::Neg, Value::BigInt(n)) => {
6320            n.checked_neg()
6321                .map(Value::BigInt)
6322                .ok_or(EvalError::TypeMismatch {
6323                    detail: "bigint overflow on unary -".into(),
6324                })
6325        }
6326        (UnOp::Neg, Value::Float(x)) => Ok(Value::Float(-x)),
6327        (UnOp::Neg, other) => Err(EvalError::TypeMismatch {
6328            detail: format!("unary - applied to {:?}", other.data_type()),
6329        }),
6330        (UnOp::Not, Value::Bool(b)) => Ok(Value::Bool(!b)),
6331        (UnOp::Not, other) => Err(EvalError::TypeMismatch {
6332            detail: format!("NOT applied to {:?}", other.data_type()),
6333        }),
6334    }
6335}
6336
6337/// v7.9.27b — true when two values are "not distinct" per PG:
6338/// both NULL counts as equal; otherwise reduces to regular Eq.
6339fn values_not_distinct(l: &Value, r: &Value) -> bool {
6340    match (l, r) {
6341        (Value::Null, Value::Null) => true,
6342        (Value::Null, _) | (_, Value::Null) => false,
6343        _ => l == r,
6344    }
6345}
6346
6347fn apply_binary(op: BinOp, l: Value, r: Value) -> Result<Value, EvalError> {
6348    // SQL three-valued logic for AND / OR with NULL is special — handle before
6349    // the general NULL-propagation rule.
6350    if let BinOp::And = op {
6351        return and_3vl(l, r);
6352    }
6353    if let BinOp::Or = op {
6354        return or_3vl(l, r);
6355    }
6356    // v7.9.27b — IS [NOT] DISTINCT FROM. NULL-safe equality:
6357    // `NULL IS NOT DISTINCT FROM NULL` → true. mailrs pg_dump.
6358    if let BinOp::IsNotDistinctFrom = op {
6359        return Ok(Value::Bool(values_not_distinct(&l, &r)));
6360    }
6361    if let BinOp::IsDistinctFrom = op {
6362        return Ok(Value::Bool(!values_not_distinct(&l, &r)));
6363    }
6364    // Everything else: any NULL operand → NULL.
6365    if l.is_null() || r.is_null() {
6366        return Ok(Value::Null);
6367    }
6368    // NUMERIC arithmetic and comparisons run in fixed-point; promote
6369    // integers to a common NUMERIC scale and stay in i128 throughout.
6370    if matches!(l, Value::Numeric { .. }) || matches!(r, Value::Numeric { .. }) {
6371        return apply_binary_numeric(op, l, r);
6372    }
6373    // Date / Timestamp arithmetic. PG semantics:
6374    //   * date + int      → date  (int is days)
6375    //   * int + date      → date
6376    //   * date - int      → date
6377    //   * date - date     → int   (days, signed)
6378    //   * timestamp - timestamp → bigint (microseconds, signed)
6379    // Other date/time math (`timestamp + int`, INTERVAL) lands later.
6380    if let Some(result) = apply_binary_calendar(op, &l, &r)? {
6381        return Ok(result);
6382    }
6383    match op {
6384        BinOp::Add => arith(l, r, i64::checked_add, |a, b| a + b, "+"),
6385        BinOp::Sub => arith(l, r, i64::checked_sub, |a, b| a - b, "-"),
6386        BinOp::Mul => arith(l, r, i64::checked_mul, |a, b| a * b, "*"),
6387        BinOp::Div => div_op(l, r),
6388        BinOp::L2Distance => l2_distance(l, r),
6389        BinOp::InnerProduct => inner_product(l, r),
6390        BinOp::CosineDistance => cosine_distance(l, r),
6391        BinOp::Concat => Ok(text_concat(&l, &r)),
6392        BinOp::JsonGet => crate::json::path_get(&l, &r, false),
6393        BinOp::JsonGetText => crate::json::path_get(&l, &r, true),
6394        BinOp::JsonGetPath => crate::json::path_walk(&l, &r, false),
6395        BinOp::JsonGetPathText => crate::json::path_walk(&l, &r, true),
6396        BinOp::JsonContains => crate::json::contains(&l, &r),
6397        // v7.12.2 — `@@` match. NULL on either side → NULL; PG
6398        // accepts both orderings so we normalise.
6399        BinOp::TsMatch => ts_match(l, r),
6400        // v7.17.0 Phase 3.P0-47 — PG INET / CIDR containment + overlap.
6401        BinOp::InetContainedBy
6402        | BinOp::InetContainedByEq
6403        | BinOp::InetContains
6404        | BinOp::InetContainsEq
6405        | BinOp::InetOverlap => inet_op_bool_result(op, &l, &r),
6406        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6407            compare(op, &l, &r)
6408        }
6409        BinOp::And | BinOp::Or | BinOp::IsDistinctFrom | BinOp::IsNotDistinctFrom => {
6410            unreachable!("handled above")
6411        }
6412    }
6413}
6414
6415/// Calendar arithmetic. Returns `Some(value)` when the operand pair
6416/// is a date/time combo this function understands, `None` to let the
6417/// caller fall through to the regular numeric / text paths.
6418fn apply_binary_calendar(op: BinOp, l: &Value, r: &Value) -> Result<Option<Value>, EvalError> {
6419    let int_value = |v: &Value| -> Option<i64> {
6420        match v {
6421            Value::SmallInt(n) => Some(i64::from(*n)),
6422            Value::Int(n) => Some(i64::from(*n)),
6423            Value::BigInt(n) => Some(*n),
6424            _ => None,
6425        }
6426    };
6427    // Most-specific cases first — DATE-DATE / TS-TS subtraction before
6428    // DATE-integer subtraction, otherwise the latter swallows the
6429    // former with an `int_value(Date) = None` no-op fall-through.
6430    match (l, r) {
6431        (Value::Date(a), Value::Date(b)) if op == BinOp::Sub => {
6432            return Ok(Some(Value::BigInt(i64::from(*a) - i64::from(*b))));
6433        }
6434        (Value::Timestamp(a), Value::Timestamp(b)) if op == BinOp::Sub => {
6435            let delta = a.checked_sub(*b).ok_or(EvalError::TypeMismatch {
6436                detail: "TIMESTAMP - TIMESTAMP overflows i64 microseconds".into(),
6437            })?;
6438            return Ok(Some(Value::BigInt(delta)));
6439        }
6440        _ => {}
6441    }
6442    // INTERVAL arithmetic. PG: timestamp ± interval → timestamp,
6443    // date ± interval → date (if interval is pure days/months with no
6444    // sub-day component) else timestamp, interval ± interval → interval.
6445    if let Some(out) = apply_binary_interval(op, l, r)? {
6446        return Ok(Some(out));
6447    }
6448    match (l, r) {
6449        (Value::Date(d), other) if op == BinOp::Add => {
6450            if let Some(n) = int_value(other) {
6451                let days = i64::from(*d).saturating_add(n);
6452                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6453                    detail: "DATE + integer overflows DATE range".into(),
6454                })?;
6455                return Ok(Some(Value::Date(days32)));
6456            }
6457        }
6458        (other, Value::Date(d)) if op == BinOp::Add => {
6459            if let Some(n) = int_value(other) {
6460                let days = i64::from(*d).saturating_add(n);
6461                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6462                    detail: "integer + DATE overflows DATE range".into(),
6463                })?;
6464                return Ok(Some(Value::Date(days32)));
6465            }
6466        }
6467        (Value::Date(d), other) if op == BinOp::Sub => {
6468            if let Some(n) = int_value(other) {
6469                let days = i64::from(*d).saturating_sub(n);
6470                let days32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6471                    detail: "DATE - integer overflows DATE range".into(),
6472                })?;
6473                return Ok(Some(Value::Date(days32)));
6474            }
6475        }
6476        _ => {}
6477    }
6478    Ok(None)
6479}
6480
6481/// INTERVAL-aware binary ops. Recognises:
6482///   timestamp ± interval → timestamp
6483///   date ± interval      → date (if interval is integral days/months only)
6484///                       → timestamp (if interval has sub-day micros)
6485///   interval ± interval  → interval
6486/// Commutative for `+`. Returns `None` for unrecognised operand pairs so
6487/// the caller can fall through.
6488pub(crate) fn apply_binary_interval(
6489    op: BinOp,
6490    l: &Value,
6491    r: &Value,
6492) -> Result<Option<Value>, EvalError> {
6493    // Normalise so the interval (if any) is always on the right for Add;
6494    // Sub stays left-handed because it isn't commutative.
6495    let (lhs, rhs, sign): (&Value, &Value, i64) = match (l, r, op) {
6496        (Value::Interval { .. }, _, BinOp::Add) => (r, l, 1),
6497        (_, Value::Interval { .. }, BinOp::Add) => (l, r, 1),
6498        (_, Value::Interval { .. }, BinOp::Sub) => (l, r, -1),
6499        _ => return Ok(None),
6500    };
6501    let Value::Interval {
6502        months: rhs_months,
6503        micros: rhs_us,
6504    } = rhs
6505    else {
6506        unreachable!("rhs guaranteed to be Interval by the match above");
6507    };
6508    let signed_months = i64::from(*rhs_months) * sign;
6509    let signed_micros = rhs_us.checked_mul(sign).ok_or(EvalError::TypeMismatch {
6510        detail: "INTERVAL micros overflows on negation".into(),
6511    })?;
6512    match lhs {
6513        Value::Timestamp(t) => Ok(Some(Value::Timestamp(add_interval_to_micros(
6514            *t,
6515            signed_months,
6516            signed_micros,
6517        )?))),
6518        Value::Date(d) => {
6519            // Date + interval stays a date when the interval has zero
6520            // sub-day microseconds; otherwise promote to TIMESTAMP at
6521            // midnight of the (months-shifted) date first.
6522            let day_aligned = signed_micros.rem_euclid(86_400_000_000) == 0;
6523            if day_aligned {
6524                let micros_per_day = 86_400_000_000_i64;
6525                let days_delta = signed_micros / micros_per_day;
6526                let shifted = shift_date_by_months(*d, signed_months)?;
6527                let new_days =
6528                    i64::from(shifted)
6529                        .checked_add(days_delta)
6530                        .ok_or(EvalError::TypeMismatch {
6531                            detail: "DATE ± INTERVAL overflows DATE range".into(),
6532                        })?;
6533                let days32 = i32::try_from(new_days).map_err(|_| EvalError::TypeMismatch {
6534                    detail: "DATE ± INTERVAL overflows DATE range".into(),
6535                })?;
6536                Ok(Some(Value::Date(days32)))
6537            } else {
6538                let base =
6539                    i64::from(*d)
6540                        .checked_mul(86_400_000_000)
6541                        .ok_or(EvalError::TypeMismatch {
6542                            detail: "DATE → TIMESTAMP lift overflows for INTERVAL math".into(),
6543                        })?;
6544                Ok(Some(Value::Timestamp(add_interval_to_micros(
6545                    base,
6546                    signed_months,
6547                    signed_micros,
6548                )?)))
6549            }
6550        }
6551        Value::Interval {
6552            months: lhs_months,
6553            micros: lhs_us,
6554        } => {
6555            let new_months = i64::from(*lhs_months)
6556                .checked_add(signed_months)
6557                .and_then(|n| i32::try_from(n).ok())
6558                .ok_or(EvalError::TypeMismatch {
6559                    detail: "INTERVAL ± INTERVAL months overflows i32".into(),
6560                })?;
6561            let new_micros = lhs_us
6562                .checked_add(signed_micros)
6563                .ok_or(EvalError::TypeMismatch {
6564                    detail: "INTERVAL ± INTERVAL micros overflows i64".into(),
6565                })?;
6566            Ok(Some(Value::Interval {
6567                months: new_months,
6568                micros: new_micros,
6569            }))
6570        }
6571        _ => Err(EvalError::TypeMismatch {
6572            detail: format!(
6573                "operator {op:?} not defined for {:?} and INTERVAL",
6574                lhs.data_type()
6575            ),
6576        }),
6577    }
6578}
6579
6580/// Shift a `Date` by a signed number of months using the PG clamp rule.
6581fn shift_date_by_months(d: i32, months: i64) -> Result<i32, EvalError> {
6582    let (y, m, day) = civil_from_days(d);
6583    let months_i32 = i32::try_from(months).map_err(|_| EvalError::TypeMismatch {
6584        detail: "INTERVAL months delta out of i32 range".into(),
6585    })?;
6586    let (ny, nm, nd) = add_months_to_civil(y, m, day, months_i32);
6587    Ok(days_from_civil(ny, nm, nd))
6588}
6589
6590/// Add (months, micros) to a `Timestamp` (microseconds since epoch).
6591/// Months part is applied through civil calendar with clamp-to-last-day;
6592/// micros part is plain i64 addition with overflow guard.
6593fn add_interval_to_micros(t: i64, months: i64, micros: i64) -> Result<i64, EvalError> {
6594    let mut out = t;
6595    if months != 0 {
6596        const MICROS_PER_DAY: i64 = 86_400_000_000;
6597        let days = out.div_euclid(MICROS_PER_DAY);
6598        let day_micros = out.rem_euclid(MICROS_PER_DAY);
6599        let day_i32 = i32::try_from(days).map_err(|_| EvalError::TypeMismatch {
6600            detail: "TIMESTAMP day component out of i32 range for INTERVAL months math".into(),
6601        })?;
6602        let shifted_days = shift_date_by_months(day_i32, months)?;
6603        out = i64::from(shifted_days)
6604            .checked_mul(MICROS_PER_DAY)
6605            .and_then(|n| n.checked_add(day_micros))
6606            .ok_or(EvalError::TypeMismatch {
6607                detail: "TIMESTAMP ± INTERVAL months overflows i64 microseconds".into(),
6608            })?;
6609    }
6610    out.checked_add(micros).ok_or(EvalError::TypeMismatch {
6611        detail: "TIMESTAMP ± INTERVAL micros overflows i64".into(),
6612    })
6613}
6614
6615/// Dispatch for any binary op when at least one operand is NUMERIC.
6616/// Other-side integers / floats are promoted to a NUMERIC at a common
6617/// scale; all add / sub / mul / div / compare paths stay in i128.
6618#[allow(clippy::needless_pass_by_value)] // mirrors `apply_binary`'s by-value calling convention
6619fn apply_binary_numeric(op: BinOp, l: Value, r: Value) -> Result<Value, EvalError> {
6620    // Float still wins — Numeric + Float coerces both to f64 and runs
6621    // through the float path. PG demotes Numeric to float in this mix
6622    // too (the documented behaviour for `numeric + double precision`).
6623    let float_path = matches!(l, Value::Float(_)) || matches!(r, Value::Float(_));
6624    if float_path {
6625        let af = as_f64(&l)?;
6626        let bf = as_f64(&r)?;
6627        return match op {
6628            BinOp::Add => Ok(Value::Float(af + bf)),
6629            BinOp::Sub => Ok(Value::Float(af - bf)),
6630            BinOp::Mul => Ok(Value::Float(af * bf)),
6631            BinOp::Div => {
6632                if bf == 0.0 {
6633                    Err(EvalError::DivisionByZero)
6634                } else {
6635                    Ok(Value::Float(af / bf))
6636                }
6637            }
6638            BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6639                let ord = af.partial_cmp(&bf).ok_or(EvalError::TypeMismatch {
6640                    detail: "NaN in NUMERIC/Float comparison".into(),
6641                })?;
6642                Ok(Value::Bool(cmp_to_bool(op, ord)))
6643            }
6644            BinOp::Concat => Ok(text_concat(&l, &r)),
6645            other => Err(EvalError::TypeMismatch {
6646                detail: format!("operator {other:?} not defined for NUMERIC and Float"),
6647            }),
6648        };
6649    }
6650    // Promote integer ↔ numeric to a shared scale (max of both sides).
6651    let (a, sa) = numeric_or_widen(&l).ok_or_else(|| EvalError::TypeMismatch {
6652        detail: format!("NUMERIC op against non-numeric {:?}", l.data_type()),
6653    })?;
6654    let (b, sb) = numeric_or_widen(&r).ok_or_else(|| EvalError::TypeMismatch {
6655        detail: format!("NUMERIC op against non-numeric {:?}", r.data_type()),
6656    })?;
6657    match op {
6658        BinOp::Add | BinOp::Sub => {
6659            let target_scale = sa.max(sb);
6660            let lhs = rescale(a, sa, target_scale).ok_or(EvalError::TypeMismatch {
6661                detail: "NUMERIC overflow on rescale".into(),
6662            })?;
6663            let rhs = rescale(b, sb, target_scale).ok_or(EvalError::TypeMismatch {
6664                detail: "NUMERIC overflow on rescale".into(),
6665            })?;
6666            let r = match op {
6667                BinOp::Add => lhs.checked_add(rhs),
6668                BinOp::Sub => lhs.checked_sub(rhs),
6669                _ => unreachable!(),
6670            }
6671            .ok_or(EvalError::TypeMismatch {
6672                detail: "NUMERIC overflow on +/-".into(),
6673            })?;
6674            Ok(Value::Numeric {
6675                scaled: r,
6676                scale: target_scale,
6677            })
6678        }
6679        BinOp::Mul => {
6680            let scaled = a.checked_mul(b).ok_or(EvalError::TypeMismatch {
6681                detail: "NUMERIC overflow on *".into(),
6682            })?;
6683            Ok(Value::Numeric {
6684                scaled,
6685                scale: sa.saturating_add(sb),
6686            })
6687        }
6688        BinOp::Div => {
6689            if b == 0 {
6690                return Err(EvalError::DivisionByZero);
6691            }
6692            // Result scale: keep the wider operand's scale. Pre-scale
6693            // the numerator so the integer division retains that many
6694            // fractional digits. Round half-away-from-zero.
6695            let target_scale = sa.max(sb);
6696            // Numerator effective scale becomes sa + target_scale; we
6697            // bring it up to (target_scale + sb) so the divisor's scale
6698            // cancels cleanly.
6699            let bump = pow10_i128(target_scale.saturating_add(sb).saturating_sub(sa));
6700            let num = a.checked_mul(bump).ok_or(EvalError::TypeMismatch {
6701                detail: "NUMERIC overflow on / scaling".into(),
6702            })?;
6703            let half = if b >= 0 { b / 2 } else { -(b / 2) };
6704            let adj = if (num >= 0) == (b >= 0) {
6705                num + half
6706            } else {
6707                num - half
6708            };
6709            Ok(Value::Numeric {
6710                scaled: adj / b,
6711                scale: target_scale,
6712            })
6713        }
6714        BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => {
6715            let target_scale = sa.max(sb);
6716            let lhs = rescale(a, sa, target_scale).ok_or(EvalError::TypeMismatch {
6717                detail: "NUMERIC overflow on rescale".into(),
6718            })?;
6719            let rhs = rescale(b, sb, target_scale).ok_or(EvalError::TypeMismatch {
6720                detail: "NUMERIC overflow on rescale".into(),
6721            })?;
6722            Ok(Value::Bool(cmp_to_bool(op, lhs.cmp(&rhs))))
6723        }
6724        BinOp::Concat => Ok(text_concat(&l, &r)),
6725        other => Err(EvalError::TypeMismatch {
6726            detail: format!("operator {other:?} not defined for NUMERIC"),
6727        }),
6728    }
6729}
6730
6731/// Express `v` as a `(scaled_i128, scale)` pair. Plain integers come
6732/// back with `scale=0`; NUMERIC keeps its own scale. Anything else
6733/// returns `None` and the caller raises a type error.
6734fn numeric_or_widen(v: &Value) -> Option<(i128, u8)> {
6735    match v {
6736        Value::Numeric { scaled, scale } => Some((*scaled, *scale)),
6737        Value::Int(n) => Some((i128::from(*n), 0)),
6738        Value::SmallInt(n) => Some((i128::from(*n), 0)),
6739        Value::BigInt(n) => Some((i128::from(*n), 0)),
6740        _ => None,
6741    }
6742}
6743
/// Re-express the fixed-point value `scaled` (with `src` fractional digits)
/// at `dst` fractional digits.
///
/// * `dst > src` multiplies by `10^(dst - src)`; returns `None` when the
///   result does not fit in an `i128`.
/// * `dst < src` divides by `10^(src - dst)`, rounding half away from
///   zero; this direction always succeeds.
///
/// Rounding is derived from the remainder rather than by adding half the
/// divisor up front, so values near the `i128` bounds cannot overflow, and
/// the power of ten is computed with `checked_pow` so scale gaps too large
/// for `i128` (39 digits or more) are handled instead of overflowing.
fn rescale(scaled: i128, src: u8, dst: u8) -> Option<i128> {
    if src == dst {
        return Some(scaled);
    }
    if dst > src {
        return match 10_i128.checked_pow(u32::from(dst - src)) {
            Some(factor) => scaled.checked_mul(factor),
            // 10^k itself overflows: only zero survives the multiplication.
            None => (scaled == 0).then_some(0),
        };
    }
    let Some(divisor) = 10_i128.checked_pow(u32::from(src - dst)) else {
        // 10^k exceeds every i128 by more than 2x, so |scaled| / 10^k < 0.5.
        return Some(0);
    };
    let quotient = scaled / divisor;
    let rem_mag = (scaled % divisor).unsigned_abs();
    // rem_mag < divisor, so the subtraction cannot underflow; the test is
    // equivalent to 2 * |rem| >= divisor.
    let rounds_away = rem_mag >= divisor.unsigned_abs() - rem_mag;
    Some(if !rounds_away {
        quotient
    } else if scaled < 0 {
        quotient - 1
    } else {
        quotient + 1
    })
}
6761
/// `10^p` as an `i128`, saturating at `i128::MAX`.
///
/// `10^38` is the largest power of ten that fits in an `i128`; for
/// `p >= 39` the result saturates instead of overflowing (which would
/// panic in debug builds and wrap in release builds). A caller that
/// multiplies through `checked_mul` therefore reports overflow for any
/// operand of magnitude 2 or more rather than using a wrapped factor.
const fn pow10_i128(p: u8) -> i128 {
    let mut acc: i128 = 1;
    let mut remaining = p;
    while remaining > 0 {
        acc = acc.saturating_mul(10);
        remaining -= 1;
    }
    acc
}
6771
6772const fn cmp_to_bool(op: BinOp, ord: core::cmp::Ordering) -> bool {
6773    use core::cmp::Ordering::{Equal, Greater, Less};
6774    match op {
6775        BinOp::Eq => matches!(ord, Equal),
6776        BinOp::NotEq => !matches!(ord, Equal),
6777        BinOp::Lt => matches!(ord, Less),
6778        BinOp::LtEq => matches!(ord, Less | Equal),
6779        BinOp::Gt => matches!(ord, Greater),
6780        BinOp::GtEq => matches!(ord, Greater | Equal),
6781        _ => false,
6782    }
6783}
6784
6785/// SQL `||` string concatenation. Operands are coerced to text via the same
6786/// rule as `::text` cast. NULL propagates (handled above; this function only
6787/// runs with non-NULL operands).
6788fn text_concat(l: &Value, r: &Value) -> Value {
6789    // v7.11.8 — PG `||` overloads: TEXT[] || TEXT[] = concatenated array;
6790    // TEXT[] || TEXT (or TEXT || TEXT[]) prepends/appends the single
6791    // element. NULL || anything = NULL (PG semantics for arrays;
6792    // text concat treats NULL the same way after value_to_text).
6793    match (l, r) {
6794        (Value::Null, _) | (_, Value::Null) => {
6795            // PG text concat: NULL || x = NULL. Array concat: NULL || x = NULL.
6796            // Keep the legacy text path (value_to_text handles Null as ""),
6797            // but for arrays we surface real NULL to match PG.
6798            if matches!(
6799                l,
6800                Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_) | Value::Bytes(_)
6801            ) || matches!(
6802                r,
6803                Value::TextArray(_) | Value::IntArray(_) | Value::BigIntArray(_) | Value::Bytes(_)
6804            ) {
6805                return Value::Null;
6806            }
6807        }
6808        (Value::TextArray(a), Value::TextArray(b)) => {
6809            let mut out = a.clone();
6810            out.extend(b.iter().cloned());
6811            return Value::TextArray(out);
6812        }
6813        (Value::TextArray(a), Value::Text(s)) => {
6814            let mut out = a.clone();
6815            out.push(Some(s.clone()));
6816            return Value::TextArray(out);
6817        }
6818        (Value::Text(s), Value::TextArray(b)) => {
6819            let mut out: alloc::vec::Vec<Option<alloc::string::String>> =
6820                alloc::vec::Vec::with_capacity(1 + b.len());
6821            out.push(Some(s.clone()));
6822            out.extend(b.iter().cloned());
6823            return Value::TextArray(out);
6824        }
6825        // v7.11.13 — IntArray / BigIntArray `||` overloads. Same
6826        // PG semantics as TEXT[]: array||array concatenates, and
6827        // array||scalar appends/prepends. Mixed Int/BigInt widens
6828        // to BigIntArray.
6829        (Value::IntArray(a), Value::IntArray(b)) => {
6830            let mut out = a.clone();
6831            out.extend(b.iter().copied());
6832            return Value::IntArray(out);
6833        }
6834        (Value::IntArray(a), Value::Int(n)) => {
6835            let mut out = a.clone();
6836            out.push(Some(*n));
6837            return Value::IntArray(out);
6838        }
6839        (Value::IntArray(a), Value::SmallInt(n)) => {
6840            let mut out = a.clone();
6841            out.push(Some(i32::from(*n)));
6842            return Value::IntArray(out);
6843        }
6844        (Value::Int(n), Value::IntArray(b)) => {
6845            let mut out: alloc::vec::Vec<Option<i32>> = alloc::vec::Vec::with_capacity(1 + b.len());
6846            out.push(Some(*n));
6847            out.extend(b.iter().copied());
6848            return Value::IntArray(out);
6849        }
6850        (Value::SmallInt(n), Value::IntArray(b)) => {
6851            let mut out: alloc::vec::Vec<Option<i32>> = alloc::vec::Vec::with_capacity(1 + b.len());
6852            out.push(Some(i32::from(*n)));
6853            out.extend(b.iter().copied());
6854            return Value::IntArray(out);
6855        }
6856        (Value::BigIntArray(a), Value::BigIntArray(b)) => {
6857            let mut out = a.clone();
6858            out.extend(b.iter().copied());
6859            return Value::BigIntArray(out);
6860        }
6861        (Value::BigIntArray(a), Value::IntArray(b)) => {
6862            let mut out = a.clone();
6863            out.extend(b.iter().map(|o| o.map(i64::from)));
6864            return Value::BigIntArray(out);
6865        }
6866        (Value::IntArray(a), Value::BigIntArray(b)) => {
6867            let mut out: alloc::vec::Vec<Option<i64>> =
6868                a.iter().map(|o| o.map(i64::from)).collect();
6869            out.extend(b.iter().copied());
6870            return Value::BigIntArray(out);
6871        }
6872        (Value::BigIntArray(a), Value::BigInt(n)) => {
6873            let mut out = a.clone();
6874            out.push(Some(*n));
6875            return Value::BigIntArray(out);
6876        }
6877        (Value::BigIntArray(a), Value::Int(n)) => {
6878            let mut out = a.clone();
6879            out.push(Some(i64::from(*n)));
6880            return Value::BigIntArray(out);
6881        }
6882        (Value::BigIntArray(a), Value::SmallInt(n)) => {
6883            let mut out = a.clone();
6884            out.push(Some(i64::from(*n)));
6885            return Value::BigIntArray(out);
6886        }
6887        (Value::BigInt(n), Value::BigIntArray(b)) => {
6888            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
6889            out.push(Some(*n));
6890            out.extend(b.iter().copied());
6891            return Value::BigIntArray(out);
6892        }
6893        (Value::Int(n), Value::BigIntArray(b)) => {
6894            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
6895            out.push(Some(i64::from(*n)));
6896            out.extend(b.iter().copied());
6897            return Value::BigIntArray(out);
6898        }
6899        (Value::SmallInt(n), Value::BigIntArray(b)) => {
6900            let mut out: alloc::vec::Vec<Option<i64>> = alloc::vec::Vec::with_capacity(1 + b.len());
6901            out.push(Some(i64::from(*n)));
6902            out.extend(b.iter().copied());
6903            return Value::BigIntArray(out);
6904        }
6905        // v7.11.15 — BYTEA `||` is byte concatenation.
6906        (Value::Bytes(a), Value::Bytes(b)) => {
6907            let mut out = a.clone();
6908            out.extend_from_slice(b);
6909            return Value::Bytes(out);
6910        }
6911        _ => {}
6912    }
6913    let a = value_to_text(l);
6914    let b = value_to_text(r);
6915    Value::Text(a + &b)
6916}
6917
6918/// pgvector inner-product `<#>`. Returns the *negative* dot product so
6919/// smaller still means more similar — same convention as pgvector.
6920fn inner_product(l: Value, r: Value) -> Result<Value, EvalError> {
6921    let (a, b) = unwrap_vec_pair(l, r, "<#>")?;
6922    let mut dot: f64 = 0.0;
6923    for (x, y) in a.iter().zip(b.iter()) {
6924        dot += f64::from(*x) * f64::from(*y);
6925    }
6926    Ok(Value::Float(-dot))
6927}
6928
6929/// pgvector cosine distance `<=>` — `1 - (a·b) / (‖a‖ ‖b‖)`. A zero-norm
6930/// operand produces NaN (matches pgvector).
6931fn cosine_distance(l: Value, r: Value) -> Result<Value, EvalError> {
6932    let (a, b) = unwrap_vec_pair(l, r, "<=>")?;
6933    let mut dot: f64 = 0.0;
6934    let mut na: f64 = 0.0;
6935    let mut nb: f64 = 0.0;
6936    for (x, y) in a.iter().zip(b.iter()) {
6937        let xf = f64::from(*x);
6938        let yf = f64::from(*y);
6939        dot += xf * yf;
6940        na += xf * xf;
6941        nb += yf * yf;
6942    }
6943    let denom = sqrt_newton(na) * sqrt_newton(nb);
6944    if denom == 0.0 {
6945        return Ok(Value::Float(f64::NAN));
6946    }
6947    Ok(Value::Float(1.0 - dot / denom))
6948}
6949
6950fn unwrap_vec_pair(l: Value, r: Value, op: &str) -> Result<(Vec<f32>, Vec<f32>), EvalError> {
6951    // v6.0.1: SQ8 cells coming through the SQL evaluator are
6952    // dequantised to f32 here so the existing scalar distance
6953    // arithmetic stays intact. HNSW kNN search continues to use
6954    // the asymmetric ADC variant inside `cell_to_query_metric_
6955    // distance` — this path only runs when a vector expression
6956    // lands in the evaluator (full-scan ORDER BY, SELECT
6957    // projection of `v <-> $1`, etc.).
6958    let to_f32 = |v: Value| -> Option<Vec<f32>> {
6959        match v {
6960            Value::Vector(a) => Some(a),
6961            Value::Sq8Vector(q) => Some(spg_storage::quantize::dequantize(&q)),
6962            // v6.0.3: bit-exact dequant for halfvec cells.
6963            Value::HalfVector(h) => Some(h.to_f32_vec()),
6964            _ => None,
6965        }
6966    };
6967    let l_ty = l.data_type();
6968    let r_ty = r.data_type();
6969    match (to_f32(l), to_f32(r)) {
6970        (Some(a), Some(b)) => {
6971            if a.len() != b.len() {
6972                return Err(EvalError::TypeMismatch {
6973                    detail: format!("vector dim mismatch in {op}: {} vs {}", a.len(), b.len()),
6974                });
6975            }
6976            Ok((a, b))
6977        }
6978        _ => Err(EvalError::TypeMismatch {
6979            detail: format!("{op} requires two vectors, got {l_ty:?} and {r_ty:?}"),
6980        }),
6981    }
6982}
6983
6984/// Numeric arithmetic with widening.
6985/// - both `Int` → `Int` (with overflow check)
6986/// - `Int` op `BigInt` (either side) → `BigInt`
6987/// - any `Float` involved → `Float`
6988fn arith(
6989    l: Value,
6990    r: Value,
6991    int_op: impl Fn(i64, i64) -> Option<i64>,
6992    float_op: impl Fn(f64, f64) -> f64,
6993    op_name: &str,
6994) -> Result<Value, EvalError> {
6995    // Widen SmallInt to Int up front so the rest of the arithmetic
6996    // table only deals with Int / BigInt / Float pairs.
6997    let widen = |v: Value| -> Value {
6998        match v {
6999            Value::SmallInt(n) => Value::Int(i32::from(n)),
7000            other => other,
7001        }
7002    };
7003    let l = widen(l);
7004    let r = widen(r);
7005    match (l, r) {
7006        (Value::Int(a), Value::Int(b)) => {
7007            let result = int_op(i64::from(a), i64::from(b)).ok_or(EvalError::TypeMismatch {
7008                detail: format!("integer overflow on {op_name}"),
7009            })?;
7010            if let Ok(small) = i32::try_from(result) {
7011                Ok(Value::Int(small))
7012            } else {
7013                Ok(Value::BigInt(result))
7014            }
7015        }
7016        (Value::Int(a), Value::BigInt(b)) | (Value::BigInt(b), Value::Int(a)) => {
7017            let result = int_op(i64::from(a), b).ok_or(EvalError::TypeMismatch {
7018                detail: format!("bigint overflow on {op_name}"),
7019            })?;
7020            Ok(Value::BigInt(result))
7021        }
7022        (Value::BigInt(a), Value::BigInt(b)) => {
7023            let result = int_op(a, b).ok_or(EvalError::TypeMismatch {
7024                detail: format!("bigint overflow on {op_name}"),
7025            })?;
7026            Ok(Value::BigInt(result))
7027        }
7028        (a, b)
7029            if a.data_type() == Some(DataType::Float) || b.data_type() == Some(DataType::Float) =>
7030        {
7031            let af = as_f64(&a)?;
7032            let bf = as_f64(&b)?;
7033            Ok(Value::Float(float_op(af, bf)))
7034        }
7035        (a, b) => Err(EvalError::TypeMismatch {
7036            detail: format!(
7037                "{op_name} applied to non-numeric: {:?} vs {:?}",
7038                a.data_type(),
7039                b.data_type()
7040            ),
7041        }),
7042    }
7043}
7044
7045/// L2 (Euclidean) distance between two vectors of equal dimension.
7046/// Returned as `Value::Float(d)` so it composes with the existing
7047/// comparison / sort plumbing. Mismatched dims or non-vector operands
7048/// raise `TypeMismatch`.
7049#[allow(clippy::many_single_char_names)] // l, r, a, b, d are the natural names
7050fn l2_distance(l: Value, r: Value) -> Result<Value, EvalError> {
7051    // v6.0.1: route both operands through `unwrap_vec_pair` so SQ8
7052    // cells dequantise on the way in. Sub-f64 precision loss is
7053    // negligible vs the dequantisation noise the SQ8 path already
7054    // ships with.
7055    let (a, b) = unwrap_vec_pair(l, r, "<->")?;
7056    let mut sum: f64 = 0.0;
7057    for (x, y) in a.iter().zip(b.iter()) {
7058        let d = f64::from(*x) - f64::from(*y);
7059        sum += d * d;
7060    }
7061    Ok(Value::Float(sqrt_newton(sum)))
7062}
7063
/// Self-built `sqrt` for `f64` — `std::f64::sqrt` lives in `std`, which the
/// engine's `no_std` constraint disallows.
///
/// Newton-Raphson, seeded by halving the exponent of `x` (a bit-level
/// estimate that is already close to the root) and iterated until the
/// estimate stops shrinking. Seeding with `x` itself and running a fixed
/// number of rounds is not enough: every round only halves the estimate
/// while it is far above the root, so magnitudes such as `1e6` would still
/// be off by tens of percent after ten rounds.
///
/// Special cases:
/// - `x <= 0.0` (including `-0.0` and negative values) returns `0.0`
/// - NaN returns NaN, `+inf` returns `+inf`
fn sqrt_newton(x: f64) -> f64 {
    // (1023 << 52) >> 1 — re-centres the exponent bias after the bit
    // pattern of `x` has been shifted right by one.
    const HALF_BIAS: u64 = 0x1FF8_0000_0000_0000;
    // Safety cap only; normal inputs converge in a handful of rounds.
    const MAX_ROUNDS: u32 = 64;

    if x <= 0.0 {
        return 0.0;
    }
    if !x.is_finite() {
        // NaN propagates; +inf has no finite estimate to iterate toward.
        return x;
    }
    let seed = f64::from_bits((x.to_bits() >> 1) + HALF_BIAS);
    // One unconditional step: whatever side of the root the seed was on,
    // the Newton update lands at or above the root, after which the
    // sequence only decreases.
    let mut g = 0.5 * (seed + x / seed);
    for _ in 0..MAX_ROUNDS {
        let next = 0.5 * (g + x / g);
        if next >= g {
            // No further progress: `g` is the converged estimate.
            break;
        }
        g = next;
    }
    g
}
7080
7081fn div_op(l: Value, r: Value) -> Result<Value, EvalError> {
7082    let any_float = matches!(l.data_type(), Some(DataType::Float))
7083        || matches!(r.data_type(), Some(DataType::Float));
7084    if any_float {
7085        let a = as_f64(&l)?;
7086        let b = as_f64(&r)?;
7087        if b == 0.0 {
7088            return Err(EvalError::DivisionByZero);
7089        }
7090        return Ok(Value::Float(a / b));
7091    }
7092    arith(
7093        l,
7094        r,
7095        |a, b| {
7096            if b == 0 { None } else { Some(a / b) }
7097        },
7098        |a, b| a / b,
7099        "/",
7100    )
7101    .map_err(|e| match e {
7102        // The closure returns None on b == 0; translate that into the dedicated
7103        // DivisionByZero variant instead of "integer overflow on /".
7104        EvalError::TypeMismatch { detail } if detail.contains('/') => EvalError::DivisionByZero,
7105        other => other,
7106    })
7107}
7108
7109fn as_f64(v: &Value) -> Result<f64, EvalError> {
7110    match v {
7111        Value::SmallInt(n) => Ok(f64::from(*n)),
7112        Value::Int(n) => Ok(f64::from(*n)),
7113        #[allow(clippy::cast_precision_loss)]
7114        Value::BigInt(n) => Ok(*n as f64),
7115        Value::Float(x) => Ok(*x),
7116        #[allow(clippy::cast_precision_loss)]
7117        Value::Numeric { scaled, scale } => {
7118            let mut div = 1.0_f64;
7119            for _ in 0..*scale {
7120                div *= 10.0;
7121            }
7122            Ok((*scaled as f64) / div)
7123        }
7124        other => Err(EvalError::TypeMismatch {
7125            detail: format!("cannot convert {:?} to FLOAT", other.data_type()),
7126        }),
7127    }
7128}
7129
7130fn compare(op: BinOp, l: &Value, r: &Value) -> Result<Value, EvalError> {
7131    let ord = match (l, r) {
7132        (Value::Int(a), Value::Int(b)) => i64::from(*a).cmp(&i64::from(*b)),
7133        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7134        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
7135        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
7136        (a, b)
7137            if matches!(a.data_type(), Some(DataType::Float))
7138                || matches!(b.data_type(), Some(DataType::Float)) =>
7139        {
7140            let af = as_f64(a)?;
7141            let bf = as_f64(b)?;
7142            af.partial_cmp(&bf).ok_or(EvalError::TypeMismatch {
7143                detail: "NaN in comparison".into(),
7144            })?
7145        }
7146        (Value::Text(a), Value::Text(b)) => a.cmp(b),
7147        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
7148        // Date / Timestamp compare on their integer storage repr.
7149        // Cross-domain (Date vs Timestamp) lifts the Date to the
7150        // matching midnight TIMESTAMP first.
7151        (Value::Date(a), Value::Date(b)) => a.cmp(b),
7152        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
7153        (Value::Date(a), Value::Timestamp(b)) => (i64::from(*a) * 86_400_000_000).cmp(b),
7154        (Value::Timestamp(a), Value::Date(b)) => a.cmp(&(i64::from(*b) * 86_400_000_000)),
7155        // PG-style implicit coercion: comparing a DATE / TIMESTAMP
7156        // column against a text literal lifts the literal into the
7157        // matching domain (e.g. `day >= '2024-01-01'`).
7158        (Value::Date(a), Value::Text(b)) => {
7159            let bd = parse_date_literal(b).ok_or_else(|| EvalError::TypeMismatch {
7160                detail: format!("cannot parse {b:?} as DATE for comparison"),
7161            })?;
7162            a.cmp(&bd)
7163        }
7164        (Value::Text(a), Value::Date(b)) => {
7165            let ad = parse_date_literal(a).ok_or_else(|| EvalError::TypeMismatch {
7166                detail: format!("cannot parse {a:?} as DATE for comparison"),
7167            })?;
7168            ad.cmp(b)
7169        }
7170        (Value::Timestamp(a), Value::Text(b)) => {
7171            let bt = parse_timestamp_literal(b).ok_or_else(|| EvalError::TypeMismatch {
7172                detail: format!("cannot parse {b:?} as TIMESTAMP for comparison"),
7173            })?;
7174            a.cmp(&bt)
7175        }
7176        (Value::Text(a), Value::Timestamp(b)) => {
7177            let at = parse_timestamp_literal(a).ok_or_else(|| EvalError::TypeMismatch {
7178                detail: format!("cannot parse {a:?} as TIMESTAMP for comparison"),
7179            })?;
7180            at.cmp(b)
7181        }
7182        // v7.17.0 — UUID byte-wise comparison; both sides UUID.
7183        (Value::Uuid(a), Value::Uuid(b)) => a.cmp(b),
7184        // v7.17.0 — PG promotes a `text` literal compared against a
7185        // `uuid` column into uuid (unknown-type literal inference).
7186        // Without this, `WHERE id = '550e...'` falls through to the
7187        // generic TypeMismatch — the application's literal becomes
7188        // an error rather than a comparison.
7189        (Value::Uuid(a), Value::Text(b)) => {
7190            let bu = spg_storage::parse_uuid_str(b).ok_or_else(|| EvalError::TypeMismatch {
7191                detail: format!("invalid input syntax for type uuid: {b:?}"),
7192            })?;
7193            a.cmp(&bu)
7194        }
7195        (Value::Text(a), Value::Uuid(b)) => {
7196            let au = spg_storage::parse_uuid_str(a).ok_or_else(|| EvalError::TypeMismatch {
7197                detail: format!("invalid input syntax for type uuid: {a:?}"),
7198            })?;
7199            au.cmp(b)
7200        }
7201        (a, b) => {
7202            return Err(EvalError::TypeMismatch {
7203                detail: format!(
7204                    "comparison between {:?} and {:?}",
7205                    a.data_type(),
7206                    b.data_type()
7207                ),
7208            });
7209        }
7210    };
7211    let result = match op {
7212        BinOp::Eq => ord.is_eq(),
7213        BinOp::NotEq => !ord.is_eq(),
7214        BinOp::Lt => ord.is_lt(),
7215        BinOp::LtEq => ord.is_le(),
7216        BinOp::Gt => ord.is_gt(),
7217        BinOp::GtEq => ord.is_ge(),
7218        BinOp::And
7219        | BinOp::Or
7220        | BinOp::Add
7221        | BinOp::Sub
7222        | BinOp::Mul
7223        | BinOp::Div
7224        | BinOp::L2Distance
7225        | BinOp::InnerProduct
7226        | BinOp::CosineDistance
7227        | BinOp::Concat
7228        | BinOp::JsonGet
7229        | BinOp::JsonGetText
7230        | BinOp::JsonGetPath
7231        | BinOp::JsonGetPathText
7232        | BinOp::JsonContains
7233        | BinOp::TsMatch
7234        | BinOp::IsDistinctFrom
7235        | BinOp::IsNotDistinctFrom
7236        | BinOp::InetContainedBy
7237        | BinOp::InetContainedByEq
7238        | BinOp::InetContains
7239        | BinOp::InetContainsEq
7240        | BinOp::InetOverlap => {
7241            unreachable!("compare() only called with comparison ops")
7242        }
7243    };
7244    Ok(Value::Bool(result))
7245}
7246
7247// SQL three-valued AND / OR.
7248fn and_3vl(l: Value, r: Value) -> Result<Value, EvalError> {
7249    match (l, r) {
7250        (Value::Bool(false), _) | (_, Value::Bool(false)) => Ok(Value::Bool(false)),
7251        (Value::Bool(true), Value::Bool(true)) => Ok(Value::Bool(true)),
7252        (Value::Null, _) | (_, Value::Null) => Ok(Value::Null),
7253        (a, b) => Err(EvalError::TypeMismatch {
7254            detail: format!(
7255                "AND on non-boolean: {:?} and {:?}",
7256                a.data_type(),
7257                b.data_type()
7258            ),
7259        }),
7260    }
7261}
7262
7263fn or_3vl(l: Value, r: Value) -> Result<Value, EvalError> {
7264    match (l, r) {
7265        (Value::Bool(true), _) | (_, Value::Bool(true)) => Ok(Value::Bool(true)),
7266        (Value::Bool(false), Value::Bool(false)) => Ok(Value::Bool(false)),
7267        (Value::Null, _) | (_, Value::Null) => Ok(Value::Null),
7268        (a, b) => Err(EvalError::TypeMismatch {
7269            detail: format!(
7270                "OR on non-boolean: {:?} and {:?}",
7271                a.data_type(),
7272                b.data_type()
7273            ),
7274        }),
7275    }
7276}
7277
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;
    use spg_storage::{ColumnSchema, Row};

    // ---- expression / schema builders -------------------------------

    /// Column schema fixture; every test column passes `true` as the third
    /// `ColumnSchema::new` argument.
    fn col(name: &str, ty: DataType) -> ColumnSchema {
        ColumnSchema::new(name, ty, true)
    }

    /// Evaluation context over `cols` with an optional table alias.
    fn ctx<'a>(cols: &'a [ColumnSchema], alias: Option<&'a str>) -> EvalContext<'a> {
        EvalContext::new(cols, alias)
    }

    /// Integer literal expression.
    fn lit(n: i64) -> Expr {
        Expr::Literal(Literal::Integer(n))
    }

    /// Boolean literal expression.
    fn bool_lit(b: bool) -> Expr {
        Expr::Literal(Literal::Bool(b))
    }

    /// `NULL` literal expression.
    fn null() -> Expr {
        Expr::Literal(Literal::Null)
    }

    /// Bare (unqualified) column reference.
    fn col_ref(name: &str) -> Expr {
        Expr::Column(ColumnName {
            qualifier: None,
            name: name.into(),
        })
    }

    /// Column reference of the form `qualifier.name`.
    fn qualified_ref(qualifier: &'static str, name: &'static str) -> Expr {
        Expr::Column(ColumnName {
            qualifier: Some(qualifier.into()),
            name: name.into(),
        })
    }

    /// `lhs <op> rhs`.
    fn binary(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
        Expr::Binary {
            lhs: alloc::boxed::Box::new(lhs),
            op,
            rhs: alloc::boxed::Box::new(rhs),
        }
    }

    // ---- evaluation helpers -----------------------------------------

    /// Evaluate `expr` against `row` under the given schema and alias.
    fn eval_row(
        expr: &Expr,
        row: &Row,
        cols: &[ColumnSchema],
        alias: Option<&str>,
    ) -> Result<Value, EvalError> {
        eval_expr(expr, row, &ctx(cols, alias))
    }

    /// Evaluate `expr` against an empty row: no columns, no alias.
    fn eval_const(expr: &Expr) -> Result<Value, EvalError> {
        let no_columns: [ColumnSchema; 0] = [];
        eval_row(expr, &Row::new(vec![]), &no_columns, None)
    }

    // ---- literals and column lookup ---------------------------------

    #[test]
    fn literal_evaluates_to_value() {
        assert_eq!(eval_const(&lit(42)).unwrap(), Value::Int(42));
        assert_eq!(
            eval_const(&Expr::Literal(Literal::Float(1.5))).unwrap(),
            Value::Float(1.5)
        );
        assert_eq!(eval_const(&null()).unwrap(), Value::Null);
    }

    #[test]
    fn column_lookup_unqualified() {
        let schema = vec![col("a", DataType::Int), col("b", DataType::Text)];
        let row = Row::new(vec![Value::Int(7), Value::Text("hi".into())]);
        assert_eq!(
            eval_row(&col_ref("a"), &row, &schema, None).unwrap(),
            Value::Int(7)
        );
        assert_eq!(
            eval_row(&col_ref("b"), &row, &schema, None).unwrap(),
            Value::Text("hi".into())
        );
    }

    #[test]
    fn column_not_found_errors() {
        let schema = vec![col("a", DataType::Int)];
        let row = Row::new(vec![Value::Int(0)]);
        let err = eval_row(&col_ref("ghost"), &row, &schema, None).unwrap_err();
        assert!(matches!(err, EvalError::ColumnNotFound { ref name } if name == "ghost"));
    }

    #[test]
    fn qualified_column_matches_alias() {
        let schema = vec![col("a", DataType::Int)];
        let row = Row::new(vec![Value::Int(5)]);
        let found = eval_row(&qualified_ref("u", "a"), &row, &schema, Some("u"));
        assert_eq!(found.unwrap(), Value::Int(5));
    }

    #[test]
    fn qualified_column_unknown_alias_errors() {
        let schema = vec![col("a", DataType::Int)];
        let row = Row::new(vec![Value::Int(5)]);
        let outcome = eval_row(&qualified_ref("x", "a"), &row, &schema, Some("u"));
        assert!(matches!(
            outcome.unwrap_err(),
            EvalError::UnknownQualifier { .. }
        ));
    }

    // ---- arithmetic and comparison ----------------------------------

    #[test]
    fn arithmetic_with_widening() {
        let sum = binary(lit(2), BinOp::Add, Expr::Literal(Literal::Float(0.5)));
        assert_eq!(eval_const(&sum).unwrap(), Value::Float(2.5));
    }

    #[test]
    fn division_by_zero_errors() {
        let quotient = binary(lit(1), BinOp::Div, lit(0));
        assert_eq!(
            eval_const(&quotient).unwrap_err(),
            EvalError::DivisionByZero
        );
    }

    #[test]
    fn comparison_returns_bool() {
        let less = binary(lit(1), BinOp::Lt, lit(2));
        assert_eq!(eval_const(&less).unwrap(), Value::Bool(true));
    }

    #[test]
    fn null_propagates_through_arithmetic() {
        let sum = binary(lit(1), BinOp::Add, null());
        assert_eq!(eval_const(&sum).unwrap(), Value::Null);
    }

    // ---- three-valued logic -----------------------------------------

    #[test]
    fn and_three_valued_logic() {
        // Right-hand side is either NULL or TRUE.
        let and_expr = |lhs: bool, rhs_is_null: bool| {
            let rhs = if rhs_is_null { null() } else { bool_lit(true) };
            binary(bool_lit(lhs), BinOp::And, rhs)
        };
        // FALSE AND NULL → FALSE
        assert_eq!(
            eval_const(&and_expr(false, true)).unwrap(),
            Value::Bool(false)
        );
        // TRUE AND NULL → NULL
        assert_eq!(eval_const(&and_expr(true, true)).unwrap(), Value::Null);
        // TRUE AND TRUE → TRUE
        assert_eq!(
            eval_const(&and_expr(true, false)).unwrap(),
            Value::Bool(true)
        );
    }

    #[test]
    fn or_three_valued_logic() {
        let or_null = |lhs: bool| binary(bool_lit(lhs), BinOp::Or, null());
        // TRUE OR NULL → TRUE
        assert_eq!(eval_const(&or_null(true)).unwrap(), Value::Bool(true));
        // FALSE OR NULL → NULL
        assert_eq!(eval_const(&or_null(false)).unwrap(), Value::Null);
    }

    #[test]
    fn not_on_null_is_null() {
        let negated = Expr::Unary {
            op: UnOp::Not,
            expr: alloc::boxed::Box::new(null()),
        };
        assert_eq!(eval_const(&negated).unwrap(), Value::Null);
    }

    #[test]
    fn text_comparison_lexicographic() {
        let less = binary(
            Expr::Literal(Literal::String("apple".into())),
            BinOp::Lt,
            Expr::Literal(Literal::String("banana".into())),
        );
        assert_eq!(eval_const(&less).unwrap(), Value::Bool(true));
    }

    // ---- intervals ---------------------------------------------------

    #[test]
    fn interval_format_basics() {
        // (months, micros, rendered text)
        let cases = [
            (0, 0, "0"),
            (0, 86_400_000_000, "1 day"),
            (0, -86_400_000_000, "-1 days"),
            (0, 3_600_000_000, "01:00:00"),
            (0, 86_400_000_000 + 9_000_000, "1 day 00:00:09"),
            (14, 0, "1 year 2 mons"),
            (-1, 0, "-1 mons"),
        ];
        for &(months, micros, rendered) in cases.iter() {
            assert_eq!(format_interval(months, micros), rendered);
        }
    }

    #[test]
    fn interval_add_to_timestamp_micros_part() {
        // 2024-01-01 00:00:00 + INTERVAL '1 hour' = 2024-01-01 01:00:00
        let midnight = i64::from(days_from_civil(2024, 1, 1)) * 86_400_000_000;
        let shifted = add_interval_to_micros(midnight, 0, 3_600_000_000).unwrap();
        assert_eq!(shifted, midnight + 3_600_000_000);
    }

    #[test]
    fn interval_clamp_month_end() {
        // Shift a civil date by whole months and read the result back as
        // (year, month, day).
        let shift = |(y, m, d), months| {
            let start = days_from_civil(y, m, d);
            civil_from_days(shift_date_by_months(start, months).unwrap())
        };
        // 2024-01-31 + 1 month = 2024-02-29 (leap year).
        assert_eq!(shift((2024, 1, 31), 1), (2024, 2, 29));
        // 2023-01-31 + 1 month = 2023-02-28 (non-leap).
        assert_eq!(shift((2023, 1, 31), 1), (2023, 2, 28));
        // 2024-03-31 - 1 month = 2024-02-29.
        assert_eq!(shift((2024, 3, 31), -1), (2024, 2, 29));
    }

    #[test]
    fn interval_date_plus_pure_days_stays_date() {
        // DATE + INTERVAL '7 days' must stay DATE.
        let start = Value::Date(days_from_civil(2024, 6, 1));
        let week = Value::Interval {
            months: 0,
            micros: 7 * 86_400_000_000,
        };
        let sum = apply_binary_interval(BinOp::Add, &start, &week)
            .unwrap()
            .unwrap();
        assert_eq!(sum, Value::Date(days_from_civil(2024, 6, 8)));
    }

    #[test]
    fn interval_date_plus_sub_day_lifts_to_timestamp() {
        // DATE + INTERVAL '1 hour' must lift to TIMESTAMP.
        let day = days_from_civil(2024, 6, 1);
        let hour = Value::Interval {
            months: 0,
            micros: 3_600_000_000,
        };
        let sum = apply_binary_interval(BinOp::Add, &Value::Date(day), &hour)
            .unwrap()
            .unwrap();
        let expected = i64::from(day) * 86_400_000_000 + 3_600_000_000;
        assert_eq!(sum, Value::Timestamp(expected));
    }
}