Skip to main content

spg_engine/eval/
cast.rs

1//! `expr::TYPE` CAST evaluation (cut 29 — extracted from `eval.rs`).
2//!
3//! Implements PG-style runtime coercion: the giant `cast_value`
4//! dispatcher plus its per-target helpers (numeric / bool / array /
5//! date / timestamp / interval / vector). Date and timestamp casts
6//! defer to the calendar parsers (`parse_date_literal` /
7//! `parse_timestamp_literal`) that stay in `eval.rs`; tsvector /
8//! tsquery casts defer to the FTS codecs re-exported from
9//! `eval::textsearch`.
10
11use alloc::format;
12use alloc::string::{String, ToString};
13use alloc::vec::Vec;
14
15use spg_sql::ast::CastTarget;
16use spg_storage::Value;
17
18use super::{
19    EvalError, decode_tsquery_external, decode_tsvector_external, parse_date_literal,
20    parse_timestamp_literal, value_to_text,
21};
22
23/// PG-style `expr::TYPE` coercion. NULL always casts as NULL.
24pub fn cast_value(v: Value, target: CastTarget) -> Result<Value, EvalError> {
25    if matches!(v, Value::Null) {
26        return Ok(Value::Null);
27    }
28    match target {
29        CastTarget::Vector => cast_to_vector(v),
30        CastTarget::Text => Ok(Value::Text(value_to_text(&v))),
31        CastTarget::Int => cast_numeric_to_int(v),
32        CastTarget::BigInt => cast_numeric_to_bigint(v),
33        CastTarget::Float => cast_numeric_to_float(v),
34        CastTarget::Bool => cast_to_bool(v),
35        CastTarget::Date => cast_to_date(v),
36        // TIMESTAMP and TIMESTAMPTZ have identical runtime
37        // representation (i64 microseconds UTC).
38        CastTarget::Timestamp | CastTarget::Timestamptz => cast_to_timestamp(v),
39        // v7.9.25 — `expr::INTERVAL`. Currently only TEXT → Interval
40        // is supported (the mailrs idiom: `$1::INTERVAL` where the
41        // bound param is a string like `'7 days'`).
42        CastTarget::Interval => cast_to_interval(v),
43        // v7.9.25 — `::json` / `::jsonb`. Routes Text → Json
44        // (validation is the producer's responsibility, same as
45        // the column-INSERT path).
46        CastTarget::Json | CastTarget::Jsonb => match v {
47            Value::Json(s) => Ok(Value::Json(s)),
48            Value::Text(s) => Ok(Value::Json(s)),
49            other => Err(EvalError::TypeMismatch {
50                detail: alloc::format!(
51                    "::json / ::jsonb only accepts TEXT-shape inputs, got {:?}",
52                    other.data_type()
53                ),
54            }),
55        },
56        // v7.17.0 Phase 5.3 — `::regtype` / `::regclass`. PG
57        // semantics: each is a textual catalog-name surfacing as
58        // a numeric OID at the wire layer that renders back as
59        // the original name. SPG has no OID space, but pg_dump /
60        // mailrs / Django code uses the cast purely for textual
61        // round-trip — feeding `'public.t'::regclass::text` into
62        // a downstream `format(…)` or string concat. We map to
63        // that textual contract: Text in → Text out (the schema-
64        // qualifier `public.` is stripped to match PG's default
65        // search_path-aware rendering); numeric in → re-cast to
66        // Text as best-effort; anything else errors.
67        //
68        // Pre-3.3 / pre-5.3 (v7.9.26) the cast surfaced a clean
69        // error; this lifts to accept-and-textify so the dominant
70        // dump-loader pattern unblocks. SPG-shaped queries that
71        // genuinely need an OID for runtime joins are still
72        // documented as unsupported.
73        CastTarget::RegType | CastTarget::RegClass => match v {
74            Value::Text(s) => {
75                // Strip an optional `<schema>.` prefix — PG's
76                // regclass render drops it when the schema is on
77                // the search_path; SPG is single-schema so
78                // dropping is always safe.
79                let bare = s.rsplit('.').next().unwrap_or(&s).to_string();
80                Ok(Value::Text(bare))
81            }
82            Value::Int(n) => Ok(Value::Text(alloc::format!("{n}"))),
83            Value::BigInt(n) => Ok(Value::Text(alloc::format!("{n}"))),
84            other => Err(EvalError::TypeMismatch {
85                detail: alloc::format!(
86                    "::regtype / ::regclass accepts TEXT (name) or integer (oid), got {:?}",
87                    other.data_type()
88                ),
89            }),
90        },
91        // v7.10.11 — `::TEXT[]`. Decode PG external array form
92        // when input is Text; pass through unchanged when it is
93        // already TextArray. Anything else is a type mismatch.
94        CastTarget::TextArray => match v {
95            Value::TextArray(items) => Ok(Value::TextArray(items)),
96            Value::Text(s) => decode_text_array_external(&s).map(Value::TextArray),
97            other => Err(EvalError::TypeMismatch {
98                detail: alloc::format!(
99                    "::TEXT[] only accepts TEXT / TEXT[] inputs, got {:?}",
100                    other.data_type()
101                ),
102            }),
103        },
104        // v7.11.13 — `::INT[]` / `::BIGINT[]`. Decode PG external
105        // form `{1,2,3}` when input is Text; widen TextArray /
106        // IntArray as appropriate.
107        CastTarget::IntArray => cast_to_int_array(v),
108        CastTarget::BigIntArray => cast_to_bigint_array(v),
109        // v7.12.0 — `::tsvector` / `::tsquery`. Decodes PG external
110        // form when input is Text; passes through unchanged when the
111        // input is already the target type. Other inputs are a type
112        // mismatch. Lexer / Porter stemmer arrive in v7.12.1; the
113        // external-form cast at v7.12.0 is the path pg_dump and
114        // direct-literal callers use.
115        CastTarget::TsVector => match v {
116            Value::TsVector(items) => Ok(Value::TsVector(items)),
117            Value::Text(s) => decode_tsvector_external(&s).map(Value::TsVector),
118            other => Err(EvalError::TypeMismatch {
119                detail: alloc::format!(
120                    "::tsvector only accepts TEXT / tsvector inputs, got {:?}",
121                    other.data_type()
122                ),
123            }),
124        },
125        CastTarget::TsQuery => match v {
126            Value::TsQuery(ast) => Ok(Value::TsQuery(ast)),
127            Value::Text(s) => decode_tsquery_external(&s).map(Value::TsQuery),
128            other => Err(EvalError::TypeMismatch {
129                detail: alloc::format!(
130                    "::tsquery only accepts TEXT / tsquery inputs, got {:?}",
131                    other.data_type()
132                ),
133            }),
134        },
135        // v7.17.0 — `::uuid`. Identity for `uuid → uuid`; parse
136        // text via the shared `parse_uuid_str`. Anything else is a
137        // type mismatch — PG also rejects e.g. INT → UUID without
138        // an explicit text bridge.
139        CastTarget::Uuid => match v {
140            Value::Uuid(b) => Ok(Value::Uuid(b)),
141            Value::Text(s) => match spg_storage::parse_uuid_str(&s) {
142                Some(b) => Ok(Value::Uuid(b)),
143                None => Err(EvalError::TypeMismatch {
144                    detail: alloc::format!("invalid input syntax for type uuid: {s:?}"),
145                }),
146            },
147            other => Err(EvalError::TypeMismatch {
148                detail: alloc::format!(
149                    "::uuid only accepts TEXT / uuid inputs, got {:?}",
150                    other.data_type()
151                ),
152            }),
153        },
154        // v7.18 — `::bytea`. Identity for `Bytes → Bytes`; decode
155        // Text via the engine's PG-format bytea decoder (`\x`
156        // hex form + `\NNN` escape form). Anything else is a type
157        // mismatch — same shape as PG's contract. Closes the
158        // mailrs D-pre #3 reverse-acceptance gap.
159        CastTarget::Bytea => match v {
160            Value::Bytes(b) => Ok(Value::Bytes(b)),
161            Value::Text(s) => match crate::conversions::decode_bytea_literal(&s) {
162                Ok(b) => Ok(Value::Bytes(b)),
163                Err(msg) => Err(EvalError::TypeMismatch {
164                    detail: alloc::format!("invalid input syntax for type bytea: {msg}"),
165                }),
166            },
167            other => Err(EvalError::TypeMismatch {
168                detail: alloc::format!(
169                    "::bytea only accepts TEXT / bytea inputs, got {:?}",
170                    other.data_type()
171                ),
172            }),
173        },
174    }
175}
176
177fn cast_to_int_array(v: Value) -> Result<Value, EvalError> {
178    match v {
179        Value::IntArray(items) => Ok(Value::IntArray(items)),
180        Value::BigIntArray(items) => {
181            let mut out: Vec<Option<i32>> = Vec::with_capacity(items.len());
182            for item in items {
183                match item {
184                    None => out.push(None),
185                    Some(n) => match i32::try_from(n) {
186                        Ok(x) => out.push(Some(x)),
187                        Err(_) => {
188                            return Err(EvalError::TypeMismatch {
189                                detail: alloc::format!("::INT[] element {n} overflows i32"),
190                            });
191                        }
192                    },
193                }
194            }
195            Ok(Value::IntArray(out))
196        }
197        Value::Text(s) => decode_int_array_external(&s).map(Value::IntArray),
198        Value::TextArray(items) => {
199            let mut out: Vec<Option<i32>> = Vec::with_capacity(items.len());
200            for item in items {
201                match item {
202                    None => out.push(None),
203                    Some(s) => match s.parse::<i32>() {
204                        Ok(n) => out.push(Some(n)),
205                        Err(_) => {
206                            return Err(EvalError::TypeMismatch {
207                                detail: alloc::format!("::INT[] cannot parse {s:?}"),
208                            });
209                        }
210                    },
211                }
212            }
213            Ok(Value::IntArray(out))
214        }
215        other => Err(EvalError::TypeMismatch {
216            detail: alloc::format!("::INT[] does not accept {:?}", other.data_type()),
217        }),
218    }
219}
220
221fn cast_to_bigint_array(v: Value) -> Result<Value, EvalError> {
222    match v {
223        Value::BigIntArray(items) => Ok(Value::BigIntArray(items)),
224        Value::IntArray(items) => Ok(Value::BigIntArray(
225            items.into_iter().map(|x| x.map(i64::from)).collect(),
226        )),
227        Value::Text(s) => decode_bigint_array_external(&s).map(Value::BigIntArray),
228        Value::TextArray(items) => {
229            let mut out: Vec<Option<i64>> = Vec::with_capacity(items.len());
230            for item in items {
231                match item {
232                    None => out.push(None),
233                    Some(s) => match s.parse::<i64>() {
234                        Ok(n) => out.push(Some(n)),
235                        Err(_) => {
236                            return Err(EvalError::TypeMismatch {
237                                detail: alloc::format!("::BIGINT[] cannot parse {s:?}"),
238                            });
239                        }
240                    },
241                }
242            }
243            Ok(Value::BigIntArray(out))
244        }
245        other => Err(EvalError::TypeMismatch {
246            detail: alloc::format!("::BIGINT[] does not accept {:?}", other.data_type()),
247        }),
248    }
249}
250
251fn decode_int_array_external(s: &str) -> Result<Vec<Option<i32>>, EvalError> {
252    let trimmed = s.trim();
253    let inner = trimmed
254        .strip_prefix('{')
255        .and_then(|x| x.strip_suffix('}'))
256        .ok_or_else(|| EvalError::TypeMismatch {
257            detail: alloc::format!("INT[] literal {s:?} must be enclosed in '{{...}}'"),
258        })?;
259    if inner.trim().is_empty() {
260        return Ok(Vec::new());
261    }
262    inner
263        .split(',')
264        .map(|part| {
265            let p = part.trim();
266            if p.eq_ignore_ascii_case("NULL") {
267                Ok(None)
268            } else {
269                p.parse::<i32>()
270                    .map(Some)
271                    .map_err(|_| EvalError::TypeMismatch {
272                        detail: alloc::format!("INT[] element {p:?} is not an i32"),
273                    })
274            }
275        })
276        .collect()
277}
278
279fn decode_bigint_array_external(s: &str) -> Result<Vec<Option<i64>>, EvalError> {
280    let trimmed = s.trim();
281    let inner = trimmed
282        .strip_prefix('{')
283        .and_then(|x| x.strip_suffix('}'))
284        .ok_or_else(|| EvalError::TypeMismatch {
285            detail: alloc::format!("BIGINT[] literal {s:?} must be enclosed in '{{...}}'"),
286        })?;
287    if inner.trim().is_empty() {
288        return Ok(Vec::new());
289    }
290    inner
291        .split(',')
292        .map(|part| {
293            let p = part.trim();
294            if p.eq_ignore_ascii_case("NULL") {
295                Ok(None)
296            } else {
297                p.parse::<i64>()
298                    .map(Some)
299                    .map_err(|_| EvalError::TypeMismatch {
300                        detail: alloc::format!("BIGINT[] element {p:?} is not an i64"),
301                    })
302            }
303        })
304        .collect()
305}
306
307/// v7.10.11 — same decoder as `decode_text_array_literal` in
308/// `lib.rs`, but lives here so the eval-time cast path stays
309/// inside `spg-engine::eval`. Kept in lock-step with the engine
310/// `coerce_value` decoder by tests.
311fn decode_text_array_external(s: &str) -> Result<Vec<Option<String>>, EvalError> {
312    let trimmed = s.trim();
313    let inner = trimmed
314        .strip_prefix('{')
315        .and_then(|x| x.strip_suffix('}'))
316        .ok_or_else(|| EvalError::TypeMismatch {
317            detail: alloc::format!("TEXT[] literal {s:?} must be enclosed in '{{...}}'"),
318        })?;
319    let mut out: Vec<Option<String>> = Vec::new();
320    if inner.trim().is_empty() {
321        return Ok(out);
322    }
323    let bytes = inner.as_bytes();
324    let mut i = 0;
325    while i <= bytes.len() {
326        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
327            i += 1;
328        }
329        if i < bytes.len() && bytes[i] == b'"' {
330            i += 1;
331            let mut buf = String::new();
332            while i < bytes.len() && bytes[i] != b'"' {
333                if bytes[i] == b'\\' && i + 1 < bytes.len() {
334                    buf.push(bytes[i + 1] as char);
335                    i += 2;
336                } else {
337                    buf.push(bytes[i] as char);
338                    i += 1;
339                }
340            }
341            if i >= bytes.len() {
342                return Err(EvalError::TypeMismatch {
343                    detail: "unterminated quoted element in TEXT[] literal".into(),
344                });
345            }
346            i += 1;
347            out.push(Some(buf));
348        } else {
349            let start = i;
350            while i < bytes.len() && bytes[i] != b',' {
351                i += 1;
352            }
353            let raw = inner[start..i].trim();
354            if raw.eq_ignore_ascii_case("NULL") {
355                out.push(None);
356            } else {
357                out.push(Some(raw.to_string()));
358            }
359        }
360        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
361            i += 1;
362        }
363        if i >= bytes.len() {
364            break;
365        }
366        if bytes[i] != b',' {
367            return Err(EvalError::TypeMismatch {
368                detail: "expected ',' between TEXT[] elements".into(),
369            });
370        }
371        i += 1;
372    }
373    Ok(out)
374}
375
376fn cast_to_interval(v: Value) -> Result<Value, EvalError> {
377    match v {
378        Value::Interval { months, micros } => Ok(Value::Interval { months, micros }),
379        Value::Text(s) => {
380            let (months, micros) = spg_sql::parser::parse_interval_text(&s).ok_or_else(|| {
381                EvalError::TypeMismatch {
382                    detail: alloc::format!("cannot parse {s:?} as INTERVAL"),
383                }
384            })?;
385            Ok(Value::Interval { months, micros })
386        }
387        other => Err(EvalError::TypeMismatch {
388            detail: alloc::format!(
389                "::INTERVAL only accepts TEXT-shape inputs, got {:?}",
390                other.data_type()
391            ),
392        }),
393    }
394}
395
396fn cast_to_date(v: Value) -> Result<Value, EvalError> {
397    match v {
398        Value::Date(d) => Ok(Value::Date(d)),
399        // Integer literals carry days since the Unix epoch — used by
400        // the `CURRENT_DATE` AST rewrite to inject the wall clock.
401        Value::Int(n) => Ok(Value::Date(n)),
402        Value::BigInt(n) => {
403            i32::try_from(n)
404                .map(Value::Date)
405                .map_err(|_| EvalError::TypeMismatch {
406                    detail: "bigint days-since-epoch out of DATE range".into(),
407                })
408        }
409        // Timestamp truncates to its day boundary.
410        Value::Timestamp(t) => {
411            let days = t.div_euclid(86_400_000_000);
412            i32::try_from(days)
413                .map(Value::Date)
414                .map_err(|_| EvalError::TypeMismatch {
415                    detail: "timestamp out of DATE range".into(),
416                })
417        }
418        Value::Text(s) => parse_date_literal(&s)
419            .map(Value::Date)
420            .ok_or(EvalError::TypeMismatch {
421                detail: format!("cannot parse {s:?} as DATE (expected YYYY-MM-DD)"),
422            }),
423        other => Err(EvalError::TypeMismatch {
424            detail: format!("cannot cast {:?} to DATE", other.data_type()),
425        }),
426    }
427}
428
429fn cast_to_timestamp(v: Value) -> Result<Value, EvalError> {
430    match v {
431        Value::Timestamp(t) => Ok(Value::Timestamp(t)),
432        // Int / BigInt carry microseconds since the Unix epoch — used
433        // by the `NOW()` / `CURRENT_TIMESTAMP` AST rewrite to inject
434        // the wall clock as a plain integer literal.
435        Value::Int(n) => Ok(Value::Timestamp(i64::from(n))),
436        Value::BigInt(n) => Ok(Value::Timestamp(n)),
437        // DATE → TIMESTAMP picks midnight on the date.
438        Value::Date(d) => Ok(Value::Timestamp(i64::from(d) * 86_400_000_000)),
439        Value::Text(s) => {
440            parse_timestamp_literal(&s)
441                .map(Value::Timestamp)
442                .ok_or(EvalError::TypeMismatch {
443                    detail: format!(
444                        "cannot parse {s:?} as TIMESTAMP \
445                     (expected YYYY-MM-DD[ HH:MM:SS[.ffffff]])"
446                    ),
447                })
448        }
449        other => Err(EvalError::TypeMismatch {
450            detail: format!("cannot cast {:?} to TIMESTAMP", other.data_type()),
451        }),
452    }
453}
454
455fn cast_numeric_to_int(v: Value) -> Result<Value, EvalError> {
456    match v {
457        Value::Int(n) => Ok(Value::Int(n)),
458        Value::BigInt(n) => i32::try_from(n)
459            .map(Value::Int)
460            .map_err(|_| EvalError::TypeMismatch {
461                detail: format!("bigint {n} does not fit in int"),
462            }),
463        #[allow(clippy::cast_possible_truncation)]
464        Value::Float(x) => Ok(Value::Int(x as i32)),
465        Value::Text(s) => {
466            s.trim()
467                .parse::<i32>()
468                .map(Value::Int)
469                .map_err(|_| EvalError::TypeMismatch {
470                    detail: format!("cannot parse {s:?} as int"),
471                })
472        }
473        Value::Bool(b) => Ok(Value::Int(i32::from(b))),
474        other => Err(EvalError::TypeMismatch {
475            detail: format!("cannot cast {:?} to int", other.data_type()),
476        }),
477    }
478}
479
480fn cast_numeric_to_bigint(v: Value) -> Result<Value, EvalError> {
481    match v {
482        Value::Int(n) => Ok(Value::BigInt(i64::from(n))),
483        Value::BigInt(n) => Ok(Value::BigInt(n)),
484        #[allow(clippy::cast_possible_truncation)]
485        Value::Float(x) => Ok(Value::BigInt(x as i64)),
486        Value::Text(s) => {
487            s.trim()
488                .parse::<i64>()
489                .map(Value::BigInt)
490                .map_err(|_| EvalError::TypeMismatch {
491                    detail: format!("cannot parse {s:?} as bigint"),
492                })
493        }
494        Value::Bool(b) => Ok(Value::BigInt(i64::from(b))),
495        other => Err(EvalError::TypeMismatch {
496            detail: format!("cannot cast {:?} to bigint", other.data_type()),
497        }),
498    }
499}
500
501fn cast_numeric_to_float(v: Value) -> Result<Value, EvalError> {
502    match v {
503        Value::Int(n) => Ok(Value::Float(f64::from(n))),
504        #[allow(clippy::cast_precision_loss)]
505        Value::BigInt(n) => Ok(Value::Float(n as f64)),
506        Value::Float(x) => Ok(Value::Float(x)),
507        Value::Text(s) => {
508            s.trim()
509                .parse::<f64>()
510                .map(Value::Float)
511                .map_err(|_| EvalError::TypeMismatch {
512                    detail: format!("cannot parse {s:?} as float"),
513                })
514        }
515        other => Err(EvalError::TypeMismatch {
516            detail: format!("cannot cast {:?} to float", other.data_type()),
517        }),
518    }
519}
520
521fn cast_to_bool(v: Value) -> Result<Value, EvalError> {
522    match v {
523        Value::Bool(b) => Ok(Value::Bool(b)),
524        Value::Int(n) => Ok(Value::Bool(n != 0)),
525        Value::BigInt(n) => Ok(Value::Bool(n != 0)),
526        Value::Text(s) => {
527            let lo = s.trim().to_ascii_lowercase();
528            match lo.as_str() {
529                "true" | "t" | "yes" | "y" | "1" | "on" => Ok(Value::Bool(true)),
530                "false" | "f" | "no" | "n" | "0" | "off" => Ok(Value::Bool(false)),
531                _ => Err(EvalError::TypeMismatch {
532                    detail: format!("cannot parse {s:?} as bool"),
533                }),
534            }
535        }
536        other => Err(EvalError::TypeMismatch {
537            detail: format!("cannot cast {:?} to bool", other.data_type()),
538        }),
539    }
540}
541
542/// Parse a `Value::Text("[1.0, 2.0, 3.0]")` into a `Value::Vector(..)`. Mirrors
543/// pgvector's `'[..]'::vector` cast. NULL casts as NULL.
544pub fn cast_to_vector(v: Value) -> Result<Value, EvalError> {
545    match v {
546        Value::Null => Ok(Value::Null),
547        Value::Vector(v) => Ok(Value::Vector(v)),
548        Value::Text(s) => parse_vector_text(&s)
549            .map(Value::Vector)
550            .ok_or(EvalError::TypeMismatch {
551                detail: format!("cannot parse {s:?} as a vector literal"),
552            }),
553        other => Err(EvalError::TypeMismatch {
554            detail: format!("::vector requires text input, got {:?}", other.data_type()),
555        }),
556    }
557}
558
/// Parses a bracketed, comma-separated list such as `"[1.0, 2.0, -3]"` into
/// its `f32` components.
///
/// Whitespace around the brackets and around each component is ignored, and
/// brackets that enclose only whitespace give an empty vector. Returns
/// `None` when the brackets are missing or any component fails to parse as
/// `f32`.
pub fn parse_vector_text(s: &str) -> Option<Vec<f32>> {
    let body = s
        .trim()
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))?
        .trim();

    if body.is_empty() {
        return Some(Vec::new());
    }

    // Collecting into `Option<Vec<_>>` stops at the first component that
    // does not parse and yields `None`.
    body.split(',')
        .map(|piece| piece.trim().parse::<f32>().ok())
        .collect()
}