Skip to main content

spg_engine/eval/
format.rs

1//! Canonical PG text representation of typed values + date/time literal
2//! parsing, split out of `eval.rs` (cut 31). The value→text formatters
3//! (`format_date` / `format_timestamp` / `format_timestamptz` /
4//! `format_time` / `format_timetz` / `format_money` / `format_interval`
5//! / `format_numeric` and the array formatters) plus the inverse
6//! `parse_date_literal` / `parse_timestamp_literal` text→value parsers
7//! with their TZ-suffix helpers. `civil_from_days` stays in `eval.rs`
8//! (shared with the date SQL functions there); the calendar-arithmetic
9//! helpers (`add_months_to_civil` / `days_in_month`) stay alongside
10//! `shift_date_by_months` in `eval.rs`.
11
12use alloc::format;
13use alloc::string::{String, ToString};
14use alloc::vec::Vec;
15
16use super::civil_from_days;
17
18/// Render a `Date` (days since epoch) as `YYYY-MM-DD`. Negative values
19/// for pre-1970 dates render with a leading `-` on the year.
20pub fn format_date(days: i32) -> String {
21    let (y, m, d) = civil_from_days(days);
22    format!("{y:04}-{m:02}-{d:02}")
23}
24
25/// Render a `Timestamp` (microseconds since epoch) as
26/// `YYYY-MM-DD HH:MM:SS[.fff...]`. Trailing-zero fractional digits are
27/// dropped; a whole-second value has no fractional part.
28/// v7.15.0 — PG-canonical TIMESTAMPTZ wire format. Storage is
29/// the same i64 microseconds UTC as TIMESTAMP, but the canonical
30/// PG text output appends the session's UTC-offset suffix (`+00`
31/// for the default UTC session, the form pg_dump emits). Mailrs
32/// round-8 acceptance criterion: `SELECT col FROM tstz` should
33/// round-trip to a literal that re-INSERTs without semantic
34/// drift.
35pub fn format_timestamptz(micros: i64) -> String {
36    let base = format_timestamp(micros);
37    let mut s = String::with_capacity(base.len() + 3);
38    s.push_str(&base);
39    s.push_str("+00");
40    s
41}
42
/// v7.17.0 Phase 3.P0-35 — PG `money` canonical text form for the
/// en_US locale, mirroring `cash_out` under
/// `lc_monetary = 'en_US.UTF-8'`: `$N,NNN.CC`, with negatives written
/// as `-$1.23`. `cents` is the amount in hundredths of a dollar.
pub fn format_money(cents: i64) -> String {
    // `unsigned_abs` keeps `i64::MIN` representable.
    let magnitude = cents.unsigned_abs();
    let whole = (magnitude / 100).to_string();
    let hundredths = magnitude % 100;

    // The leftmost group holds 1–3 digits; every group after it is
    // exactly three digits and is preceded by a comma.
    let lead = match whole.len() % 3 {
        0 => 3,
        partial => partial,
    };
    let mut grouped = String::with_capacity(whole.len() + whole.len() / 3);
    grouped.push_str(&whole[..lead]);
    let mut rest = &whole[lead..];
    while !rest.is_empty() {
        let (group, tail) = rest.split_at(3);
        grouped.push(',');
        grouped.push_str(group);
        rest = tail;
    }

    let sign = if cents < 0 { "-" } else { "" };
    format!("{sign}${grouped}.{hundredths:02}")
}
67
68/// v7.17.0 Phase 3.P0-34 — PG `TIMETZ` canonical text form
69/// `HH:MM:SS[.ffffff]±HH[:MM]`. Mirrors PG `timetz_out`. The
70/// offset uses `±HH` for whole-hour offsets and `±HH:MM` for
71/// sub-hour offsets (matching PG's "minimal display" rule).
72pub fn format_timetz(us: i64, offset_secs: i32) -> String {
73    let time = format_time(us);
74    let sign = if offset_secs < 0 { '-' } else { '+' };
75    let abs = offset_secs.unsigned_abs();
76    let oh = abs / 3600;
77    let om = (abs % 3600) / 60;
78    if om == 0 {
79        format!("{time}{sign}{oh:02}")
80    } else {
81        format!("{time}{sign}{oh:02}:{om:02}")
82    }
83}
84
/// v7.17.0 Phase 3.P0-32 — PG `TIME` canonical text form
/// `HH:MM:SS[.ffffff]`, mirroring PG `time_out`. `us` is a
/// microsecond count; the hour field is not wrapped at 24. Trailing
/// zeros of the fractional part are dropped, so `12:00:00.500000`
/// is written `12:00:00.5`, and a whole-second value has no
/// fractional part at all.
pub fn format_time(us: i64) -> String {
    let whole_secs = us.div_euclid(1_000_000);
    let sub_micros = us.rem_euclid(1_000_000);
    let (hours, minutes, seconds) = (whole_secs / 3600, (whole_secs / 60) % 60, whole_secs % 60);

    let mut out = format!("{hours:02}:{minutes:02}:{seconds:02}");
    if sub_micros != 0 {
        // Render all six digits first, then shave the trailing zeros.
        let six_digits = format!("{sub_micros:06}");
        out.push('.');
        out.push_str(six_digits.trim_end_matches('0'));
    }
    out
}
103
104pub fn format_timestamp(micros: i64) -> String {
105    const MICROS_PER_DAY: i64 = 86_400_000_000;
106    // Split into day + intra-day part with proper floor division so
107    // negative timestamps render right too.
108    let days = micros.div_euclid(MICROS_PER_DAY);
109    let day_micros = micros.rem_euclid(MICROS_PER_DAY);
110    let day_i32 = i32::try_from(days).unwrap_or(i32::MAX);
111    let (y, m, d) = civil_from_days(day_i32);
112    let secs = day_micros / 1_000_000;
113    let frac = day_micros % 1_000_000;
114    let hh = secs / 3600;
115    let mm = (secs / 60) % 60;
116    let ss = secs % 60;
117    if frac == 0 {
118        format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}")
119    } else {
120        // Strip trailing zeros from the 6-digit fractional component.
121        let raw = format!("{frac:06}");
122        let trimmed = raw.trim_end_matches('0');
123        format!("{y:04}-{m:02}-{d:02} {hh:02}:{mm:02}:{ss:02}.{trimmed}")
124    }
125}
126
/// Inverse of `civil_from_days`: maps a (year, month, day) triple to
/// the number of days since 1970-01-01. `m` and `d` are not range
/// checked here (callers are expected to validate them); a day of
/// `0` is treated like `1`. If the result does not fit in an `i32`
/// the function returns `i32::MAX`.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
pub fn days_from_civil(y: i32, m: u32, d: u32) -> i32 {
    // Work in years that begin on 1 March, so January and February
    // count as the tail of the previous year.
    let shifted_year = i64::from(y) - i64::from(m <= 2);
    let shifted_month = if m > 2 { m - 3 } else { m + 9 };

    // 400-year eras all have the same length (146_097 days).
    let era = shifted_year.div_euclid(400);
    let year_of_era = shifted_year.rem_euclid(400) as u32;

    let day_of_year = (153 * shifted_month + 2) / 5 + d.saturating_sub(1);
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;

    // 719_468 shifts the origin from 0000-03-01 to 1970-01-01.
    let since_epoch = era * 146_097 + i64::from(day_of_era) - 719_468;
    i32::try_from(since_epoch).unwrap_or(i32::MAX)
}
143
144/// Parse `YYYY-MM-DD` into a `Date` (days since Unix epoch). Returns
145/// `None` on shape / numeric failure; the engine surfaces that as a
146/// `TypeMismatch` with the original text included.
147pub fn parse_date_literal(s: &str) -> Option<i32> {
148    let bytes = s.as_bytes();
149    if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' {
150        return None;
151    }
152    let y: i32 = s[0..4].parse().ok()?;
153    let m: u32 = s[5..7].parse().ok()?;
154    let d: u32 = s[8..10].parse().ok()?;
155    if !(1..=12).contains(&m) || !(1..=31).contains(&d) {
156        return None;
157    }
158    Some(days_from_civil(y, m, d))
159}
160
161/// Parse `YYYY-MM-DD[ HH:MM:SS[.ffffff]]` into a `Timestamp`
162/// (microseconds since Unix epoch). The time portion is optional;
163/// missing → midnight. The fractional portion accepts 1–6 digits and
164/// pads with zeros to microseconds.
165pub fn parse_timestamp_literal(s: &str) -> Option<i64> {
166    let trimmed = s.trim();
167    let (date_part, time_part) = match trimmed.find([' ', 'T']) {
168        Some(i) => (&trimmed[..i], Some(&trimmed[i + 1..])),
169        None => (trimmed, None),
170    };
171    let days = parse_date_literal(date_part)?;
172    let (day_micros, tz_offset_micros) = match time_part {
173        None => (0, 0),
174        Some(t) => parse_time_of_day_micros(t)?,
175    };
176    // PG semantics: a TIMESTAMPTZ literal with an explicit offset
177    // is normalised to UTC for storage. `'12:00:00+09'` means
178    // 12:00:00 in a UTC+09 zone → 03:00:00 UTC → subtract the
179    // positive offset (or add the negative one). Storage is i64
180    // microseconds UTC for both TIMESTAMP and TIMESTAMPTZ (see
181    // spg-storage::DataType::Timestamptz docs); the wire-level
182    // round-trip then re-applies the session timezone on the
183    // SELECT side when format_timestamp is asked for a TZ-aware
184    // render.
185    Some(i64::from(days) * 86_400_000_000 + day_micros - tz_offset_micros)
186}
187
188/// v7.15.0 — Parse `HH:MM:SS[.frac][<tz>]` and return
189/// `(day_micros, tz_offset_micros)` where `day_micros` is the
190/// local-clock seconds-of-day in microseconds and
191/// `tz_offset_micros` is the UTC offset (positive = east of
192/// UTC, negative = west). Caller subtracts the offset to
193/// normalise to UTC. PG's recognised TZ shapes after the
194/// seconds (or frac) part:
195///   * `+OO[:MM]` / `-OO[:MM]` — numeric offset
196///   * `+OOMM` / `-OOMM` (no colon, less common but legal)
197///   * ` UTC` / `UTC` / `Z` — explicit zero offset
198/// Anything else after the seconds = parse failure (the caller
199/// surfaces as "cannot parse … as TIMESTAMP").
200fn parse_time_of_day_micros(t: &str) -> Option<(i64, i64)> {
201    let t = t.trim();
202    // Detect & strip optional TZ suffix. Anchor on the first
203    // `+` / `-` AFTER position 8 (so the leading sign on a
204    // negative offset can't be mistaken for an `HH:MM:SS-OO`
205    // boundary if the time itself is somehow malformed).
206    // ` UTC` and trailing `Z` also count as zero-offset TZ tags.
207    let (core, tz_micros) = if let Some(rest) = t.strip_suffix('Z') {
208        (rest, 0i64)
209    } else if let Some(rest) = t.strip_suffix(" UTC").or_else(|| t.strip_suffix("UTC")) {
210        (rest, 0i64)
211    } else if let Some((idx, sign_byte)) = find_offset_sign(t) {
212        let suffix = &t[idx..];
213        let micros = parse_tz_offset_suffix(suffix, sign_byte == b'+')?;
214        (&t[..idx], micros)
215    } else {
216        (t, 0i64)
217    };
218    let (time, frac_str) = match core.split_once('.') {
219        Some((a, b)) => (a, Some(b)),
220        None => (core, None),
221    };
222    let bytes = time.as_bytes();
223    if bytes.len() != 8 || bytes[2] != b':' || bytes[5] != b':' {
224        return None;
225    }
226    let hh: i64 = time[0..2].parse().ok()?;
227    let mm: i64 = time[3..5].parse().ok()?;
228    let ss: i64 = time[6..8].parse().ok()?;
229    if !(0..24).contains(&hh) || !(0..60).contains(&mm) || !(0..60).contains(&ss) {
230        return None;
231    }
232    let frac_micros: i64 = match frac_str {
233        None => 0,
234        Some(f) => {
235            // Pad right with zeros to 6 digits, then truncate extras.
236            if f.is_empty() || f.len() > 9 {
237                return None;
238            }
239            let mut padded = String::with_capacity(6);
240            padded.push_str(&f[..f.len().min(6)]);
241            while padded.len() < 6 {
242                padded.push('0');
243            }
244            padded.parse().ok()?
245        }
246    };
247    Some((
248        ((hh * 3600 + mm * 60 + ss) * 1_000_000) + frac_micros,
249        tz_micros,
250    ))
251}
252
/// Locate the TZ-offset sign that follows an `HH:MM:SS[.fff]` time
/// string. Returns the byte index and the sign byte (`+` or `-`) of
/// the first such byte at index 8 or later, or `None` when there is
/// none. This includes every input of eight bytes or fewer. The
/// first eight bytes (`HH:MM:SS`) are never inspected.
fn find_offset_sign(t: &str) -> Option<(usize, u8)> {
    t.bytes()
        .enumerate()
        .skip(8)
        .find(|&(_, byte)| matches!(byte, b'+' | b'-'))
}
272
/// Parse `+OO`, `+OO:MM`, `+OOMM`, `-OO`, `-OO:MM`, `-OOMM` into
/// a UTC-offset microsecond delta. The first character of `suffix`
/// is skipped as the sign; `is_positive` tells which sign it was and
/// decides the sign of the result.
///
/// Returns `None` when the suffix is empty, when a field is empty or
/// contains anything other than ASCII digits, when a colon-less body
/// is not exactly two or four digits, or when the hours exceed 18 or
/// the minutes exceed 59.
fn parse_tz_offset_suffix(suffix: &str, is_positive: bool) -> Option<i64> {
    // Checked slice: an empty suffix (or one that does not start
    // with a single-byte sign) is a parse failure, not a panic.
    let body = suffix.get(1..)?;
    let (hh_text, mm_text): (&str, Option<&str>) = match body.split_once(':') {
        Some((h, m)) => (h, Some(m)),
        None => match body.len() {
            2 => (body, None),
            // `OOMM`. The ASCII guard makes the byte-offset split
            // safe; a multi-byte character would otherwise be cut
            // in half. A three-byte body (`+053`) is ambiguous and
            // falls through to the rejection below, as does any
            // other length.
            4 if body.is_ascii() => (&body[..2], Some(&body[2..])),
            _ => return None,
        },
    };
    // Digits only: `str::parse` alone would let `+9` / `-0` through
    // as a field value.
    let digits = |text: &str| -> Option<i64> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        text.parse().ok()
    };
    let hh = digits(hh_text)?;
    let mm = match mm_text {
        Some(text) => digits(text)?,
        None => 0,
    };
    if !(0..=18).contains(&hh) || !(0..60).contains(&mm) {
        return None;
    }
    let abs = (hh * 3600 + mm * 60) * 1_000_000;
    Some(if is_positive { abs } else { -abs })
}
304
/// Render an `Interval { months, micros }` in a PG-ish shape that
/// mirrors `psql`'s text format. The months part contributes
/// `N year(s)` and `N mon(s)`; the microsecond part contributes
/// `N day(s)` and a `[-]HH:MM:SS[.frac]` clock reading with trailing
/// fractional zeros removed. Zero-valued parts are left out, the
/// remaining parts are joined with single spaces, and an all-zero
/// interval renders as `0`.
pub fn format_interval(months: i32, micros: i64) -> String {
    const MICROS_PER_DAY: i64 = 86_400_000_000;

    // PG uses the singular unit only for exactly `+1`; `-1` and
    // every other count take the plural.
    fn label(count: i64, one: &'static str, many: &'static str) -> &'static str {
        if count == 1 {
            one
        } else {
            many
        }
    }

    let mut pieces: Vec<String> = Vec::new();

    let (whole_years, leftover_months) = (months / 12, months % 12);
    if whole_years != 0 {
        let word = label(i64::from(whole_years), "year", "years");
        pieces.push(format!("{whole_years} {word}"));
    }
    if leftover_months != 0 {
        let word = label(i64::from(leftover_months), "mon", "mons");
        pieces.push(format!("{leftover_months} {word}"));
    }

    let whole_days = micros / MICROS_PER_DAY;
    if whole_days != 0 {
        let word = label(whole_days, "day", "days");
        pieces.push(format!("{whole_days} {word}"));
    }

    let clock = micros % MICROS_PER_DAY;
    if clock != 0 {
        // The sign goes in front of the whole clock reading; the
        // fields themselves are formatted from the magnitude.
        let prefix = if clock < 0 { "-" } else { "" };
        let magnitude = clock.abs();
        let (secs, sub_micros) = (magnitude / 1_000_000, magnitude % 1_000_000);
        let mut reading = format!(
            "{prefix}{:02}:{:02}:{:02}",
            secs / 3600,
            (secs / 60) % 60,
            secs % 60
        );
        if sub_micros != 0 {
            let six_digits = format!("{sub_micros:06}");
            reading.push('.');
            reading.push_str(six_digits.trim_end_matches('0'));
        }
        pieces.push(reading);
    }

    if pieces.is_empty() {
        String::from("0")
    } else {
        pieces.join(" ")
    }
}
358
/// v7.10.9 — render a TEXT[] in PG's external array form
/// (`{a,b,NULL}`). An element is double-quoted, with `\\` / `\"`
/// escapes, when it is empty, spells `NULL` in any letter case, or
/// contains a comma, brace, double quote, backslash, or any of the
/// ASCII whitespace characters space, tab, newline, carriage return,
/// vertical tab and form feed. NULL elements use the unquoted
/// literal token `NULL`. Public so the wire layer can produce the
/// canonical text-mode encoding.
pub fn format_text_array(items: &[Option<String>]) -> String {
    let mut out = String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                // All six whitespace characters are covered, not
                // just space and tab: whitespace in an unquoted
                // element is not reliably preserved when the text
                // is read back as an array literal.
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars().any(|c| {
                        matches!(
                            c,
                            ',' | '{'
                                | '}'
                                | '"'
                                | '\\'
                                | ' '
                                | '\t'
                                | '\n'
                                | '\r'
                                | '\u{0B}'
                                | '\u{0C}'
                        )
                    });
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        // Only `"` and `\` need a backslash inside
                        // a quoted element.
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
396
/// v7.11.14 — render an INT[] in PG's external array form
/// (`{1,2,NULL}`). Each present element is written in decimal, each
/// absent one as the literal token `NULL`; elements are separated
/// by commas with no padding. Integer text never needs quoting.
pub fn format_int_array(items: &[Option<i32>]) -> String {
    let cells: Vec<String> = items
        .iter()
        .map(|cell| match cell {
            Some(value) => value.to_string(),
            None => "NULL".to_string(),
        })
        .collect();
    format!("{{{}}}", cells.join(","))
}
415
/// v7.11.14 — render a BIGINT[] in PG's external array form
/// (`{1,2,NULL}`). Present elements are written in decimal, absent
/// ones as the literal token `NULL`, separated by commas.
pub fn format_bigint_array(items: &[Option<i64>]) -> String {
    let mut out = String::with_capacity(2 + items.len() * 6);
    out.push('{');
    // Empty before the first element, a comma before every later one.
    let mut separator = "";
    for item in items {
        out.push_str(separator);
        separator = ",";
        if let Some(value) = item {
            out.push_str(&value.to_string());
        } else {
            out.push_str("NULL");
        }
    }
    out.push('}');
    out
}
433
/// v7.10.4 — render a BYTEA payload in PG's hex output format: a
/// `\x` prefix followed by two lowercase hex digits per byte, high
/// nibble first. Public so the wire layer can emit the canonical
/// bytea-as-text representation.
pub fn format_bytea_hex(b: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(2 + 2 * b.len());
    out.push_str("\\x");
    for &byte in b {
        for nibble in [byte >> 4, byte & 0x0F] {
            out.push(char::from(DIGITS[usize::from(nibble)]));
        }
    }
    out
}
447
/// Render a `Numeric { scaled, scale }` as its decimal text form.
/// With `scale == 0` the integer is printed as-is. Otherwise the
/// last `scale` digits of the magnitude form the fractional part
/// (left-padded with zeros when the magnitude is shorter), at least
/// one digit is always shown before the decimal point, and a
/// negative `scaled` gets a leading `-`.
pub fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return scaled.to_string();
    }
    let frac_len = usize::from(scale);
    // Zero-pad the magnitude to at least `scale + 1` digits so there
    // is always an integer digit to the left of the split point.
    let digits = format!(
        "{:0>width$}",
        scaled.unsigned_abs(),
        width = frac_len + 1
    );
    let (int_digits, frac_digits) = digits.split_at(digits.len() - frac_len);
    let sign = if scaled < 0 { "-" } else { "" };
    format!("{sign}{int_digits}.{frac_digits}")
}