Skip to main content

oracledb_protocol/thin/
number.rs

1#![forbid(unsafe_code)]
2
3//! Inline, lossless Oracle `NUMBER` representation (bead rust-oracledb-65w).
4//!
5//! Oracle `NUMBER` is up to 40 significant decimal digits (the wire form carries
6//! up to 20 base-100 mantissa bytes) with a decimal exponent in roughly
7//! `-130..=125`. The common case — a value with at most 38 significant digits —
8//! fits losslessly in an `i128` coefficient plus an `i16` scale, allocating
9//! nothing. The owned [`crate::thin::QueryValue::Number`] used to carry a heap
10//! `String` per cell; this module replaces that inline payload so a NUMBER-heavy
11//! row stops doing one `malloc` per NUMBER column.
12//!
13//! ## Losslessness
14//!
15//! Some wire forms cannot be represented exactly inline:
16//!
17//! - A 39- or 40-digit integer can exceed `i128::MAX` (`~1.7e38`, 39 digits).
18//! - The decoder's special single-byte negative sentinel renders as the literal
19//!   text `-1e126`, which is not a plain `coefficient × 10^-scale` decimal.
20//!
21//! For any such value the representation FALLS BACK to a boxed canonical-text
22//! carrier ([`OracleNumber::Text`]) so correctness is never sacrificed. The
23//! fallback is boxed (`Box<str>`) so the enum — and therefore
24//! [`crate::thin::QueryValue`] — stays within its 32-byte budget.
25//!
26//! ## Single shared formatter
27//!
28//! [`OracleNumber::fmt_into`] is the ONE canonical formatter. It is BYTE-IDENTICAL
29//! to the legacy [`super::codecs::decode_number_text_into`] text path (proven by
30//! `tests/number_inline_byte_identical.rs` over the whole NUMBER domain). Every
31//! consumer — `Display`, `FromSql<String>`, the OSON/JSON number text, and the
32//! borrowed `QueryValueRef::Number` arena path — routes through it, so the owned
33//! and borrowed decode paths can never diverge by even one byte.
34
35use crate::Result;
36
/// Capacity of the stack scratch buffer that the wire NUMBER digit walk fills.
///
/// An Oracle NUMBER has at most 40 significant decimal digits (20 base-100
/// mantissa bytes). Two extra slots absorb the `first_digit == 10` base-100
/// carry that the legacy walk may append.
pub(crate) const MAX_DIGITS: usize = 40 + 2;
42
/// Result of decoding a wire NUMBER without touching the heap.
///
/// This is the stack-buffer counterpart of [`DecodedNumber`]: the significant
/// digits are written into a buffer owned by the caller instead of being
/// carried inside the value.
pub(crate) enum DecodedNumberStack {
    /// A single-byte sentinel; its canonical text is a fixed static string.
    Sentinel {
        /// The fixed canonical text of the sentinel.
        text: &'static str,
        /// Whether that canonical text is integral.
        is_integer: bool,
    },
    /// Decoded components of an ordinary value. The first `digit_len` entries
    /// of the caller's stack buffer hold the significant digits.
    Parts {
        /// Number of significant digits written to the caller's buffer.
        digit_len: usize,
        /// Sign of the value.
        is_negative: bool,
        /// Position of the decimal point relative to the first digit.
        decimal_point_index: i16,
        /// Whether the canonical text is integral.
        is_integer: bool,
        /// Signed `i128` coefficient accumulated during the digit walk itself
        /// (bead rust-oracledb-shh). `Some(coeff)` equals what a separate
        /// `digits_to_i128(&digit_buf[..digit_len], is_negative)` pass would
        /// return. `None` means the digits overflow `i128`; the caller then
        /// spills to boxed text from the digit buffer, which is still filled.
        coefficient: Option<i128>,
    },
}
67
/// Lossless decimal carrier for an Oracle `NUMBER`.
///
/// Most values live in [`OracleNumber::Inline`] as `coefficient × 10^-scale`
/// and need no allocation. A value that has no exact inline form is kept in
/// [`OracleNumber::Text`] as boxed canonical text instead.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum OracleNumber {
    /// Exact decimal `coefficient × 10^-scale`; the sign lives in the
    /// coefficient. A negative `scale` means the value has trailing zeros to
    /// the left of the implied decimal point. `is_integer` repeats the legacy
    /// decoder's flag (whether the canonical text contains a decimal point) so
    /// the Python int-vs-float dispatch stays exactly as it was.
    ///
    /// The coefficient is held as the little-endian bytes of an `i128` rather
    /// than as an `i128` field. A bare `i128` would force 16-byte alignment,
    /// round the enum up to 32 bytes and break `QueryValue`'s 32-byte budget
    /// once its discriminant is added. The byte array keeps 8-byte alignment,
    /// so `OracleNumber` is 24 bytes. Read the value back through
    /// [`OracleNumber::coefficient`].
    Inline {
        coefficient_le: [u8; 16],
        scale: i16,
        is_integer: bool,
    },
    /// Fallback for values with no exact inline form: 39–40 significant digit
    /// integers that overflow `i128`, or the `-1e126` single-byte sentinel.
    /// The text is boxed so the enum stays small.
    Text { text: Box<str>, is_integer: bool },
}
96
97impl OracleNumber {
98    /// Build the inline variant from a real `i128` coefficient (stored as its
99    /// little-endian bytes to keep the enum 8-byte aligned).
100    fn inline(coefficient: i128, scale: i16, is_integer: bool) -> Self {
101        OracleNumber::Inline {
102            coefficient_le: coefficient.to_le_bytes(),
103            scale,
104            is_integer,
105        }
106    }
107
108    /// The inline coefficient as an `i128`, or `None` for the boxed-text
109    /// fallback. `value == coefficient × 10^-scale`.
110    pub fn coefficient(&self) -> Option<i128> {
111        match self {
112            OracleNumber::Inline { coefficient_le, .. } => {
113                Some(i128::from_le_bytes(*coefficient_le))
114            }
115            OracleNumber::Text { .. } => None,
116        }
117    }
118
119    /// The inline scale, or `None` for the boxed-text fallback.
120    pub fn scale(&self) -> Option<i16> {
121        match self {
122            OracleNumber::Inline { scale, .. } => Some(*scale),
123            OracleNumber::Text { .. } => None,
124        }
125    }
126
127    /// Decode an Oracle `NUMBER` wire form into the inline representation,
128    /// falling back to a boxed canonical-text carrier when the value cannot be
129    /// represented exactly inline. The canonical text — whether produced inline
130    /// or stored in the fallback — is byte-identical to the legacy decoder.
131    ///
132    /// ZERO-ALLOCATION for the common inline case: the digit walk writes into a
133    /// fixed stack buffer (Oracle NUMBER has at most 40 significant digits), and
134    /// the inline coefficient/scale is folded directly — no scratch `Vec`/`String`
135    /// is heap-allocated. Only the rare text fallback (sentinel / i128 overflow)
136    /// touches the heap, and only then.
137    pub fn from_wire(bytes: &[u8]) -> Result<Self> {
138        // Stack scratch: up to 40 significant decimal digits + slack for the
139        // base-100 carry the digit walk can append.
140        let mut digit_buf = [0u8; MAX_DIGITS];
141        match super::codecs::decode_number_parts_stack(bytes, &mut digit_buf)? {
142            // Single-byte sentinels: format their canonical text once.
143            DecodedNumberStack::Sentinel { text, is_integer } => Ok(OracleNumber::Text {
144                text: text.into(),
145                is_integer,
146            }),
147            DecodedNumberStack::Parts {
148                digit_len,
149                is_negative,
150                decimal_point_index,
151                is_integer,
152                coefficient,
153            } => {
154                let digits = &digit_buf[..digit_len];
155                // The i128 coefficient was FUSED during the digit walk (bead
156                // rust-oracledb-shh): `Some` is byte-identical to the old second
157                // `digits_to_i128(digits, is_negative)` pass; `None` is the same
158                // i128-overflow signal (39–40 digit value), which spills to text
159                // using the still-filled `digits` exactly as before.
160                match coefficient {
161                    Some(coefficient) => {
162                        // scale = len - decimal_point_index (implied fractional
163                        // positions; may be negative for trailing-zero integers).
164                        let len = i32::try_from(digits.len()).unwrap_or(i32::MAX);
165                        let scale_i32 = len - i32::from(decimal_point_index);
166                        match i16::try_from(scale_i32) {
167                            Ok(scale) => Ok(OracleNumber::inline(coefficient, scale, is_integer)),
168                            // Scale out of i16 range (cannot happen for valid
169                            // Oracle NUMBER, but stay defensive): keep the text.
170                            Err(_) => Ok(Self::spill_text(
171                                digits,
172                                is_negative,
173                                decimal_point_index,
174                                is_integer,
175                            )),
176                        }
177                    }
178                    // i128 overflow (39–40 digit value): spill to boxed text.
179                    None => Ok(Self::spill_text(
180                        digits,
181                        is_negative,
182                        decimal_point_index,
183                        is_integer,
184                    )),
185                }
186            }
187        }
188    }
189
190    /// Format the digits into a boxed-text fallback (the rare path: i128 overflow
191    /// or out-of-range scale). Uses the SAME formatter fragment as the inline
192    /// path, so the text is byte-identical.
193    fn spill_text(
194        digits: &[u8],
195        is_negative: bool,
196        decimal_point_index: i16,
197        is_integer: bool,
198    ) -> Self {
199        let mut text = String::new();
200        super::codecs::format_number_digits(digits, is_negative, decimal_point_index, &mut text);
201        OracleNumber::Text {
202            text: text.into_boxed_str(),
203            is_integer,
204        }
205    }
206
207    /// Construct from already-canonical decimal text (the bind / parse path).
208    /// Parses the text into the inline form when it fits, else keeps it boxed.
209    /// The text MUST already be canonical Oracle `NUMBER` text (the form the
210    /// decoder emits); this does not re-canonicalize.
211    pub fn from_canonical_text(text: &str) -> Self {
212        Self::from_canonical_text_with_flag(text, !text.contains('.'))
213    }
214
215    /// Like [`Self::from_canonical_text`] but with the caller-supplied
216    /// `is_integer` flag (the borrowed fetch path already decoded it from the
217    /// wire, so it is authoritative — preserve it verbatim).
218    pub fn from_canonical_text_with_flag(text: &str, is_integer: bool) -> Self {
219        match parse_canonical_inline(text) {
220            Some((coefficient, scale)) => OracleNumber::inline(coefficient, scale, is_integer),
221            None => OracleNumber::Text {
222                text: text.into(),
223                is_integer,
224            },
225        }
226    }
227
228    /// Borrow the canonical text when it is stored as boxed text (the fallback
229    /// form), else `None` — the inline numeric form synthesizes its text on
230    /// demand and has no `&str` to lend.
231    pub fn as_borrowed_text(&self) -> Option<&str> {
232        match self {
233            OracleNumber::Text { text, .. } => Some(text),
234            OracleNumber::Inline { .. } => None,
235        }
236    }
237
238    /// Whether the canonical text is integral (carries no decimal point).
239    /// Mirrors the legacy `is_integer` flag exactly.
240    pub fn is_integer(&self) -> bool {
241        match self {
242            OracleNumber::Inline { is_integer, .. } | OracleNumber::Text { is_integer, .. } => {
243                *is_integer
244            }
245        }
246    }
247
248    /// THE single shared canonical formatter. Appends the canonical decimal text
249    /// to `out`. Byte-identical to [`super::codecs::decode_number_text_into`].
250    pub fn fmt_into(&self, out: &mut String) {
251        match self {
252            OracleNumber::Text { text, .. } => out.push_str(text),
253            OracleNumber::Inline {
254                coefficient_le,
255                scale,
256                ..
257            } => fmt_inline_into(i128::from_le_bytes(*coefficient_le), *scale, out),
258        }
259    }
260
261    /// Canonical decimal text as an owned `String`.
262    pub fn to_canonical_string(&self) -> String {
263        let mut out = String::new();
264        self.fmt_into(&mut out);
265        out
266    }
267
268    /// Canonical decimal text as a `Cow`: borrowed for the boxed-text fallback
269    /// (zero allocation), owned for the inline form (formatted once on demand).
270    pub fn to_canonical_cow(&self) -> std::borrow::Cow<'_, str> {
271        match self {
272            OracleNumber::Text { text, .. } => std::borrow::Cow::Borrowed(text),
273            OracleNumber::Inline { .. } => std::borrow::Cow::Owned(self.to_canonical_string()),
274        }
275    }
276
277    /// Exact `i64` when the value is an integer that fits; else `None`.
278    pub fn to_i64(&self) -> Option<i64> {
279        match self {
280            OracleNumber::Inline {
281                coefficient_le,
282                scale,
283                ..
284            } => inline_to_i128(i128::from_le_bytes(*coefficient_le), *scale)
285                .and_then(|v| i64::try_from(v).ok()),
286            OracleNumber::Text { text, .. } => text.parse::<i64>().ok(),
287        }
288    }
289
290    /// Exact `i128` when the value is an integer that fits; else `None`.
291    pub fn to_i128(&self) -> Option<i128> {
292        match self {
293            OracleNumber::Inline {
294                coefficient_le,
295                scale,
296                ..
297            } => inline_to_i128(i128::from_le_bytes(*coefficient_le), *scale),
298            OracleNumber::Text { text, .. } => text.parse::<i128>().ok(),
299        }
300    }
301}
302
/// What the wire digit walk produced: a value that must stay as text, or the
/// components from which the inline form is built.
pub(crate) enum DecodedNumber {
    /// The canonical text has already been written to `text` and must be kept
    /// verbatim. This covers the special single-byte sentinels, which are not
    /// a plain `coeff × 10^-scale` decimal.
    Text { is_integer: bool },
    /// Components to fold into the inline coefficient/scale form.
    Parts {
        is_negative: bool,
        decimal_point_index: i16,
        is_integer: bool,
    },
}
316
/// Accumulate significant decimal `digits` (each in `0..=9`, most significant
/// first) into a signed `i128` coefficient. Returns `None` as soon as the
/// magnitude overflows `i128`.
///
/// This is the reference implementation that the fused in-walk accumulator in
/// `decode_number_parts_stack` (bead rust-oracledb-shh) has to match exactly.
/// It is compiled for tests only, where it serves as the differential oracle
/// (see the `fused_coefficient_matches_reference_walk` test).
#[cfg(test)]
fn digits_to_i128(digits: &[u8], is_negative: bool) -> Option<i128> {
    let magnitude = digits.iter().try_fold(0i128, |so_far, &digit| {
        so_far.checked_mul(10)?.checked_add(i128::from(digit))
    })?;
    Some(if is_negative { -magnitude } else { magnitude })
}
337
/// Reconstruct the exact integer value of the inline form
/// `coefficient × 10^-scale`.
///
/// Returns `None` when the value has a non-zero fractional part, or when it
/// is an integer too large for `i128`.
///
/// A zero coefficient is the integer 0 at every scale and is answered
/// directly. Without that shortcut a scale above 38 would overflow while
/// building the divisor and wrongly report "not an integer".
fn inline_to_i128(coefficient: i128, scale: i16) -> Option<i128> {
    if coefficient == 0 {
        return Some(0);
    }

    // 10^|scale|; `None` once the power itself no longer fits `i128`.
    let power_of_ten = 10i128.checked_pow(u32::from(scale.unsigned_abs()));

    if scale <= 0 {
        // value = coefficient × 10^(-scale). The coefficient is non-zero here,
        // so if the power overflows, the product overflows too.
        coefficient.checked_mul(power_of_ten?)
    } else {
        // Integral only if the low `scale` decimal digits are all zero. A
        // non-zero `i128` has at most 39 digits, so a divisor too big for
        // `i128` can never divide it: the value is fractional.
        let divisor = power_of_ten?;
        (coefficient % divisor == 0).then(|| coefficient / divisor)
    }
}
365
/// Append the canonical Oracle `NUMBER` text of `coefficient × 10^-scale` to
/// `out`, BYTE-IDENTICAL to the legacy `decode_number_text_into`.
///
/// The legacy formatter is driven by the significant digits and a
/// `decimal_point_index`. Both are recovered here: the digits are the decimal
/// digits of `|coefficient|`, and the point index is `digit_count - scale`.
fn fmt_inline_into(coefficient: i128, scale: i16, out: &mut String) {
    /// Append ASCII digit bytes to `out` as characters.
    fn push_ascii(out: &mut String, ascii_digits: &[u8]) {
        out.extend(ascii_digits.iter().map(|&byte| char::from(byte)));
    }

    // Zero always renders as "0", whatever the scale (this also covers the
    // legacy single-byte-zero and negative-zero canonicalization).
    if coefficient == 0 {
        out.push('0');
        return;
    }

    // Render |coefficient| right-to-left into a stack buffer. `unsigned_abs`
    // keeps `i128::MIN` safe; the magnitude of an `i128` has at most 39 digits.
    let mut scratch = [0u8; 40];
    let mut start = scratch.len();
    let mut remaining = coefficient.unsigned_abs();
    while remaining != 0 {
        start -= 1;
        scratch[start] = b'0' + (remaining % 10) as u8;
        remaining /= 10;
    }
    let digits = &scratch[start..];
    let digit_count = digits.len() as i32;
    let point = digit_count - i32::from(scale);

    if coefficient < 0 {
        out.push('-');
    }

    if point <= 0 {
        // Pure fraction: "0." then `-point` leading zeros then every digit.
        out.push_str("0.");
        out.extend(std::iter::repeat('0').take(point.unsigned_abs() as usize));
        push_ascii(out, digits);
    } else if point < digit_count {
        // The point falls inside the digit run.
        let (whole, fraction) = digits.split_at(point as usize);
        push_ascii(out, whole);
        out.push('.');
        push_ascii(out, fraction);
    } else {
        // The point is at or past the last digit: integer text, padded with
        // zeros up to the point.
        push_ascii(out, digits);
        out.extend(std::iter::repeat('0').take((point - digit_count) as usize));
    }
}
426
/// Parse already-canonical Oracle `NUMBER` text into `(coefficient, scale)`.
///
/// Returns `None` when the text cannot be carried inline, in which case the
/// caller keeps the text as-is. That happens when:
///
/// - the text contains no digit at all (`""`, `"-"`, `"."`, `"-."`);
/// - it contains anything other than an optional leading `-`, ASCII digits and
///   at most one `.` (so the `-1e126` sentinel, with its `e`, is rejected);
/// - the digits overflow `i128`, or the fraction length overflows `i16`.
///
/// The coefficient is all digits read as one integer with the sign applied;
/// the scale is the number of digits after the `.`.
fn parse_canonical_inline(text: &str) -> Option<(i128, i16)> {
    let (is_negative, unsigned) = match text.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, text),
    };
    let (int_part, frac_part) = unsigned.split_once('.').unwrap_or((unsigned, ""));

    // A number needs at least one digit. Without this guard a lone "." (or
    // "-.") would pass the digit checks vacuously and silently become zero.
    if int_part.is_empty() && frac_part.is_empty() {
        return None;
    }

    // Fold every digit into the magnitude, bailing out on the first non-digit
    // (exponent marker, second '.', stray sign, ...) or on `i128` overflow.
    let magnitude = int_part
        .bytes()
        .chain(frac_part.bytes())
        .try_fold(0i128, |so_far, byte| {
            if !byte.is_ascii_digit() {
                return None;
            }
            so_far.checked_mul(10)?.checked_add(i128::from(byte - b'0'))
        })?;
    let scale = i16::try_from(frac_part.len()).ok()?;

    // `magnitude` is non-negative, so negation cannot overflow.
    let coefficient = if is_negative { -magnitude } else { magnitude };
    Some((coefficient, scale))
}
458
459impl std::fmt::Display for OracleNumber {
460    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
461        let mut s = String::new();
462        self.fmt_into(&mut s);
463        f.write_str(&s)
464    }
465}
466
#[cfg(test)]
mod tests {
    use super::*;
    use crate::thin::codecs::{decode_number_parts_stack, encode_number_text};

    /// Differential check for the fused `i128` accumulator (bead
    /// rust-oracledb-shh).
    ///
    /// The `coefficient` that `decode_number_parts_stack` builds during its
    /// digit walk must equal the reference second pass,
    /// `digits_to_i128(digits, is_negative)`, run over the still-filled digit
    /// buffer. That includes agreeing on overflow (`None`). A divergence would
    /// silently shift the inline coefficient and everything derived from it,
    /// so this check gates the optimization. Sentinel results carry no
    /// coefficient and are skipped.
    fn assert_fused_matches_reference(wire: &[u8], label: &str) {
        let mut digit_buf = [0u8; MAX_DIGITS];
        let decoded = decode_number_parts_stack(wire, &mut digit_buf).expect("decode valid wire");
        match decoded {
            DecodedNumberStack::Parts {
                digit_len,
                is_negative,
                coefficient,
                ..
            } => {
                let reference = digits_to_i128(&digit_buf[..digit_len], is_negative);
                assert_eq!(
                    coefficient, reference,
                    "{label}: fused coefficient {coefficient:?} != reference walk {reference:?} (wire={wire:02x?})"
                );
            }
            DecodedNumberStack::Sentinel { .. } => {}
        }
    }

    #[test]
    fn fused_coefficient_matches_reference_walk_corpus() {
        // Covers the inline domain and straddles the `i128` boundary.
        const CORPUS: &[&str] = &[
            "0",
            "1",
            "-1",
            "9",
            "-9",
            "10",
            "99",
            "-99",
            "100",
            "12345",
            "-12345",
            "0.5",
            "-0.5",
            "3.14159",
            "100.001",
            "0.0001",
            "1000000000000000000",
            "12345678901234567890",
            "123456789012345678901234567890",
            // 38 significant digits (max inline precision).
            "12345678901234567890123456789012345678",
            "-12345678901234567890123456789012345678",
            "0.12345678901234567890123456789012345678",
            // 39 significant digits: about 1.23e38, which still fits `i128`.
            "123456789012345678901234567890123456789",
            // Forty nines: overflows `i128`, so the fused walk must latch
            // `None` exactly as the reference does.
            "9999999999999999999999999999999999999999",
            "1e125",
            "-1e125",
            "1e-120",
        ];
        for text in CORPUS.iter().copied() {
            let wire = match encode_number_text(text) {
                Ok(wire) => wire,
                Err(e) => panic!("encode {text}: {e:?}"),
            };
            assert_fused_matches_reference(&wire, text);
        }
    }

    #[test]
    fn inline_form_fits_the_size_budget() {
        // At most 24 bytes with 8-byte alignment (thanks to the `[u8; 16]`
        // coefficient), so `QueryValue` keeps its 32-byte budget.
        let size = core::mem::size_of::<OracleNumber>();
        let align = core::mem::align_of::<OracleNumber>();
        assert!(size <= 24);
        assert_eq!(align, 8);
    }

    #[test]
    fn formatter_matches_known_canonical_text() {
        // (coefficient, scale, is_integer, expected canonical text)
        const CASES: &[(i128, i16, bool, &str)] = &[
            (0, 0, true, "0"),
            (1, 0, true, "1"),
            (-1, 0, true, "-1"),
            (5, 1, false, "0.5"),
            (-5, 1, false, "-0.5"),
            (314159, 5, false, "3.14159"),
            (1, -2, true, "100"), // 1 × 10^2
            (12, 0, true, "12"),
            (100001, 3, false, "100.001"),
            (15, 1, false, "1.5"),
        ];
        for &(coeff, scale, is_int, expect) in CASES {
            let number = OracleNumber::inline(coeff, scale, is_int);
            let rendered = number.to_canonical_string();
            assert_eq!(rendered, expect, "coeff={coeff} scale={scale}");
            assert_eq!(number.is_integer(), is_int);
        }
    }

    #[test]
    fn from_canonical_text_round_trips() {
        const TEXTS: [&str; 7] = [
            "0",
            "1",
            "-1",
            "0.5",
            "100",
            "0.001",
            "12345678901234567890",
        ];
        for text in TEXTS {
            let rendered = OracleNumber::from_canonical_text(text).to_canonical_string();
            assert_eq!(rendered, text);
        }
    }

    #[test]
    fn overflow_value_falls_back_to_text_losslessly() {
        // A 40-digit integer cannot fit `i128` (39 digits at most), so it must
        // land in the boxed fallback with its text preserved verbatim. Going
        // through `from_wire` would need the encoder, so this exercises the
        // text constructor directly.
        let big = "1234567890123456789012345678901234567890"; // 40 digits
        let number = OracleNumber::from_canonical_text(big);
        let is_text = matches!(number, OracleNumber::Text { .. });
        assert!(is_text, "40-digit -> text fallback");
        assert_eq!(number.to_canonical_string(), big);
    }
}