oracledb_protocol/thin/number.rs
1#![forbid(unsafe_code)]
2
3//! Inline, lossless Oracle `NUMBER` representation (bead rust-oracledb-65w).
4//!
5//! Oracle `NUMBER` is up to 40 significant decimal digits (the wire form carries
6//! up to 20 base-100 mantissa bytes) with a decimal exponent in roughly
7//! `-130..=125`. The common case — a value with at most 38 significant digits —
8//! fits losslessly in an `i128` coefficient plus an `i16` scale, allocating
9//! nothing. The owned [`crate::thin::QueryValue::Number`] used to carry a heap
10//! `String` per cell; this module replaces that inline payload so a NUMBER-heavy
11//! row stops doing one `malloc` per NUMBER column.
12//!
13//! ## Losslessness
14//!
15//! Some wire forms cannot be represented exactly inline:
16//!
17//! - A 39- or 40-digit integer can exceed `i128::MAX` (`~1.7e38`, 39 digits).
18//! - The decoder's special single-byte negative sentinel renders as the literal
19//! text `-1e126`, which is not a plain `coefficient × 10^-scale` decimal.
20//!
21//! For any such value the representation FALLS BACK to a boxed canonical-text
22//! carrier ([`OracleNumber::Text`]) so correctness is never sacrificed. The
23//! fallback is boxed (`Box<str>`) so the enum — and therefore
24//! [`crate::thin::QueryValue`] — stays within its 32-byte budget.
25//!
26//! ## Single shared formatter
27//!
28//! [`OracleNumber::fmt_into`] is the ONE canonical formatter. It is BYTE-IDENTICAL
29//! to the legacy [`super::codecs::decode_number_text_into`] text path (proven by
30//! `tests/number_inline_byte_identical.rs` over the whole NUMBER domain). Every
31//! consumer — `Display`, `FromSql<String>`, the OSON/JSON number text, and the
32//! borrowed `QueryValueRef::Number` arena path — routes through it, so the owned
33//! and borrowed decode paths can never diverge by even one byte.
34
35use crate::Result;
36
/// Capacity, in decimal digits, of the stack buffer the wire NUMBER digit walk
/// writes into.
///
/// Oracle NUMBER carries at most 40 significant digits (20 base-100 mantissa
/// bytes); the extra 2 slots absorb the `first_digit == 10` base-100 carry the
/// legacy walk can append.
pub(crate) const MAX_DIGITS: usize = 40 + 2;
42
/// Stack-decoded parts of a wire NUMBER (no heap allocation). Mirror of
/// [`DecodedNumber`] but with digits written into a caller stack buffer.
///
/// The enum itself carries no borrow of the digit buffer: `Parts` only records
/// how many digits were written (`digit_len`), so the caller re-slices its own
/// buffer after decoding.
pub(crate) enum DecodedNumberStack {
    /// A single-byte sentinel whose canonical text is fixed.
    Sentinel {
        /// The fixed canonical text for this sentinel.
        text: &'static str,
        /// Integer flag reported for the sentinel, passed through to the
        /// resulting [`OracleNumber`] unchanged.
        is_integer: bool,
    },
    /// The decoded parts; `digit_len` significant digits were written to the
    /// caller's stack buffer.
    Parts {
        /// Number of leading entries of the caller's buffer that hold digits.
        digit_len: usize,
        /// Whether the decoded value is negative.
        is_negative: bool,
        /// Position of the decimal point relative to the first digit; the
        /// inline scale is derived as `digit_len - decimal_point_index`.
        decimal_point_index: i16,
        /// Integer flag as decoded from the wire form.
        is_integer: bool,
        /// The i128 coefficient FUSED during the digit walk (bead
        /// rust-oracledb-shh): `Some(coeff)` is byte-identical to a second
        /// `digits_to_i128(&digit_buf[..digit_len], is_negative)` pass; `None`
        /// signals i128 overflow (39–40 digit values), in which case the caller
        /// spills to boxed text using the still-filled `digit_buf` exactly as
        /// before. The sign is already applied.
        coefficient: Option<i128>,
    },
}
67
/// Inline, lossless decimal carrier for an Oracle `NUMBER`.
///
/// The common case is [`OracleNumber::Inline`] (`coefficient × 10^-scale`,
/// allocation-free). Values that cannot be represented exactly inline fall back
/// to [`OracleNumber::Text`] (a boxed canonical-text carrier).
///
/// Equality is derived, so it compares the stored representation field by
/// field (including `is_integer`), not the mathematical value: an `Inline` and
/// a `Text` value never compare equal.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum OracleNumber {
    /// `value == coefficient × 10^-scale`, with the sign carried in
    /// `coefficient`. `scale` may be negative (the value has trailing zeros to
    /// the left of the implied point). `is_integer` mirrors the legacy decoder's
    /// flag — whether the canonical text contains a decimal point — so the
    /// Python int-vs-float dispatch is preserved exactly.
    ///
    /// The coefficient is stored as its little-endian `i128` bytes rather than a
    /// bare `i128` field: a bare `i128` forces 16-byte alignment, which rounds
    /// the enum up to 32 bytes and would blow `QueryValue`'s 32-byte budget once
    /// the discriminant is added. The `[u8; 16]` form keeps 8-byte alignment so
    /// `OracleNumber` is 24 bytes. Access via [`OracleNumber::coefficient`].
    Inline {
        /// Little-endian bytes of the signed `i128` coefficient.
        coefficient_le: [u8; 16],
        /// Decimal scale: the value is `coefficient × 10^-scale`.
        scale: i16,
        /// Legacy integer flag, carried verbatim.
        is_integer: bool,
    },
    /// Defensive fallback for values that do not fit the inline form exactly
    /// (39–40 significant digit integers that overflow `i128`, or the `-1e126`
    /// single-byte sentinel). Boxed so the enum stays small.
    Text { text: Box<str>, is_integer: bool },
}
96
97impl OracleNumber {
98 /// Build the inline variant from a real `i128` coefficient (stored as its
99 /// little-endian bytes to keep the enum 8-byte aligned).
100 fn inline(coefficient: i128, scale: i16, is_integer: bool) -> Self {
101 OracleNumber::Inline {
102 coefficient_le: coefficient.to_le_bytes(),
103 scale,
104 is_integer,
105 }
106 }
107
108 /// The inline coefficient as an `i128`, or `None` for the boxed-text
109 /// fallback. `value == coefficient × 10^-scale`.
110 pub fn coefficient(&self) -> Option<i128> {
111 match self {
112 OracleNumber::Inline { coefficient_le, .. } => {
113 Some(i128::from_le_bytes(*coefficient_le))
114 }
115 OracleNumber::Text { .. } => None,
116 }
117 }
118
119 /// The inline scale, or `None` for the boxed-text fallback.
120 pub fn scale(&self) -> Option<i16> {
121 match self {
122 OracleNumber::Inline { scale, .. } => Some(*scale),
123 OracleNumber::Text { .. } => None,
124 }
125 }
126
127 /// Decode an Oracle `NUMBER` wire form into the inline representation,
128 /// falling back to a boxed canonical-text carrier when the value cannot be
129 /// represented exactly inline. The canonical text — whether produced inline
130 /// or stored in the fallback — is byte-identical to the legacy decoder.
131 ///
132 /// ZERO-ALLOCATION for the common inline case: the digit walk writes into a
133 /// fixed stack buffer (Oracle NUMBER has at most 40 significant digits), and
134 /// the inline coefficient/scale is folded directly — no scratch `Vec`/`String`
135 /// is heap-allocated. Only the rare text fallback (sentinel / i128 overflow)
136 /// touches the heap, and only then.
137 pub fn from_wire(bytes: &[u8]) -> Result<Self> {
138 // Stack scratch: up to 40 significant decimal digits + slack for the
139 // base-100 carry the digit walk can append.
140 let mut digit_buf = [0u8; MAX_DIGITS];
141 match super::codecs::decode_number_parts_stack(bytes, &mut digit_buf)? {
142 // Single-byte sentinels: format their canonical text once.
143 DecodedNumberStack::Sentinel { text, is_integer } => Ok(OracleNumber::Text {
144 text: text.into(),
145 is_integer,
146 }),
147 DecodedNumberStack::Parts {
148 digit_len,
149 is_negative,
150 decimal_point_index,
151 is_integer,
152 coefficient,
153 } => {
154 let digits = &digit_buf[..digit_len];
155 // The i128 coefficient was FUSED during the digit walk (bead
156 // rust-oracledb-shh): `Some` is byte-identical to the old second
157 // `digits_to_i128(digits, is_negative)` pass; `None` is the same
158 // i128-overflow signal (39–40 digit value), which spills to text
159 // using the still-filled `digits` exactly as before.
160 match coefficient {
161 Some(coefficient) => {
162 // scale = len - decimal_point_index (implied fractional
163 // positions; may be negative for trailing-zero integers).
164 let len = i32::try_from(digits.len()).unwrap_or(i32::MAX);
165 let scale_i32 = len - i32::from(decimal_point_index);
166 match i16::try_from(scale_i32) {
167 Ok(scale) => Ok(OracleNumber::inline(coefficient, scale, is_integer)),
168 // Scale out of i16 range (cannot happen for valid
169 // Oracle NUMBER, but stay defensive): keep the text.
170 Err(_) => Ok(Self::spill_text(
171 digits,
172 is_negative,
173 decimal_point_index,
174 is_integer,
175 )),
176 }
177 }
178 // i128 overflow (39–40 digit value): spill to boxed text.
179 None => Ok(Self::spill_text(
180 digits,
181 is_negative,
182 decimal_point_index,
183 is_integer,
184 )),
185 }
186 }
187 }
188 }
189
190 /// Format the digits into a boxed-text fallback (the rare path: i128 overflow
191 /// or out-of-range scale). Uses the SAME formatter fragment as the inline
192 /// path, so the text is byte-identical.
193 fn spill_text(
194 digits: &[u8],
195 is_negative: bool,
196 decimal_point_index: i16,
197 is_integer: bool,
198 ) -> Self {
199 let mut text = String::new();
200 super::codecs::format_number_digits(digits, is_negative, decimal_point_index, &mut text);
201 OracleNumber::Text {
202 text: text.into_boxed_str(),
203 is_integer,
204 }
205 }
206
207 /// Construct from already-canonical decimal text (the bind / parse path).
208 /// Parses the text into the inline form when it fits, else keeps it boxed.
209 /// The text MUST already be canonical Oracle `NUMBER` text (the form the
210 /// decoder emits); this does not re-canonicalize.
211 pub fn from_canonical_text(text: &str) -> Self {
212 Self::from_canonical_text_with_flag(text, !text.contains('.'))
213 }
214
215 /// Like [`Self::from_canonical_text`] but with the caller-supplied
216 /// `is_integer` flag (the borrowed fetch path already decoded it from the
217 /// wire, so it is authoritative — preserve it verbatim).
218 pub fn from_canonical_text_with_flag(text: &str, is_integer: bool) -> Self {
219 match parse_canonical_inline(text) {
220 Some((coefficient, scale)) => OracleNumber::inline(coefficient, scale, is_integer),
221 None => OracleNumber::Text {
222 text: text.into(),
223 is_integer,
224 },
225 }
226 }
227
228 /// Borrow the canonical text when it is stored as boxed text (the fallback
229 /// form), else `None` — the inline numeric form synthesizes its text on
230 /// demand and has no `&str` to lend.
231 pub fn as_borrowed_text(&self) -> Option<&str> {
232 match self {
233 OracleNumber::Text { text, .. } => Some(text),
234 OracleNumber::Inline { .. } => None,
235 }
236 }
237
238 /// Whether the canonical text is integral (carries no decimal point).
239 /// Mirrors the legacy `is_integer` flag exactly.
240 pub fn is_integer(&self) -> bool {
241 match self {
242 OracleNumber::Inline { is_integer, .. } | OracleNumber::Text { is_integer, .. } => {
243 *is_integer
244 }
245 }
246 }
247
248 /// THE single shared canonical formatter. Appends the canonical decimal text
249 /// to `out`. Byte-identical to [`super::codecs::decode_number_text_into`].
250 pub fn fmt_into(&self, out: &mut String) {
251 match self {
252 OracleNumber::Text { text, .. } => out.push_str(text),
253 OracleNumber::Inline {
254 coefficient_le,
255 scale,
256 ..
257 } => fmt_inline_into(i128::from_le_bytes(*coefficient_le), *scale, out),
258 }
259 }
260
261 /// Canonical decimal text as an owned `String`.
262 pub fn to_canonical_string(&self) -> String {
263 let mut out = String::new();
264 self.fmt_into(&mut out);
265 out
266 }
267
268 /// Canonical decimal text as a `Cow`: borrowed for the boxed-text fallback
269 /// (zero allocation), owned for the inline form (formatted once on demand).
270 pub fn to_canonical_cow(&self) -> std::borrow::Cow<'_, str> {
271 match self {
272 OracleNumber::Text { text, .. } => std::borrow::Cow::Borrowed(text),
273 OracleNumber::Inline { .. } => std::borrow::Cow::Owned(self.to_canonical_string()),
274 }
275 }
276
277 /// Exact `i64` when the value is an integer that fits; else `None`.
278 pub fn to_i64(&self) -> Option<i64> {
279 match self {
280 OracleNumber::Inline {
281 coefficient_le,
282 scale,
283 ..
284 } => inline_to_i128(i128::from_le_bytes(*coefficient_le), *scale)
285 .and_then(|v| i64::try_from(v).ok()),
286 OracleNumber::Text { text, .. } => text.parse::<i64>().ok(),
287 }
288 }
289
290 /// Exact `i128` when the value is an integer that fits; else `None`.
291 pub fn to_i128(&self) -> Option<i128> {
292 match self {
293 OracleNumber::Inline {
294 coefficient_le,
295 scale,
296 ..
297 } => inline_to_i128(i128::from_le_bytes(*coefficient_le), *scale),
298 OracleNumber::Text { text, .. } => text.parse::<i128>().ok(),
299 }
300 }
301}
302
/// Outcome of the wire digit walk: either a sentinel/overflow case that must be
/// kept as text, or the decoded parts the inline form is built from.
///
/// Unlike [`DecodedNumberStack`], this enum carries neither the digits nor the
/// text itself — only the flags describing what was decoded.
pub(crate) enum DecodedNumber {
    /// The canonical text is already in `text`; keep it verbatim (the special
    /// single-byte sentinel cases that are not plain `coeff × 10^-scale`).
    // NOTE(review): `text` presumably names the caller's output buffer of the
    // legacy decode routine, which is not visible in this file — confirm.
    Text { is_integer: bool },
    /// Parts to fold into the inline coefficient/scale form.
    Parts {
        /// Whether the decoded value is negative.
        is_negative: bool,
        /// Position of the decimal point relative to the first digit.
        decimal_point_index: i16,
        /// Integer flag as decoded from the wire form.
        is_integer: bool,
    },
}
316
/// Accumulate the significant decimal `digits` (each 0..=9, most significant
/// first) into a signed `i128` coefficient, or `None` if the magnitude
/// overflows `i128` (39–40 digit values).
///
/// This is the reference the FUSED in-walk accumulator (bead rust-oracledb-shh,
/// `decode_number_parts_stack`) must reproduce byte-for-byte. It is retained as
/// the differential oracle for that fusion (see the `fused_coefficient_matches_
/// reference_walk` test) and is otherwise unused in production code.
#[cfg(test)]
fn digits_to_i128(digits: &[u8], is_negative: bool) -> Option<i128> {
    let magnitude = digits.iter().try_fold(0i128, |acc, &digit| {
        acc.checked_mul(10)?.checked_add(i128::from(digit))
    })?;
    // The magnitude is non-negative and at most i128::MAX, so negation is safe.
    Some(if is_negative { -magnitude } else { magnitude })
}
337
/// Reconstruct the exact integer value of the inline form
/// `coefficient × 10^-scale`.
///
/// Returns `None` when the value is fractional (a positive `scale` whose
/// trailing digits are not all zero) or when the scaled result does not fit an
/// `i128`.
///
/// A zero coefficient is the integer zero at every scale, so it yields
/// `Some(0)` even when `10^|scale|` itself cannot be represented.
fn inline_to_i128(coefficient: i128, scale: i16) -> Option<i128> {
    // Zero is integral regardless of scale; handle it before computing a power
    // of ten that may overflow for large |scale|.
    if coefficient == 0 {
        return Some(0);
    }

    // 10^|scale|. If this overflows i128 (|scale| >= 39) a non-zero coefficient
    // can neither be scaled up into range nor be divisible by it.
    let power = 10i128.checked_pow(u32::from(scale.unsigned_abs()))?;

    if scale <= 0 {
        // Non-positive scale: value = coefficient × 10^(-scale), an integer.
        coefficient.checked_mul(power)
    } else if coefficient % power == 0 {
        // Positive scale: integral only when the trailing `scale` digits are zero.
        Some(coefficient / power)
    } else {
        None
    }
}
365
/// Append the canonical Oracle `NUMBER` text of the inline form
/// `coefficient × 10^-scale` to `out`, BYTE-IDENTICAL to the legacy
/// `decode_number_text_into`.
///
/// The legacy formatter works from significant decimal digits plus a
/// `decimal_point_index`. Here those inputs are recovered from the inline form:
/// the digits are the decimal digits of `|coefficient|`, and the point sits at
/// `digit_count - scale`.
fn fmt_inline_into(coefficient: i128, scale: i16, out: &mut String) {
    // Zero always renders as "0", whatever the scale.
    if coefficient == 0 {
        out.push('0');
        return;
    }

    // Write the ASCII digits of |coefficient| right-aligned into a scratch
    // buffer. `unsigned_abs` sidesteps the i128::MIN negation overflow; the
    // largest magnitude (2^127) has 39 digits, so 40 slots suffice.
    let mut scratch = [b'0'; 40];
    let mut start = scratch.len();
    let mut remaining = coefficient.unsigned_abs();
    while remaining != 0 {
        start -= 1;
        scratch[start] += (remaining % 10) as u8;
        remaining /= 10;
    }
    let digits = &scratch[start..];
    let digit_count = digits.len() as i32;
    let point = digit_count - i32::from(scale);

    if coefficient < 0 {
        out.push('-');
    }

    if point <= 0 {
        // Pure fraction: "0." + (-point) zeros + every digit.
        out.push_str("0.");
        out.extend(std::iter::repeat('0').take(point.unsigned_abs() as usize));
        out.extend(digits.iter().map(|&d| char::from(d)));
    } else if point >= digit_count {
        // Integer: every digit, then zeros up to the point (none when equal).
        out.extend(digits.iter().map(|&d| char::from(d)));
        out.extend(std::iter::repeat('0').take((point - digit_count) as usize));
    } else {
        // The point falls strictly inside the digit run.
        let (whole, fraction) = digits.split_at(point as usize);
        out.extend(whole.iter().map(|&d| char::from(d)));
        out.push('.');
        out.extend(fraction.iter().map(|&d| char::from(d)));
    }
}
426
/// Parse already-canonical Oracle `NUMBER` text into `(coefficient, scale)`.
///
/// The expected input is the decoder's canonical form: an optional leading
/// `-`, decimal digits, and at most one `.`. The returned pair satisfies
/// `value == coefficient × 10^-scale`, where `scale` is the number of digits
/// after the point.
///
/// Returns `None` — so the caller keeps the text verbatim — when:
///
/// - the text contains no digit at all (`""`, `"-"`, `"."`, `"-."`);
/// - any character other than the sign, digits and a single `.` is present
///   (this covers the `-1e126` sentinel, which contains an `e`);
/// - the digits overflow `i128`, or the fractional length overflows `i16`.
fn parse_canonical_inline(text: &str) -> Option<(i128, i16)> {
    let (is_negative, unsigned) = match text.strip_prefix('-') {
        Some(tail) => (true, tail),
        None => (false, text),
    };
    let (int_part, frac_part) = unsigned.split_once('.').unwrap_or((unsigned, ""));

    // Require at least one digit. Without this, "." and "-." would pass the
    // (vacuous) all-digits check below and be silently turned into zero.
    if int_part.is_empty() && frac_part.is_empty() {
        return None;
    }

    let mut magnitude: i128 = 0;
    for byte in int_part.bytes().chain(frac_part.bytes()) {
        // A second '.', an exponent marker, or any other non-digit byte means
        // the text is not plain decimal.
        if !byte.is_ascii_digit() {
            return None;
        }
        magnitude = magnitude
            .checked_mul(10)?
            .checked_add(i128::from(byte - b'0'))?;
    }

    // `magnitude` is in 0..=i128::MAX, so negating it cannot overflow.
    let coefficient = if is_negative { -magnitude } else { magnitude };
    let scale = i16::try_from(frac_part.len()).ok()?;
    Some((coefficient, scale))
}
458
459impl std::fmt::Display for OracleNumber {
460 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
461 let mut s = String::new();
462 self.fmt_into(&mut s);
463 f.write_str(&s)
464 }
465}
466
#[cfg(test)]
mod tests {
    use super::*;
    use crate::thin::codecs::{decode_number_parts_stack, encode_number_text};

    /// Texts spanning the inline domain plus the i128-overflow boundary
    /// (39–40 digits), used by the fused-coefficient differential test.
    const FUSION_CORPUS: &[&str] = &[
        "0",
        "1",
        "-1",
        "9",
        "-9",
        "10",
        "99",
        "-99",
        "100",
        "12345",
        "-12345",
        "0.5",
        "-0.5",
        "3.14159",
        "100.001",
        "0.0001",
        "1000000000000000000",
        "12345678901234567890",
        "123456789012345678901234567890",
        // 38 significant digits (max inline precision).
        "12345678901234567890123456789012345678",
        "-12345678901234567890123456789012345678",
        "0.12345678901234567890123456789012345678",
        // 39+ digits: i128 overflow -> fused must latch None, same as ref.
        "123456789012345678901234567890123456789",
        "9999999999999999999999999999999999999999", // 40 nines
        "1e125",
        "-1e125",
        "1e-120",
    ];

    /// `(coefficient, scale, is_integer, expected canonical text)` spot checks.
    const FORMAT_CASES: &[(i128, i16, bool, &str)] = &[
        (0, 0, true, "0"),
        (1, 0, true, "1"),
        (-1, 0, true, "-1"),
        (5, 1, false, "0.5"),
        (-5, 1, false, "-0.5"),
        (314159, 5, false, "3.14159"),
        (1, -2, true, "100"), // 1 × 10^2
        (12, 0, true, "12"),
        (100001, 3, false, "100.001"),
        (15, 1, false, "1.5"),
    ];

    /// Differential gate for the fused i128 accumulator (bead
    /// rust-oracledb-shh): the `coefficient` produced during
    /// `decode_number_parts_stack`'s digit walk MUST equal the reference second
    /// pass `digits_to_i128(digits, is_negative)` over the still-filled digit
    /// buffer, including the overflow (`None`) boundary. A divergence would
    /// silently change the inline coefficient and everything derived from it.
    /// Sentinel results carry no coefficient and are skipped.
    fn assert_fused_matches_reference(wire: &[u8], label: &str) {
        let mut digit_buf = [0u8; MAX_DIGITS];
        match decode_number_parts_stack(wire, &mut digit_buf).expect("decode valid wire") {
            DecodedNumberStack::Sentinel { .. } => {}
            DecodedNumberStack::Parts {
                digit_len,
                is_negative,
                coefficient,
                ..
            } => {
                let reference = digits_to_i128(&digit_buf[..digit_len], is_negative);
                assert_eq!(
                    coefficient, reference,
                    "{label}: fused coefficient {coefficient:?} != reference walk {reference:?} \
                     (wire={wire:02x?})"
                );
            }
        }
    }

    #[test]
    fn fused_coefficient_matches_reference_walk_corpus() {
        for &text in FUSION_CORPUS {
            let wire = match encode_number_text(text) {
                Ok(wire) => wire,
                Err(e) => panic!("encode {text}: {e:?}"),
            };
            assert_fused_matches_reference(&wire, text);
        }
    }

    #[test]
    fn inline_form_fits_the_size_budget() {
        // The carrier must stay within 24 bytes at 8-byte alignment (thanks to
        // the [u8; 16] coefficient) so `QueryValue` holds its 32-byte budget.
        let size = core::mem::size_of::<OracleNumber>();
        let align = core::mem::align_of::<OracleNumber>();
        assert!(size <= 24);
        assert_eq!(align, 8);
    }

    #[test]
    fn formatter_matches_known_canonical_text() {
        // coefficient × 10^-scale -> canonical text.
        for &(coeff, scale, is_int, expect) in FORMAT_CASES {
            let number = OracleNumber::inline(coeff, scale, is_int);
            assert_eq!(
                number.to_canonical_string(),
                expect,
                "coeff={coeff} scale={scale}"
            );
            assert_eq!(number.is_integer(), is_int);
        }
    }

    #[test]
    fn from_canonical_text_round_trips() {
        let samples = [
            "0",
            "1",
            "-1",
            "0.5",
            "100",
            "0.001",
            "12345678901234567890",
        ];
        for text in samples {
            let rendered = OracleNumber::from_canonical_text(text).to_canonical_string();
            assert_eq!(rendered, text);
        }
    }

    #[test]
    fn overflow_value_falls_back_to_text_losslessly() {
        // A 40-digit integer exceeds i128 (39 digits max). Going through
        // `from_wire` would need the encoder, so this checks that the text
        // constructor takes the boxed fallback and preserves the text verbatim.
        let big = "1234567890123456789012345678901234567890"; // 40 digits
        let number = OracleNumber::from_canonical_text(big);
        assert!(
            matches!(number, OracleNumber::Text { .. }),
            "40-digit -> text fallback"
        );
        assert_eq!(number.to_canonical_string(), big);
    }
}
602}