oracledb_protocol/thin/number.rs
1#![forbid(unsafe_code)]
2
3//! Inline, lossless Oracle `NUMBER` representation (bead rust-oracledb-65w).
4//!
5//! Oracle `NUMBER` is up to 40 significant decimal digits (the wire form carries
6//! up to 20 base-100 mantissa bytes) with a decimal exponent in roughly
7//! `-130..=125`. The common case — a value with at most 38 significant digits —
8//! fits losslessly in an `i128` coefficient plus an `i16` scale, allocating
9//! nothing. The owned [`crate::thin::QueryValue::Number`] used to carry a heap
//! `String` per cell; this module replaces that payload with an inline one so a
//! NUMBER-heavy row stops doing one `malloc` per NUMBER column.
12//!
13//! ## Losslessness
14//!
15//! Some wire forms cannot be represented exactly inline:
16//!
17//! - A 39- or 40-digit integer can exceed `i128::MAX` (`~1.7e38`, 39 digits).
18//! - The decoder's special single-byte negative sentinel renders as the literal
19//! text `-1e126`, which is not a plain `coefficient × 10^-scale` decimal.
20//!
21//! For any such value the representation FALLS BACK to a boxed canonical-text
22//! carrier ([`OracleNumber::Text`]) so correctness is never sacrificed. The
23//! fallback is boxed (`Box<str>`) so the enum — and therefore
24//! [`crate::thin::QueryValue`] — stays within its 32-byte budget.
25//!
26//! ## Single shared formatter
27//!
28//! [`OracleNumber::fmt_into`] is the ONE canonical formatter. It is BYTE-IDENTICAL
29//! to the legacy [`super::codecs::decode_number_text_into`] text path (proven by
30//! `tests/number_inline_byte_identical.rs` over the whole NUMBER domain). Every
31//! consumer — `Display`, `FromSql<String>`, the OSON/JSON number text, and the
32//! borrowed `QueryValueRef::Number` arena path — routes through it, so the owned
33//! and borrowed decode paths can never diverge by even one byte.
34
35use crate::Result;
36
/// Capacity, in decimal digits, of the stack buffer the wire NUMBER digit walk
/// writes into. An Oracle NUMBER has at most 40 significant digits (20
/// base-100 mantissa bytes); the extra 2 leave room for the
/// `first_digit == 10` base-100 carry the legacy walk can append.
pub(crate) const MAX_DIGITS: usize = 40 + 2;
42
/// Heap-free result of decoding a wire NUMBER. Counterpart of
/// [`DecodedNumber`], except that the digits are written into a stack buffer
/// supplied by the caller.
pub(crate) enum DecodedNumberStack {
    /// Single-byte sentinel: its canonical text is a fixed string.
    Sentinel {
        /// The fixed canonical text of the sentinel.
        text: &'static str,
        /// Integer/float flag reported for the sentinel.
        is_integer: bool,
    },
    /// Regular value: the first `digit_len` bytes of the caller's stack buffer
    /// hold its significant digits.
    Parts {
        /// Number of significant digits written to the caller's buffer.
        digit_len: usize,
        /// Sign of the value.
        is_negative: bool,
        /// Position of the decimal point relative to the digit run.
        decimal_point_index: i16,
        /// Integer/float flag reported by the decoder.
        is_integer: bool,
    },
}
60
/// Lossless decimal carrier for an Oracle `NUMBER`.
///
/// Most values live in [`OracleNumber::Inline`] as `coefficient × 10^-scale`
/// with no allocation. A value that has no exact inline representation is kept
/// in [`OracleNumber::Text`], a boxed canonical-text carrier.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum OracleNumber {
    /// Allocation-free form: `value == coefficient × 10^-scale`. The sign lives
    /// in the coefficient, and `scale` may be negative (trailing zeros to the
    /// left of the implied point). `is_integer` mirrors the legacy decoder's
    /// flag — whether the canonical text contains a decimal point — so the
    /// Python int-vs-float dispatch is preserved exactly.
    ///
    /// The coefficient is held as the little-endian bytes of an `i128` instead
    /// of an `i128` field. A bare `i128` forces 16-byte alignment, which rounds
    /// the enum up to 32 bytes and would blow `QueryValue`'s 32-byte budget once
    /// the discriminant is added; `[u8; 16]` keeps 8-byte alignment so
    /// `OracleNumber` is 24 bytes. Read it through
    /// [`OracleNumber::coefficient`].
    Inline {
        /// Little-endian bytes of the signed `i128` coefficient.
        coefficient_le: [u8; 16],
        /// Decimal scale; the value is `coefficient × 10^-scale`.
        scale: i16,
        /// Legacy integer/float flag.
        is_integer: bool,
    },
    /// Defensive fallback for values the inline form cannot hold exactly
    /// (39–40 significant digit integers that overflow `i128`, or the `-1e126`
    /// single-byte sentinel). The text is boxed so the enum stays small.
    Text {
        /// Canonical decimal text, stored verbatim.
        text: Box<str>,
        /// Legacy integer/float flag.
        is_integer: bool,
    },
}
89
90impl OracleNumber {
91 /// Build the inline variant from a real `i128` coefficient (stored as its
92 /// little-endian bytes to keep the enum 8-byte aligned).
93 fn inline(coefficient: i128, scale: i16, is_integer: bool) -> Self {
94 OracleNumber::Inline {
95 coefficient_le: coefficient.to_le_bytes(),
96 scale,
97 is_integer,
98 }
99 }
100
101 /// The inline coefficient as an `i128`, or `None` for the boxed-text
102 /// fallback. `value == coefficient × 10^-scale`.
103 pub fn coefficient(&self) -> Option<i128> {
104 match self {
105 OracleNumber::Inline { coefficient_le, .. } => {
106 Some(i128::from_le_bytes(*coefficient_le))
107 }
108 OracleNumber::Text { .. } => None,
109 }
110 }
111
112 /// The inline scale, or `None` for the boxed-text fallback.
113 pub fn scale(&self) -> Option<i16> {
114 match self {
115 OracleNumber::Inline { scale, .. } => Some(*scale),
116 OracleNumber::Text { .. } => None,
117 }
118 }
119
120 /// Decode an Oracle `NUMBER` wire form into the inline representation,
121 /// falling back to a boxed canonical-text carrier when the value cannot be
122 /// represented exactly inline. The canonical text — whether produced inline
123 /// or stored in the fallback — is byte-identical to the legacy decoder.
124 ///
125 /// ZERO-ALLOCATION for the common inline case: the digit walk writes into a
126 /// fixed stack buffer (Oracle NUMBER has at most 40 significant digits), and
127 /// the inline coefficient/scale is folded directly — no scratch `Vec`/`String`
128 /// is heap-allocated. Only the rare text fallback (sentinel / i128 overflow)
129 /// touches the heap, and only then.
130 pub fn from_wire(bytes: &[u8]) -> Result<Self> {
131 // Stack scratch: up to 40 significant decimal digits + slack for the
132 // base-100 carry the digit walk can append.
133 let mut digit_buf = [0u8; MAX_DIGITS];
134 match super::codecs::decode_number_parts_stack(bytes, &mut digit_buf)? {
135 // Single-byte sentinels: format their canonical text once.
136 DecodedNumberStack::Sentinel { text, is_integer } => Ok(OracleNumber::Text {
137 text: text.into(),
138 is_integer,
139 }),
140 DecodedNumberStack::Parts {
141 digit_len,
142 is_negative,
143 decimal_point_index,
144 is_integer,
145 } => {
146 let digits = &digit_buf[..digit_len];
147 // Fold the decimal digits into an i128 coefficient. `digits` is
148 // the significant-digit run (up to 40); >38 may overflow i128.
149 match digits_to_i128(digits, is_negative) {
150 Some(coefficient) => {
151 // scale = len - decimal_point_index (implied fractional
152 // positions; may be negative for trailing-zero integers).
153 let len = i32::try_from(digits.len()).unwrap_or(i32::MAX);
154 let scale_i32 = len - i32::from(decimal_point_index);
155 match i16::try_from(scale_i32) {
156 Ok(scale) => Ok(OracleNumber::inline(coefficient, scale, is_integer)),
157 // Scale out of i16 range (cannot happen for valid
158 // Oracle NUMBER, but stay defensive): keep the text.
159 Err(_) => Ok(Self::spill_text(
160 digits,
161 is_negative,
162 decimal_point_index,
163 is_integer,
164 )),
165 }
166 }
167 // i128 overflow (39–40 digit value): spill to boxed text.
168 None => Ok(Self::spill_text(
169 digits,
170 is_negative,
171 decimal_point_index,
172 is_integer,
173 )),
174 }
175 }
176 }
177 }
178
179 /// Format the digits into a boxed-text fallback (the rare path: i128 overflow
180 /// or out-of-range scale). Uses the SAME formatter fragment as the inline
181 /// path, so the text is byte-identical.
182 fn spill_text(
183 digits: &[u8],
184 is_negative: bool,
185 decimal_point_index: i16,
186 is_integer: bool,
187 ) -> Self {
188 let mut text = String::new();
189 super::codecs::format_number_digits(digits, is_negative, decimal_point_index, &mut text);
190 OracleNumber::Text {
191 text: text.into_boxed_str(),
192 is_integer,
193 }
194 }
195
196 /// Construct from already-canonical decimal text (the bind / parse path).
197 /// Parses the text into the inline form when it fits, else keeps it boxed.
198 /// The text MUST already be canonical Oracle `NUMBER` text (the form the
199 /// decoder emits); this does not re-canonicalize.
200 pub fn from_canonical_text(text: &str) -> Self {
201 Self::from_canonical_text_with_flag(text, !text.contains('.'))
202 }
203
204 /// Like [`Self::from_canonical_text`] but with the caller-supplied
205 /// `is_integer` flag (the borrowed fetch path already decoded it from the
206 /// wire, so it is authoritative — preserve it verbatim).
207 pub fn from_canonical_text_with_flag(text: &str, is_integer: bool) -> Self {
208 match parse_canonical_inline(text) {
209 Some((coefficient, scale)) => OracleNumber::inline(coefficient, scale, is_integer),
210 None => OracleNumber::Text {
211 text: text.into(),
212 is_integer,
213 },
214 }
215 }
216
217 /// Borrow the canonical text when it is stored as boxed text (the fallback
218 /// form), else `None` — the inline numeric form synthesizes its text on
219 /// demand and has no `&str` to lend.
220 pub fn as_borrowed_text(&self) -> Option<&str> {
221 match self {
222 OracleNumber::Text { text, .. } => Some(text),
223 OracleNumber::Inline { .. } => None,
224 }
225 }
226
227 /// Whether the canonical text is integral (carries no decimal point).
228 /// Mirrors the legacy `is_integer` flag exactly.
229 pub fn is_integer(&self) -> bool {
230 match self {
231 OracleNumber::Inline { is_integer, .. } | OracleNumber::Text { is_integer, .. } => {
232 *is_integer
233 }
234 }
235 }
236
237 /// THE single shared canonical formatter. Appends the canonical decimal text
238 /// to `out`. Byte-identical to [`super::codecs::decode_number_text_into`].
239 pub fn fmt_into(&self, out: &mut String) {
240 match self {
241 OracleNumber::Text { text, .. } => out.push_str(text),
242 OracleNumber::Inline {
243 coefficient_le,
244 scale,
245 ..
246 } => fmt_inline_into(i128::from_le_bytes(*coefficient_le), *scale, out),
247 }
248 }
249
250 /// Canonical decimal text as an owned `String`.
251 pub fn to_canonical_string(&self) -> String {
252 let mut out = String::new();
253 self.fmt_into(&mut out);
254 out
255 }
256
257 /// Canonical decimal text as a `Cow`: borrowed for the boxed-text fallback
258 /// (zero allocation), owned for the inline form (formatted once on demand).
259 pub fn to_canonical_cow(&self) -> std::borrow::Cow<'_, str> {
260 match self {
261 OracleNumber::Text { text, .. } => std::borrow::Cow::Borrowed(text),
262 OracleNumber::Inline { .. } => std::borrow::Cow::Owned(self.to_canonical_string()),
263 }
264 }
265
266 /// Exact `i64` when the value is an integer that fits; else `None`.
267 pub fn to_i64(&self) -> Option<i64> {
268 match self {
269 OracleNumber::Inline {
270 coefficient_le,
271 scale,
272 ..
273 } => inline_to_i128(i128::from_le_bytes(*coefficient_le), *scale)
274 .and_then(|v| i64::try_from(v).ok()),
275 OracleNumber::Text { text, .. } => text.parse::<i64>().ok(),
276 }
277 }
278
279 /// Exact `i128` when the value is an integer that fits; else `None`.
280 pub fn to_i128(&self) -> Option<i128> {
281 match self {
282 OracleNumber::Inline {
283 coefficient_le,
284 scale,
285 ..
286 } => inline_to_i128(i128::from_le_bytes(*coefficient_le), *scale),
287 OracleNumber::Text { text, .. } => text.parse::<i128>().ok(),
288 }
289 }
290}
291
/// Result of the wire digit walk: either a sentinel/overflow case that has to
/// stay as text, or the decoded parts from which the inline form is built.
pub(crate) enum DecodedNumber {
    /// The canonical text must be kept verbatim — the special single-byte
    /// sentinel cases that are not a plain `coeff × 10^-scale`. The text is
    /// not stored in this variant.
    // NOTE(review): presumably the text has already been written to the
    // caller's output buffer — confirm against the decoder.
    Text {
        /// Integer/float flag for the kept text.
        is_integer: bool,
    },
    /// Pieces to fold into the inline coefficient/scale form.
    Parts {
        /// Sign of the value.
        is_negative: bool,
        /// Position of the decimal point relative to the digit run.
        decimal_point_index: i16,
        /// Integer/float flag reported by the decoder.
        is_integer: bool,
    },
}
305
/// Accumulate the significant decimal `digits` (each expected to be `0..=9`,
/// most significant first) into a signed `i128` coefficient.
///
/// Returns `None` when the magnitude overflows `i128`, which is what happens
/// for 39–40 digit values beyond `i128::MAX`. An empty slice yields `Some(0)`.
fn digits_to_i128(digits: &[u8], is_negative: bool) -> Option<i128> {
    let magnitude = digits.iter().try_fold(0i128, |total, &digit| {
        total.checked_mul(10)?.checked_add(i128::from(digit))
    })?;
    // `magnitude` is never negative, so negating it cannot overflow.
    Some(if is_negative { -magnitude } else { magnitude })
}
320
/// Recover the exact integer value of the inline form
/// `coefficient × 10^-scale`.
///
/// Returns `None` when the value has a non-zero fractional part or when the
/// scaled result does not fit in `i128`.
///
/// A zero coefficient is the integer 0 at every scale, so it always yields
/// `Some(0)`. This matches the formatter, which renders a zero coefficient as
/// `"0"` regardless of scale, and keeps a huge scale from being reported as
/// "not an integer" merely because `10^scale` itself overflows.
fn inline_to_i128(coefficient: i128, scale: i16) -> Option<i128> {
    if coefficient == 0 {
        return Some(0);
    }

    // |scale| as an exponent; `unsigned_abs` also handles `i16::MIN`.
    let exponent = u32::from(scale.unsigned_abs());
    // If 10^|scale| overflows `i128` the answer is `None` in both directions
    // for a non-zero coefficient: scaling up overflows as well, and scaling
    // down leaves a magnitude strictly between 0 and 1.
    let power = 10i128.checked_pow(exponent)?;

    if scale < 0 {
        // Negative scale: value = coefficient × 10^(-scale), always integral.
        coefficient.checked_mul(power)
    } else if coefficient % power == 0 {
        // Non-negative scale: integral only when the trailing `scale` digits
        // of the coefficient are all zero.
        Some(coefficient / power)
    } else {
        None
    }
}
348
/// Append the canonical Oracle `NUMBER` text of `coefficient × 10^-scale` to
/// `out`. The output is intended to be BYTE-IDENTICAL to the legacy
/// `decode_number_text_into`.
///
/// The legacy formatter is driven by the significant digits and a
/// `decimal_point_index`. Both are recovered here: the digits are the decimal
/// digits of `|coefficient|`, and
/// `decimal_point_index = digit_count - scale`.
fn fmt_inline_into(coefficient: i128, scale: i16, out: &mut String) {
    /// Append a run of ASCII digit bytes to `out`.
    fn push_ascii(out: &mut String, ascii: &[u8]) {
        out.extend(ascii.iter().map(|&byte| char::from(byte)));
    }

    // Zero always renders as "0", whatever the scale (matches the legacy
    // single-byte-zero path and the negative-zero canonicalization).
    if coefficient == 0 {
        out.push('0');
        return;
    }

    // Write the digits of |coefficient| right-aligned into a stack buffer.
    // `unsigned_abs` sidesteps the `i128::MIN` negation overflow.
    let mut scratch = [0u8; 40];
    let mut start = scratch.len();
    let mut remaining = coefficient.unsigned_abs();
    while remaining != 0 {
        start -= 1;
        scratch[start] = b'0' + (remaining % 10) as u8;
        remaining /= 10;
    }
    let digits = &scratch[start..];
    let digit_count = digits.len() as i32;
    let point = digit_count - i32::from(scale);

    if coefficient < 0 {
        out.push('-');
    }

    if point <= 0 {
        // Pure fraction: "0." + (-point) zeros + every digit.
        out.push_str("0.");
        out.extend((point..0).map(|_| '0'));
        push_ascii(out, digits);
    } else if point >= digit_count {
        // Point at or past the last digit: the digits, then zero padding.
        push_ascii(out, digits);
        out.extend((digit_count..point).map(|_| '0'));
    } else {
        // Point inside the digit run; here 0 < point < digit_count, so the
        // cast cannot truncate.
        let (whole, fraction) = digits.split_at(point as usize);
        push_ascii(out, whole);
        out.push('.');
        push_ascii(out, fraction);
    }
}
409
/// Parse canonical Oracle `NUMBER` text into `(coefficient, scale)` such that
/// `value == coefficient × 10^-scale`.
///
/// The expected input is the decoder's canonical form: an optional leading
/// `-`, decimal digits, and at most one `.`. `None` is returned — so the
/// caller keeps the text verbatim — when:
///
/// - the text contains no digit at all (`""`, `"-"`, `"."`, `"-."`),
/// - it contains anything other than ASCII digits around a single `.` (this
///   rejects the `-1e126` sentinel, which has an `e`),
/// - the digits overflow `i128`, or the fraction length overflows `i16`.
fn parse_canonical_inline(text: &str) -> Option<(i128, i16)> {
    let (is_negative, unsigned) = match text.strip_prefix('-') {
        Some(stripped) => (true, stripped),
        None => (false, text),
    };
    let (int_part, frac_part) = unsigned.split_once('.').unwrap_or((unsigned, ""));

    // A number needs at least one digit. Without this guard a lone "." would
    // pass the per-byte check below vacuously and be read as zero.
    if int_part.is_empty() && frac_part.is_empty() {
        return None;
    }

    let mut magnitude: i128 = 0;
    for byte in int_part.bytes().chain(frac_part.bytes()) {
        // Canonical text has no exponent and no second '.'.
        if !byte.is_ascii_digit() {
            return None;
        }
        magnitude = magnitude
            .checked_mul(10)?
            .checked_add(i128::from(byte - b'0'))?;
    }

    let coefficient = if is_negative {
        magnitude.checked_neg()?
    } else {
        magnitude
    };
    let scale = i16::try_from(frac_part.len()).ok()?;
    Some((coefficient, scale))
}
441
442impl std::fmt::Display for OracleNumber {
443 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444 let mut s = String::new();
445 self.fmt_into(&mut s);
446 f.write_str(&s)
447 }
448}
449
#[cfg(test)]
mod tests {
    use super::*;

    /// The carrier has to stay within 24 bytes, 8-byte aligned (thanks to the
    /// `[u8; 16]` coefficient), so `QueryValue` keeps its 32-byte budget.
    #[test]
    fn inline_form_fits_the_size_budget() {
        let size = core::mem::size_of::<OracleNumber>();
        let align = core::mem::align_of::<OracleNumber>();
        assert!(size <= 24);
        assert_eq!(align, 8);
    }

    /// Spot checks of `coefficient × 10^-scale` against its canonical text.
    #[test]
    fn formatter_matches_known_canonical_text() {
        let cases: [(i128, i16, bool, &str); 10] = [
            (0, 0, true, "0"),
            (1, 0, true, "1"),
            (-1, 0, true, "-1"),
            (5, 1, false, "0.5"),
            (-5, 1, false, "-0.5"),
            (314159, 5, false, "3.14159"),
            // 1 × 10^2
            (1, -2, true, "100"),
            (12, 0, true, "12"),
            (100001, 3, false, "100.001"),
            (15, 1, false, "1.5"),
        ];
        for (coeff, scale, is_int, expect) in cases {
            let number = OracleNumber::inline(coeff, scale, is_int);
            assert_eq!(
                number.to_canonical_string(),
                expect,
                "coeff={coeff} scale={scale}"
            );
            assert_eq!(number.is_integer(), is_int);
        }
    }

    /// Canonical text survives a parse-then-format round trip.
    #[test]
    fn from_canonical_text_round_trips() {
        let samples = [
            "0",
            "1",
            "-1",
            "0.5",
            "100",
            "0.001",
            "12345678901234567890",
        ];
        for text in samples {
            let rendered = OracleNumber::from_canonical_text(text).to_canonical_string();
            assert_eq!(rendered, text);
        }
    }

    /// A 40-digit integer is beyond `i128` (39 digits at most), so it has to
    /// land in the boxed fallback with its text intact. Going through
    /// `from_wire` would need the encoder, so the fallback constructor is
    /// exercised instead.
    #[test]
    fn overflow_value_falls_back_to_text_losslessly() {
        // 40 digits.
        let big = "1234567890123456789012345678901234567890";
        let number = OracleNumber::from_canonical_text(big);
        assert!(
            matches!(number, OracleNumber::Text { .. }),
            "40-digit -> text fallback"
        );
        assert_eq!(number.to_canonical_string(), big);
    }
}
518}