luna_core/
numeric.rs

1//! Lua numeral conversion core (stone candidate: pure functions, no runtime
2//! types). Two consumers: the lexer (literal tokens, shape pre-validated by
3//! scanning) and the VM/stdlib (`str2num` — luaO_str2num semantics with
4//! whitespace and sign). Versioning is expressed as capability flags so this
5//! module stays dialect-agnostic.
6
/// A parsed Lua numeral, tagged with its subtype: integer or float.
/// Dialects without an integer subtype (Lua 5.1) collapse everything to
/// `Float` at this layer.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Num {
    /// Numeral with integer type.
    Int(i64),
    /// Numeral with float type.
    Float(f64),
}

impl Num {
    /// Value as an `f64`. A `Float` is returned unchanged; an `Int` goes
    /// through an `as f64` cast, so large magnitudes may lose precision.
    pub fn as_f64(self) -> f64 {
        match self {
            Num::Float(value) => value,
            Num::Int(value) => value as f64,
        }
    }

    /// Arithmetic negation that keeps the subtype. Integer negation wraps,
    /// so `i64::MIN` maps to itself instead of overflowing.
    fn negate(self) -> Num {
        match self {
            Num::Float(value) => Num::Float(-value),
            Num::Int(value) => Num::Int(0i64.wrapping_sub(value)),
        }
    }
}
34
/// Numeric value (0..=15) of one ASCII hex digit — `0-9`, `a-f` or `A-F` —
/// or `None` when the byte is not a hex digit.
pub fn hex_digit(c: u8) -> Option<u32> {
    // Bytes >= 0x80 map to Latin-1 code points, none of which is a digit in
    // radix 16, so they fall out as `None` just like any other non-hex byte.
    char::from(c).to_digit(16)
}
45
46/// Decimal numeral (no sign, no surrounding space).
47/// `int_ok = false` forces float results (Lua 5.1: numbers are doubles).
48/// `neg` is whether a leading '-' was stripped by the caller: it widens the
49/// integer range by one unit (PUC l_str2int's `+ neg`) so that the magnitude
50/// 2^63 parses as an integer — letting `tonumber("-9223372036854775808")`
51/// recover minint. The caller still applies the actual negation.
52pub fn dec_literal(text: &[u8], int_ok: bool, neg: bool) -> Option<Num> {
53    let mut i = 0;
54    let mut int_digits = 0;
55    while i < text.len() && text[i].is_ascii_digit() {
56        i += 1;
57        int_digits += 1;
58    }
59    let mut frac_digits = 0;
60    let mut has_dot = false;
61    if i < text.len() && text[i] == b'.' {
62        has_dot = true;
63        i += 1;
64        while i < text.len() && text[i].is_ascii_digit() {
65            i += 1;
66            frac_digits += 1;
67        }
68    }
69    if int_digits + frac_digits == 0 {
70        return None;
71    }
72    let mut has_exp = false;
73    if i < text.len() && matches!(text[i], b'e' | b'E') {
74        has_exp = true;
75        i += 1;
76        if i < text.len() && matches!(text[i], b'+' | b'-') {
77            i += 1;
78        }
79        let mut digits = 0;
80        while i < text.len() && text[i].is_ascii_digit() {
81            i += 1;
82            digits += 1;
83        }
84        if digits == 0 {
85            return None;
86        }
87    }
88    if i != text.len() {
89        return None;
90    }
91    let s = str::from_utf8(text).expect("ascii numeral");
92    if !has_dot && !has_exp && int_ok {
93        // decimal integer; accumulate the magnitude in u64 with PUC's overflow
94        // rule (l_str2int). The `+ neg` widens the last accepted digit so the
95        // magnitude 2^63 is taken as an integer when negative (== minint);
96        // on overflow it becomes a float. The caller applies the sign, so we
97        // return the wrapped magnitude (2^63 as i64 is the minint bit pattern).
98        const MAXBY10: u64 = i64::MAX as u64 / 10;
99        const MAXLAST: u64 = i64::MAX as u64 % 10;
100        let mut a: u64 = 0;
101        let mut overflow = false;
102        for &c in s.as_bytes() {
103            let d = (c - b'0') as u64;
104            if a >= MAXBY10 && (a > MAXBY10 || d > MAXLAST + neg as u64) {
105                overflow = true;
106                break;
107            }
108            a = a * 10 + d;
109        }
110        if !overflow {
111            return Some(Num::Int(a as i64));
112        }
113    }
114    s.parse::<f64>().ok().map(Num::Float)
115}
116
117/// Hex numeral after the `0x` prefix (no sign, no surrounding space).
118pub fn hex_literal(text: &[u8], int_ok: bool, float_ok: bool) -> Option<Num> {
119    let mut i = 0;
120    while i < text.len() && hex_digit(text[i]).is_some() {
121        i += 1;
122    }
123    let int_end = i;
124    let mut has_dot = false;
125    let mut frac = 0..0;
126    if i < text.len() && text[i] == b'.' {
127        has_dot = true;
128        i += 1;
129        let fs = i;
130        while i < text.len() && hex_digit(text[i]).is_some() {
131            i += 1;
132        }
133        frac = fs..i;
134    }
135    if int_end + frac.len() == 0 {
136        return None;
137    }
138    let has_exp = i < text.len() && matches!(text[i], b'p' | b'P');
139    let mut pexp: i64 = 0;
140    if has_exp {
141        i += 1;
142        let mut sign = 1i64;
143        if i < text.len() && matches!(text[i], b'+' | b'-') {
144            sign = if text[i] == b'-' { -1 } else { 1 };
145            i += 1;
146        }
147        let mut digits = 0;
148        let mut e: i64 = 0;
149        while i < text.len() && text[i].is_ascii_digit() {
150            e = (e * 10 + (text[i] - b'0') as i64).min(1 << 40);
151            i += 1;
152            digits += 1;
153        }
154        if digits == 0 {
155            return None;
156        }
157        pexp = sign * e;
158    }
159    if i != text.len() {
160        return None;
161    }
162    if !has_exp && !has_dot {
163        if int_ok {
164            // pure hex integer: wraps modulo 2^64 (5.3+ semantics)
165            let mut v: u64 = 0;
166            for &c in &text[..int_end] {
167                v = v
168                    .wrapping_mul(16)
169                    .wrapping_add(hex_digit(c).unwrap() as u64);
170            }
171            return Some(Num::Int(v as i64));
172        }
173        // ≤5.2 had no integer subtype: PUC `lua_strx2number` accumulates
174        // every hex digit in `lua_Number` (a double), so a 150-digit literal
175        // gives the actual mathematical value (~4e180) rather than the
176        // wrapped low-64 bits. math.lua 5.2 :59 bakes that exact equality.
177        let mut v: f64 = 0.0;
178        for &c in &text[..int_end] {
179            v = v * 16.0 + hex_digit(c).unwrap() as f64;
180        }
181        return Some(Num::Float(v));
182    }
183    if !float_ok {
184        return None;
185    }
186    // value = mant * 2^(4*exp4 + pexp); digits beyond 64 mantissa bits fold
187    // into the exponent (integer part) or the sticky bit (fraction part)
188    let mut mant: u64 = 0;
189    let mut sticky = false;
190    let mut exp4: i64 = 0;
191    for &c in &text[..int_end] {
192        let d = hex_digit(c).unwrap() as u64;
193        if mant >> 60 == 0 {
194            mant = mant * 16 + d;
195        } else {
196            sticky |= d != 0;
197            exp4 += 1;
198        }
199    }
200    for &c in &text[frac] {
201        let d = hex_digit(c).unwrap() as u64;
202        if mant >> 60 == 0 {
203            mant = mant * 16 + d;
204            exp4 -= 1;
205        } else {
206            sticky |= d != 0;
207        }
208    }
209    Some(Num::Float(compose_f64(mant, sticky, exp4 * 4 + pexp)))
210}
211
212/// luaO_str2num: optional surrounding whitespace and sign, decimal or hex.
213/// Used by VM string→number coercion and `tonumber`.
214pub fn str2num(s: &[u8], int_ok: bool, hex_float_ok: bool) -> Option<Num> {
215    let is_space = |c: &&u8| matches!(**c, b' ' | b'\t' | b'\n' | 0x0B | 0x0C | b'\r');
216    let mut s = s;
217    while s.first().filter(is_space).is_some() {
218        s = &s[1..];
219    }
220    while s.last().filter(is_space).is_some() {
221        s = &s[..s.len() - 1];
222    }
223    let neg = match s.first() {
224        Some(b'-') => {
225            s = &s[1..];
226            true
227        }
228        Some(b'+') => {
229            s = &s[1..];
230            false
231        }
232        _ => false,
233    };
234    let n = if s.len() > 2 && s[0] == b'0' && matches!(s[1], b'x' | b'X') {
235        hex_literal(&s[2..], int_ok, hex_float_ok)?
236    } else {
237        dec_literal(s, int_ok, neg)?
238    };
239    Some(if neg { n.negate() } else { n })
240}
241
/// Round `mant * 2^exp` to the nearest `f64`, ties to even.
///
/// `sticky` says that non-zero bits were discarded below the least
/// significant bit of `mant`; it only breaks an otherwise exact tie upward.
///
/// The mantissa is rounded exactly once, to the precision the *result* can
/// hold: 53 bits for normal values, fewer for subnormals (whose least
/// significant bit is pinned at 2^-1074). Rounding to 53 bits first and
/// letting the scaling multiplication round a second time would
/// double-round subnormal results — e.g. 2^-1075 + 2^-1128 would collapse
/// to 0 instead of the minimum subnormal.
///
/// Overflow yields infinity; values below half the minimum subnormal yield
/// `0.0`.
fn compose_f64(mant: u64, sticky: bool, exp: i64) -> f64 {
    if mant == 0 {
        return 0.0;
    }
    let bits = 64 - mant.leading_zeros() as i64;
    // Exponent of the leading bit: the value lies in [2^top, 2^(top + 1)).
    let top = exp + bits - 1;
    // Significand bits available at that magnitude. Below 2^-1022 one bit
    // is lost per binade; the count reaches 0 (or less) once the leading bit
    // itself sits below 2^-1074.
    let prec = if top >= -1022 { 53 } else { top + 1075 };
    let excess = bits - prec;
    if excess <= 0 {
        // Everything fits: both the cast and the scaling are exact (or
        // overflow to infinity).
        return scale_f64(mant as f64, exp);
    }
    if excess > 64 {
        // Strictly below half of the minimum subnormal, sticky included.
        return 0.0;
    }
    // 128-bit intermediates keep the shifts well-defined for excess == 64.
    let wide = u128::from(mant);
    let kept = (wide >> excess) as u64;
    let rem = wide & ((1u128 << excess) - 1);
    let half = 1u128 << (excess - 1);
    let round_up = rem > half || (rem == half && (sticky || kept & 1 == 1));
    // `kept + 1` is at most 2^53, so the cast is exact, and the product is
    // representable by construction (or overflows to infinity).
    scale_f64((kept + round_up as u64) as f64, exp + excess)
}

/// Exact power of two `2^e` for a normal-range exponent, built directly from
/// the IEEE-754 bit layout (biased exponent, zero fraction).
fn exp2(e: i64) -> f64 {
    debug_assert!((-1022..=1023).contains(&e));
    f64::from_bits(((e + 1023) as u64) << 52)
}

/// Multiply `f` by `2^e` for an arbitrary `e`, stepping by the largest or
/// smallest normal power of two so each factor stays representable. Stops
/// early once the value has saturated to infinity or flushed to zero.
fn scale_f64(mut f: f64, mut e: i64) -> f64 {
    while e > 1023 {
        f *= exp2(1023);
        e -= 1023;
        if f.is_infinite() {
            return f;
        }
    }
    while e < -1022 {
        f *= exp2(-1022);
        e += 1022;
        if f == 0.0 {
            return f;
        }
    }
    f * exp2(e)
}
283
284/// Lua number → text. Integers print as integers. Floats print with
285/// shortest round-trip digits (the 5.5 "read back correctly" rule) in
286/// C `%g`-style presentation: scientific form when the decimal exponent
287/// falls outside [-4, 14), two-digit signed exponent, and `.0` appended to
288/// integral-looking decimals (PUC lua_number2str). Exact boundary alignment
289/// against PUC 5.5 output is rechecked by the P04 gate (strings/math suites).
290pub fn num_to_string(n: Num) -> String {
291    num_to_string_for(n, FloatFmt::TwoStage55)
292}
293
/// Format `i` in decimal into the caller's stack buffer and return the
/// slice of `buf` holding the text, avoiding the `String` allocation that
/// `i.to_string()` performs (hot in tostring(int) on numeric-heavy
/// workloads).
///
/// 20 bytes is exactly enough for the longest value,
/// "-9223372036854775808". Zero is written at the front of the buffer;
/// every other value is written right-aligned at the back.
#[inline]
pub fn write_i64_dec(i: i64, buf: &mut [u8; 20]) -> &[u8] {
    if i == 0 {
        buf[0] = b'0';
        return &buf[..1];
    }
    // `unsigned_abs` is total: it has no overflow case for i64::MIN.
    let mut rest = i.unsigned_abs();
    let mut cursor = buf.len();
    // Emit digits least-significant first, filling the buffer backwards.
    // `rest` is non-zero on entry, so at least one digit is produced.
    loop {
        cursor -= 1;
        buf[cursor] = b"0123456789"[(rest % 10) as usize];
        rest /= 10;
        if rest == 0 {
            break;
        }
    }
    if i < 0 {
        cursor -= 1;
        buf[cursor] = b'-';
    }
    &buf[cursor..]
}
320
/// Float rendering flavor, one per dialect generation. Each PUC release
/// line prints floats with a different `LUA_NUMBER_FMT` (v2.14 HD, pinned
/// by the per-dialect diff corpus).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum FloatFmt {
    /// ≤5.2: `%.14g` with NO ".0" suffix — there is a single number type,
    /// so `tostring(2.0)` is `"2"`. Pinned by pm.lua :13 and 5.1/511.
    Legacy14,
    /// 5.3/5.4: `%.14g`, plus ".0" appended to integer-looking renderings.
    G14,
    /// 5.5: `%.15g`, re-printed with `%.17g` when the first rendering does
    /// not round-trip, plus ".0" on integer-looking renderings
    /// (lobject.c `tostringbuffFloat`).
    TwoStage55,
}
335
336/// Render a number per the dialect's float flavor. Integers are
337/// flavor-independent.
338pub fn num_to_string_for(n: Num, fmt: FloatFmt) -> String {
339    match n {
340        Num::Int(i) => i.to_string(),
341        Num::Float(f) => float_to_string(f, fmt),
342    }
343}
344
/// C `printf("%.{prec}g", f)` semantics: `prec` significant digits;
/// scientific form when the decimal exponent is `< -4` or `>= prec`,
/// fixed form otherwise; trailing zeros (and a bare trailing point)
/// stripped; scientific exponent printed as sign + at least two digits.
///
/// As in C, a precision of 0 is treated as 1. Non-finite inputs render as
/// `"nan"`, `"inf"` or `"-inf"` instead of panicking on the missing
/// exponent marker.
fn format_g(f: f64, prec: usize) -> String {
    /// Drop trailing fractional zeros, then a bare trailing point. Text
    /// without a decimal point is left alone so integer zeros survive.
    fn strip_fraction_zeros(text: &mut String) {
        if text.contains('.') {
            let keep = text.trim_end_matches('0').trim_end_matches('.').len();
            text.truncate(keep);
        }
    }

    if f.is_nan() {
        return "nan".to_string();
    }
    if f.is_infinite() {
        return if f.is_sign_negative() { "-inf" } else { "inf" }.to_string();
    }
    // C: "if the precision is zero, it is treated as 1". This also keeps
    // `prec - 1` from underflowing.
    let prec = prec.max(1);

    // Take the decimal exponent from a correctly-rounded scientific
    // rendering at the target precision, because rounding may bump it
    // (9.99 at two digits becomes 1.0e1).
    let sci_decimals = prec - 1;
    let sci = format!("{f:.sci_decimals$e}");
    let (mantissa, exponent) = sci
        .split_once('e')
        .expect("scientific form has exponent");
    let exp: i32 = exponent.parse().expect("valid exponent");

    if exp < -4 || exp >= prec as i32 {
        let mut mant = mantissa.to_string();
        strip_fraction_zeros(&mut mant);
        let (esign, eabs) = if exp < 0 { ('-', -exp) } else { ('+', exp) };
        format!("{mant}e{esign}{eabs:02}")
    } else {
        // Fixed form: keep exactly `prec` significant digits overall.
        let decimals = (prec as i32 - 1 - exp).max(0) as usize;
        let mut fixed = format!("{f:.decimals$}");
        strip_fraction_zeros(&mut fixed);
        fixed
    }
}
382
383fn float_to_string(f: f64, fmt: FloatFmt) -> String {
384    if f.is_nan() {
385        return "nan".to_string();
386    }
387    if f.is_infinite() {
388        return if f < 0.0 { "-inf" } else { "inf" }.to_string();
389    }
390    let mut s = match fmt {
391        // ≤5.4: plain LUA_NUMBER_FMT="%.14g" (lua 5.1.5-5.4.8).
392        FloatFmt::Legacy14 | FloatFmt::G14 => format_g(f, 14),
393        // 5.5 `tostringbuffFloat` (lobject.c): %.15g, read back, and
394        // only if the round-trip is inexact re-print with %.17g.
395        FloatFmt::TwoStage55 => {
396            let first = format_g(f, 15);
397            if first.parse::<f64>() == Ok(f) {
398                first
399            } else {
400                format_g(f, 17)
401            }
402        }
403    };
404    if s.bytes().all(|c| c.is_ascii_digit() || c == b'-') && fmt != FloatFmt::Legacy14 {
405        s.push_str(".0");
406    }
407    s
408}
409
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert the result of `str2num` with every capability enabled.
    fn expect_modern(text: &[u8], want: Option<Num>) {
        assert_eq!(str2num(text, true, true), want);
    }

    /// Assert the default (5.5-flavor) rendering of a float.
    fn expect_float_text(value: f64, want: &str) {
        assert_eq!(num_to_string(Num::Float(value)), want);
    }

    /// Assert that a hex literal parses to exactly this float.
    fn expect_hex_float(text: &[u8], want: f64) {
        assert_eq!(hex_literal(text, true, true), Some(Num::Float(want)));
    }

    #[test]
    fn str2num_semantics() {
        // Whitespace, signs, and both radixes.
        expect_modern(b"  42  ", Some(Num::Int(42)));
        expect_modern(b"-10", Some(Num::Int(-10)));
        expect_modern(b"+0x10", Some(Num::Int(16)));
        expect_modern(b"-0x10", Some(Num::Int(-16)));
        expect_modern(b" 0x1p4 ", Some(Num::Float(16.0)));
        expect_modern(b"3.5", Some(Num::Float(3.5)));
        expect_modern(b"1e3", Some(Num::Float(1000.0)));

        // Rejected shapes.
        expect_modern(b"", None);
        expect_modern(b" - 1", None);
        expect_modern(b"10a", None);
        expect_modern(b"0x", None);

        // 5.1 flavor: everything is a float, no hex floats.
        assert_eq!(str2num(b"42", false, false), Some(Num::Float(42.0)));
        assert_eq!(str2num(b"0x1p4", false, false), None);

        // minint boundary: "-9223372036854775808" parses as the integer
        // minint (PUC l_str2int's `+ neg`), but the positive magnitude 2^63
        // overflows to a float, and maxint stays an integer.
        expect_modern(b"-9223372036854775808", Some(Num::Int(i64::MIN)));
        expect_modern(b"9223372036854775807", Some(Num::Int(i64::MAX)));
        expect_modern(
            b"9223372036854775808",
            Some(Num::Float(9223372036854775808.0)),
        );
    }

    #[test]
    fn number_printing() {
        assert_eq!(num_to_string(Num::Int(42)), "42");
        assert_eq!(num_to_string(Num::Int(-1)), "-1");

        expect_float_text(2.0, "2.0");
        expect_float_text(-2.0, "-2.0");
        expect_float_text(0.5, "0.5");
        expect_float_text(1e300, "1e+300");
        expect_float_text(1e-7, "1e-07");
        expect_float_text(1e15, "1e+15");
        expect_float_text(100.0, "100.0");
        expect_float_text(f64::INFINITY, "inf");
        expect_float_text(f64::NAN, "nan");

        // PUC 5.5 two-stage rule: %.15g, then %.17g when the round-trip
        // is inexact (lobject.c tostringbuffFloat). Reference spellings
        // taken from the lua5.5 binary (fixture 226 pins the full matrix).
        expect_float_text(0.1, "0.1");
        expect_float_text(1.0 / 3.0, "0.33333333333333331");
        expect_float_text(std::f64::consts::PI, "3.1415926535897931");
        expect_float_text(1e14, "100000000000000.0");
        expect_float_text(9007199254740992.0, "9007199254740992.0");
        expect_float_text(5e-324, "4.94065645841247e-324");
    }

    #[test]
    fn hex_float_rounding() {
        // > 53 significant bits forces rounding; Rust's u64→f64 conversion
        // is correctly rounded and serves as the reference.
        expect_hex_float(b"1FFFFFFFFFFFFF8.0p0", 0x1FFFFFFFFFFFFF8u64 as f64);
        expect_hex_float(b"1.8p1", 3.0);
        expect_hex_float(b"0.8", 0.5);
    }
}
luna_core/numeric.rs

luna_core/
numeric.rs