luna_core/
numeric.rs

1//! Lua numeral conversion core (stone candidate: pure functions, no runtime
2//! types). Two consumers: the lexer (literal tokens, shape pre-validated by
3//! scanning) and the VM/stdlib (`str2num` — luaO_str2num semantics with
4//! whitespace and sign). Versioning is expressed as capability flags so this
5//! module stays dialect-agnostic.
6
/// A parsed Lua number: the two numeric subtypes a numeral can produce.
///
/// Callers that disable integer results (Lua 5.1, where every number is a
/// double) only ever receive [`Num::Float`] from the parsers in this module.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Num {
    /// A numeral with integer type, held as a signed 64-bit value.
    Int(i64),
    /// A numeral with float type, held as a double.
    Float(f64),
}
16
17impl Num {
18    /// Lossy conversion to `f64`. Integer-to-float follows Lua's coercion
19    /// (`Int` is cast as `i as f64`).
20    pub fn as_f64(self) -> f64 {
21        match self {
22            Num::Int(i) => i as f64,
23            Num::Float(f) => f,
24        }
25    }
26
27    fn negate(self) -> Num {
28        match self {
29            Num::Int(i) => Num::Int(i.wrapping_neg()),
30            Num::Float(f) => Num::Float(-f),
31        }
32    }
33}
34
/// Numeric value (0..=15) of one ASCII hexadecimal digit byte, accepting
/// both letter cases; `None` for any other byte.
pub fn hex_digit(c: u8) -> Option<u32> {
    // Bytes map 1:1 onto the first 256 code points, and radix-16 `to_digit`
    // accepts exactly `0-9`, `a-f`, `A-F`.
    char::from(c).to_digit(16)
}
45
46/// Decimal numeral (no sign, no surrounding space).
47/// `int_ok = false` forces float results (Lua 5.1: numbers are doubles).
48/// `neg` is whether a leading '-' was stripped by the caller: it widens the
49/// integer range by one unit (PUC l_str2int's `+ neg`) so that the magnitude
50/// 2^63 parses as an integer — letting `tonumber("-9223372036854775808")`
51/// recover minint. The caller still applies the actual negation.
52pub fn dec_literal(text: &[u8], int_ok: bool, neg: bool) -> Option<Num> {
53    let mut i = 0;
54    let mut int_digits = 0;
55    while i < text.len() && text[i].is_ascii_digit() {
56        i += 1;
57        int_digits += 1;
58    }
59    let mut frac_digits = 0;
60    let mut has_dot = false;
61    if i < text.len() && text[i] == b'.' {
62        has_dot = true;
63        i += 1;
64        while i < text.len() && text[i].is_ascii_digit() {
65            i += 1;
66            frac_digits += 1;
67        }
68    }
69    if int_digits + frac_digits == 0 {
70        return None;
71    }
72    let mut has_exp = false;
73    if i < text.len() && matches!(text[i], b'e' | b'E') {
74        has_exp = true;
75        i += 1;
76        if i < text.len() && matches!(text[i], b'+' | b'-') {
77            i += 1;
78        }
79        let mut digits = 0;
80        while i < text.len() && text[i].is_ascii_digit() {
81            i += 1;
82            digits += 1;
83        }
84        if digits == 0 {
85            return None;
86        }
87    }
88    if i != text.len() {
89        return None;
90    }
91    let s = str::from_utf8(text).expect("ascii numeral");
92    if !has_dot && !has_exp && int_ok {
93        // decimal integer; accumulate the magnitude in u64 with PUC's overflow
94        // rule (l_str2int). The `+ neg` widens the last accepted digit so the
95        // magnitude 2^63 is taken as an integer when negative (== minint);
96        // on overflow it becomes a float. The caller applies the sign, so we
97        // return the wrapped magnitude (2^63 as i64 is the minint bit pattern).
98        const MAXBY10: u64 = i64::MAX as u64 / 10;
99        const MAXLAST: u64 = i64::MAX as u64 % 10;
100        let mut a: u64 = 0;
101        let mut overflow = false;
102        for &c in s.as_bytes() {
103            let d = (c - b'0') as u64;
104            if a >= MAXBY10 && (a > MAXBY10 || d > MAXLAST + neg as u64) {
105                overflow = true;
106                break;
107            }
108            a = a * 10 + d;
109        }
110        if !overflow {
111            return Some(Num::Int(a as i64));
112        }
113    }
114    s.parse::<f64>().ok().map(Num::Float)
115}
116
117/// Hex numeral after the `0x` prefix (no sign, no surrounding space).
118pub fn hex_literal(text: &[u8], int_ok: bool, float_ok: bool) -> Option<Num> {
119    let mut i = 0;
120    while i < text.len() && hex_digit(text[i]).is_some() {
121        i += 1;
122    }
123    let int_end = i;
124    let mut has_dot = false;
125    let mut frac = 0..0;
126    if i < text.len() && text[i] == b'.' {
127        has_dot = true;
128        i += 1;
129        let fs = i;
130        while i < text.len() && hex_digit(text[i]).is_some() {
131            i += 1;
132        }
133        frac = fs..i;
134    }
135    if int_end + frac.len() == 0 {
136        return None;
137    }
138    let has_exp = i < text.len() && matches!(text[i], b'p' | b'P');
139    let mut pexp: i64 = 0;
140    if has_exp {
141        i += 1;
142        let mut sign = 1i64;
143        if i < text.len() && matches!(text[i], b'+' | b'-') {
144            sign = if text[i] == b'-' { -1 } else { 1 };
145            i += 1;
146        }
147        let mut digits = 0;
148        let mut e: i64 = 0;
149        while i < text.len() && text[i].is_ascii_digit() {
150            e = (e * 10 + (text[i] - b'0') as i64).min(1 << 40);
151            i += 1;
152            digits += 1;
153        }
154        if digits == 0 {
155            return None;
156        }
157        pexp = sign * e;
158    }
159    if i != text.len() {
160        return None;
161    }
162    if !has_exp && !has_dot {
163        if int_ok {
164            // pure hex integer: wraps modulo 2^64 (5.3+ semantics)
165            let mut v: u64 = 0;
166            for &c in &text[..int_end] {
167                v = v
168                    .wrapping_mul(16)
169                    .wrapping_add(hex_digit(c).unwrap() as u64);
170            }
171            return Some(Num::Int(v as i64));
172        }
173        // ≤5.2 had no integer subtype: PUC `lua_strx2number` accumulates
174        // every hex digit in `lua_Number` (a double), so a 150-digit literal
175        // gives the actual mathematical value (~4e180) rather than the
176        // wrapped low-64 bits. math.lua 5.2 :59 bakes that exact equality.
177        let mut v: f64 = 0.0;
178        for &c in &text[..int_end] {
179            v = v * 16.0 + hex_digit(c).unwrap() as f64;
180        }
181        return Some(Num::Float(v));
182    }
183    if !float_ok {
184        return None;
185    }
186    // value = mant * 2^(4*exp4 + pexp); digits beyond 64 mantissa bits fold
187    // into the exponent (integer part) or the sticky bit (fraction part)
188    let mut mant: u64 = 0;
189    let mut sticky = false;
190    let mut exp4: i64 = 0;
191    for &c in &text[..int_end] {
192        let d = hex_digit(c).unwrap() as u64;
193        if mant >> 60 == 0 {
194            mant = mant * 16 + d;
195        } else {
196            sticky |= d != 0;
197            exp4 += 1;
198        }
199    }
200    for &c in &text[frac] {
201        let d = hex_digit(c).unwrap() as u64;
202        if mant >> 60 == 0 {
203            mant = mant * 16 + d;
204            exp4 -= 1;
205        } else {
206            sticky |= d != 0;
207        }
208    }
209    Some(Num::Float(compose_f64(mant, sticky, exp4 * 4 + pexp)))
210}
211
212/// luaO_str2num: optional surrounding whitespace and sign, decimal or hex.
213/// Used by VM string→number coercion and `tonumber`.
214pub fn str2num(s: &[u8], int_ok: bool, hex_float_ok: bool) -> Option<Num> {
215    let is_space = |c: &&u8| matches!(**c, b' ' | b'\t' | b'\n' | 0x0B | 0x0C | b'\r');
216    let mut s = s;
217    while s.first().filter(is_space).is_some() {
218        s = &s[1..];
219    }
220    while s.last().filter(is_space).is_some() {
221        s = &s[..s.len() - 1];
222    }
223    let neg = match s.first() {
224        Some(b'-') => {
225            s = &s[1..];
226            true
227        }
228        Some(b'+') => {
229            s = &s[1..];
230            false
231        }
232        _ => false,
233    };
234    let n = if s.len() > 2 && s[0] == b'0' && matches!(s[1], b'x' | b'X') {
235        hex_literal(&s[2..], int_ok, hex_float_ok)?
236    } else {
237        dec_literal(s, int_ok, neg)?
238    };
239    Some(if neg { n.negate() } else { n })
240}
241
/// Round the exact value `mant * 2^exp` to the nearest `f64`, ties to even.
///
/// `sticky` says that non-zero digits were discarded below `mant`'s least
/// significant bit. It is consulted only to break an apparent tie upward;
/// when `mant` needs no rounding it is ignored.
///
/// The mantissa is rounded exactly once, at the precision the result can
/// actually hold: 53 bits for normal numbers, fewer for subnormals. Rounding
/// to 53 bits first and letting the final scaling round a second time would
/// double-round subnormal results (a value just below 1.5 * 2^-1074 would
/// first become exactly 1.5 * 2^-1074 and then tie up to 2^-1073).
fn compose_f64(mant: u64, sticky: bool, exp: i64) -> f64 {
    if mant == 0 {
        return 0.0;
    }
    let bits = 64 - i64::from(mant.leading_zeros());
    // Binary exponent of the most significant set bit of the exact value.
    let top = exp.saturating_add(bits - 1);
    // Significand bits available at this magnitude. Normal numbers carry 53;
    // below 2^-1022 the least significant bit is pinned at 2^-1074, so each
    // binade further down loses one bit.
    let prec = if top >= -1022 {
        53
    } else {
        top.saturating_add(1075)
    };
    if prec < 0 {
        // Strictly below half of the smallest subnormal: rounds to zero.
        return 0.0;
    }
    if bits <= prec {
        // Already representable; scaling is exact (or overflows to infinity).
        return scale_f64(mant as f64, exp);
    }
    // 1..=64 low bits must go; do the bit work in u128 so a shift by 64
    // (everything rounds away) is well defined.
    let excess = (bits - prec) as u32;
    let wide = u128::from(mant);
    let kept = (wide >> excess) as u64;
    let rem = wide & ((1u128 << excess) - 1);
    let half = 1u128 << (excess - 1);
    let round_up = rem > half || (rem == half && (sticky || kept & 1 == 1));
    // `kept + round_up` is at most 2^prec <= 2^53, so the cast and the
    // scaling below introduce no further rounding.
    scale_f64(
        (kept + u64::from(round_up)) as f64,
        exp.saturating_add(i64::from(excess)),
    )
}

/// Exact power of two `2^e` for a normal-range exponent, built directly from
/// the bit pattern (biased exponent field, zero fraction).
fn exp2(e: i64) -> f64 {
    debug_assert!((-1022..=1023).contains(&e));
    f64::from_bits(((e + 1023) as u64) << 52)
}

/// Multiply `f` by `2^e` for any `e`, stepping through the largest
/// normal-range powers of two and stopping early once the running product
/// has saturated to infinity or collapsed to zero.
fn scale_f64(mut f: f64, mut e: i64) -> f64 {
    while e > 1023 {
        f *= exp2(1023);
        e -= 1023;
        if f.is_infinite() {
            return f;
        }
    }
    while e < -1022 {
        f *= exp2(-1022);
        e += 1022;
        if f == 0.0 {
            return f;
        }
    }
    f * exp2(e)
}
283
284/// Lua number → text. Integers print as integers. Floats print with
285/// shortest round-trip digits (the 5.5 "read back correctly" rule) in
286/// C `%g`-style presentation: scientific form when the decimal exponent
287/// falls outside [-4, 14), two-digit signed exponent, and `.0` appended to
288/// integral-looking decimals (PUC lua_number2str). Exact boundary alignment
289/// against PUC 5.5 output is rechecked by the P04 gate (strings/math suites).
290pub fn num_to_string(n: Num) -> String {
291    num_to_string_for(n, /* legacy_float = */ false)
292}
293
/// Render `i` in decimal into the caller's stack buffer and return the
/// slice of `buf` holding the text, avoiding the `String` allocation that
/// `i.to_string()` performs.
///
/// Twenty bytes is exactly enough for the longest value,
/// `"-9223372036854775808"`. Zero is written at the front of the buffer;
/// every other value is written right-aligned at the back.
#[inline]
pub fn write_i64_dec(i: i64, buf: &mut [u8; 20]) -> &[u8] {
    if i == 0 {
        buf[0] = b'0';
        return &buf[..1];
    }
    // `unsigned_abs` yields the magnitude without overflowing on i64::MIN.
    let mut rest = i.unsigned_abs();
    let mut start = buf.len();
    // Emit digits least-significant first, filling the buffer backwards.
    // `rest` is non-zero here, so at least one digit is produced.
    loop {
        start -= 1;
        buf[start] = b'0' + (rest % 10) as u8;
        rest /= 10;
        if rest == 0 {
            break;
        }
    }
    if i < 0 {
        start -= 1;
        buf[start] = b'-';
    }
    &buf[start..]
}
320
321/// Variant for ≤5.2: those dialects only had `lua_Number` (a double), so
322/// PUC's `%.14g` formatter trims any trailing `.0` (an integer-valued float
323/// renders as plain `2`, not `2.0`). 5.3+ introduced the integer subtype and
324/// the renderer started appending `.0` to distinguish floats — pm.lua's
325/// pattern transformations build `"%" .. (s+1)` and need `"%2"` on 5.1/5.2.
326pub fn num_to_string_for(n: Num, legacy_float: bool) -> String {
327    match n {
328        Num::Int(i) => i.to_string(),
329        Num::Float(f) => float_to_string(f, legacy_float),
330    }
331}
332
/// C `printf("%.{prec}g", f)` semantics: `prec` significant digits;
/// scientific form when the decimal exponent is `< -4` or `>= prec`, fixed
/// form otherwise; trailing fractional zeros (and a then-bare decimal point)
/// stripped; scientific exponent printed as a sign plus at least two digits.
///
/// Non-finite inputs are spelled `nan`, `inf` and `-inf`. Rust's `{:e}`
/// rendering of those values has no exponent to parse, so they are handled
/// up front rather than panicking.
///
/// `prec` must be at least 1 (checked in debug builds).
fn format_g(f: f64, prec: usize) -> String {
    // Drop trailing zeros of the fraction, then the point if it is left bare.
    // Text without a point is left alone so integer zeros survive.
    fn trim_fraction(digits: &mut String) {
        if digits.contains('.') {
            let kept = digits.trim_end_matches('0').trim_end_matches('.').len();
            digits.truncate(kept);
        }
    }

    debug_assert!(prec >= 1);
    if f.is_nan() {
        return "nan".to_string();
    }
    if f.is_infinite() {
        return if f < 0.0 { "-inf" } else { "inf" }.to_string();
    }

    // Take the decimal exponent from a correctly rounded scientific
    // rendering at the target precision, because rounding can bump the
    // exponent (9.99 at two digits is 1.0e1).
    let sci = format!("{:.*e}", prec - 1, f);
    let epos = sci.rfind('e').expect("scientific form has exponent");
    let exp: i32 = sci[epos + 1..].parse().expect("valid exponent");

    if exp < -4 || exp >= prec as i32 {
        let mut mant = sci[..epos].to_string();
        trim_fraction(&mut mant);
        let (esign, eabs) = if exp < 0 { ('-', -exp) } else { ('+', exp) };
        format!("{mant}e{esign}{eabs:02}")
    } else {
        // Fixed form: keep `prec` significant digits in total.
        let decimals = (prec as i32 - 1 - exp).max(0) as usize;
        let mut fixed = format!("{f:.decimals$}");
        trim_fraction(&mut fixed);
        fixed
    }
}
370
371fn float_to_string(f: f64, legacy_float: bool) -> String {
372    if f.is_nan() {
373        return "nan".to_string();
374    }
375    if f.is_infinite() {
376        return if f < 0.0 { "-inf" } else { "inf" }.to_string();
377    }
378    // PUC 5.5 `tostringbuffFloat` (lobject.c): print with %.15g
379    // (LUA_NUMBER_FMT), read it back, and only if the round-trip is
380    // inexact re-print with %.17g (LUA_NUMBER_FMT_N). This keeps the
381    // human-friendly short forms ("0.1", "3.5") while making every
382    // rendering value-exact ("3.1415926535897931"). Applied uniformly
383    // across dialects (≤5.4 PUC used a plain %.14g; luna prefers the
384    // value-exact rendering — official ≤5.4 suites assert semantics,
385    // not stdout float spelling).
386    let mut s = format_g(f, 15);
387    if s.parse::<f64>() != Ok(f) {
388        s = format_g(f, 17);
389    }
390    if s.bytes().all(|c| c.is_ascii_digit() || c == b'-') {
391        if legacy_float { s } else { format!("{s}.0") }
392    } else {
393        s
394    }
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    /// `str2num` with integers and hex floats both enabled.
    fn modern(s: &[u8]) -> Option<Num> {
        str2num(s, true, true)
    }

    /// `str2num` in the 5.1 flavor: everything is a float, no hex floats.
    fn legacy(s: &[u8]) -> Option<Num> {
        str2num(s, false, false)
    }

    /// Parse a hex literal (integers and floats enabled) that must come out
    /// float-typed.
    fn hex_float(s: &[u8]) -> f64 {
        match hex_literal(s, true, true) {
            Some(Num::Float(value)) => value,
            _ => panic!(),
        }
    }

    #[test]
    fn str2num_semantics() {
        // Accepted spellings.
        assert_eq!(modern(b"  42  "), Some(Num::Int(42)));
        assert_eq!(modern(b"-10"), Some(Num::Int(-10)));
        assert_eq!(modern(b"+0x10"), Some(Num::Int(16)));
        assert_eq!(modern(b"-0x10"), Some(Num::Int(-16)));
        assert_eq!(modern(b" 0x1p4 "), Some(Num::Float(16.0)));
        assert_eq!(modern(b"3.5"), Some(Num::Float(3.5)));
        assert_eq!(modern(b"1e3"), Some(Num::Float(1000.0)));

        // Rejected spellings.
        assert_eq!(modern(b""), None);
        assert_eq!(modern(b" - 1"), None);
        assert_eq!(modern(b"10a"), None);
        assert_eq!(modern(b"0x"), None);

        // 5.1 flavor: everything is a float, no hex floats.
        assert_eq!(legacy(b"42"), Some(Num::Float(42.0)));
        assert_eq!(legacy(b"0x1p4"), None);

        // minint boundary: "-9223372036854775808" parses as the integer
        // minint (PUC l_str2int's `+ neg`), the positive magnitude 2^63
        // overflows to a float, and maxint stays an integer.
        assert_eq!(modern(b"-9223372036854775808"), Some(Num::Int(i64::MIN)));
        assert_eq!(modern(b"9223372036854775807"), Some(Num::Int(i64::MAX)));
        assert_eq!(
            modern(b"9223372036854775808"),
            Some(Num::Float(9223372036854775808.0))
        );
    }

    #[test]
    fn number_printing() {
        assert_eq!(num_to_string(Num::Int(42)), "42");
        assert_eq!(num_to_string(Num::Int(-1)), "-1");

        let floats = [
            (2.0, "2.0"),
            (-2.0, "-2.0"),
            (0.5, "0.5"),
            (1e300, "1e+300"),
            (1e-7, "1e-07"),
            (1e15, "1e+15"),
            (100.0, "100.0"),
            (f64::INFINITY, "inf"),
            (f64::NAN, "nan"),
            // PUC 5.5 two-stage rule: %.15g, then %.17g when the round-trip
            // is inexact (lobject.c tostringbuffFloat). Reference spellings
            // taken from the lua5.5 binary (fixture 226 pins the full matrix).
            (0.1, "0.1"),
            (1.0 / 3.0, "0.33333333333333331"),
            (std::f64::consts::PI, "3.1415926535897931"),
            (1e14, "100000000000000.0"),
            (9007199254740992.0, "9007199254740992.0"),
            (5e-324, "4.94065645841247e-324"),
        ];
        for (value, expected) in floats.iter() {
            assert_eq!(num_to_string(Num::Float(*value)), *expected);
        }
    }

    #[test]
    fn hex_float_rounding() {
        // More than 53 significant bits forces rounding; Rust's u64→f64
        // conversion is correctly rounded and serves as the reference.
        assert_eq!(
            hex_float(b"1FFFFFFFFFFFFF8.0p0"),
            0x1FFFFFFFFFFFFF8u64 as f64
        );
        assert_eq!(hex_float(b"1.8p1"), 3.0);
        assert_eq!(hex_float(b"0.8"), 0.5);
    }
}
luna_core/numeric.rs

luna_core/
numeric.rs