Skip to main content

luna_core/
numeric.rs

1//! Lua numeral conversion core (stone candidate: pure functions, no runtime
2//! types). Two consumers: the lexer (literal tokens, shape pre-validated by
3//! scanning) and the VM/stdlib (`str2num` — luaO_str2num semantics with
4//! whitespace and sign). Versioning is expressed as capability flags so this
5//! module stays dialect-agnostic.
6
/// A parsed Lua numeral, tagged as integer-typed or float-typed.
///
/// Lua 5.1 collapses everything to float at this layer, so 5.1 callers only
/// ever see [`Num::Float`].
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Num {
    /// Numeral carrying a signed 64-bit integer.
    Int(i64),
    /// Numeral carrying a double-precision float.
    Float(f64),
}

impl Num {
    /// Returns the value as an `f64`.
    ///
    /// Floats pass through untouched; integers go through a plain
    /// `as f64` cast, so the conversion may lose low-order bits.
    pub fn as_f64(self) -> f64 {
        match self {
            Self::Float(value) => value,
            Self::Int(value) => value as f64,
        }
    }

    /// Flips the sign of the numeral.
    ///
    /// Integer negation wraps, so `i64::MIN` maps to itself instead of
    /// overflowing.
    fn negate(self) -> Num {
        match self {
            Self::Float(value) => Self::Float(-value),
            Self::Int(value) => Self::Int(0i64.wrapping_sub(value)),
        }
    }
}
34
/// Maps one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value in `0..16`.
///
/// Any other byte, including every non-ASCII byte, yields `None`.
pub fn hex_digit(c: u8) -> Option<u32> {
    // A byte widens to the code point of the same value, and only the ASCII
    // hex digits are recognised by `to_digit` in radix 16.
    char::from(c).to_digit(16)
}
45
46/// Decimal numeral (no sign, no surrounding space).
47/// `int_ok = false` forces float results (Lua 5.1: numbers are doubles).
48/// `neg` is whether a leading '-' was stripped by the caller: it widens the
49/// integer range by one unit (PUC l_str2int's `+ neg`) so that the magnitude
50/// 2^63 parses as an integer — letting `tonumber("-9223372036854775808")`
51/// recover minint. The caller still applies the actual negation.
52pub fn dec_literal(text: &[u8], int_ok: bool, neg: bool) -> Option<Num> {
53    let mut i = 0;
54    let mut int_digits = 0;
55    while i < text.len() && text[i].is_ascii_digit() {
56        i += 1;
57        int_digits += 1;
58    }
59    let mut frac_digits = 0;
60    let mut has_dot = false;
61    if i < text.len() && text[i] == b'.' {
62        has_dot = true;
63        i += 1;
64        while i < text.len() && text[i].is_ascii_digit() {
65            i += 1;
66            frac_digits += 1;
67        }
68    }
69    if int_digits + frac_digits == 0 {
70        return None;
71    }
72    let mut has_exp = false;
73    if i < text.len() && matches!(text[i], b'e' | b'E') {
74        has_exp = true;
75        i += 1;
76        if i < text.len() && matches!(text[i], b'+' | b'-') {
77            i += 1;
78        }
79        let mut digits = 0;
80        while i < text.len() && text[i].is_ascii_digit() {
81            i += 1;
82            digits += 1;
83        }
84        if digits == 0 {
85            return None;
86        }
87    }
88    if i != text.len() {
89        return None;
90    }
91    let s = str::from_utf8(text).expect("ascii numeral");
92    if !has_dot && !has_exp && int_ok {
93        // decimal integer; accumulate the magnitude in u64 with PUC's overflow
94        // rule (l_str2int). The `+ neg` widens the last accepted digit so the
95        // magnitude 2^63 is taken as an integer when negative (== minint);
96        // on overflow it becomes a float. The caller applies the sign, so we
97        // return the wrapped magnitude (2^63 as i64 is the minint bit pattern).
98        const MAXBY10: u64 = i64::MAX as u64 / 10;
99        const MAXLAST: u64 = i64::MAX as u64 % 10;
100        let mut a: u64 = 0;
101        let mut overflow = false;
102        for &c in s.as_bytes() {
103            let d = (c - b'0') as u64;
104            if a >= MAXBY10 && (a > MAXBY10 || d > MAXLAST + neg as u64) {
105                overflow = true;
106                break;
107            }
108            a = a * 10 + d;
109        }
110        if !overflow {
111            return Some(Num::Int(a as i64));
112        }
113    }
114    s.parse::<f64>().ok().map(Num::Float)
115}
116
117/// Hex numeral after the `0x` prefix (no sign, no surrounding space).
118pub fn hex_literal(text: &[u8], int_ok: bool, float_ok: bool) -> Option<Num> {
119    let mut i = 0;
120    while i < text.len() && hex_digit(text[i]).is_some() {
121        i += 1;
122    }
123    let int_end = i;
124    let mut has_dot = false;
125    let mut frac = 0..0;
126    if i < text.len() && text[i] == b'.' {
127        has_dot = true;
128        i += 1;
129        let fs = i;
130        while i < text.len() && hex_digit(text[i]).is_some() {
131            i += 1;
132        }
133        frac = fs..i;
134    }
135    if int_end + frac.len() == 0 {
136        return None;
137    }
138    let has_exp = i < text.len() && matches!(text[i], b'p' | b'P');
139    let mut pexp: i64 = 0;
140    if has_exp {
141        i += 1;
142        let mut sign = 1i64;
143        if i < text.len() && matches!(text[i], b'+' | b'-') {
144            sign = if text[i] == b'-' { -1 } else { 1 };
145            i += 1;
146        }
147        let mut digits = 0;
148        let mut e: i64 = 0;
149        while i < text.len() && text[i].is_ascii_digit() {
150            e = (e * 10 + (text[i] - b'0') as i64).min(1 << 40);
151            i += 1;
152            digits += 1;
153        }
154        if digits == 0 {
155            return None;
156        }
157        pexp = sign * e;
158    }
159    if i != text.len() {
160        return None;
161    }
162    if !has_exp && !has_dot {
163        if int_ok {
164            // pure hex integer: wraps modulo 2^64 (5.3+ semantics)
165            let mut v: u64 = 0;
166            for &c in &text[..int_end] {
167                v = v
168                    .wrapping_mul(16)
169                    .wrapping_add(hex_digit(c).unwrap() as u64);
170            }
171            return Some(Num::Int(v as i64));
172        }
173        // ≤5.2 had no integer subtype: PUC `lua_strx2number` accumulates
174        // every hex digit in `lua_Number` (a double), so a 150-digit literal
175        // gives the actual mathematical value (~4e180) rather than the
176        // wrapped low-64 bits. math.lua 5.2 :59 bakes that exact equality.
177        let mut v: f64 = 0.0;
178        for &c in &text[..int_end] {
179            v = v * 16.0 + hex_digit(c).unwrap() as f64;
180        }
181        return Some(Num::Float(v));
182    }
183    if !float_ok {
184        return None;
185    }
186    // value = mant * 2^(4*exp4 + pexp); digits beyond 64 mantissa bits fold
187    // into the exponent (integer part) or the sticky bit (fraction part)
188    let mut mant: u64 = 0;
189    let mut sticky = false;
190    let mut exp4: i64 = 0;
191    for &c in &text[..int_end] {
192        let d = hex_digit(c).unwrap() as u64;
193        if mant >> 60 == 0 {
194            mant = mant * 16 + d;
195        } else {
196            sticky |= d != 0;
197            exp4 += 1;
198        }
199    }
200    for &c in &text[frac] {
201        let d = hex_digit(c).unwrap() as u64;
202        if mant >> 60 == 0 {
203            mant = mant * 16 + d;
204            exp4 -= 1;
205        } else {
206            sticky |= d != 0;
207        }
208    }
209    Some(Num::Float(compose_f64(mant, sticky, exp4 * 4 + pexp)))
210}
211
212/// luaO_str2num: optional surrounding whitespace and sign, decimal or hex.
213/// Used by VM string→number coercion and `tonumber`.
214pub fn str2num(s: &[u8], int_ok: bool, hex_float_ok: bool) -> Option<Num> {
215    let is_space = |c: &&u8| matches!(**c, b' ' | b'\t' | b'\n' | 0x0B | 0x0C | b'\r');
216    let mut s = s;
217    while s.first().filter(is_space).is_some() {
218        s = &s[1..];
219    }
220    while s.last().filter(is_space).is_some() {
221        s = &s[..s.len() - 1];
222    }
223    let neg = match s.first() {
224        Some(b'-') => {
225            s = &s[1..];
226            true
227        }
228        Some(b'+') => {
229            s = &s[1..];
230            false
231        }
232        _ => false,
233    };
234    let n = if s.len() > 2 && s[0] == b'0' && matches!(s[1], b'x' | b'X') {
235        hex_literal(&s[2..], int_ok, hex_float_ok)?
236    } else {
237        dec_literal(s, int_ok, neg)?
238    };
239    Some(if neg { n.negate() } else { n })
240}
241
242/// Round a 64-bit mantissa (+sticky) to f64 and scale by 2^exp.
243fn compose_f64(mant: u64, sticky: bool, exp: i64) -> f64 {
244    if mant == 0 {
245        return 0.0;
246    }
247    let bits = 64 - mant.leading_zeros() as i64;
248    let (m, extra) = if bits <= 53 {
249        (mant, 0i64)
250    } else {
251        let excess = (bits - 53) as u32;
252        let kept = mant >> excess;
253        let rem = mant & ((1u64 << excess) - 1);
254        let half = 1u64 << (excess - 1);
255        let round_up = rem > half || (rem == half && (sticky || kept & 1 == 1));
256        (kept + round_up as u64, excess as i64)
257    };
258    scale_f64(m as f64, exp + extra)
259}
260
/// Builds 2^`e` directly from its IEEE-754 bit pattern.
///
/// Only valid for normal exponents, `-1022..=1023`; debug builds assert it.
fn exp2(e: i64) -> f64 {
    const EXPONENT_BIAS: i64 = 1023;
    const MANTISSA_BITS: u32 = 52;

    debug_assert!((-1022..=1023).contains(&e));
    // Zero mantissa and zero sign bit: only the biased exponent field is set.
    let biased = (e + EXPONENT_BIAS) as u64;
    f64::from_bits(biased << MANTISSA_BITS)
}
265
266fn scale_f64(mut f: f64, mut e: i64) -> f64 {
267    while e > 1023 {
268        f *= exp2(1023);
269        e -= 1023;
270        if f.is_infinite() {
271            return f;
272        }
273    }
274    while e < -1022 {
275        f *= exp2(-1022);
276        e += 1022;
277        if f == 0.0 {
278            return f;
279        }
280    }
281    f * exp2(e)
282}
283
284/// Lua number → text. Integers print as integers. Floats print with
285/// shortest round-trip digits (the 5.5 "read back correctly" rule) in
286/// C `%g`-style presentation: scientific form when the decimal exponent
287/// falls outside [-4, 14), two-digit signed exponent, and `.0` appended to
288/// integral-looking decimals (PUC lua_number2str). Exact boundary alignment
289/// against PUC 5.5 output is rechecked by the P04 gate (strings/math suites).
290pub fn num_to_string(n: Num) -> String {
291    num_to_string_for(n, /* legacy_float = */ false)
292}
293
/// Formats `i` in decimal into the caller's stack buffer and returns the
/// sub-slice of `buf` holding the text.
///
/// Twenty bytes cover every `i64`: the longest rendering is
/// "-9223372036854775808". Zero is written at the front of the buffer; any
/// other value is written right-aligned at the back. Hot in tostring(int) on
/// numeric-heavy workloads: it skips the `String` allocation that
/// `i.to_string()` does.
#[inline]
pub fn write_i64_dec(i: i64, buf: &mut [u8; 20]) -> &[u8] {
    const DIGITS: &[u8; 10] = b"0123456789";

    if i == 0 {
        buf[0] = DIGITS[0];
        return &buf[..1];
    }

    // `unsigned_abs` is total: it also handles i64::MIN, whose plain
    // negation would overflow.
    let mut remaining = i.unsigned_abs();
    let mut start = buf.len();
    // Emit digits least-significant first, filling the buffer backwards.
    while remaining != 0 {
        start -= 1;
        buf[start] = DIGITS[(remaining % 10) as usize];
        remaining /= 10;
    }
    if i < 0 {
        start -= 1;
        buf[start] = b'-';
    }
    &buf[start..]
}
320
321/// Variant for ≤5.2: those dialects only had `lua_Number` (a double), so
322/// PUC's `%.14g` formatter trims any trailing `.0` (an integer-valued float
323/// renders as plain `2`, not `2.0`). 5.3+ introduced the integer subtype and
324/// the renderer started appending `.0` to distinguish floats — pm.lua's
325/// pattern transformations build `"%" .. (s+1)` and need `"%2"` on 5.1/5.2.
326pub fn num_to_string_for(n: Num, legacy_float: bool) -> String {
327    match n {
328        Num::Int(i) => i.to_string(),
329        Num::Float(f) => float_to_string(f, legacy_float),
330    }
331}
332
/// Renders a float in `%g`-like form using shortest round-trip digits.
///
/// * NaN and infinities print as `nan`, `inf` and `-inf`.
/// * Decimal exponents in `[-4, 14)` print in positional notation; unless
///   `legacy_float` is set, a rendering with no fractional part gets `.0`
///   appended.
/// * Everything else prints as `<mantissa>e<sign><exponent>`, with the
///   exponent padded to at least two digits.
fn float_to_string(f: f64, legacy_float: bool) -> String {
    if !f.is_finite() {
        let text = if f.is_nan() {
            "nan"
        } else if f < 0.0 {
            "-inf"
        } else {
            "inf"
        };
        return text.to_string();
    }

    // Rust's shortest scientific form "d[.ddd]e±x" supplies both the
    // mantissa digits and the decimal exponent.
    let sci = format!("{f:e}");
    let (mantissa, exp_text) = sci
        .rsplit_once('e')
        .expect("scientific form has exponent");
    let exp: i32 = exp_text.parse().expect("valid exponent");

    if !(-4..14).contains(&exp) {
        let sign = if exp < 0 { '-' } else { '+' };
        return format!("{mantissa}e{sign}{:02}", exp.abs());
    }

    let mut plain = f.to_string();
    // Only digits and a minus sign means no fractional part was printed.
    let integral_looking = !plain.bytes().any(|b| b != b'-' && !b.is_ascii_digit());
    if integral_looking && !legacy_float {
        plain.push_str(".0");
    }
    plain
}
357
#[cfg(test)]
mod tests {
    use super::*;

    /// `str2num` with integers and hex floats both enabled.
    fn modern(text: &[u8]) -> Option<Num> {
        str2num(text, true, true)
    }

    /// `str2num` in the 5.1 flavor: everything is a float, no hex floats.
    fn lua51(text: &[u8]) -> Option<Num> {
        str2num(text, false, false)
    }

    /// Parses a hex numeral body with all features on and unwraps the float.
    fn hex_float(text: &[u8]) -> f64 {
        match hex_literal(text, true, true) {
            Some(Num::Float(value)) => value,
            _ => panic!(),
        }
    }

    #[test]
    fn str2num_semantics() {
        // well-formed inputs
        assert_eq!(modern(b"  42  "), Some(Num::Int(42)));
        assert_eq!(modern(b"-10"), Some(Num::Int(-10)));
        assert_eq!(modern(b"+0x10"), Some(Num::Int(16)));
        assert_eq!(modern(b"-0x10"), Some(Num::Int(-16)));
        assert_eq!(modern(b" 0x1p4 "), Some(Num::Float(16.0)));
        assert_eq!(modern(b"3.5"), Some(Num::Float(3.5)));
        assert_eq!(modern(b"1e3"), Some(Num::Float(1000.0)));

        // malformed inputs
        assert_eq!(modern(b""), None);
        assert_eq!(modern(b" - 1"), None);
        assert_eq!(modern(b"10a"), None);
        assert_eq!(modern(b"0x"), None);

        // 5.1 flavor: everything is a float, no hex floats
        assert_eq!(lua51(b"42"), Some(Num::Float(42.0)));
        assert_eq!(lua51(b"0x1p4"), None);

        // minint boundary: "-9223372036854775808" parses as the integer
        // minint (PUC l_str2int's `+ neg`), but the positive magnitude 2^63
        // overflows to a float, and maxint stays an integer.
        assert_eq!(modern(b"-9223372036854775808"), Some(Num::Int(i64::MIN)));
        assert_eq!(modern(b"9223372036854775807"), Some(Num::Int(i64::MAX)));
        assert_eq!(
            modern(b"9223372036854775808"),
            Some(Num::Float(9223372036854775808.0))
        );
    }

    #[test]
    fn number_printing() {
        let cases = [
            (Num::Int(42), "42"),
            (Num::Int(-1), "-1"),
            (Num::Float(2.0), "2.0"),
            (Num::Float(-2.0), "-2.0"),
            (Num::Float(0.5), "0.5"),
            (Num::Float(1e300), "1e+300"),
            (Num::Float(1e-7), "1e-07"),
            (Num::Float(1e15), "1e+15"),
            (Num::Float(100.0), "100.0"),
            (Num::Float(f64::INFINITY), "inf"),
            (Num::Float(f64::NAN), "nan"),
            // shortest round-trip (the 5.5 printing rule)
            (Num::Float(0.1), "0.1"),
            (Num::Float(1.0 / 3.0), "0.3333333333333333"),
        ];
        for &(value, expected) in cases.iter() {
            assert_eq!(num_to_string(value), expected);
        }
    }

    #[test]
    fn hex_float_rounding() {
        // > 53 significant bits forces rounding; Rust's u64→f64 conversion
        // is correctly rounded and serves as the reference
        assert_eq!(
            hex_float(b"1FFFFFFFFFFFFF8.0p0"),
            0x1FFFFFFFFFFFFF8u64 as f64
        );
        assert_eq!(hex_float(b"1.8p1"), 3.0);
        assert_eq!(hex_float(b"0.8"), 0.5);
    }
}