1#![no_std]
2
3#[cfg(test)]
4extern crate std;
5
6mod arch;
7mod common;
8mod decimal;
9mod float;
10mod lemire;
11mod slow;
12mod table;
13
14use self::{common::BiasedFp, float::RawFloat, table::POWER_OF_FIVE_128};
15pub use crate::arch::simd_str2int;
16
17const FLOATING_LONGEST_DIGITS: usize = 17;
18const F64_BITS: u32 = 64;
19const F64_SIG_BITS: u32 = 52;
20const F64_SIG_FULL_BITS: u32 = 53;
21const F64_EXP_BIAS: i32 = 1023;
22const F64_SIG_MASK: u64 = 0x000F_FFFF_FFFF_FFFF;
23
24#[derive(Debug)]
25pub enum ParserNumber {
26 Unsigned(u64),
27 Signed(i64),
29 Float(f64),
31}
32
33#[derive(Debug)]
34pub enum Error {
35 InvalidNumber,
36 FloatMustBeFinite,
37}
38
39macro_rules! match_digit {
40 ($data:expr, $i:expr, $pattern:pat) => {
41 $i < $data.len() && matches!($data[$i], $pattern)
42 };
43}
44
45macro_rules! is_digit {
46 ($data:expr, $i:expr) => {
47 $i < $data.len() && $data[$i].is_ascii_digit()
48 };
49}
50
51macro_rules! digit {
52 ($data:expr, $i:expr) => {
53 ($data[$i] - b'0') as u64
54 };
55}
56
57macro_rules! check_digit {
58 ($data:expr, $i:expr) => {
59 if !($i < $data.len() && $data[$i].is_ascii_digit()) {
60 return Err(Error::InvalidNumber);
61 }
62 };
63}
64
65#[inline(always)]
66fn parse_exponent(data: &[u8], index: &mut usize) -> Result<i32, Error> {
67 let mut exponent: i32 = 0;
68 let mut negative = false;
69
70 if *index >= data.len() {
71 return Err(Error::InvalidNumber);
72 }
73
74 match data[*index] {
75 b'+' => *index += 1,
76 b'-' => {
77 negative = true;
78 *index += 1;
79 }
80 _ => {}
81 }
82
83 check_digit!(data, *index);
84 while exponent < 1000 && is_digit!(data, *index) {
85 exponent = digit!(data, *index) as i32 + exponent * 10;
86 *index += 1;
87 }
88 while is_digit!(data, *index) {
89 *index += 1;
90 }
91 if negative {
92 exponent = -exponent;
93 }
94 Ok(exponent)
95}
96
97const POW10_UINT: [u64; 18] = [
98 1,
99 10,
100 100,
101 1000,
102 10000,
103 100000,
104 1000000,
105 10000000,
106 100000000,
107 1000000000,
108 10000000000,
109 100000000000,
110 1000000000000,
111 10000000000000,
112 100000000000000,
113 1000000000000000,
114 10000000000000000,
115 100000000000000000,
116];
117
118#[inline(always)]
122fn parse_number_fraction(
123 data: &[u8],
124 index: &mut usize,
125 significant: &mut u64,
126 exponent: &mut i32,
127 mut need: isize,
128 dot_pos: usize,
129) -> Result<bool, Error> {
130 debug_assert!(need < FLOATING_LONGEST_DIGITS as isize);
131
132 if need > 0 {
139 if data.len() - *index >= 16 {
140 let (frac, ndigits) = unsafe { simd_str2int(&data[*index..], need as usize) };
141 *significant = *significant * POW10_UINT[ndigits] + frac;
142 *index += ndigits;
143 } else {
144 while need > 0 && is_digit!(data, *index) {
145 *significant = *significant * 10 + digit!(data, *index);
146 *index += 1;
147 need -= 1;
148 }
149 }
150 }
151
152 *exponent -= *index as i32 - dot_pos as i32;
153 let mut trunc = false;
154 while is_digit!(data, *index) {
155 trunc = true;
156 *index += 1;
157 }
158
159 if match_digit!(data, *index, b'e' | b'E') {
160 *index += 1;
161 *exponent += parse_exponent(data, &mut *index)?;
162 }
163 Ok(trunc)
164}
165
166#[inline(always)]
167pub fn parse_number(data: &[u8], index: &mut usize, negative: bool) -> Result<ParserNumber, Error> {
168 let mut significant: u64 = 0;
169 let mut exponent: i32 = 0;
170 let mut trunc = false;
171 let raw_num = &data[*index..];
172
173 if match_digit!(data, *index, b'0') {
174 *index += 1;
175
176 if *index >= data.len() || !matches!(data[*index], b'.' | b'e' | b'E') {
177 if negative {
179 return Ok(ParserNumber::Float(0.0));
180 }
181 return Ok(ParserNumber::Unsigned(0));
182 }
183
184 match data[*index] {
186 b'.' => {
187 *index += 1;
188 let dot_pos = *index;
189 check_digit!(data, *index);
190 while match_digit!(data, *index, b'0') {
191 *index += 1;
192 }
193 if match_digit!(data, *index, b'e' | b'E') {
195 *index += 1;
196 if match_digit!(data, *index, b'-' | b'+') {
197 *index += 1;
198 }
199 check_digit!(data, *index);
200 while is_digit!(data, *index) {
201 *index += 1;
202 }
203 return Ok(ParserNumber::Float(0.0));
204 }
205
206 if !is_digit!(data, *index) {
211 return Ok(ParserNumber::Float(0.0));
212 }
213
214 significant = digit!(data, *index);
215 *index += 1;
216
217 if is_digit!(data, *index) {
218 let need = FLOATING_LONGEST_DIGITS as isize - 1;
219 trunc = parse_number_fraction(
220 data,
221 index,
222 &mut significant,
223 &mut exponent,
224 need,
225 dot_pos,
226 )?;
227 } else {
228 exponent -= *index as i32 - dot_pos as i32;
229 if match_digit!(data, *index, b'e' | b'E') {
230 *index += 1;
231 exponent += parse_exponent(data, &mut *index)?;
232 }
233 }
234 }
235 b'e' | b'E' => {
236 *index += 1;
237 if match_digit!(data, *index, b'-' | b'+') {
238 *index += 1;
239 }
240 check_digit!(data, *index);
241 while is_digit!(data, *index) {
242 *index += 1;
243 }
244 return Ok(ParserNumber::Float(0.0));
245 }
246 _ => unreachable!("unreachable branch in parse_number_unchecked"),
247 }
248 } else {
249 let digit_start = *index;
251 while is_digit!(data, *index) {
252 significant = significant
255 .wrapping_mul(10)
256 .wrapping_add(digit!(data, *index));
257 *index += 1;
258 }
259 let mut digits_cnt = *index - digit_start;
260 if digits_cnt == 0 {
261 return Err(Error::InvalidNumber);
262 }
263
264 if digits_cnt > 19 {
266 *index = digit_start;
267 significant = 0;
268 digits_cnt = 0;
269 while is_digit!(data, *index) && digits_cnt < 19 {
270 significant = significant * 10 + digit!(data, *index);
271 digits_cnt += 1;
272 *index += 1;
273 }
274
275 while is_digit!(data, *index) {
277 exponent += 1;
278 *index += 1;
279 trunc = true;
280 }
281 }
282
283 if match_digit!(data, *index, b'e' | b'E') {
286 *index += 1;
288 exponent += parse_exponent(data, index)?;
289 } else if match_digit!(data, *index, b'.') {
290 *index += 1;
291 check_digit!(data, *index);
292 let dot_pos = *index;
293
294 let need = FLOATING_LONGEST_DIGITS as isize - digits_cnt as isize;
296 trunc =
297 parse_number_fraction(data, index, &mut significant, &mut exponent, need, dot_pos)?;
298 } else {
299 if exponent == 0 {
301 if negative {
302 if significant > (1u64 << 63) {
303 return Ok(ParserNumber::Float(-(significant as f64)));
304 } else {
305 return Ok(ParserNumber::Signed(0_i64.wrapping_sub(significant as i64)));
308 }
309 } else {
310 return Ok(ParserNumber::Unsigned(significant));
311 }
312 } else if exponent == 1 {
313 let last = digit!(data, *index - 1);
315 let (out, ov0) = significant.overflowing_mul(10);
316 let (out, ov1) = out.overflowing_add(last);
317 if !ov0 && !ov1 {
318 significant = out;
320 if negative {
321 return Ok(ParserNumber::Float(-(significant as f64)));
322 } else {
323 return Ok(ParserNumber::Unsigned(significant));
324 }
325 }
326 }
327 trunc = true;
328 }
329 }
330
331 parse_float(significant, exponent, negative, trunc, raw_num)
333}
334
335#[inline(always)]
336fn parse_float(
337 significant: u64,
338 exponent: i32,
339 negative: bool,
340 trunc: bool,
341 raw_num: &[u8],
342) -> Result<ParserNumber, Error> {
343 if significant >> 52 == 0 && (-22..=(22 + 15)).contains(&exponent) {
345 if let Some(mut float) = parse_float_fast(exponent, significant) {
346 if negative {
347 float = -float;
348 }
349 return Ok(ParserNumber::Float(float));
350 }
351 }
352
353 if !trunc && exponent > (-308 + 1) && exponent < (308 - 20) {
354 if let Some(raw) = parse_floating_normal_fast(exponent, significant) {
355 let mut float = f64::from_u64_bits(raw);
356 if negative {
357 float = -float;
358 }
359 return Ok(ParserNumber::Float(float));
360 }
361 }
362
363 let exponent = exponent as i64;
368 let mut fp = lemire::compute_float::<f64>(exponent, significant);
369 if trunc && fp.e >= 0 && fp != lemire::compute_float::<f64>(exponent, significant + 1) {
370 fp.e = -1;
371 }
372
373 if fp.e < 0 {
376 fp = slow::parse_long_mantissa::<f64>(raw_num);
377 }
378
379 let mut float = biased_fp_to_float::<f64>(fp);
380 if negative {
381 float = -float;
382 }
383
384 if float.is_infinite() {
386 return Err(Error::FloatMustBeFinite);
387 }
388 Ok(ParserNumber::Float(float))
389}
390
391#[inline(always)]
393fn parse_floating_normal_fast(exp10: i32, man: u64) -> Option<u64> {
394 let (mut hi, lo, hi2, add, bits);
395 let mut exp2: i32;
396 let mut exact = false;
397 let idx = exp10 + 342;
398 let sig2_ext = POWER_OF_FIVE_128[idx as usize].1;
399 let sig2 = POWER_OF_FIVE_128[idx as usize].0;
400
401 let mut lz = man.leading_zeros();
402 let sig1 = man << lz;
403 exp2 = ((217706 * exp10 - 4128768) >> 16) - lz as i32;
404
405 (lo, hi) = lemire::full_multiplication(sig1, sig2);
406
407 bits = hi & ((1u64 << (64 - 54 - 1)) - 1);
408 if bits.wrapping_sub(1) < ((1u64 << (64 - 54 - 1)) - 2) {
409 exact = true;
410 } else {
411 (_, hi2) = lemire::full_multiplication(sig1, sig2_ext);
412 add = lo.wrapping_add(hi2);
414 if add + 1 > 1u64 {
415 let carry = add < lo || add < hi2;
416 hi += carry as u64;
417 exact = true;
418 }
419 }
420
421 if exact {
422 lz = if hi < (1u64 << 63) { 1 } else { 0 };
423 hi <<= lz;
424 exp2 -= lz as i32;
425 exp2 += 64;
426
427 let round_up = (hi & (1u64 << (64 - 54))) > 0;
428 hi = hi.wrapping_add(if round_up { 1u64 << (64 - 54) } else { 0 });
429
430 if hi < (1u64 << (64 - 54)) {
431 hi = 1u64 << 63;
432 exp2 += 1;
433 }
434
435 hi >>= F64_BITS - F64_SIG_FULL_BITS;
436 exp2 += F64_BITS as i32 - F64_SIG_FULL_BITS as i32 + F64_SIG_BITS as i32;
437 exp2 += F64_EXP_BIAS;
438 let raw = ((exp2 as u64) << F64_SIG_BITS) | (hi & F64_SIG_MASK);
439 return Some(raw);
440 }
441 None
442}
443
444#[inline(always)]
445fn biased_fp_to_float<T: RawFloat>(x: BiasedFp) -> T {
447 let mut word = x.f;
448 word |= (x.e as u64) << T::MANTISSA_EXPLICIT_BITS;
449 T::from_u64_bits(word)
450}
451
452#[inline(always)]
453fn parse_float_fast(exp10: i32, significant: u64) -> Option<f64> {
454 let mut d = significant as f64;
455 if exp10 > 0 {
456 if exp10 > 22 {
457 d *= POW10_FLOAT[exp10 as usize - 22];
458 if (-1e15..=1e15).contains(&d) {
459 Some(d * POW10_FLOAT[22])
460 } else {
461 None
462 }
463 } else {
464 Some(d * POW10_FLOAT[exp10 as usize])
465 }
466 } else {
467 Some(d / POW10_FLOAT[(-exp10) as usize])
468 }
469}
470
471const POW10_FLOAT: [f64; 23] = [
472 1e-000, 1e+001,
473 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, 1e+012, 1e+013,
474 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021,
475 1e+022, ];
477
478#[cfg(test)]
479mod test {
480 use crate::{parse_number, ParserNumber};
481
482 fn test_parse_ok(input: &str, expect: f64) {
483 assert_eq!(input.parse::<f64>().unwrap(), expect);
484
485 let mut data = input.as_bytes().to_vec();
486 data.push(b' ');
487 let mut index = 0;
488 let num = parse_number(&data, &mut index, false).unwrap();
489 assert!(
490 matches!(num, ParserNumber::Float(f) if f == expect),
491 "parsed is {:?} failed num is {}",
492 num,
493 input
494 );
495 assert_eq!(data[index], b' ', "failed num is {}", input);
496 }
497
498 #[test]
499 fn test_parse_float() {
500 test_parse_ok("0.0", 0.0);
501 test_parse_ok("0.01", 0.01);
502 test_parse_ok("0.1", 0.1);
503 test_parse_ok("0.12", 0.12);
504 test_parse_ok("0.123", 0.123);
505 test_parse_ok("0.1234", 0.1234);
506 test_parse_ok("0.12345", 0.12345);
507 test_parse_ok("0.123456", 0.123456);
508 test_parse_ok("0.1234567", 0.1234567);
509 test_parse_ok("0.12345678", 0.12345678);
510 test_parse_ok("0.123456789", 0.123456789);
511 test_parse_ok("0.1234567890", 0.1234567890);
512 test_parse_ok("0.10000000149011612", 0.10000000149011612);
513 test_parse_ok("0.06411743306171047", 0.06411743306171047);
514
515 test_parse_ok("0e-1", 0e-1);
516 test_parse_ok("0e+1000000", 0e+1000000);
517 test_parse_ok("0.001e-1", 0.001e-1);
518 test_parse_ok("0.001e+123", 0.001e+123);
519 test_parse_ok(
520 "0.000000000000000000000000001e+123",
521 0.000000000000000000000000001e+123,
522 );
523
524 test_parse_ok("1.0", 1.0);
525 test_parse_ok("1350.0", 1350.0);
526 test_parse_ok("1.10000000149011612", 1.1000000014901161);
527
528 test_parse_ok("1e0", 1e0);
529 test_parse_ok("1.0e0", 1.0e0);
530 test_parse_ok("1.0e+0", 1.0e+0);
531 test_parse_ok("1.001e-123", 1.001e-123);
532 test_parse_ok("10000000149011610000.0e-123", 1.000_000_014_901_161e-104);
533 test_parse_ok(
534 "10000000149011612123.001e-123",
535 1.000_000_014_901_161_2e-104,
536 );
537 test_parse_ok("33333333333333333333", 3.333333333333333e19);
538 test_parse_ok("135e-12", 135e-12);
539
540 test_parse_ok("12448139190673828122020e-47", 1.244813919067383e-25);
542 test_parse_ok(
543 "3469446951536141862700000000000000000e-62",
544 3.469446951536142e-26,
545 );
546 }
547}