1use crate::decode_windows1252;
2use std::convert::TryFrom;
3use std::error;
4use std::fmt;
5
/// An error that can occur when converting a [`Scalar`] to a concrete type.
#[derive(Debug, Clone, PartialEq)]
pub enum ScalarError {
    /// The input was empty or contained a byte that is not accepted by the
    /// requested numeric conversion.
    AllDigits,

    /// The number does not fit in the requested type (or, for floats, has
    /// more digits than the parser can handle).
    Overflow,

    /// The input was not exactly `yes` or `no`.
    InvalidBool,

    /// The integer is too large in magnitude to be converted to an `f64`
    /// without losing precision. The payload is the value after the lossy
    /// conversion to `f64`.
    PrecisionLoss(f64),
}
21
22impl fmt::Display for ScalarError {
23 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
24 match self {
25 ScalarError::AllDigits => write!(f, "did not contain all digits"),
26 ScalarError::InvalidBool => write!(f, "is not a valid bool"),
27 ScalarError::Overflow => write!(f, "caused an overflow"),
28 ScalarError::PrecisionLoss(_) => write!(f, "precision loss"),
29 }
30 }
31}
32
33impl error::Error for ScalarError {
34 fn source(&self) -> Option<&(dyn error::Error + 'static)> {
35 None
36 }
37}
38
/// A borrowed, copyable view over a run of raw bytes that can be interpreted
/// as a number, a boolean, or text.
#[derive(PartialEq, Eq, Copy, Clone)]
pub struct Scalar<'a> {
    /// The underlying bytes; borrowed, never copied.
    data: &'a [u8],
}
57
58impl<'a> Scalar<'a> {
59 #[inline]
61 pub fn new(data: &'a [u8]) -> Scalar<'a> {
62 Scalar { data }
63 }
64
65 #[inline]
67 pub fn as_bytes(self) -> &'a [u8] {
68 self.data
69 }
70
71 #[inline]
88 pub fn to_f64(self) -> Result<f64, ScalarError> {
89 to_f64(self.data)
90 }
91
92 #[inline]
104 pub fn to_bool(self) -> Result<bool, ScalarError> {
105 to_bool(self.data)
106 }
107
108 #[inline]
120 pub fn to_i64(self) -> Result<i64, ScalarError> {
121 to_i64(self.data)
122 }
123
124 #[inline]
136 pub fn to_u64(self) -> Result<u64, ScalarError> {
137 to_u64(self.data)
138 }
139
140 #[inline]
152 pub fn is_ascii(self) -> bool {
153 self.data.is_ascii()
154 }
155}
156
157impl fmt::Debug for Scalar<'_> {
158 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
159 write!(f, "Scalar {{ {} }}", self)
160 }
161}
162
163impl fmt::Display for Scalar<'_> {
164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 if self.is_ascii() {
166 write!(f, "{}", decode_windows1252(self.data))
167 } else {
168 write!(f, "non-ascii string of {} length", self.data.len())
169 }
170 }
171}
172
173#[inline]
174fn to_bool(d: &[u8]) -> Result<bool, ScalarError> {
175 match d {
176 [b'y', b'e', b's'] => Ok(true),
177 [b'n', b'o'] => Ok(false),
178 _ => Err(ScalarError::InvalidBool),
179 }
180}
181
/// Number of decimal digits in `u64::MAX`. Inputs around this length are
/// handed to the checked (slow path) overflow routines.
const OVERFLOW_CUTOFF: usize = digits_in(u64::MAX);
/// `2^53 - 1`: integers with a larger magnitude are reported by `to_f64` as
/// a precision loss instead of being converted silently.
const SAFE_INTEGER: u64 = 2u64.pow(53) - 1;
/// Number of decimal digits in `SAFE_INTEGER`; shorter integer inputs skip
/// the precision and overflow checks entirely.
const SAFE_INTEGER_LEN: usize = digits_in(SAFE_INTEGER);
185
186#[inline]
188fn to_f64(mut d: &[u8]) -> Result<f64, ScalarError> {
189 let mut acc = 0;
190 let mut integer_part = d;
191
192 let (&c, rest) = d.split_first().ok_or(ScalarError::AllDigits)?;
193 let negative = c == b'-';
194 if negative {
195 integer_part = rest;
196 d = rest;
197 } else if c.is_ascii_digit() {
198 acc = u64::from(c - b'0');
199 d = rest;
200 } else if c == b'+' {
201 integer_part = rest;
202 d = rest;
203 } else if c != b'.' {
204 return Err(ScalarError::AllDigits);
205 }
206
207 let sign = -((negative as i64 * 2).wrapping_sub(1));
208 while let Some((&c, mut rest)) = d.split_first() {
209 if c.is_ascii_digit() {
210 acc = acc.wrapping_mul(10);
211 acc = acc.wrapping_add(u64::from(c - b'0'));
212 d = rest;
213 } else if c == b'.' {
214 let mut total = acc;
215 let mut nondigit = false;
216 if let Some((&last, fractions)) = rest.split_last() {
217 for &x in fractions {
218 nondigit |= !x.is_ascii_digit();
219 total = total.wrapping_mul(10);
220 total = total.wrapping_add(u64::from(x - b'0'));
221 }
222
223 if nondigit {
224 return Err(ScalarError::AllDigits);
225 }
226
227 if last.is_ascii_digit() {
228 total = total.wrapping_mul(10);
229 total = total.wrapping_add(u64::from(last - b'0'));
230 } else if last != b'f' {
231 return Err(ScalarError::AllDigits);
232 } else {
233 rest = &rest[..rest.len() - 1];
234 }
235 }
236
237 let fractional_digits = rest.len();
238 let whole_digits = integer_part.len() - fractional_digits - 1;
239
240 if fractional_digits + whole_digits >= OVERFLOW_CUTOFF - 1 {
241 check_overflow_init(rest, acc)?;
242 }
243
244 let pow = POWER_OF_TEN
245 .get(fractional_digits)
246 .ok_or(ScalarError::Overflow)?;
247 let d = (total as f64) / *pow;
248 return Ok((sign as f64) * d);
249 } else if c == b'f' && rest.is_empty() {
250 integer_part = &integer_part[..integer_part.len().saturating_sub(1)];
251 d = rest;
252 } else {
253 return Err(ScalarError::AllDigits);
254 }
255 }
256
257 if integer_part.len() < SAFE_INTEGER_LEN {
258 return Ok((sign * (acc as i64)) as f64);
259 }
260
261 check_precision_and_overflow(sign, acc, integer_part)
262}
263
264#[cold]
265fn check_precision_and_overflow(
266 sign: i64,
267 acc: u64,
268 integer_part: &[u8],
269) -> Result<f64, ScalarError> {
270 if integer_part.len() >= OVERFLOW_CUTOFF {
271 check_overflow(integer_part)?;
272 }
273
274 let val = i64::try_from(acc)
275 .map(|x| x * sign)
276 .map_err(|_| ScalarError::Overflow);
277
278 if acc > SAFE_INTEGER {
279 let approx = if sign == 1 { acc as f64 } else { val? as f64 };
280 return Err(ScalarError::PrecisionLoss(approx));
281 }
282
283 Ok(val? as f64)
284}
285
286#[inline]
287fn to_i64(d: &[u8]) -> Result<i64, ScalarError> {
288 let (result, left) = to_i64_t(d)?;
289 if left.is_empty() {
290 Ok(result)
291 } else {
292 Err(ScalarError::AllDigits)
293 }
294}
295
296#[inline]
297pub(crate) fn to_i64_t(d: &[u8]) -> Result<(i64, &[u8]), ScalarError> {
298 let (&c, data) = d.split_first().ok_or(ScalarError::AllDigits)?;
299 let mut sign = 1;
300
301 let start = if c.is_ascii_digit() {
302 c - b'0'
303 } else if c == b'-' {
304 sign = -1;
305 0
306 } else if c == b'+' {
307 0
308 } else {
309 return Err(ScalarError::AllDigits);
310 };
311
312 let (val, rest) = to_u64_partial(data, u64::from(start));
313 if d.len() >= OVERFLOW_CUTOFF - 1 {
314 check_overflow(d)?;
315 }
316
317 let val = i64::try_from(val)
318 .map(|x| sign * x)
319 .map_err(|_| ScalarError::Overflow)?;
320 Ok((val, rest))
321}
322
323#[inline]
330pub(crate) fn to_u64(d: &[u8]) -> Result<u64, ScalarError> {
331 let (&c, data) = d.split_first().ok_or(ScalarError::AllDigits)?;
332 let mut result = if c.is_ascii_digit() {
333 u64::from(c - b'0')
334 } else if c == b'+' {
335 0
336 } else {
337 return Err(ScalarError::AllDigits);
338 };
339
340 for &x in data {
341 if !x.is_ascii_digit() {
342 return Err(ScalarError::AllDigits);
343 }
344
345 result = result.wrapping_mul(10);
346 result = result.wrapping_add(u64::from(x - b'0'));
347 }
348
349 if d.len() >= OVERFLOW_CUTOFF - 1 {
352 check_overflow(d)?;
353 }
354
355 Ok(result)
356}
357
358#[cold]
359fn check_overflow(mut d: &[u8]) -> Result<u64, ScalarError> {
360 if d.is_empty() {
361 return Err(ScalarError::AllDigits);
362 }
363
364 if matches!(d[0], b'+' | b'-') {
365 d = &d[1..];
366 }
367
368 check_overflow_init(d, 0)
369}
370
371#[cold]
372fn check_overflow_init(d: &[u8], start: u64) -> Result<u64, ScalarError> {
373 let mut acc = start;
374 for &x in d {
375 if !x.is_ascii_digit() {
378 return Ok(acc);
379 }
380
381 acc = acc
382 .checked_mul(10)
383 .and_then(|acc| acc.checked_add(u64::from(x - b'0')))
384 .ok_or(ScalarError::Overflow)?;
385 }
386
387 Ok(acc)
388}
389
/// Accumulates the leading digits of `d` onto `start` (wrapping on overflow)
/// and returns the value together with the unparsed remainder, which begins
/// at the first byte that is not a digit.
#[inline]
fn to_u64_partial(d: &[u8], start: u64) -> (u64, &[u8]) {
    let mut value = start;

    for (idx, &byte) in d.iter().enumerate() {
        if !byte.is_ascii_digit() {
            return (value, &d[idx..]);
        }

        value = value
            .wrapping_mul(10)
            .wrapping_add(u64::from(byte - b'0'));
    }

    // Every byte was a digit, so nothing is left over.
    (value, &d[d.len()..])
}
406
/// Returns how many decimal digits are needed to write `n` (zero needs one).
const fn digits_in(n: u64) -> usize {
    match n.checked_ilog10() {
        Some(exponent) => exponent as usize + 1,
        // Only zero has no base-10 logarithm.
        None => 1,
    }
}
410
/// Powers of ten indexed by exponent (`POWER_OF_TEN[n] == 10^n`), used by
/// `to_f64` as the divisor for a fraction with `n` digits. A fraction with
/// more digits than the table covers is reported as an overflow.
const POWER_OF_TEN: [f64; 23] = [
    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
    1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
];
415
#[cfg(test)]
mod tests {
    use super::*;
    use quickcheck_macros::quickcheck;

    /// A scalar should stay as small as the slice it wraps.
    #[test]
    fn test_memory_size() {
        assert!(std::mem::size_of::<Scalar>() <= 2 * std::mem::size_of::<usize>());
    }

    /// Only the exact spellings `yes` and `no` are booleans.
    #[test]
    fn scalar_to_bool() {
        assert_eq!((Scalar::new(b"yes").to_bool()), Ok(true));
        assert_eq!((Scalar::new(b"no").to_bool()), Ok(false));
        assert_eq!(
            Scalar::new(b"maybe").to_bool(),
            Err(ScalarError::InvalidBool)
        );
        assert_eq!(Scalar::new(b"Yes").to_bool(), Err(ScalarError::InvalidBool));
    }

    /// Integers, fractions, signs and a bare leading dot all parse as floats.
    #[test]
    fn scalar_to_f64() {
        assert_eq!((Scalar::new(b"0").to_f64()), Ok(0.0));
        assert_eq!((Scalar::new(b"1").to_f64()), Ok(1.0));
        assert_eq!((Scalar::new(b"-1").to_f64()), Ok(-1.0));
        assert_eq!((Scalar::new(b"-10000").to_f64()), Ok(-10000.0));
        assert_eq!((Scalar::new(b"10000").to_f64()), Ok(10000.0));
        assert_eq!((Scalar::new(b"20405029").to_f64()), Ok(20405029.0));
        assert_eq!((Scalar::new(b"-20405029").to_f64()), Ok(-20405029.0));
        assert_eq!(
            (Scalar::new(b"20405029553322").to_f64()),
            Ok(20405029553322.0)
        );
        assert_eq!(
            (Scalar::new(b"-20405029553322").to_f64()),
            Ok(-20405029553322.0)
        );

        assert_eq!((Scalar::new(b"0.504").to_f64()), Ok(0.504));
        assert_eq!((Scalar::new(b"-0.504").to_f64()), Ok(-0.504));
        assert_eq!((Scalar::new(b".504").to_f64()), Ok(0.504));
        assert_eq!((Scalar::new(b"-.504").to_f64()), Ok(-0.504));
        assert_eq!((Scalar::new(b"1.00125").to_f64()), Ok(1.00125));
        assert_eq!((Scalar::new(b"-1.50000").to_f64()), Ok(-1.5));
        assert_eq!((Scalar::new(b"-10000.0").to_f64()), Ok(-10000.0));
        assert_eq!((Scalar::new(b"10000.000").to_f64()), Ok(10000.0));
        assert_eq!((Scalar::new(b"20405029.125").to_f64()), Ok(20405029.125));
        assert_eq!((Scalar::new(b"-20405029.125").to_f64()), Ok(-20405029.125));
        assert_eq!(
            (Scalar::new(b"20405029553322.015").to_f64()),
            Ok(20405029553322.015)
        );
        assert_eq!(
            (Scalar::new(b"-20405029553322.015").to_f64()),
            Ok(-20405029553322.015)
        );
        assert_eq!(
            Scalar::new(b"10.99999999999999").to_f64(),
            Ok(10.99999999999999)
        );
        assert_eq!((Scalar::new(b"+0.5").to_f64()), Ok(0.5));

        assert!(Scalar::new(b"E").to_f64().is_err());
        assert!(Scalar::new(b"").to_f64().is_err());
    }

    /// A trailing `f` is tolerated on floats and integers alike.
    #[test]
    fn scalar_to_f64_with_f_suffix() {
        assert_eq!((Scalar::new(b"0.0f").to_f64()), Ok(0.0));
        assert_eq!((Scalar::new(b"-5.5f").to_f64()), Ok(-5.5));
        assert_eq!((Scalar::new(b"10.0f").to_f64()), Ok(10.0));
        assert_eq!((Scalar::new(b"0.40f").to_f64()), Ok(0.4));
        assert_eq!((Scalar::new(b"123.456f").to_f64()), Ok(123.456));
        assert_eq!((Scalar::new(b"-0.001f").to_f64()), Ok(-0.001));
        assert_eq!((Scalar::new(b"+42.0f").to_f64()), Ok(42.0));
        assert_eq!((Scalar::new(b".5f").to_f64()), Ok(0.5));
        assert_eq!((Scalar::new(b"1f").to_f64()), Ok(1.0));
        assert_eq!((Scalar::new(b"-1f").to_f64()), Ok(-1.0));
        assert_eq!((Scalar::new(b"10.f").to_f64()), Ok(10.0));

        assert!(Scalar::new(b"f").to_f64().is_err());
        assert!(Scalar::new(b"invalidf").to_f64().is_err());
        assert_eq!((Scalar::new(b"0f").to_f64()), Ok(0.0));
    }

    /// More fractional digits than the power-of-ten table covers is an error.
    #[test]
    fn scalar_f64_fraction_too_long() {
        assert!(Scalar::new(b"0.00000000000000000000000").to_f64().is_err());
    }

    /// Signed integers parse up to `i64::MAX`; values outside the range fail.
    #[test]
    fn scalar_to_i64() {
        assert_eq!((Scalar::new(b"0").to_i64()), Ok(0));
        assert_eq!((Scalar::new(b"1").to_i64()), Ok(1));
        assert_eq!((Scalar::new(b"-1").to_i64()), Ok(-1));
        assert_eq!((Scalar::new(b"-10000").to_i64()), Ok(-10000));
        assert_eq!((Scalar::new(b"10000").to_i64()), Ok(10000));
        assert_eq!((Scalar::new(b"20405029").to_i64()), Ok(20405029));
        assert_eq!((Scalar::new(b"-20405029").to_i64()), Ok(-20405029));
        assert_eq!(
            (Scalar::new(b"20405029553322").to_i64()),
            Ok(20405029553322)
        );
        assert_eq!(
            (Scalar::new(b"-20405029553322").to_i64()),
            Ok(-20405029553322)
        );

        assert_eq!((Scalar::new(b"+0").to_i64()), Ok(0));
        assert_eq!((Scalar::new(b"+1").to_i64()), Ok(1));

        assert_eq!(
            Scalar::new(b"9223372036854775807").to_i64(),
            Ok(9223372036854775807)
        );
        assert!(Scalar::new(b"-9223372036854775809").to_i64().is_err());
        assert!(Scalar::new(b"9223372036854775808").to_i64().is_err());
    }

    /// Unsigned integers parse up to `u64::MAX`, with an optional `+`.
    #[test]
    fn scalar_to_u64() {
        assert_eq!((Scalar::new(b"0").to_u64()), Ok(0));
        assert_eq!((Scalar::new(b"1").to_u64()), Ok(1));
        assert_eq!((Scalar::new(b"45").to_u64()), Ok(45));
        assert_eq!((Scalar::new(b"+45").to_u64()), Ok(45));
        assert_eq!((Scalar::new(b"10000").to_u64()), Ok(10000));
        assert_eq!((Scalar::new(b"20405029").to_u64()), Ok(20405029));
        assert_eq!(
            (Scalar::new(b"20405029553322").to_u64()),
            Ok(20405029553322)
        );
        assert_eq!(
            (Scalar::new(b"+20405029553322").to_u64()),
            Ok(20405029553322)
        );
        assert_eq!(
            (Scalar::new(b"18446744073709551615").to_u64()),
            Ok(18446744073709551615)
        );
        assert_eq!(
            (Scalar::new(b"+18446744073709551615").to_u64()),
            Ok(18446744073709551615)
        );
    }

    /// Values past `u64::MAX` are rejected instead of wrapping.
    #[test]
    fn scalar_to_u64_overflow() {
        assert!(
            Scalar::new(b"888888888888888888888888888888888")
                .to_u64()
                .is_err()
        );
        assert!(Scalar::new(b"666666666666666685902").to_u64().is_err());
        assert!(Scalar::new(b"184467440737095516106").to_u64().is_err());
    }

    /// Floats with too many significant digits are rejected.
    #[test]
    fn scalar_to_f64_overflow() {
        assert!(
            Scalar::new(b"9999999999.99999999999999999")
                .to_f64()
                .is_err()
        );
        assert!(
            Scalar::new(b"999999999999999999999.999999999")
                .to_f64()
                .is_err()
        );
        assert!(Scalar::new(b"10.99999990999999999999999").to_f64().is_err());
    }

    /// The empty scalar is not a valid value of any type.
    #[test]
    fn scalar_empty_string() {
        let s = Scalar::new(b"");
        assert!(s.to_bool().is_err());
        assert!(s.to_f64().is_err());
        assert!(s.to_i64().is_err());
        assert!(s.to_u64().is_err());
    }

    /// Integers beyond 2^53 - 1 still parse as integers but report a
    /// precision loss (with the approximated value) when read as a float.
    #[test]
    fn scalar_precision() {
        let s = Scalar::new(b"90071992547409097");
        assert_eq!(s.to_i64(), Ok(90071992547409097));
        assert_eq!(s.to_u64(), Ok(90071992547409097));
        let fl = s.to_f64().unwrap_err();
        assert_eq!(fl, ScalarError::PrecisionLoss(90071992547409100.0));

        let s = Scalar::new(b"18446744073709547616");
        assert!(s.to_i64().is_err());
        assert_eq!(s.to_u64(), Ok(18446744073709547616));
        let fl = s.to_f64().unwrap_err();
        assert_eq!(fl, ScalarError::PrecisionLoss(18446744073709548000.0));

        let s = Scalar::new(b"-90071992547409097");
        assert_eq!(s.to_i64(), Ok(-90071992547409097));
        assert!(s.to_u64().is_err());
        let fl = s.to_f64().unwrap_err();
        assert_eq!(fl, ScalarError::PrecisionLoss(-90071992547409100.0));
    }

    /// The crate's Windows-1252 lookup table must agree with `encoding_rs`
    /// for arbitrary byte input.
    #[quickcheck]
    fn to_string_equality(data: Vec<u8>) -> bool {
        use encoding_rs::*;
        let (cow, _) = WINDOWS_1252.decode_without_bom_handling(&data);
        let actual: String = data
            .iter()
            .map(|&x| crate::data::WINDOWS_1252[x as usize])
            .collect();

        cow.into_owned() == actual
    }
}