jomini/
scalar.rs

1use crate::decode_windows1252;
2use std::convert::TryFrom;
3use std::error;
4use std::fmt;
5
/// The ways in which converting a [`Scalar`] into a typed value can fail.
#[derive(Clone, Debug, PartialEq)]
pub enum ScalarError {
    /// The scalar was empty or contained a byte that is not allowed in the
    /// requested number
    AllDigits,

    /// The numerical value of the scalar does not fit in the requested type
    Overflow,

    /// The scalar is not one of the recognized boolean values
    InvalidBool,

    /// Returning a float would lose precision; the payload is the float that
    /// would have been returned
    PrecisionLoss(f64),
}
21
22impl fmt::Display for ScalarError {
23    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
24        match self {
25            ScalarError::AllDigits => write!(f, "did not contain all digits"),
26            ScalarError::InvalidBool => write!(f, "is not a valid bool"),
27            ScalarError::Overflow => write!(f, "caused an overflow"),
28            ScalarError::PrecisionLoss(_) => write!(f, "precision loss"),
29        }
30    }
31}
32
33impl error::Error for ScalarError {
34    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
35        None
36    }
37}
38
/// A single value, stored as a borrowed slice of raw bytes.
///
/// A scalar carries no encoding information, so an appropriate decoder must
/// be chosen whenever text is to be extracted from it.
///
/// ```
/// use jomini::Scalar;
///
/// let v1 = Scalar::new(b"10");
/// assert_eq!(v1.to_u64(), Ok(10));
/// assert_eq!(v1.to_i64(), Ok(10));
/// assert_eq!(v1.to_f64(), Ok(10.0));
/// assert!(v1.to_bool().is_err());
/// ```
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct Scalar<'a> {
    data: &'a [u8],
}
57
58impl<'a> Scalar<'a> {
59    /// Create a new scalar backed by a byte slice
60    #[inline]
61    pub fn new(data: &'a [u8]) -> Scalar<'a> {
62        Scalar { data }
63    }
64
65    /// View the raw data
66    #[inline]
67    pub fn as_bytes(self) -> &'a [u8] {
68        self.data
69    }
70
71    /// Try converting the scalar to f64
72    ///
73    /// Supports optional 'f' suffix for floating point literals.
74    ///
75    /// ```
76    /// use jomini::Scalar;
77    ///
78    /// let v1 = Scalar::new(b"1.000");
79    /// assert_eq!(v1.to_f64(), Ok(1.0));
80    ///
81    /// let v2 = Scalar::new(b"-5.67821");
82    /// assert_eq!(v2.to_f64(), Ok(-5.67821));
83    ///
84    /// let v3 = Scalar::new(b"10.0f");
85    /// assert_eq!(v3.to_f64(), Ok(10.0));
86    /// ```
87    #[inline]
88    pub fn to_f64(self) -> Result<f64, ScalarError> {
89        to_f64(self.data)
90    }
91
92    /// Try converting the scalar to boolean, only "yes" and "no" can be mapped:
93    ///
94    /// ```
95    /// use jomini::Scalar;
96    ///
97    /// let v1 = Scalar::new(b"yes");
98    /// assert_eq!(v1.to_bool(), Ok(true));
99    ///
100    /// let v2 = Scalar::new(b"no");
101    /// assert_eq!(v2.to_bool(), Ok(false));
102    /// ```
103    #[inline]
104    pub fn to_bool(self) -> Result<bool, ScalarError> {
105        to_bool(self.data)
106    }
107
108    /// Try converting the scalar to i64
109    ///
110    /// ```
111    /// use jomini::Scalar;
112    ///
113    /// let v1 = Scalar::new(b"-50");
114    /// assert_eq!(v1.to_i64(), Ok(-50));
115    ///
116    /// let v2 = Scalar::new(b"120");
117    /// assert_eq!(v2.to_i64(), Ok(120));
118    /// ```
119    #[inline]
120    pub fn to_i64(self) -> Result<i64, ScalarError> {
121        to_i64(self.data)
122    }
123
124    /// Try converting the scalar to u64
125    ///
126    /// ```
127    /// use jomini::Scalar;
128    ///
129    /// let v1 = Scalar::new(b"50");
130    /// assert_eq!(v1.to_i64(), Ok(50));
131    ///
132    /// let v2 = Scalar::new(b"120");
133    /// assert_eq!(v2.to_i64(), Ok(120));
134    /// ```
135    #[inline]
136    pub fn to_u64(self) -> Result<u64, ScalarError> {
137        to_u64(self.data)
138    }
139
140    /// Returns if the scalar contains only ascii values
141    ///
142    /// ```
143    /// use jomini::Scalar;
144    ///
145    /// let v1 = Scalar::new(b"a");
146    /// assert!(v1.is_ascii());
147    ///
148    /// let v2 = Scalar::new(&[255][..]);
149    /// assert!(!v2.is_ascii());
150    /// ```
151    #[inline]
152    pub fn is_ascii(self) -> bool {
153        self.data.is_ascii()
154    }
155}
156
157impl fmt::Debug for Scalar<'_> {
158    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
159        write!(f, "Scalar {{ {} }}", self)
160    }
161}
162
163impl fmt::Display for Scalar<'_> {
164    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165        if self.is_ascii() {
166            write!(f, "{}", decode_windows1252(self.data))
167        } else {
168            write!(f, "non-ascii string of {} length", self.data.len())
169        }
170    }
171}
172
173#[inline]
174fn to_bool(d: &[u8]) -> Result<bool, ScalarError> {
175    match d {
176        [b'y', b'e', b's'] => Ok(true),
177        [b'n', b'o'] => Ok(false),
178        _ => Err(ScalarError::InvalidBool),
179    }
180}
181
182const OVERFLOW_CUTOFF: usize = digits_in(u64::MAX);
183const SAFE_INTEGER: u64 = 2u64.pow(53) - 1;
184const SAFE_INTEGER_LEN: usize = digits_in(SAFE_INTEGER);
185
186/// Inspired by https://github.com/lemire/fast_double_parser
187#[inline]
188fn to_f64(mut d: &[u8]) -> Result<f64, ScalarError> {
189    let mut acc = 0;
190    let mut integer_part = d;
191
192    let (&c, rest) = d.split_first().ok_or(ScalarError::AllDigits)?;
193    let negative = c == b'-';
194    if negative {
195        integer_part = rest;
196        d = rest;
197    } else if c.is_ascii_digit() {
198        acc = u64::from(c - b'0');
199        d = rest;
200    } else if c == b'+' {
201        integer_part = rest;
202        d = rest;
203    } else if c != b'.' {
204        return Err(ScalarError::AllDigits);
205    }
206
207    let sign = -((negative as i64 * 2).wrapping_sub(1));
208    while let Some((&c, mut rest)) = d.split_first() {
209        if c.is_ascii_digit() {
210            acc = acc.wrapping_mul(10);
211            acc = acc.wrapping_add(u64::from(c - b'0'));
212            d = rest;
213        } else if c == b'.' {
214            let mut total = acc;
215            let mut nondigit = false;
216            if let Some((&last, fractions)) = rest.split_last() {
217                for &x in fractions {
218                    nondigit |= !x.is_ascii_digit();
219                    total = total.wrapping_mul(10);
220                    total = total.wrapping_add(u64::from(x - b'0'));
221                }
222
223                if nondigit {
224                    return Err(ScalarError::AllDigits);
225                }
226
227                if last.is_ascii_digit() {
228                    total = total.wrapping_mul(10);
229                    total = total.wrapping_add(u64::from(last - b'0'));
230                } else if last != b'f' {
231                    return Err(ScalarError::AllDigits);
232                } else {
233                    rest = &rest[..rest.len() - 1];
234                }
235            }
236
237            let fractional_digits = rest.len();
238            let whole_digits = integer_part.len() - fractional_digits - 1;
239
240            if fractional_digits + whole_digits >= OVERFLOW_CUTOFF - 1 {
241                check_overflow_init(rest, acc)?;
242            }
243
244            let pow = POWER_OF_TEN
245                .get(fractional_digits)
246                .ok_or(ScalarError::Overflow)?;
247            let d = (total as f64) / *pow;
248            return Ok((sign as f64) * d);
249        } else if c == b'f' && rest.is_empty() {
250            integer_part = &integer_part[..integer_part.len().saturating_sub(1)];
251            d = rest;
252        } else {
253            return Err(ScalarError::AllDigits);
254        }
255    }
256
257    if integer_part.len() < SAFE_INTEGER_LEN {
258        return Ok((sign * (acc as i64)) as f64);
259    }
260
261    check_precision_and_overflow(sign, acc, integer_part)
262}
263
264#[cold]
265fn check_precision_and_overflow(
266    sign: i64,
267    acc: u64,
268    integer_part: &[u8],
269) -> Result<f64, ScalarError> {
270    if integer_part.len() >= OVERFLOW_CUTOFF {
271        check_overflow(integer_part)?;
272    }
273
274    let val = i64::try_from(acc)
275        .map(|x| x * sign)
276        .map_err(|_| ScalarError::Overflow);
277
278    if acc > SAFE_INTEGER {
279        let approx = if sign == 1 { acc as f64 } else { val? as f64 };
280        return Err(ScalarError::PrecisionLoss(approx));
281    }
282
283    Ok(val? as f64)
284}
285
286#[inline]
287fn to_i64(d: &[u8]) -> Result<i64, ScalarError> {
288    let (result, left) = to_i64_t(d)?;
289    if left.is_empty() {
290        Ok(result)
291    } else {
292        Err(ScalarError::AllDigits)
293    }
294}
295
296#[inline]
297pub(crate) fn to_i64_t(d: &[u8]) -> Result<(i64, &[u8]), ScalarError> {
298    let (&c, data) = d.split_first().ok_or(ScalarError::AllDigits)?;
299    let mut sign = 1;
300
301    let start = if c.is_ascii_digit() {
302        c - b'0'
303    } else if c == b'-' {
304        sign = -1;
305        0
306    } else if c == b'+' {
307        0
308    } else {
309        return Err(ScalarError::AllDigits);
310    };
311
312    let (val, rest) = to_u64_partial(data, u64::from(start));
313    if d.len() >= OVERFLOW_CUTOFF - 1 {
314        check_overflow(d)?;
315    }
316
317    let val = i64::try_from(val)
318        .map(|x| sign * x)
319        .map_err(|_| ScalarError::Overflow)?;
320    Ok((val, rest))
321}
322
323/// Convert a buffer to an u64. This function is micro-optimized for small
324/// inputs. Previous implementations had higher throughput on larger input but
325/// couldn't parse small inputs as quickly. Micro-optimizing this function for
326/// small inputs shaved ~7% off deserializing 120MB save as 10% of all time
327/// was spent in this function. Dates are a common occurrence of numbers that
328/// are 1-4 digits in length
329#[inline]
330pub(crate) fn to_u64(d: &[u8]) -> Result<u64, ScalarError> {
331    let (&c, data) = d.split_first().ok_or(ScalarError::AllDigits)?;
332    let mut result = if c.is_ascii_digit() {
333        u64::from(c - b'0')
334    } else if c == b'+' {
335        0
336    } else {
337        return Err(ScalarError::AllDigits);
338    };
339
340    for &x in data {
341        if !x.is_ascii_digit() {
342            return Err(ScalarError::AllDigits);
343        }
344
345        result = result.wrapping_mul(10);
346        result = result.wrapping_add(u64::from(x - b'0'));
347    }
348
349    // Check for overflow. We know the overflow possibility exists only when
350    // there are at least 20 digits to match u64::MAX (184467440737095516105)
351    if d.len() >= OVERFLOW_CUTOFF - 1 {
352        check_overflow(d)?;
353    }
354
355    Ok(result)
356}
357
358#[cold]
359fn check_overflow(mut d: &[u8]) -> Result<u64, ScalarError> {
360    if d.is_empty() {
361        return Err(ScalarError::AllDigits);
362    }
363
364    if matches!(d[0], b'+' | b'-') {
365        d = &d[1..];
366    }
367
368    check_overflow_init(d, 0)
369}
370
371#[cold]
372fn check_overflow_init(d: &[u8], start: u64) -> Result<u64, ScalarError> {
373    let mut acc = start;
374    for &x in d {
375        // The input should already be validated by this point, so we just
376        // return the accumulator if we find a non-digit.
377        if !x.is_ascii_digit() {
378            return Ok(acc);
379        }
380
381        acc = acc
382            .checked_mul(10)
383            .and_then(|acc| acc.checked_add(u64::from(x - b'0')))
384            .ok_or(ScalarError::Overflow)?;
385    }
386
387    Ok(acc)
388}
389
/// Fold the leading run of ASCII digits in `d` onto `start` and return the
/// result along with the bytes that follow the run.
///
/// The arithmetic wraps on overflow; callers that care are expected to
/// validate long inputs separately.
#[inline]
fn to_u64_partial(d: &[u8], start: u64) -> (u64, &[u8]) {
    let digit_count = d
        .iter()
        .position(|byte| !byte.is_ascii_digit())
        .unwrap_or(d.len());
    let (digits, remainder) = d.split_at(digit_count);

    let value = digits.iter().fold(start, |acc, &byte| {
        acc.wrapping_mul(10).wrapping_add(u64::from(byte - b'0'))
    });

    (value, remainder)
}
406
/// Number of decimal digits needed to write `n`; zero counts as one digit.
const fn digits_in(n: u64) -> usize {
    match n {
        0 => 1,
        _ => n.ilog10() as usize + 1,
    }
}
410
/// Powers of ten from 1e0 through 1e22, so that `POWER_OF_TEN[i]` is 10^i.
///
/// `to_f64` indexes this table with the number of fractional digits and
/// reports `ScalarError::Overflow` when that count is past the last entry.
const POWER_OF_TEN: [f64; 23] = [
    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
    1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
];
415
// Unit tests for the scalar conversions, plus one property test that compares
// the crate's Windows-1252 table against `encoding_rs`.
#[cfg(test)]
mod tests {
    use super::*;
    use quickcheck_macros::quickcheck;

    // A `Scalar` must stay no larger than two machine words.
    #[test]
    fn test_memory_size() {
        // https://users.rust-lang.org/t/guidelines-for-self-ownership-on-copy-types/61262/2
        assert!(std::mem::size_of::<Scalar>() <= 2 * std::mem::size_of::<usize>());
    }

    // The two recognized boolean spellings.
    #[test]
    fn scalar_to_bool() {
        assert_eq!((Scalar::new(b"yes").to_bool()), Ok(true));
        assert_eq!((Scalar::new(b"no").to_bool()), Ok(false));
        // NOTE(review): this assertion exercises `to_f64`, not `to_bool`.
        assert_eq!((Scalar::new(b"-1").to_f64()), Ok(-1.0));
    }

    // Integers, fractions, signs and a leading '.' all parse as f64.
    #[test]
    fn scalar_to_f64() {
        assert_eq!((Scalar::new(b"0").to_f64()), Ok(0.0));
        assert_eq!((Scalar::new(b"1").to_f64()), Ok(1.0));
        assert_eq!((Scalar::new(b"-1").to_f64()), Ok(-1.0));
        assert_eq!((Scalar::new(b"-10000").to_f64()), Ok(-10000.0));
        assert_eq!((Scalar::new(b"10000").to_f64()), Ok(10000.0));
        assert_eq!((Scalar::new(b"20405029").to_f64()), Ok(20405029.0));
        assert_eq!((Scalar::new(b"-20405029").to_f64()), Ok(-20405029.0));
        assert_eq!(
            (Scalar::new(b"20405029553322").to_f64()),
            Ok(20405029553322.0)
        );
        assert_eq!(
            (Scalar::new(b"-20405029553322").to_f64()),
            Ok(-20405029553322.0)
        );

        assert_eq!((Scalar::new(b"0.504").to_f64()), Ok(0.504));
        assert_eq!((Scalar::new(b"-0.504").to_f64()), Ok(-0.504));
        assert_eq!((Scalar::new(b".504").to_f64()), Ok(0.504));
        assert_eq!((Scalar::new(b"-.504").to_f64()), Ok(-0.504));
        assert_eq!((Scalar::new(b"1.00125").to_f64()), Ok(1.00125));
        assert_eq!((Scalar::new(b"-1.50000").to_f64()), Ok(-1.5));
        assert_eq!((Scalar::new(b"-10000.0").to_f64()), Ok(-10000.0));
        assert_eq!((Scalar::new(b"10000.000").to_f64()), Ok(10000.0));
        assert_eq!((Scalar::new(b"20405029.125").to_f64()), Ok(20405029.125));
        assert_eq!((Scalar::new(b"-20405029.125").to_f64()), Ok(-20405029.125));
        assert_eq!(
            (Scalar::new(b"20405029553322.015").to_f64()),
            Ok(20405029553322.015)
        );
        assert_eq!(
            (Scalar::new(b"-20405029553322.015").to_f64()),
            Ok(-20405029553322.015)
        );
        assert_eq!(
            Scalar::new(b"10.99999999999999").to_f64(),
            Ok(10.99999999999999)
        );
        assert_eq!((Scalar::new(b"+0.5").to_f64()), Ok(0.5));

        assert!(Scalar::new(b"E").to_f64().is_err());
        assert!(Scalar::new(b"").to_f64().is_err());
    }

    // A trailing 'f' is accepted after a fraction, a bare '.', or an integer.
    #[test]
    fn scalar_to_f64_with_f_suffix() {
        assert_eq!((Scalar::new(b"0.0f").to_f64()), Ok(0.0));
        assert_eq!((Scalar::new(b"-5.5f").to_f64()), Ok(-5.5));
        assert_eq!((Scalar::new(b"10.0f").to_f64()), Ok(10.0));
        assert_eq!((Scalar::new(b"0.40f").to_f64()), Ok(0.4));
        assert_eq!((Scalar::new(b"123.456f").to_f64()), Ok(123.456));
        assert_eq!((Scalar::new(b"-0.001f").to_f64()), Ok(-0.001));
        assert_eq!((Scalar::new(b"+42.0f").to_f64()), Ok(42.0));
        assert_eq!((Scalar::new(b".5f").to_f64()), Ok(0.5));
        assert_eq!((Scalar::new(b"1f").to_f64()), Ok(1.0));
        assert_eq!((Scalar::new(b"-1f").to_f64()), Ok(-1.0));
        assert_eq!((Scalar::new(b"10.f").to_f64()), Ok(10.0));

        assert!(Scalar::new(b"f").to_f64().is_err());
        assert!(Scalar::new(b"invalidf").to_f64().is_err());
        assert_eq!((Scalar::new(b"0f").to_f64()), Ok(0.0));
    }

    // 23 fractional digits is one more than the power-of-ten table covers.
    #[test]
    fn scalar_f64_fraction_too_long() {
        assert!(Scalar::new(b"0.00000000000000000000000").to_f64().is_err());
    }

    // Signed integers, including the upper boundary of i64.
    #[test]
    fn scalar_to_i64() {
        assert_eq!((Scalar::new(b"0").to_i64()), Ok(0));
        assert_eq!((Scalar::new(b"1").to_i64()), Ok(1));
        assert_eq!((Scalar::new(b"-1").to_i64()), Ok(-1));
        assert_eq!((Scalar::new(b"-10000").to_i64()), Ok(-10000));
        assert_eq!((Scalar::new(b"10000").to_i64()), Ok(10000));
        assert_eq!((Scalar::new(b"20405029").to_i64()), Ok(20405029));
        assert_eq!((Scalar::new(b"-20405029").to_i64()), Ok(-20405029));
        assert_eq!(
            (Scalar::new(b"20405029553322").to_i64()),
            Ok(20405029553322)
        );
        assert_eq!(
            (Scalar::new(b"-20405029553322").to_i64()),
            Ok(-20405029553322)
        );

        assert_eq!((Scalar::new(b"+0").to_i64()), Ok(0));
        assert_eq!((Scalar::new(b"+1").to_i64()), Ok(1));

        assert_eq!(
            Scalar::new(b"9223372036854775807").to_i64(),
            Ok(9223372036854775807)
        );
        assert!(Scalar::new(b"-9223372036854775809").to_i64().is_err());
        assert!(Scalar::new(b"9223372036854775808").to_i64().is_err());
    }

    // Unsigned integers, with and without a leading '+', up to u64::MAX.
    #[test]
    fn scalar_to_u64() {
        assert_eq!((Scalar::new(b"0").to_u64()), Ok(0));
        assert_eq!((Scalar::new(b"1").to_u64()), Ok(1));
        assert_eq!((Scalar::new(b"45").to_u64()), Ok(45));
        assert_eq!((Scalar::new(b"+45").to_u64()), Ok(45));
        assert_eq!((Scalar::new(b"10000").to_u64()), Ok(10000));
        assert_eq!((Scalar::new(b"20405029").to_u64()), Ok(20405029));
        assert_eq!(
            (Scalar::new(b"20405029553322").to_u64()),
            Ok(20405029553322)
        );
        assert_eq!(
            (Scalar::new(b"+20405029553322").to_u64()),
            Ok(20405029553322)
        );
        assert_eq!(
            (Scalar::new(b"18446744073709551615").to_u64()),
            Ok(18446744073709551615)
        );
        assert_eq!(
            (Scalar::new(b"+18446744073709551615").to_u64()),
            Ok(18446744073709551615)
        );
    }

    // Inputs with more digits than u64::MAX must be rejected.
    #[test]
    fn scalar_to_u64_overflow() {
        assert!(
            Scalar::new(b"888888888888888888888888888888888")
                .to_u64()
                .is_err()
        );
        assert!(Scalar::new(b"666666666666666685902").to_u64().is_err());
        assert!(Scalar::new(b"184467440737095516106").to_u64().is_err());
    }

    // Too many total digits around the decimal point must be rejected.
    #[test]
    fn scalar_to_f64_overflow() {
        assert!(
            Scalar::new(b"9999999999.99999999999999999")
                .to_f64()
                .is_err()
        );
        assert!(
            Scalar::new(b"999999999999999999999.999999999")
                .to_f64()
                .is_err()
        );
        assert!(Scalar::new(b"10.99999990999999999999999").to_f64().is_err());
    }

    // Every conversion fails on an empty scalar.
    #[test]
    fn scalar_empty_string() {
        let s = Scalar::new(b"");
        assert!(s.to_bool().is_err());
        assert!(s.to_f64().is_err());
        assert!(s.to_i64().is_err());
        assert!(s.to_u64().is_err());
    }

    // Integers above 2^53 - 1 convert to i64/u64 but report a precision loss
    // (carrying the approximate float) when converted to f64.
    #[test]
    fn scalar_precision() {
        let s = Scalar::new(b"90071992547409097");
        assert_eq!(s.to_i64(), Ok(90071992547409097));
        assert_eq!(s.to_u64(), Ok(90071992547409097));
        let fl = s.to_f64().unwrap_err();
        assert_eq!(fl, ScalarError::PrecisionLoss(90071992547409100.0));

        let s = Scalar::new(b"18446744073709547616");
        assert!(s.to_i64().is_err());
        assert_eq!(s.to_u64(), Ok(18446744073709547616));
        let fl = s.to_f64().unwrap_err();
        assert_eq!(fl, ScalarError::PrecisionLoss(18446744073709548000.0));

        let s = Scalar::new(b"-90071992547409097");
        assert_eq!(s.to_i64(), Ok(-90071992547409097));
        assert!(s.to_u64().is_err());
        let fl = s.to_f64().unwrap_err();
        assert_eq!(fl, ScalarError::PrecisionLoss(-90071992547409100.0));
    }

    // Property test: mapping each byte through the crate's WINDOWS_1252 table
    // must produce the same string as encoding_rs' decoder.
    #[quickcheck]
    fn to_string_equality(data: Vec<u8>) -> bool {
        use encoding_rs::*;
        let (cow, _) = WINDOWS_1252.decode_without_bom_handling(&data);
        let actual: String = data
            .iter()
            .map(|&x| crate::data::WINDOWS_1252[x as usize])
            .collect();

        cow.into_owned() == actual
    }
}