Skip to main content

hayro_syntax/object/
number.rs

1//! Numbers.
2
3use crate::math::{powi_f64, trunc_f64};
4use crate::object::macros::object;
5use crate::object::{Object, ObjectLike};
6use crate::reader::Reader;
7use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
8use crate::trivia::{is_regular_character, is_white_space_character};
9use core::fmt::Debug;
10
/// Lookup table holding `10^i` at index `i` for `i` in `0..20`.
///
/// `read_inner` divides by an entry of this table to place the decimal point
/// for short fractions, and only falls back to `powi_f64` for longer ones.
#[rustfmt::skip]
static POWERS_OF_10: [f64; 20] = [
    1e0,  1e1,  1e2,  1e3,  1e4,
    1e5,  1e6,  1e7,  1e8,  1e9,
    1e10, 1e11, 1e12, 1e13, 1e14,
    1e15, 1e16, 1e17, 1e18, 1e19,
];
16
/// A number.
///
/// Wraps an [`InternalNumber`], which is either an integer (`i64`) or a real
/// (`f64`). Use [`Number::as_f64`], [`Number::as_f32`] or [`Number::as_i64`]
/// to obtain a primitive value regardless of which of the two is stored.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Number(pub(crate) InternalNumber);
20
21impl Number {
22    /// The number zero.
23    pub const ZERO: Self = Self::from_i32(0);
24    /// The number one.
25    pub const ONE: Self = Self::from_i32(1);
26
27    /// Returns the number as a f64.
28    pub fn as_f64(&self) -> f64 {
29        match self.0 {
30            InternalNumber::Real(r) => r,
31            InternalNumber::Integer(i) => i as f64,
32        }
33    }
34
35    /// Returns the number as a f32.
36    pub fn as_f32(&self) -> f32 {
37        match self.0 {
38            InternalNumber::Real(r) => r as f32,
39            InternalNumber::Integer(i) => i as f32,
40        }
41    }
42
43    /// Returns the number as an i64.
44    pub fn as_i64(&self) -> i64 {
45        match self.0 {
46            InternalNumber::Real(r) => {
47                let res = r as i64;
48
49                if !(trunc_f64(r) == r) {
50                    debug!("float {r} was truncated to {res}");
51                }
52
53                res
54            }
55            InternalNumber::Integer(i) => i,
56        }
57    }
58
59    /// Create a new `Number` from an f32 number.
60    pub const fn from_f32(num: f32) -> Self {
61        Self(InternalNumber::Real(num as f64))
62    }
63
64    /// Create a new `Number` from an i32 number.
65    pub const fn from_i32(num: i32) -> Self {
66        Self(InternalNumber::Integer(num as i64))
67    }
68}
69
70impl Skippable for Number {
71    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
72        let has_sign = r.forward_if(|b| b == b'+' || b == b'-').is_some();
73
74        // Some PDFs have weird trailing minuses, so try to accept those as well.
75        match r.peek_byte()? {
76            b'.' => {
77                r.read_byte()?;
78                // See PDFJS-9252 - treat a single . as 0.
79                r.forward_while(is_digit_or_minus);
80            }
81            b'0'..=b'9' | b'-' => {
82                r.forward_while_1(is_digit_or_minus)?;
83                if let Some(()) = r.forward_tag(b".") {
84                    r.forward_while(is_digit_or_minus);
85                }
86            }
87            // See PDFJS-bug1753983 - accept just + or - as a zero.
88            // ALso see PDFJS-bug1953099, where the sign is followed by a show
89            // text string operand, requiring us to allow '<' and '(' as well.
90            b if has_sign && (is_white_space_character(b) || matches!(b, b'(' | b'<')) => {}
91            _ => return None,
92        }
93
94        // See issue 994. Don't accept numbers that are followed by a regular character.
95        if r.peek_byte().is_some_and(is_regular_character) {
96            return None;
97        }
98
99        Some(())
100    }
101}
102
103impl Readable<'_> for Number {
104    #[inline]
105    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
106        let old_offset = r.offset();
107        read_inner(r).or_else(|| {
108            r.jump(old_offset);
109            None
110        })
111    }
112}
113
114#[inline(always)]
115fn read_inner(r: &mut Reader<'_>) -> Option<Number> {
116    let negative = match r.peek_byte()? {
117        b'-' => {
118            r.forward();
119            true
120        }
121        b'+' => {
122            r.forward();
123            false
124        }
125        _ => false,
126    };
127
128    let mut mantissa: u64 = 0;
129    let mut has_dot = false;
130    let mut decimal_shift: u32 = 0;
131    let mut has_digits = false;
132
133    loop {
134        match r.peek_byte() {
135            Some(b'0'..=b'9') => {
136                let d = r.read_byte().unwrap();
137                mantissa = mantissa
138                    // Using `saturating` would arguably be better here, but
139                    // profiling showed that it seems to be more expensive, at least
140                    // on ARM. Since such large numbers shouldn't appear anyway,
141                    // it doesn't really matter a lot what mode we use.
142                    .wrapping_mul(10)
143                    .wrapping_add((d - b'0') as u64);
144                has_digits = true;
145                if has_dot {
146                    decimal_shift += 1;
147                }
148            }
149            Some(b'.') if !has_dot => {
150                r.forward();
151                has_dot = true;
152            }
153            // Some weird PDFs have trailing minus in the fraction of number.
154            Some(b'-') if has_digits => {
155                r.forward();
156                r.forward_while(is_digit_or_minus);
157                break;
158            }
159            _ => break,
160        }
161    }
162
163    if !has_digits {
164        if negative || has_dot {
165            // Treat numbers like just `-`, `+` or `-.` as zero.
166            return Some(Number(InternalNumber::Integer(0)));
167        }
168        return None;
169    }
170
171    // See issue 994. Don't accept numbers that are followed by a regular character
172    // without any white space in-between.
173    if r.peek_byte().is_some_and(is_regular_character) {
174        return None;
175    }
176
177    if !has_dot {
178        let value = if negative {
179            (mantissa as i64).wrapping_neg()
180        } else {
181            mantissa as i64
182        };
183        Some(Number(InternalNumber::Integer(value)))
184    } else {
185        let mut value = mantissa as f64;
186
187        if decimal_shift > 0 {
188            if decimal_shift < POWERS_OF_10.len() as u32 {
189                value /= POWERS_OF_10[decimal_shift as usize];
190            } else {
191                value /= powi_f64(10.0, decimal_shift);
192            }
193        }
194
195        if negative {
196            value = -value;
197        }
198
199        Some(Number(InternalNumber::Real(value)))
200    }
201}
202
// Expands the `object!` macro from `crate::object::macros` for `Number`.
// NOTE(review): presumably this generates the glue between `Number` and the
// `Object::Number` variant - confirm against the macro definition.
object!(Number, Number);
204
/// The internal representation of a [`Number`].
///
/// The parser produces `Integer` when the input contains no decimal point and
/// `Real` when it does.
#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) enum InternalNumber {
    /// A number with a decimal point, stored as `f64`.
    Real(f64),
    /// A number without a decimal point, stored as `i64`.
    Integer(i64),
}
210
/// Implements `Skippable`, `Readable`, `TryFrom<Object>` and `ObjectLike` for
/// the primitive integer type `$int`, all built on top of `Number`.
macro_rules! int_num {
    ($int:ident) => {
        impl Skippable for $int {
            /// Skips an optionally signed run of digits. Fails if no digit is
            /// present or if the digits are followed by `.` or a regular character.
            fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
                r.forward_if(|b| matches!(b, b'+' | b'-'));
                r.forward_while_1(is_digit)?;

                match r.peek_byte() {
                    // We have a float instead of an integer.
                    Some(b'.') => None,
                    // See issue 994. Don't accept numbers that are followed by a regular
                    // character without any white space in-between.
                    Some(next) if is_regular_character(next) => None,
                    _ => Some(()),
                }
            }
        }

        impl<'a> Readable<'a> for $int {
            /// Reads a `Number` and converts it, yielding `None` if the value
            /// does not fit into the target type.
            fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<$int> {
                let number = r.read::<Number>(ctx)?;
                number.as_i64().try_into().ok()
            }
        }

        impl TryFrom<Object<'_>> for $int {
            type Error = ();

            /// Succeeds only for `Object::Number` values that fit into the
            /// target type.
            fn try_from(value: Object<'_>) -> core::result::Result<Self, Self::Error> {
                let Object::Number(n) = value else {
                    return Err(());
                };

                n.as_i64().try_into().map_err(|_| ())
            }
        }

        impl<'a> ObjectLike<'a> for $int {}
    };
}
255
// Integer types that get the `int_num!` implementations (`Skippable`,
// `Readable`, `TryFrom<Object>` and `ObjectLike`).
int_num!(i32);
int_num!(i64);
int_num!(u32);
int_num!(u16);
int_num!(usize);
int_num!(u8);
262
263impl Skippable for f32 {
264    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
265        r.skip::<Number>(is_content_stream).map(|_| {})
266    }
267}
268
269impl Readable<'_> for f32 {
270    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
271        r.read_without_context::<Number>()
272            .map(|n| n.as_f64() as Self)
273    }
274}
275
276impl TryFrom<Object<'_>> for f32 {
277    type Error = ();
278
279    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
280        match value {
281            Object::Number(n) => Ok(n.as_f64() as Self),
282            _ => Err(()),
283        }
284    }
285}
286
// Empty impl: `f32` overrides nothing from `ObjectLike`.
impl ObjectLike<'_> for f32 {}
288
289impl Skippable for f64 {
290    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
291        r.skip::<Number>(is_content_stream).map(|_| {})
292    }
293}
294
295impl Readable<'_> for f64 {
296    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
297        r.read_without_context::<Number>().map(|n| n.as_f64())
298    }
299}
300
301impl TryFrom<Object<'_>> for f64 {
302    type Error = ();
303
304    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
305        match value {
306            Object::Number(n) => Ok(n.as_f64()),
307            _ => Err(()),
308        }
309    }
310}
311
// Empty impl: `f64` overrides nothing from `ObjectLike`.
impl ObjectLike<'_> for f64 {}
313
/// Returns `true` if `byte` is an ASCII decimal digit (`0`-`9`).
pub(crate) fn is_digit(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9')
}
317
318pub(crate) fn is_digit_or_minus(byte: u8) -> bool {
319    is_digit(byte) || byte == b'-'
320}
321
#[cfg(test)]
mod tests {
    use crate::object::Number;
    use crate::reader::Reader;
    use crate::reader::ReaderExt;

    /// Reads a value of type `$ty` from a fresh reader over `$input`,
    /// evaluating to an `Option<$ty>`.
    macro_rules! parse {
        ($ty:ty, $input:expr) => {
            Reader::new($input.as_bytes()).read_without_context::<$ty>()
        };
    }

    // Integers.

    #[test]
    fn int_1() {
        assert_eq!(parse!(i32, "0"), Some(0));
    }

    #[test]
    fn int_3() {
        assert_eq!(parse!(i32, "+32"), Some(32));
    }

    #[test]
    fn int_4() {
        assert_eq!(parse!(i32, "-32"), Some(-32));
    }

    #[test]
    fn int_6() {
        assert_eq!(parse!(i32, "98349"), Some(98349));
    }

    #[test]
    fn int_7() {
        assert_eq!(parse!(i32, "003245"), Some(3245));
    }

    #[test]
    fn int_min_does_not_panic() {
        assert_eq!(parse!(i64, "-9223372036854775808"), Some(i64::MIN));
    }

    // Reals.

    #[test]
    fn real_1() {
        assert_eq!(parse!(f32, "3"), Some(3.0));
    }

    #[test]
    fn real_3() {
        assert_eq!(parse!(f32, "+32"), Some(32.0));
    }

    #[test]
    fn real_4() {
        assert_eq!(parse!(f32, "-32"), Some(-32.0));
    }

    #[test]
    fn real_5() {
        assert_eq!(parse!(f32, "-32.01"), Some(-32.01));
    }

    #[test]
    fn real_6() {
        assert_eq!(parse!(f32, "-.345"), Some(-0.345));
    }

    #[test]
    fn real_7() {
        assert_eq!(parse!(f32, "-.00143"), Some(-0.00143));
    }

    #[test]
    fn real_8() {
        assert_eq!(parse!(f32, "-12.0013"), Some(-12.0013));
    }

    #[test]
    fn real_9() {
        assert_eq!(parse!(f32, "98349.432534"), Some(98_349.43));
    }

    #[test]
    fn real_10() {
        assert_eq!(parse!(f32, "-34534656.34"), Some(-34534656.34));
    }

    #[test]
    fn real_failing() {
        assert!(parse!(f32, "+abc").is_none());
    }

    // `Number` itself.

    #[test]
    fn number_1() {
        let value = parse!(Number, "+32").unwrap().as_f64() as f32;
        assert_eq!(value, 32.0);
    }

    #[test]
    fn number_2() {
        let value = parse!(Number, "-32.01").unwrap().as_f64() as f32;
        assert_eq!(value, -32.01);
    }

    #[test]
    fn number_3() {
        let value = parse!(Number, "-.345").unwrap().as_f64() as f32;
        assert_eq!(value, -0.345);
    }

    #[test]
    fn large_number() {
        let value = parse!(Number, "38359922").unwrap().as_i64();
        assert_eq!(value, 38359922);
    }

    #[test]
    fn large_number_2() {
        assert_eq!(parse!(u32, "4294966260"), Some(4294966260));
    }
}
536                .unwrap(),
537            4294966260
538        );
539    }
540}