Skip to main content

hayro_syntax/object/
number.rs

1//! Numbers.
2
3use crate::math::{powi_f64, trunc_f64};
4use crate::object::macros::object;
5use crate::object::{Object, ObjectLike};
6use crate::reader::Reader;
7use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
8use crate::trivia::{is_regular_character, is_white_space_character};
9use core::fmt::Debug;
10
/// Lookup table where entry `i` holds `10^i` as an `f64`, for `i` in `0..20`.
///
/// Used when parsing real numbers to scale the accumulated mantissa by the
/// number of fractional digits without calling a power function.
#[rustfmt::skip]
static POWERS_OF_10: [f64; 20] = [
    1e0,  1e1,  1e2,  1e3,  1e4,
    1e5,  1e6,  1e7,  1e8,  1e9,
    1e10, 1e11, 1e12, 1e13, 1e14,
    1e15, 1e16, 1e17, 1e18, 1e19,
];
16
/// A number.
///
/// Thin wrapper around the crate-internal representation, which stores the
/// value either as a real (`f64`) or as an integer (`i64`).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Number(pub(crate) InternalNumber);
20
21impl Number {
22    /// The number zero.
23    pub const ZERO: Self = Self::from_i32(0);
24    /// The number one.
25    pub const ONE: Self = Self::from_i32(1);
26
27    /// Returns the number as a f64.
28    pub fn as_f64(&self) -> f64 {
29        match self.0 {
30            InternalNumber::Real(r) => r,
31            InternalNumber::Integer(i) => i as f64,
32        }
33    }
34
35    /// Returns the number as a f32.
36    pub fn as_f32(&self) -> f32 {
37        match self.0 {
38            InternalNumber::Real(r) => r as f32,
39            InternalNumber::Integer(i) => i as f32,
40        }
41    }
42
43    /// Returns the number as an i64.
44    pub fn as_i64(&self) -> i64 {
45        match self.0 {
46            InternalNumber::Real(r) => {
47                let res = r as i64;
48
49                if !(trunc_f64(r) == r) {
50                    debug!("float {r} was truncated to {res}");
51                }
52
53                res
54            }
55            InternalNumber::Integer(i) => i,
56        }
57    }
58
59    /// Create a new `Number` from an f32 number.
60    pub const fn from_f32(num: f32) -> Self {
61        Self(InternalNumber::Real(num as f64))
62    }
63
64    /// Create a new `Number` from an i32 number.
65    pub const fn from_i32(num: i32) -> Self {
66        Self(InternalNumber::Integer(num as i64))
67    }
68}
69
70impl Skippable for Number {
71    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
72        let has_sign = r.forward_if(|b| b == b'+' || b == b'-').is_some();
73
74        // Some PDFs have weird trailing minuses, so try to accept those as well.
75        match r.peek_byte()? {
76            b'.' => {
77                r.read_byte()?;
78                // See PDFJS-9252 - treat a single . as 0.
79                r.forward_while(is_digit_or_minus);
80            }
81            b'0'..=b'9' | b'-' => {
82                r.forward_while_1(is_digit_or_minus)?;
83                if let Some(()) = r.forward_tag(b".") {
84                    r.forward_while(is_digit_or_minus);
85                }
86            }
87            // See PDFJS-bug1753983 - accept just + or - as a zero.
88            // ALso see PDFJS-bug1953099, where the sign is followed by a show
89            // text string operand, requiring us to allow '<' and '(' as well.
90            b if has_sign && (is_white_space_character(b) || matches!(b, b'(' | b'<')) => {}
91            _ => return None,
92        }
93
94        // See issue 994. Don't accept numbers that are followed by a regular character.
95        if r.peek_byte().is_some_and(is_regular_character) {
96            return None;
97        }
98
99        Some(())
100    }
101}
102
103impl Readable<'_> for Number {
104    #[inline]
105    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
106        let old_offset = r.offset();
107        read_inner(r).or_else(|| {
108            r.jump(old_offset);
109            None
110        })
111    }
112}
113
114#[inline(always)]
115fn read_inner(r: &mut Reader<'_>) -> Option<Number> {
116    let negative = match r.peek_byte()? {
117        b'-' => {
118            r.forward();
119            true
120        }
121        b'+' => {
122            r.forward();
123            false
124        }
125        _ => false,
126    };
127
128    let mut mantissa: u64 = 0;
129    let mut has_dot = false;
130    let mut decimal_shift: u32 = 0;
131    let mut has_digits = false;
132
133    loop {
134        match r.peek_byte() {
135            Some(b'0'..=b'9') => {
136                let d = r.read_byte().unwrap();
137                mantissa = mantissa
138                    // Using `saturating` would arguably be better here, but
139                    // profiling showed that it seems to be more expensive, at least
140                    // on ARM. Since such large numbers shouldn't appear anyway,
141                    // it doesn't really matter a lot what mode we use.
142                    .wrapping_mul(10)
143                    .wrapping_add((d - b'0') as u64);
144                has_digits = true;
145                if has_dot {
146                    decimal_shift += 1;
147                }
148            }
149            Some(b'.') if !has_dot => {
150                r.forward();
151                has_dot = true;
152            }
153            // Some weird PDFs have trailing minus in the fraction of number.
154            Some(b'-') if has_digits => {
155                r.forward();
156                r.forward_while(is_digit_or_minus);
157                break;
158            }
159            _ => break,
160        }
161    }
162
163    if !has_digits {
164        if negative || has_dot {
165            // Treat numbers like just `-`, `+` or `-.` as zero.
166            return Some(Number(InternalNumber::Integer(0)));
167        }
168        return None;
169    }
170
171    // See issue 994. Don't accept numbers that are followed by a regular character
172    // without any white space in-between.
173    if r.peek_byte().is_some_and(is_regular_character) {
174        return None;
175    }
176
177    if !has_dot {
178        let value = if negative {
179            -(mantissa as i64)
180        } else {
181            mantissa as i64
182        };
183        Some(Number(InternalNumber::Integer(value)))
184    } else {
185        let mut value = mantissa as f64;
186
187        if decimal_shift > 0 {
188            if decimal_shift < POWERS_OF_10.len() as u32 {
189                value /= POWERS_OF_10[decimal_shift as usize];
190            } else {
191                value /= powi_f64(10.0, decimal_shift);
192            }
193        }
194
195        if negative {
196            value = -value;
197        }
198
199        Some(Number(InternalNumber::Real(value)))
200    }
201}
202
// NOTE(review): `object!` comes from `crate::object::macros`; presumably it
// generates the object-related trait impls for `Number` — confirm there.
object!(Number, Number);
204
/// The crate-internal representation backing [`Number`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) enum InternalNumber {
    /// A number that was written with a decimal point, stored as `f64`.
    Real(f64),
    /// A number that was written without a decimal point, stored as `i64`.
    Integer(i64),
}
210
211macro_rules! int_num {
212    ($i:ident) => {
213        impl Skippable for $i {
214            fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
215                r.forward_if(|b| b == b'+' || b == b'-');
216                r.forward_while_1(is_digit)?;
217
218                // We have a float instead of an integer.
219                if r.peek_byte() == Some(b'.') {
220                    return None;
221                }
222
223                Some(())
224            }
225        }
226
227        impl<'a> Readable<'a> for $i {
228            fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<$i> {
229                r.read::<Number>(ctx)
230                    .map(|n| n.as_i64())
231                    .and_then(|n| n.try_into().ok())
232            }
233        }
234
235        impl TryFrom<Object<'_>> for $i {
236            type Error = ();
237
238            fn try_from(value: Object<'_>) -> core::result::Result<Self, Self::Error> {
239                match value {
240                    Object::Number(n) => n.as_i64().try_into().ok().ok_or(()),
241                    _ => Err(()),
242                }
243            }
244        }
245
246        impl<'a> ObjectLike<'a> for $i {}
247    };
248}
249
250int_num!(i32);
251int_num!(i64);
252int_num!(u32);
253int_num!(u16);
254int_num!(usize);
255int_num!(u8);
256
257impl Skippable for f32 {
258    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
259        r.skip::<Number>(is_content_stream).map(|_| {})
260    }
261}
262
263impl Readable<'_> for f32 {
264    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
265        r.read_without_context::<Number>()
266            .map(|n| n.as_f64() as Self)
267    }
268}
269
270impl TryFrom<Object<'_>> for f32 {
271    type Error = ();
272
273    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
274        match value {
275            Object::Number(n) => Ok(n.as_f64() as Self),
276            _ => Err(()),
277        }
278    }
279}
280
281impl ObjectLike<'_> for f32 {}
282
283impl Skippable for f64 {
284    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
285        r.skip::<Number>(is_content_stream).map(|_| {})
286    }
287}
288
289impl Readable<'_> for f64 {
290    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
291        r.read_without_context::<Number>().map(|n| n.as_f64())
292    }
293}
294
295impl TryFrom<Object<'_>> for f64 {
296    type Error = ();
297
298    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
299        match value {
300            Object::Number(n) => Ok(n.as_f64()),
301            _ => Err(()),
302        }
303    }
304}
305
306impl ObjectLike<'_> for f64 {}
307
/// Returns `true` if `byte` is an ASCII decimal digit (`0` through `9`).
pub(crate) fn is_digit(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9')
}
311
/// Returns `true` if `byte` is an ASCII decimal digit or a minus sign.
pub(crate) fn is_digit_or_minus(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'-')
}
315
#[cfg(test)]
mod tests {
    use crate::object::Number;
    use crate::reader::{Reader, ReaderExt};

    /// Reads a value of type `$ty` from the start of the string `$src`,
    /// yielding the `Option` returned by `read_without_context`.
    macro_rules! parse {
        ($src:expr, $ty:ty) => {
            Reader::new($src.as_bytes()).read_without_context::<$ty>()
        };
    }

    // Integers.

    #[test]
    fn int_1() {
        assert_eq!(parse!("0", i32).unwrap(), 0);
    }

    #[test]
    fn int_3() {
        assert_eq!(parse!("+32", i32).unwrap(), 32);
    }

    #[test]
    fn int_4() {
        assert_eq!(parse!("-32", i32).unwrap(), -32);
    }

    #[test]
    fn int_6() {
        assert_eq!(parse!("98349", i32).unwrap(), 98349);
    }

    #[test]
    fn int_7() {
        assert_eq!(parse!("003245", i32).unwrap(), 3245);
    }

    // Reals read as `f32`.

    #[test]
    fn real_1() {
        assert_eq!(parse!("3", f32).unwrap(), 3.0);
    }

    #[test]
    fn real_3() {
        assert_eq!(parse!("+32", f32).unwrap(), 32.0);
    }

    #[test]
    fn real_4() {
        assert_eq!(parse!("-32", f32).unwrap(), -32.0);
    }

    #[test]
    fn real_5() {
        assert_eq!(parse!("-32.01", f32).unwrap(), -32.01);
    }

    #[test]
    fn real_6() {
        assert_eq!(parse!("-.345", f32).unwrap(), -0.345);
    }

    #[test]
    fn real_7() {
        assert_eq!(parse!("-.00143", f32).unwrap(), -0.00143);
    }

    #[test]
    fn real_8() {
        assert_eq!(parse!("-12.0013", f32).unwrap(), -12.0013);
    }

    #[test]
    fn real_9() {
        assert_eq!(parse!("98349.432534", f32).unwrap(), 98_349.43);
    }

    #[test]
    fn real_10() {
        assert_eq!(parse!("-34534656.34", f32).unwrap(), -34534656.34);
    }

    #[test]
    fn real_failing() {
        assert!(parse!("+abc", f32).is_none());
    }

    // Values read as `Number`.

    #[test]
    fn number_1() {
        assert_eq!(parse!("+32", Number).unwrap().as_f64() as f32, 32.0);
    }

    #[test]
    fn number_2() {
        assert_eq!(parse!("-32.01", Number).unwrap().as_f64() as f32, -32.01);
    }

    #[test]
    fn number_3() {
        assert_eq!(parse!("-.345", Number).unwrap().as_f64() as f32, -0.345);
    }

    // Larger magnitudes.

    #[test]
    fn large_number() {
        assert_eq!(parse!("38359922", Number).unwrap().as_i64(), 38359922);
    }

    #[test]
    fn large_number_2() {
        assert_eq!(parse!("4294966260", u32).unwrap(), 4294966260);
    }
}