Skip to main content

pdf_syntax/object/
number.rs

1//! Numbers.
2
3use crate::math::{powi_f64, trunc_f64};
4use crate::object::macros::object;
5use crate::object::{Object, ObjectLike};
6use crate::reader::Reader;
7use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
8use crate::trivia::{is_regular_character, is_white_space_character};
9use core::fmt::Debug;
10use log::debug;
11
/// Lookup table of powers of ten, where `POWERS_OF_10[n]` is `10^n` for
/// `n` in `0..20`.
///
/// `read_inner` divides the parsed mantissa by the entry indexed by the number
/// of fractional digits, and only falls back to `powi_f64` when that count is
/// outside the table.
#[rustfmt::skip]
static POWERS_OF_10: [f64; 20] = [
    1.0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
    1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
];
17
/// A number.
///
/// Wraps an `InternalNumber`, which records whether the value is held as an
/// integer (`i64`) or as a real (`f64`). Use [`Number::is_real`] to tell the
/// two apart and the `as_*` accessors to convert.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Number(pub(crate) InternalNumber);
21
22impl Number {
23    /// The number zero.
24    pub const ZERO: Self = Self::from_i32(0);
25    /// The number one.
26    pub const ONE: Self = Self::from_i32(1);
27
28    /// Returns the number as a f64.
29    pub fn as_f64(&self) -> f64 {
30        match self.0 {
31            InternalNumber::Real(r) => r,
32            InternalNumber::Integer(i) => i as f64,
33        }
34    }
35
36    /// Returns the number as a f32.
37    pub fn as_f32(&self) -> f32 {
38        match self.0 {
39            InternalNumber::Real(r) => r as f32,
40            InternalNumber::Integer(i) => i as f32,
41        }
42    }
43
44    /// Returns the number as an i64.
45    pub fn as_i64(&self) -> i64 {
46        match self.0 {
47            InternalNumber::Real(r) => {
48                let res = r as i64;
49
50                if !(trunc_f64(r) == r) {
51                    debug!("float {r} was truncated to {res}");
52                }
53
54                res
55            }
56            InternalNumber::Integer(i) => i,
57        }
58    }
59
60    /// Create a new `Number` from an f32 number.
61    pub const fn from_f32(num: f32) -> Self {
62        Self(InternalNumber::Real(num as f64))
63    }
64
65    /// Create a new `Number` from an i32 number.
66    pub const fn from_i32(num: i32) -> Self {
67        Self(InternalNumber::Integer(num as i64))
68    }
69
70    /// Returns true if this number was parsed as a real (floating-point) number.
71    ///
72    /// PDF distinguishes integers from reals: implementation limits (§6.1.12/§6.1.13)
73    /// only apply to real values, not to integers.
74    pub fn is_real(&self) -> bool {
75        matches!(self.0, InternalNumber::Real(_))
76    }
77}
78
79impl Skippable for Number {
80    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
81        let has_sign = r.forward_if(|b| b == b'+' || b == b'-').is_some();
82
83        // Some PDFs have weird trailing minuses, so try to accept those as well.
84        match r.peek_byte()? {
85            b'.' => {
86                r.read_byte()?;
87                // See PDFJS-9252 - treat a single . as 0.
88                r.forward_while(is_digit_or_minus);
89            }
90            b'0'..=b'9' | b'-' => {
91                r.forward_while_1(is_digit_or_minus)?;
92                if let Some(()) = r.forward_tag(b".") {
93                    r.forward_while(is_digit_or_minus);
94                }
95            }
96            // See PDFJS-bug1753983 - accept just + or - as a zero.
97            // ALso see PDFJS-bug1953099, where the sign is followed by a show
98            // text string operand, requiring us to allow '<' and '(' as well.
99            b if has_sign && (is_white_space_character(b) || matches!(b, b'(' | b'<')) => {}
100            _ => return None,
101        }
102
103        // See issue 994. Don't accept numbers that are followed by a regular character.
104        if r.peek_byte().is_some_and(is_regular_character) {
105            return None;
106        }
107
108        Some(())
109    }
110}
111
112impl Readable<'_> for Number {
113    #[inline]
114    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
115        let old_offset = r.offset();
116        read_inner(r).or_else(|| {
117            r.jump(old_offset);
118            None
119        })
120    }
121}
122
123#[inline(always)]
124fn read_inner(r: &mut Reader<'_>) -> Option<Number> {
125    let negative = match r.peek_byte()? {
126        b'-' => {
127            r.forward();
128            true
129        }
130        b'+' => {
131            r.forward();
132            false
133        }
134        _ => false,
135    };
136
137    let mut mantissa: u64 = 0;
138    let mut has_dot = false;
139    let mut decimal_shift: u32 = 0;
140    let mut has_digits = false;
141
142    loop {
143        match r.peek_byte() {
144            Some(b'0'..=b'9') => {
145                let d = r.read_byte().expect("peek_byte returned Some");
146                mantissa = mantissa
147                    // Using `saturating` would arguably be better here, but
148                    // profiling showed that it seems to be more expensive, at least
149                    // on ARM. Since such large numbers shouldn't appear anyway,
150                    // it doesn't really matter a lot what mode we use.
151                    .wrapping_mul(10)
152                    .wrapping_add((d - b'0') as u64);
153                has_digits = true;
154                if has_dot {
155                    decimal_shift += 1;
156                }
157            }
158            Some(b'.') if !has_dot => {
159                r.forward();
160                has_dot = true;
161            }
162            // Some weird PDFs have trailing minus in the fraction of number.
163            Some(b'-') if has_digits => {
164                r.forward();
165                r.forward_while(is_digit_or_minus);
166                break;
167            }
168            _ => break,
169        }
170    }
171
172    if !has_digits {
173        if negative || has_dot {
174            // Treat numbers like just `-`, `+` or `-.` as zero.
175            return Some(Number(InternalNumber::Integer(0)));
176        }
177        return None;
178    }
179
180    // See issue 994. Don't accept numbers that are followed by a regular character
181    // without any white space in-between.
182    if r.peek_byte().is_some_and(is_regular_character) {
183        return None;
184    }
185
186    if !has_dot {
187        let value = if negative {
188            -(mantissa as i64)
189        } else {
190            mantissa as i64
191        };
192        Some(Number(InternalNumber::Integer(value)))
193    } else {
194        let mut value = mantissa as f64;
195
196        if decimal_shift > 0 {
197            if decimal_shift < POWERS_OF_10.len() as u32 {
198                value /= POWERS_OF_10[decimal_shift as usize];
199            } else {
200                value /= powi_f64(10.0, decimal_shift);
201            }
202        }
203
204        if negative {
205            value = -value;
206        }
207
208        Some(Number(InternalNumber::Real(value)))
209    }
210}
211
// Invokes the crate's `object!` macro for `Number`.
// NOTE(review): presumably this wires `Number` up as the `Object::Number`
// variant (conversions to and from `Object`) - confirm in `object::macros`.
object!(Number, Number);

/// Internal storage for a `Number`, keeping integers and reals distinct so
/// that integer values are not forced through floating point.
#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) enum InternalNumber {
    /// A real value; produced by the parser whenever the input contained a `.`.
    Real(f64),
    /// An integer value; produced by the parser when the input had no `.`.
    Integer(i64),
}
219
220macro_rules! int_num {
221    ($i:ident) => {
222        impl Skippable for $i {
223            fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
224                r.forward_if(|b| b == b'+' || b == b'-');
225                r.forward_while_1(is_digit)?;
226
227                // We have a float instead of an integer.
228                if r.peek_byte() == Some(b'.') {
229                    return None;
230                }
231
232                Some(())
233            }
234        }
235
236        impl<'a> Readable<'a> for $i {
237            fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<$i> {
238                r.read::<Number>(ctx)
239                    .map(|n| n.as_i64())
240                    .and_then(|n| n.try_into().ok())
241            }
242        }
243
244        impl TryFrom<Object<'_>> for $i {
245            type Error = ();
246
247            fn try_from(value: Object<'_>) -> core::result::Result<Self, Self::Error> {
248                match value {
249                    Object::Number(n) => n.as_i64().try_into().ok().ok_or(()),
250                    _ => Err(()),
251                }
252            }
253        }
254
255        impl<'a> ObjectLike<'a> for $i {}
256    };
257}
258
259int_num!(i32);
260int_num!(i64);
261int_num!(u32);
262int_num!(u16);
263int_num!(usize);
264int_num!(u8);
265
266impl Skippable for f32 {
267    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
268        r.skip::<Number>(is_content_stream).map(|_| {})
269    }
270}
271
272impl Readable<'_> for f32 {
273    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
274        r.read_without_context::<Number>()
275            .map(|n| n.as_f64() as Self)
276    }
277}
278
279impl TryFrom<Object<'_>> for f32 {
280    type Error = ();
281
282    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
283        match value {
284            Object::Number(n) => Ok(n.as_f64() as Self),
285            _ => Err(()),
286        }
287    }
288}
289
// Empty impl: `f32` opts into `ObjectLike` without overriding anything.
impl ObjectLike<'_> for f32 {}
291
292impl Skippable for f64 {
293    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
294        r.skip::<Number>(is_content_stream).map(|_| {})
295    }
296}
297
298impl Readable<'_> for f64 {
299    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
300        r.read_without_context::<Number>().map(|n| n.as_f64())
301    }
302}
303
304impl TryFrom<Object<'_>> for f64 {
305    type Error = ();
306
307    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
308        match value {
309            Object::Number(n) => Ok(n.as_f64()),
310            _ => Err(()),
311        }
312    }
313}
314
// Empty impl: `f64` opts into `ObjectLike` without overriding anything.
impl ObjectLike<'_> for f64 {}
316
/// Returns `true` if `byte` is an ASCII decimal digit (`0` through `9`).
pub(crate) fn is_digit(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9')
}

/// Returns `true` if `byte` is an ASCII decimal digit or the minus sign `-`.
pub(crate) fn is_digit_or_minus(byte: u8) -> bool {
    byte == b'-' || is_digit(byte)
}
324
// Unit tests for number parsing. Each test feeds a short byte string to a
// fresh `Reader` and reads it back as an integer type, a float type, or a
// `Number`, without a reader context.
#[cfg(test)]
mod tests {
    use crate::object::Number;
    use crate::reader::Reader;
    use crate::reader::ReaderExt;

    // --- Integers read as `i32` ---

    #[test]
    fn int_1() {
        assert_eq!(
            Reader::new("0".as_bytes())
                .read_without_context::<i32>()
                .unwrap(),
            0
        );
    }

    // An explicit `+` sign is accepted.
    #[test]
    fn int_3() {
        assert_eq!(
            Reader::new("+32".as_bytes())
                .read_without_context::<i32>()
                .unwrap(),
            32
        );
    }

    #[test]
    fn int_4() {
        assert_eq!(
            Reader::new("-32".as_bytes())
                .read_without_context::<i32>()
                .unwrap(),
            -32
        );
    }

    #[test]
    fn int_6() {
        assert_eq!(
            Reader::new("98349".as_bytes())
                .read_without_context::<i32>()
                .unwrap(),
            98349
        );
    }

    // Leading zeros do not change the value.
    #[test]
    fn int_7() {
        assert_eq!(
            Reader::new("003245".as_bytes())
                .read_without_context::<i32>()
                .unwrap(),
            3245
        );
    }

    // --- Reals read as `f32` ---

    // Input without a `.` can still be read as a float.
    #[test]
    fn real_1() {
        assert_eq!(
            Reader::new("3".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            3.0
        );
    }

    #[test]
    fn real_3() {
        assert_eq!(
            Reader::new("+32".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            32.0
        );
    }

    #[test]
    fn real_4() {
        assert_eq!(
            Reader::new("-32".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            -32.0
        );
    }

    #[test]
    fn real_5() {
        assert_eq!(
            Reader::new("-32.01".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            -32.01
        );
    }

    // A number may start with `.` (after the sign), with no integer part.
    #[test]
    fn real_6() {
        assert_eq!(
            Reader::new("-.345".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            -0.345
        );
    }

    #[test]
    fn real_7() {
        assert_eq!(
            Reader::new("-.00143".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            -0.00143
        );
    }

    #[test]
    fn real_8() {
        assert_eq!(
            Reader::new("-12.0013".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            -12.0013
        );
    }

    // The input has more digits than the expected `f32` literal; the
    // comparison is made after narrowing to `f32`.
    #[test]
    fn real_9() {
        assert_eq!(
            Reader::new("98349.432534".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            98_349.43
        );
    }

    #[test]
    fn real_10() {
        assert_eq!(
            Reader::new("-34534656.34".as_bytes())
                .read_without_context::<f32>()
                .unwrap(),
            -34534656.34
        );
    }

    // A sign followed by non-numeric characters is not a number.
    #[test]
    fn real_failing() {
        assert!(
            Reader::new("+abc".as_bytes())
                .read_without_context::<f32>()
                .is_none()
        );
    }

    // --- Values read as `Number` and converted afterwards ---

    #[test]
    fn number_1() {
        assert_eq!(
            Reader::new("+32".as_bytes())
                .read_without_context::<Number>()
                .unwrap()
                .as_f64() as f32,
            32.0
        );
    }

    #[test]
    fn number_2() {
        assert_eq!(
            Reader::new("-32.01".as_bytes())
                .read_without_context::<Number>()
                .unwrap()
                .as_f64() as f32,
            -32.01
        );
    }

    #[test]
    fn number_3() {
        assert_eq!(
            Reader::new("-.345".as_bytes())
                .read_without_context::<Number>()
                .unwrap()
                .as_f64() as f32,
            -0.345
        );
    }

    #[test]
    fn large_number() {
        assert_eq!(
            Reader::new("38359922".as_bytes())
                .read_without_context::<Number>()
                .unwrap()
                .as_i64(),
            38359922
        );
    }

    // 4294966260 is larger than `i32::MAX` but still fits into a `u32`.
    #[test]
    fn large_number_2() {
        assert_eq!(
            Reader::new("4294966260".as_bytes())
                .read_without_context::<u32>()
                .unwrap(),
            4294966260
        );
    }
}