hayro_syntax/object/
number.rs

1//! Numbers.
2
3use crate::object::macros::object;
4use crate::object::{Object, ObjectLike};
5use crate::reader::{Readable, Reader, ReaderContext, Skippable};
6use log::debug;
7use std::fmt::Debug;
8use std::str::FromStr;
9
10/// A number.
11#[derive(Clone, Copy, Debug, PartialEq)]
12pub struct Number(pub(crate) InternalNumber);
13
14impl Number {
15    /// The number zero.
16    pub const ZERO: Number = Number::from_i32(0);
17    /// The number one.
18    pub const ONE: Number = Number::from_i32(1);
19
20    /// Returns the number as a f64.
21    pub fn as_f64(&self) -> f64 {
22        match self.0 {
23            InternalNumber::Real(r) => r,
24            InternalNumber::Integer(i) => i as f64,
25        }
26    }
27
28    /// Returns the number as a f32.
29    pub fn as_f32(&self) -> f32 {
30        match self.0 {
31            InternalNumber::Real(r) => r as f32,
32            InternalNumber::Integer(i) => i as f32,
33        }
34    }
35
36    /// Returns the number as an i64.
37    pub fn as_i64(&self) -> i64 {
38        match self.0 {
39            InternalNumber::Real(r) => {
40                let res = r as i64;
41
42                if !(r.trunc() == r) {
43                    debug!("float {r} was truncated to {res}");
44                }
45
46                res
47            }
48            InternalNumber::Integer(i) => i,
49        }
50    }
51
52    /// Create a new `Number` from an f32 number.
53    pub const fn from_f32(num: f32) -> Self {
54        Self(InternalNumber::Real(num as f64))
55    }
56
57    /// Create a new `Number` from an i32 number.
58    pub const fn from_i32(num: i32) -> Self {
59        Self(InternalNumber::Integer(num as i64))
60    }
61}
62
63impl Skippable for Number {
64    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
65        r.forward_if(|b| b == b'+' || b == b'-');
66
67        // Some PDFs have weird trailing minuses, so try to accept those as well.
68        match r.peek_byte()? {
69            b'.' => {
70                r.read_byte()?;
71                r.forward_while_1(is_digit_or_minus)?;
72            }
73
74            b'0'..=b'9' | b'-' => {
75                r.forward_while_1(is_digit_or_minus)?;
76                if let Some(()) = r.forward_tag(b".") {
77                    r.forward_while(is_digit_or_minus);
78                }
79            }
80            _ => return None,
81        }
82
83        Some(())
84    }
85}
86
87impl Readable<'_> for Number {
88    fn read(r: &mut Reader<'_>, ctx: &ReaderContext) -> Option<Self> {
89        // TODO: This function is probably the biggest bottleneck in content parsing, so
90        // worth optimizing (i.e. reading the number directly from the bytes instead
91        // of first parsing it to a number).
92
93        let mut data = r.skip::<Number>(ctx.in_content_stream)?;
94        // Some weird PDFs have trailing minus in the fraction of number, try to strip those.
95        if let Some(idx) = data[1..].iter().position(|b| *b == b'-') {
96            data = &data[..idx.saturating_sub(1)];
97        }
98        // We need to use f64 here, so that we can still parse a full `i32` without losing
99        // precision.
100        let num = f64::from_str(std::str::from_utf8(data).ok()?).ok()?;
101
102        if num.fract() == 0.0 {
103            Some(Number(InternalNumber::Integer(num as i64)))
104        } else {
105            Some(Number(InternalNumber::Real(num)))
106        }
107    }
108}
109
// Invokes the `object!` macro (imported from `crate::object::macros`) for `Number`.
// NOTE(review): the macro is defined outside this file; it presumably generates the
// conversions between `Number` and `Object::Number` — confirm against its definition.
object!(Number, Number);
111
/// The internal representation backing a `Number`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub(crate) enum InternalNumber {
    /// A real (floating-point) value.
    Real(f64),
    /// An integer value.
    Integer(i64),
}
117
118macro_rules! int_num {
119    ($i:ident) => {
120        impl Skippable for $i {
121            fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
122                r.forward_if(|b| b == b'+' || b == b'-');
123                r.forward_while_1(is_digit)?;
124
125                // We have a float instead of an integer.
126                if r.peek_byte() == Some(b'.') {
127                    return None;
128                }
129
130                Some(())
131            }
132        }
133
134        impl<'a> Readable<'a> for $i {
135            fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<$i> {
136                r.read::<Number>(ctx)
137                    .map(|n| n.as_i64())
138                    .and_then(|n| n.try_into().ok())
139            }
140        }
141
142        impl TryFrom<Object<'_>> for $i {
143            type Error = ();
144
145            fn try_from(value: Object<'_>) -> std::result::Result<Self, Self::Error> {
146                match value {
147                    Object::Number(n) => n.as_i64().try_into().ok().ok_or(()),
148                    _ => Err(()),
149                }
150            }
151        }
152
153        impl<'a> ObjectLike<'a> for $i {}
154    };
155}
156
157int_num!(i32);
158int_num!(i64);
159int_num!(u32);
160int_num!(u16);
161int_num!(usize);
162int_num!(u8);
163
164impl Skippable for f32 {
165    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
166        r.skip::<Number>(is_content_stream).map(|_| {})
167    }
168}
169
170impl Readable<'_> for f32 {
171    fn read(r: &mut Reader, _: &ReaderContext) -> Option<Self> {
172        r.read_without_context::<Number>()
173            .map(|n| n.as_f64() as f32)
174    }
175}
176
177impl TryFrom<Object<'_>> for f32 {
178    type Error = ();
179
180    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
181        match value {
182            Object::Number(n) => Ok(n.as_f64() as f32),
183            _ => Err(()),
184        }
185    }
186}
187
188impl ObjectLike<'_> for f32 {}
189
190impl Skippable for f64 {
191    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
192        r.skip::<Number>(is_content_stream).map(|_| {})
193    }
194}
195
196impl Readable<'_> for f64 {
197    fn read(r: &mut Reader, _: &ReaderContext) -> Option<Self> {
198        r.read_without_context::<Number>().map(|n| n.as_f64())
199    }
200}
201
202impl TryFrom<Object<'_>> for f64 {
203    type Error = ();
204
205    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
206        match value {
207            Object::Number(n) => Ok(n.as_f64()),
208            _ => Err(()),
209        }
210    }
211}
212
213impl ObjectLike<'_> for f64 {}
214
/// Returns `true` if `byte` is an ASCII decimal digit (`0` through `9`).
pub(crate) fn is_digit(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9')
}

/// Returns `true` if `byte` is an ASCII decimal digit or the minus sign `-`.
pub(crate) fn is_digit_or_minus(byte: u8) -> bool {
    byte == b'-' || is_digit(byte)
}
222
#[cfg(test)]
mod tests {
    use crate::object::Number;
    use crate::reader::Reader;

    /// Reads a value of type `$ty` from the start of the string `$input` using a
    /// fresh reader and no context, yielding `Option<$ty>`.
    macro_rules! parse {
        ($input:expr, $ty:ty) => {
            Reader::new($input.as_bytes()).read_without_context::<$ty>()
        };
    }

    // Integers.

    #[test]
    fn int_1() {
        assert_eq!(parse!("0", i32), Some(0));
    }

    #[test]
    fn int_3() {
        assert_eq!(parse!("+32", i32), Some(32));
    }

    #[test]
    fn int_4() {
        assert_eq!(parse!("-32", i32), Some(-32));
    }

    #[test]
    fn int_6() {
        assert_eq!(parse!("98349", i32), Some(98349));
    }

    #[test]
    fn int_7() {
        // Leading zeros are accepted.
        assert_eq!(parse!("003245", i32), Some(3245));
    }

    #[test]
    fn int_trailing() {
        // Reading stops at the first byte that is not part of the number.
        assert_eq!(parse!("0abc", i32), Some(0));
    }

    // Reals.

    #[test]
    fn real_1() {
        assert_eq!(parse!("3", f32), Some(3.0));
    }

    #[test]
    fn real_3() {
        assert_eq!(parse!("+32", f32), Some(32.0));
    }

    #[test]
    fn real_4() {
        assert_eq!(parse!("-32", f32), Some(-32.0));
    }

    #[test]
    fn real_5() {
        assert_eq!(parse!("-32.01", f32), Some(-32.01));
    }

    #[test]
    fn real_6() {
        // No integer part in front of the dot.
        assert_eq!(parse!("-.345", f32), Some(-0.345));
    }

    #[test]
    fn real_7() {
        assert_eq!(parse!("-.00143", f32), Some(-0.00143));
    }

    #[test]
    fn real_8() {
        assert_eq!(parse!("-12.0013", f32), Some(-12.0013));
    }

    #[test]
    fn real_9() {
        assert_eq!(parse!("98349.432534", f32), Some(98_349.43));
    }

    #[test]
    fn real_10() {
        assert_eq!(parse!("-34534656.34", f32), Some(-34534656.34));
    }

    #[test]
    fn real_trailing() {
        // Reading stops at the first byte that is not part of the number.
        assert_eq!(parse!("0abc", f32), Some(0.0));
    }

    #[test]
    fn real_failing() {
        // A sign that is not followed by a digit or dot is not a number.
        assert!(parse!("+abc", f32).is_none());
    }

    // `Number` itself.

    #[test]
    fn number_1() {
        assert_eq!(
            parse!("+32", Number).map(|n| n.as_f64() as f32),
            Some(32.0)
        );
    }

    #[test]
    fn number_2() {
        assert_eq!(
            parse!("-32.01", Number).map(|n| n.as_f64() as f32),
            Some(-32.01)
        );
    }

    #[test]
    fn number_3() {
        assert_eq!(
            parse!("-.345", Number).map(|n| n.as_f64() as f32),
            Some(-0.345)
        );
    }

    #[test]
    fn large_number() {
        assert_eq!(
            parse!("38359922", Number).map(|n| n.as_i64()),
            Some(38359922)
        );
    }

    #[test]
    fn large_number_2() {
        // Larger than `i32::MAX`, but still a valid `u32`.
        assert_eq!(parse!("4294966260", u32), Some(4294966260));
    }
}