hayro_syntax/object/
number.rs

1//! Numbers.
2
3use crate::object::macros::object;
4use crate::object::{Object, ObjectLike};
5use crate::reader::Reader;
6use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
7use log::debug;
8use std::fmt::Debug;
9use std::str::FromStr;
10
11/// A number.
12#[derive(Clone, Copy, Debug, PartialEq)]
13pub struct Number(pub(crate) InternalNumber);
14
15impl Number {
16    /// The number zero.
17    pub const ZERO: Self = Self::from_i32(0);
18    /// The number one.
19    pub const ONE: Self = Self::from_i32(1);
20
21    /// Returns the number as a f64.
22    pub fn as_f64(&self) -> f64 {
23        match self.0 {
24            InternalNumber::Real(r) => r,
25            InternalNumber::Integer(i) => i as f64,
26        }
27    }
28
29    /// Returns the number as a f32.
30    pub fn as_f32(&self) -> f32 {
31        match self.0 {
32            InternalNumber::Real(r) => r as f32,
33            InternalNumber::Integer(i) => i as f32,
34        }
35    }
36
37    /// Returns the number as an i64.
38    pub fn as_i64(&self) -> i64 {
39        match self.0 {
40            InternalNumber::Real(r) => {
41                let res = r as i64;
42
43                if !(r.trunc() == r) {
44                    debug!("float {r} was truncated to {res}");
45                }
46
47                res
48            }
49            InternalNumber::Integer(i) => i,
50        }
51    }
52
53    /// Create a new `Number` from an f32 number.
54    pub const fn from_f32(num: f32) -> Self {
55        Self(InternalNumber::Real(num as f64))
56    }
57
58    /// Create a new `Number` from an i32 number.
59    pub const fn from_i32(num: i32) -> Self {
60        Self(InternalNumber::Integer(num as i64))
61    }
62}
63
64impl Skippable for Number {
65    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
66        r.forward_if(|b| b == b'+' || b == b'-');
67
68        // Some PDFs have weird trailing minuses, so try to accept those as well.
69        match r.peek_byte()? {
70            b'.' => {
71                r.read_byte()?;
72                r.forward_while_1(is_digit_or_minus)?;
73            }
74
75            b'0'..=b'9' | b'-' => {
76                r.forward_while_1(is_digit_or_minus)?;
77                if let Some(()) = r.forward_tag(b".") {
78                    r.forward_while(is_digit_or_minus);
79                }
80            }
81            _ => return None,
82        }
83
84        Some(())
85    }
86}
87
88impl Readable<'_> for Number {
89    fn read(r: &mut Reader<'_>, ctx: &ReaderContext<'_>) -> Option<Self> {
90        // TODO: This function is probably the biggest bottleneck in content parsing, so
91        // worth optimizing (i.e. reading the number directly from the bytes instead
92        // of first parsing it to a number).
93
94        let mut data = r.skip::<Self>(ctx.in_content_stream)?;
95        // Some weird PDFs have trailing minus in the fraction of number, try to strip those.
96        if let Some(idx) = data[1..].iter().position(|b| *b == b'-') {
97            data = &data[..idx.saturating_sub(1)];
98        }
99        // We need to use f64 here, so that we can still parse a full `i32` without losing
100        // precision.
101        let num = f64::from_str(std::str::from_utf8(data).ok()?).ok()?;
102
103        if num.fract() == 0.0 {
104            Some(Self(InternalNumber::Integer(num as i64)))
105        } else {
106            Some(Self(InternalNumber::Real(num)))
107        }
108    }
109}
110
// Invokes the crate-internal `object!` macro (imported from `crate::object::macros`) for
// `Number`. The macro body is not visible in this file; presumably it generates the glue
// between `Number` and `Object::Number` — TODO confirm against the macro definition.
object!(Number, Number);
112
/// The internal representation of a [`Number`]: either a real or an integer value.
#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) enum InternalNumber {
    /// A floating-point value.
    Real(f64),
    /// An integer value.
    Integer(i64),
}
118
119macro_rules! int_num {
120    ($i:ident) => {
121        impl Skippable for $i {
122            fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
123                r.forward_if(|b| b == b'+' || b == b'-');
124                r.forward_while_1(is_digit)?;
125
126                // We have a float instead of an integer.
127                if r.peek_byte() == Some(b'.') {
128                    return None;
129                }
130
131                Some(())
132            }
133        }
134
135        impl<'a> Readable<'a> for $i {
136            fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<$i> {
137                r.read::<Number>(ctx)
138                    .map(|n| n.as_i64())
139                    .and_then(|n| n.try_into().ok())
140            }
141        }
142
143        impl TryFrom<Object<'_>> for $i {
144            type Error = ();
145
146            fn try_from(value: Object<'_>) -> std::result::Result<Self, Self::Error> {
147                match value {
148                    Object::Number(n) => n.as_i64().try_into().ok().ok_or(()),
149                    _ => Err(()),
150                }
151            }
152        }
153
154        impl<'a> ObjectLike<'a> for $i {}
155    };
156}
157
158int_num!(i32);
159int_num!(i64);
160int_num!(u32);
161int_num!(u16);
162int_num!(usize);
163int_num!(u8);
164
165impl Skippable for f32 {
166    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
167        r.skip::<Number>(is_content_stream).map(|_| {})
168    }
169}
170
171impl Readable<'_> for f32 {
172    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
173        r.read_without_context::<Number>()
174            .map(|n| n.as_f64() as Self)
175    }
176}
177
178impl TryFrom<Object<'_>> for f32 {
179    type Error = ();
180
181    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
182        match value {
183            Object::Number(n) => Ok(n.as_f64() as Self),
184            _ => Err(()),
185        }
186    }
187}
188
189impl ObjectLike<'_> for f32 {}
190
191impl Skippable for f64 {
192    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
193        r.skip::<Number>(is_content_stream).map(|_| {})
194    }
195}
196
197impl Readable<'_> for f64 {
198    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
199        r.read_without_context::<Number>().map(|n| n.as_f64())
200    }
201}
202
203impl TryFrom<Object<'_>> for f64 {
204    type Error = ();
205
206    fn try_from(value: Object<'_>) -> Result<Self, Self::Error> {
207        match value {
208            Object::Number(n) => Ok(n.as_f64()),
209            _ => Err(()),
210        }
211    }
212}
213
214impl ObjectLike<'_> for f64 {}
215
/// Returns `true` if `byte` is an ASCII decimal digit (`0` through `9`).
pub(crate) fn is_digit(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9')
}
219
220pub(crate) fn is_digit_or_minus(byte: u8) -> bool {
221    is_digit(byte) || byte == b'-'
222}
223
#[cfg(test)]
mod tests {
    use crate::object::Number;
    use crate::reader::Reader;
    use crate::reader::ReaderExt;

    // Reads a value of type `$ty` from the start of the string `$input`, without a
    // reader context. Evaluates to the `Option` returned by the reader.
    macro_rules! read_as {
        ($ty:ty, $input:expr) => {
            Reader::new($input.as_bytes()).read_without_context::<$ty>()
        };
    }

    #[test]
    fn int_1() {
        assert_eq!(read_as!(i32, "0").unwrap(), 0);
    }

    #[test]
    fn int_3() {
        assert_eq!(read_as!(i32, "+32").unwrap(), 32);
    }

    #[test]
    fn int_4() {
        assert_eq!(read_as!(i32, "-32").unwrap(), -32);
    }

    #[test]
    fn int_6() {
        assert_eq!(read_as!(i32, "98349").unwrap(), 98349);
    }

    #[test]
    fn int_7() {
        assert_eq!(read_as!(i32, "003245").unwrap(), 3245);
    }

    #[test]
    fn int_trailing() {
        assert_eq!(read_as!(i32, "0abc").unwrap(), 0);
    }

    #[test]
    fn real_1() {
        assert_eq!(read_as!(f32, "3").unwrap(), 3.0);
    }

    #[test]
    fn real_3() {
        assert_eq!(read_as!(f32, "+32").unwrap(), 32.0);
    }

    #[test]
    fn real_4() {
        assert_eq!(read_as!(f32, "-32").unwrap(), -32.0);
    }

    #[test]
    fn real_5() {
        assert_eq!(read_as!(f32, "-32.01").unwrap(), -32.01);
    }

    #[test]
    fn real_6() {
        assert_eq!(read_as!(f32, "-.345").unwrap(), -0.345);
    }

    #[test]
    fn real_7() {
        assert_eq!(read_as!(f32, "-.00143").unwrap(), -0.00143);
    }

    #[test]
    fn real_8() {
        assert_eq!(read_as!(f32, "-12.0013").unwrap(), -12.0013);
    }

    #[test]
    fn real_9() {
        assert_eq!(read_as!(f32, "98349.432534").unwrap(), 98_349.43);
    }

    #[test]
    fn real_10() {
        assert_eq!(read_as!(f32, "-34534656.34").unwrap(), -34534656.34);
    }

    #[test]
    fn real_trailing() {
        assert_eq!(read_as!(f32, "0abc").unwrap(), 0.0);
    }

    #[test]
    fn real_failing() {
        assert!(read_as!(f32, "+abc").is_none());
    }

    #[test]
    fn number_1() {
        assert_eq!(read_as!(Number, "+32").unwrap().as_f64() as f32, 32.0);
    }

    #[test]
    fn number_2() {
        assert_eq!(read_as!(Number, "-32.01").unwrap().as_f64() as f32, -32.01);
    }

    #[test]
    fn number_3() {
        assert_eq!(read_as!(Number, "-.345").unwrap().as_f64() as f32, -0.345);
    }

    #[test]
    fn large_number() {
        assert_eq!(read_as!(Number, "38359922").unwrap().as_i64(), 38359922);
    }

    #[test]
    fn large_number_2() {
        assert_eq!(read_as!(u32, "4294966260").unwrap(), 4294966260);
    }
}
450        );
451    }
452}