cff_parser/
parser.rs

1//! Binary parsing utils.
2//!
3//! This module should not be used directly, unless you're planning to parse
4//! some tables manually.
5
6use core::convert::{TryFrom, TryInto};
7use core::ops::Range;
8
/// A trait for parsing raw binary data of fixed size.
///
/// An implementor declares how many bytes one encoded value occupies
/// (`SIZE`) and how to decode a value from a byte slice (`parse`).
///
/// This is a low-level, internal trait that should not be used directly.
pub trait FromData: Sized {
    /// Object's raw data size, in bytes.
    ///
    /// Not always the same as `mem::size_of`.
    const SIZE: usize;

    /// Parses an object from raw data.
    ///
    /// Returns `None` when `data` cannot be decoded into `Self`.
    fn parse(data: &[u8]) -> Option<Self>;
}
21
/// A trait for parsing raw binary data of variable size.
///
/// The input slice carries the lifetime `'a`, so the parsed value may keep
/// borrowing from it.
///
/// This is a low-level, internal trait that should not be used directly.
pub trait FromSlice<'a>: Sized {
    /// Parses an object from raw data.
    ///
    /// Returns `None` when `data` cannot be decoded into `Self`.
    fn parse(data: &'a [u8]) -> Option<Self>;
}
29
30impl FromData for () {
31    const SIZE: usize = 0;
32
33    #[inline]
34    fn parse(_: &[u8]) -> Option<Self> {
35        Some(())
36    }
37}
38
39impl FromData for u8 {
40    const SIZE: usize = 1;
41
42    #[inline]
43    fn parse(data: &[u8]) -> Option<Self> {
44        data.get(0).copied()
45    }
46}
47
48impl FromData for i8 {
49    const SIZE: usize = 1;
50
51    #[inline]
52    fn parse(data: &[u8]) -> Option<Self> {
53        data.get(0).copied().map(|n| n as i8)
54    }
55}
56
57impl FromData for u16 {
58    const SIZE: usize = 2;
59
60    #[inline]
61    fn parse(data: &[u8]) -> Option<Self> {
62        data.try_into().ok().map(u16::from_be_bytes)
63    }
64}
65
66impl FromData for i16 {
67    const SIZE: usize = 2;
68
69    #[inline]
70    fn parse(data: &[u8]) -> Option<Self> {
71        data.try_into().ok().map(i16::from_be_bytes)
72    }
73}
74
75impl FromData for u32 {
76    const SIZE: usize = 4;
77
78    #[inline]
79    fn parse(data: &[u8]) -> Option<Self> {
80        data.try_into().ok().map(u32::from_be_bytes)
81    }
82}
83
84impl FromData for i32 {
85    const SIZE: usize = 4;
86
87    #[inline]
88    fn parse(data: &[u8]) -> Option<Self> {
89        data.try_into().ok().map(i32::from_be_bytes)
90    }
91}
92
93impl FromData for u64 {
94    const SIZE: usize = 8;
95
96    #[inline]
97    fn parse(data: &[u8]) -> Option<Self> {
98        data.try_into().ok().map(u64::from_be_bytes)
99    }
100}
101
/// A u24 number.
///
/// Stored as u32, but encoded as 3 bytes in the font.
///
/// <https://docs.microsoft.com/en-us/typography/opentype/spec/otff#data-types>
#[derive(Clone, Copy, Debug)]
// The single public field holds the decoded value widened to `u32`.
pub struct U24(pub u32);
109
110impl FromData for U24 {
111    const SIZE: usize = 3;
112
113    #[inline]
114    fn parse(data: &[u8]) -> Option<Self> {
115        let data: [u8; 3] = data.try_into().ok()?;
116        Some(U24(u32::from_be_bytes([0, data[0], data[1], data[2]])))
117    }
118}
119
/// A 32-bit signed fixed-point number (16.16).
///
/// The wrapped `f32` is the already-converted floating-point value.
#[derive(Clone, Copy, Debug)]
pub struct Fixed(pub f32);
123
124impl FromData for Fixed {
125    const SIZE: usize = 4;
126
127    #[inline]
128    fn parse(data: &[u8]) -> Option<Self> {
129        // TODO: is it safe to cast?
130        i32::parse(data).map(|n| Fixed(n as f32 / 65536.0))
131    }
132}
133
/// A safe, infallible cast into `usize`-like targets (e.g. u32 to usize).
///
/// Rust doesn't implement `From<u32> for usize`,
/// because it has to support 16 bit targets.
/// We don't, so we can allow this.
pub trait NumFrom<T>: Sized {
    /// Converts a value of type `T` into `Self`.
    fn num_from(_: T) -> Self;
}
143
impl NumFrom<u32> for usize {
    /// Widens a `u32` into a `usize`.
    #[inline]
    fn num_from(v: u32) -> Self {
        // Only compiled where `usize` is 32 or 64 bits wide, so the cast
        // cannot truncate.
        #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
        {
            v as usize
        }

        // On any other pointer width the block above is compiled out and the
        // body no longer produces a `usize`:
        // compilation error on 16 bit targets
    }
}
155
impl NumFrom<char> for usize {
    /// Converts a `char` into its scalar value as a `usize`.
    #[inline]
    fn num_from(v: char) -> Self {
        // Only compiled where `usize` is 32 or 64 bits wide, so the cast
        // cannot truncate.
        #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
        {
            v as usize
        }

        // On any other pointer width the block above is compiled out and the
        // body no longer produces a `usize`:
        // compilation error on 16 bit targets
    }
}
167
/// Just like `TryFrom<N>`, but for numeric types not supported by Rust's std.
pub trait TryNumFrom<T>: Sized {
    /// Casts between numeric types.
    ///
    /// Returns `None` when the value cannot be represented as `Self`.
    fn try_num_from(_: T) -> Option<Self>;
}
173
174impl TryNumFrom<f32> for u8 {
175    #[inline]
176    fn try_num_from(v: f32) -> Option<Self> {
177        i32::try_num_from(v).and_then(|v| u8::try_from(v).ok())
178    }
179}
180
181impl TryNumFrom<f32> for i16 {
182    #[inline]
183    fn try_num_from(v: f32) -> Option<Self> {
184        i32::try_num_from(v).and_then(|v| i16::try_from(v).ok())
185    }
186}
187
188impl TryNumFrom<f32> for u16 {
189    #[inline]
190    fn try_num_from(v: f32) -> Option<Self> {
191        i32::try_num_from(v).and_then(|v| u16::try_from(v).ok())
192    }
193}
194
195#[allow(clippy::manual_range_contains)]
196impl TryNumFrom<f32> for i32 {
197    #[inline]
198    fn try_num_from(v: f32) -> Option<Self> {
199        // Based on https://github.com/rust-num/num-traits/blob/master/src/cast.rs
200
201        // Float as int truncates toward zero, so we want to allow values
202        // in the exclusive range `(MIN-1, MAX+1)`.
203
204        // We can't represent `MIN-1` exactly, but there's no fractional part
205        // at this magnitude, so we can just use a `MIN` inclusive boundary.
206        const MIN: f32 = i32::MIN as f32;
207        // We can't represent `MAX` exactly, but it will round up to exactly
208        // `MAX+1` (a power of two) when we cast it.
209        const MAX_P1: f32 = i32::MAX as f32;
210        if v >= MIN && v < MAX_P1 {
211            Some(v as i32)
212        } else {
213            None
214        }
215    }
216}
217
/// A slice-like container that converts internal binary data only on access.
///
/// Array values are stored in a continuous data chunk.
#[derive(Clone, Copy)]
pub struct LazyArray16<'a, T> {
    // Raw, still-encoded bytes of all elements, back to back.
    data: &'a [u8],
    // Records the element type without storing any `T`.
    data_type: core::marker::PhantomData<T>,
}
226
227impl<T> Default for LazyArray16<'_, T> {
228    #[inline]
229    fn default() -> Self {
230        LazyArray16 {
231            data: &[],
232            data_type: core::marker::PhantomData,
233        }
234    }
235}
236
237impl<'a, T: FromData> LazyArray16<'a, T> {
238    /// Creates a new `LazyArray`.
239    #[inline]
240    pub fn new(data: &'a [u8]) -> Self {
241        LazyArray16 {
242            data,
243            data_type: core::marker::PhantomData,
244        }
245    }
246
247    /// Returns a value at `index`.
248    #[inline]
249    pub fn get(&self, index: u16) -> Option<T> {
250        if index < self.len() {
251            let start = usize::from(index) * T::SIZE;
252            let end = start + T::SIZE;
253            self.data.get(start..end).and_then(T::parse)
254        } else {
255            None
256        }
257    }
258
259    /// Returns the last value.
260    #[inline]
261    pub fn last(&self) -> Option<T> {
262        if !self.is_empty() {
263            self.get(self.len() - 1)
264        } else {
265            None
266        }
267    }
268
269    /// Returns sub-array.
270    #[inline]
271    pub fn slice(&self, range: Range<u16>) -> Option<Self> {
272        let start = usize::from(range.start) * T::SIZE;
273        let end = usize::from(range.end) * T::SIZE;
274        Some(LazyArray16 {
275            data: self.data.get(start..end)?,
276            ..LazyArray16::default()
277        })
278    }
279
280    /// Returns array's length.
281    #[inline]
282    pub fn len(&self) -> u16 {
283        (self.data.len() / T::SIZE) as u16
284    }
285
286    /// Checks if array is empty.
287    #[inline]
288    pub fn is_empty(&self) -> bool {
289        self.len() == 0
290    }
291
292    /// Performs a binary search by specified `key`.
293    #[inline]
294    pub fn binary_search(&self, key: &T) -> Option<(u16, T)>
295    where
296        T: Ord,
297    {
298        self.binary_search_by(|p| p.cmp(key))
299    }
300
301    /// Performs a binary search using specified closure.
302    #[inline]
303    pub fn binary_search_by<F>(&self, mut f: F) -> Option<(u16, T)>
304    where
305        F: FnMut(&T) -> core::cmp::Ordering,
306    {
307        // Based on Rust std implementation.
308
309        use core::cmp::Ordering;
310
311        let mut size = self.len();
312        if size == 0 {
313            return None;
314        }
315
316        let mut base = 0;
317        while size > 1 {
318            let half = size / 2;
319            let mid = base + half;
320            // mid is always in [0, size), that means mid is >= 0 and < size.
321            // mid >= 0: by definition
322            // mid < size: mid = size / 2 + size / 4 + size / 8 ...
323            let cmp = f(&self.get(mid)?);
324            base = if cmp == Ordering::Greater { base } else { mid };
325            size -= half;
326        }
327
328        // base is always in [0, size) because base <= mid.
329        let value = self.get(base)?;
330        if f(&value) == Ordering::Equal {
331            Some((base, value))
332        } else {
333            None
334        }
335    }
336}
337
338impl<'a, T: FromData + core::fmt::Debug + Copy> core::fmt::Debug for LazyArray16<'a, T> {
339    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
340        f.debug_list().entries(*self).finish()
341    }
342}
343
344impl<'a, T: FromData> IntoIterator for LazyArray16<'a, T> {
345    type Item = T;
346    type IntoIter = LazyArrayIter16<'a, T>;
347
348    #[inline]
349    fn into_iter(self) -> Self::IntoIter {
350        LazyArrayIter16 {
351            data: self,
352            index: 0,
353        }
354    }
355}
356
/// An iterator over `LazyArray16`.
#[derive(Clone, Copy)]
// `Debug` is intentionally not implemented for this type, so the lint is silenced.
#[allow(missing_debug_implementations)]
pub struct LazyArrayIter16<'a, T> {
    // The array being iterated.
    data: LazyArray16<'a, T>,
    // Index of the element the next call to `next` will read.
    index: u16,
}
364
365impl<T: FromData> Default for LazyArrayIter16<'_, T> {
366    #[inline]
367    fn default() -> Self {
368        LazyArrayIter16 {
369            data: LazyArray16::new(&[]),
370            index: 0,
371        }
372    }
373}
374
375impl<'a, T: FromData> Iterator for LazyArrayIter16<'a, T> {
376    type Item = T;
377
378    #[inline]
379    fn next(&mut self) -> Option<Self::Item> {
380        self.index += 1; // TODO: check
381        self.data.get(self.index - 1)
382    }
383
384    #[inline]
385    fn count(self) -> usize {
386        usize::from(self.data.len().saturating_sub(self.index))
387    }
388}
389
/// A [`LazyArray16`]-like container, but data is accessed by offsets.
///
/// Unlike [`LazyArray16`], internal storage is not continuous.
///
/// Multiple offsets can point to the same data.
#[derive(Clone, Copy)]
pub struct LazyOffsetArray16<'a, T: FromSlice<'a>> {
    // The data that the offsets index into.
    data: &'a [u8],
    // Zero offsets must be ignored, therefore we're using `Option<Offset16>`.
    offsets: LazyArray16<'a, Option<Offset16>>,
    // Records the element type without storing any `T`.
    data_type: core::marker::PhantomData<T>,
}
402
403impl<'a, T: FromSlice<'a>> LazyOffsetArray16<'a, T> {
404    /// Creates a new `LazyOffsetArray16`.
405    #[allow(dead_code)]
406    pub fn new(data: &'a [u8], offsets: LazyArray16<'a, Option<Offset16>>) -> Self {
407        Self {
408            data,
409            offsets,
410            data_type: core::marker::PhantomData,
411        }
412    }
413
414    /// Parses `LazyOffsetArray16` from raw data.
415    #[allow(dead_code)]
416    pub fn parse(data: &'a [u8]) -> Option<Self> {
417        let mut s = Stream::new(data);
418        let count = s.read::<u16>()?;
419        let offsets = s.read_array16(count)?;
420        Some(Self {
421            data,
422            offsets,
423            data_type: core::marker::PhantomData,
424        })
425    }
426
427    /// Returns a value at `index`.
428    #[inline]
429    pub fn get(&self, index: u16) -> Option<T> {
430        let offset = self.offsets.get(index)??.to_usize();
431        self.data.get(offset..).and_then(T::parse)
432    }
433
434    /// Returns array's length.
435    #[inline]
436    pub fn len(&self) -> u16 {
437        self.offsets.len()
438    }
439
440    /// Checks if array is empty.
441    #[inline]
442    #[allow(dead_code)]
443    pub fn is_empty(&self) -> bool {
444        self.len() == 0
445    }
446}
447
448impl<'a, T: FromSlice<'a> + core::fmt::Debug + Copy> core::fmt::Debug for LazyOffsetArray16<'a, T> {
449    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
450        f.debug_list().entries(*self).finish()
451    }
452}
453
/// An iterator over [`LazyOffsetArray16`] values.
#[derive(Clone, Copy)]
// `Debug` is intentionally not implemented for this type, so the lint is silenced.
#[allow(missing_debug_implementations)]
pub struct LazyOffsetArrayIter16<'a, T: FromSlice<'a>> {
    // The array being iterated.
    array: LazyOffsetArray16<'a, T>,
    // Index of the element the next call to `next` will read.
    index: u16,
}
461
462impl<'a, T: FromSlice<'a>> IntoIterator for LazyOffsetArray16<'a, T> {
463    type Item = T;
464    type IntoIter = LazyOffsetArrayIter16<'a, T>;
465
466    #[inline]
467    fn into_iter(self) -> Self::IntoIter {
468        LazyOffsetArrayIter16 {
469            array: self,
470            index: 0,
471        }
472    }
473}
474
475impl<'a, T: FromSlice<'a>> Iterator for LazyOffsetArrayIter16<'a, T> {
476    type Item = T;
477
478    fn next(&mut self) -> Option<Self::Item> {
479        if self.index < self.array.len() {
480            self.index += 1;
481            self.array.get(self.index - 1)
482        } else {
483            None
484        }
485    }
486
487    #[inline]
488    fn count(self) -> usize {
489        usize::from(self.array.len().saturating_sub(self.index))
490    }
491}
492
/// A streaming binary parser.
///
/// Wraps a byte slice together with a cursor into it.
#[derive(Clone, Default, Debug)]
pub struct Stream<'a> {
    // The whole input, including bytes before the cursor.
    data: &'a [u8],
    // Cursor position in bytes from the start of `data`; may exceed
    // `data.len()` because advancing is not bounds-checked.
    offset: usize,
}
499
500impl<'a> Stream<'a> {
501    /// Creates a new `Stream` parser.
502    #[inline]
503    pub fn new(data: &'a [u8]) -> Self {
504        Stream { data, offset: 0 }
505    }
506
507    /// Creates a new `Stream` parser at offset.
508    ///
509    /// Returns `None` when `offset` is out of bounds.
510    #[inline]
511    pub fn new_at(data: &'a [u8], offset: usize) -> Option<Self> {
512        if offset <= data.len() {
513            Some(Stream { data, offset })
514        } else {
515            None
516        }
517    }
518
519    /// Checks that stream reached the end of the data.
520    #[inline]
521    pub fn at_end(&self) -> bool {
522        self.offset >= self.data.len()
523    }
524
525    /// Returns the current offset.
526    #[inline]
527    pub fn offset(&self) -> usize {
528        self.offset
529    }
530
531    /// Returns the trailing data.
532    ///
533    /// Returns `None` when `Stream` is reached the end.
534    #[inline]
535    pub fn tail(&self) -> Option<&'a [u8]> {
536        self.data.get(self.offset..)
537    }
538
539    /// Advances by `FromData::SIZE`.
540    ///
541    /// Doesn't check bounds.
542    #[inline]
543    pub fn skip<T: FromData>(&mut self) {
544        self.advance(T::SIZE);
545    }
546
547    /// Advances by the specified `len`.
548    ///
549    /// Doesn't check bounds.
550    #[inline]
551    pub fn advance(&mut self, len: usize) {
552        self.offset += len;
553    }
554
555    /// Parses the type from the steam.
556    ///
557    /// Returns `None` when there is not enough data left in the stream
558    /// or the type parsing failed.
559    #[inline]
560    pub fn read<T: FromData>(&mut self) -> Option<T> {
561        self.read_bytes(T::SIZE).and_then(T::parse)
562    }
563
564    /// Reads N bytes from the stream.
565    #[inline]
566    pub fn read_bytes(&mut self, len: usize) -> Option<&'a [u8]> {
567        // An integer overflow here on 32bit systems is almost guarantee to be caused
568        // by an incorrect parsing logic from the caller side.
569        // Simply using `checked_add` here would silently swallow errors, which is not what we want.
570        debug_assert!(self.offset as u64 + len as u64 <= u32::MAX as u64);
571
572        let v = self.data.get(self.offset..self.offset + len)?;
573        self.advance(len);
574        Some(v)
575    }
576
577    /// Reads the next `count` types as a slice.
578    #[inline]
579    pub fn read_array16<T: FromData>(&mut self, count: u16) -> Option<LazyArray16<'a, T>> {
580        let len = usize::from(count) * T::SIZE;
581        self.read_bytes(len).map(LazyArray16::new)
582    }
583
584    #[allow(dead_code)]
585    #[inline]
586    pub fn read_at_offset16(&mut self, data: &'a [u8]) -> Option<&'a [u8]> {
587        let offset = self.read::<Offset16>()?.to_usize();
588        data.get(offset..)
589    }
590}
591
/// Common offset methods.
pub trait Offset {
    /// Converts the offset to `usize`.
    fn to_usize(&self) -> usize;
}
597
/// A type-safe u16 offset.
///
/// The wrapped value is the raw offset.
#[derive(Clone, Copy, Debug)]
pub struct Offset16(pub u16);
601
602impl Offset for Offset16 {
603    #[inline]
604    fn to_usize(&self) -> usize {
605        usize::from(self.0)
606    }
607}
608
609impl FromData for Offset16 {
610    const SIZE: usize = 2;
611
612    #[inline]
613    fn parse(data: &[u8]) -> Option<Self> {
614        u16::parse(data).map(Offset16)
615    }
616}
617
618impl FromData for Option<Offset16> {
619    const SIZE: usize = Offset16::SIZE;
620
621    #[inline]
622    fn parse(data: &[u8]) -> Option<Self> {
623        let offset = Offset16::parse(data)?;
624        if offset.0 != 0 {
625            Some(Some(offset))
626        } else {
627            Some(None)
628        }
629    }
630}
631
/// A type-safe u24 offset.
///
/// The wrapped value is the raw offset, held in a `u32`.
#[derive(Clone, Copy, Debug)]
pub struct Offset24(pub u32);
635
636impl Offset for Offset24 {
637    #[inline]
638    fn to_usize(&self) -> usize {
639        usize::num_from(self.0)
640    }
641}
642
643impl FromData for Offset24 {
644    const SIZE: usize = 3;
645
646    #[inline]
647    fn parse(data: &[u8]) -> Option<Self> {
648        U24::parse(data).map(|n| Offset24(n.0))
649    }
650}
651
652impl FromData for Option<Offset24> {
653    const SIZE: usize = Offset24::SIZE;
654
655    #[inline]
656    fn parse(data: &[u8]) -> Option<Self> {
657        let offset = Offset24::parse(data)?;
658        if offset.0 != 0 {
659            Some(Some(offset))
660        } else {
661            Some(None)
662        }
663    }
664}
665
/// A type-safe u32 offset.
///
/// The wrapped value is the raw offset.
#[derive(Clone, Copy, Debug)]
pub struct Offset32(pub u32);
669
670impl Offset for Offset32 {
671    #[inline]
672    fn to_usize(&self) -> usize {
673        usize::num_from(self.0)
674    }
675}
676
677impl FromData for Offset32 {
678    const SIZE: usize = 4;
679
680    #[inline]
681    fn parse(data: &[u8]) -> Option<Self> {
682        u32::parse(data).map(Offset32)
683    }
684}
685
686impl FromData for Option<Offset32> {
687    const SIZE: usize = Offset32::SIZE;
688
689    #[inline]
690    fn parse(data: &[u8]) -> Option<Self> {
691        let offset = Offset32::parse(data)?;
692        if offset.0 != 0 {
693            Some(Some(offset))
694        } else {
695            Some(None)
696        }
697    }
698}