Skip to main content

read_fonts/
font_data.rs

1//! raw font bytes
2
3#![deny(clippy::arithmetic_side_effects)]
4use std::ops::{Range, RangeBounds};
5
6use bytemuck::AnyBitPattern;
7use types::{BigEndian, FixedSize, Scalar};
8
9use crate::array::ComputedArray;
10use crate::read::{ComputeSize, FontReadWithArgs, ReadError};
11use crate::FontRead;
12
/// A borrowed view of raw binary font data.
///
/// This is a thin, `Copy` wrapper around a byte slice; its methods offer
/// bounds-checked access for parsing and validating that data.
#[derive(Debug, Default, Clone, Copy)]
pub struct FontData<'a> {
    /// The wrapped bytes.
    bytes: &'a [u8],
}
21
22/// A cursor for validating bytes during parsing.
23///
24/// This type improves the ergonomics of validation blah blah
25///
26/// # Note
27///
28/// call `finish` when you're done to ensure you're in bounds
29#[derive(Debug, Default, Clone, Copy)]
30pub struct Cursor<'a> {
31    pos: usize,
32    data: FontData<'a>,
33}
34
35impl<'a> FontData<'a> {
36    /// Empty data, useful for some tests and examples
37    pub const EMPTY: FontData<'static> = FontData { bytes: &[] };
38
39    /// Create a new `FontData` with these bytes.
40    ///
41    /// You generally don't need to do this? It is handled for you when loading
42    /// data from disk, but may be useful in tests.
43    pub const fn new(bytes: &'a [u8]) -> Self {
44        FontData { bytes }
45    }
46
47    /// The length of the data, in bytes
48    pub fn len(&self) -> usize {
49        self.bytes.len()
50    }
51
52    /// `true` if the data has a length of zero bytes.
53    pub fn is_empty(&self) -> bool {
54        self.bytes.is_empty()
55    }
56
57    /// Returns self[pos..]
58    pub fn split_off(&self, pos: usize) -> Option<FontData<'a>> {
59        self.bytes.get(pos..).map(|bytes| FontData { bytes })
60    }
61
62    /// returns self[..pos], and updates self to = self[pos..];
63    pub fn take_up_to(&mut self, pos: usize) -> Option<FontData<'a>> {
64        if pos > self.len() {
65            return None;
66        }
67        let (head, tail) = self.bytes.split_at(pos);
68        self.bytes = tail;
69        Some(FontData { bytes: head })
70    }
71
72    pub fn slice(&self, range: impl RangeBounds<usize>) -> Option<FontData<'a>> {
73        let bounds = (range.start_bound().cloned(), range.end_bound().cloned());
74        self.bytes.get(bounds).map(|bytes| FontData { bytes })
75    }
76
77    /// Read a scalar at the provided location in the data.
78    pub fn read_at<T: Scalar>(&self, offset: usize) -> Result<T, ReadError> {
79        let end = offset
80            .checked_add(T::RAW_BYTE_LEN)
81            .ok_or(ReadError::OutOfBounds)?;
82        self.bytes
83            .get(offset..end)
84            .and_then(T::read)
85            .ok_or(ReadError::OutOfBounds)
86    }
87
88    /// Read a big-endian value at the provided location in the data.
89    pub fn read_be_at<T: Scalar>(&self, offset: usize) -> Result<BigEndian<T>, ReadError> {
90        let end = offset
91            .checked_add(T::RAW_BYTE_LEN)
92            .ok_or(ReadError::OutOfBounds)?;
93        self.bytes
94            .get(offset..end)
95            .and_then(BigEndian::from_slice)
96            .ok_or(ReadError::OutOfBounds)
97    }
98
99    pub fn read_with_args<T>(&self, range: Range<usize>, args: &T::Args) -> Result<T, ReadError>
100    where
101        T: FontReadWithArgs<'a>,
102    {
103        self.slice(range)
104            .ok_or(ReadError::OutOfBounds)
105            .and_then(|data| T::read_with_args(data, args))
106    }
107
108    fn check_in_bounds(&self, offset: usize) -> Result<(), ReadError> {
109        self.bytes
110            .get(..offset)
111            .ok_or(ReadError::OutOfBounds)
112            .map(|_| ())
113    }
114
115    /// Interpret the bytes at the provided offset as a reference to `T`.
116    ///
117    /// Returns an error if the slice `offset..` is shorter than `T::RAW_BYTE_LEN`.
118    ///
119    /// This is a wrapper around [`read_ref_unchecked`][], which panics if
120    /// the type does not uphold the required invariants.
121    ///
122    /// # Panics
123    ///
124    /// This function will panic if `T` is zero-sized, has an alignment
125    /// other than one, or has any internal padding.
126    ///
127    /// [`read_ref_unchecked`]: [Self::read_ref_unchecked]
128    pub fn read_ref_at<T: AnyBitPattern + FixedSize>(
129        &self,
130        offset: usize,
131    ) -> Result<&'a T, ReadError> {
132        let end = offset
133            .checked_add(T::RAW_BYTE_LEN)
134            .ok_or(ReadError::OutOfBounds)?;
135        self.bytes
136            .get(offset..end)
137            .ok_or(ReadError::OutOfBounds)
138            .map(bytemuck::from_bytes)
139    }
140
141    /// Interpret the bytes at the provided offset as a slice of `T`.
142    ///
143    /// Returns an error if `range` is out of bounds for the underlying data,
144    /// or if the length of the range is not a multiple of `T::RAW_BYTE_LEN`.
145    ///
146    /// This is a wrapper around [`read_array_unchecked`][], which panics if
147    /// the type does not uphold the required invariants.
148    ///
149    /// # Panics
150    ///
151    /// This function will panic if `T` is zero-sized, has an alignment
152    /// other than one, or has any internal padding.
153    ///
154    /// [`read_array_unchecked`]: [Self::read_array_unchecked]
155    pub fn read_array<T: AnyBitPattern + FixedSize>(
156        &self,
157        range: Range<usize>,
158    ) -> Result<&'a [T], ReadError> {
159        let bytes = self
160            .bytes
161            .get(range.clone())
162            .ok_or(ReadError::OutOfBounds)?;
163        if bytes
164            .len()
165            .checked_rem(std::mem::size_of::<T>())
166            .unwrap_or(1) // definitely != 0
167            != 0
168        {
169            return Err(ReadError::InvalidArrayLen);
170        };
171        Ok(bytemuck::cast_slice(bytes))
172    }
173
174    pub(crate) fn cursor(&self) -> Cursor<'a> {
175        Cursor {
176            pos: 0,
177            data: *self,
178        }
179    }
180
181    /// Return the data as a byte slice
182    pub fn as_bytes(&self) -> &'a [u8] {
183        self.bytes
184    }
185}
186
187impl<'a> Cursor<'a> {
188    pub(crate) fn advance<T: Scalar>(&mut self) {
189        self.pos = self.pos.saturating_add(T::RAW_BYTE_LEN);
190    }
191
192    pub(crate) fn advance_by(&mut self, n_bytes: usize) {
193        self.pos = self.pos.saturating_add(n_bytes);
194    }
195
196    /// Read a variable length u32 and advance the cursor
197    pub(crate) fn read_u32_var(&mut self) -> Result<u32, ReadError> {
198        let mut next = || self.read::<u8>().map(|v| v as u32);
199        let b0 = next()?;
200        // TODO this feels possible to simplify, e.g. compute length, loop taking one and shifting and or'ing
201        #[allow(clippy::arithmetic_side_effects)] // these are all checked
202        let result = match b0 {
203            _ if b0 < 0x80 => b0,
204            _ if b0 < 0xC0 => ((b0 - 0x80) << 8) | next()?,
205            _ if b0 < 0xE0 => ((b0 - 0xC0) << 16) | (next()? << 8) | next()?,
206            _ if b0 < 0xF0 => ((b0 - 0xE0) << 24) | (next()? << 16) | (next()? << 8) | next()?,
207            _ => {
208                // 0xF0 is a dedicated 5-byte prefix; high bits are carried entirely
209                // by the following 4 bytes.
210                (next()? << 24) | (next()? << 16) | (next()? << 8) | next()?
211            }
212        };
213
214        Ok(result)
215    }
216
217    /// Read a scalar and advance the cursor.
218    pub(crate) fn read<T: Scalar>(&mut self) -> Result<T, ReadError> {
219        let temp = self.data.read_at(self.pos);
220        self.advance::<T>();
221        temp
222    }
223
224    /// Read a big-endian value and advance the cursor.
225    pub(crate) fn read_be<T: Scalar>(&mut self) -> Result<BigEndian<T>, ReadError> {
226        let temp = self.data.read_be_at(self.pos);
227        self.advance::<T>();
228        temp
229    }
230
231    pub(crate) fn read_with_args<T>(&mut self, args: &T::Args) -> Result<T, ReadError>
232    where
233        T: FontReadWithArgs<'a> + ComputeSize,
234    {
235        let len = T::compute_size(args)?;
236        let range_end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
237        let temp = self.data.read_with_args(self.pos..range_end, args);
238        self.advance_by(len);
239        temp
240    }
241
242    // only used in records that contain arrays :/
243    pub(crate) fn read_computed_array<T>(
244        &mut self,
245        len: usize,
246        args: &T::Args,
247    ) -> Result<ComputedArray<'a, T>, ReadError>
248    where
249        T: FontReadWithArgs<'a> + ComputeSize,
250    {
251        let len = len
252            .checked_mul(T::compute_size(args)?)
253            .ok_or(ReadError::OutOfBounds)?;
254        let range_end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
255        let temp = self.data.read_with_args(self.pos..range_end, args);
256        self.advance_by(len);
257        temp
258    }
259
260    pub(crate) fn read_array<T: AnyBitPattern + FixedSize>(
261        &mut self,
262        n_elem: usize,
263    ) -> Result<&'a [T], ReadError> {
264        let len = n_elem
265            .checked_mul(T::RAW_BYTE_LEN)
266            .ok_or(ReadError::OutOfBounds)?;
267        let end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
268        let temp = self.data.read_array(self.pos..end);
269        self.advance_by(len);
270        temp
271    }
272
273    /// return the current position, or an error if we are out of bounds
274    pub(crate) fn position(&self) -> Result<usize, ReadError> {
275        self.data.check_in_bounds(self.pos).map(|_| self.pos)
276    }
277
278    // used when handling fields with an implicit length, which must be at the
279    // end of a table.
280    pub(crate) fn remaining_bytes(&self) -> usize {
281        self.data.len().saturating_sub(self.pos)
282    }
283
284    pub(crate) fn remaining(self) -> Option<FontData<'a>> {
285        self.data.split_off(self.pos)
286    }
287
288    pub fn is_empty(&self) -> bool {
289        self.pos >= self.data.len()
290    }
291}
292
293// useful so we can have offsets that are just to data
294impl<'a> FontRead<'a> for FontData<'a> {
295    fn read(data: FontData<'a>) -> Result<Self, ReadError> {
296        Ok(data)
297    }
298}
299
300impl AsRef<[u8]> for FontData<'_> {
301    fn as_ref(&self) -> &[u8] {
302        self.bytes
303    }
304}
305
306impl<'a> From<&'a [u8]> for FontData<'a> {
307    fn from(src: &'a [u8]) -> FontData<'a> {
308        FontData::new(src)
309    }
310}
311
// Kind of ugly, but this makes FontData work with FontBuilder. If FontBuilder
// stops using Cow in its API, we can probably get rid of this?
//
// The conversion borrows the underlying bytes; nothing is copied.
#[cfg(feature = "std")]
impl<'a> From<FontData<'a>> for std::borrow::Cow<'a, [u8]> {
    fn from(src: FontData<'a>) -> Self {
        std::borrow::Cow::Borrowed(src.bytes)
    }
}