Skip to main content

read_fonts/
font_data.rs

1//! raw font bytes
2
3#![deny(clippy::arithmetic_side_effects)]
4use std::ops::{Range, RangeBounds};
5
6use bytemuck::AnyBitPattern;
7use types::{BigEndian, FixedSize, Scalar};
8
9use crate::array::ComputedArray;
10use crate::read::{ComputeSize, FontReadWithArgs, ReadError};
11use crate::FontRead;
12
/// A borrowed view of raw binary font data.
///
/// This is a thin, copyable wrapper around a byte slice. Its methods perform
/// bounds-checked reads, so parsing code can validate data as it goes.
#[derive(Clone, Copy, Debug, Default)]
pub struct FontData<'a> {
    /// The wrapped bytes; never copied, only re-sliced.
    bytes: &'a [u8],
}
21
22/// A cursor for validating bytes during parsing.
23///
24/// This type improves the ergonomics of validation blah blah
25///
26/// # Note
27///
28/// call `finish` when you're done to ensure you're in bounds
29#[derive(Debug, Default, Clone, Copy)]
30pub struct Cursor<'a> {
31    pos: usize,
32    data: FontData<'a>,
33}
34
35// we reuse a single buffer for all tables, but it gets padded with a u16
36// to accurately represent format-1 tables
37const ARR_LEN: usize = FontData::NULL_POOL_SIZE + u16::RAW_BYTE_LEN;
38
39/// This is [0, 1] ('1' in u16be) followed by NULL_POOL_SIZE zeros.
40///
41/// - this same array is reused both for format-1 tables (which need a leading 1)
42///   as well as all other tables, which don't.
43static EMPTY_TABLE_BYTES: [u8; ARR_LEN] = {
44    let mut arr = [0u8; ARR_LEN];
45    arr[1] = 1;
46    arr
47};
48
49impl FontData<'static> {
50    // https://github.com/harfbuzz/harfbuzz/blob/aba63bb5/src/hb-null.hh#L40
51    /// The number of bytes required to represent the largest table we have.
52    ///
53    /// This is checked by an assert at compile time, and can be increased as needed.
54    const NULL_POOL_SIZE: usize = 262;
55
56    /// Return all zeroes suitable for the default impl of a table.
57    pub(crate) fn default_table_data() -> Self {
58        FontData::new(&EMPTY_TABLE_BYTES[2..])
59    }
60
61    /// Return a [0x0, 0x01] byte pair (u16be) and then all zeros, to represent
62    /// the default impl of a format 1 table with u16 format.
63    pub(crate) fn default_format_1_u16_table_data() -> Self {
64        FontData::new(&EMPTY_TABLE_BYTES)
65    }
66
67    /// Return a single 0x01 and then all zeros, to represent the default impl
68    /// of a format 1 table with u8 format.
69    pub(crate) fn default_format_1_u8_table_data() -> Self {
70        FontData::new(&EMPTY_TABLE_BYTES[1..])
71    }
72
73    /// Return `true` if our default data can represent a table `n_bytes` long
74    pub(crate) const fn default_data_long_enough(n_bytes: usize) -> bool {
75        n_bytes <= Self::NULL_POOL_SIZE
76    }
77}
78
79impl<'a> FontData<'a> {
80    /// Empty data, useful for some tests and examples
81    pub const EMPTY: FontData<'static> = FontData { bytes: &[] };
82
83    /// Create a new `FontData` with these bytes.
84    ///
85    /// You generally don't need to do this? It is handled for you when loading
86    /// data from disk, but may be useful in tests.
87    pub const fn new(bytes: &'a [u8]) -> Self {
88        FontData { bytes }
89    }
90
91    /// The length of the data, in bytes
92    pub fn len(&self) -> usize {
93        self.bytes.len()
94    }
95
96    /// `true` if the data has a length of zero bytes.
97    pub fn is_empty(&self) -> bool {
98        self.bytes.is_empty()
99    }
100
101    /// Returns self[pos..]
102    pub fn split_off(&self, pos: usize) -> Option<FontData<'a>> {
103        self.bytes.get(pos..).map(|bytes| FontData { bytes })
104    }
105
106    /// returns self[..pos], and updates self to = self[pos..];
107    pub fn take_up_to(&mut self, pos: usize) -> Option<FontData<'a>> {
108        if pos > self.len() {
109            return None;
110        }
111        let (head, tail) = self.bytes.split_at(pos);
112        self.bytes = tail;
113        Some(FontData { bytes: head })
114    }
115
116    pub fn slice(&self, range: impl RangeBounds<usize>) -> Option<FontData<'a>> {
117        let bounds = (range.start_bound().cloned(), range.end_bound().cloned());
118        self.bytes.get(bounds).map(|bytes| FontData { bytes })
119    }
120
121    /// Read a scalar at the provided location in the data.
122    pub fn read_at<T: Scalar>(&self, offset: usize) -> Result<T, ReadError> {
123        let end = offset
124            .checked_add(T::RAW_BYTE_LEN)
125            .ok_or(ReadError::OutOfBounds)?;
126        self.bytes
127            .get(offset..end)
128            .and_then(T::read)
129            .ok_or(ReadError::OutOfBounds)
130    }
131
132    /// Read a big-endian value at the provided location in the data.
133    pub fn read_be_at<T: Scalar>(&self, offset: usize) -> Result<BigEndian<T>, ReadError> {
134        let end = offset
135            .checked_add(T::RAW_BYTE_LEN)
136            .ok_or(ReadError::OutOfBounds)?;
137        self.bytes
138            .get(offset..end)
139            .and_then(BigEndian::from_slice)
140            .ok_or(ReadError::OutOfBounds)
141    }
142
143    pub fn read_with_args<T>(&self, range: Range<usize>, args: &T::Args) -> Result<T, ReadError>
144    where
145        T: FontReadWithArgs<'a>,
146    {
147        self.slice(range)
148            .ok_or(ReadError::OutOfBounds)
149            .and_then(|data| T::read_with_args(data, args))
150    }
151
152    fn check_in_bounds(&self, offset: usize) -> Result<(), ReadError> {
153        self.bytes
154            .get(..offset)
155            .ok_or(ReadError::OutOfBounds)
156            .map(|_| ())
157    }
158
159    /// Interpret the bytes at the provided offset as a reference to `T`.
160    ///
161    /// Returns an error if the slice `offset..` is shorter than `T::RAW_BYTE_LEN`.
162    ///
163    /// This is a wrapper around [`read_ref_unchecked`][], which panics if
164    /// the type does not uphold the required invariants.
165    ///
166    /// # Panics
167    ///
168    /// This function will panic if `T` is zero-sized, has an alignment
169    /// other than one, or has any internal padding.
170    ///
171    /// [`read_ref_unchecked`]: [Self::read_ref_unchecked]
172    pub fn read_ref_at<T: AnyBitPattern + FixedSize>(
173        &self,
174        offset: usize,
175    ) -> Result<&'a T, ReadError> {
176        let end = offset
177            .checked_add(T::RAW_BYTE_LEN)
178            .ok_or(ReadError::OutOfBounds)?;
179        self.bytes
180            .get(offset..end)
181            .ok_or(ReadError::OutOfBounds)
182            .map(bytemuck::from_bytes)
183    }
184
185    /// Interpret the bytes at the provided offset as a slice of `T`.
186    ///
187    /// Returns an error if `range` is out of bounds for the underlying data,
188    /// or if the length of the range is not a multiple of `T::RAW_BYTE_LEN`.
189    ///
190    /// This is a wrapper around [`read_array_unchecked`][], which panics if
191    /// the type does not uphold the required invariants.
192    ///
193    /// # Panics
194    ///
195    /// This function will panic if `T` is zero-sized, has an alignment
196    /// other than one, or has any internal padding.
197    ///
198    /// [`read_array_unchecked`]: [Self::read_array_unchecked]
199    pub fn read_array<T: AnyBitPattern + FixedSize>(
200        &self,
201        range: Range<usize>,
202    ) -> Result<&'a [T], ReadError> {
203        let bytes = self
204            .bytes
205            .get(range.clone())
206            .ok_or(ReadError::OutOfBounds)?;
207        if bytes
208            .len()
209            .checked_rem(std::mem::size_of::<T>())
210            .unwrap_or(1) // definitely != 0
211            != 0
212        {
213            return Err(ReadError::InvalidArrayLen);
214        };
215        Ok(bytemuck::cast_slice(bytes))
216    }
217
218    pub(crate) fn cursor(&self) -> Cursor<'a> {
219        Cursor {
220            pos: 0,
221            data: *self,
222        }
223    }
224
225    /// Return the data as a byte slice
226    pub fn as_bytes(&self) -> &'a [u8] {
227        self.bytes
228    }
229}
230
231impl<'a> Cursor<'a> {
232    pub(crate) fn advance<T: Scalar>(&mut self) {
233        self.pos = self.pos.saturating_add(T::RAW_BYTE_LEN);
234    }
235
236    pub(crate) fn advance_by(&mut self, n_bytes: usize) {
237        self.pos = self.pos.saturating_add(n_bytes);
238    }
239
240    /// Read a variable length u32 and advance the cursor
241    pub(crate) fn read_u32_var(&mut self) -> Result<u32, ReadError> {
242        let mut next = || self.read::<u8>().map(|v| v as u32);
243        let b0 = next()?;
244        // TODO this feels possible to simplify, e.g. compute length, loop taking one and shifting and or'ing
245        #[allow(clippy::arithmetic_side_effects)] // these are all checked
246        let result = match b0 {
247            _ if b0 < 0x80 => b0,
248            _ if b0 < 0xC0 => ((b0 - 0x80) << 8) | next()?,
249            _ if b0 < 0xE0 => ((b0 - 0xC0) << 16) | (next()? << 8) | next()?,
250            _ if b0 < 0xF0 => ((b0 - 0xE0) << 24) | (next()? << 16) | (next()? << 8) | next()?,
251            _ => {
252                // 0xF0 is a dedicated 5-byte prefix; high bits are carried entirely
253                // by the following 4 bytes.
254                (next()? << 24) | (next()? << 16) | (next()? << 8) | next()?
255            }
256        };
257
258        Ok(result)
259    }
260
261    /// Read a scalar and advance the cursor.
262    pub(crate) fn read<T: Scalar>(&mut self) -> Result<T, ReadError> {
263        let temp = self.data.read_at(self.pos);
264        self.advance::<T>();
265        temp
266    }
267
268    /// Read a big-endian value and advance the cursor.
269    pub(crate) fn read_be<T: Scalar>(&mut self) -> Result<BigEndian<T>, ReadError> {
270        let temp = self.data.read_be_at(self.pos);
271        self.advance::<T>();
272        temp
273    }
274
275    pub(crate) fn read_with_args<T>(&mut self, args: &T::Args) -> Result<T, ReadError>
276    where
277        T: FontReadWithArgs<'a> + ComputeSize,
278    {
279        let len = T::compute_size(args)?;
280        let range_end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
281        let temp = self.data.read_with_args(self.pos..range_end, args);
282        self.advance_by(len);
283        temp
284    }
285
286    // only used in records that contain arrays :/
287    pub(crate) fn read_computed_array<T>(
288        &mut self,
289        len: usize,
290        args: &T::Args,
291    ) -> Result<ComputedArray<'a, T>, ReadError>
292    where
293        T: FontReadWithArgs<'a> + ComputeSize,
294    {
295        let len = len
296            .checked_mul(T::compute_size(args)?)
297            .ok_or(ReadError::OutOfBounds)?;
298        let range_end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
299        let temp = self.data.read_with_args(self.pos..range_end, args);
300        self.advance_by(len);
301        temp
302    }
303
304    pub(crate) fn read_array<T: AnyBitPattern + FixedSize>(
305        &mut self,
306        n_elem: usize,
307    ) -> Result<&'a [T], ReadError> {
308        let len = n_elem
309            .checked_mul(T::RAW_BYTE_LEN)
310            .ok_or(ReadError::OutOfBounds)?;
311        let end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
312        let temp = self.data.read_array(self.pos..end);
313        self.advance_by(len);
314        temp
315    }
316
317    /// return the current position, or an error if we are out of bounds
318    pub(crate) fn position(&self) -> Result<usize, ReadError> {
319        self.data.check_in_bounds(self.pos).map(|_| self.pos)
320    }
321
322    // used when handling fields with an implicit length, which must be at the
323    // end of a table.
324    pub(crate) fn remaining_bytes(&self) -> usize {
325        self.data.len().saturating_sub(self.pos)
326    }
327
328    pub(crate) fn remaining(self) -> Option<FontData<'a>> {
329        self.data.split_off(self.pos)
330    }
331
332    pub fn is_empty(&self) -> bool {
333        self.pos >= self.data.len()
334    }
335}
336
337// useful so we can have offsets that are just to data
338impl<'a> FontRead<'a> for FontData<'a> {
339    fn read(data: FontData<'a>) -> Result<Self, ReadError> {
340        Ok(data)
341    }
342}
343
344impl AsRef<[u8]> for FontData<'_> {
345    fn as_ref(&self) -> &[u8] {
346        self.bytes
347    }
348}
349
350impl<'a> From<&'a [u8]> for FontData<'a> {
351    fn from(src: &'a [u8]) -> FontData<'a> {
352        FontData::new(src)
353    }
354}
355
// Kind of ugly, but this makes FontData work with FontBuilder. If FontBuilder
// stops using Cow in its API, we can probably get rid of this?
//
// The bytes are borrowed, never copied.
#[cfg(feature = "std")]
impl<'a> From<FontData<'a>> for std::borrow::Cow<'a, [u8]> {
    fn from(src: FontData<'a>) -> Self {
        std::borrow::Cow::Borrowed(src.bytes)
    }
}
364
#[cfg(test)]
mod tests {
    use super::*;

    /// The default format-1 data must start with the value 1, whether the
    /// format field is read as a big-endian u16 or as a u8.
    #[test]
    fn how_does_big_endian_work_again() {
        let u16_format = FontData::default_format_1_u16_table_data();
        assert_eq!(u16_format.read_at::<u16>(0), Ok(1));

        let u8_format = FontData::default_format_1_u8_table_data();
        assert_eq!(u8_format.read_at::<u8>(0), Ok(1));
    }
}