Skip to main content

read_fonts/
font_data.rs

1//! raw font bytes
2
3#![deny(clippy::arithmetic_side_effects)]
4use std::ops::{Range, RangeBounds};
5
6use bytemuck::AnyBitPattern;
7use types::{BigEndian, FixedSize, Scalar};
8
9use crate::array::ComputedArray;
10use crate::read::{ComputeSize, FontReadWithArgs, ReadError};
11use crate::FontRead;
12
/// A borrowed view of raw binary font data.
///
/// This wraps a byte slice and offers convenience methods for parsing and
/// validating the bytes it refers to.
#[derive(Clone, Copy, Debug, Default)]
pub struct FontData<'a> {
    /// The underlying bytes.
    bytes: &'a [u8],
}
21
22/// A cursor for validating bytes during parsing.
23///
24/// This type improves the ergonomics of validation blah blah
25///
26/// # Note
27///
28/// call `finish` when you're done to ensure you're in bounds
29#[derive(Debug, Default, Clone, Copy)]
30pub struct Cursor<'a> {
31    pos: usize,
32    data: FontData<'a>,
33}
34
35// we reuse a single buffer for all tables, but it gets padded with a u16
36// to accurately represent format-1 tables
37const ARR_LEN: usize = FontData::NULL_POOL_SIZE + u16::RAW_BYTE_LEN;
38
39/// This is [0, 1] ('1' in u16be) followed by NULL_POOL_SIZE zeros.
40///
41/// - this same array is reused both for format-1 tables (which need a leading 1)
42///   as well as all other tables, which don't.
43static EMPTY_TABLE_BYTES: [u8; ARR_LEN] = {
44    let mut arr = [0u8; ARR_LEN];
45    arr[1] = 1;
46    arr
47};
48
49impl FontData<'static> {
50    // https://github.com/harfbuzz/harfbuzz/blob/aba63bb5/src/hb-null.hh#L40
51    /// The number of bytes required to represent the largest table we have.
52    ///
53    /// This is checked by an assert at compile time, and can be increased as needed.
54    const NULL_POOL_SIZE: usize = 262;
55
56    /// Return all zeroes suitable for the default impl of a table.
57    pub(crate) fn default_table_data() -> Self {
58        FontData::new(&EMPTY_TABLE_BYTES[2..])
59    }
60
61    /// Return a [0x0, 0x01] byte pair (u16be) and then all zeros, to represent
62    /// the default impl of a format 1 table with u16 format.
63    pub(crate) fn default_format_1_u16_table_data() -> Self {
64        FontData::new(&EMPTY_TABLE_BYTES)
65    }
66
67    /// Return a single 0x01 and then all zeros, to represent the default impl
68    /// of a format 1 table with u8 format.
69    pub(crate) fn default_format_1_u8_table_data() -> Self {
70        FontData::new(&EMPTY_TABLE_BYTES[1..])
71    }
72
73    /// Return `true` if our default data can represent a table `n_bytes` long
74    ///
75    /// This is used in codegen'd asserts
76    // only used to evaluate anonymous const items (const_: () = ...) which isn't
77    // visible to the dead_code lint
78    #[expect(dead_code)]
79    pub(crate) const fn default_data_long_enough(n_bytes: usize) -> bool {
80        n_bytes <= Self::NULL_POOL_SIZE
81    }
82}
83
84impl<'a> FontData<'a> {
85    /// Empty data, useful for some tests and examples
86    pub const EMPTY: FontData<'static> = FontData { bytes: &[] };
87
88    /// Create a new `FontData` with these bytes.
89    ///
90    /// You generally don't need to do this? It is handled for you when loading
91    /// data from disk, but may be useful in tests.
92    pub const fn new(bytes: &'a [u8]) -> Self {
93        FontData { bytes }
94    }
95
96    /// The length of the data, in bytes
97    pub fn len(&self) -> usize {
98        self.bytes.len()
99    }
100
101    /// `true` if the data has a length of zero bytes.
102    pub fn is_empty(&self) -> bool {
103        self.bytes.is_empty()
104    }
105
106    /// Returns self[pos..]
107    pub fn split_off(&self, pos: usize) -> Option<FontData<'a>> {
108        self.bytes.get(pos..).map(|bytes| FontData { bytes })
109    }
110
111    /// returns self[..pos], and updates self to = self[pos..];
112    pub fn take_up_to(&mut self, pos: usize) -> Option<FontData<'a>> {
113        if pos > self.len() {
114            return None;
115        }
116        let (head, tail) = self.bytes.split_at(pos);
117        self.bytes = tail;
118        Some(FontData { bytes: head })
119    }
120
121    pub fn slice(&self, range: impl RangeBounds<usize>) -> Option<FontData<'a>> {
122        let bounds = (range.start_bound().cloned(), range.end_bound().cloned());
123        self.bytes.get(bounds).map(|bytes| FontData { bytes })
124    }
125
126    /// Read a scalar at the provided location in the data.
127    pub fn read_at<T: Scalar>(&self, offset: usize) -> Result<T, ReadError> {
128        let end = offset
129            .checked_add(T::RAW_BYTE_LEN)
130            .ok_or(ReadError::OutOfBounds)?;
131        self.bytes
132            .get(offset..end)
133            .and_then(T::read)
134            .ok_or(ReadError::OutOfBounds)
135    }
136
137    /// Read a big-endian value at the provided location in the data.
138    pub fn read_be_at<T: Scalar>(&self, offset: usize) -> Result<BigEndian<T>, ReadError> {
139        let end = offset
140            .checked_add(T::RAW_BYTE_LEN)
141            .ok_or(ReadError::OutOfBounds)?;
142        self.bytes
143            .get(offset..end)
144            .and_then(BigEndian::from_slice)
145            .ok_or(ReadError::OutOfBounds)
146    }
147
148    pub fn read_with_args<T>(&self, range: Range<usize>, args: &T::Args) -> Result<T, ReadError>
149    where
150        T: FontReadWithArgs<'a>,
151    {
152        self.slice(range)
153            .ok_or(ReadError::OutOfBounds)
154            .and_then(|data| T::read_with_args(data, args))
155    }
156
157    fn check_in_bounds(&self, offset: usize) -> Result<(), ReadError> {
158        self.bytes
159            .get(..offset)
160            .ok_or(ReadError::OutOfBounds)
161            .map(|_| ())
162    }
163
164    /// Interpret the bytes at the provided offset as a reference to `T`.
165    ///
166    /// Returns an error if the slice `offset..` is shorter than `T::RAW_BYTE_LEN`.
167    ///
168    /// This is a wrapper around [`read_ref_unchecked`][], which panics if
169    /// the type does not uphold the required invariants.
170    ///
171    /// # Panics
172    ///
173    /// This function will panic if `T` is zero-sized, has an alignment
174    /// other than one, or has any internal padding.
175    ///
176    /// [`read_ref_unchecked`]: [Self::read_ref_unchecked]
177    pub fn read_ref_at<T: AnyBitPattern + FixedSize>(
178        &self,
179        offset: usize,
180    ) -> Result<&'a T, ReadError> {
181        let end = offset
182            .checked_add(T::RAW_BYTE_LEN)
183            .ok_or(ReadError::OutOfBounds)?;
184        self.bytes
185            .get(offset..end)
186            .ok_or(ReadError::OutOfBounds)
187            .map(bytemuck::from_bytes)
188    }
189
190    /// Interpret the bytes at the provided offset as a slice of `T`.
191    ///
192    /// Returns an error if `range` is out of bounds for the underlying data,
193    /// or if the length of the range is not a multiple of `T::RAW_BYTE_LEN`.
194    ///
195    /// This is a wrapper around [`read_array_unchecked`][], which panics if
196    /// the type does not uphold the required invariants.
197    ///
198    /// # Panics
199    ///
200    /// This function will panic if `T` is zero-sized, has an alignment
201    /// other than one, or has any internal padding.
202    ///
203    /// [`read_array_unchecked`]: [Self::read_array_unchecked]
204    pub fn read_array<T: AnyBitPattern + FixedSize>(
205        &self,
206        range: Range<usize>,
207    ) -> Result<&'a [T], ReadError> {
208        let bytes = self
209            .bytes
210            .get(range.clone())
211            .ok_or(ReadError::OutOfBounds)?;
212        if bytes
213            .len()
214            .checked_rem(std::mem::size_of::<T>())
215            .unwrap_or(1) // definitely != 0
216            != 0
217        {
218            return Err(ReadError::InvalidArrayLen);
219        };
220        Ok(bytemuck::cast_slice(bytes))
221    }
222
223    pub(crate) fn cursor(&self) -> Cursor<'a> {
224        Cursor {
225            pos: 0,
226            data: *self,
227        }
228    }
229
230    /// Return the data as a byte slice
231    pub fn as_bytes(&self) -> &'a [u8] {
232        self.bytes
233    }
234}
235
236impl<'a> Cursor<'a> {
237    pub(crate) fn advance<T: Scalar>(&mut self) {
238        self.pos = self.pos.saturating_add(T::RAW_BYTE_LEN);
239    }
240
241    pub(crate) fn advance_by(&mut self, n_bytes: usize) {
242        self.pos = self.pos.saturating_add(n_bytes);
243    }
244
245    /// Read a variable length u32 and advance the cursor
246    pub(crate) fn read_u32_var(&mut self) -> Result<u32, ReadError> {
247        let mut next = || self.read::<u8>().map(|v| v as u32);
248        let b0 = next()?;
249        // TODO this feels possible to simplify, e.g. compute length, loop taking one and shifting and or'ing
250        #[allow(clippy::arithmetic_side_effects)] // these are all checked
251        let result = match b0 {
252            _ if b0 < 0x80 => b0,
253            _ if b0 < 0xC0 => ((b0 - 0x80) << 8) | next()?,
254            _ if b0 < 0xE0 => ((b0 - 0xC0) << 16) | (next()? << 8) | next()?,
255            _ if b0 < 0xF0 => ((b0 - 0xE0) << 24) | (next()? << 16) | (next()? << 8) | next()?,
256            _ => {
257                // 0xF0 is a dedicated 5-byte prefix; high bits are carried entirely
258                // by the following 4 bytes.
259                (next()? << 24) | (next()? << 16) | (next()? << 8) | next()?
260            }
261        };
262
263        Ok(result)
264    }
265
266    /// Read a scalar and advance the cursor.
267    pub(crate) fn read<T: Scalar>(&mut self) -> Result<T, ReadError> {
268        let temp = self.data.read_at(self.pos);
269        self.advance::<T>();
270        temp
271    }
272
273    /// Read a big-endian value and advance the cursor.
274    pub(crate) fn read_be<T: Scalar>(&mut self) -> Result<BigEndian<T>, ReadError> {
275        let temp = self.data.read_be_at(self.pos);
276        self.advance::<T>();
277        temp
278    }
279
280    pub(crate) fn read_with_args<T>(&mut self, args: &T::Args) -> Result<T, ReadError>
281    where
282        T: FontReadWithArgs<'a> + ComputeSize,
283    {
284        let len = T::compute_size(args)?;
285        let range_end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
286        let temp = self.data.read_with_args(self.pos..range_end, args);
287        self.advance_by(len);
288        temp
289    }
290
291    // only used in records that contain arrays :/
292    pub(crate) fn read_computed_array<T>(
293        &mut self,
294        len: usize,
295        args: &T::Args,
296    ) -> Result<ComputedArray<'a, T>, ReadError>
297    where
298        T: FontReadWithArgs<'a> + ComputeSize,
299    {
300        let len = len
301            .checked_mul(T::compute_size(args)?)
302            .ok_or(ReadError::OutOfBounds)?;
303        let range_end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
304        let temp = self.data.read_with_args(self.pos..range_end, args);
305        self.advance_by(len);
306        temp
307    }
308
309    pub(crate) fn read_array<T: AnyBitPattern + FixedSize>(
310        &mut self,
311        n_elem: usize,
312    ) -> Result<&'a [T], ReadError> {
313        let len = n_elem
314            .checked_mul(T::RAW_BYTE_LEN)
315            .ok_or(ReadError::OutOfBounds)?;
316        let end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
317        let temp = self.data.read_array(self.pos..end);
318        self.advance_by(len);
319        temp
320    }
321
322    /// return the current position, or an error if we are out of bounds
323    pub(crate) fn position(&self) -> Result<usize, ReadError> {
324        self.data.check_in_bounds(self.pos).map(|_| self.pos)
325    }
326
327    // used when handling fields with an implicit length, which must be at the
328    // end of a table.
329    pub(crate) fn remaining_bytes(&self) -> usize {
330        self.data.len().saturating_sub(self.pos)
331    }
332
333    pub(crate) fn remaining(self) -> Option<FontData<'a>> {
334        self.data.split_off(self.pos)
335    }
336
337    pub fn is_empty(&self) -> bool {
338        self.pos >= self.data.len()
339    }
340}
341
342// useful so we can have offsets that are just to data
343impl<'a> FontRead<'a> for FontData<'a> {
344    fn read(data: FontData<'a>) -> Result<Self, ReadError> {
345        Ok(data)
346    }
347}
348
349impl AsRef<[u8]> for FontData<'_> {
350    fn as_ref(&self) -> &[u8] {
351        self.bytes
352    }
353}
354
355impl<'a> From<&'a [u8]> for FontData<'a> {
356    fn from(src: &'a [u8]) -> FontData<'a> {
357        FontData::new(src)
358    }
359}
360
// Kind of ugly, but this makes FontData work with FontBuilder. If FontBuilder
// stops using Cow in its API, we can probably get rid of this?
#[cfg(feature = "std")]
impl<'a> From<FontData<'a>> for std::borrow::Cow<'a, [u8]> {
    /// Borrow the data's bytes; nothing is copied.
    fn from(src: FontData<'a>) -> Self {
        Self::Borrowed(src.as_bytes())
    }
}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Both format-1 default tables must decode their leading format field as 1.
    #[test]
    fn how_does_big_endian_work_again() {
        let u16_table = FontData::default_format_1_u16_table_data();
        assert_eq!(u16_table.read_at::<u16>(0), Ok(1));

        let u8_table = FontData::default_format_1_u8_table_data();
        assert_eq!(u8_table.read_at::<u8>(0), Ok(1));
    }
}