fontconfig_cache_parser/
lib.rs

1#![deny(missing_docs)]
2
3//! A crate for parsing fontconfig cache files.
4//!
5//! The fontconfig cache format is a C-style binary format, containing a maze of twisty structs all alike,
6//! with lots of pointers from one to another. This makes it pretty inefficient to parse the whole file,
7//! especially if you're only interested in a few parts. The expected workflow of this crate is:
8//!
9//! 1. You read the cache file into memory (possibly using `mmap` if the file is large and performance is important).
10//! 2. You construct a [`Cache`](crate::Cache::from_bytes), borrowing the memory chunk.
11//! 3. You follow the various methods on `Cache` to get access to the information you want.
12//!    As you follow those methods, the data will be read incrementally from the memory chunk you
13//!    created in part 1.
14
15use bytemuck::AnyBitPattern;
16use std::os::raw::{c_int, c_uint};
17
18pub mod data;
19pub mod ptr;
20
21use data::{
22    CacheData, CharSetData, FontSetData, PatternData, PatternEltData, ValueData, ValueListData,
23};
24use ptr::{Array, Ptr};
25
26type Result<T> = std::result::Result<T, Error>;
27
/// A dynamically typed value.
///
/// This is a wrapper around fontconfig's `FcValue` type. Each variant corresponds to one
/// serialized type tag, as decoded by `to_value` on a `Ptr<ValueData>`.
#[allow(missing_docs)]
#[derive(Clone, Debug)]
pub enum Value<'buf> {
    /// Type tag `-1`; carries no payload.
    Unknown,
    /// Type tag `0`; carries no payload.
    Void,
    /// Type tag `1`: an integer.
    Int(c_int),
    /// Type tag `2`: a double-precision float.
    Float(f64),
    /// Type tag `3`: a pointer into the cache buffer, typed as pointing at a `u8`.
    String(Ptr<'buf, u8>),
    /// Type tag `4`: a boolean, kept as the raw C integer read from the cache.
    Bool(c_int),
    /// Not yet supported
    Matrix(Ptr<'buf, ()>),
    /// Type tag `6`: a set of code points.
    CharSet(CharSet<'buf>),
    /// Not yet supported
    FtFace(Ptr<'buf, ()>),
    /// Not yet supported
    LangSet(Ptr<'buf, ()>),
    /// Not yet supported
    Range(Ptr<'buf, ()>),
}
50
impl<'buf> Ptr<'buf, ValueData> {
    /// Converts the raw C representation to an enum.
    ///
    /// The serialized type tag (`ty`) selects which field of the payload is read.
    /// Pointer-like payloads are resolved with `relative_offset` against this pointer.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidEnumTag`] if the type tag is outside `-1..=9`, and
    /// propagates any error from dereferencing this pointer or resolving a payload offset.
    pub fn to_value(&self) -> Result<Value<'buf>> {
        use crate::Value::*;
        let payload = self.deref()?;

        // SAFETY: the only operations here that need `unsafe` are the reads of
        // `payload.val.*`; the field read is always the one selected by `payload.ty`.
        // NOTE(review): soundness presumably relies on every field of the value payload
        // accepting arbitrary bit patterns -- confirm against `data::ValueData`.
        unsafe {
            Ok(match payload.ty {
                -1 => Unknown,
                0 => Void,
                1 => Int(payload.val.i),
                2 => Float(payload.val.d),
                3 => String(self.relative_offset(payload.val.s)?),
                4 => Bool(payload.val.b),
                5 => Matrix(self.relative_offset(payload.val.m)?),
                6 => CharSet(crate::CharSet(self.relative_offset(payload.val.c)?)),
                7 => FtFace(self.relative_offset(payload.val.f)?),
                8 => LangSet(self.relative_offset(payload.val.l)?),
                9 => Range(self.relative_offset(payload.val.r)?),
                // Any other tag is not a value type this parser knows about.
                _ => return Err(Error::InvalidEnumTag(payload.ty)),
            })
        }
    }
}
75
/// All the different object types supported by fontconfig.
///
/// (We currently only actually handle a few of these.)
//
// Maintainer note: the discriminants are implicit and sequential, starting at
// `Invalid = 0` and ending at `FontWrapper`. The `TryFrom<c_int>` conversion in this
// file transmutes a raw integer into this enum, and `MAX_OBJECT` is defined as
// `Object::FontWrapper`, so the variants must stay contiguous and `FontWrapper` must
// remain the last one (or `MAX_OBJECT` must be updated alongside).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[allow(missing_docs)]
#[non_exhaustive]
pub enum Object {
    Invalid = 0,
    Family,
    FamilyLang,
    Style,
    StyleLang,
    FullName,
    FullNameLang,
    Slant,
    Weight,
    Width,
    Size,
    Aspect,
    PixelSize,
    Spacing,
    Foundry,
    AntiAlias,
    HintStyle,
    Hinting,
    VerticalLayout,
    AutoHint,
    GlobalAdvance,
    File,
    Index,
    Rasterizer,
    Outline,
    Scalable,
    Dpi,
    Rgba,
    Scale,
    MinSpace,
    CharWidth,
    CharHeight,
    Matrix,
    CharSet,
    Lang,
    FontVersion,
    Capability,
    FontFormat,
    Embolden,
    EmbeddedBitmap,
    Decorative,
    LcdFilter,
    NameLang,
    FontFeatures,
    PrgName,
    Hash,
    PostscriptName,
    Color,
    Symbol,
    FontVariations,
    Variable,
    FontHasHint,
    Order,
    DesktopName,
    NamedInstance,
    FontWrapper,
}
141
142const MAX_OBJECT: c_int = Object::FontWrapper as c_int;
143
144impl TryFrom<c_int> for Object {
145    type Error = Error;
146
147    fn try_from(value: c_int) -> Result<Self> {
148        if value <= MAX_OBJECT {
149            Ok(unsafe { std::mem::transmute(value) })
150        } else {
151            Err(Error::InvalidObjectTag(value))
152        }
153    }
154}
155
156/// A linked list of [`Value`]s.
157#[derive(Clone, Debug)]
158struct ValueList<'buf>(pub Ptr<'buf, ValueListData>);
159
160impl<'buf> ValueList<'buf> {
161    fn value(&self) -> Result<Value<'buf>> {
162        self.0
163            .relative_offset(ptr::offset(
164                std::mem::size_of_val(&self.0.deref()?.next) as isize
165            ))
166            .and_then(|val_ptr| val_ptr.to_value())
167    }
168}
169
170/// An iterator over [`Value`]s.
171#[derive(Clone, Debug)]
172struct ValueListIter<'buf> {
173    next: Option<Result<ValueList<'buf>>>,
174}
175
176impl<'buf> Iterator for ValueListIter<'buf> {
177    type Item = Result<Value<'buf>>;
178
179    fn next(&mut self) -> Option<Self::Item> {
180        let next = self.next.take();
181        if let Some(Ok(next)) = next {
182            match next.0.deref() {
183                Ok(next_payload) => {
184                    if next_payload.next.0 == 0 {
185                        self.next = None;
186                    } else {
187                        self.next = Some(next.0.relative_offset(next_payload.next).map(ValueList));
188                    }
189                }
190                Err(e) => {
191                    self.next = Some(Err(e));
192                }
193            }
194            Some(next.value())
195        } else if let Some(Err(e)) = next {
196            Some(Err(e))
197        } else {
198            None
199        }
200    }
201}
202
203/// A list of properties, each one associated with a range of values.
204#[derive(Clone, Debug)]
205pub struct Pattern<'buf>(pub Ptr<'buf, PatternData>);
206
207impl Pattern<'_> {
208    /// Returns an iterator over the elements in this pattern.
209    pub fn elts(&self) -> Result<impl Iterator<Item = PatternElt> + '_> {
210        let payload = self.0.deref()?;
211        let elts = self.0.relative_offset(payload.elts_offset)?;
212        Ok(elts.array(payload.num)?.map(PatternElt))
213    }
214
215    /// The serialized pattern data, straight from the fontconfig cache.
216    pub fn data(&self) -> Result<PatternData> {
217        self.0.deref()
218    }
219}
220
221/// A single element of a [`Pattern`].
222///
223/// This consists of an [`Object`] type, and a range of values. By convention,
224/// the values are all of the same [`Value`] variant (of a type determined by the object
225/// tag), but this is not actually enforced.
226pub struct PatternElt<'buf>(pub Ptr<'buf, PatternEltData>);
227
228impl<'buf> PatternElt<'buf> {
229    /// An iterator over the values in this `PatternElt`.
230    pub fn values(&self) -> Result<impl Iterator<Item = Result<Value<'buf>>> + 'buf> {
231        Ok(ValueListIter {
232            next: Some(Ok(ValueList(
233                self.0.relative_offset(self.0.deref()?.values)?,
234            ))),
235        })
236    }
237
238    /// The object tag, describing the font property that this `PatternElt` represents.
239    pub fn object(&self) -> Result<Object> {
240        self.0.deref()?.object.try_into()
241    }
242
243    /// The serialized pattern elt data, straight from the fontconfig cache.
244    pub fn data(&self) -> Result<PatternEltData> {
245        self.0.deref()
246    }
247}
248
249/// A set of fonts.
250#[derive(Clone, Debug)]
251pub struct FontSet<'buf>(pub Ptr<'buf, FontSetData>);
252
253impl<'buf> FontSet<'buf> {
254    /// Returns an iterator over the fonts in this set.
255    pub fn fonts<'a>(&'a self) -> Result<impl Iterator<Item = Result<Pattern<'buf>>> + 'a> {
256        let payload = self.0.deref()?;
257        let fonts = self
258            .0
259            .relative_offset(payload.fonts)?
260            .array(payload.nfont)?;
261        let me = self.clone();
262        Ok(fonts.map(move |font_offset| Ok(Pattern(me.0.relative_offset(font_offset.deref()?)?))))
263    }
264
265    /// The serialized font set data, straight from the fontconfig cache.
266    pub fn data(&self) -> Result<FontSetData> {
267        self.0.deref()
268    }
269}
270
271/// A set of code points.
272#[derive(Clone, Debug)]
273pub struct CharSet<'buf>(pub Ptr<'buf, CharSetData>);
274
275impl<'buf> CharSet<'buf> {
276    /// Returns an iterator over the leaf bitsets.
277    pub fn leaves(&self) -> Result<impl Iterator<Item = Result<CharSetLeaf>> + 'buf> {
278        let payload = self.0.deref()?;
279        let leaf_array = self.0.relative_offset(payload.leaves)?;
280        Ok(leaf_array.array(payload.num)?.map(move |leaf_offset| {
281            leaf_array
282                .relative_offset(leaf_offset.deref()?)
283                .and_then(|leaf_ptr| leaf_ptr.deref())
284        }))
285    }
286
287    /// Returns an iterator over the 16-bit leaf offsets.
288    pub fn numbers(&self) -> Result<Array<'buf, u16>> {
289        let payload = self.0.deref()?;
290        self.0.relative_offset(payload.numbers)?.array(payload.num)
291    }
292
293    /// Creates an iterator over the codepoints in this charset.
294    pub fn chars(&self) -> Result<impl Iterator<Item = Result<u32>> + 'buf> {
295        // TODO: this iterator-mangling is super-grungy and shouldn't allocate.
296        // This would be super easy to write using generators; the main issue is that
297        // the early-return-on-decode errors make the control flow tricky to express
298        // with combinators and closures.
299        fn transpose_result_iter<T: 'static, I: Iterator<Item = T> + 'static>(
300            res: Result<I>,
301        ) -> impl Iterator<Item = Result<T>> {
302            match res {
303                Ok(iter) => Box::new(iter.map(|x| Ok(x))) as Box<dyn Iterator<Item = Result<T>>>,
304                Err(e) => Box::new(Some(Err(e)).into_iter()) as Box<dyn Iterator<Item = Result<T>>>,
305            }
306        }
307
308        let leaves = self.leaves()?;
309        let numbers = self.numbers()?;
310        Ok(leaves.zip(numbers).flat_map(|(leaf, number)| {
311            let iter = (move || {
312                let number = (number.deref()? as u32) << 8;
313                Ok(leaf?.iter().map(move |x| x as u32 + number))
314            })();
315            transpose_result_iter(iter)
316        }))
317    }
318
319    /// The `CharSetLeaf` at the given index, if there is one.
320    pub fn leaf_at(&self, idx: usize) -> Result<Option<CharSetLeaf>> {
321        let payload = self.0.deref()?;
322        let leaf_array = self.0.relative_offset(payload.leaves)?;
323        leaf_array
324            .array(payload.num)?
325            .get(idx)
326            .map(|ptr| {
327                leaf_array
328                    .relative_offset(ptr.deref()?)
329                    .and_then(|leaf_ptr| leaf_ptr.deref())
330            })
331            .transpose()
332    }
333
334    /// Checks whether this charset contains a given codepoint.
335    pub fn contains(&self, ch: u32) -> Result<bool> {
336        let hi = ((ch >> 8) & 0xffff) as u16;
337        let lo = (ch & 0xff) as u8;
338        match self.numbers()?.as_slice()?.binary_search(&hi) {
339            // The unwrap will succeed because numbers and leaves have the same length.
340            Ok(idx) => Ok(self.leaf_at(idx)?.unwrap().contains_byte(lo)),
341            Err(_) => Ok(false),
342        }
343    }
344}
345
346/// A set of bytes, represented as a bitset.
347#[derive(AnyBitPattern, Copy, Clone, Debug)]
348#[repr(C)]
349pub struct CharSetLeaf {
350    /// The bits in the set, all 256 of them.
351    pub map: [u32; 8],
352}
353
354impl CharSetLeaf {
355    /// Checks whether this set contains the given byte.
356    pub fn contains_byte(&self, byte: u8) -> bool {
357        let map_idx = (byte >> 5) as usize;
358        let bit_idx = (byte & 0x1f) as u32;
359
360        (self.map[map_idx] >> bit_idx) & 1 != 0
361    }
362
363    /// Creates an iterator over bits in this set.
364    pub fn iter(self) -> CharSetLeafIter {
365        CharSetLeafIter {
366            leaf: self,
367            map_idx: 0,
368        }
369    }
370}
371
372impl IntoIterator for CharSetLeaf {
373    type Item = u8;
374    type IntoIter = CharSetLeafIter;
375    fn into_iter(self) -> CharSetLeafIter {
376        self.iter()
377    }
378}
379
380/// An iterator over bits in a [`CharSetLeaf`](crate::CharSetLeaf),
381/// created by [`CharSetLeaf::iter`](crate::CharSetLeaf::iter).
382#[derive(Clone, Debug)]
383pub struct CharSetLeafIter {
384    leaf: CharSetLeaf,
385    map_idx: u8,
386}
387
388impl Iterator for CharSetLeafIter {
389    type Item = u8;
390
391    fn next(&mut self) -> Option<u8> {
392        let len = self.leaf.map.len() as u8;
393        if self.map_idx >= len {
394            None
395        } else {
396            let bits = &mut self.leaf.map[self.map_idx as usize];
397            if *bits != 0 {
398                let ret = bits.trailing_zeros() as u8;
399                *bits &= !(1 << ret);
400                Some(ret + (self.map_idx << 5))
401            } else {
402                while self.map_idx < len && self.leaf.map[self.map_idx as usize] == 0 {
403                    self.map_idx += 1;
404                }
405                self.next()
406            }
407        }
408    }
409}
410
/// All the possible errors we can encounter when parsing the cache file.
#[derive(Clone, Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// The header's magic number was not the expected one; carries the number found.
    #[error("Invalid magic number {0:#x}")]
    BadMagic(c_uint),

    /// The header's version is outside the supported range; carries the version found.
    #[error("Unsupported version {0}")]
    UnsupportedVersion(c_int),

    // NOTE(review): the next four variants are not constructed in this file; they are
    // presumably raised by the `ptr` module -- confirm the exact meaning of their
    // payloads there.
    #[error("Bad pointer {0}")]
    BadPointer(isize),

    #[error("Bad offset {0}")]
    BadOffset(isize),

    #[error("Bad alignment (expected {expected_alignment}) for offset {offset}")]
    BadAlignment {
        expected_alignment: usize,
        offset: usize,
    },

    #[error("Bad length {0}")]
    BadLength(isize),

    /// A serialized value carried a type tag that does not name a known value type.
    #[error("Invalid enum tag {0}")]
    InvalidEnumTag(c_int),

    /// A pattern element carried an object tag that does not name a known [`Object`].
    #[error("Invalid object tag {0}")]
    InvalidObjectTag(c_int),

    // NOTE(review): not constructed in this file; presumably raised when reading
    // strings elsewhere in the crate -- confirm.
    #[error("Unterminated string at {0}")]
    UnterminatedString(isize),

    /// The buffer is shorter than a cache header, or its length disagrees with the
    /// size recorded in the header.
    #[error("Wrong size: header expects {expected} bytes, buffer is {actual} bytes")]
    WrongSize { expected: isize, actual: isize },
}
448
449/// The fontconfig cache header.
450#[derive(Clone, Debug)]
451pub struct Cache<'buf>(Ptr<'buf, CacheData>);
452
453impl<'buf> Cache<'buf> {
454    /// Read a cache from a slice of bytes.
455    pub fn from_bytes(buf: &'buf [u8]) -> Result<Self> {
456        use Error::*;
457
458        let len = std::mem::size_of::<CacheData>();
459        if buf.len() < len {
460            Err(WrongSize {
461                expected: len as isize,
462                actual: buf.len() as isize,
463            })
464        } else {
465            let cache: CacheData = bytemuck::try_pod_read_unaligned(&buf[0..len])
466                .expect("but we checked the length...");
467
468            if cache.magic != 4228054020 {
469                Err(BadMagic(cache.magic))
470            // We support versions 7 through 9.
471            // Version 7 was introduced in 2015 (fontconfig git ref ad9f5880, released in fontconfig
472            // 2.11.95)
473            // The 7 -> 8 change didn't affect the cache format (fontconfig git ref 5d84745e,
474            // released in fontconfig 2.13.95)
475            // The 8 -> 9 change affected the format of language data, which we
476            // don't currently support. (fontconfig git ref ce9cbe36, released in fontconfig 2.15.0)
477            } else if cache.version < 7 || cache.version > 9 {
478                Err(UnsupportedVersion(cache.version))
479            } else if cache.size != buf.len() as isize {
480                Err(WrongSize {
481                    expected: cache.size,
482                    actual: buf.len() as isize,
483                })
484            } else {
485                Ok(Cache(Ptr {
486                    buf,
487                    offset: 0,
488                    marker: std::marker::PhantomData,
489                }))
490            }
491        }
492    }
493
494    /// The [`FontSet`](crate::FontSet) stored in this cache.
495    pub fn set(&self) -> Result<FontSet<'buf>> {
496        Ok(FontSet(self.0.relative_offset(self.0.deref()?.set)?))
497    }
498
499    /// The serialized cache data, straight from the fontconfig cache.
500    pub fn data(&self) -> Result<CacheData> {
501        self.0.deref()
502    }
503}