hb_subset/
subset.rs

1use std::{marker::PhantomData, ptr::null_mut};
2
3use crate::{
4    map::Map,
5    set::{CharSet, Set, TagSet, U32Set},
6    sys, AllocationError, FontFace, SubsettingError,
7};
8
9mod flags;
10
11pub use flags::*;
12
/// A description of how a font should be subset.
///
/// Subsetting reduces the codepoint coverage of font files and removes all data that is no longer needed. A subset
/// input describes the desired subset. The input is provided along with a font to the subsetting operation. The
/// output is a new font file containing only the data specified in the input.
///
/// Currently most outline and bitmap tables are supported: glyf, CFF, CFF2, sbix, COLR, and CBDT/CBLC. This also
/// includes fonts with variable outlines via OpenType variations. Notably EBDT/EBLC and SVG are not supported. Layout
/// subsetting is supported only for OpenType Layout tables (GSUB, GPOS, GDEF). Notably subsetting of Graphite or AAT
/// tables is not yet supported.
///
/// Fonts with Graphite or AAT tables may still be subset, but will likely need to use the retain glyph ids option
/// and configure the subset to pass through the layout tables untouched.
///
/// This wrapper owns the underlying [`sys::hb_subset_input_t`]: dropping it calls `hb_subset_input_destroy`, while
/// [`SubsetInput::into_raw`] hands the pointer over to the caller instead.
pub struct SubsetInput(*mut sys::hb_subset_input_t);
27
28impl SubsetInput {
29    /// Creates a new subset input object.
30    #[doc(alias = "hb_subset_input_create_or_fail")]
31    pub fn new() -> Result<Self, AllocationError> {
32        let input = unsafe { sys::hb_subset_input_create_or_fail() };
33        if input.is_null() {
34            return Err(AllocationError);
35        }
36        Ok(Self(input))
37    }
38
39    /// Configures input object to keep everything in the font face. That is, all Unicodes, glyphs, names, layout items,
40    /// glyph names, etc.
41    ///
42    /// The input can be tailored afterwards by the caller.
43    #[doc(alias = "hb_subset_input_keep_everything")]
44    pub fn keep_everything(&mut self) {
45        unsafe { sys::hb_subset_input_keep_everything(self.as_raw()) }
46    }
47
48    /// Gets a proxy for modifying flags.
49    ///
50    /// # Example
51    /// ```
52    /// # use hb_subset::*;
53    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
54    /// let mut subset = SubsetInput::new()?;
55    /// subset.flags().retain_glyph_names();
56    /// assert_eq!(*subset.flags(), *Flags::default().retain_glyph_names());
57    ///
58    /// *subset.flags() = Flags::default();
59    /// assert_eq!(*subset.flags(), Flags::default());
60    /// # Ok(())
61    /// # }
62    /// ```
63    #[doc(alias = "hb_subset_input_set_flags")]
64    #[doc(alias = "hb_subset_input_get_flags")]
65    pub fn flags(&mut self) -> FlagRef<'_> {
66        FlagRef(self, Flags(unsafe { sys::hb_subset_input_get_flags(self.as_raw()) }))
67    }
68
69    /// Gets the set of glyph IDs to retain.
70    ///
71    /// The caller should modify the set as needed.
72    #[doc(alias = "hb_subset_input_glyph_set")]
73    #[doc(alias = "hb_subset_input_set")]
74    #[doc(alias = "HB_SUBSET_SETS_GLYPH_INDEX")]
75    pub fn glyph_set(&mut self) -> U32Set<'_> {
76        unsafe {
77            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_glyph_set(self.as_raw())))
78        }
79    }
80
81    /// Gets the set of Unicode codepoints to retain.
82    ///
83    /// The caller should modify the set as needed.
84    #[doc(alias = "hb_subset_input_unicode_set")]
85    #[doc(alias = "hb_subset_input_set")]
86    #[doc(alias = "HB_SUBSET_SETS_UNICODE")]
87    pub fn unicode_set(&mut self) -> CharSet<'_> {
88        unsafe {
89            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_unicode_set(self.as_raw())))
90        }
91    }
92
93    /// Gets the set of table tags which specifies tables that should not be subsetted.
94    ///
95    /// The caller should modify the set as needed.
96    #[doc(alias = "hb_subset_input_set")]
97    #[doc(alias = "HB_SUBSET_SETS_NO_SUBSET_TABLE_TAG")]
98    pub fn no_subset_table_tag_set(&mut self) -> TagSet<'_> {
99        unsafe {
100            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
101                self.as_raw(),
102                sys::hb_subset_sets_t::NO_SUBSET_TABLE_TAG,
103            )))
104        }
105    }
106
107    /// Gets the set of table tags which specifies tables which will be dropped in the subset.
108    ///
109    /// The caller should modify the set as needed.
110    #[doc(alias = "hb_subset_input_set")]
111    #[doc(alias = "HB_SUBSET_SETS_DROP_TABLE_TAG")]
112    pub fn drop_table_tag_set(&mut self) -> TagSet<'_> {
113        unsafe {
114            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
115                self.as_raw(),
116                sys::hb_subset_sets_t::DROP_TABLE_TAG,
117            )))
118        }
119    }
120
121    /// Gets the set of name ids that will be retained.
122    ///
123    /// The caller should modify the set as needed.
124    #[doc(alias = "hb_subset_input_set")]
125    #[doc(alias = "HB_SUBSET_SETS_NAME_ID")]
126    pub fn name_id_set(&mut self) -> U32Set<'_> {
127        unsafe {
128            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
129                self.as_raw(),
130                sys::hb_subset_sets_t::NAME_ID,
131            )))
132        }
133    }
134
135    /// Gets the set of name lang ids that will be retained.
136    ///
137    /// The caller should modify the set as needed.
138    #[doc(alias = "hb_subset_input_set")]
139    #[doc(alias = "HB_SUBSET_SETS_NAME_LANG_ID")]
140    pub fn name_lang_id_set(&mut self) -> U32Set<'_> {
141        unsafe {
142            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
143                self.as_raw(),
144                sys::hb_subset_sets_t::NAME_LANG_ID,
145            )))
146        }
147    }
148
149    /// Gets the set of layout feature tags that will be retained in the subset.
150    ///
151    /// The caller should modify the set as needed.
152    #[doc(alias = "hb_subset_input_set")]
153    #[doc(alias = "HB_SUBSET_SETS_LAYOUT_FEATURE_TAG")]
154    pub fn layout_feature_tag_set(&mut self) -> TagSet<'_> {
155        unsafe {
156            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
157                self.as_raw(),
158                sys::hb_subset_sets_t::LAYOUT_FEATURE_TAG,
159            )))
160        }
161    }
162
163    /// Gets the set of layout script tags that will be retained in the subset.
164    ///
165    /// Defaults to all tags. The caller should modify the set as needed.
166    #[doc(alias = "hb_subset_input_set")]
167    #[doc(alias = "HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG")]
168    pub fn layout_script_tag_set(&mut self) -> TagSet<'_> {
169        unsafe {
170            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
171                self.as_raw(),
172                sys::hb_subset_sets_t::LAYOUT_SCRIPT_TAG,
173            )))
174        }
175    }
176
177    /// Returns a map which can be used to provide an explicit mapping from old to new glyph id's in the produced
178    /// subset. The caller should populate the map as desired. If this map is left empty then glyph ids will be
179    /// automatically mapped to new values by the subsetter. If populated, the mapping must be unique. That is no two
180    /// original glyph ids can be mapped to the same new id. Additionally, if a mapping is provided then the retain gids
181    /// option cannot be enabled.
182    ///
183    /// Any glyphs that are retained in the subset which are not specified in this mapping will be assigned glyph ids
184    /// after the highest glyph id in the mapping.
185    ///
186    /// Note: this will accept and apply non-monotonic mappings, however this may result in unsorted Coverage tables.
187    /// Such fonts may not work for all use cases (for example ots will reject unsorted coverage tables). So it's
188    /// recommended, if possible, to supply a monotonic mapping.
189    #[doc(alias = "hb_subset_input_old_to_new_glyph_mapping")]
190    pub fn old_to_new_glyph_mapping(&mut self) -> Map<'_, u32, u32> {
191        unsafe {
192            Map::from_raw(sys::hb_map_reference(sys::hb_subset_input_old_to_new_glyph_mapping(
193                self.as_raw(),
194            )))
195        }
196    }
197
198    /// Subsets a font according to provided input.
199    #[doc(alias = "hb_subset_or_fail")]
200    pub fn subset_font(&self, font: &FontFace<'_>) -> Result<FontFace<'static>, SubsettingError> {
201        let face = unsafe { sys::hb_subset_or_fail(font.as_raw(), self.as_raw()) };
202        if face.is_null() {
203            return Err(SubsettingError);
204        }
205        Ok(unsafe { FontFace::from_raw(face) })
206    }
207
208    /// Computes a plan for subsetting the supplied face according to a provided input.
209    ///
210    /// The plan describes which tables and glyphs should be retained.
211    #[doc(alias = "hb_subset_plan_create_or_fail")]
212    pub fn plan<'f>(&self, font: &'f FontFace<'_>) -> Result<SubsetPlan<'f, '_>, SubsettingError> {
213        let plan = unsafe { sys::hb_subset_plan_create_or_fail(font.as_raw(), self.as_raw()) };
214        if plan.is_null() {
215            return Err(SubsettingError);
216        }
217        Ok(unsafe { SubsetPlan::from_raw(plan) })
218    }
219}
220
221impl SubsetInput {
222    /// Converts the subset input into raw [`sys::hb_subset_input_t`] pointer.
223    ///
224    /// This method transfers the ownership of the subset input to the caller. It is up to the caller to call
225    /// [`sys::hb_subset_input_destroy`] to free the pointer, or call [`Self::from_raw`] to convert it back into
226    /// [`SubsetInput`].
227    pub fn into_raw(self) -> *mut sys::hb_subset_input_t {
228        let ptr = self.0;
229        std::mem::forget(self);
230        ptr
231    }
232
233    /// Exposes the raw inner pointer without transferring the ownership.
234    ///
235    /// Unlike [`Self::into_raw`], this method does not transfer the ownership of the pointer to the caller.
236    pub fn as_raw(&self) -> *mut sys::hb_subset_input_t {
237        self.0
238    }
239
240    /// Constructs a subset input from raw [`sys::hb_subset_input_t`] pointer.
241    ///
242    /// # Safety
243    /// The given `subset` pointer must either be constructed by some Harfbuzz function, or be returned from
244    /// [`Self::into_raw`].
245    pub unsafe fn from_raw(subset: *mut sys::hb_subset_input_t) -> Self {
246        Self(subset)
247    }
248}
249
250impl Drop for SubsetInput {
251    #[doc(alias = "hb_subset_input_destroy")]
252    fn drop(&mut self) {
253        unsafe { sys::hb_subset_input_destroy(self.0) }
254    }
255}
256
/// Information about how a subsetting operation will be executed.
///
/// This includes e.g. how glyph ids are mapped from the original font to the subset.
///
/// The lifetime parameters appear only in the `_font` marker, which makes the plan behave as if it held a
/// `&'f FontFace<'b>`.
pub struct SubsetPlan<'f, 'b> {
    // Raw plan handle. `into_raw` replaces it with null so that `Drop` skips destroying a plan that was handed out.
    plan: *mut sys::hb_subset_plan_t,
    // The three maps below are wrapped once in `from_raw`, each after taking its own reference with
    // `hb_map_reference`. The `'static` lifetime here actually stands for the lifetime of the `SubsetPlan` itself;
    // the accessors only hand the maps out borrowed from `&self`.
    unicode_to_old_glyph_mapping: Map<'static, char, u32>,
    new_to_old_glyph_mapping: Map<'static, u32, u32>,
    old_to_new_glyph_mapping: Map<'static, u32, u32>,
    // Zero-sized marker that ties the plan to a borrow of the font face without storing one.
    _font: PhantomData<&'f FontFace<'b>>,
}
268
269impl<'f, 'b> SubsetPlan<'f, 'b> {
270    /// Executes the subsetting plan.
271    #[doc(alias = "hb_subset_plan_execute_or_fail")]
272    pub fn subset(&self) -> Result<FontFace<'b>, SubsettingError> {
273        let font = unsafe { sys::hb_subset_plan_execute_or_fail(self.as_raw()) };
274        if font.is_null() {
275            return Err(SubsettingError);
276        }
277        Ok(unsafe { FontFace::from_raw(font) })
278    }
279
280    /// Returns the mapping between codepoints in the original font and the associated glyph id in the original font.
281    #[doc(alias = "hb_subset_plan_unicode_to_old_glyph_mapping")]
282    pub fn unicode_to_old_glyph_mapping(&self) -> &'_ Map<'_, char, u32> {
283        &self.unicode_to_old_glyph_mapping
284    }
285
286    /// Returns the mapping between glyphs in the subset that will be produced by plan and the glyph in the original font.
287    #[doc(alias = "hb_subset_plan_new_to_old_glyph_mapping")]
288    pub fn new_to_old_glyph_mapping(&self) -> &'_ Map<'_, u32, u32> {
289        &self.new_to_old_glyph_mapping
290    }
291
292    /// Returns the mapping between glyphs in the original font to glyphs in the subset that will be produced by plan.
293    #[doc(alias = "hb_subset_plan_old_to_new_glyph_mapping")]
294    pub fn old_to_new_glyph_mapping(&self) -> &'_ Map<'_, u32, u32> {
295        &self.old_to_new_glyph_mapping
296    }
297}
298
299impl<'f, 'b> SubsetPlan<'f, 'b> {
300    /// Converts the subset plan into raw [`sys::hb_subset_plan_t`] pointer.
301    ///
302    /// This method transfers the ownership of the subset plan to the caller. It is up to the caller to call
303    /// [`sys::hb_subset_plan_destroy`] to free the pointer, or call [`Self::from_raw`] to convert it back into
304    /// [`SubsetPlan`].
305    pub fn into_raw(mut self) -> *mut sys::hb_subset_plan_t {
306        let ptr = self.plan;
307        self.plan = null_mut();
308        ptr
309    }
310
311    /// Exposes the raw inner pointer without transferring the ownership.
312    ///
313    /// Unlike [`Self::into_raw`], this method does not transfer the ownership of the pointer to the caller.
314    pub fn as_raw(&self) -> *mut sys::hb_subset_plan_t {
315        self.plan
316    }
317
318    /// Constructs a subset plan from raw [`sys::hb_subset_plan_t`] pointer.
319    ///
320    /// # Safety
321    /// The given `plan` pointer must either be constructed by some Harfbuzz function, or be returned from
322    /// [`Self::into_raw`].
323    pub unsafe fn from_raw(plan: *mut sys::hb_subset_plan_t) -> Self {
324        let unicode_to_old_glyph_mapping = unsafe {
325            Map::from_raw(sys::hb_map_reference(sys::hb_subset_plan_unicode_to_old_glyph_mapping(
326                plan,
327            )))
328        };
329        let new_to_old_glyph_mapping = unsafe {
330            Map::from_raw(sys::hb_map_reference(sys::hb_subset_plan_new_to_old_glyph_mapping(plan)))
331        };
332        let old_to_new_glyph_mapping = unsafe {
333            Map::from_raw(sys::hb_map_reference(sys::hb_subset_plan_old_to_new_glyph_mapping(plan)))
334        };
335
336        Self {
337            plan,
338            unicode_to_old_glyph_mapping,
339            new_to_old_glyph_mapping,
340            old_to_new_glyph_mapping,
341            _font: PhantomData,
342        }
343    }
344}
345
346impl<'f, 'b> Drop for SubsetPlan<'f, 'b> {
347    #[doc(alias = "hb_subset_plan_destroy")]
348    fn drop(&mut self) {
349        if !self.plan.is_null() {
350            unsafe { sys::hb_subset_plan_destroy(self.plan) }
351        }
352    }
353}
354
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{tests::NOTO_SANS, Blob};

    /// `keep_everything` fills both sets completely, and subsetting then preserves codepoint and glyph counts.
    #[test]
    fn keep_everything_should_keep_all_codepoints_and_glyphs() {
        let mut input = SubsetInput::new().unwrap();
        input.keep_everything();
        assert_eq!(input.unicode_set().len(), u32::MAX as usize);
        assert_eq!(input.glyph_set().len(), u32::MAX as usize);

        let blob = Blob::from_file(NOTO_SANS).unwrap();
        let original = FontFace::new(blob).unwrap();
        let subsetted = input.subset_font(&original).unwrap();
        assert_eq!(
            original.covered_codepoints().unwrap().len(),
            subsetted.covered_codepoints().unwrap().len()
        );
        assert_eq!(original.glyph_count(), subsetted.glyph_count());
    }

    /// Retaining only 'f' and 'i' keeps two codepoints and six glyphs.
    #[test]
    fn keeping_codepoints_should_keep_ligatures() {
        let blob = Blob::from_file(NOTO_SANS).unwrap();
        let original = FontFace::new(blob).unwrap();

        let mut input = SubsetInput::new().unwrap();
        input.unicode_set().insert('f');
        input.unicode_set().insert('i');

        let subsetted = input.subset_font(&original).unwrap();
        assert_eq!(subsetted.covered_codepoints().unwrap().len(), 2);
        // TODO: Actually check *which* glyphs are included
        // Currently just assuming [empty], f, i, fi, ffi, and ff
        assert_eq!(subsetted.glyph_count(), 6);
    }

    /// An explicit old-to-new glyph mapping is honoured by the produced subset.
    #[test]
    fn old_to_new_glyph_mapping() {
        let blob = Blob::from_file(NOTO_SANS).unwrap();
        let original = FontFace::new(blob).unwrap();
        let original_glyphs = original.nominal_glyph_mapping().unwrap();

        // Map 'a' and 'b' to arbitrary glyphs
        let mut input = SubsetInput::new().unwrap();
        input
            .old_to_new_glyph_mapping()
            .insert(original_glyphs.get('a').unwrap(), 5);
        input
            .old_to_new_glyph_mapping()
            .insert(original_glyphs.get('b').unwrap(), 709);
        input.unicode_set().insert('a');
        input.unicode_set().insert('b');

        let subsetted = input.subset_font(&original).unwrap();
        // Most of the glyphs should be empty
        assert_eq!(subsetted.glyph_count(), 710);

        let subsetted_glyphs = subsetted.nominal_glyph_mapping().unwrap();
        // But the specified ones should be what we set
        assert_eq!(subsetted_glyphs.get('a').unwrap(), 5);
        assert_eq!(subsetted_glyphs.get('b').unwrap(), 709);
    }

    /// A subset input survives a round trip through its raw pointer.
    #[test]
    fn convert_subset_into_raw_and_back() {
        let input = SubsetInput::new().unwrap();
        let raw_input = input.into_raw();
        let restored = unsafe { SubsetInput::from_raw(raw_input) };
        drop(restored);
    }

    /// A subset plan survives a round trip through its raw pointer.
    #[test]
    fn convert_plan_into_raw_and_back() {
        let blob = Blob::from_file(NOTO_SANS).unwrap();
        let font = FontFace::new(blob).unwrap();
        let input = SubsetInput::new().unwrap();
        let plan = input.plan(&font).unwrap();
        let raw_plan = plan.into_raw();
        let restored = unsafe { SubsetPlan::from_raw(raw_plan) };
        drop(restored);
    }
}