mkwebfont_hb-subset 0.5.0

A wrapper for the HarfBuzz font subsetting API
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
use std::{marker::PhantomData, ptr::null_mut};

use crate::{
    map::Map,
    set::{CharSet, Set, TagSet, U32Set},
    sys, AllocationError, FontFace, SubsettingError,
};

mod flags;

pub use flags::*;

/// A description of how a font should be subset.
///
/// Subsetting reduces the codepoint coverage of font files and removes all data that is no longer needed. A subset
/// input describes the desired subset. The input is provided along with a font to the subsetting operation. Output is a
/// new font file containing only the data specified in the input.
///
/// Currently most outline and bitmap tables are supported: glyf, CFF, CFF2, sbix, COLR, and CBDT/CBLC. This also
/// includes fonts with variable outlines via OpenType variations. Notably EBDT/EBLC and SVG are not supported. Layout
/// subsetting is supported only for OpenType Layout tables (GSUB, GPOS, GDEF). Notably subsetting of graphite or AAT
/// tables is not yet supported.
///
/// Fonts with graphite or AAT tables may still be subsetted but will likely need to use the retain glyph ids option and
/// configure the subset to pass through the layout tables untouched.
///
/// # Ownership
/// This is a thin wrapper around a raw [`sys::hb_subset_input_t`] pointer. The pointer is handed to
/// [`sys::hb_subset_input_destroy`] when the wrapper is dropped, unless ownership was given away beforehand with
/// [`Self::into_raw`].
pub struct SubsetInput(*mut sys::hb_subset_input_t);

impl SubsetInput {
    /// Creates a new subset input object.
    #[doc(alias = "hb_subset_input_create_or_fail")]
    pub fn new() -> Result<Self, AllocationError> {
        let input = unsafe { sys::hb_subset_input_create_or_fail() };
        if input.is_null() {
            return Err(AllocationError);
        }
        Ok(Self(input))
    }

    /// Configures input object to keep everything in the font face. That is, all Unicodes, glyphs, names, layout items,
    /// glyph names, etc.
    ///
    /// The input can be tailored afterwards by the caller.
    #[doc(alias = "hb_subset_input_keep_everything")]
    pub fn keep_everything(&mut self) {
        unsafe { sys::hb_subset_input_keep_everything(self.as_raw()) }
    }

    /// Gets a proxy for modifying flags.
    ///
    /// # Example
    /// ```
    /// # use hb_subset::*;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let mut subset = SubsetInput::new()?;
    /// subset.flags().retain_glyph_names();
    /// assert_eq!(*subset.flags(), *Flags::default().retain_glyph_names());
    ///
    /// *subset.flags() = Flags::default();
    /// assert_eq!(*subset.flags(), Flags::default());
    /// # Ok(())
    /// # }
    /// ```
    #[doc(alias = "hb_subset_input_set_flags")]
    #[doc(alias = "hb_subset_input_get_flags")]
    pub fn flags(&mut self) -> FlagRef<'_> {
        FlagRef(self, Flags(unsafe { sys::hb_subset_input_get_flags(self.as_raw()) }))
    }

    /// Gets the set of glyph IDs to retain.
    ///
    /// The caller should modify the set as needed.
    #[doc(alias = "hb_subset_input_glyph_set")]
    #[doc(alias = "hb_subset_input_set")]
    #[doc(alias = "HB_SUBSET_SETS_GLYPH_INDEX")]
    pub fn glyph_set(&mut self) -> U32Set<'_> {
        unsafe {
            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_glyph_set(self.as_raw())))
        }
    }

    /// Gets the set of Unicode codepoints to retain.
    ///
    /// The caller should modify the set as needed.
    #[doc(alias = "hb_subset_input_unicode_set")]
    #[doc(alias = "hb_subset_input_set")]
    #[doc(alias = "HB_SUBSET_SETS_UNICODE")]
    pub fn unicode_set(&mut self) -> CharSet<'_> {
        unsafe {
            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_unicode_set(self.as_raw())))
        }
    }

    /// Gets the set of table tags which specifies tables that should not be subsetted.
    ///
    /// The caller should modify the set as needed.
    #[doc(alias = "hb_subset_input_set")]
    #[doc(alias = "HB_SUBSET_SETS_NO_SUBSET_TABLE_TAG")]
    pub fn no_subset_table_tag_set(&mut self) -> TagSet<'_> {
        unsafe {
            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
                self.as_raw(),
                sys::hb_subset_sets_t::NO_SUBSET_TABLE_TAG,
            )))
        }
    }

    /// Gets the set of table tags which specifies tables which will be dropped in the subset.
    ///
    /// The caller should modify the set as needed.
    #[doc(alias = "hb_subset_input_set")]
    #[doc(alias = "HB_SUBSET_SETS_DROP_TABLE_TAG")]
    pub fn drop_table_tag_set(&mut self) -> TagSet<'_> {
        unsafe {
            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
                self.as_raw(),
                sys::hb_subset_sets_t::DROP_TABLE_TAG,
            )))
        }
    }

    /// Gets the set of name ids that will be retained.
    ///
    /// The caller should modify the set as needed.
    #[doc(alias = "hb_subset_input_set")]
    #[doc(alias = "HB_SUBSET_SETS_NAME_ID")]
    pub fn name_id_set(&mut self) -> U32Set<'_> {
        unsafe {
            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
                self.as_raw(),
                sys::hb_subset_sets_t::NAME_ID,
            )))
        }
    }

    /// Gets the set of name lang ids that will be retained.
    ///
    /// The caller should modify the set as needed.
    #[doc(alias = "hb_subset_input_set")]
    #[doc(alias = "HB_SUBSET_SETS_NAME_LANG_ID")]
    pub fn name_lang_id_set(&mut self) -> U32Set<'_> {
        unsafe {
            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
                self.as_raw(),
                sys::hb_subset_sets_t::NAME_LANG_ID,
            )))
        }
    }

    /// Gets the set of layout feature tags that will be retained in the subset.
    ///
    /// The caller should modify the set as needed.
    #[doc(alias = "hb_subset_input_set")]
    #[doc(alias = "HB_SUBSET_SETS_LAYOUT_FEATURE_TAG")]
    pub fn layout_feature_tag_set(&mut self) -> TagSet<'_> {
        unsafe {
            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
                self.as_raw(),
                sys::hb_subset_sets_t::LAYOUT_FEATURE_TAG,
            )))
        }
    }

    /// Gets the set of layout script tags that will be retained in the subset.
    ///
    /// Defaults to all tags. The caller should modify the set as needed.
    #[doc(alias = "hb_subset_input_set")]
    #[doc(alias = "HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG")]
    pub fn layout_script_tag_set(&mut self) -> TagSet<'_> {
        unsafe {
            Set::from_raw(sys::hb_set_reference(sys::hb_subset_input_set(
                self.as_raw(),
                sys::hb_subset_sets_t::LAYOUT_SCRIPT_TAG,
            )))
        }
    }

    /// Returns a map which can be used to provide an explicit mapping from old to new glyph id's in the produced
    /// subset. The caller should populate the map as desired. If this map is left empty then glyph ids will be
    /// automatically mapped to new values by the subsetter. If populated, the mapping must be unique. That is no two
    /// original glyph ids can be mapped to the same new id. Additionally, if a mapping is provided then the retain gids
    /// option cannot be enabled.
    ///
    /// Any glyphs that are retained in the subset which are not specified in this mapping will be assigned glyph ids
    /// after the highest glyph id in the mapping.
    ///
    /// Note: this will accept and apply non-monotonic mappings, however this may result in unsorted Coverage tables.
    /// Such fonts may not work for all use cases (for example ots will reject unsorted coverage tables). So it's
    /// recommended, if possible, to supply a monotonic mapping.
    #[doc(alias = "hb_subset_input_old_to_new_glyph_mapping")]
    pub fn old_to_new_glyph_mapping(&mut self) -> Map<'_, u32, u32> {
        unsafe {
            Map::from_raw(sys::hb_map_reference(sys::hb_subset_input_old_to_new_glyph_mapping(
                self.as_raw(),
            )))
        }
    }

    /// Subsets a font according to provided input.
    #[doc(alias = "hb_subset_or_fail")]
    pub fn subset_font(&self, font: &FontFace<'_>) -> Result<FontFace<'static>, SubsettingError> {
        let face = unsafe { sys::hb_subset_or_fail(font.as_raw(), self.as_raw()) };
        if face.is_null() {
            return Err(SubsettingError);
        }
        Ok(unsafe { FontFace::from_raw(face) })
    }

    /// Computes a plan for subsetting the supplied face according to a provided input.
    ///
    /// The plan describes which tables and glyphs should be retained.
    #[doc(alias = "hb_subset_plan_create_or_fail")]
    pub fn plan<'f>(&self, font: &'f FontFace<'_>) -> Result<SubsetPlan<'f, '_>, SubsettingError> {
        let plan = unsafe { sys::hb_subset_plan_create_or_fail(font.as_raw(), self.as_raw()) };
        if plan.is_null() {
            return Err(SubsettingError);
        }
        Ok(unsafe { SubsetPlan::from_raw(plan) })
    }
}

impl SubsetInput {
    /// Converts the subset input into raw [`sys::hb_subset_input_t`] pointer.
    ///
    /// This method transfers the ownership of the subset input to the caller. It is up to the caller to call
    /// [`sys::hb_subset_input_destroy`] to free the pointer, or call [`Self::from_raw`] to convert it back into
    /// [`SubsetInput`].
    pub fn into_raw(self) -> *mut sys::hb_subset_input_t {
        let ptr = self.0;
        std::mem::forget(self);
        ptr
    }

    /// Exposes the raw inner pointer without transferring the ownership.
    ///
    /// Unlike [`Self::into_raw`], this method does not transfer the ownership of the pointer to the caller.
    pub fn as_raw(&self) -> *mut sys::hb_subset_input_t {
        self.0
    }

    /// Constructs a subset input from raw [`sys::hb_subset_input_t`] pointer.
    ///
    /// # Safety
    /// The given `subset` pointer must either be constructed by some Harfbuzz function, or be returned from
    /// [`Self::into_raw`].
    pub unsafe fn from_raw(subset: *mut sys::hb_subset_input_t) -> Self {
        Self(subset)
    }
}

impl Drop for SubsetInput {
    /// Hands the wrapped pointer to `hb_subset_input_destroy`.
    #[doc(alias = "hb_subset_input_destroy")]
    fn drop(&mut self) {
        let raw = self.0;
        // SAFETY: `raw` is the pointer this wrapper was built from; `into_raw` bypasses this destructor, so the
        // pointer is not destroyed here after ownership has been given away.
        unsafe { sys::hb_subset_input_destroy(raw) }
    }
}

/// Information about how a subsetting operation will be executed.
///
/// This includes e.g. how glyph ids are mapped from the original font to the subset.
///
/// `'f` is the lifetime of the borrow of the source [`FontFace`] and `'b` is that face's own lifetime parameter; both
/// are carried only through a [`PhantomData`] marker.
pub struct SubsetPlan<'f, 'b> {
    // Raw plan handle. `into_raw` resets it to null so that `Drop` skips destroying a plan that was given away.
    plan: *mut sys::hb_subset_plan_t,
    // The `'static` lifetime on the maps below is a placeholder: they actually live as long as the SubsetPlan itself,
    // and the accessor methods only hand them out borrowed from `&self`.
    unicode_to_old_glyph_mapping: Map<'static, char, u32>,
    new_to_old_glyph_mapping: Map<'static, u32, u32>,
    old_to_new_glyph_mapping: Map<'static, u32, u32>,
    // Marker tying the plan to a borrow of the source font face without storing the reference.
    _font: PhantomData<&'f FontFace<'b>>,
}

impl<'f, 'b> SubsetPlan<'f, 'b> {
    /// Runs the subsetting plan and returns the resulting font face.
    ///
    /// # Errors
    /// Returns [`SubsettingError`] when HarfBuzz reports failure by returning a null face.
    #[doc(alias = "hb_subset_plan_execute_or_fail")]
    pub fn subset(&self) -> Result<FontFace<'b>, SubsettingError> {
        // SAFETY: the pointer passed is the plan handle stored in this wrapper; the result is null-checked below.
        let subsetted = unsafe { sys::hb_subset_plan_execute_or_fail(self.as_raw()) };
        if subsetted.is_null() {
            Err(SubsettingError)
        } else {
            Ok(unsafe { FontFace::from_raw(subsetted) })
        }
    }

    /// Returns the mapping from codepoints in the original font to their associated glyph ids in the original font.
    ///
    /// The map is obtained once, when the plan is wrapped, and is borrowed from the plan here.
    #[doc(alias = "hb_subset_plan_unicode_to_old_glyph_mapping")]
    pub fn unicode_to_old_glyph_mapping(&self) -> &'_ Map<'_, char, u32> {
        &self.unicode_to_old_glyph_mapping
    }

    /// Returns the mapping from glyphs in the subset that the plan will produce to glyphs in the original font.
    ///
    /// The map is obtained once, when the plan is wrapped, and is borrowed from the plan here.
    #[doc(alias = "hb_subset_plan_new_to_old_glyph_mapping")]
    pub fn new_to_old_glyph_mapping(&self) -> &'_ Map<'_, u32, u32> {
        &self.new_to_old_glyph_mapping
    }

    /// Returns the mapping from glyphs in the original font to glyphs in the subset that the plan will produce.
    ///
    /// The map is obtained once, when the plan is wrapped, and is borrowed from the plan here.
    #[doc(alias = "hb_subset_plan_old_to_new_glyph_mapping")]
    pub fn old_to_new_glyph_mapping(&self) -> &'_ Map<'_, u32, u32> {
        &self.old_to_new_glyph_mapping
    }
}

impl<'f, 'b> SubsetPlan<'f, 'b> {
    /// Consumes the subset plan and returns the underlying raw [`sys::hb_subset_plan_t`] pointer.
    ///
    /// Ownership of the subset plan moves to the caller, who must eventually either free the pointer with
    /// [`sys::hb_subset_plan_destroy`] or turn it back into a [`SubsetPlan`] with [`Self::from_raw`].
    pub fn into_raw(mut self) -> *mut sys::hb_subset_plan_t {
        // Leave a null handle behind: `Drop` still runs for `self` at the end of this function (releasing the cached
        // maps), but it skips destroying a null plan.
        std::mem::replace(&mut self.plan, null_mut())
    }

    /// Returns the raw inner pointer while keeping ownership in this wrapper.
    ///
    /// In contrast to [`Self::into_raw`], the caller does not become responsible for freeing the pointer.
    pub fn as_raw(&self) -> *mut sys::hb_subset_plan_t {
        self.plan
    }

    /// Wraps a raw [`sys::hb_subset_plan_t`] pointer in a [`SubsetPlan`].
    ///
    /// The three glyph mappings are fetched from the plan here, each with an extra reference taken via
    /// `hb_map_reference`, and stored alongside the pointer.
    ///
    /// # Safety
    /// The given `plan` pointer must either be constructed by some Harfbuzz function, or be returned from
    /// [`Self::into_raw`].
    pub unsafe fn from_raw(plan: *mut sys::hb_subset_plan_t) -> Self {
        // SAFETY: validity of `plan` is guaranteed by the caller per the contract above.
        unsafe {
            Self {
                plan,
                unicode_to_old_glyph_mapping: Map::from_raw(sys::hb_map_reference(
                    sys::hb_subset_plan_unicode_to_old_glyph_mapping(plan),
                )),
                new_to_old_glyph_mapping: Map::from_raw(sys::hb_map_reference(
                    sys::hb_subset_plan_new_to_old_glyph_mapping(plan),
                )),
                old_to_new_glyph_mapping: Map::from_raw(sys::hb_map_reference(
                    sys::hb_subset_plan_old_to_new_glyph_mapping(plan),
                )),
                _font: PhantomData,
            }
        }
    }
}

impl<'f, 'b> Drop for SubsetPlan<'f, 'b> {
    /// Destroys the wrapped plan unless ownership was released through `into_raw`.
    #[doc(alias = "hb_subset_plan_destroy")]
    fn drop(&mut self) {
        // `into_raw` nulls the handle to signal that the caller now owns the plan.
        if self.plan.is_null() {
            return;
        }
        // SAFETY: the handle is non-null and still owned by this wrapper.
        unsafe { sys::hb_subset_plan_destroy(self.plan) }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{tests::NOTO_SANS, Blob};

    /// After `keep_everything`, both the codepoint and glyph sets report `u32::MAX` entries and subsetting leaves
    /// the font's codepoint coverage and glyph count unchanged.
    #[test]
    fn keep_everything_should_keep_all_codepoints_and_glyphs() {
        let mut input = SubsetInput::new().unwrap();
        input.keep_everything();
        assert_eq!(input.unicode_set().len(), u32::MAX as usize);
        assert_eq!(input.glyph_set().len(), u32::MAX as usize);

        let original = FontFace::new(Blob::from_file(NOTO_SANS).unwrap()).unwrap();
        let subsetted = input.subset_font(&original).unwrap();
        assert_eq!(
            original.covered_codepoints().unwrap().len(),
            subsetted.covered_codepoints().unwrap().len()
        );
        assert_eq!(original.glyph_count(), subsetted.glyph_count());
    }

    /// Requesting only 'f' and 'i' keeps two codepoints but six glyphs.
    #[test]
    fn keeping_codepoints_should_keep_ligatures() {
        let original = FontFace::new(Blob::from_file(NOTO_SANS).unwrap()).unwrap();
        let mut input = SubsetInput::new().unwrap();
        input.unicode_set().insert('f');
        input.unicode_set().insert('i');

        let subsetted = input.subset_font(&original).unwrap();
        assert_eq!(subsetted.covered_codepoints().unwrap().len(), 2);
        // TODO: Actually check *which* glyphs are included.
        // Currently just assuming [empty], f, i, fi, ffi, and ff.
        assert_eq!(subsetted.glyph_count(), 6);
    }

    /// An explicit old-to-new glyph mapping is honoured by the subsetter.
    #[test]
    fn old_to_new_glyph_mapping() {
        let original = FontFace::new(Blob::from_file(NOTO_SANS).unwrap()).unwrap();
        let original_glyphs = original.nominal_glyph_mapping().unwrap();
        let glyph_a = original_glyphs.get('a').unwrap();
        let glyph_b = original_glyphs.get('b').unwrap();

        // Map 'a' and 'b' to arbitrary glyphs
        let mut input = SubsetInput::new().unwrap();
        input.old_to_new_glyph_mapping().insert(glyph_a, 5);
        input.old_to_new_glyph_mapping().insert(glyph_b, 709);
        input.unicode_set().insert('a');
        input.unicode_set().insert('b');

        let subsetted = input.subset_font(&original).unwrap();
        // Most of the glyphs should be empty
        assert_eq!(subsetted.glyph_count(), 710);

        // But the specified ones should be what we set
        let subsetted_glyphs = subsetted.nominal_glyph_mapping().unwrap();
        assert_eq!(subsetted_glyphs.get('a').unwrap(), 5);
        assert_eq!(subsetted_glyphs.get('b').unwrap(), 709);
    }

    /// A subset input survives a round trip through its raw pointer.
    #[test]
    fn convert_subset_into_raw_and_back() {
        let raw = SubsetInput::new().unwrap().into_raw();
        let restored = unsafe { SubsetInput::from_raw(raw) };
        drop(restored);
    }

    /// A subset plan survives a round trip through its raw pointer.
    #[test]
    fn convert_plan_into_raw_and_back() {
        let font = FontFace::new(Blob::from_file(NOTO_SANS).unwrap()).unwrap();
        let input = SubsetInput::new().unwrap();
        let raw = input.plan(&font).unwrap().into_raw();
        let restored = unsafe { SubsetPlan::from_raw(raw) };
        drop(restored);
    }
}