allsorts_subset_browser/cff/
subset.rs

1use std::convert::TryFrom;
2use std::mem;
3
4use rustc_hash::{FxHashMap, FxHashSet};
5
6use super::{
7    owned, CFFFont, CFFVariant, CIDData, Charset, CustomCharset, DictDelta, FDSelect, Font,
8    FontDict, MaybeOwnedIndex, Operand, Operator, ParseError, Range, ADOBE, CFF, IDENTITY,
9    ISO_ADOBE_LAST_SID, OFFSET_ZERO, STANDARD_STRINGS,
10};
11use crate::binary::read::ReadArrayCow;
12use crate::binary::write::{WriteBinaryDep, WriteBuffer};
13use crate::subset::{SubsetError, SubsetGlyphs};
14
15/// A subset CFF font.
16pub struct SubsetCFF<'a> {
17    table: CFF<'a>,
18    new_to_old_id: Vec<u16>,
19    old_to_new_id: FxHashMap<u16, u16>,
20}
21
22impl<'a> SubsetCFF<'a> {
23    pub(crate) fn new(
24        table: CFF<'a>,
25        new_to_old_id: Vec<u16>,
26        old_to_new_id: FxHashMap<u16, u16>,
27    ) -> Self {
28        SubsetCFF {
29            table,
30            new_to_old_id,
31            old_to_new_id,
32        }
33    }
34}
35
36impl<'a> From<SubsetCFF<'a>> for CFF<'a> {
37    fn from(subset: SubsetCFF<'a>) -> CFF<'a> {
38        subset.table
39    }
40}
41
42impl<'a> SubsetGlyphs for SubsetCFF<'a> {
43    fn len(&self) -> usize {
44        self.new_to_old_id.len()
45    }
46
47    fn old_id(&self, new_id: u16) -> u16 {
48        self.new_to_old_id[usize::from(new_id)]
49    }
50
51    fn new_id(&self, old_id: u16) -> u16 {
52        self.old_to_new_id.get(&old_id).copied().unwrap_or(0)
53    }
54}
55
56impl<'a> CFF<'a> {
57    /// Create a subset of this CFF table.
58    ///
59    /// - `glpyh_ids` contains the ids of the glyphs to retain.
60    ///
61    /// When subsetting a Type 1 CFF font and retaining more than 255 glyphs the
62    /// `convert_cff_to_cid_if_more_than_255_glyphs` argument controls whether the Type 1 font
63    /// is converted to a CID keyed font in the process. The primary motivation for this is
64    /// broader compatibility, especially if the subset font is embedded in a PDF.
65    ///
66    /// **Known Limitations**
67    ///
68    /// Currently the subsetting process does not produce the smallest possible output font.
69    /// There are various parts of the source font that are copied to the output font as-is.
70    /// Specifically the subsetting process does not subset the String INDEX.
71    ///
72    /// Subsetting the String INDEX requires updating all String IDs (SID) in the font so
73    /// that they point at their new position in the String INDEX.
74    pub fn subset(
75        &'a self,
76        glyph_ids: &[u16],
77        convert_cff_to_cid_if_more_than_255_glyphs: bool,
78    ) -> Result<SubsetCFF<'a>, SubsetError> {
79        let mut cff = self.to_owned();
80        let font: &mut Font<'_> = &mut cff.fonts[0];
81        let mut charset = Vec::with_capacity(glyph_ids.len());
82        let mut fd_select = Vec::with_capacity(glyph_ids.len());
83        let mut new_to_old_id = Vec::with_capacity(glyph_ids.len());
84        let mut old_to_new_id =
85            FxHashMap::with_capacity_and_hasher(glyph_ids.len(), Default::default());
86        let mut glyph_data = Vec::with_capacity(glyph_ids.len());
87        let mut used_local_subrs = FxHashMap::default();
88        let mut used_global_subrs = FxHashSet::default();
89
90        for &glyph_id in glyph_ids {
91            let char_string = font
92                .char_strings_index
93                .read_object(usize::from(glyph_id))
94                .ok_or(ParseError::BadIndex)?;
95
96            let subrs = super::charstring::char_string_used_subrs(
97                CFFFont::CFF(font),
98                &font.char_strings_index,
99                &cff.global_subr_index,
100                glyph_id,
101            )?;
102            used_global_subrs.extend(subrs.global_subr_used);
103            if !subrs.local_subr_used.is_empty() {
104                used_local_subrs.insert(glyph_id, subrs.local_subr_used);
105            }
106
107            glyph_data.push(char_string.to_owned());
108            // Cast should be safe as there must be less than u16::MAX glyphs in a font
109            old_to_new_id.insert(glyph_id, new_to_old_id.len() as u16);
110            new_to_old_id.push(glyph_id);
111
112            if glyph_id != 0 {
113                let sid_or_cid = font
114                    .charset
115                    .id_for_glyph(glyph_id)
116                    .ok_or(ParseError::BadIndex)?;
117                charset.push(sid_or_cid);
118            }
119
120            // Calculate CID/Type 1 specific updates
121            match &font.data {
122                CFFVariant::CID(cid) => {
123                    // Find out which font DICT this glyph maps to if it's a CID font
124                    // Need to know which font DICT applies to each glyph, then ideally work out which FDSelect
125                    // format is the best to use. For now it's probably good enough to just use format 0
126                    let fd_index = cid
127                        .fd_select
128                        .font_dict_index(glyph_id)
129                        .ok_or(ParseError::BadIndex)?;
130                    fd_select.push(fd_index);
131                }
132                CFFVariant::Type1(_type1) => {}
133            }
134        }
135
136        cff.global_subr_index =
137            rebuild_global_subr_index(&cff.global_subr_index, used_global_subrs)?;
138        font.char_strings_index = MaybeOwnedIndex::Owned(owned::Index { data: glyph_data });
139
140        // Update CID/Type 1 specific structures
141        match &mut font.data {
142            CFFVariant::CID(cid) => {
143                // Build new local_subr_indices
144                cid.local_subr_indices = rebuild_local_subr_indices(cid, used_local_subrs)?;
145
146                // Filter out Subr ops in the Private DICT if the local subr INDEX is None for
147                // that DICT.
148                filter_private_dict_subr_ops(cid);
149
150                cid.fd_select = FDSelect::Format0 {
151                    glyph_font_dict_indices: ReadArrayCow::Owned(fd_select),
152                };
153            }
154            CFFVariant::Type1(type1) => {
155                // Build new local_subr_index
156                type1.local_subr_index = rebuild_type_1_local_subr_index(
157                    type1.local_subr_index.as_ref(),
158                    used_local_subrs,
159                )?;
160
161                // Filter out Subr ops in the Private DICT if the local subr INDEX is None.
162                if type1.local_subr_index.is_none() {
163                    type1
164                        .private_dict
165                        .dict
166                        .retain(|(op, _)| *op != Operator::Subrs);
167                }
168            }
169        }
170
171        // Update the charset
172        if font.is_cid_keyed() {
173            font.charset = Charset::Custom(CustomCharset::Format0 {
174                glyphs: ReadArrayCow::Owned(charset),
175            });
176        } else if convert_cff_to_cid_if_more_than_255_glyphs && font.char_strings_index.len() > 255
177        {
178            font.charset = convert_type1_to_cid(&mut cff.string_index, font)?;
179        } else {
180            let iso_adobe = 1..=ISO_ADOBE_LAST_SID;
181            if charset
182                .iter()
183                .zip(iso_adobe)
184                .all(|(sid, iso_adobe_sid)| *sid == iso_adobe_sid)
185            {
186                // As per section 18 of Technical Note #5176: There are no predefined charsets for
187                // CID fonts. So this branch is only taken for Type 1 fonts.
188                font.charset = Charset::ISOAdobe;
189            } else {
190                font.charset = Charset::Custom(CustomCharset::Format0 {
191                    glyphs: ReadArrayCow::Owned(charset),
192                });
193            }
194        }
195
196        Ok(SubsetCFF {
197            table: cff,
198            new_to_old_id,
199            old_to_new_id,
200        })
201    }
202}
203
204pub(crate) fn rebuild_global_subr_index(
205    src_global_subr_index: &MaybeOwnedIndex<'_>,
206    used_global_subrs: FxHashSet<usize>,
207) -> Result<MaybeOwnedIndex<'static>, ParseError> {
208    // Return a completely empty global subr index if there are no used global subrs
209    if used_global_subrs.is_empty() {
210        return Ok(MaybeOwnedIndex::Owned(owned::Index { data: Vec::new() }));
211    }
212
213    // Create a destination INDEX with the same number of entries as the source INDEX (see note
214    // in rebuild_local_subr_indices)
215    let mut dst_global_subr_index = owned::Index {
216        data: vec![Vec::new(); src_global_subr_index.len()],
217    };
218
219    copy_used_subrs(
220        used_global_subrs.iter().copied(),
221        src_global_subr_index,
222        &mut dst_global_subr_index,
223    )?;
224
225    Ok(MaybeOwnedIndex::Owned(dst_global_subr_index))
226}
227
228pub(crate) fn rebuild_local_subr_indices(
229    cid: &CIDData<'_>,
230    used_subrs_by_glyph: FxHashMap<u16, FxHashSet<usize>>,
231) -> Result<Vec<Option<MaybeOwnedIndex<'static>>>, ParseError> {
232    // Start off with all local subr indices as absent
233    let mut indices = vec![None; cid.private_dicts.len()];
234
235    for (glyph_id, used_subrs) in used_subrs_by_glyph {
236        // For each glyph determine the index of the local subr index
237        let index_of_local_subr_index = cid
238            .fd_select
239            .font_dict_index(glyph_id)
240            .map(usize::from)
241            .ok_or(ParseError::BadIndex)?;
242
243        // Get the source Local Subr INDEX that we'll be copying from
244        let src_local_subrs_index = match cid.local_subr_indices.get(index_of_local_subr_index) {
245            Some(Some(index)) => Some(index),
246            _ => None,
247        }
248        .ok_or(ParseError::BadIndex)?;
249
250        // Get the Local Subr INDEX that we'll be copying to, if it doesn't exist then create it
251        //
252        // To avoid needing to rewrite all CharStrings to reference updated sub-routine
253        // indices we instead fill the Local Subr INDEX with empty entries so that
254        // indexes into it remain stable.
255        //
256        // An earlier iteration of this code only populated entries in the INDEX up to the largest
257        // sub-routine index that was used. However this doesn't work because the operand to
258        // callsubr is biased based on the number of entries in the INDEX, so for the existing char
259        // strings to continue to work the same number of entries needs to be maintained. To do that
260        // we fill it with empty entries.
261        let dst_local_subr_index = match &mut indices[index_of_local_subr_index] {
262            Some(index) => index,
263            local_subr_index @ None => {
264                *local_subr_index = Some(owned::Index {
265                    data: vec![Vec::new(); src_local_subrs_index.len()],
266                });
267                local_subr_index.as_mut().unwrap() // NOTE(unwrap): safe as we set value above
268            }
269        };
270
271        copy_used_subrs(
272            used_subrs.iter().copied(),
273            src_local_subrs_index,
274            dst_local_subr_index,
275        )?;
276    }
277
278    Ok(indices
279        .into_iter()
280        .map(|index| index.map(MaybeOwnedIndex::Owned))
281        .collect())
282}
283
284fn copy_used_subrs(
285    used_subrs: impl Iterator<Item = usize>,
286    src_subrs_index: &MaybeOwnedIndex<'_>,
287    dst_subr_index: &mut owned::Index,
288) -> Result<(), ParseError> {
289    // `used_subrs` contains the indexes of sub-routines in `src_subr_index` that need to be copied.
290    // For each used subr we copy it to `dst_subr_index`.
291    for subr_index in used_subrs {
292        // Check to see if this sub-routine has already been copied to the INDEX. We do this
293        // by checking if its length is greater than zero. A defined subroutine will have a
294        // non-zero length as it must at least end with either an endchar or a return operator.
295        if dst_subr_index
296            .data
297            .get(subr_index)
298            .map_or(false, |subr| !subr.is_empty())
299        {
300            continue;
301        }
302
303        // Retrieve the Subr contents from the source Local Subr INDEX
304        let char_string = src_subrs_index
305            .read_object(subr_index)
306            .ok_or(ParseError::BadIndex)?;
307
308        // Now copy the Subr into the new index. I was curious about the efficiency of
309        // extend_from_slice in this context but looking at the assembly it compiles down to
310        // a call to memcpy.
311        debug_assert_eq!(dst_subr_index.data[subr_index].len(), 0);
312        dst_subr_index.data[subr_index].reserve_exact(char_string.len());
313        dst_subr_index.data[subr_index].extend_from_slice(char_string);
314    }
315    Ok(())
316}
317
318pub(crate) fn rebuild_type_1_local_subr_index(
319    src_local_subrs_index: Option<&MaybeOwnedIndex<'_>>,
320    used_subrs_by_glyph: FxHashMap<u16, FxHashSet<usize>>,
321) -> Result<Option<MaybeOwnedIndex<'static>>, ParseError> {
322    if used_subrs_by_glyph.is_empty() {
323        return Ok(None);
324    }
325
326    // Get the source Local Subr INDEX that we'll be copying from
327    let src_local_subrs_index = src_local_subrs_index.ok_or(ParseError::BadIndex)?;
328
329    // Create a destination INDEX with the same number of entries as the source INDEX (see note
330    // in rebuild_local_subr_indices)
331    let mut dst_local_subr_index = owned::Index {
332        data: vec![Vec::new(); src_local_subrs_index.len()],
333    };
334
335    for used_subrs in used_subrs_by_glyph.values() {
336        copy_used_subrs(
337            used_subrs.iter().copied(),
338            src_local_subrs_index,
339            &mut dst_local_subr_index,
340        )?;
341    }
342
343    Ok(Some(MaybeOwnedIndex::Owned(dst_local_subr_index)))
344}
345
346fn filter_private_dict_subr_ops(cid: &mut CIDData<'_>) {
347    for (private_dict, local_subr_index) in cid
348        .private_dicts
349        .iter_mut()
350        .zip(cid.local_subr_indices.iter())
351    {
352        if local_subr_index.is_none() {
353            private_dict.dict.retain(|(op, _)| *op != Operator::Subrs);
354        }
355    }
356}
357
358fn convert_type1_to_cid<'a>(
359    string_index: &mut MaybeOwnedIndex<'a>,
360    font: &mut Font<'a>,
361) -> Result<Charset<'a>, ParseError> {
362    assert!(!font.is_cid_keyed());
363
364    // Retrieve the SIDs of Adobe and Identity, adding them if they're not in the String INDEX
365    // already.
366    let (adobe_sid, identity_sid) = match (string_index.index(ADOBE), string_index.index(IDENTITY))
367    {
368        (Some(adobe_sid), Some(identity_sid)) => (adobe_sid, identity_sid),
369        (Some(adobe_sid), None) => (adobe_sid, string_index.push(IDENTITY.to_owned())),
370        (None, Some(identity_sid)) => (string_index.push(ADOBE.to_owned()), identity_sid),
371        (None, None) => (
372            string_index.push(ADOBE.to_owned()),
373            string_index.push(IDENTITY.to_owned()),
374        ),
375    };
376
377    // > the standard strings take SIDs in the range 0 to (nStdStrings –1). The first string in the
378    // > String INDEX corresponds to the SID whose value is equal to nStdStrings, the first
379    // > non-standard string
380    let adobe_sid = adobe_sid + STANDARD_STRINGS.len();
381    let identity_sid = identity_sid + STANDARD_STRINGS.len();
382
383    // Build Font DICT
384    let mut font_dict = FontDict::new();
385    font_dict.inner_mut().push((
386        Operator::Private,
387        vec![Operand::Offset(0), Operand::Offset(0)],
388    )); // Size and Offset will be updated when written out
389
390    let mut font_dict_buffer = WriteBuffer::new();
391    FontDict::write_dep(&mut font_dict_buffer, &font_dict, DictDelta::new())
392        .map_err(|_err| ParseError::BadValue)?;
393    let font_dict_index = MaybeOwnedIndex::Owned(owned::Index {
394        data: vec![font_dict_buffer.into_inner()],
395    });
396
397    let n_glyphs = u16::try_from(font.char_strings_index.len())?;
398
399    let fd_select = FDSelect::Format3 {
400        ranges: ReadArrayCow::Owned(vec![Range {
401            first: 0,
402            n_left: 0,
403        }]),
404        sentinel: n_glyphs,
405    };
406    let cid_data = CFFVariant::CID(CIDData {
407        font_dict_index,
408        private_dicts: Vec::new(),
409        local_subr_indices: Vec::new(),
410        fd_select,
411    });
412
413    // Swap Type1 data with CID data
414    let type1_data = match mem::replace(&mut font.data, cid_data) {
415        CFFVariant::Type1(data) => data,
416        CFFVariant::CID(_) => unreachable!(),
417    };
418    match &mut font.data {
419        CFFVariant::Type1(_type1) => unreachable!(),
420        CFFVariant::CID(cid) => {
421            cid.private_dicts = vec![type1_data.private_dict];
422            cid.local_subr_indices = vec![type1_data.local_subr_index];
423        }
424    };
425
426    // Update the Top DICT
427    // Add ROS
428    let registry = Operand::Integer(i32::try_from(adobe_sid)?);
429    let ordering = Operand::Integer(i32::try_from(identity_sid)?);
430    let supplement = Operand::Integer(0);
431    let ros = (Operator::ROS, vec![registry, ordering, supplement]);
432    font.top_dict.inner_mut().insert(0, ros);
433
434    // Add FDSelect and FDArray offsets to Top DICT
435    // Actual offsets will be filled in when writing
436    font.top_dict
437        .inner_mut()
438        .push((Operator::FDArray, OFFSET_ZERO.to_vec()));
439    font.top_dict
440        .inner_mut()
441        .push((Operator::FDSelect, OFFSET_ZERO.to_vec()));
442
443    // Add CIDCount
444    font.top_dict.inner_mut().push((
445        Operator::CIDCount,
446        vec![Operand::Integer(i32::from(n_glyphs))],
447    ));
448
449    // Remove Private DICT offset and encoding
450    font.top_dict.remove(Operator::Private);
451    font.top_dict.remove(Operator::Encoding);
452
453    // Add charset
454    Ok(Charset::Custom(CustomCharset::Format2 {
455        ranges: ReadArrayCow::Owned(vec![Range {
456            first: 1,
457            n_left: n_glyphs.checked_sub(2).ok_or(ParseError::BadIndex)?,
458        }]),
459    }))
460}