read_fonts/tables/gsub/
closure.rs

1//! Computing the closure over a set of glyphs
2//!
3//! This means taking a set of glyphs and updating it to include any other glyphs
4//! reachable from those glyphs via substitution, recursively.
5
6use font_types::GlyphId16;
7
8use crate::{
9    collections::IntSet,
10    tables::layout::{
11        ChainedSequenceContextFormat1, ChainedSequenceContextFormat2,
12        ChainedSequenceContextFormat3, ExtensionLookup, SequenceContextFormat1,
13        SequenceContextFormat2, SequenceContextFormat3, Subtables,
14    },
15    FontRead, ReadError,
16};
17
18use super::{
19    AlternateSubstFormat1, ChainedSequenceContext, ClassDef, Gsub, LigatureSubstFormat1,
20    MultipleSubstFormat1, ReverseChainSingleSubstFormat1, SequenceContext, SingleSubst,
21    SingleSubstFormat1, SingleSubstFormat2, SubstitutionSubtables,
22};
23
24/// A trait for tables which participate in closure
25pub(crate) trait GlyphClosure {
26    /// Update the set of glyphs with any glyphs reachable via substitution.
27    fn add_reachable_glyphs(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError>;
28}
29
30impl Gsub<'_> {
31    /// Return the set of glyphs reachable from the input set via any substitution.
32    pub fn closure_glyphs(
33        &self,
34        mut glyphs: IntSet<GlyphId16>,
35    ) -> Result<IntSet<GlyphId16>, ReadError> {
36        // we need to do this iteratively, since any glyph found in one pass
37        // over the lookups could also be the target of substitutions.
38
39        // we always call this once, and then keep calling if it produces
40        // additional glyphs
41        let mut prev_glyph_count = glyphs.len();
42        self.closure_glyphs_once(&mut glyphs)?;
43        let mut new_glyph_count = glyphs.len();
44
45        while prev_glyph_count != new_glyph_count {
46            prev_glyph_count = new_glyph_count;
47            self.closure_glyphs_once(&mut glyphs)?;
48            new_glyph_count = glyphs.len();
49        }
50
51        Ok(glyphs)
52    }
53
54    fn closure_glyphs_once(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError> {
55        let lookups_to_use = self.find_reachable_lookups(glyphs)?;
56        let lookup_list = self.lookup_list()?;
57        for (i, lookup) in lookup_list.lookups().iter().enumerate() {
58            if !lookups_to_use.contains(i as u16) {
59                continue;
60            }
61            let subtables = lookup?.subtables()?;
62            subtables.add_reachable_glyphs(glyphs)?;
63        }
64        Ok(())
65    }
66
67    fn find_reachable_lookups(&self, glyphs: &IntSet<GlyphId16>) -> Result<IntSet<u16>, ReadError> {
68        let feature_list = self.feature_list()?;
69        let lookup_list = self.lookup_list()?;
70        // first we want to get the lookups that are directly referenced by a feature
71        // (including in a feature variation table)
72        let mut lookup_ids = IntSet::new();
73        let feature_variations = self
74            .feature_variations()
75            .transpose()?
76            .map(|vars| {
77                let data = vars.offset_data();
78                vars.feature_variation_records()
79                    .iter()
80                    .filter_map(move |rec| {
81                        rec.feature_table_substitution(data)
82                            .transpose()
83                            .ok()
84                            .flatten()
85                    })
86                    .flat_map(|subs| {
87                        subs.substitutions()
88                            .iter()
89                            .map(move |sub| sub.alternate_feature(subs.offset_data()))
90                    })
91            })
92            .into_iter()
93            .flatten();
94        for feature in feature_list
95            .feature_records()
96            .iter()
97            .map(|rec| rec.feature(feature_list.offset_data()))
98            .chain(feature_variations)
99        {
100            lookup_ids.extend(feature?.lookup_list_indices().iter().map(|idx| idx.get()));
101        }
102
103        // and now we need to add lookups referenced by contextual lookups,
104        // IFF they are reachable via the current set of glyphs:
105        for lookup in lookup_list.lookups().iter() {
106            let subtables = lookup?.subtables()?;
107            match subtables {
108                SubstitutionSubtables::Contextual(tables) => tables
109                    .iter()
110                    .try_for_each(|t| t?.add_reachable_lookups(glyphs, &mut lookup_ids)),
111                SubstitutionSubtables::ChainContextual(tables) => tables
112                    .iter()
113                    .try_for_each(|t| t?.add_reachable_lookups(glyphs, &mut lookup_ids)),
114                _ => Ok(()),
115            }?;
116        }
117        Ok(lookup_ids)
118    }
119}
120
121impl GlyphClosure for SubstitutionSubtables<'_> {
122    fn add_reachable_glyphs(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError> {
123        match self {
124            SubstitutionSubtables::Single(tables) => tables.add_reachable_glyphs(glyphs),
125            SubstitutionSubtables::Multiple(tables) => tables.add_reachable_glyphs(glyphs),
126            SubstitutionSubtables::Alternate(tables) => tables.add_reachable_glyphs(glyphs),
127            SubstitutionSubtables::Ligature(tables) => tables.add_reachable_glyphs(glyphs),
128            SubstitutionSubtables::Reverse(tables) => tables.add_reachable_glyphs(glyphs),
129            _ => Ok(()),
130        }
131    }
132}
133
134impl<'a, T: FontRead<'a> + GlyphClosure + 'a, Ext: ExtensionLookup<'a, T> + 'a> GlyphClosure
135    for Subtables<'a, T, Ext>
136{
137    fn add_reachable_glyphs(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError> {
138        self.iter()
139            .try_for_each(|t| t?.add_reachable_glyphs(glyphs))
140    }
141}
142
143impl GlyphClosure for SingleSubst<'_> {
144    fn add_reachable_glyphs(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError> {
145        for (target, replacement) in self.iter_subs()? {
146            if glyphs.contains(target) {
147                glyphs.insert(replacement);
148            }
149        }
150        Ok(())
151    }
152}
153
154impl SingleSubst<'_> {
155    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
156        let (left, right) = match self {
157            SingleSubst::Format1(t) => (Some(t.iter_subs()?), None),
158            SingleSubst::Format2(t) => (None, Some(t.iter_subs()?)),
159        };
160        Ok(left
161            .into_iter()
162            .flatten()
163            .chain(right.into_iter().flatten()))
164    }
165}
166
167impl SingleSubstFormat1<'_> {
168    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
169        let delta = self.delta_glyph_id();
170        let coverage = self.coverage()?;
171        Ok(coverage.iter().filter_map(move |gid| {
172            let raw = (gid.to_u16() as i32).checked_add(delta as i32);
173            let raw = raw.and_then(|raw| u16::try_from(raw).ok())?;
174            Some((gid, GlyphId16::new(raw)))
175        }))
176    }
177}
178
179impl SingleSubstFormat2<'_> {
180    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
181        let coverage = self.coverage()?;
182        let subs = self.substitute_glyph_ids();
183        Ok(coverage.iter().zip(subs.iter().map(|id| id.get())))
184    }
185}
186
187impl GlyphClosure for MultipleSubstFormat1<'_> {
188    fn add_reachable_glyphs(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError> {
189        let coverage = self.coverage()?;
190        let sequences = self.sequences();
191        for (gid, replacements) in coverage.iter().zip(sequences.iter()) {
192            let replacements = replacements?;
193            if glyphs.contains(gid) {
194                glyphs.extend(
195                    replacements
196                        .substitute_glyph_ids()
197                        .iter()
198                        .map(|gid| gid.get()),
199                );
200            }
201        }
202        Ok(())
203    }
204}
205
206impl GlyphClosure for AlternateSubstFormat1<'_> {
207    fn add_reachable_glyphs(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError> {
208        let coverage = self.coverage()?;
209        let alts = self.alternate_sets();
210        for (gid, alts) in coverage.iter().zip(alts.iter()) {
211            let alts = alts?;
212            if glyphs.contains(gid) {
213                glyphs.extend(alts.alternate_glyph_ids().iter().map(|gid| gid.get()));
214            }
215        }
216        Ok(())
217    }
218}
219
220impl GlyphClosure for LigatureSubstFormat1<'_> {
221    fn add_reachable_glyphs(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError> {
222        let coverage = self.coverage()?;
223        let ligs = self.ligature_sets();
224        for (gid, lig_set) in coverage.iter().zip(ligs.iter()) {
225            let lig_set = lig_set?;
226            if glyphs.contains(gid) {
227                for lig in lig_set.ligatures().iter() {
228                    let lig = lig?;
229                    if lig
230                        .component_glyph_ids()
231                        .iter()
232                        .all(|gid| glyphs.contains(gid.get()))
233                    {
234                        glyphs.insert(lig.ligature_glyph());
235                    }
236                }
237            }
238        }
239        Ok(())
240    }
241}
242
243impl GlyphClosure for ReverseChainSingleSubstFormat1<'_> {
244    fn add_reachable_glyphs(&self, glyphs: &mut IntSet<GlyphId16>) -> Result<(), ReadError> {
245        for coverage in self
246            .backtrack_coverages()
247            .iter()
248            .chain(self.lookahead_coverages().iter())
249        {
250            if !coverage?.iter().any(|gid| glyphs.contains(gid)) {
251                return Ok(());
252            }
253        }
254
255        for (gid, sub) in self.coverage()?.iter().zip(self.substitute_glyph_ids()) {
256            if glyphs.contains(gid) {
257                glyphs.insert(sub.get());
258            }
259        }
260
261        Ok(())
262    }
263}
264
265impl SequenceContext<'_> {
266    fn add_reachable_lookups(
267        &self,
268        glyphs: &IntSet<GlyphId16>,
269        lookups: &mut IntSet<u16>,
270    ) -> Result<(), ReadError> {
271        match self {
272            SequenceContext::Format1(table) => table.add_reachable_lookups(glyphs, lookups),
273            SequenceContext::Format2(table) => table.add_reachable_lookups(glyphs, lookups),
274            SequenceContext::Format3(table) => table.add_reachable_lookups(glyphs, lookups),
275        }
276    }
277}
278
279impl SequenceContextFormat1<'_> {
280    fn add_reachable_lookups(
281        &self,
282        glyphs: &IntSet<GlyphId16>,
283        lookups: &mut IntSet<u16>,
284    ) -> Result<(), ReadError> {
285        let coverage = self.coverage()?;
286        for seq in coverage
287            .iter()
288            .zip(self.seq_rule_sets().iter())
289            .filter_map(|(gid, seq)| seq.filter(|_| glyphs.contains(gid)))
290        {
291            for rule in seq?.seq_rules().iter() {
292                let rule = rule?;
293                if rule
294                    .input_sequence()
295                    .iter()
296                    .all(|gid| glyphs.contains(gid.get()))
297                {
298                    lookups.extend(
299                        rule.seq_lookup_records()
300                            .iter()
301                            .map(|rec| rec.lookup_list_index()),
302                    );
303                }
304            }
305        }
306        Ok(())
307    }
308}
309
310impl SequenceContextFormat2<'_> {
311    fn add_reachable_lookups(
312        &self,
313        glyphs: &IntSet<GlyphId16>,
314        lookups: &mut IntSet<u16>,
315    ) -> Result<(), ReadError> {
316        let classdef = self.class_def()?;
317        let our_classes = make_class_set(glyphs, &classdef);
318        for seq in self
319            .class_seq_rule_sets()
320            .iter()
321            .enumerate()
322            .filter_map(|(i, seq)| seq.filter(|_| our_classes.contains(i as u16)))
323        {
324            for rule in seq?.class_seq_rules().iter() {
325                let rule = rule?;
326                if rule
327                    .input_sequence()
328                    .iter()
329                    .all(|class_id| our_classes.contains(class_id.get()))
330                {
331                    lookups.extend(
332                        rule.seq_lookup_records()
333                            .iter()
334                            .map(|rec| rec.lookup_list_index()),
335                    )
336                }
337            }
338        }
339        Ok(())
340    }
341}
342
343impl SequenceContextFormat3<'_> {
344    fn add_reachable_lookups(
345        &self,
346        glyphs: &IntSet<GlyphId16>,
347        lookups: &mut IntSet<u16>,
348    ) -> Result<(), ReadError> {
349        for coverage in self.coverages().iter() {
350            if !coverage?.iter().any(|gid| glyphs.contains(gid)) {
351                return Ok(());
352            }
353        }
354        lookups.extend(
355            self.seq_lookup_records()
356                .iter()
357                .map(|rec| rec.lookup_list_index()),
358        );
359        Ok(())
360    }
361}
362
363impl ChainedSequenceContext<'_> {
364    fn add_reachable_lookups(
365        &self,
366        glyphs: &IntSet<GlyphId16>,
367        lookups: &mut IntSet<u16>,
368    ) -> Result<(), ReadError> {
369        match self {
370            ChainedSequenceContext::Format1(table) => table.add_reachable_lookups(glyphs, lookups),
371            ChainedSequenceContext::Format2(table) => table.add_reachable_lookups(glyphs, lookups),
372            ChainedSequenceContext::Format3(table) => table.add_reachable_lookups(glyphs, lookups),
373        }
374    }
375}
376
377impl ChainedSequenceContextFormat1<'_> {
378    fn add_reachable_lookups(
379        &self,
380        glyphs: &IntSet<GlyphId16>,
381        lookups: &mut IntSet<u16>,
382    ) -> Result<(), ReadError> {
383        let coverage = self.coverage()?;
384        for seq in coverage
385            .iter()
386            .zip(self.chained_seq_rule_sets().iter())
387            .filter_map(|(gid, seq)| seq.filter(|_| glyphs.contains(gid)))
388        {
389            for rule in seq?.chained_seq_rules().iter() {
390                let rule = rule?;
391                if rule
392                    .input_sequence()
393                    .iter()
394                    .chain(rule.backtrack_sequence())
395                    .chain(rule.lookahead_sequence())
396                    .all(|gid| glyphs.contains(gid.get()))
397                {
398                    lookups.extend(
399                        rule.seq_lookup_records()
400                            .iter()
401                            .map(|rec| rec.lookup_list_index()),
402                    );
403                }
404            }
405        }
406        Ok(())
407    }
408}
409
410impl ChainedSequenceContextFormat2<'_> {
411    fn add_reachable_lookups(
412        &self,
413        glyphs: &IntSet<GlyphId16>,
414        lookups: &mut IntSet<u16>,
415    ) -> Result<(), ReadError> {
416        let input = self.input_class_def()?;
417        let backtrack = self.backtrack_class_def()?;
418        let lookahead = self.lookahead_class_def()?;
419
420        let input_classes = make_class_set(glyphs, &input);
421        let backtrack_classes = make_class_set(glyphs, &backtrack);
422        let lookahead_classes = make_class_set(glyphs, &lookahead);
423        for seq in self
424            .chained_class_seq_rule_sets()
425            .iter()
426            .enumerate()
427            .filter_map(|(i, seq)| seq.filter(|_| input_classes.contains(i as u16)))
428        {
429            for rule in seq?.chained_class_seq_rules().iter() {
430                let rule = rule?;
431                if rule
432                    .input_sequence()
433                    .iter()
434                    .all(|cls| input_classes.contains(cls.get()))
435                    && rule
436                        .backtrack_sequence()
437                        .iter()
438                        .all(|cls| backtrack_classes.contains(cls.get()))
439                    && rule
440                        .lookahead_sequence()
441                        .iter()
442                        .all(|cls| lookahead_classes.contains(cls.get()))
443                {
444                    lookups.extend(
445                        rule.seq_lookup_records()
446                            .iter()
447                            .map(|rec| rec.lookup_list_index()),
448                    )
449                }
450            }
451        }
452        Ok(())
453    }
454}
455
456impl ChainedSequenceContextFormat3<'_> {
457    fn add_reachable_lookups(
458        &self,
459        glyphs: &IntSet<GlyphId16>,
460        lookups: &mut IntSet<u16>,
461    ) -> Result<(), ReadError> {
462        for coverage in self
463            .backtrack_coverages()
464            .iter()
465            .chain(self.input_coverages().iter())
466            .chain(self.lookahead_coverages().iter())
467        {
468            if !coverage?.iter().any(|gid| glyphs.contains(gid)) {
469                return Ok(());
470            }
471        }
472        lookups.extend(
473            self.seq_lookup_records()
474                .iter()
475                .map(|rec| rec.lookup_list_index()),
476        );
477        Ok(())
478    }
479}
480
481fn make_class_set(glyphs: &IntSet<GlyphId16>, classdef: &ClassDef) -> IntSet<u16> {
482    glyphs.iter().map(|gid| classdef.get(gid)).collect()
483}
484
485#[cfg(test)]
486mod tests {
487    use std::collections::{HashMap, HashSet};
488
489    use crate::{FontRef, TableProvider};
490
491    use super::*;
492    use font_test_data::closure as test_data;
493
494    struct GlyphMap {
495        to_gid: HashMap<&'static str, GlyphId16>,
496        from_gid: HashMap<GlyphId16, &'static str>,
497    }
498
499    impl GlyphMap {
500        fn new(raw_order: &'static str) -> GlyphMap {
501            let to_gid: HashMap<_, _> = raw_order
502                .split('\n')
503                .map(|line| line.trim())
504                .filter(|line| !(line.starts_with('#') || line.is_empty()))
505                .enumerate()
506                .map(|(gid, name)| (name, GlyphId16::new(gid.try_into().unwrap())))
507                .collect();
508            let from_gid = to_gid.iter().map(|(name, gid)| (*gid, *name)).collect();
509            GlyphMap { from_gid, to_gid }
510        }
511
512        fn get_gid(&self, name: &str) -> Option<GlyphId16> {
513            self.to_gid.get(name).copied()
514        }
515
516        fn get_name(&self, gid: GlyphId16) -> Option<&str> {
517            self.from_gid.get(&gid).copied()
518        }
519    }
520
521    fn get_gsub(test_data: &'static [u8]) -> Gsub<'static> {
522        let font = FontRef::new(test_data).unwrap();
523        font.gsub().unwrap()
524    }
525
526    fn compute_closure(gsub: &Gsub, glyph_map: &GlyphMap, input: &[&str]) -> IntSet<GlyphId16> {
527        let input_glyphs = input
528            .iter()
529            .map(|name| glyph_map.get_gid(name).unwrap())
530            .collect();
531        gsub.closure_glyphs(input_glyphs).unwrap()
532    }
533
534    /// assert a set of glyph ids matches a slice of names
535    macro_rules! assert_closure_result {
536        ($glyph_map:expr, $result:expr, $expected:expr) => {
537            let result = $result
538                .iter()
539                .map(|gid| $glyph_map.get_name(gid).unwrap())
540                .collect::<HashSet<_>>();
541            let expected = $expected.iter().copied().collect::<HashSet<_>>();
542            if expected != result {
543                let in_output = result.difference(&expected).collect::<Vec<_>>();
544                let in_expected = expected.difference(&result).collect::<Vec<_>>();
545                let mut msg = format!("Closure output does not match\n");
546                if !in_expected.is_empty() {
547                    msg.push_str(format!("missing {in_expected:?}\n").as_str());
548                }
549                if !in_output.is_empty() {
550                    msg.push_str(format!("unexpected {in_output:?}").as_str());
551                }
552                panic!("{msg}")
553            }
554        };
555    }
556
557    #[test]
558    fn smoke_test() {
559        // tests various lookup types.
560        // test input is font-test-data/test_data/fea/simple_closure.fea
561        let gsub = get_gsub(test_data::SIMPLE);
562        let glyph_map = GlyphMap::new(test_data::SIMPLE_GLYPHS);
563        let result = compute_closure(&gsub, &glyph_map, &["a"]);
564
565        assert_closure_result!(
566            glyph_map,
567            result,
568            &["a", "A", "b", "c", "d", "a_a", "a.1", "a.2", "a.3"]
569        );
570    }
571
572    #[test]
573    fn recursive() {
574        // a scenario in which one substitution adds glyphs that trigger additional
575        // substitutions.
576        //
577        // test input is font-test-data/test_data/fea/recursive_closure.fea
578        let gsub = get_gsub(test_data::RECURSIVE);
579        let glyph_map = GlyphMap::new(test_data::RECURSIVE_GLYPHS);
580        let result = compute_closure(&gsub, &glyph_map, &["a"]);
581        assert_closure_result!(glyph_map, result, &["a", "b", "c", "d"]);
582    }
583
584    #[test]
585    fn contextual_lookups() {
586        let gsub = get_gsub(test_data::CONTEXTUAL);
587        let glyph_map = GlyphMap::new(test_data::CONTEXTUAL_GLYPHS);
588
589        // these match the lookups but not the context
590        let nop = compute_closure(&gsub, &glyph_map, &["three", "four", "e", "f"]);
591        assert_closure_result!(glyph_map, nop, &["three", "four", "e", "f"]);
592
593        let gsub6f1 = compute_closure(
594            &gsub,
595            &glyph_map,
596            &["one", "two", "three", "four", "five", "six", "seven"],
597        );
598        assert_closure_result!(
599            glyph_map,
600            gsub6f1,
601            &["one", "two", "three", "four", "five", "six", "seven", "X", "Y"]
602        );
603
604        let gsub6f3 = compute_closure(&gsub, &glyph_map, &["space", "e"]);
605        assert_closure_result!(glyph_map, gsub6f3, &["space", "e", "e.2"]);
606
607        let gsub5f3 = compute_closure(&gsub, &glyph_map, &["f", "g"]);
608        assert_closure_result!(glyph_map, gsub5f3, &["f", "g", "f.2"]);
609    }
610
611    #[test]
612    fn recursive_context() {
613        let gsub = get_gsub(test_data::RECURSIVE_CONTEXTUAL);
614        let glyph_map = GlyphMap::new(test_data::RECURSIVE_CONTEXTUAL_GLYPHS);
615
616        let nop = compute_closure(&gsub, &glyph_map, &["b", "B"]);
617        assert_closure_result!(glyph_map, nop, &["b", "B"]);
618
619        let full = compute_closure(&gsub, &glyph_map, &["a", "b", "c"]);
620        assert_closure_result!(glyph_map, full, &["a", "b", "c", "B", "B.2", "B.3"]);
621
622        let intermediate = compute_closure(&gsub, &glyph_map, &["a", "B.2"]);
623        assert_closure_result!(glyph_map, intermediate, &["a", "B.2", "B.3"]);
624    }
625
626    #[test]
627    fn feature_variations() {
628        let gsub = get_gsub(test_data::VARIATIONS_CLOSURE);
629        let glyph_map = GlyphMap::new(test_data::VARIATIONS_GLYPHS);
630
631        let input = compute_closure(&gsub, &glyph_map, &["a"]);
632        assert_closure_result!(glyph_map, input, &["a", "b", "c"]);
633    }
634}