write_fonts/
graph.rs

1//! A graph for resolving table offsets
2
3use font_types::Uint24;
4
5use crate::{table_type::TableType, tables::layout::LookupType, write::TableData};
6
7use std::{
8    collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, VecDeque},
9    sync::atomic::AtomicU64,
10};
11
12#[cfg(feature = "dot2")]
13mod graphviz;
14mod splitting;
15
16static OBJECT_COUNTER: AtomicU64 = AtomicU64::new(0);
17
/// An identifier for an object in the compilation graph.
///
/// Ids are handed out by a process-wide atomic counter (`OBJECT_COUNTER`),
/// so they are unique across all graphs in a process, not just within one.
#[derive(Debug, Clone, Copy, PartialOrd, Ord, Hash, PartialEq, Eq)]
pub struct ObjectId(u64);
21
/// The width of an offset field.
///
/// The `u8` discriminant of each variant is its width in bytes (this is
/// relied on by `serialize`, which slices `len as u8 as usize` bytes).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum OffsetLen {
    /// A two-byte offset
    Offset16 = 2,
    /// A three-byte offset
    Offset24 = 3,
    /// A four-byte offset
    Offset32 = 4,
}
29
30impl OffsetLen {
31    /// The maximum value for an offset of this length.
32    pub const fn max_value(self) -> u32 {
33        match self {
34            Self::Offset16 => u16::MAX as u32,
35            Self::Offset24 => (1 << 24) - 1,
36            Self::Offset32 => u32::MAX,
37        }
38    }
39}
40
41impl std::fmt::Display for OffsetLen {
42    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
43        match self {
44            Self::Offset16 => write!(f, "Offset16"),
45            Self::Offset24 => write!(f, "Offset24"),
46            Self::Offset32 => write!(f, "Offset32"),
47        }
48    }
49}
/// A ranking used for sorting the graph.
///
/// Nodes are assigned a space, and nodes in lower spaces are always
/// packed before nodes in higher spaces.
///
/// The derived `Ord` compares the inner integer, so lower spaces sort first.
#[derive(Debug, Clone, Copy, PartialOrd, Ord, Hash, PartialEq, Eq)]
pub struct Space(u32);
56
57impl Space {
58    /// A generic space for nodes reachable via 16-bit offsets.
59    const SHORT_REACHABLE: Space = Space(0);
60    /// A generic space for nodes that are reachable via any offset.
61    const REACHABLE: Space = Space(1);
62    /// The first space used for assignment to specific subgraphs.
63    const INIT: Space = Space(2);
64
65    const fn is_custom(self) -> bool {
66        self.0 >= Space::INIT.0
67    }
68}
69
70impl ObjectId {
71    pub fn next() -> Self {
72        ObjectId(OBJECT_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed))
73    }
74}
75
/// A deduplicating store of table data, built up during compilation.
///
/// Identical `TableData` values are mapped to a single `ObjectId`
/// (see `ObjectStore::add`).
#[derive(Debug, Default)]
pub(crate) struct ObjectStore {
    pub(crate) objects: HashMap<TableData, ObjectId>,
}
80
81impl ObjectStore {
82    pub(crate) fn add(&mut self, data: TableData) -> ObjectId {
83        *self.objects.entry(data).or_insert_with(ObjectId::next)
84    }
85}
86
/// A graph of subtables, starting at a single root.
///
/// This type is used during compilation, to determine the final write order
/// for the various subtables.
//NOTE: we don't derive Debug because it's way too verbose to be useful
pub struct Graph {
    /// the actual data for each table
    objects: BTreeMap<ObjectId, TableData>,
    /// graph-specific state used for sorting
    nodes: BTreeMap<ObjectId, Node>,
    /// the computed write order; empty until a sort has been performed
    order: Vec<ObjectId>,
    /// the root table, from which all others should be reachable
    root: ObjectId,
    /// set when edges change; cleared by `update_parents`
    parents_invalid: bool,
    /// set when the graph changes; cleared by `update_distances`
    distance_invalid: bool,
    /// set whenever node positions may no longer match `order`
    positions_invalid: bool,
    /// the next space id to hand out when isolating a subgraph
    next_space: Space,
    /// per-space root counts, used when deciding what to isolate further
    num_roots_per_space: HashMap<Space, usize>,
}
105
/// Per-object state used while sorting the graph.
#[derive(Debug)]
struct Node {
    /// the size of this table's serialized bytes
    size: u32,
    /// shortest distance from the root, in bytes (see `update_distances`)
    distance: u32,
    /// overall position after sorting
    position: u32,
    /// the space this node is currently assigned to
    space: Space,
    /// the ids (and offset widths) of the tables that point at this one
    parents: Vec<(ObjectId, OffsetLen)>,
    /// adjusts effective distance during sorting; see `modified_distance`
    priority: Priority,
}
116
/// Score used when computing shortest distance.
///
/// NOTE: field order matters. The derived `Ord` compares `space` first,
/// then `distance`, then `order`, so nodes in lower spaces always sort
/// ahead of nodes in higher spaces.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct Distance {
    // a space ranking; like rankings are packed together,
    // and larger rankings are packed after smaller ones.
    space: Space,
    distance: u64,
    // a tie-breaker, based on order within a parent
    order: u32,
}
127
/// A priority level that lowers a node's effective distance,
/// causing it to be packed earlier (see `Node::modified_distance`).
//TODO: remove me? maybe? not really used right now...
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct Priority(u8);
131
/// A record of an overflowing offset
#[derive(Clone, Debug)]
pub(crate) struct Overflow {
    /// the table containing the offending offset
    parent: ObjectId,
    /// the table the offset points at
    child: ObjectId,
    /// the relative offset from parent to child, in bytes
    distance: u32,
    /// the width of the offset field that overflowed
    offset_type: OffsetLen,
}
140
141impl Priority {
142    const ZERO: Priority = Priority(0);
143    const ONE: Priority = Priority(1);
144    const TWO: Priority = Priority(2);
145    const THREE: Priority = Priority(3);
146
147    #[cfg(test)]
148    fn increase(&mut self) -> bool {
149        let result = *self != Priority::THREE;
150        self.0 = (self.0 + 1).min(3);
151        result
152    }
153}
154
155impl Distance {
156    const ROOT: Distance = Distance {
157        space: Space::SHORT_REACHABLE,
158        distance: 0,
159        order: 0,
160    };
161
162    fn rev(self) -> std::cmp::Reverse<Distance> {
163        std::cmp::Reverse(self)
164    }
165}
166
167impl Node {
168    pub fn new(size: u32) -> Self {
169        Node {
170            //obj,
171            size,
172            position: Default::default(),
173            distance: Default::default(),
174            space: Space::REACHABLE,
175            parents: Default::default(),
176            priority: Default::default(),
177        }
178    }
179
180    #[cfg(test)]
181    fn raise_priority(&mut self) -> bool {
182        self.priority.increase()
183    }
184
185    fn modified_distance(&self, order: u32) -> Distance {
186        let prev_dist = self.distance as i64;
187        let distance = match self.priority {
188            Priority::ZERO => prev_dist,
189            Priority::ONE => prev_dist - self.size as i64 / 2,
190            Priority::TWO => prev_dist - self.size as i64,
191            Priority::THREE => 0,
192            _ => 0,
193        }
194        .max(0) as u64;
195
196        Distance {
197            space: self.space,
198            distance,
199            order,
200        }
201    }
202}
203
204impl Graph {
205    pub(crate) fn from_obj_store(store: ObjectStore, root: ObjectId) -> Self {
206        let objects = store.objects.into_iter().map(|(k, v)| (v, k)).collect();
207        Self::from_objects(objects, root)
208    }
209
210    fn from_objects(objects: BTreeMap<ObjectId, TableData>, root: ObjectId) -> Self {
211        let nodes = objects
212            .iter()
213            //TODO: ensure table sizes elsewhere?
214            .map(|(key, obj)| (*key, Node::new(obj.bytes.len().try_into().unwrap())))
215            .collect();
216        Graph {
217            objects,
218            nodes,
219            order: Default::default(),
220            root,
221            parents_invalid: true,
222            distance_invalid: true,
223            positions_invalid: true,
224            next_space: Space::INIT,
225            num_roots_per_space: Default::default(),
226        }
227    }
228
    /// Write out the serialized graph.
    ///
    /// This is not public API, and you are responsible for ensuring that
    /// the graph is sorted before calling (by calling `pack_objects`, and
    /// checking that it has succeeded).
    pub(crate) fn serialize(&self) -> Vec<u8> {
        // Write `resolved` into the head of `at`, as a big-endian integer
        // of `len` bytes. Panics if the value does not fit; overflows are
        // expected to have been resolved during packing.
        fn write_offset(at: &mut [u8], len: OffsetLen, resolved: u32) {
            let at = &mut at[..len as u8 as usize];
            match len {
                OffsetLen::Offset16 => at.copy_from_slice(
                    u16::try_from(resolved)
                        .expect("offset overflow should be checked before now")
                        .to_be_bytes()
                        .as_slice(),
                ),
                OffsetLen::Offset24 => at.copy_from_slice(
                    Uint24::checked_new(resolved)
                        .expect("offset overflow should be checked before now")
                        .to_be_bytes()
                        .as_slice(),
                ),
                OffsetLen::Offset32 => at.copy_from_slice(resolved.to_be_bytes().as_slice()),
            }
        }

        assert!(
            !self.order.is_empty(),
            "graph must be sorted before serialization"
        );
        let mut offsets = HashMap::new();
        let mut out = Vec::new();
        let mut off = 0;

        // first pass: write out bytes, record positions of offsets
        for id in &self.order {
            let node = self.objects.get(id).unwrap();
            offsets.insert(*id, off);
            off += node.bytes.len() as u32;
            out.extend_from_slice(&node.bytes);
        }

        // second pass: write offsets
        let mut table_head = 0;
        for id in &self.order {
            let node = self.objects.get(id).unwrap();
            for offset in &node.offsets {
                let abs_off = *offsets
                    .get(&offset.object)
                    .expect("all offsets visited in first pass");
                // offsets are relative to the start of the parent table;
                // NOTE(review): presumably `adjustment` shifts the base the
                // offset is measured from — confirm against `TableData`.
                let rel_off = abs_off - (table_head + offset.adjustment);
                let buffer_pos = table_head + offset.pos;
                let write_over = out.get_mut(buffer_pos as usize..).unwrap();
                write_offset(write_over, offset.len, rel_off);
            }
            table_head += node.bytes.len() as u32;
        }
        out
    }
287
    /// Attempt to pack the graph.
    ///
    /// This involves finding an order for objects such that all offsets are
    /// resolvable.
    ///
    /// In the simple case, this just means finding a topological ordering.
    /// In exceptional cases, however, this may require us to significantly
    /// modify the graph.
    ///
    /// Our implementation is closely modeled on the implementation in the
    /// HarfBuzz repacker; see the [repacker docs] for further detail.
    ///
    /// returns `true` if a solution is found, `false` otherwise
    ///
    /// [repacker docs]: https://github.com/harfbuzz/harfbuzz/blob/main/docs/repacker.md
    pub(crate) fn pack_objects(&mut self) -> bool {
        // the happy path: a plain sort resolves all offsets
        if self.basic_sort() {
            return true;
        }

        // otherwise we start restructuring the graph itself
        self.try_splitting_subtables();
        self.try_promoting_subtables();

        log::info!("assigning spaces");
        self.assign_spaces_hb();
        self.sort_shortest_distance();

        if !self.has_overflows() {
            return true;
        }

        // now isolate spaces in a loop, until there are no more left:
        let overflows = loop {
            let overflows = self.find_overflows();
            if overflows.is_empty() {
                // we're done
                return true;
            }
            log::trace!(
                "failed with {} overflows, current size {}",
                overflows.len(),
                self.debug_len()
            );
            if !self.try_isolating_subgraphs(&overflows) {
                log::debug!("finished isolating all subgraphs without solution");
                break overflows;
            }
            self.sort_shortest_distance();
        };

        assert!(!overflows.is_empty());
        self.debug_overflows(&overflows);
        false
    }
342
343    /// Initial sorting operation. Attempt Kahn, falling back to shortest distance.
344    ///
345    /// This has to be called first, since it establishes an initial order.
346    /// subsequent operations on the graph require this order.
347    ///
348    /// returns `true` if sort succeeds with no overflows
349    fn basic_sort(&mut self) -> bool {
350        log::trace!("sorting {} objects", self.objects.len());
351
352        self.sort_kahn();
353        if !self.has_overflows() {
354            return true;
355        }
356        log::trace!("kahn failed, trying shortest distance");
357        self.sort_shortest_distance();
358        !self.has_overflows()
359    }
360
361    fn has_overflows(&self) -> bool {
362        for (parent_id, data) in &self.objects {
363            let parent = &self.nodes[parent_id];
364            for link in &data.offsets {
365                let child = &self.nodes[&link.object];
366                //TODO: account for 'whence'
367                let rel_off = child.position - parent.position;
368                if link.len.max_value() < rel_off {
369                    return true;
370                }
371            }
372        }
373        false
374    }
375
376    pub(crate) fn find_overflows(&self) -> Vec<Overflow> {
377        let mut result = Vec::new();
378        for (parent_id, data) in &self.objects {
379            let parent = &self.nodes[parent_id];
380            for link in &data.offsets {
381                let child = &self.nodes[&link.object];
382                //TODO: account for 'whence'
383                let rel_off = child.position - parent.position;
384                if link.len.max_value() < rel_off {
385                    result.push(Overflow {
386                        parent: *parent_id,
387                        child: link.object,
388                        distance: rel_off,
389                        offset_type: link.len,
390                    });
391                }
392            }
393        }
394        result
395    }
396
    /// Log a summary of `overflows` at debug level, and each individual
    /// overflow at trace level.
    fn debug_overflows(&self, overflows: &[Overflow]) {
        let (parents, children): (HashSet<_>, HashSet<_>) =
            overflows.iter().map(|x| (x.parent, x.child)).unzip();
        log::debug!(
            "found {} overflows from {} parents to {} children",
            overflows.len(),
            parents.len(),
            children.len()
        );
        for parent in &parents {
            let lookup_type = self.objects.get(parent).unwrap().type_;
            log::debug!("obj {parent:?}: type {lookup_type}");
        }

        for overflow in overflows {
            log::trace!(
                "{:?} -> {:?} type {} dist {}",
                overflow.parent,
                overflow.child,
                overflow.offset_type,
                overflow.distance
            );
        }
    }
421
422    // only valid if order is up to date. Returns total byte len of graph.
423    fn debug_len(&self) -> usize {
424        self.order
425            .iter()
426            .map(|id| self.objects.get(id).unwrap().bytes.len())
427            .sum()
428    }
429
430    fn update_parents(&mut self) {
431        if !self.parents_invalid {
432            return;
433        }
434        for node in self.nodes.values_mut() {
435            node.parents.clear();
436        }
437
438        for (id, obj) in &self.objects {
439            for link in &obj.offsets {
440                self.nodes
441                    .get_mut(&link.object)
442                    .unwrap()
443                    .parents
444                    .push((*id, link.len));
445            }
446        }
447        self.parents_invalid = false;
448    }
449
450    fn remove_orphans(&mut self) {
451        let mut visited = HashSet::with_capacity(self.nodes.len());
452        self.find_subgraph_hb(self.root, &mut visited);
453        if visited.len() != self.nodes.len() {
454            log::info!("removing {} orphan nodes", self.nodes.len() - visited.len());
455            for id in self
456                .nodes
457                .keys()
458                .copied()
459                .collect::<HashSet<_>>()
460                .difference(&visited)
461            {
462                self.nodes.remove(id);
463                self.objects.remove(id);
464            }
465        }
466    }
467
    /// Sort the graph topologically (Kahn's algorithm), updating `self.order`
    /// and each node's `position`.
    ///
    /// A node becomes ready once every one of its incoming edges has been
    /// seen; ready nodes are popped lowest-id-first (via `Reverse`).
    fn sort_kahn(&mut self) {
        self.positions_invalid = true;
        if self.nodes.len() <= 1 {
            self.order.extend(self.nodes.keys().copied());
            return;
        }

        let mut queue = BinaryHeap::new();
        // per-node count of incoming edges seen so far
        let mut removed_edges = HashMap::new();
        let mut current_pos: u32 = 0;
        self.order.clear();

        self.update_parents();
        queue.push(std::cmp::Reverse(self.root));

        while let Some(id) = queue.pop().map(|x| x.0) {
            let next = &self.objects[&id];
            self.order.push(id);
            self.nodes.get_mut(&id).unwrap().position = current_pos;
            current_pos += next.bytes.len() as u32;
            for link in &next.offsets {
                let seen_edges = removed_edges.entry(link.object).or_insert(0usize);
                *seen_edges += 1;
                // if the target of this link has no other incoming links, add
                // to the queue
                if *seen_edges == self.nodes[&link.object].parents.len() {
                    queue.push(std::cmp::Reverse(link.object));
                }
            }
        }
        //TODO: check for orphans & cycles?
        // sanity check: every node's incoming edges were all consumed
        for (id, seen_len) in &removed_edges {
            if *seen_len != self.nodes[id].parents.len() {
                panic!("cycle or something?");
            }
        }
    }
505
    /// Sort the graph by priority-adjusted shortest distance from the root.
    ///
    /// Like `sort_kahn`, but ready nodes are popped in order of their
    /// `modified_distance` (smallest first, via `rev()`) instead of by id.
    pub(crate) fn sort_shortest_distance(&mut self) {
        self.positions_invalid = true;
        self.update_parents();
        self.update_distances();
        self.assign_space_0();

        let mut queue = BinaryHeap::new();
        // per-node count of incoming edges seen so far
        let mut removed_edges = HashMap::with_capacity(self.nodes.len());
        let mut current_pos = 0;
        self.order.clear();

        queue.push((Distance::ROOT.rev(), self.root));
        // monotonically increasing tie-breaker: preserves enqueue order
        // between nodes with otherwise-equal distances
        let mut obj_order = 1u32;
        while let Some((_, id)) = queue.pop() {
            let next = &self.objects[&id];
            self.order.push(id);
            self.nodes.get_mut(&id).unwrap().position = current_pos;
            current_pos += next.bytes.len() as u32;
            for link in &next.offsets {
                let seen_edges = removed_edges.entry(link.object).or_insert(0usize);
                *seen_edges += 1;
                // if the target of this link has no other incoming links, add
                // to the queue
                if *seen_edges == self.nodes[&link.object].parents.len() {
                    let distance = self.nodes[&link.object].modified_distance(obj_order);
                    obj_order += 1;
                    queue.push((distance.rev(), link.object));
                }
            }
        }

        //TODO: check for orphans & cycles?
        // sanity check: every node's incoming edges were all consumed
        for (id, seen_len) in &removed_edges {
            if *seen_len != self.nodes[id].parents.len() {
                panic!("cycle or something?");
            }
        }
    }
544
545    fn update_distances(&mut self) {
546        self.nodes
547            .values_mut()
548            .for_each(|node| node.distance = u32::MAX);
549        self.nodes.get_mut(&self.root).unwrap().distance = u32::MIN;
550
551        let mut queue = BinaryHeap::new();
552        let mut visited = HashSet::new();
553        queue.push((Default::default(), self.root));
554
555        while let Some((_, next_id)) = queue.pop() {
556            if !visited.insert(next_id) {
557                continue;
558            }
559            let next_distance = self.nodes[&next_id].distance;
560            let next_obj = &self.objects[&next_id];
561            for link in &next_obj.offsets {
562                if visited.contains(&link.object) {
563                    continue;
564                }
565
566                let child = self.nodes.get_mut(&link.object).unwrap();
567                let child_distance = next_distance + child.size;
568
569                if child_distance < child.distance {
570                    child.distance = child_distance;
571                    queue.push((child_distance, link.object));
572                }
573            }
574        }
575
576        self.distance_invalid = false;
577    }
578
    /// isolate and assign spaces to subgraphs reachable via long offsets.
    ///
    /// This finds all subgraphs that are reachable via long offsets, and
    /// isolates them (ensuring they are *only* reachable via long offsets),
    /// assigning each unique space an identifier.
    ///
    /// Each space may have multiple roots; this works by finding the connected
    /// components from each root (counting only nodes reachable via long offsets).
    ///
    /// Returns `true` if any spaces were assigned.
    ///
    /// This is a close port of the [assign_spaces] method used by the HarfBuzz
    /// repacker.
    ///
    /// [assign_spaces]: https://github.com/harfbuzz/harfbuzz/blob/main/src/graph/graph.hh#L624
    fn assign_spaces_hb(&mut self) -> bool {
        self.update_parents();
        let (visited, mut roots) = self.find_space_roots_hb();

        if roots.is_empty() {
            return false;
        }

        log::trace!("found {} space roots to isolate", roots.len());

        // we want to *invert* the visited set, but we don't have a fancy hb_set_t
        // (i.e. `visited` becomes "every node NOT in a wide subgraph")
        let mut visited = self
            .order
            .iter()
            .copied()
            .collect::<HashSet<_>>()
            .difference(&visited)
            .copied()
            .collect::<HashSet<_>>();

        let mut connected_roots = BTreeSet::new(); // we can reuse this
        // take one remaining root, gather every root connected to it, and
        // isolate that whole component into its own space
        while let Some(next) = roots.iter().copied().next() {
            connected_roots.clear();
            self.find_connected_nodes_hb(next, &mut roots, &mut visited, &mut connected_roots);
            self.isolate_subgraph_hb(&mut connected_roots);

            self.distance_invalid = true;
            self.positions_invalid = true;
        }
        true
    }
623
    /// Find the root nodes of 32 (and later 24?)-bit space.
    ///
    /// These are the set of nodes that have incoming long offsets, for which
    /// no ancestor has an incoming long offset.
    ///
    /// Returns the set of nodes visited inside the wide subgraphs, plus the
    /// roots themselves.
    ///
    /// Ported from the [find_space_roots] method in HarfBuzz.
    ///
    /// [find_space_roots]: https://github.com/harfbuzz/harfbuzz/blob/main/src/graph/graph.hh#L508
    fn find_space_roots_hb(&self) -> (HashSet<ObjectId>, BTreeSet<ObjectId>) {
        let mut visited = HashSet::new();
        let mut roots = BTreeSet::new();

        let mut queue = VecDeque::from([self.root]);

        // NOTE(review): only nodes inside wide subgraphs are inserted into
        // `visited`; nodes reached via short offsets can be enqueued more
        // than once. Harmless on an acyclic graph, but worth confirming
        // against the HarfBuzz original.
        while let Some(id) = queue.pop_front() {
            if visited.contains(&id) {
                continue;
            }
            let obj = self.objects.get(&id).unwrap();
            for link in &obj.offsets {
                //FIXME: harfbuzz has a bunch of logic here for 24-bit offsets
                if link.len == OffsetLen::Offset32 {
                    // a wide offset: its target is a space root, and its whole
                    // subgraph is marked visited
                    roots.insert(link.object);
                    self.find_subgraph_hb(link.object, &mut visited);
                } else {
                    queue.push_back(link.object);
                }
            }
        }
        (visited, roots)
    }
655
656    fn find_subgraph_hb(&self, idx: ObjectId, nodes: &mut HashSet<ObjectId>) {
657        if !nodes.insert(idx) {
658            return;
659        }
660        for link in self.objects.get(&idx).unwrap().offsets.iter() {
661            self.find_subgraph_hb(link.object, nodes);
662        }
663    }
664
    /// Record every node reachable from `idx`, mapped to the number of edges
    /// pointing at it from *within* that subgraph.
    ///
    /// `idx` itself is not inserted here; the caller is expected to seed the
    /// map with it first (as `isolate_subgraph_hb` does for each root).
    fn find_subgraph_map_hb(&self, idx: ObjectId, graph: &mut BTreeMap<ObjectId, usize>) {
        use std::collections::btree_map::Entry;
        for link in &self.objects[&idx].offsets {
            match graph.entry(link.object) {
                // To avoid double counting, we only recurse if we are seeing
                // this node for the first time.
                Entry::Vacant(entry) => {
                    entry.insert(1);
                    self.find_subgraph_map_hb(link.object, graph);
                }
                Entry::Occupied(entry) => {
                    *entry.into_mut() += 1;
                }
            }
        }
    }
681
    /// find all of the members of 'targets' that are reachable, skipping nodes in `visited`.
    ///
    /// Traversal follows edges in *both* directions (children via offsets,
    /// parents via the `parents` lists); found targets are moved from
    /// `targets` into `connected`.
    fn find_connected_nodes_hb(
        &self,
        id: ObjectId,
        targets: &mut BTreeSet<ObjectId>,
        visited: &mut HashSet<ObjectId>,
        connected: &mut BTreeSet<ObjectId>,
    ) {
        if !visited.insert(id) {
            return;
        }
        if targets.remove(&id) {
            connected.insert(id);
        }
        // recurse to all children and parents
        for (obj, _) in &self.nodes.get(&id).unwrap().parents {
            self.find_connected_nodes_hb(*obj, targets, visited, connected);
        }
        for link in &self.objects.get(&id).unwrap().offsets {
            self.find_connected_nodes_hb(link.object, targets, visited, connected);
        }
    }
704
    /// Isolate the subgraph with the provided roots, moving it to a new space.
    ///
    /// This duplicates any nodes in this subgraph that are shared with
    /// any other nodes in the graph.
    ///
    /// Returns `true` if any nodes were duplicated. Roots that were duplicated
    /// are renamed in the input set to their new ids.
    ///
    /// Based on the [isolate_subgraph] method in HarfBuzz.
    ///
    /// [isolate_subgraph]: https://github.com/harfbuzz/harfbuzz/blob/main/src/graph/graph.hh#L508
    fn isolate_subgraph_hb(&mut self, roots: &mut BTreeSet<ObjectId>) -> bool {
        self.update_parents();

        // map of object id -> number of incoming edges
        let mut subgraph = BTreeMap::new();

        for root in roots.iter() {
            // for the roots, we set the edge count to the number of long
            // incoming offsets; if this differs from the total number of
            // incoming offsets it means we need to dupe the root as well.
            let inbound_wide_offsets = self.nodes[root]
                .parents
                .iter()
                .filter(|(_, len)| !matches!(len, OffsetLen::Offset16))
                .count();
            subgraph.insert(*root, inbound_wide_offsets);
            self.find_subgraph_map_hb(*root, &mut subgraph);
        }

        let next_space = self.next_space();
        log::debug!("moved {} roots to {next_space:?}", roots.len(),);
        self.num_roots_per_space.insert(next_space, roots.len());
        let mut id_map = HashMap::new();
        for (id, incoming_edges_in_subgraph) in &subgraph {
            // there are edges to this object from outside the subgraph; dupe it.
            if *incoming_edges_in_subgraph < self.nodes[id].parents.len() {
                self.duplicate_subgraph(*id, &mut id_map, next_space);
            }
        }

        // now remap any links in the subgraph from nodes that were not
        // themselves duplicated (since they were not reachable from outside)
        for id in subgraph.keys().filter(|k| !id_map.contains_key(k)) {
            self.nodes.get_mut(id).unwrap().space = next_space;
            let obj = self.objects.get_mut(id).unwrap();
            for link in &mut obj.offsets {
                if let Some(new_id) = id_map.get(&link.object) {
                    link.object = *new_id;
                }
            }
        }

        // nothing was duplicated, so no remapping of incoming links is needed
        if id_map.is_empty() {
            return false;
        }

        // now everything but the links to the roots has been remapped;
        // remap those, if needed
        for root in roots.iter() {
            let Some(new_id) = id_map.get(root) else {
                continue;
            };
            self.parents_invalid = true;
            self.positions_invalid = true;
            // only wide offsets are redirected to the duplicate; short
            // offsets keep pointing at the original copy
            for (parent_id, len) in &self.nodes[new_id].parents {
                if !matches!(len, OffsetLen::Offset16) {
                    for link in &mut self.objects.get_mut(parent_id).unwrap().offsets {
                        if link.object == *root {
                            link.object = *new_id;
                        }
                    }
                }
            }
        }

        // if any roots changed, we also rename them in the input set:
        for (old, new) in id_map {
            if roots.remove(&old) {
                roots.insert(new);
            }
        }

        true
    }
787
    /// for each space that has overflows and > 1 roots, select half the roots
    /// and move them to a separate subgraph.
    ///
    /// return `true` if any change was made.
    ///
    /// This is a port of the [_try_isolating_subgraphs] method in hb-repacker.
    ///
    /// [_try_isolating_subgraphs]: https://github.com/harfbuzz/harfbuzz/blob/main/src/hb-repacker.hh#L182
    fn try_isolating_subgraphs(&mut self, overflows: &[Overflow]) -> bool {
        let mut to_isolate = BTreeMap::new();
        for overflow in overflows {
            let parent_space = self.nodes[&overflow.parent].space;
            // we only isolate subgraphs in wide-space
            if !parent_space.is_custom() || self.num_roots_per_space[&parent_space] < 2 {
                continue;
            }
            // if parent space is custom it means all children should also be
            // in the same custom space.
            assert_eq!(parent_space, self.nodes[&overflow.child].space);
            let root = self.find_root_of_space(overflow.parent);
            assert_eq!(self.nodes[&root].space, parent_space);
            to_isolate
                .entry(parent_space)
                .or_insert_with(BTreeSet::new)
                .insert(root);
        }

        if to_isolate.is_empty() {
            return false;
        }

        for (space, mut roots) in to_isolate {
            let n_total_roots = self.num_roots_per_space[&space];
            debug_assert!(n_total_roots >= 2, "checked in the loop above");
            // move at most half the space's roots, keeping the smallest ids
            let max_to_move = n_total_roots / 2;
            log::trace!(
                "moving {} of {} candidate roots from {space:?} to new space",
                max_to_move.min(roots.len()),
                roots.len()
            );
            while roots.len() > max_to_move {
                roots.pop_last();
            }
            self.isolate_subgraph_hb(&mut roots);
            *self.num_roots_per_space.get_mut(&space).unwrap() -= roots.len();
        }

        true
    }
837
838    // invariant: obj must not be in space 0
839    fn find_root_of_space(&self, obj: ObjectId) -> ObjectId {
840        let space = self.nodes[&obj].space;
841        debug_assert!(space.is_custom());
842        let parent = self.nodes[&obj].parents[0].0;
843        if self.nodes[&parent].space != space {
844            return obj;
845        }
846        self.find_root_of_space(parent)
847    }
848
849    fn next_space(&mut self) -> Space {
850        self.next_space = Space(self.next_space.0 + 1);
851        self.next_space
852    }
853
854    fn try_promoting_subtables(&mut self) {
855        let Some((can_promote, parent_id)) = self.get_promotable_subtables() else {
856            return;
857        };
858        let to_promote = self.select_promotions_hb(&can_promote, parent_id);
859        log::info!(
860            "promoting {} of {} eligible subtables",
861            to_promote.len(),
862            can_promote.len()
863        );
864        self.actually_promote_subtables(&to_promote);
865    }
866
    /// Rewrite each lookup in `to_promote` so its subtables are wrapped in
    /// extension tables, and its lookup type becomes the extension type.
    fn actually_promote_subtables(&mut self, to_promote: &[ObjectId]) {
        // build an extension table (format 1, wrapped lookup type) pointing
        // at `subtable_id`.
        // NOTE(review): assumes `add_offset`'s args are (object, offset width
        // in bytes, adjustment) and that position 4 is implied by the two u16
        // fields written first — confirm against `TableData::add_offset`.
        fn make_extension(type_: LookupType, subtable_id: ObjectId) -> TableData {
            const EXT_FORMAT: u16 = 1;
            let mut data = TableData::new(TableType::Named("ExtensionPosFormat1"));
            data.write(EXT_FORMAT);
            data.write(type_.to_raw());
            data.add_offset(subtable_id, 4, 0);
            data
        }

        for id in to_promote {
            // 'id' is a lookup table.
            // we need to:
            // - change the subtable type
            // - create a new extension table for each subtable
            // - update the object ids

            let mut lookup = self.objects.remove(id).unwrap();
            let lookup_type = lookup.type_.to_lookup_type().expect("validated before now");
            for subtable_ref in &mut lookup.offsets {
                // interpose an extension table between lookup and subtable
                let ext_table = make_extension(lookup_type, subtable_ref.object);
                let ext_id = self.add_object(ext_table);
                subtable_ref.object = ext_id;
            }
            // rewrite the lookup's type field (at byte 0) to the extension type
            lookup.write_over(lookup_type.promote().to_raw(), 0);
            lookup.type_ = lookup_type.promote().into();
            self.objects.insert(*id, lookup);
        }
        self.parents_invalid = true;
        self.positions_invalid = true;
    }
898
899    /// Manually add an object to the graph, after initial compilation.
900    ///
901    /// This can be used to perform edits to the graph during compilation, such
902    /// as for table splitting or promotion.
903    ///
904    /// This has drawbacks; in particular, at this stage we no longer deduplicate
905    /// objects.
906    fn add_object(&mut self, data: TableData) -> ObjectId {
907        self.parents_invalid = true;
908        self.distance_invalid = true;
909
910        let id = ObjectId::next();
911        self.nodes.insert(id, Node::new(data.bytes.len() as _));
912        self.objects.insert(id, data);
913        id
914    }
915
916    // get the list of tables that can be promoted, as well as the id of their parent table
917    fn get_promotable_subtables(&self) -> Option<(Vec<ObjectId>, ObjectId)> {
918        let can_promote = self
919            .objects
920            .iter()
921            .filter_map(|(id, obj)| (obj.type_.is_promotable()).then_some(*id))
922            .collect::<Vec<_>>();
923
924        if can_promote.is_empty() {
925            return None;
926        }
927
928        // sanity check: ensure that all promotable tables have a common root.
929        let parents = can_promote
930            .iter()
931            .flat_map(|id| {
932                self.nodes
933                    .get(id)
934                    .expect("all nodes exist")
935                    .parents
936                    .iter()
937                    .map(|x| x.0)
938            })
939            .collect::<HashSet<_>>();
940
941        // the only promotable subtables should be lookups, and there should
942        // be a single LookupList that is their parent; if there is more than
943        // one parent then something weird is going on.
944        if parents.len() > 1 {
945            if cfg!(debug_assertions) {
946                panic!("Promotable subtables exist with multiple parents");
947            } else {
948                log::warn!("Promotable subtables exist with multiple parents");
949                return None;
950            }
951        }
952
953        let parent_id = *parents.iter().next().unwrap();
954        Some((can_promote, parent_id))
955    }
956
    /// select the tables to promote to extension, harfbuzz algorithm
    ///
    /// Based on the logic in HarfBuzz's [`_promote_extensions_if_needed`][hb-promote] function.
    ///
    /// [hb-promote]: https://github.com/harfbuzz/harfbuzz/blob/5d543d64222c6ce45332d0c188790f90691ef112/src/hb-repacker.hh#L97
    fn select_promotions_hb(&self, candidates: &[ObjectId], parent_id: ObjectId) -> Vec<ObjectId> {
        /// Per-candidate stats used to pick a promotion order.
        struct LookupSize {
            id: ObjectId,
            /// total bytes reachable from this lookup (including the lookup itself)
            subgraph_size: usize,
            /// number of direct subtable offsets in this lookup
            subtable_count: usize,
        }

        impl LookupSize {
            // I could impl Ord but then I need to impl PartialEq and it ends
            // up being way more code
            fn sort_key(&self) -> impl Ord {
                // NOTE(review): despite the name this ratio is subtables-per-byte;
                // after `Reverse`, lookups with the most subtables per byte of
                // subgraph (cheapest to keep un-promoted) sort first.
                let bytes_per_subtable = self.subtable_count as f64 / self.subgraph_size as f64;
                // f64 isn't ord, so we turn it into an integer,
                // then reverse, because we want bigger things first
                std::cmp::Reverse((bytes_per_subtable * 1e9) as u64)
            }
        }

        // compute subgraph size + subtable count for each candidate
        let mut lookup_sizes = Vec::with_capacity(candidates.len());
        let mut reusable_buffer = HashSet::new();
        let mut queue = VecDeque::new();
        for id in candidates {
            // get the subgraph size
            queue.clear();
            queue.push_back(*id);
            let subgraph_size = self.find_subgraph_size(&mut queue, &mut reusable_buffer);
            let subtable_count = self.objects.get(id).unwrap().offsets.len();
            lookup_sizes.push(LookupSize {
                id: *id,
                subgraph_size,
                subtable_count,
            });
        }

        lookup_sizes.sort_by_key(LookupSize::sort_key);
        const EXTENSION_SIZE: usize = 8; // number of bytes added by an extension subtable
        const MAX_LAYER_SIZE: usize = u16::MAX as usize;

        // running byte totals per 'layer' of the graph, used to estimate when
        // the 16-bit offsets between layers would overflow
        let lookup_list_size = self.objects.get(&parent_id).unwrap().bytes.len();
        let mut l2_l3_size = lookup_list_size; // size of LookupList + lookups
        let mut l3_l4_size = 0; // Lookups + lookup subtables
        let mut l4_plus_size = 0; // subtables and anything below that

        // start by assuming all lookups are extensions; we will adjust this later
        // if we do not promote.
        for lookup in &lookup_sizes {
            let subtables_size = lookup.subtable_count * EXTENSION_SIZE;
            l3_l4_size += subtables_size;
            l4_plus_size += subtables_size;
        }

        // visit candidates in sorted order: keep each un-promoted while the
        // 16-bit layers still fit, then promote everything after the first
        // lookup that does not fit.
        let mut layers_full = false;
        let mut to_promote = Vec::new();
        for lookup in &lookup_sizes {
            if !layers_full {
                let lookup_size = self.objects.get(&lookup.id).unwrap().bytes.len();
                let subtables_size = self.find_children_size(lookup.id);
                let remaining_size = lookup.subgraph_size - lookup_size - subtables_size;
                l2_l3_size += lookup_size;
                l3_l4_size += lookup_size + subtables_size;
                // adjust down, because we are demoting out of extension space
                l3_l4_size -= lookup.subtable_count * EXTENSION_SIZE;
                l4_plus_size += subtables_size + remaining_size;

                if l2_l3_size < MAX_LAYER_SIZE
                    && l3_l4_size < MAX_LAYER_SIZE
                    && l4_plus_size < MAX_LAYER_SIZE
                {
                    // this lookup fits in the 16-bit space, great
                    continue;
                }
                layers_full = true;
            }
            to_promote.push(lookup.id);
        }
        to_promote
    }
1039
1040    /// See if we have any subtables that support splitting, and split them
1041    /// if needed.
1042    ///
1043    /// Based on [`_presplit_subtables_if_needed`][presplit] in hb-repacker
1044    ///
1045    /// [presplit]: https://github.com/harfbuzz/harfbuzz/blob/5d543d64222c6ce45332d0c188790f90691ef112/src/hb-repacker.hh#LL72C22-L72C22
1046    fn try_splitting_subtables(&mut self) {
1047        let splittable = self
1048            .objects
1049            .iter()
1050            .filter_map(|(id, obj)| obj.type_.is_splittable().then_some(*id))
1051            .collect::<Vec<_>>();
1052        for lookup in &splittable {
1053            self.split_subtables_if_needed(*lookup);
1054        }
1055        if !splittable.is_empty() {
1056            self.remove_orphans();
1057        }
1058    }
1059
1060    fn split_subtables_if_needed(&mut self, lookup: ObjectId) {
1061        // So You Want to Split Subtables:
1062        // - support PairPos and MarkBase.
1063        let type_ = self.objects[&lookup].type_;
1064        match type_ {
1065            TableType::GposLookup(LookupType::PAIR_POS) => splitting::split_pair_pos(self, lookup),
1066            TableType::GposLookup(LookupType::MARK_TO_BASE) => {
1067                splitting::split_mark_to_base(self, lookup)
1068            }
1069            _ => (),
1070        }
1071    }
1072
1073    /// the size only of children of this object, not the whole subgraph
1074    fn find_children_size(&self, id: ObjectId) -> usize {
1075        self.objects[&id]
1076            .offsets
1077            .iter()
1078            .map(|off| self.objects.get(&off.object).unwrap().bytes.len())
1079            .sum()
1080    }
1081
1082    fn find_subgraph_size(
1083        &self,
1084        queue: &mut VecDeque<ObjectId>,
1085        visited: &mut HashSet<ObjectId>,
1086    ) -> usize {
1087        let mut size = 0;
1088        visited.clear();
1089        while !queue.is_empty() {
1090            let next = queue.pop_front().unwrap();
1091            visited.insert(next);
1092            let obj = self.objects.get(&next).unwrap();
1093            size += obj.bytes.len();
1094            queue.extend(
1095                obj.offsets
1096                    .iter()
1097                    .filter_map(|obj| (!visited.contains(&obj.object)).then_some(obj.object)),
1098            );
1099        }
1100        size
1101    }
1102
    /// Recursively copy the subgraph rooted at `root`, assigning all copies
    /// to `space`, and returning the id of the duplicated root.
    ///
    /// `dupes` maps original ids to their duplicates, so a node shared within
    /// the subgraph is only copied once and repeated calls reuse earlier
    /// duplicates.
    fn duplicate_subgraph(
        &mut self,
        root: ObjectId,
        dupes: &mut HashMap<ObjectId, ObjectId>,
        space: Space,
    ) -> ObjectId {
        if let Some(existing) = dupes.get(&root) {
            return *existing;
        }
        // structural edit: cached parents/distances are now stale
        self.parents_invalid = true;
        self.distance_invalid = true;
        let new_root = ObjectId::next();
        log::trace!("duplicating node {root:?} to {new_root:?}");

        let mut obj = self.objects.get(&root).cloned().unwrap();
        let mut node = Node::new(obj.bytes.len() as u32);
        node.space = space;

        for link in &mut obj.offsets {
            // recursively duplicate the object
            link.object = self.duplicate_subgraph(link.object, dupes, space);
        }
        dupes.insert(root, new_root);
        self.objects.insert(new_root, obj);
        self.nodes.insert(new_root, node);
        new_root
    }
1130
1131    /// Find the set of nodes that are reachable from root only following
1132    /// 16 & 24 bit offsets, and assign them to space 0.
1133    fn assign_space_0(&mut self) {
1134        let mut stack = VecDeque::from([self.root]);
1135
1136        while let Some(next) = stack.pop_front() {
1137            match self.nodes.get_mut(&next) {
1138                Some(node) if node.space != Space::SHORT_REACHABLE => {
1139                    node.space = Space::SHORT_REACHABLE
1140                }
1141                _ => continue,
1142            }
1143            for link in self
1144                .objects
1145                .get(&next)
1146                .iter()
1147                .flat_map(|obj| obj.offsets.iter())
1148            {
1149                if link.len != OffsetLen::Offset32 {
1150                    stack.push_back(link.object);
1151                }
1152            }
1153        }
1154    }
1155
1156    #[cfg(test)]
1157    fn find_descendents(&self, root: ObjectId) -> HashSet<ObjectId> {
1158        let mut result = HashSet::new();
1159        let mut stack = VecDeque::from([root]);
1160        while let Some(id) = stack.pop_front() {
1161            if result.insert(id) {
1162                for link in self
1163                    .objects
1164                    .get(&id)
1165                    .iter()
1166                    .flat_map(|obj| obj.offsets.iter())
1167                {
1168                    stack.push_back(link.object);
1169                }
1170            }
1171        }
1172        result
1173    }
1174
    /// Write a graphviz (dot) representation of the graph to `path`.
    #[cfg(feature = "dot2")]
    pub(crate) fn write_graph_viz(&self, path: impl AsRef<std::path::Path>) -> std::io::Result<()> {
        // if this is set then we prune the generated graph
        // (trimming nodes so the rendered output stays manageable)
        const PRUNE_GRAPH_ENV_VAR: &str = "FONTC_PRUNE_GRAPH";
        let try_trim_graph = std::env::var_os(PRUNE_GRAPH_ENV_VAR).is_some();
        graphviz::GraphVizGraph::from_graph(self, try_trim_graph).write_to_file(path)
    }
1182}
1183
1184impl Default for Priority {
1185    fn default() -> Self {
1186        Priority::ZERO
1187    }
1188}
1189
1190#[cfg(test)]
1191mod tests {
1192    use std::ops::Range;
1193
1194    use font_types::GlyphId16;
1195
1196    use crate::TableWriter;
1197
1198    use super::*;
1199
1200    fn make_ids<const N: usize>() -> [ObjectId; N] {
1201        let mut ids = [ObjectId::next(); N];
1202        for id in ids.iter_mut().skip(1) {
1203            *id = ObjectId::next();
1204        }
1205        ids
1206    }
1207
    /// A directed edge (offset) used when describing test graphs.
    struct Link {
        from: ObjectId,
        to: ObjectId,
        width: OffsetLen,
    }
1213
    /// Helper for constructing mock `Graph`s from (id, size) pairs and links.
    struct TestGraphBuilder {
        // (id, byte size) per object; the first entry becomes the root
        objects: Vec<(ObjectId, usize)>,
        links: Vec<Link>,
    }
1218
1219    impl TestGraphBuilder {
1220        fn new<const N: usize>(ids: [ObjectId; N], sizes: [usize; N]) -> Self {
1221            TestGraphBuilder {
1222                objects: ids.into_iter().zip(sizes).collect(),
1223                links: Default::default(),
1224            }
1225        }
1226
1227        fn add_link(&mut self, from: ObjectId, to: ObjectId, width: OffsetLen) -> &mut Self {
1228            self.links.push(Link { from, to, width });
1229            self
1230        }
1231
1232        fn build(&self) -> Graph {
1233            let mut objects = self
1234                .objects
1235                .iter()
1236                .map(|(id, size)| {
1237                    let table = TableData::make_mock(*size);
1238                    (*id, table)
1239                })
1240                .collect::<BTreeMap<_, _>>();
1241
1242            for link in &self.links {
1243                objects
1244                    .get_mut(&link.from)
1245                    .unwrap()
1246                    .add_mock_offset(link.to, link.width);
1247            }
1248            let root = self.objects.first().unwrap().0;
1249            Graph::from_objects(objects, root)
1250        }
1251    }
1252
1253    //#[test]
1254    //fn difference_smoke_test() {
1255    //assert!(Distance::MIN < Distance::MAX);
1256    //assert!(
1257    //Distance::from_offset_and_size(OffsetLen::Offset16, 10)
1258    //< Distance::from_offset_and_size(OffsetLen::Offset16, 20)
1259    //);
1260    //assert!(
1261    //Distance::from_offset_and_size(OffsetLen::Offset32, 10)
1262    //> Distance::from_offset_and_size(OffsetLen::Offset16, 20)
1263    //);
1264    //assert!(Distance::new(10, 3) > Distance::new(10, 1));
1265    //}
1266
    #[test]
    fn priority_smoke_test() {
        // each call to raise_priority should strictly shrink the node's
        // modified distance, until priority saturates at its maximum.
        let mut node = Node::new(20);
        node.distance = 100;
        let mod0 = node.modified_distance(1);
        node.raise_priority();
        let mod1 = node.modified_distance(1);
        assert!(mod0 > mod1);
        node.raise_priority();
        let mod2 = node.modified_distance(1);
        assert!(mod1 > mod2);
        node.raise_priority();
        let mod3 = node.modified_distance(1);
        assert!(mod2 > mod3, "{mod2:?} {mod3:?}");

        // max priority is 3
        node.raise_priority();
        let mod4 = node.modified_distance(1);
        assert_eq!(mod3, mod4);
    }
1287
    #[test]
    fn kahn_basic() {
        // topological (Kahn) sort: a table must come before all tables it links to
        let ids = make_ids::<4>();
        let sizes = [10, 10, 20, 10];
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset16)
            .add_link(ids[0], ids[2], OffsetLen::Offset16)
            .add_link(ids[0], ids[3], OffsetLen::Offset16)
            .add_link(ids[3], ids[1], OffsetLen::Offset16)
            .build();

        graph.sort_kahn();
        // 3 links 1, so 1 must be last
        assert_eq!(&graph.order, &[ids[0], ids[2], ids[3], ids[1]]);
    }
1303
    #[test]
    fn shortest_basic() {
        let ids = make_ids::<4>();
        let sizes = [10, 10, 20, 10];
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset16)
            .add_link(ids[0], ids[2], OffsetLen::Offset16)
            .add_link(ids[0], ids[3], OffsetLen::Offset16)
            .build();

        graph.sort_shortest_distance();
        // all three children hang directly off the root,
        // but 2 is larger than 3, so should be ordered after
        assert_eq!(&graph.order, &[ids[0], ids[1], ids[3], ids[2]]);
    }
1318
1319    #[test]
1320    fn overflow_basic() {
1321        let ids = make_ids::<3>();
1322        let sizes = [10, u16::MAX as usize - 5, 100];
1323        let mut graph = TestGraphBuilder::new(ids, sizes)
1324            .add_link(ids[0], ids[1], OffsetLen::Offset16)
1325            .add_link(ids[0], ids[2], OffsetLen::Offset16)
1326            .add_link(ids[1], ids[2], OffsetLen::Offset16)
1327            .build();
1328        graph.sort_kahn();
1329        assert_eq!(graph.find_overflows().len(), 1);
1330        assert_eq!(graph.find_overflows()[0].parent, ids[0]);
1331        assert_eq!(graph.find_overflows()[0].child, ids[2]);
1332    }
1333
    #[test]
    fn duplicate_subgraph() {
        let _ = env_logger::builder().is_test(true).try_init();
        let ids = make_ids::<10>();
        let sizes = [10; 10];

        // root has two children, one 16 and one 32-bit offset.
        // those subgraphs share three nodes, which must be deduped.

        //
        //     before          after
        //      0                 0
        //     / ⑊            ┌───┘⑊
        //    1   2 ---+      1     2 ---+
        //    |\ / \   |     / \   / \   |
        //    | 3   4  5    9   3 3'  4  5
        //    |  \ / \          |  \ / \
        //    |   6   7         6   6'  7
        //    |       |                 |
        //    |    8──┘              8──┘
        //    |    │                /
        //    9 ───┘               9'

        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset16)
            .add_link(ids[0], ids[2], OffsetLen::Offset32)
            .add_link(ids[1], ids[3], OffsetLen::Offset16)
            .add_link(ids[1], ids[9], OffsetLen::Offset16)
            .add_link(ids[2], ids[3], OffsetLen::Offset16)
            .add_link(ids[2], ids[4], OffsetLen::Offset16)
            .add_link(ids[2], ids[5], OffsetLen::Offset16)
            .add_link(ids[3], ids[6], OffsetLen::Offset16)
            .add_link(ids[4], ids[6], OffsetLen::Offset16)
            .add_link(ids[4], ids[7], OffsetLen::Offset16)
            .add_link(ids[7], ids[8], OffsetLen::Offset16)
            .add_link(ids[8], ids[9], OffsetLen::Offset16)
            .build();

        // before: 1 and 2's subgraphs share nodes 3, 6, and 9
        assert_eq!(graph.nodes.len(), 10);
        let one = graph.find_descendents(ids[1]);
        let two = graph.find_descendents(ids[2]);
        assert_eq!(one.intersection(&two).count(), 3);

        graph.assign_spaces_hb();

        // 3, 6, and 9 should be duplicated
        assert_eq!(graph.nodes.len(), 13);
        let one = graph.find_descendents(ids[1]);
        let two = graph.find_descendents(ids[2]);
        assert_eq!(one.intersection(&two).count(), 0);

        // 16-bit-reachable nodes stay in a non-custom space
        for id in &one {
            assert!(!graph.nodes.get(id).unwrap().space.is_custom());
        }

        // everything behind the 32-bit offset gets a custom space
        for id in &two {
            assert!(graph.nodes.get(id).unwrap().space.is_custom());
        }
    }
1393
    #[test]
    fn split_overflowing_spaces() {
        // this attempts to show a simplified version of a gsub table with extension
        // subtables, before any isolation/deduplication has happened.
        //
        //    before                         after
        //      0           (GSUB)             0
        //      |                              |
        //      1        (lookup List)         1
        //      |                              |
        //      2          (Lookup)            2
        //     / \                            / \
        //  ╔═3   4═╗   (ext subtables)    ╔═3   4═╗
        //  ║       ║                      ║       ║   (long offsets)
        //  5─┐   ┌─6    (subtables)       5       6
        //  │ └─8─┘ │                     / \     / \
        //  │       │    (cov tables)    7'  8'  7   8
        //  └───7───┘
        //

        let _ = env_logger::builder().is_test(true).try_init();
        let ids = make_ids::<9>();
        // make the coverage tables big enough that overflow is unavoidable
        let sizes = [10, 4, 12, 8, 8, 14, 14, 65520, 65520];
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset16)
            .add_link(ids[1], ids[2], OffsetLen::Offset16)
            .add_link(ids[2], ids[3], OffsetLen::Offset16)
            .add_link(ids[2], ids[4], OffsetLen::Offset16)
            .add_link(ids[3], ids[5], OffsetLen::Offset32)
            .add_link(ids[4], ids[6], OffsetLen::Offset32)
            .add_link(ids[5], ids[7], OffsetLen::Offset16)
            .add_link(ids[5], ids[8], OffsetLen::Offset16)
            .add_link(ids[6], ids[7], OffsetLen::Offset16)
            .add_link(ids[6], ids[8], OffsetLen::Offset16)
            .build();
        graph.sort_shortest_distance();

        assert!(graph.has_overflows());
        assert_eq!(graph.nodes.len(), 9);

        graph.assign_spaces_hb();
        graph.sort_shortest_distance();

        // now spaces are assigned, but not isolated
        assert_eq!(graph.nodes[&ids[5]].space, graph.nodes[&ids[6]].space);
        assert_eq!(graph.nodes.len(), 9);

        // now isolate space that overflows
        let overflows = graph.find_overflows();
        graph.try_isolating_subgraphs(&overflows);
        graph.sort_shortest_distance();

        // isolation duplicates the two shared coverage tables (7' and 8')
        assert_eq!(graph.nodes.len(), 11);
        assert!(graph.find_overflows().is_empty());
        // ensure we correctly update num_roots_per_space
        assert_eq!(graph.num_roots_per_space[&graph.nodes[&ids[6]].space], 1);
        assert_eq!(graph.num_roots_per_space[&graph.nodes[&ids[5]].space], 1);
    }
1453
    #[test]
    fn all_roads_lead_to_overflow() {
        // this is a regression test for a bug we had where we would fail
        // to correctly duplicate shared subgraphs when there were
        // multiple links between two objects, which caused us to overcount
        // the 'incoming edges in subgraph'.

        let _ = env_logger::builder().is_test(true).try_init();

        let ids = make_ids::<9>();
        let sizes = [10, 10, 10, 10, 10, 65524, 65524, 10, 24];
        // note: the repeated (1 -> 5) and (5 -> 8) links below are deliberate;
        // duplicate edges are the condition being regression-tested.
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset32)
            .add_link(ids[0], ids[2], OffsetLen::Offset32)
            .add_link(ids[0], ids[3], OffsetLen::Offset32)
            .add_link(ids[0], ids[4], OffsetLen::Offset32)
            .add_link(ids[1], ids[5], OffsetLen::Offset16)
            .add_link(ids[1], ids[5], OffsetLen::Offset16)
            .add_link(ids[2], ids[6], OffsetLen::Offset16)
            .add_link(ids[3], ids[7], OffsetLen::Offset16)
            .add_link(ids[5], ids[8], OffsetLen::Offset16)
            .add_link(ids[5], ids[8], OffsetLen::Offset16)
            .add_link(ids[6], ids[8], OffsetLen::Offset16)
            .add_link(ids[7], ids[8], OffsetLen::Offset16)
            .build();

        graph.assign_spaces_hb();
        graph.sort_shortest_distance();
        let overflows = graph.find_overflows();
        assert!(!overflows.is_empty());
        // the first round of isolation is not enough to resolve everything
        graph.try_isolating_subgraphs(&overflows);
        graph.sort_shortest_distance();
        let overflows = graph.find_overflows();
        assert!(!overflows.is_empty());
        // but a second round is
        assert!(graph.try_isolating_subgraphs(&overflows));
        graph.sort_shortest_distance();
        assert!(!graph.has_overflows());
    }
1492
    #[test]
    fn two_roots_one_space() {
        // If a subgraph is reachable from multiple long offsets, they are all
        // initially placed in the same space.
        //
        //  ┌──0═══╗    ┌──0═══╗
        //  │  ║   ║    │  ║   ║
        //  │  ║   ║    │  ║   ║
        //  1  2   3    1  2   3
        //  │   \ /     │   \ /
        //  └────4      4    4'
        //       │      │    │
        //       5      5    5'

        let ids = make_ids::<6>();
        let sizes = [10; 6];
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset16)
            .add_link(ids[0], ids[2], OffsetLen::Offset32)
            .add_link(ids[0], ids[3], OffsetLen::Offset32)
            .add_link(ids[1], ids[4], OffsetLen::Offset16)
            .add_link(ids[2], ids[4], OffsetLen::Offset16)
            .add_link(ids[3], ids[4], OffsetLen::Offset16)
            .add_link(ids[4], ids[5], OffsetLen::Offset16)
            .build();

        // 4 and 5 get duplicated out of 16-bit space (6 -> 8 nodes)
        assert_eq!(graph.nodes.len(), 6);
        graph.assign_spaces_hb();
        assert_eq!(graph.nodes.len(), 8);
        let one = graph.find_descendents(ids[1]);
        assert!(one.iter().all(|id| !graph.nodes[id].space.is_custom()));

        // 2 and 3 still share their duplicated children...
        let two = graph.find_descendents(ids[2]);
        let three = graph.find_descendents(ids[3]);
        assert_eq!(two.intersection(&three).count(), 2);
        assert_eq!(two.union(&three).count(), 4);

        // ...and everything in both subgraphs lives in a single space
        assert_eq!(
            two.union(&three)
                .map(|id| graph.nodes[id].space)
                .collect::<HashSet<_>>()
                .len(),
            1
        );
    }
1538
    #[test]
    fn duplicate_shared_root_subgraph() {
        // if a node is linked from both 16 & 32-bit space, and has no parents
        // in 32 bit space, it should always still be deduped.
        //
        //    before    after
        //     0          0
        //    / ⑊        / ⑊
        //   1   ⑊      1   2
        //   └───╴2     │
        //              2'

        let ids = make_ids::<3>();
        let sizes = [10; 3];
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset16)
            .add_link(ids[0], ids[2], OffsetLen::Offset32)
            .add_link(ids[1], ids[2], OffsetLen::Offset16)
            .build();
        graph.assign_spaces_hb();
        // node 2 is duplicated, so 3 nodes become 4
        assert_eq!(graph.nodes.len(), 4);
    }
1561
    #[test]
    fn assign_space_even_without_any_duplication() {
        // the subgraph of the long offset (0->2) is already isolated, and
        // so requires no duplication; but we should still correctly assign a
        // space to the children.
        //
        //     0
        //    / ⑊
        //   1   2
        //      /
        //     3

        let ids = make_ids::<4>();
        let sizes = [10; 4];
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset16)
            .add_link(ids[0], ids[2], OffsetLen::Offset32)
            .add_link(ids[2], ids[3], OffsetLen::Offset16)
            .build();
        graph.assign_spaces_hb();
        // both 2 and its child 3 should end up in a custom space
        let two = graph.find_descendents(ids[2]);
        assert!(two.iter().all(|id| graph.nodes[id].space.is_custom()));
    }
1585
    #[test]
    fn sort_respects_spaces() {
        // nodes in lower spaces must pack first: the 16-bit child (3)
        // sorts ahead of the 32-bit children (1, 2) regardless of distance
        let ids = make_ids::<4>();
        let sizes = [10; 4];
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset32)
            .add_link(ids[0], ids[2], OffsetLen::Offset32)
            .add_link(ids[0], ids[3], OffsetLen::Offset16)
            .build();
        graph.sort_shortest_distance();
        assert_eq!(&graph.order, &[ids[0], ids[3], ids[1], ids[2]]);
    }
1598
    #[test]
    fn assign_32bit_spaces_if_needed() {
        // object 1 is u16::MAX bytes, so 2 only fits if it packs before 1
        let ids = make_ids::<3>();
        let sizes = [10, u16::MAX as usize, 10];
        let mut graph = TestGraphBuilder::new(ids, sizes)
            .add_link(ids[0], ids[1], OffsetLen::Offset32)
            .add_link(ids[0], ids[2], OffsetLen::Offset16)
            .add_link(ids[1], ids[2], OffsetLen::Offset16)
            .build();
        graph.basic_sort();
        // this will overflow unless the 32-bit offset is put last.
        assert!(graph.has_overflows());
        // the full packing pass should resolve it via space assignment
        graph.pack_objects();
        assert!(!graph.has_overflows());
    }
1614
    /// Construct a real gsub table that cannot be packed unless we use extension
    /// subtables
    #[test]
    fn pack_real_gsub_table_with_extension_promotion() {
        use crate::tables::{gsub, layout};

        // trial and error: a number that just triggers overflow.
        const NUM_SUBTABLES: usize = 3279;

        // make an rsub rule for each glyph.
        let rsub_rules = (0u16..NUM_SUBTABLES as u16)
            .map(|id| {
                // Each rule will use unique coverage tables, so nothing is shared.
                let coverage = std::iter::once(GlyphId16::new(id)).collect();
                let backtrack = [id + 1, id + 3].into_iter().map(GlyphId16::new).collect();
                gsub::ReverseChainSingleSubstFormat1::new(
                    coverage,
                    vec![backtrack],
                    vec![],
                    vec![GlyphId16::new(id + 1)],
                )
            })
            .collect();

        let list = layout::LookupList::<gsub::SubstitutionLookup>::new(vec![
            gsub::SubstitutionLookup::Reverse(layout::Lookup::new(
                layout::LookupFlag::empty(),
                rsub_rules,
            )),
        ]);
        let table = gsub::Gsub::new(Default::default(), Default::default(), list);

        let mut graph = TableWriter::make_graph(&table);
        assert!(
            !graph.basic_sort(),
            "simple sorting should not resolve this graph"
        );

        const BASE_LEN: usize = 10 // GSUB header len
           + 2 // scriptlist table + featurelist (both empty, get deduped)
           + 4 // lookup list, one offset
           + 6; // lookup table (no offsets)
        const RSUB_LEN: usize = 16 // base table len
            + 6 // one-glyph coverage table
            + 8; // two-glyph backtrack coverage table

        const EXTENSION_LEN: usize = 8;

        // packing succeeds, and grows the graph by exactly one extension
        // table per subtable
        assert_eq!(graph.debug_len(), BASE_LEN + NUM_SUBTABLES * RSUB_LEN);
        assert!(graph.pack_objects());
        assert_eq!(
            graph.debug_len(),
            BASE_LEN + NUM_SUBTABLES * RSUB_LEN + NUM_SUBTABLES * EXTENSION_LEN
        );

        const EXPECTED_N_TABLES: usize = 5 // header, script/feature/lookup lists, lookup
            - 1 // because script/feature are both empty, thus identical
            + NUM_SUBTABLES * 3 // subtable + coverage + backtrack
            + NUM_SUBTABLES; // extension table for each subtable
        assert_eq!(graph.order.len(), EXPECTED_N_TABLES);
    }
1676
1677    fn make_big_pair_pos(glyph_range: Range<u16>) -> crate::tables::gpos::PositionLookup {
1678        use crate::tables::{gpos, layout};
1679        let coverage = glyph_range.clone().map(GlyphId16::new).collect();
1680        let pair_sets = glyph_range
1681            .map(|id| {
1682                let value_rec = gpos::ValueRecord::new().with_x_advance(id as _);
1683                gpos::PairSet::new(
1684                    // 165 is arbitrary; we want a big enough value that each
1685                    // PairSet is 'fairly large'.
1686                    (id..id + 165)
1687                        .map(|id2| {
1688                            gpos::PairValueRecord::new(
1689                                GlyphId16::new(id2),
1690                                value_rec.clone(),
1691                                gpos::ValueRecord::default(),
1692                            )
1693                        })
1694                        .collect(),
1695                )
1696            })
1697            .collect::<Vec<_>>();
1698        gpos::PositionLookup::Pair(layout::Lookup::new(
1699            layout::LookupFlag::empty(),
1700            vec![gpos::PairPos::format_1(coverage, pair_sets)],
1701        ))
1702    }
1703
1704    #[test]
1705    fn pack_real_gpos_table_with_extension_promotion() {
1706        use crate::tables::{gpos, layout};
1707
1708        let _ = env_logger::builder().is_test(true).try_init();
1709
1710        // this is a shallow graph with large nodes, which makes it easier
1711        // to visualize with graphviz.
1712        let pp1 = make_big_pair_pos(1..20);
1713        let pp2 = make_big_pair_pos(100..120);
1714        let pp3 = make_big_pair_pos(200..221);
1715        let pp4 = make_big_pair_pos(400..422);
1716        let pp5 = make_big_pair_pos(500..523);
1717        let pp6 = make_big_pair_pos(600..624);
1718        let table = gpos::Gpos::new(
1719            Default::default(),
1720            Default::default(),
1721            layout::LookupList::new(vec![pp1, pp2, pp3, pp4, pp5, pp6]),
1722        );
1723
1724        // this constructs a graph where there are overflows in a single pairpos
1725        // subtable.
1726        let mut graph = TableWriter::make_graph(&table);
1727        assert!(
1728            !graph.basic_sort(),
1729            "simple sorting should not resolve this graph",
1730        );
1731
1732        // uncomment these two lines if you want to visualize the graph:
1733
1734        //graph.write_graph_viz("promote_gpos_before.dot");
1735
1736        let n_tables_before = graph.order.len();
1737        assert!(graph.pack_objects());
1738
1739        //graph.write_graph_viz("promote_gpos_after.dot");
1740
1741        // we should have resolved this overflow by promoting a single lookup
1742        // to be an extension, but our logic for determining when to promote
1743        // is not quite perfect, so it promotes an extra.
1744        //
1745        // if our impl changes and this is failing because we're only promoting
1746        // a single extension, then that's great
1747        assert_eq!(n_tables_before + 2, graph.order.len());
1748    }
1749
1750    #[test]
1751    fn preserve_mark_filter_set_after_split() {
1752        use crate::tables::{gpos, layout};
1753        use read_fonts::tables::gpos as rgpos;
1754        use read_fonts::FontRead as _;
1755
1756        let mut pp = make_big_pair_pos(0..100);
1757        let gpos::PositionLookup::Pair(ppos) = &mut pp else {
1758            panic!("oops");
1759        };
1760        ppos.mark_filtering_set = Some(404);
1761        ppos.lookup_flag |= layout::LookupFlag::USE_MARK_FILTERING_SET;
1762        let table = gpos::Gpos::new(
1763            Default::default(),
1764            Default::default(),
1765            layout::LookupList::new(vec![pp]),
1766        );
1767        let bytes = crate::dump_table(&table).unwrap();
1768        let rgpos = rgpos::Gpos::read(bytes.as_slice().into()).unwrap();
1769        let lookups = rgpos.lookup_list().unwrap();
1770        assert_eq!(lookups.lookup_count(), 1);
1771        let lookup = lookups.lookups().get(0).unwrap();
1772        // after splitting, the mark filter set id should be the same
1773        assert_eq!(lookup.mark_filtering_set(), Some(404));
1774        match lookup.subtables().unwrap() {
1775            rgpos::PositionSubtables::Pair(subtables) => assert_eq!(subtables.len(), 2),
1776            _ => panic!("extremely wrong"),
1777        };
1778    }
1779
1780    #[test]
1781    fn unpackable_graph_should_fail() {
1782        let _ = env_logger::builder().is_test(true).try_init();
1783        // specifically, it should not run forever.
1784        let ids = make_ids::<4>();
1785        let sizes = [10, 10, 66000, 66000];
1786        let mut graph = TestGraphBuilder::new(ids, sizes)
1787            .add_link(ids[0], ids[1], OffsetLen::Offset32)
1788            .add_link(ids[1], ids[2], OffsetLen::Offset16)
1789            .add_link(ids[1], ids[3], OffsetLen::Offset16)
1790            .build();
1791
1792        assert!(!graph.pack_objects());
1793    }
1794}