write_fonts/tables/variations/
ivs_builder.rs

1//! Building the ItemVariationStore
2
3use std::{
4    cmp::Reverse,
5    collections::{BinaryHeap, HashMap, HashSet},
6    fmt::{Debug, Display},
7};
8
9use crate::tables::{
10    layout::VariationIndex,
11    variations::{ItemVariationData, ItemVariationStore, VariationRegion, VariationRegionList},
12};
13use indexmap::IndexMap;
14
/// A temporary identifier assigned to each delta set as it is added to the builder.
///
/// These ids are replaced by final [VariationIndex] values when the store is
/// built; see [VariationIndexRemapping].
type TemporaryDeltaSetId = u32;
16
/// A builder for the [ItemVariationStore].
///
/// This handles assigning VariationIndex values to unique sets of deltas and
/// grouping delta sets into [ItemVariationData] subtables.
#[derive(Clone, Debug)]
pub struct VariationStoreBuilder {
    // region -> index map
    // (assigns a stable 'canonical' index to each unique region, in insertion order)
    all_regions: HashMap<VariationRegion, usize>,
    // the delta sets added so far; deduplicated in the general case,
    // or stored one-per-item for HVAR-style implicit indices
    delta_sets: DeltaSetStorage,
    // must match fvar. We require the user to pass this in because we cannot
    // infer it in the case where no deltas are added to the builder.
    axis_count: u16,
}
30
/// A collection of delta sets.
///
/// The variant is fixed when the builder is constructed and determines how
/// temporary ids are assigned (see [DeltaSetStorage::add]).
#[derive(Clone, Debug)]
enum DeltaSetStorage {
    // only for hvar: we do not deduplicate deltas, and store one per glyph id
    Direct(Vec<DeltaSet>),
    // the general case, where each delta gets a unique id
    Deduplicated(IndexMap<DeltaSet, TemporaryDeltaSetId>),
}
39
/// A map from the temporary delta set identifiers to the final values.
///
/// This is generated when the [ItemVariationStore] is built; afterwards
/// any tables or records that contain VariationIndex tables need to be remapped.
#[derive(Clone, Debug, Default)]
pub struct VariationIndexRemapping {
    // temporary id -> final (outer, inner) location of that delta set in the store
    map: HashMap<TemporaryDeltaSetId, VariationIndex>,
}
48
/// Remapping temporary delta set identifiers to the final values.
///
/// This is called after the [`ItemVariationStore`] has been built, at which
/// point any table containing a delta set index needs to be updated to point
/// to the final value.
///
/// This trait should be implemented by any table that contains delta set indices,
/// as well as for any table containing such a table, which should recursively
/// call it on the relevant subtables.
pub trait RemapVariationIndices {
    /// Remap any `TemporaryDeltaSetId`s to their final `VariationIndex` values
    fn remap_variation_indices(&mut self, key_map: &VariationIndexRemapping);
}
62
/// Always sorted, so we can ensure equality
///
/// Each tuple is (region index, delta value); the region index refers to the
/// canonical indices assigned by the builder's region map.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq)]
struct DeltaSet(Vec<(u16, i32)>);
68
impl VariationStoreBuilder {
    /// Create a builder that will optimize delta storage.
    ///
    /// This is the general case. For HVAR, it is also possible to use the
    /// glyph ids as implicit indices, which may be more efficient for some
    /// data. To use implicit indices, use [`new_with_implicit_indices`] instead.
    ///
    /// [`new_with_implicit_indices`]: VariationStoreBuilder::new_with_implicit_indices
    pub fn new(axis_count: u16) -> Self {
        Self {
            axis_count,
            delta_sets: DeltaSetStorage::Deduplicated(Default::default()),
            all_regions: Default::default(),
        }
    }

    /// Returns `true` if no deltas have been added to this builder
    pub fn is_empty(&self) -> bool {
        match &self.delta_sets {
            DeltaSetStorage::Direct(val) => val.is_empty(),
            DeltaSetStorage::Deduplicated(val) => val.is_empty(),
        }
    }

    /// Create a builder that does not share deltas between entries.
    ///
    /// This is used in HVAR, where it is possible to use glyph ids as the
    /// 'inner index', and to generate a single ItemVariationData subtable
    /// with one entry per item.
    pub fn new_with_implicit_indices(axis_count: u16) -> Self {
        VariationStoreBuilder {
            axis_count,
            all_regions: Default::default(),
            delta_sets: DeltaSetStorage::Direct(Default::default()),
        }
    }

    /// Add a set of deltas (one per region) and return a temporary id.
    ///
    /// The returned id must be converted to a final [VariationIndex] via the
    /// [VariationIndexRemapping] returned from [`build`](Self::build).
    pub fn add_deltas<T: Into<i32>>(
        &mut self,
        deltas: Vec<(VariationRegion, T)>,
    ) -> TemporaryDeltaSetId {
        let mut delta_set = Vec::with_capacity(deltas.len());
        for (region, delta) in deltas {
            let region_idx = self.canonical_index_for_region(region) as u16;
            delta_set.push((region_idx, delta.into()));
        }
        // sort by region index, so that identical delta sets compare equal
        delta_set.sort_unstable();
        // treat a deltaset containing all zeros the same as an empty one;
        // e.g. a glyph that only has one instance at the default location (no deltas)
        // vs another that defines multiple instances but all of them are at the
        // default location (all deltas are zero).
        if delta_set.iter().all(|(_, delta)| *delta == 0) {
            delta_set.clear();
        }
        self.delta_sets.add(DeltaSet(delta_set))
    }

    /// Return the canonical index for this region, assigning a new one if needed.
    fn canonical_index_for_region(&mut self, region: VariationRegion) -> usize {
        let next_idx = self.all_regions.len();
        *self.all_regions.entry(region).or_insert(next_idx)
    }

    /// Build the final region list, pruning any regions no subtable references.
    ///
    /// Also rewrites the region indexes in each subtable from the canonical
    /// indices to positions in the pruned, sorted list.
    fn make_region_list(&self, subtables: &mut [Option<ItemVariationData>]) -> VariationRegionList {
        // collect the set of region indices actually used by each ItemVariationData
        let used_regions = subtables
            .iter()
            .flatten()
            .flat_map(|var_data| var_data.region_indexes.iter())
            .map(|idx| *idx as usize)
            .collect::<HashSet<_>>();
        // prune unused regions and keep track of old index to new index
        let mut region_list = self
            .all_regions
            .iter()
            .filter_map(|(reg, idx)| {
                if used_regions.contains(idx) {
                    Some((idx, reg.to_owned()))
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();
        // sort by canonical index so the output order is deterministic
        region_list.sort_unstable();
        let mut new_regions = Vec::new();
        let mut region_map = HashMap::new();
        for (old_idx, reg) in region_list.into_iter() {
            region_map.insert(*old_idx as u16, new_regions.len() as u16);
            new_regions.push(reg);
        }
        // remap the region indexes in each subtable
        for var_data in subtables.iter_mut().flatten() {
            var_data.region_indexes = var_data
                .region_indexes
                .iter()
                .map(|idx| region_map[idx])
                .collect();
        }
        VariationRegionList::new(self.axis_count, new_regions)
    }

    fn encoder(&self) -> Encoder<'_> {
        Encoder::new(&self.delta_sets, self.all_regions.len() as u16)
    }

    /// Build the `ItemVariationStore` table
    ///
    /// This also returns a structure that can be used to remap the temporarily
    /// assigned delta set Ids to their final `VariationIndex` values.
    pub fn build(self) -> (ItemVariationStore, VariationIndexRemapping) {
        let mut key_map = VariationIndexRemapping::default();
        // 'Direct' storage (HVAR with implicit indices) becomes a single
        // unoptimized subtable; otherwise run the fonttools-style optimizer.
        let mut subtables = if matches!(self.delta_sets, DeltaSetStorage::Direct(_)) {
            vec![self.build_unoptimized(&mut key_map)]
        } else {
            let mut encoder = self.encoder();
            encoder.optimize();
            encoder.encode(&mut key_map)
        };
        let region_list = self.make_region_list(&mut subtables);
        (ItemVariationStore::new(region_list, subtables), key_map)
    }

    /// Build a single ItemVariationData subtable
    fn build_unoptimized(
        &self,
        key_map: &mut VariationIndexRemapping,
    ) -> Option<ItemVariationData> {
        // first pick an encoding capable of representing all items:
        let n_regions = self.all_regions.len() as u16;
        let mut shape = RowShape(vec![ColumnBits::None; n_regions as usize]);
        let mut temp = RowShape::default();

        for (delta, _) in self.delta_sets.iter() {
            temp.reuse(delta, n_regions);
            if !shape.can_cover(&temp) {
                // widen the shape so it can also hold this row
                shape = shape.merge(&temp);
            }
        }

        // then encode everything with that encoding.
        let encoding = Encoding {
            shape,
            deltas: self.delta_sets.iter().collect(),
        };
        debug_assert!(
            encoding.deltas.len() <= u16::MAX as usize,
            "unmapped variation store supports at most u16::MAX items"
        );
        encoding.encode(key_map, 0)
    }
}
219
220/// A context for encoding deltas into the final [`ItemVariationStore`].
221///
222/// This mostly exists so that we can write better tests.
/// A context for encoding deltas into the final [`ItemVariationStore`].
///
/// This mostly exists so that we can write better tests.
struct Encoder<'a> {
    // one entry per unique row shape; entries are merged during `optimize`
    encodings: Vec<Encoding<'a>>,
}
226
/// A set of deltas that share a shape.
struct Encoding<'a> {
    // the per-column byte widths shared by all rows in this encoding
    shape: RowShape,
    // the rows, paired with the temporary ids assigned by the builder
    deltas: Vec<(&'a DeltaSet, TemporaryDeltaSetId)>,
}
232
/// A type for remapping delta sets during encoding.
///
/// This mapping applies to a single ItemVariationData table, with the regions
/// defined in the VariationRegionList in the parent table.
struct RegionMap {
    /// A map from the canonical region indices (represented in the sorted
    /// order of the map) to the ordering of the deltas in a particular
    /// ItemVariationData table.
    ///
    /// For each canonical index, we store the local (column) index and the bits
    /// required to store that column.
    regions_to_columns: Vec<(u16, ColumnBits)>,
    // number of columns with at least one non-zero value
    n_active_regions: u16,
    // number of 'long' columns (they are ordered before the short ones)
    n_long_regions: u16,
    // if true, long columns are i32 and short are i16; otherwise i16/i8
    long_words: bool,
}
249
/// Describes the compressibility of a row of deltas across all variation regions.
///
/// fonttools calls this the 'characteristic' of a row.
///
/// We could get much fancier about how we represent this type, and avoid
/// allocation in most cases; but this is simple and works, so :shrug:
// one entry per canonical region index, giving the byte width of that column
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
struct RowShape(Vec<ColumnBits>);
258
//NOTE: we could do fancier bit packing here (fonttools uses four bits per
//column but I think the gains will be marginal)
/// The number of bytes required to represent a given delta column.
///
/// The discriminant is the width in bytes, which `cost` relies on.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum ColumnBits {
    /// i.e. the value is zero
    None = 0,
    /// an i8
    One = 1,
    /// an i16
    Two = 2,
    /// an i32
    Four = 4,
}
274
impl<'a> Encoder<'a> {
    /// Group the delta sets into encodings, one per unique row shape.
    fn new(delta_map: &'a DeltaSetStorage, total_regions: u16) -> Self {
        let mut shape = RowShape::default();
        let mut encodings: IndexMap<_, Vec<_>> = Default::default();

        for (delta, idx) in delta_map.iter() {
            shape.reuse(delta, total_regions);
            // we use get_mut + insert instead of the entry API so that we only
            // clone the (reused) shape buffer when we see a new shape
            match encodings.get_mut(&shape) {
                Some(items) => items.push((delta, idx)),
                None => {
                    encodings.insert(shape.clone(), vec![(delta, idx)]);
                }
            }
        }
        let encodings = encodings
            .into_iter()
            .map(|(shape, deltas)| Encoding { shape, deltas })
            .collect();

        Encoder { encodings }
    }

    /// The total byte cost of all encodings (subtable overhead plus row data).
    fn cost(&self) -> usize {
        self.encodings.iter().map(Encoding::cost).sum()
    }

    /// Recursively combine encodings where doing so provides space savings.
    ///
    /// This is a reimplementation of the [VarStore_optimize][fonttools] function
    /// in fonttools, although it is not a direct port.
    ///
    /// [fonttools]: https://github.com/fonttools/fonttools/blob/fb56e7b7c9715895b81708904c840875008adb9c/Lib/fontTools/varLib/varStore.py#L471
    fn optimize(&mut self) {
        let cost = self.cost();
        log::trace!("optimizing {} encodings, {cost}B", self.encodings.len(),);
        // a little helper for pretty-printing our todo list
        struct DebugTodoList<'a>(&'a [Option<Encoding<'a>>]);
        impl Debug for DebugTodoList<'_> {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                write!(f, "Todo({} items", self.0.len())?;
                for (i, enc) in self.0.iter().enumerate() {
                    if let Some(enc) = enc {
                        write!(f, "\n    {i:>4}: {enc:?}")?;
                    }
                }
                writeln!(f, ")")
            }
        }
        // temporarily take ownership of all the encodings
        let mut to_process = std::mem::take(&mut self.encodings);
        // pre-sort them like fonttools does, for stability
        to_process.sort_unstable_by(Encoding::ord_matching_fonttools);
        // convert to a vec of Option<Encoding>;
        // we will replace items with None as they are combined
        let mut to_process = to_process.into_iter().map(Option::Some).collect::<Vec<_>>();

        // build up a priority list of the space savings from combining each pair
        // of encodings
        let mut queue = BinaryHeap::with_capacity(to_process.len());

        for (i, red) in to_process.iter().enumerate() {
            for (j, blue) in to_process.iter().enumerate().skip(i + 1) {
                let gain = red.as_ref().unwrap().compute_gain(blue.as_ref().unwrap());
                if gain > 0 {
                    log::trace!("adding ({i}, {j} ({gain})) to queue");
                    // negate gain to match fonttools and use std::cmp::Reverse to
                    // mimic Python heapq's "min heap"
                    queue.push(Reverse((-gain, i, j)));
                }
            }
        }

        // iteratively process each item in the queue
        while let Some(Reverse((_gain, i, j))) = queue.pop() {
            // stale queue entries may reference already-combined encodings
            if to_process[i].is_none() || to_process[j].is_none() {
                continue;
            }
            // as items are combined, we leave `None` in the to_process list.
            // This ensures that indices are stable.
            let (Some(mut to_update), Some(to_add)) = (
                to_process.get_mut(i).and_then(Option::take),
                to_process.get_mut(j).and_then(Option::take),
            ) else {
                unreachable!("checked above")
            };
            log::trace!(
                "combining {i}+{j} ({}, {} {_gain})",
                to_update.shape,
                to_add.shape
            );

            //NOTE: it is now possible that we have duplicate data. I'm not sure
            //how likely this is? Not very likely? it would require one deltaset's
            //regions to be a subset of another, with zeros for the missing axes?
            to_update.merge_with(to_add);
            let n = to_process.len(); // index we assign the combined encoding
            let mut maybe_existing_encoding = None;
            for (ii, opt_encoding) in to_process.iter_mut().enumerate() {
                // does two things: skips empty indices, and also temporarily
                // removes the item (we'll put it back unless we merge, below)
                let Some(encoding) = opt_encoding.take() else {
                    continue;
                };

                if encoding.shape == to_update.shape {
                    // if an identical encoding exists in the list, we will just
                    // merge it with the newly created one. We do this after
                    // calculating the new gains, though, so we aren't changing
                    // anything mid-stream
                    maybe_existing_encoding = Some(encoding);
                    continue;
                }
                let gain = to_update.compute_gain(&encoding);
                if gain > 0 {
                    log::trace!("adding ({n}, {ii} ({gain})) to queue");
                    queue.push(Reverse((-gain, ii, n)));
                }
                *opt_encoding = Some(encoding);
            }
            if let Some(existing) = maybe_existing_encoding.take() {
                to_update.deltas.extend(existing.deltas);
            }
            // the combined encoding always goes at the end, at index `n`
            to_process.push(Some(to_update));
            log::trace!("{:?}", DebugTodoList(&to_process));
        }
        self.encodings = to_process.into_iter().flatten().collect();
        // now sort the items in each individual encoding
        self.encodings
            .iter_mut()
            .for_each(|enc| enc.deltas.sort_unstable());
        // and then sort the encodings themselves; order doesn't matter,
        // but we want to match fonttools output when comparing ttx
        self.encodings
            .sort_unstable_by(Encoding::ord_matching_fonttools);
        log::trace!(
            "optimized {} encodings, {}B, ({}B saved)",
            self.encodings.len(),
            self.cost(),
            cost.saturating_sub(self.cost()),
        );
    }

    /// Encode the `Encoding` sets into [`ItemVariationData`] subtables.
    ///
    /// In general, each encoding ends up being one subtable, except:
    /// - if the encoding is empty, we get a `NULL` subtable (aka None)
    /// - if an encoding contains more than 0xFFFF rows, it is split into
    ///   multiple subtables.
    fn encode(self, key_map: &mut VariationIndexRemapping) -> Vec<Option<ItemVariationData>> {
        self.encodings
            .into_iter()
            .flat_map(Encoding::iter_split_into_table_size_chunks)
            .enumerate()
            .map(|(i, encoding)| encoding.encode(key_map, i as u16))
            .collect()
    }
}
432
433impl ColumnBits {
434    fn for_val(val: i32) -> Self {
435        if val == 0 {
436            Self::None
437        } else if i8::try_from(val).is_ok() {
438            Self::One
439        } else if i16::try_from(val).is_ok() {
440            Self::Two
441        } else {
442            Self::Four
443        }
444    }
445
446    /// The number of bytes required to store this column
447    fn cost(self) -> usize {
448        self as u8 as _
449    }
450}
451
impl RowShape {
    /// Reuse this type's storage for a new delta set.
    ///
    /// This might be premature optimization.
    ///
    /// The rationale is that many of these are identical, so this saves us
    /// from constantly allocating and throwing away.
    fn reuse(&mut self, deltas: &DeltaSet, n_regions: u16) {
        self.0.clear();
        self.0.resize(n_regions as _, ColumnBits::None);
        for (region, delta) in &deltas.0 {
            self.0[*region as usize] = ColumnBits::for_val(*delta);
        }
    }

    /// Returns a shape that can fit both self and other.
    ///
    /// In practice this means taking the max of each column.
    fn merge(&self, other: &Self) -> Self {
        Self(
            self.0
                .iter()
                .zip(other.0.iter())
                .map(|(us, them)| *us.max(them))
                .collect(),
        )
    }

    /// `true` if each value in this shape is >= the same value in `other`.
    fn can_cover(&self, other: &Self) -> bool {
        debug_assert_eq!(self.0.len(), other.0.len());
        self.0
            .iter()
            .zip(other.0.iter())
            .all(|(us, them)| us >= them)
    }

    /// the cost in bytes of a row in this encoding
    fn row_cost(&self) -> usize {
        self.0.iter().copied().map(ColumnBits::cost).sum()
    }

    /// the fixed per-subtable cost of an encoding with this shape
    fn overhead(&self) -> usize {
        /// the minimum number of bytes in an ItemVariationData table
        // NOTE(review): presumably header fields plus the offset in the
        // parent's offset array — confirm against the spec if this changes
        const SUBTABLE_FIXED_COST: usize = 10;
        const COST_PER_REGION: usize = 2;
        SUBTABLE_FIXED_COST + (self.n_non_zero_regions() * COST_PER_REGION)
    }

    /// the number of columns that store at least one non-zero value
    fn n_non_zero_regions(&self) -> usize {
        self.0.iter().map(|x| (*x as u8).min(1) as usize).sum()
    }

    /// return a tuple for the counts of (1, 2, 4) byte-encoded items in self
    fn count_lengths(&self) -> (u16, u16, u16) {
        self.0
            .iter()
            .fold((0, 0, 0), |(byte, short, long), this| match this {
                ColumnBits::One => (byte + 1, short, long),
                ColumnBits::Two => (byte, short + 1, long),
                ColumnBits::Four => (byte, short, long + 1),
                _ => (byte, short, long),
            })
    }

    /// Returns a struct that maps the canonical regions to the column indices
    /// used in this ItemVariationData.
    fn region_map(&self) -> RegionMap {
        let mut with_idx = self.0.iter().copied().enumerate().collect::<Vec<_>>();
        // sort in descending order of bit size, e.g. big first
        // (ties broken by canonical index, so the result is deterministic)
        with_idx.sort_unstable_by_key(|(idx, bit)| (std::cmp::Reverse(*bit), *idx));
        // now build a map of indexes from the original positions to the new ones.
        let mut map = vec![(0u16, ColumnBits::None); with_idx.len()];
        for (new_idx, (canonical_idx, bits)) in with_idx.iter().enumerate() {
            map[*canonical_idx] = (new_idx as _, *bits);
        }

        let (count_8, count_16, count_32) = self.count_lengths();
        // if any column needs 32 bits, 'long' regions are i32 (and short i16);
        // otherwise long regions are i16 (and short i8)
        let long_words = count_32 > 0;
        let n_long_regions = if long_words { count_32 } else { count_16 };
        let n_active_regions = count_8 + count_16 + count_32;
        RegionMap {
            regions_to_columns: map,
            n_active_regions,
            n_long_regions,
            long_words,
        }
    }

    // for verifying our sorting behaviour.
    // ported from https://github.com/fonttools/fonttools/blob/ec9986d3b863d/Lib/fontTools/varLib/varStore.py#L441
    #[cfg(test)]
    fn to_fonttools_repr(&self) -> u128 {
        assert!(
            self.0.len() <= u128::BITS as usize / 4,
            "we can only pack 128 bits"
        );

        let has_long_word = self.0.iter().any(|bits| *bits == ColumnBits::Four);
        let mut chars = 0;
        let mut i = 1;

        if !has_long_word {
            for v in &self.0 {
                if *v != ColumnBits::None {
                    chars += i;
                }
                if *v == ColumnBits::Two {
                    chars += i * 0b0010;
                }
                i <<= 4;
            }
        } else {
            for v in &self.0 {
                if *v != ColumnBits::None {
                    chars += i * 0b0011;
                }
                if *v == ColumnBits::Four {
                    chars += i * 0b1100;
                }
                i <<= 4;
            }
        }
        chars
    }
}
578
impl<'a> Encoding<'a> {
    /// The total byte cost of this encoding: subtable overhead plus all rows.
    fn cost(&self) -> usize {
        self.shape.overhead() + (self.shape.row_cost() * self.deltas.len())
    }

    /// The bytes saved by merging `other` into `self` (negative if it grows).
    fn compute_gain(&self, other: &Encoding) -> i64 {
        let current_cost = self.cost() + other.cost();

        let combined = self.shape.merge(&other.shape);
        let combined_cost =
            combined.overhead() + (combined.row_cost() * (self.deltas.len() + other.deltas.len()));
        current_cost as i64 - combined_cost as i64
    }

    /// Absorb another encoding, widening our shape to cover both.
    fn merge_with(&mut self, other: Encoding<'a>) {
        self.shape = self.shape.merge(&other.shape);
        self.deltas.extend(other.deltas);
    }

    /// Split this item into chunks that fit in an ItemVariationData subtable.
    ///
    /// we can only encode up to u16::MAX items in a single subtable, so if we
    /// have more items than that we split them off now.
    fn iter_split_into_table_size_chunks(self) -> impl Iterator<Item = Encoding<'a>> {
        let mut next = Some(self);
        std::iter::from_fn(move || {
            let mut this = next.take()?;
            next = this.split_off_back();
            Some(this)
        })
    }

    /// If we contain more than the max allowed items, split the extra items off
    ///
    /// This ensures `self` can be encoded.
    fn split_off_back(&mut self) -> Option<Self> {
        const MAX_ITEMS: usize = 0xFFFF;
        if self.deltas.len() <= MAX_ITEMS {
            return None;
        }
        let deltas = self.deltas.split_off(MAX_ITEMS);
        Some(Self {
            shape: self.shape.clone(),
            deltas,
        })
    }

    /// Encode this set of rows as a single ItemVariationData subtable.
    ///
    /// Also records the final (outer, inner) index of each row in `key_map`,
    /// keyed by the row's temporary id.
    fn encode(
        self,
        key_map: &mut VariationIndexRemapping,
        subtable_idx: u16,
    ) -> Option<ItemVariationData> {
        log::trace!(
            "encoding subtable {subtable_idx} ({} rows, {}B)",
            self.deltas.len(),
            self.cost()
        );
        assert!(self.deltas.len() <= 0xffff, "call split_off_back first");
        let item_count = self.deltas.len() as u16;
        if item_count == 0 {
            //TODO: figure out when a null subtable is useful?
            return None;
        }

        let region_map = self.shape.region_map();
        let n_regions = self.shape.n_non_zero_regions();
        let total_n_delta_values = self.deltas.len() * n_regions;
        let mut raw_deltas = vec![0i32; total_n_delta_values];

        // first we generate a vec of i32s, which represents an uncompressed
        // 2d array where rows are items and columns are per-region values.
        for (i, (delta, raw_key)) in self.deltas.iter().enumerate() {
            let pos = i * n_regions;
            for (region, val) in &delta.0 {
                // zero-width columns are dropped from the subtable entirely
                let Some(column_idx) = region_map.column_index_for_region(*region) else {
                    continue;
                };
                let idx = pos + column_idx as usize;
                raw_deltas[idx] = *val;
            }
            let final_key = VariationIndex::new(subtable_idx, i as u16);
            key_map.set(*raw_key, final_key);
        }

        // then we convert the correctly-ordered i32s into the final compressed
        // representation.
        let delta_sets = region_map.encode_raw_delta_values(raw_deltas);
        let word_delta_count = region_map.word_delta_count();
        let region_indexes = region_map.indices();

        Some(ItemVariationData::new(
            item_count,
            word_delta_count,
            region_indexes,
            delta_sets,
        ))
    }

    /// match the sorting behaviour that fonttools uses for the final sorting.
    ///
    /// fonttools's behaviour is particular, because they store the 'rowshape' as
    /// a packed bitvec with the least significant bits storing the first item,
    /// e.g. it's the inverse of our default order. Also we don't want to include
    /// our temporary ids.
    fn ord_matching_fonttools(&self, other: &Self) -> std::cmp::Ordering {
        // first just compare the cost
        let cost_ord = self.shape.row_cost().cmp(&other.shape.row_cost());
        if cost_ord != std::cmp::Ordering::Equal {
            return cost_ord;
        }

        debug_assert_eq!(
            self.shape.0.len(),
            other.shape.0.len(),
            "all shapes have same # of regions"
        );

        // if cost is equal, compare each column, in reverse
        for (a, b) in self.shape.0.iter().rev().zip(other.shape.0.iter().rev()) {
            match a.cmp(b) {
                std::cmp::Ordering::Equal => (), // continue
                not_eq => return not_eq,
            }
        }
        std::cmp::Ordering::Equal
    }
}
706
impl RegionMap {
    /// Takes the delta data as a vec of i32s, writes a vec of BigEndian bytes.
    ///
    /// This is mostly boilerplate around whether we are writing i16 and i8, or
    /// i32 and i16.
    ///
    /// Invariant: the raw deltas are sorted based on the region ordering of this
    /// RegionMap.
    fn encode_raw_delta_values(&self, raw_deltas: Vec<i32>) -> Vec<u8> {
        // handles the branching logic of whether long words are 32 or 16 bits.
        fn encode_words<'a>(
            long: &'a [i32],
            short: &'a [i32],
            long_words: bool,
        ) -> impl Iterator<Item = u8> + 'a {
            // dumb trick: the two branches have different concrete types,
            // so we need to unify them
            let left = long_words.then(|| {
                long.iter()
                    .flat_map(|x| x.to_be_bytes().into_iter())
                    .chain(short.iter().flat_map(|x| (*x as i16).to_be_bytes()))
            });
            let right = (!long_words).then(|| {
                long.iter()
                    .flat_map(|x| (*x as i16).to_be_bytes().into_iter())
                    .chain(short.iter().flat_map(|x| (*x as i8).to_be_bytes()))
            });

            // combine the two branches into a single type
            left.into_iter()
                .flatten()
                .chain(right.into_iter().flatten())
        }

        if self.n_active_regions == 0 {
            return Default::default();
        }

        raw_deltas
            // each chunk is one row; long columns are sorted first (see region_map)
            .chunks(self.n_active_regions as usize)
            .flat_map(|delta_set| {
                let (long, short) = delta_set.split_at(self.n_long_regions as usize);
                encode_words(long, short, self.long_words)
            })
            .collect()
    }

    /// Compute the 'wordDeltaCount' field
    ///
    /// This is a packed field, with the high bit indicating if we have 2-or-4-byte
    /// words, and the low 15 bits indicating the number of 'long' types
    fn word_delta_count(&self) -> u16 {
        let long_flag = if self.long_words { 0x8000 } else { 0 };
        self.n_long_regions | long_flag
    }

    /// For the provided canonical region index, returns the column index used
    /// in this encoding, or None if the region is ignored.
    fn column_index_for_region(&self, region: u16) -> Option<u16> {
        let (column, bits) = self.regions_to_columns[region as usize];
        // a zero-width column stores no data at all
        (bits != ColumnBits::None).then_some(column)
    }

    /// the indexes into the canonical region list of the active columns
    fn indices(&self) -> Vec<u16> {
        let mut result: Vec<_> = self
            .regions_to_columns
            .iter()
            .enumerate()
            .filter_map(|(i, (_, bits))| (*bits as u8 > 0).then_some(i as _))
            .collect();
        // we need this result to be sorted based on the local order:
        result.sort_unstable_by_key(|region_idx| {
            self.regions_to_columns
                .get(*region_idx as usize)
                .map(|(column, _)| *column)
                // this can't fail since we got the indexes from this array
                // immediately previously, but this probably generates better
                // code than an unwrap
                .unwrap_or(u16::MAX)
        });
        result
    }
}
791
792impl VariationIndexRemapping {
793    fn set(&mut self, from: TemporaryDeltaSetId, to: VariationIndex) {
794        self.map.insert(from, to);
795    }
796
797    pub fn get(&self, from: TemporaryDeltaSetId) -> Option<VariationIndex> {
798        self.map.get(&from).cloned()
799    }
800
801    /// convert to tuple for easier comparisons in tests
802    #[cfg(test)]
803    fn get_raw(&self, from: TemporaryDeltaSetId) -> Option<(u16, u16)> {
804        self.map
805            .get(&from)
806            .map(|var| (var.delta_set_outer_index, var.delta_set_inner_index))
807    }
808}
809
impl DeltaSetStorage {
    /// Add a delta set, returning its temporary id.
    ///
    /// For `Direct` storage, ids are sequential (one per added set, duplicates
    /// included). For `Deduplicated` storage, adding a delta set identical to
    /// one added previously returns the existing id.
    fn add(&mut self, delta_set: DeltaSet) -> TemporaryDeltaSetId {
        match self {
            DeltaSetStorage::Direct(deltas) => {
                let next_id = deltas.len() as u32;
                deltas.push(delta_set);
                next_id
            }
            DeltaSetStorage::Deduplicated(deltas) => {
                // only used if this is a new entry; an existing entry keeps
                // its previously assigned id
                let next_id = deltas.len() as u32;
                *deltas.entry(delta_set).or_insert(next_id)
            }
        }
    }

    /// Iterate all stored delta sets along with their temporary ids.
    fn iter(&self) -> impl Iterator<Item = (&DeltaSet, TemporaryDeltaSetId)> + '_ {
        // a dumb trick so that we are returning a single concrete type regardless
        // of which variant this is (which is required when returning impl Trait):
        // exactly one of the two options is Some, and we chain their iterators.
        let (a_vec, a_map) = match self {
            DeltaSetStorage::Direct(deltas) => (Some(deltas), None),
            DeltaSetStorage::Deduplicated(deltas) => (None, Some(deltas)),
        };
        a_vec
            .into_iter()
            .flat_map(|x| x.iter().enumerate().map(|(i, val)| (val, i as u32)))
            .chain(
                a_map
                    .into_iter()
                    .flat_map(|map| map.iter().map(|(val, idx)| (val, *idx))),
            )
    }
}
842
// a custom impl so that we match the behaviour of fonttools:
//
// - fonttools stores this densely, as just a tuple of deltas in region-order.
// - we store this sparsely, with explicit region indices.
// - this means that we need to handle the case where we are eliding a delta,
//   in one deltaset where we have a negative value in the other.
//   For example:
//   # fonttools rep (1, 5, -10), (1, 5, 0)
//   # fontations   [(0, 1), (1, 5), (2, -10), (0, 1), (1, 5)]
//
// in this case fonttools will sort the first set before the second, and we would
// do the opposite.
impl PartialOrd for DeltaSet {
    // delegate to the custom `Ord` impl below so the two orderings stay
    // consistent (required by the PartialOrd/Ord contract)
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
860
861impl Ord for DeltaSet {
862    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
863        let max_region_idx = self
864            .0
865            .iter()
866            .chain(other.0.iter())
867            .map(|(idx, _)| *idx)
868            .max()
869            .unwrap_or(0);
870
871        let left = DenseDeltaIter::new(&self.0, max_region_idx);
872        let right = DenseDeltaIter::new(&other.0, max_region_idx);
873
874        for (l, r) in left.zip(right) {
875            match l.cmp(&r) {
876                std::cmp::Ordering::Equal => (),
877                non_eq => return non_eq,
878            }
879        }
880        std::cmp::Ordering::Equal
881    }
882}
883
// a helper that iterates our sparse deltas, inserting explicit 0s for any missing
// regions.
//
// this is only used by the `Ord` impl for `DeltaSet`, above (via `cmp`)
struct DenseDeltaIter<'a> {
    // the last region index to yield, inclusive
    total_len: u16,
    // the next region index to yield
    cur_pos: u16,
    // remaining (region index, delta) pairs; assumed sorted ascending by
    // region index — `next` only ever inspects the first element
    deltas: &'a [(u16, i32)],
}
893
894impl<'a> DenseDeltaIter<'a> {
895    fn new(deltas: &'a [(u16, i32)], max_idx: u16) -> Self {
896        DenseDeltaIter {
897            total_len: max_idx,
898            deltas,
899            cur_pos: 0,
900        }
901    }
902}
903
904impl Iterator for DenseDeltaIter<'_> {
905    type Item = i32;
906
907    fn next(&mut self) -> Option<Self::Item> {
908        if self.cur_pos > self.total_len {
909            return None;
910        }
911        let result = if self.deltas.first().map(|(idx, _)| *idx) == Some(self.cur_pos) {
912            let result = self.deltas.first().unwrap().1;
913            self.deltas = &self.deltas[1..];
914            result
915        } else {
916            0
917        };
918        self.cur_pos += 1;
919        Some(result)
920    }
921}
922
923impl Default for DeltaSetStorage {
924    fn default() -> Self {
925        Self::Deduplicated(Default::default())
926    }
927}
928
929impl Display for RowShape {
930    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
931        for col in &self.0 {
932            match col {
933                ColumnBits::None => write!(f, "-"),
934                ColumnBits::One => write!(f, "B"),
935                ColumnBits::Two => write!(f, "S"),
936                ColumnBits::Four => write!(f, "L"),
937            }?
938        }
939        Ok(())
940    }
941}
942
943impl Debug for Encoding<'_> {
944    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
945        write!(
946            f,
947            "Encoding({}, {} items {} bytes)",
948            self.shape,
949            self.deltas.len(),
950            self.cost()
951        )
952    }
953}
954
impl Debug for Encoder<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // only the encodings are interesting for debugging; the remaining
        // fields are elided, which `finish_non_exhaustive` signals with '..'
        f.debug_struct("Encoder")
            .field("encodings", &self.encodings)
            .finish_non_exhaustive()
    }
}
962
963#[cfg(test)]
964mod tests {
965    use crate::tables::variations::RegionAxisCoordinates;
966    use font_types::F2Dot14;
967    use read_fonts::{FontData, FontRead};
968
969    use super::*;
970
    // shorthand for building the (start, peak, end) coordinates of one axis
    fn reg_coords(min: f32, default: f32, max: f32) -> RegionAxisCoordinates {
        RegionAxisCoordinates {
            start_coord: F2Dot14::from_f32(min),
            peak_coord: F2Dot14::from_f32(default),
            end_coord: F2Dot14::from_f32(max),
        }
    }
978
    // three distinct two-axis regions shared by the tests below
    fn test_regions() -> [VariationRegion; 3] {
        [
            VariationRegion::new(vec![reg_coords(0.0, 0.2, 1.0), reg_coords(0.0, 0.0, 1.0)]),
            VariationRegion::new(vec![reg_coords(0.0, 0.1, 0.3), reg_coords(0.0, 0.1, 0.3)]),
            VariationRegion::new(vec![reg_coords(0.0, 0.1, 0.5), reg_coords(0.0, 0.1, 0.3)]),
        ]
    }
986
    // basic end-to-end check: regions are collected and delta sets are
    // grouped into the expected subtables
    #[test]
    #[allow(clippy::redundant_clone)]
    fn smoke_test() {
        let [r1, r2, r3] = test_regions();

        let mut builder = VariationStoreBuilder::new(2);
        builder.add_deltas(vec![(r1.clone(), 512), (r2, 266), (r3.clone(), 1115)]);
        builder.add_deltas(vec![(r3.clone(), 20)]);
        builder.add_deltas(vec![(r3.clone(), 21)]);
        builder.add_deltas(vec![(r3, 22)]);

        // we should have three regions, and two subtables
        let (store, _) = builder.build();
        assert_eq!(store.variation_region_list.variation_regions.len(), 3);
        assert_eq!(store.item_variation_data.len(), 2);
        assert_eq!(
            store.item_variation_data[0]
                .as_ref()
                .unwrap()
                .region_indexes,
            vec![2]
        );
        assert_eq!(
            store.item_variation_data[1]
                .as_ref()
                .unwrap()
                .region_indexes,
            vec![0, 1, 2]
        );
    }
1017
    // temporary ids returned by add_deltas should remap to the correct
    // (outer, inner) VariationIndex values after build()
    #[test]
    fn key_mapping() {
        let [r1, r2, r3] = test_regions();

        let mut builder = VariationStoreBuilder::new(2);
        let k1 = builder.add_deltas(vec![(r1.clone(), 5), (r2, 1000), (r3.clone(), 1500)]);
        let k2 = builder.add_deltas(vec![(r1.clone(), -3), (r3.clone(), 20)]);
        let k3 = builder.add_deltas(vec![(r1.clone(), -12), (r3.clone(), 7)]);
        // add enough items so that the optimizer doesn't merge these two encodings
        let _ = builder.add_deltas(vec![(r1.clone(), -10), (r3.clone(), 7)]);
        let _ = builder.add_deltas(vec![(r1.clone(), -9), (r3.clone(), 7)]);
        let _ = builder.add_deltas(vec![(r1, -11), (r3, 7)]);

        // we should have three regions, and two subtables
        let (_, key_lookup) = builder.build();

        // first subtable has only one item
        // first item gets mapped into second subtable, because of how we sort
        assert_eq!(key_lookup.get_raw(k1).unwrap(), (1, 0),);
        // next two items are in the same (first) subtable
        // inner indexes are based on sort order within the subtable:
        assert_eq!(key_lookup.get_raw(k2).unwrap(), (0, 4),); // largest r1 value
        assert_eq!(key_lookup.get_raw(k3).unwrap(), (0, 0),); // smallest r1 value

        assert_eq!(key_lookup.map.len(), 6);
    }
1046
    // really just here to check my own understanding of what's going on:
    // the fonttools-style packed repr puts the first column in the high nibble
    #[test]
    fn fontools_rowshape_repr() {
        use ColumnBits as C;
        let shape1 = RowShape(vec![C::None, C::One, C::One, C::Two]);
        assert_eq!(shape1.to_fonttools_repr(), 0b0011_0001_0001_0000);
        let shape2 = RowShape(vec![C::Two, C::One, C::One, C::None]);
        assert_eq!(shape2.to_fonttools_repr(), 0b0000_0001_0001_0011);

        assert!(shape1.to_fonttools_repr() > shape2.to_fonttools_repr());
    }
1058
    // subtable order after build() should follow encoding cost, with ties
    // broken the way fonttools breaks them
    #[test]
    fn encoding_sort_order() {
        let _ = env_logger::builder().is_test(true).try_init();
        let [r1, r2, r3] = test_regions();

        // make two encodings that have the same total cost, but different shape

        let mut builder = VariationStoreBuilder::new(2);
        // shape (2, 1, 0)
        builder.add_deltas(vec![(r1.clone(), 1000), (r2.clone(), 5)]);
        builder.add_deltas(vec![(r1.clone(), 1013), (r2.clone(), 20)]);
        builder.add_deltas(vec![(r1.clone(), 1014), (r2.clone(), 21)]);
        // shape (0, 2, 1)
        builder.add_deltas(vec![(r2.clone(), 1212), (r3.clone(), 7)]);
        builder.add_deltas(vec![(r2.clone(), 1213), (r3.clone(), 8)]);
        builder.add_deltas(vec![(r2.clone(), 1214), (r3.clone(), 8)]);

        //shape (1, 0, 1)
        builder.add_deltas(vec![(r1.clone(), 12), (r3.clone(), 7)]);
        builder.add_deltas(vec![(r1.clone(), 13), (r3.clone(), 9)]);
        builder.add_deltas(vec![(r1.clone(), 14), (r3.clone(), 10)]);
        builder.add_deltas(vec![(r1.clone(), 15), (r3.clone(), 11)]);
        builder.add_deltas(vec![(r1.clone(), 16), (r3.clone(), 12)]);

        let (var_store, key_lookup) = builder.build();
        assert_eq!(var_store.item_variation_data.len(), 3);
        assert_eq!(key_lookup.map.len(), 11);

        // encoding (1, 0, 1) will be sorted first, since it has the lowest cost
        assert_eq!(
            var_store.item_variation_data[0]
                .as_ref()
                .unwrap()
                .region_indexes,
            vec![0, 2]
        );

        // then encoding with shape (2, 1, 0) since the costs are equal and we
        // compare backwards, to match fonttools
        assert_eq!(
            var_store.item_variation_data[1]
                .as_ref()
                .unwrap()
                .region_indexes,
            vec![0, 1]
        );
    }
1106
    // round-trip: compile the store to bytes and read it back with read-fonts
    #[test]
    #[allow(clippy::redundant_clone)]
    fn to_binary() {
        let [r1, r2, r3] = test_regions();

        let mut builder = VariationStoreBuilder::new(2);
        builder.add_deltas(vec![(r1.clone(), 512), (r2, 1000), (r3.clone(), 265)]);
        builder.add_deltas(vec![(r1.clone(), -3), (r3.clone(), 20)]);
        builder.add_deltas(vec![(r1.clone(), -12), (r3.clone(), 7)]);
        builder.add_deltas(vec![(r1.clone(), -11), (r3.clone(), 8)]);
        builder.add_deltas(vec![(r1.clone(), -10), (r3.clone(), 9)]);
        let (table, _) = builder.build();
        let bytes = crate::dump_table(&table).unwrap();
        let data = FontData::new(&bytes);

        let reloaded = read_fonts::tables::variations::ItemVariationStore::read(data).unwrap();

        assert_eq!(reloaded.item_variation_data_count(), 2);
        let var_data_array = reloaded.item_variation_data();

        // delta sets within a subtable come back in sorted order
        let var_data = var_data_array.get(0).unwrap().unwrap();
        assert_eq!(var_data.region_indexes(), &[0, 2]);
        assert_eq!(var_data.item_count(), 4);
        assert_eq!(var_data.delta_set(0).collect::<Vec<_>>(), vec![-12, 7]);
        assert_eq!(var_data.delta_set(1).collect::<Vec<_>>(), vec![-11, 8]);
        assert_eq!(var_data.delta_set(2).collect::<Vec<_>>(), vec![-10, 9]);
        assert_eq!(var_data.delta_set(3).collect::<Vec<_>>(), vec![-3, 20]);

        let var_data = var_data_array.get(1).unwrap().unwrap();
        assert_eq!(var_data.region_indexes(), &[0, 1, 2]);
        assert_eq!(var_data.item_count(), 1);
        assert_eq!(
            var_data.delta_set(0).collect::<Vec<_>>(),
            vec![512, 1000, 265]
        );
    }
1143
    // identical delta sets should be assigned the same temporary id
    #[test]
    fn reuse_identical_variation_data() {
        let _ = env_logger::builder().is_test(true).try_init();
        let [r1, r2, r3] = test_regions();

        let mut builder = VariationStoreBuilder::new(2);
        let k1 = builder.add_deltas(vec![(r1.clone(), 5), (r2, 10), (r3.clone(), 15)]);
        let k2 = builder.add_deltas(vec![(r1.clone(), -12), (r3.clone(), 7)]);
        let k3 = builder.add_deltas(vec![(r1.clone(), -12), (r3.clone(), 7)]);
        let k4 = builder.add_deltas(vec![(r1, 322), (r3, 532)]);

        // four adds, but only three unique delta sets -> three mapping entries
        let (_, key_lookup) = builder.build();
        assert_eq!(k2, k3);
        assert_ne!(k1, k2);
        assert_ne!(k1, k4);
        assert_eq!(key_lookup.map.len(), 3);
    }
1162
    /// if we have a single region set, where some deltas are 32-bit, the
    /// smaller deltas should get their own subtable IFF we save enough bytes
    /// to justify this
    #[test]
    #[allow(clippy::redundant_clone)]
    fn long_deltas_split() {
        let [r1, r2, _] = test_regions();
        let mut builder = VariationStoreBuilder::new(2);
        // short
        builder.add_deltas(vec![(r1.clone(), 1), (r2.clone(), 2)]);
        builder.add_deltas(vec![(r1.clone(), 3), (r2.clone(), 4)]);
        builder.add_deltas(vec![(r1.clone(), 5), (r2.clone(), 6)]);
        // long (> u16::MAX forces the 32-bit representation)
        builder.add_deltas(vec![(r1.clone(), 0xffff + 1), (r2.clone(), 0xffff + 2)]);
        let mut encoder = builder.encoder();
        assert_eq!(encoder.encodings.len(), 2);
        encoder.optimize();
        // still two encodings: merging would cost more than it saves
        assert_eq!(encoder.encodings.len(), 2);
    }
1182
    /// combine smaller deltas into larger when there aren't many of them
    #[test]
    #[allow(clippy::redundant_clone)]
    fn long_deltas_combine() {
        let [r1, r2, _] = test_regions();
        let mut builder = VariationStoreBuilder::new(2);
        // short
        builder.add_deltas(vec![(r1.clone(), 1), (r2.clone(), 2)]);
        builder.add_deltas(vec![(r1.clone(), 3), (r2.clone(), 4)]);
        // long
        builder.add_deltas(vec![(r1.clone(), 0xffff + 1), (r2.clone(), 0xffff + 2)]);

        let mut encoder = builder.encoder();
        assert_eq!(encoder.encodings.len(), 2);
        assert_eq!(encoder.encodings[0].shape.overhead(), 14); // 10 base, 2 * 2 columns
        assert_eq!(encoder.encodings[0].cost(), 14 + 4); // overhead + 2 * 2 bytes/row
        assert_eq!(encoder.encodings[1].shape.overhead(), 14);
        assert_eq!(encoder.encodings[1].cost(), 14 + 8); // overhead + 1 * 8 bytes/rows
        encoder.optimize();
        // here merging into a single wide encoding is the cheaper option
        assert_eq!(encoder.encodings.len(), 1);
    }
1204
    // ensure that we are merging as expected
    #[test]
    #[allow(clippy::redundant_clone)]
    fn combine_many_shapes() {
        let _ = env_logger::builder().is_test(true).try_init();
        let [r1, r2, r3] = test_regions();
        let mut builder = VariationStoreBuilder::new(2);
        // orchestrate a failure case:
        // - we want to combine
        builder.add_deltas(vec![(r1.clone(), 0xffff + 5)]); // (L--)
        builder.add_deltas(vec![(r1.clone(), 2)]); // (B--)
        builder.add_deltas(vec![(r1.clone(), 300)]); // (S--)
        builder.add_deltas(vec![(r2.clone(), 0xffff + 5)]); // (-L-)
        builder.add_deltas(vec![(r2.clone(), 2)]); // (-B-)
        builder.add_deltas(vec![(r2.clone(), 300)]); // (-S-)
        builder.add_deltas(vec![(r3.clone(), 0xffff + 5)]); // (--L)
        builder.add_deltas(vec![(r3.clone(), 2)]); // (--B)
        builder.add_deltas(vec![(r3.clone(), 300)]); // (--S)
        let mut encoder = builder.encoder();
        encoder.optimize();
        // we compile down to three subtables, each with one column
        assert_eq!(encoder.encodings.len(), 3);
        // and any further merge would be a net loss in bytes
        assert!(encoder.encodings[0]
            .compute_gain(&encoder.encodings[1])
            .is_negative());
    }
1231
    // spell out the byte accounting for merging encodings with 32-bit deltas
    #[test]
    #[allow(clippy::redundant_clone)]
    fn combine_two_big_fellas() {
        let _ = env_logger::builder().is_test(true).try_init();
        let [r1, r2, r3] = test_regions();
        let mut builder = VariationStoreBuilder::new(2);
        // we only combine two of these, since that saves 2 bytes, but adding
        // the third is too expensive
        builder.add_deltas(vec![(r1.clone(), 0xffff + 5)]); // (L--)
        builder.add_deltas(vec![(r2.clone(), 0xffff + 5)]); // (-L-)
        builder.add_deltas(vec![(r3.clone(), 0xffff + 5)]); // (--L)

        let mut encoder = builder.encoder();
        assert_eq!(encoder.encodings[0].cost(), 16);
        let merge_cost = 2 // extra column
            + 4 // existing encoding gets extra column
            + 8; // two columns for new row
        assert_eq!(
            encoder.encodings[0].compute_gain(&encoder.encodings[1]),
            16 - merge_cost
        );
        encoder.optimize();

        // we didn't merge any further because it's too expensive
        let next_merge_cost = 2
            + 2 * 4 // two existing rows get extra column
            + 12; // three columns for new row
        assert_eq!(encoder.encodings.len(), 2);
        assert_eq!(encoder.encodings[0].cost(), 16);
        assert_eq!(
            encoder.encodings[0].compute_gain(&encoder.encodings[1]),
            16 - next_merge_cost
        );
    }
1266
    /// we had a crash here where we were trying to write zeros when they should
    /// be getting ignored.
    #[test]
    fn ensure_zero_deltas_dont_write() {
        let _ = env_logger::builder().is_test(true).try_init();
        let [r1, r2, _] = test_regions();
        let mut builder = VariationStoreBuilder::new(2);
        builder.add_deltas(vec![(r1.clone(), 0), (r2.clone(), 4)]);
        // regression test: build() must not panic
        let _ = builder.build();
    }
1277
    // we had another crash here where when *all* deltas were zero we would
    // call 'slice.chunks()' with '0', which is not allowed
    #[test]
    fn ensure_all_zeros_dont_write() {
        let _ = env_logger::builder().is_test(true).try_init();
        let [r1, r2, _] = test_regions();
        let mut builder = VariationStoreBuilder::new(2);
        builder.add_deltas(vec![(r1.clone(), 0), (r2.clone(), 0)]);
        // regression test: build() must not panic
        let _ = builder.build();
    }
1288
    // wide (word) columns must be packed before narrow ones, and
    // region_indexes must reflect that column order
    #[test]
    fn vardata_region_indices_order() {
        let r0 = VariationRegion::new(vec![reg_coords(0.0, 0.5, 1.0)]);
        let r1 = VariationRegion::new(vec![reg_coords(0.5, 1.0, 1.0)]);

        let mut builder = VariationStoreBuilder::new(1);
        builder.add_deltas(vec![(r0.clone(), 1), (r1.clone(), 2)]);
        // 256 won't fit in a u8 thus we expect the deltas for the column corresponding
        // to r1 will be packed as u16
        builder.add_deltas(vec![(r0.clone(), 3), (r1.clone(), 256)]);

        let (store, _varidx_map) = builder.build();

        assert_eq!(store.variation_region_list.variation_regions.len(), 2);
        assert_eq!(store.item_variation_data.len(), 1);

        let var_data = store.item_variation_data[0].as_ref().unwrap();

        assert_eq!(var_data.item_count, 2);
        assert_eq!(var_data.word_delta_count, 1);
        // this should be [1, 0] and not [0, 1] because the regions with wider
        // deltas should be packed first.
        // var_data.region_indexes is an array of indices into the variation region list
        // in the order of the columns of the variation data. So it maps from column index
        // to region index, not the other way around.
        assert_eq!(var_data.region_indexes, vec![1, 0]);
        assert_eq!(
            var_data.delta_sets,
            // ItemVariationData packs deltas as two-dimensional [u8] array
            // with item_count rows and region_index_count columns.
            // In this particular case (word_count=1) the first column contains 'words'
            // with 2-byte deltas, followed by the second column with 1-byte deltas.
            vec![
                // item[0]
                0, 2, // 2: delta for r1
                1, //    1: delta for r0
                // item[1]
                1, 0, // 256: delta for r1
                3, //    3: delta for r0
            ],
        );
    }
1331
    // the implicit-indices (no dedup, no merge) builder: one subtable, one
    // row per add, and an identity key mapping
    #[test]
    fn unoptimized_version() {
        let r0 = VariationRegion::new(vec![reg_coords(0.0, 0.5, 1.0)]);
        let r1 = VariationRegion::new(vec![reg_coords(0.5, 1.0, 1.0)]);

        let mut builder = VariationStoreBuilder::new_with_implicit_indices(1);
        builder.add_deltas(vec![(r0.clone(), 1), (r1.clone(), 2)]);
        // 256 won't fit in a u8 thus we expect the deltas for the column corresponding
        // to r1 will be packed as u16
        builder.add_deltas(vec![(r0.clone(), 1), (r1.clone(), 2)]);
        builder.add_deltas(vec![(r0.clone(), 3), (r1.clone(), 256)]);
        builder.add_deltas(vec![(r0.clone(), 3), (r1.clone(), 256)]);

        let (ivs, key_map) = builder.build();
        // we should get an ivs with one subtable, containing four deltas
        assert_eq!(ivs.item_variation_data.len(), 1);

        let var_data = ivs.item_variation_data[0].as_ref().unwrap();
        assert_eq!(var_data.item_count, 4);
        assert_eq!(var_data.region_indexes, vec![1, 0]);

        assert_eq!(
            var_data.delta_sets,
            &[
                0x0, 0x2, // item 1, region 2
                0x1, // item 1, region 1
                0x0, 0x2, // item 2, region 2
                0x1, // item 2, region 1
                0x1, 0x0, // item 3, region 2
                0x3, // item 3, region 1
                0x1, 0x0, // item 4, region 2
                0x3, // item 4, region 1
            ]
        );

        // assert that keymap entries are identity mapping
        assert_eq!(key_map.map.len(), 4);
        assert!(key_map
            .map
            .iter()
            .all(|(key, idx)| *key == idx.delta_set_inner_index as u32))
    }
1374
    // exercises the custom Ord impl for DeltaSet: elided deltas compare as
    // explicit zeros, matching fonttools' dense representation
    #[test]
    fn delta_set_ordering() {
        let left = DeltaSet(vec![(0, 1), (1, 2), (2, -11)]);
        let right = DeltaSet(vec![(0, 1), (1, 2)]);

        // although the vec ord impl thinks that the left is 'bigger'
        // (it having more items):
        assert!(left.0 > right.0);
        // our custom impl treats it as smaller, matching fonttools
        assert!(left < right);

        // but this is only the case because the delta is negative
        let left = DeltaSet(vec![(0, 1), (1, 2), (2, 11)]);
        let right = DeltaSet(vec![(0, 1), (1, 2)]);

        assert!(left > right);
        let left = DeltaSet(vec![(0, 1), (1, 2), (2, -11)]);
        let right = DeltaSet(vec![(0, 1), (1, 2), (3, 0)]);

        assert!(left < right);

        // also true in the middle
        let left = DeltaSet(vec![(0, 1), (1, -2), (2, -11)]);
        let right = DeltaSet(vec![(0, 1), (2, -11)]);
        assert!(left < right)
    }
1401
    // an empty delta set and an all-zero delta set should deduplicate to the
    // same stored entry
    #[test]
    fn no_duplicate_zero_delta_sets() {
        let r0 = VariationRegion::new(vec![reg_coords(0.0, 5.0, 1.0)]);
        let r1 = VariationRegion::new(vec![reg_coords(0.5, 1.0, 1.0)]);
        let mut builder = VariationStoreBuilder::new(1);
        let varidxes = vec![
            // first glyph has no variations (e.g. .notdef only defined at default location)
            // but we still need to add it to the variation store to reserve an index so
            // we add an empty delta set
            builder.add_deltas::<i32>(Vec::new()),
            builder.add_deltas(vec![(r0.clone(), 50), (r1.clone(), 100)]),
            // this glyph has explicit masters that are *all* the same as the default (delta is 0);
            // we expect the builder to reuse the same no-op delta set as the first glyph
            builder.add_deltas(vec![(r0.clone(), 0), (r1.clone(), 0)]),
            // this glyph repeats the same delta set as the second glyph, thus we expect
            // the builder to map it to the same delta set index
            builder.add_deltas(vec![(r0.clone(), 50), (r1.clone(), 100)]),
            // this glyph happens to have one master that's the same as the default (delta is 0);
            // nothing special here, we expect a new delta set to be created
            builder.add_deltas(vec![(r0.clone(), 0), (r1.clone(), 100)]),
        ];
        let (store, key_map) = builder.build();

        let varidx_map: Vec<u32> = varidxes
            .into_iter()
            .map(|idx| key_map.get(idx).unwrap().into())
            .collect::<Vec<_>>();

        assert_eq!(store.variation_region_list.variation_regions.len(), 2);
        assert_eq!(store.item_variation_data.len(), 1);

        let var_data = store.item_variation_data[0].as_ref().unwrap();

        assert_eq!(var_data.item_count, 3);
        assert_eq!(var_data.word_delta_count, 0);
        assert_eq!(var_data.region_indexes, vec![0, 1]);
        assert_eq!(var_data.delta_sets, vec![0, 0, 0, 100, 50, 100],);
        // glyph 0 and 2 should map to the same no-op [0, 0] deltaset, while
        // glyph 1 and 3 should map to deltaset [50, 100];
        // glyph 4 should map to deltaset [0, 100]
        assert_eq!(varidx_map, vec![0, 2, 0, 2, 1]);
    }
1444
    // regions whose deltas are all zero must not appear in the region list
    #[test]
    fn prune_unused_regions() {
        // https://github.com/googlefonts/fontations/issues/733
        let r0 = VariationRegion::new(vec![reg_coords(-1.0, -0.5, 0.0)]);
        let r1 = VariationRegion::new(vec![reg_coords(-1.0, -1.0, 0.0)]);
        let r2 = VariationRegion::new(vec![reg_coords(0.0, 0.5, 1.0)]);
        let r3 = VariationRegion::new(vec![reg_coords(0.0, 1.0, 1.0)]);
        let mut builder = VariationStoreBuilder::new(1);
        builder.add_deltas(vec![
            (r0.clone(), 0),
            (r1.clone(), 50),
            (r2.clone(), 0),
            (r3.clone(), 100),
        ]);
        let (store, _) = builder.build();

        // not 4 regions, since only 2 are actually used
        assert_eq!(store.variation_region_list.variation_regions.len(), 2);
        assert_eq!(store.item_variation_data.len(), 1);

        let var_data = store.item_variation_data[0].as_ref().unwrap();
        assert_eq!(var_data.item_count, 1);
        assert_eq!(var_data.word_delta_count, 0);
        assert_eq!(var_data.region_indexes, vec![0, 1]); // not 1, 3
        assert_eq!(var_data.delta_sets, vec![50, 100]);
    }
1471
    // the optimizer's output must be deterministic regardless of insertion
    // order, and must match fonttools' tie-breaking
    #[test]
    fn we_match_fonttools_stable_order() {
        use rand::seq::SliceRandom;
        use rand::thread_rng;

        let mut builder = VariationStoreBuilder::new(1);
        let r1 = VariationRegion::new(vec![reg_coords(-1.0, -1.0, 0.0)]);
        let r2 = VariationRegion::new(vec![reg_coords(0.0, 1.0, 1.0)]);

        let mut delta_sets = vec![
            // 7 delta sets with row shape "BB"
            vec![(r1.clone(), 1), (r2.clone(), 2)],
            vec![(r1.clone(), 3), (r2.clone(), 4)],
            vec![(r1.clone(), 5), (r2.clone(), 6)],
            vec![(r1.clone(), 7), (r2.clone(), 8)],
            vec![(r1.clone(), 9), (r2.clone(), 10)],
            vec![(r1.clone(), 11), (r2.clone(), 12)],
            vec![(r1.clone(), 13), (r2.clone(), 14)],
            // 4 delta sets with row shape "-S"
            vec![(r1.clone(), 0), (r2.clone(), -130)],
            vec![(r1.clone(), 0), (r2.clone(), -129)],
            vec![(r1.clone(), 0), (r2.clone(), 128)],
            vec![(r1.clone(), 0), (r2.clone(), 129)],
            // 1 delta set with row shape "-B".
            // The gain from merging the following into either one of the previous
            // encodings happens to be the same so the order in which the winning pair
            // gets popped from the heap (sorted by relative gain) depends on the order
            // in which the delta sets were pushed; the sort key that fonttools uses to
            // sort the inputs (for stability) is such that the encoding with row shape
            // "-B" will be merged with the first encoding with row shape "BB" and not
            // with the second one with row shape "-S".
            vec![(r1.clone(), 0), (r2.clone(), -1)],
        ];

        // Add delta sets in random order and test that the algorithm is stable
        let mut rng = thread_rng();
        delta_sets.shuffle(&mut rng);
        for deltas in delta_sets {
            builder.add_deltas(deltas);
        }

        let (store, _) = builder.build();

        let bytes = crate::dump_table(&store).unwrap();
        let data = FontData::new(&bytes);
        let reloaded = read_fonts::tables::variations::ItemVariationStore::read(data).unwrap();

        assert_eq!(reloaded.item_variation_data_count(), 2);
        let var_data_array = reloaded.item_variation_data();

        let var_data = var_data_array.get(0).unwrap().unwrap();
        assert_eq!(var_data.region_indexes(), &[0, 1]);
        // count must be 8, not 7, because [0, -1] should be in here
        assert_eq!(var_data.item_count(), 8);
        assert_eq!(var_data.delta_set(0).collect::<Vec<_>>(), vec![0, -1]);
        assert_eq!(var_data.delta_set(1).collect::<Vec<_>>(), vec![1, 2]);
        assert_eq!(var_data.delta_set(2).collect::<Vec<_>>(), vec![3, 4]);
        assert_eq!(var_data.delta_set(3).collect::<Vec<_>>(), vec![5, 6]);
        assert_eq!(var_data.delta_set(4).collect::<Vec<_>>(), vec![7, 8]);
        assert_eq!(var_data.delta_set(5).collect::<Vec<_>>(), vec![9, 10]);
        assert_eq!(var_data.delta_set(6).collect::<Vec<_>>(), vec![11, 12]);
        assert_eq!(var_data.delta_set(7).collect::<Vec<_>>(), vec![13, 14]);

        let var_data = var_data_array.get(1).unwrap().unwrap();
        assert_eq!(var_data.region_indexes(), &[1]);
        assert_eq!(var_data.item_count(), 4);
        // ... and not in here
        assert_eq!(var_data.delta_set(0).collect::<Vec<_>>(), vec![-130]);
        assert_eq!(var_data.delta_set(1).collect::<Vec<_>>(), vec![-129]);
        assert_eq!(var_data.delta_set(2).collect::<Vec<_>>(), vec![128]);
        assert_eq!(var_data.delta_set(3).collect::<Vec<_>>(), vec![129]);
    }
1544}