// justpdf_core/font/subset.rs
//! TrueType font subsetting.
//!
//! Takes a TrueType font binary and a set of glyph IDs, and produces a new
//! font binary containing only those glyphs. This is used when embedding fonts
//! in PDF to reduce file size.

use std::collections::{BTreeSet, HashMap};

/// Result of font subsetting.
#[derive(Debug)]
pub struct SubsetResult {
    /// The subsetted font binary data (a complete TrueType file).
    pub data: Vec<u8>,
    /// Mapping from old glyph IDs to new glyph IDs.
    /// New IDs are dense, starting at 0, assigned in ascending old-ID order.
    pub gid_map: HashMap<u16, u16>,
}

/// A parsed TrueType table record (one 16-byte entry of the table directory).
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct TableRecord {
    /// 4-byte table tag, e.g. `b"glyf"`.
    tag: [u8; 4],
    /// Checksum as stored in the directory (parsed but not validated here).
    checksum: u32,
    /// Byte offset of the table data from the start of the font file.
    offset: u32,
    /// Unpadded length of the table data in bytes.
    length: u32,
}

/// Tables we keep in the subset font (in recommended order).
///
/// head/hhea/maxp/loca/glyf/hmtx are rebuilt or patched by `subset_font`;
/// the rest are copied verbatim when present.
/// NOTE(review): verbatim-copied tables such as `cmap` and `post` still refer
/// to the ORIGINAL glyph IDs after renumbering — confirm downstream consumers
/// (e.g. a PDF CIDToGIDMap) do not rely on them.
const KEPT_TABLES: &[[u8; 4]] = &[
    *b"head", *b"hhea", *b"maxp", *b"OS/2", *b"name", *b"cmap", *b"loca", *b"glyf", *b"hmtx",
    *b"post", *b"cvt ", *b"fpgm", *b"prep",
];

// Composite glyph flags (glyf table). Only the flags that affect a component
// record's length, or terminate the component list, are needed here.
const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001; // args are two i16s instead of two i8s
const WE_HAVE_A_SCALE: u16 = 0x0008; // one F2Dot14 scale follows
const MORE_COMPONENTS: u16 = 0x0020; // another component record follows
const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040; // two F2Dot14 scales follow
const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080; // 2x2 matrix: four F2Dot14 values follow

41/// Subset a TrueType font to include only the specified glyph IDs.
42///
43/// `font_data` is the raw TTF binary.
44/// `glyph_ids` is the set of glyph IDs to keep.
45/// Returns the subsetted font data and a mapping from old to new glyph IDs.
46///
47/// Returns `None` if the font data is invalid, too short, or uses CFF outlines.
48pub fn subset_font(font_data: &[u8], glyph_ids: &[u16]) -> Option<SubsetResult> {
49    if font_data.len() < 12 {
50        return None;
51    }
52
53    // Parse offset table.
54    let sf_version = read_u32(font_data, 0)?;
55    // Reject CFF/OpenType fonts (tag 'OTTO' = 0x4F54544F).
56    if sf_version == 0x4F54544F {
57        return None;
58    }
59    // Accept TrueType: 0x00010000 or 'true' (0x74727565).
60    if sf_version != 0x00010000 && sf_version != 0x74727565 {
61        return None;
62    }
63
64    let num_tables = read_u16(font_data, 4)? as usize;
65
66    // Parse table records.
67    let mut tables: Vec<TableRecord> = Vec::with_capacity(num_tables);
68    for i in 0..num_tables {
69        let rec_offset = 12 + i * 16;
70        if rec_offset + 16 > font_data.len() {
71            return None;
72        }
73        let mut tag = [0u8; 4];
74        tag.copy_from_slice(&font_data[rec_offset..rec_offset + 4]);
75        tables.push(TableRecord {
76            tag,
77            checksum: read_u32(font_data, rec_offset + 4)?,
78            offset: read_u32(font_data, rec_offset + 8)?,
79            length: read_u32(font_data, rec_offset + 12)?,
80        });
81    }
82
83    // Look up essential tables.
84    let find_table = |tag: &[u8; 4]| -> Option<&TableRecord> {
85        tables.iter().find(|t| &t.tag == tag)
86    };
87
88    let head_rec = find_table(b"head")?;
89    let maxp_rec = find_table(b"maxp")?;
90    let loca_rec = find_table(b"loca")?;
91    let glyf_rec = find_table(b"glyf")?;
92    let hhea_rec = find_table(b"hhea")?;
93    let hmtx_rec = find_table(b"hmtx")?;
94
95    // Read head.indexToLocFormat (offset 50 within the head table).
96    let head_data = table_data(font_data, head_rec)?;
97    if head_data.len() < 54 {
98        return None;
99    }
100    let index_to_loc_format = read_i16(head_data, 50)?;
101
102    // Read maxp.numGlyphs (offset 4 within maxp).
103    let maxp_data = table_data(font_data, maxp_rec)?;
104    if maxp_data.len() < 6 {
105        return None;
106    }
107    let total_glyphs = read_u16(maxp_data, 4)? as usize;
108
109    // Read number of long horizontal metrics from hhea (offset 34).
110    let hhea_data = table_data(font_data, hhea_rec)?;
111    if hhea_data.len() < 36 {
112        return None;
113    }
114    let num_h_metrics = read_u16(hhea_data, 34)? as usize;
115
116    // Parse loca table to get glyph offsets.
117    let loca_data = table_data(font_data, loca_rec)?;
118    let glyf_data = table_data(font_data, glyf_rec)?;
119
120    let glyph_offsets = parse_loca(loca_data, index_to_loc_format, total_glyphs)?;
121
122    // Build the set of glyphs to keep: always include glyph 0, plus requested glyphs,
123    // plus any components referenced by composite glyphs.
124    let mut keep_gids: BTreeSet<u16> = BTreeSet::new();
125    keep_gids.insert(0); // .notdef
126    for &gid in glyph_ids {
127        if (gid as usize) < total_glyphs {
128            keep_gids.insert(gid);
129        }
130    }
131
132    // Recursively find composite glyph components.
133    let mut work: Vec<u16> = keep_gids.iter().copied().collect();
134    while let Some(gid) = work.pop() {
135        let start = glyph_offsets[gid as usize];
136        let end = glyph_offsets[gid as usize + 1];
137        if start >= end {
138            continue; // empty glyph
139        }
140        let glyph_slice = glyf_data.get(start..end)?;
141        if glyph_slice.len() < 2 {
142            continue;
143        }
144        let num_contours = read_i16(glyph_slice, 0)?;
145        if num_contours >= 0 {
146            continue; // simple glyph
147        }
148        // Composite glyph: extract component glyph IDs.
149        let components = parse_composite_glyph_components(glyph_slice)?;
150        for comp_gid in components {
151            if (comp_gid as usize) < total_glyphs && keep_gids.insert(comp_gid) {
152                work.push(comp_gid);
153            }
154        }
155    }
156
157    // Build old-to-new GID mapping (sorted, sequential).
158    let sorted_gids: Vec<u16> = keep_gids.iter().copied().collect();
159    let mut gid_map: HashMap<u16, u16> = HashMap::new();
160    for (new_gid, &old_gid) in sorted_gids.iter().enumerate() {
161        gid_map.insert(old_gid, new_gid as u16);
162    }
163    let new_num_glyphs = sorted_gids.len() as u16;
164
165    // Build new glyf table, updating composite glyph references.
166    let mut new_glyf: Vec<u8> = Vec::new();
167    let mut new_loca_offsets: Vec<u32> = Vec::with_capacity(sorted_gids.len() + 1);
168
169    for &old_gid in &sorted_gids {
170        new_loca_offsets.push(new_glyf.len() as u32);
171        let start = glyph_offsets[old_gid as usize];
172        let end = glyph_offsets[old_gid as usize + 1];
173        if start >= end {
174            continue; // empty glyph, offset stays the same
175        }
176        let glyph_slice = glyf_data.get(start..end)?;
177        let num_contours = read_i16(glyph_slice, 0)?;
178        if num_contours >= 0 {
179            // Simple glyph: copy as-is.
180            new_glyf.extend_from_slice(glyph_slice);
181        } else {
182            // Composite glyph: rewrite component GIDs.
183            let mut patched = glyph_slice.to_vec();
184            rewrite_composite_glyph_ids(&mut patched, &gid_map)?;
185            new_glyf.extend_from_slice(&patched);
186        }
187        // Pad to 4-byte boundary.
188        while new_glyf.len() % 4 != 0 {
189            new_glyf.push(0);
190        }
191    }
192    // Final loca entry: end of last glyph.
193    new_loca_offsets.push(new_glyf.len() as u32);
194
195    // Build new loca table (use long format for simplicity).
196    let new_index_to_loc_format: i16 = 1; // long format
197    let mut new_loca: Vec<u8> = Vec::with_capacity(new_loca_offsets.len() * 4);
198    for &off in &new_loca_offsets {
199        new_loca.extend_from_slice(&off.to_be_bytes());
200    }
201
202    // Build new hmtx table.
203    let hmtx_data = table_data(font_data, hmtx_rec)?;
204    let new_hmtx = build_subset_hmtx(hmtx_data, &sorted_gids, num_h_metrics, total_glyphs)?;
205
206    // Patch head table: update indexToLocFormat and zero out checksumAdjustment.
207    let mut new_head = head_data.to_vec();
208    // Zero checksumAdjustment (offset 8, 4 bytes) — we fix it later.
209    write_u32(&mut new_head, 8, 0);
210    // Set indexToLocFormat to 1 (long).
211    write_i16(&mut new_head, 50, new_index_to_loc_format);
212
213    // Patch maxp table: update numGlyphs.
214    let mut new_maxp = maxp_data.to_vec();
215    write_u16(&mut new_maxp, 4, new_num_glyphs);
216
217    // Patch hhea table: update numberOfHMetrics to new_num_glyphs.
218    let mut new_hhea = hhea_data.to_vec();
219    write_u16(&mut new_hhea, 34, new_num_glyphs);
220
221    // Collect all table data for output.
222    struct TableEntry {
223        tag: [u8; 4],
224        data: Vec<u8>,
225    }
226
227    let mut out_tables: Vec<TableEntry> = Vec::new();
228
229    for kept_tag in KEPT_TABLES {
230        let data: Vec<u8> = match kept_tag {
231            b"head" => new_head.clone(),
232            b"hhea" => new_hhea.clone(),
233            b"maxp" => new_maxp.clone(),
234            b"loca" => new_loca.clone(),
235            b"glyf" => new_glyf.clone(),
236            b"hmtx" => new_hmtx.clone(),
237            _ => {
238                // Copy the original table if it exists; skip if not.
239                match find_table(kept_tag) {
240                    Some(rec) => table_data(font_data, rec)?.to_vec(),
241                    None => continue,
242                }
243            }
244        };
245        out_tables.push(TableEntry {
246            tag: *kept_tag,
247            data,
248        });
249    }
250
251    // Assemble the final font binary.
252    let num_out_tables = out_tables.len() as u16;
253    let (search_range, entry_selector, range_shift) = calc_table_search_params(num_out_tables);
254
255    // Offset table: 12 bytes.
256    // Table records: 16 bytes each.
257    let header_size = 12 + (num_out_tables as usize) * 16;
258    let mut output: Vec<u8> = Vec::new();
259
260    // Write offset table.
261    output.extend_from_slice(&0x00010000u32.to_be_bytes()); // sfVersion
262    output.extend_from_slice(&num_out_tables.to_be_bytes());
263    output.extend_from_slice(&search_range.to_be_bytes());
264    output.extend_from_slice(&entry_selector.to_be_bytes());
265    output.extend_from_slice(&range_shift.to_be_bytes());
266
267    // We need to compute the offsets for each table data block.
268    // Table data starts right after the header.
269    let mut data_offset = header_size;
270    // Pad each table to 4-byte boundary.
271    struct TableOut {
272        tag: [u8; 4],
273        checksum: u32,
274        offset: u32,
275        padded_data: Vec<u8>,
276    }
277
278    let mut table_outs: Vec<TableOut> = Vec::new();
279    for entry in &out_tables {
280        let mut padded = entry.data.clone();
281        while padded.len() % 4 != 0 {
282            padded.push(0);
283        }
284        let cs = calc_checksum(&padded);
285        table_outs.push(TableOut {
286            tag: entry.tag,
287            checksum: cs,
288            offset: data_offset as u32,
289            padded_data: padded.clone(),
290        });
291        data_offset += padded.len();
292    }
293
294    // Write table records.
295    for t in &table_outs {
296        output.extend_from_slice(&t.tag);
297        output.extend_from_slice(&t.checksum.to_be_bytes());
298        output.extend_from_slice(&t.offset.to_be_bytes());
299        // Length is the unpadded length.
300        let unpadded_len = out_tables
301            .iter()
302            .find(|e| e.tag == t.tag)
303            .map(|e| e.data.len() as u32)
304            .unwrap_or(t.padded_data.len() as u32);
305        output.extend_from_slice(&unpadded_len.to_be_bytes());
306    }
307
308    // Write table data.
309    for t in &table_outs {
310        output.extend_from_slice(&t.padded_data);
311    }
312
313    // Fix head.checksumAdjustment.
314    // The adjustment is: 0xB1B0AFBA - checksum_of_entire_file.
315    let file_checksum = calc_checksum(&output);
316    let adjustment = 0xB1B0AFBAu32.wrapping_sub(file_checksum);
317
318    // Find the head table offset in output and write the adjustment at offset 8.
319    if let Some(head_out) = table_outs.iter().find(|t| &t.tag == b"head") {
320        let adj_offset = head_out.offset as usize + 8;
321        if adj_offset + 4 <= output.len() {
322            write_u32(&mut output, adj_offset, adjustment);
323        }
324    }
325
326    Some(SubsetResult { data: output, gid_map })
327}
328
/// Parse the `loca` table into a vector of byte offsets into the `glyf` table.
/// Returns `total_glyphs + 1` entries (the last entry marks the end of the last glyph).
///
/// `format` is head.indexToLocFormat: 0 = short (u16 storing offset/2),
/// 1 = long (u32 byte offset). Any other format, or a table too short to
/// hold `num_glyphs + 1` entries, yields `None`.
fn parse_loca(loca_data: &[u8], format: i16, num_glyphs: usize) -> Option<Vec<usize>> {
    let count = num_glyphs + 1;
    match format {
        // Short format: each u16 holds the byte offset divided by two.
        0 if loca_data.len() >= count * 2 => Some(
            loca_data
                .chunks_exact(2)
                .take(count)
                .map(|pair| u16::from_be_bytes([pair[0], pair[1]]) as usize * 2)
                .collect(),
        ),
        // Long format: each u32 holds the byte offset directly.
        1 if loca_data.len() >= count * 4 => Some(
            loca_data
                .chunks_exact(4)
                .take(count)
                .map(|quad| u32::from_be_bytes([quad[0], quad[1], quad[2], quad[3]]) as usize)
                .collect(),
        ),
        // Truncated table or unknown format.
        _ => None,
    }
}

/// Parse a composite glyph to extract the component glyph IDs it references.
///
/// Returns `None` if the component list is truncated.
fn parse_composite_glyph_components(glyph_data: &[u8]) -> Option<Vec<u16>> {
    // Skip the 10-byte glyph header: numberOfContours (i16) + bbox (4 x i16).
    let mut cursor = 10usize;
    let mut gids = Vec::new();

    loop {
        // Each component record starts with (flags: u16, glyphIndex: u16).
        let entry = glyph_data.get(cursor..cursor + 4)?;
        let flags = u16::from_be_bytes([entry[0], entry[1]]);
        gids.push(u16::from_be_bytes([entry[2], entry[3]]));
        cursor += 4;

        // Argument pair: two i16s when ARG_1_AND_2_ARE_WORDS (0x0001) is set,
        // otherwise two i8s.
        cursor += if flags & 0x0001 != 0 { 4 } else { 2 };

        // Optional transform: one, two, or four F2Dot14 values.
        if flags & 0x0008 != 0 {
            cursor += 2; // WE_HAVE_A_SCALE
        } else if flags & 0x0040 != 0 {
            cursor += 4; // WE_HAVE_AN_X_AND_Y_SCALE
        } else if flags & 0x0080 != 0 {
            cursor += 8; // WE_HAVE_A_TWO_BY_TWO
        }

        // A clear MORE_COMPONENTS (0x0020) bit marks the last component.
        if flags & 0x0020 == 0 {
            return Some(gids);
        }
    }
}

/// Rewrite component glyph IDs in a composite glyph using the gid_map.
///
/// Returns `None` if the component list is truncated or a component refers
/// to a glyph ID that is absent from `gid_map` (the buffer may be partially
/// rewritten in that case).
fn rewrite_composite_glyph_ids(glyph_data: &mut [u8], gid_map: &HashMap<u16, u16>) -> Option<()> {
    // Skip the 10-byte glyph header (numberOfContours + bbox).
    let mut cursor = 10usize;

    loop {
        if glyph_data.len() < cursor + 4 {
            return None;
        }
        let flags = u16::from_be_bytes([glyph_data[cursor], glyph_data[cursor + 1]]);
        let old_gid = u16::from_be_bytes([glyph_data[cursor + 2], glyph_data[cursor + 3]]);
        // Patch the component's glyph index in place with its new ID.
        let new_gid = *gid_map.get(&old_gid)?;
        glyph_data[cursor + 2..cursor + 4].copy_from_slice(&new_gid.to_be_bytes());
        cursor += 4;

        // Argument pair: two i16s (ARG_1_AND_2_ARE_WORDS, 0x0001) or two i8s.
        cursor += if flags & 0x0001 != 0 { 4 } else { 2 };

        // Optional transform data.
        if flags & 0x0008 != 0 {
            cursor += 2; // WE_HAVE_A_SCALE
        } else if flags & 0x0040 != 0 {
            cursor += 4; // WE_HAVE_AN_X_AND_Y_SCALE
        } else if flags & 0x0080 != 0 {
            cursor += 8; // WE_HAVE_A_TWO_BY_TWO
        }

        // Clear MORE_COMPONENTS (0x0020) means we just handled the last one.
        if flags & 0x0020 == 0 {
            return Some(());
        }
    }
}

/// Build a new hmtx table for the subset glyphs.
///
/// Source hmtx layout:
///   - `num_h_metrics` entries of (advanceWidth: u16, lsb: i16) = 4 bytes each
///   - remaining glyphs: a bare lsb (i16) = 2 bytes each, implicitly reusing
///     the last advanceWidth.
///
/// The subset table is written with one full (advanceWidth, lsb) pair per kept
/// glyph, which matches the caller patching hhea.numberOfHMetrics to the new
/// glyph count. Bytes missing from a truncated source fall back to zeros
/// rather than failing, and `num_h_metrics == 0` (invalid per spec, but
/// attacker-controllable) no longer underflows.
fn build_subset_hmtx(
    hmtx_data: &[u8],
    sorted_gids: &[u16],
    num_h_metrics: usize,
    _total_glyphs: usize,
) -> Option<Vec<u8>> {
    let mut new_hmtx = Vec::with_capacity(sorted_gids.len() * 4);

    // Advance width of the last full metric, reused by glyphs beyond
    // num_h_metrics. Hoisted out of the loop; guarded so num_h_metrics == 0
    // cannot underflow (previously `(num_h_metrics - 1) * 4` panicked in
    // debug builds on such input).
    let last_aw: [u8; 2] = if num_h_metrics > 0 {
        let off = (num_h_metrics - 1) * 4;
        match hmtx_data.get(off..off + 2) {
            Some(s) => [s[0], s[1]],
            None => [0, 0],
        }
    } else {
        [0, 0]
    };

    for &old_gid in sorted_gids {
        let gid = old_gid as usize;
        if gid < num_h_metrics {
            // Full metric entry: copy it, or write zeros if the source is truncated.
            let offset = gid * 4;
            match hmtx_data.get(offset..offset + 4) {
                Some(s) => new_hmtx.extend_from_slice(s),
                None => new_hmtx.extend_from_slice(&[0u8; 4]),
            }
        } else {
            // Glyph beyond numOfLongHorMetrics: last advance width + per-glyph lsb.
            let lsb_offset = num_h_metrics * 4 + (gid - num_h_metrics) * 2;
            let lsb: [u8; 2] = match hmtx_data.get(lsb_offset..lsb_offset + 2) {
                Some(s) => [s[0], s[1]],
                None => [0, 0],
            };
            new_hmtx.extend_from_slice(&last_aw);
            new_hmtx.extend_from_slice(&lsb);
        }
    }

    Some(new_hmtx)
}

/// Calculate searchRange, entrySelector, rangeShift for the offset table.
///
/// Per the OpenType spec:
///   searchRange   = (largest power of two <= numTables) * 16
///   entrySelector = log2(searchRange / 16)
///   rangeShift    = numTables * 16 - searchRange
///
/// Returns (0, 0, 0) for zero tables instead of underflowing rangeShift.
/// Values are computed in u16 because that is their on-disk width; real fonts
/// stay far below the ~4096-table point where u16 products would overflow.
fn calc_table_search_params(num_tables: u16) -> (u16, u16, u16) {
    if num_tables == 0 {
        // `num_tables * 16 - search_range` would underflow below.
        return (0, 0, 0);
    }
    let mut power = 1u16;
    let mut log2 = 0u16;
    // `power <= num_tables / 2` is equivalent to `power * 2 <= num_tables`
    // for integers, without the u16 overflow the multiplication would hit
    // for num_tables >= 0x8000.
    while power <= num_tables / 2 {
        power *= 2;
        log2 += 1;
    }
    let search_range = power * 16;
    let entry_selector = log2;
    let range_shift = num_tables * 16 - search_range;
    (search_range, entry_selector, range_shift)
}

/// Calculate the checksum of a block of data (interpreted as big-endian u32 words).
///
/// Trailing bytes that do not fill a whole word are treated as if the data
/// were zero-padded to a 4-byte boundary, per the TrueType checksum rules.
fn calc_checksum(data: &[u8]) -> u32 {
    let mut words = data.chunks_exact(4);
    // Wrapping sum of all complete big-endian words.
    let mut sum = words.by_ref().fold(0u32, |acc, w| {
        acc.wrapping_add(u32::from_be_bytes([w[0], w[1], w[2], w[3]]))
    });
    // Fold in any leftover bytes as a zero-padded final word.
    let rem = words.remainder();
    if !rem.is_empty() {
        let mut last = [0u8; 4];
        last[..rem.len()].copy_from_slice(rem);
        sum = sum.wrapping_add(u32::from_be_bytes(last));
    }
    sum
}

518/// Get the raw data slice for a table.
519fn table_data<'a>(font_data: &'a [u8], rec: &TableRecord) -> Option<&'a [u8]> {
520    let start = rec.offset as usize;
521    let end = start + rec.length as usize;
522    font_data.get(start..end)
523}
524
// --- Binary read/write helpers ---

/// Read a big-endian u32 at `offset`; `None` if out of bounds.
fn read_u32(data: &[u8], offset: usize) -> Option<u32> {
    data.get(offset..offset + 4)
        .map(|b| u32::from_be_bytes([b[0], b[1], b[2], b[3]]))
}

/// Read a big-endian u16 at `offset`; `None` if out of bounds.
fn read_u16(data: &[u8], offset: usize) -> Option<u16> {
    data.get(offset..offset + 2)
        .map(|b| u16::from_be_bytes([b[0], b[1]]))
}

/// Read a big-endian i16 at `offset`; `None` if out of bounds.
fn read_i16(data: &[u8], offset: usize) -> Option<i16> {
    data.get(offset..offset + 2)
        .map(|b| i16::from_be_bytes([b[0], b[1]]))
}

/// Write a big-endian u32 at `offset`. Panics if the range is out of bounds.
fn write_u32(data: &mut [u8], offset: usize, val: u32) {
    data[offset..offset + 4].copy_from_slice(&val.to_be_bytes());
}

/// Write a big-endian u16 at `offset`. Panics if the range is out of bounds.
fn write_u16(data: &mut [u8], offset: usize, val: u16) {
    data[offset..offset + 2].copy_from_slice(&val.to_be_bytes());
}

/// Write a big-endian i16 at `offset`. Panics if the range is out of bounds.
fn write_i16(data: &mut [u8], offset: usize, val: i16) {
    data[offset..offset + 2].copy_from_slice(&val.to_be_bytes());
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal valid TrueType font binary for testing.
    ///
    /// Creates a font with the required tables: head, hhea, maxp, loca, glyf, hmtx.
    /// Contains 3 glyphs:
    ///   - GID 0: .notdef (empty glyph, 0 bytes in glyf)
    ///   - GID 1: simple glyph (12 bytes of dummy contour data)
    ///   - GID 2: simple glyph (12 bytes of dummy contour data)
    fn build_test_font() -> Vec<u8> {
        build_test_font_with_glyphs(3, &[], false)
    }

    /// Build a test font with `num_glyphs` glyphs. Entries in `composite_refs`
    /// turn the glyph at the given index into a composite glyph referencing
    /// the listed component GIDs. The loca table is always written in short
    /// format (the third parameter is currently unused).
    fn build_test_font_with_glyphs(
        num_glyphs: usize,
        composite_refs: &[(usize, Vec<u16>)], // (glyph_index, component_gids)
        _use_long_loca: bool,
    ) -> Vec<u8> {
        // We always use short loca format for test fonts.
        let index_to_loc_format: i16 = 0; // short

        // Build glyf table: glyph 0 is empty, rest are simple 12-byte glyphs
        // (or composite if specified).
        let mut glyf_entries: Vec<Vec<u8>> = Vec::new();
        for gid in 0..num_glyphs {
            if gid == 0 {
                // .notdef: empty
                glyf_entries.push(Vec::new());
                continue;
            }
            // Check if this glyph is composite.
            if let Some((_, components)) = composite_refs.iter().find(|(idx, _)| *idx == gid) {
                // Build composite glyph data.
                let mut data = Vec::new();
                // numberOfContours = -1 (composite)
                data.extend_from_slice(&(-1i16).to_be_bytes());
                // xMin, yMin, xMax, yMax
                data.extend_from_slice(&0i16.to_be_bytes());
                data.extend_from_slice(&0i16.to_be_bytes());
                data.extend_from_slice(&100i16.to_be_bytes());
                data.extend_from_slice(&100i16.to_be_bytes());
                // Component entries: MORE_COMPONENTS set on all but the last.
                for (i, &comp_gid) in components.iter().enumerate() {
                    let is_last = i == components.len() - 1;
                    let flags: u16 = if is_last { 0 } else { MORE_COMPONENTS };
                    data.extend_from_slice(&flags.to_be_bytes());
                    data.extend_from_slice(&comp_gid.to_be_bytes());
                    // Two i8 args (ARG_1_AND_2_ARE_WORDS not set).
                    data.push(0);
                    data.push(0);
                }
                // Pad to even length for short loca.
                while data.len() % 2 != 0 {
                    data.push(0);
                }
                glyf_entries.push(data);
            } else {
                // Simple glyph: 12 bytes (numberOfContours=1 + bbox + minimal data).
                let mut data = Vec::new();
                data.extend_from_slice(&1i16.to_be_bytes()); // numberOfContours
                data.extend_from_slice(&0i16.to_be_bytes()); // xMin
                data.extend_from_slice(&0i16.to_be_bytes()); // yMin
                data.extend_from_slice(&(100i16).to_be_bytes()); // xMax
                data.extend_from_slice(&(100i16).to_be_bytes()); // yMax
                // endPtsOfContours[0] = 0
                data.extend_from_slice(&0u16.to_be_bytes());
                glyf_entries.push(data);
            }
        }

        // Compute glyf table (concatenation of all glyph data).
        let mut glyf_table = Vec::new();
        let mut glyf_offsets: Vec<usize> = Vec::new();
        for entry in &glyf_entries {
            glyf_offsets.push(glyf_table.len());
            glyf_table.extend_from_slice(entry);
            // Pad individual glyph entries to 2-byte boundary (for short loca).
            while glyf_table.len() % 2 != 0 {
                glyf_table.push(0);
            }
        }
        // Final sentinel offset: end of the last glyph.
        glyf_offsets.push(glyf_table.len());

        // Build loca table (short format: offset / 2 as u16).
        let mut loca_table = Vec::new();
        for &off in &glyf_offsets {
            loca_table.extend_from_slice(&((off / 2) as u16).to_be_bytes());
        }

        // Build head table (54 bytes minimum — subset_font requires >= 54).
        let mut head_table = vec![0u8; 54];
        // version = 1.0
        write_u32(&mut head_table, 0, 0x00010000);
        // magicNumber at offset 12
        write_u32(&mut head_table, 12, 0x5F0F3CF5);
        // flags at offset 16
        write_u16(&mut head_table, 16, 0x000B);
        // unitsPerEm at offset 18
        write_u16(&mut head_table, 18, 1000);
        // indexToLocFormat at offset 50
        write_i16(&mut head_table, 50, index_to_loc_format);

        // Build maxp table (6 bytes minimum: version + numGlyphs).
        let mut maxp_table = vec![0u8; 6];
        write_u32(&mut maxp_table, 0, 0x00010000);
        write_u16(&mut maxp_table, 4, num_glyphs as u16);

        // Build hhea table (36 bytes — subset_font requires >= 36).
        let mut hhea_table = vec![0u8; 36];
        write_u32(&mut hhea_table, 0, 0x00010000);
        write_u16(&mut hhea_table, 34, num_glyphs as u16); // numberOfHMetrics

        // Build hmtx table (4 bytes per glyph: advanceWidth + lsb).
        // Advance widths are distinct per glyph (500, 600, ...) so tests can
        // tell entries apart.
        let mut hmtx_table = Vec::new();
        for gid in 0..num_glyphs {
            let aw = (500 + gid * 100) as u16;
            let lsb = 10i16;
            hmtx_table.extend_from_slice(&aw.to_be_bytes());
            hmtx_table.extend_from_slice(&lsb.to_be_bytes());
        }

        // Assemble the font.
        let table_list: Vec<(&[u8; 4], &[u8])> = vec![
            (b"head", &head_table),
            (b"hhea", &hhea_table),
            (b"maxp", &maxp_table),
            (b"loca", &loca_table),
            (b"glyf", &glyf_table),
            (b"hmtx", &hmtx_table),
        ];

        let num_tables = table_list.len() as u16;
        let (sr, es, rs) = calc_table_search_params(num_tables);
        let header_size = 12 + (num_tables as usize) * 16;

        let mut font = Vec::new();
        // Offset table.
        font.extend_from_slice(&0x00010000u32.to_be_bytes());
        font.extend_from_slice(&num_tables.to_be_bytes());
        font.extend_from_slice(&sr.to_be_bytes());
        font.extend_from_slice(&es.to_be_bytes());
        font.extend_from_slice(&rs.to_be_bytes());

        // Compute table offsets (each table padded to a 4-byte boundary).
        let mut data_offset = header_size;
        let mut table_records: Vec<(usize, usize)> = Vec::new(); // (offset, padded_len)
        for (_, data) in &table_list {
            let padded = (data.len() + 3) & !3;
            table_records.push((data_offset, padded));
            data_offset += padded;
        }

        // Write table records. The checksum covers padded data; the recorded
        // length is the unpadded length, matching real font files.
        for (i, (tag, data)) in table_list.iter().enumerate() {
            font.extend_from_slice(*tag);
            let mut padded_data = data.to_vec();
            while padded_data.len() % 4 != 0 {
                padded_data.push(0);
            }
            let cs = calc_checksum(&padded_data);
            font.extend_from_slice(&cs.to_be_bytes());
            font.extend_from_slice(&(table_records[i].0 as u32).to_be_bytes());
            font.extend_from_slice(&(data.len() as u32).to_be_bytes());
        }

        // Write table data, padding each table to a 4-byte boundary.
        for (_, data) in &table_list {
            font.extend_from_slice(data);
            while font.len() % 4 != 0 {
                font.push(0);
            }
        }

        font
    }

    #[test]
    fn test_parse_offset_table_header() {
        let font = build_test_font();
        assert!(font.len() >= 12);
        let sf_version = read_u32(&font, 0).unwrap();
        assert_eq!(sf_version, 0x00010000);
        let num_tables = read_u16(&font, 4).unwrap();
        assert_eq!(num_tables, 6);
    }

    #[test]
    fn test_subset_empty_glyph_set() {
        // Subsetting with no glyph IDs should still include glyph 0 (.notdef).
        let font = build_test_font();
        let result = subset_font(&font, &[]).expect("subsetting should succeed");
        assert!(result.gid_map.contains_key(&0));
        assert_eq!(result.gid_map[&0], 0);
        assert_eq!(result.gid_map.len(), 1);
        // The output should be a valid font.
        assert!(result.data.len() > 12);
        let sf_version = read_u32(&result.data, 0).unwrap();
        assert_eq!(sf_version, 0x00010000);
    }

    #[test]
    fn test_subset_single_glyph() {
        let font = build_test_font();
        let result = subset_font(&font, &[1]).expect("subsetting should succeed");
        // Should have glyph 0 and glyph 1.
        assert_eq!(result.gid_map.len(), 2);
        assert_eq!(result.gid_map[&0], 0);
        assert_eq!(result.gid_map[&1], 1);
        // Output should parse back.
        let sf_version = read_u32(&result.data, 0).unwrap();
        assert_eq!(sf_version, 0x00010000);
        // Verify maxp in output has numGlyphs = 2 by walking the table directory.
        let num_tables = read_u16(&result.data, 4).unwrap() as usize;
        let mut found_maxp = false;
        for i in 0..num_tables {
            let rec_off = 12 + i * 16;
            let tag = &result.data[rec_off..rec_off + 4];
            if tag == b"maxp" {
                let offset = read_u32(&result.data, rec_off + 8).unwrap() as usize;
                let num_glyphs = read_u16(&result.data, offset + 4).unwrap();
                assert_eq!(num_glyphs, 2);
                found_maxp = true;
                break;
            }
        }
        assert!(found_maxp, "maxp table should be present");
    }

    #[test]
    fn test_gid_mapping_correctness() {
        let font = build_test_font(); // 3 glyphs: 0, 1, 2
        // Keep only glyph 2 (plus glyph 0 is always included).
        let result = subset_font(&font, &[2]).expect("subsetting should succeed");
        assert_eq!(result.gid_map.len(), 2);
        assert_eq!(result.gid_map[&0], 0); // .notdef stays at 0
        assert_eq!(result.gid_map[&2], 1); // old GID 2 -> new GID 1
    }

    #[test]
    fn test_subset_multiple_glyphs_ordering() {
        let font = build_test_font_with_glyphs(5, &[], false);
        // Request order is irrelevant: new IDs follow ascending old-ID order.
        let result = subset_font(&font, &[3, 1, 4]).expect("subsetting should succeed");
        // Should have glyphs 0, 1, 3, 4 -> new IDs 0, 1, 2, 3.
        assert_eq!(result.gid_map.len(), 4);
        assert_eq!(result.gid_map[&0], 0);
        assert_eq!(result.gid_map[&1], 1);
        assert_eq!(result.gid_map[&3], 2);
        assert_eq!(result.gid_map[&4], 3);
    }

    #[test]
    fn test_subset_composite_glyph_includes_components() {
        // Build a font with 4 glyphs:
        //   0: .notdef (empty)
        //   1: simple
        //   2: simple
        //   3: composite referencing GIDs 1 and 2
        let font = build_test_font_with_glyphs(4, &[(3, vec![1, 2])], false);
        // Request only glyph 3 (composite).
        let result = subset_font(&font, &[3]).expect("subsetting should succeed");
        // Should include 0, 1, 2, 3 (components pulled in automatically).
        assert_eq!(result.gid_map.len(), 4);
        assert!(result.gid_map.contains_key(&0));
        assert!(result.gid_map.contains_key(&1));
        assert!(result.gid_map.contains_key(&2));
        assert!(result.gid_map.contains_key(&3));
    }

    #[test]
    fn test_reject_cff_font() {
        // Build a minimal CFF/OpenType header.
        let mut data = vec![0u8; 64];
        data[0..4].copy_from_slice(b"OTTO"); // CFF signature
        assert!(subset_font(&data, &[1]).is_none());
    }

    #[test]
    fn test_reject_too_short() {
        // Anything shorter than a 12-byte offset table is rejected.
        assert!(subset_font(&[], &[]).is_none());
        assert!(subset_font(&[0; 8], &[]).is_none());
    }

    #[test]
    fn test_calc_table_search_params() {
        let (sr, es, rs) = calc_table_search_params(6);
        assert_eq!(sr, 64); // 4 * 16
        assert_eq!(es, 2); // log2(4)
        assert_eq!(rs, 32); // 6*16 - 64

        let (sr, es, rs) = calc_table_search_params(1);
        assert_eq!(sr, 16);
        assert_eq!(es, 0);
        assert_eq!(rs, 0);

        let (sr, es, rs) = calc_table_search_params(8);
        assert_eq!(sr, 128);
        assert_eq!(es, 3);
        assert_eq!(rs, 0);
    }

    #[test]
    fn test_calc_checksum() {
        // Four bytes: should be a single u32 word.
        let data = 0x01020304u32.to_be_bytes();
        assert_eq!(calc_checksum(&data), 0x01020304);

        // Eight bytes: sum of two words.
        let mut data = Vec::new();
        data.extend_from_slice(&0x00000001u32.to_be_bytes());
        data.extend_from_slice(&0x00000002u32.to_be_bytes());
        assert_eq!(calc_checksum(&data), 3);
    }

    #[test]
    fn test_subset_out_of_range_gid_ignored() {
        let font = build_test_font(); // 3 glyphs
        // Request a glyph ID that is out of range.
        let result = subset_font(&font, &[999]).expect("subsetting should succeed");
        // Only glyph 0 should be present.
        assert_eq!(result.gid_map.len(), 1);
        assert_eq!(result.gid_map[&0], 0);
    }

    #[test]
    fn test_subset_duplicate_glyph_ids() {
        // Duplicates collapse via the BTreeSet in subset_font.
        let font = build_test_font();
        let result = subset_font(&font, &[1, 1, 1]).expect("subsetting should succeed");
        assert_eq!(result.gid_map.len(), 2);
        assert_eq!(result.gid_map[&0], 0);
        assert_eq!(result.gid_map[&1], 1);
    }

    #[test]
    fn test_parse_loca_short_format() {
        // Short format: offsets are u16, actual = value * 2.
        let mut loca = Vec::new();
        loca.extend_from_slice(&0u16.to_be_bytes()); // glyph 0 start
        loca.extend_from_slice(&6u16.to_be_bytes()); // glyph 0 end / glyph 1 start (actual: 12)
        loca.extend_from_slice(&10u16.to_be_bytes()); // glyph 1 end (actual: 20)

        let offsets = parse_loca(&loca, 0, 2).unwrap();
        assert_eq!(offsets, vec![0, 12, 20]);
    }

    #[test]
    fn test_parse_loca_long_format() {
        // Long format: offsets are u32 byte offsets, stored verbatim.
        let mut loca = Vec::new();
        loca.extend_from_slice(&0u32.to_be_bytes());
        loca.extend_from_slice(&100u32.to_be_bytes());
        loca.extend_from_slice(&250u32.to_be_bytes());

        let offsets = parse_loca(&loca, 1, 2).unwrap();
        assert_eq!(offsets, vec![0, 100, 250]);
    }
}