Skip to main content

forme/font/
subset.rs

//! # TrueType Font Subsetter
//!
//! Strips a TrueType font to only the glyphs actually used in the document.
//! This dramatically reduces PDF size — a typical font is 50-200KB but a
//! subset with ~100 glyphs is usually 5-15KB.
//!
//! The subsetter rebuilds a valid TrueType file with remapped glyph IDs
//! (contiguous starting from 0). This is important because PDF CIDFont
//! width arrays and content stream glyph references must use the new IDs.
//!
//! ## Approach
//!
//! 1. Collect all needed glyphs (used glyphs + composite glyph dependencies)
//! 2. Remap old GIDs to new contiguous GIDs
//! 3. Rebuild required TrueType tables (glyf, loca, hmtx, cmap, etc.)
//! 4. Write a valid TrueType file with correct checksums and alignment
18use std::collections::{BTreeSet, HashMap};
19
/// Result of subsetting a font.
pub struct SubsetResult {
    /// The subset TrueType file bytes, ready to embed (e.g. in a PDF
    /// font stream).
    pub ttf_data: Vec<u8>,
    /// Maps original glyph IDs to new contiguous glyph IDs (0-based,
    /// `.notdef` stays at 0). Callers must translate every glyph
    /// reference (width arrays, content streams) through this map.
    pub gid_remap: HashMap<u16, u16>,
}
27
28/// Subset a TrueType font to only include the given glyph IDs.
29pub fn subset_ttf(
30    ttf_data: &[u8],
31    used_gids: &std::collections::HashSet<u16>,
32) -> Result<SubsetResult, String> {
33    let face = ttf_parser::Face::parse(ttf_data, 0)
34        .map_err(|e| format!("Failed to parse TTF: {:?}", e))?;
35
36    // Always include glyph 0 (.notdef)
37    let mut needed_gids: BTreeSet<u16> = BTreeSet::new();
38    needed_gids.insert(0);
39    for &gid in used_gids {
40        needed_gids.insert(gid);
41    }
42
43    // Resolve composite glyph dependencies
44    let raw_glyf = find_table(ttf_data, b"glyf").ok_or("Missing glyf table")?;
45    let raw_loca = find_table(ttf_data, b"loca").ok_or("Missing loca table")?;
46    let head = find_table(ttf_data, b"head").ok_or("Missing head table")?;
47
48    let num_glyphs = face.number_of_glyphs();
49    let loca_format = read_i16(head, 50); // indexToLocFormat at offset 50
50    let loca_offsets = parse_loca(raw_loca, loca_format, num_glyphs)?;
51
52    // Recursively collect composite glyph component GIDs
53    let initial_gids: Vec<u16> = needed_gids.iter().copied().collect();
54    for gid in initial_gids {
55        collect_composite_deps(raw_glyf, &loca_offsets, gid, &mut needed_gids);
56    }
57
58    // Build remap: old GID → new contiguous GID
59    let mut gid_remap: HashMap<u16, u16> = HashMap::new();
60    for (new_gid, &old_gid) in needed_gids.iter().enumerate() {
61        gid_remap.insert(old_gid, new_gid as u16);
62    }
63
64    let new_num_glyphs = needed_gids.len() as u16;
65
66    // Rebuild glyf table with remapped composite references
67    let (new_glyf, new_loca_offsets) =
68        rebuild_glyf(raw_glyf, &loca_offsets, &needed_gids, &gid_remap);
69
70    // Determine loca format based on glyf size
71    let new_loca_format: i16 = if new_glyf.len() > 0x1FFFE { 1 } else { 0 };
72    let new_loca = build_loca(&new_loca_offsets, new_loca_format);
73
74    // Rebuild hmtx (horizontal metrics)
75    let raw_hmtx = find_table(ttf_data, b"hmtx").ok_or("Missing hmtx table")?;
76    let raw_hhea = find_table(ttf_data, b"hhea").ok_or("Missing hhea table")?;
77    let num_h_metrics = read_u16(raw_hhea, 34) as usize;
78    let new_hmtx = rebuild_hmtx(raw_hmtx, &needed_gids, num_h_metrics);
79
80    // Build minimal cmap (Format 4)
81    // We need the original char→gid mapping — invert through the face
82    let mut char_to_new_gid: Vec<(u16, u16)> = Vec::new();
83    for &old_gid in &needed_gids {
84        if old_gid == 0 {
85            continue;
86        }
87        // Search for Unicode codepoint that maps to this GID
88        // This is O(n) per glyph but subset sizes are small
89        for code in 0u32..=0xFFFF {
90            if let Some(ch) = char::from_u32(code) {
91                if let Some(gid) = face.glyph_index(ch) {
92                    if gid.0 == old_gid {
93                        if let Some(&new_gid) = gid_remap.get(&old_gid) {
94                            char_to_new_gid.push((code as u16, new_gid));
95                        }
96                        break;
97                    }
98                }
99            }
100        }
101    }
102    let new_cmap = build_cmap_format4(&char_to_new_gid);
103
104    // Copy or rebuild remaining required tables
105    let new_head = rebuild_head(head, new_loca_format);
106
107    let raw_hhea_data = raw_hhea.to_vec();
108    let new_hhea = rebuild_hhea(&raw_hhea_data, new_num_glyphs);
109
110    let new_maxp = build_maxp(new_num_glyphs);
111    let new_post = build_post_format3();
112
113    // Copy name table verbatim if present (or build minimal)
114    let new_name = find_table(ttf_data, b"name")
115        .map(|t| t.to_vec())
116        .unwrap_or_else(|| build_minimal_name(&face));
117
118    // Copy OS/2 table verbatim if present
119    let new_os2 = find_table(ttf_data, b"OS/2").map(|t| t.to_vec());
120
121    // Copy hinting tables verbatim if present
122    let cvt_data = find_table(ttf_data, b"cvt ").map(|t| t.to_vec());
123    let fpgm_data = find_table(ttf_data, b"fpgm").map(|t| t.to_vec());
124    let prep_data = find_table(ttf_data, b"prep").map(|t| t.to_vec());
125
126    // Assemble the final TrueType file
127    let mut tables: Vec<(u32, Vec<u8>)> = Vec::new();
128    tables.push((tag_u32(b"cmap"), new_cmap));
129    if let Some(cvt) = cvt_data {
130        tables.push((tag_u32(b"cvt "), cvt));
131    }
132    if let Some(fpgm) = fpgm_data {
133        tables.push((tag_u32(b"fpgm"), fpgm));
134    }
135    tables.push((tag_u32(b"glyf"), new_glyf));
136    tables.push((tag_u32(b"head"), new_head));
137    tables.push((tag_u32(b"hhea"), new_hhea));
138    tables.push((tag_u32(b"hmtx"), new_hmtx));
139    tables.push((tag_u32(b"loca"), new_loca));
140    tables.push((tag_u32(b"maxp"), new_maxp));
141    tables.push((tag_u32(b"name"), new_name));
142    if let Some(os2) = new_os2 {
143        tables.push((tag_u32(b"OS/2"), os2));
144    }
145    tables.push((tag_u32(b"post"), new_post));
146    if let Some(prep) = prep_data {
147        tables.push((tag_u32(b"prep"), prep));
148    }
149
150    // Sort tables by tag (required by TrueType spec for binary search)
151    tables.sort_by_key(|(tag, _)| *tag);
152
153    let output = write_ttf_file(&mut tables);
154
155    Ok(SubsetResult {
156        ttf_data: output,
157        gid_remap,
158    })
159}
160
161// ─── Table Locating ─────────────────────────────────────────────
162
/// Locate a top-level table in an sfnt (TrueType) file by its 4-byte tag.
///
/// Scans the table directory and returns the table's byte slice, or
/// `None` when the tag is absent or its directory entry points outside
/// the file. `offset + length` is computed with checked arithmetic so
/// hostile 32-bit values cannot overflow `usize` on 32-bit targets.
fn find_table<'a>(data: &'a [u8], tag: &[u8; 4]) -> Option<&'a [u8]> {
    if data.len() < 12 {
        return None; // too small to hold even the 12-byte offset table
    }
    let num_tables = u16::from_be_bytes([data[4], data[5]]) as usize;
    for i in 0..num_tables {
        let rec = 12 + i * 16; // directory records are 16 bytes each
        if rec + 16 > data.len() {
            break;
        }
        if &data[rec..rec + 4] == tag {
            let table_offset =
                u32::from_be_bytes([data[rec + 8], data[rec + 9], data[rec + 10], data[rec + 11]])
                    as usize;
            let table_length =
                u32::from_be_bytes([data[rec + 12], data[rec + 13], data[rec + 14], data[rec + 15]])
                    as usize;
            // checked_add guards against usize overflow with hostile values.
            if let Some(end) = table_offset.checked_add(table_length) {
                if end <= data.len() {
                    return Some(&data[table_offset..end]);
                }
            }
        }
    }
    None
}
183
184// ─── Loca Table Parsing ─────────────────────────────────────────
185
/// Parse the `loca` table into `numGlyphs + 1` absolute glyf offsets.
///
/// Format 0 stores u16 half-offsets (stored value × 2 = byte offset);
/// any other format stores u32 byte offsets directly. If the table is
/// truncated, missing entries repeat the previous offset (yielding
/// empty glyphs) rather than failing.
fn parse_loca(data: &[u8], format: i16, num_glyphs: u16) -> Result<Vec<u32>, String> {
    // loca always carries one extra entry marking the end of the last glyph.
    let count = num_glyphs as usize + 1;
    let entry_size = if format == 0 { 2 } else { 4 };
    let mut offsets: Vec<u32> = Vec::with_capacity(count);

    for i in 0..count {
        let pos = i * entry_size;
        let value = if pos + entry_size > data.len() {
            // Truncated table: repeat the last known offset.
            *offsets.last().unwrap_or(&0)
        } else if format == 0 {
            (u16::from_be_bytes([data[pos], data[pos + 1]]) as u32) * 2
        } else {
            u32::from_be_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]])
        };
        offsets.push(value);
    }

    Ok(offsets)
}
214
215// ─── Composite Glyph Dependency Collection ──────────────────────
216
/// Walk a glyph's component records (if it is a composite) and add every
/// referenced component GID to `needed`, recursing into components seen
/// for the first time.
fn collect_composite_deps(glyf: &[u8], loca_offsets: &[u32], gid: u16, needed: &mut BTreeSet<u16>) {
    let i = gid as usize;
    if i + 1 >= loca_offsets.len() {
        return;
    }
    let (start, end) = (loca_offsets[i] as usize, loca_offsets[i + 1] as usize);
    // Empty glyph, or the 10-byte header would run past the table.
    if start >= end || start + 10 > glyf.len() {
        return;
    }

    // numberOfContours >= 0 means a simple glyph — no components.
    if i16::from_be_bytes([glyf[start], glyf[start + 1]]) >= 0 {
        return;
    }

    // Component records begin after the header (numContours + bbox).
    let mut pos = start + 10;
    while pos + 4 <= glyf.len() {
        let flags = u16::from_be_bytes([glyf[pos], glyf[pos + 1]]);
        let component = u16::from_be_bytes([glyf[pos + 2], glyf[pos + 3]]);
        pos += 4;

        // First sighting of this component: recurse into its own deps.
        // The insert-guard also terminates on cyclic references.
        if needed.insert(component) {
            collect_composite_deps(glyf, loca_offsets, component, needed);
        }

        // Argument bytes: 2 × i16 if ARG_1_AND_2_ARE_WORDS, else 2 × i8.
        pos += if flags & 0x0001 != 0 { 4 } else { 2 };

        // Optional transform: scale (1), x/y scale (2), or 2×2 matrix (4)
        // F2Dot14 values.
        if flags & 0x0008 != 0 {
            pos += 2;
        } else if flags & 0x0040 != 0 {
            pos += 4;
        } else if flags & 0x0080 != 0 {
            pos += 8;
        }

        // Stop unless MORE_COMPONENTS is set.
        if flags & 0x0020 == 0 {
            break;
        }
    }
}
277
278// ─── Table Rebuilding ───────────────────────────────────────────
279
280fn rebuild_glyf(
281    glyf: &[u8],
282    loca_offsets: &[u32],
283    needed_gids: &BTreeSet<u16>,
284    gid_remap: &HashMap<u16, u16>,
285) -> (Vec<u8>, Vec<u32>) {
286    let mut new_glyf: Vec<u8> = Vec::new();
287    let mut new_offsets: Vec<u32> = Vec::new();
288
289    for &old_gid in needed_gids {
290        new_offsets.push(new_glyf.len() as u32);
291
292        let idx = old_gid as usize;
293        if idx + 1 >= loca_offsets.len() {
294            continue;
295        }
296
297        let start = loca_offsets[idx] as usize;
298        let end = loca_offsets[idx + 1] as usize;
299        if start >= end || start >= glyf.len() {
300            // Empty glyph
301            continue;
302        }
303
304        let glyph_data = &glyf[start..end.min(glyf.len())];
305        let mut new_glyph = glyph_data.to_vec();
306
307        // If composite, rewrite component GID references
308        if glyph_data.len() >= 2 {
309            let num_contours = read_i16(glyph_data, 0);
310            if num_contours < 0 {
311                rewrite_composite_gids(&mut new_glyph, gid_remap);
312            }
313        }
314
315        new_glyf.extend_from_slice(&new_glyph);
316
317        // Pad to 4-byte boundary (required for loca to work correctly)
318        while !new_glyf.len().is_multiple_of(4) {
319            new_glyf.push(0);
320        }
321    }
322
323    // Final offset (marks end of last glyph)
324    new_offsets.push(new_glyf.len() as u32);
325
326    (new_glyf, new_offsets)
327}
328
/// Rewrite the component GID fields of a composite glyph in place,
/// translating old GIDs to new ones via `gid_remap`. Components missing
/// from the map are left untouched.
fn rewrite_composite_gids(glyph_data: &mut [u8], gid_remap: &HashMap<u16, u16>) {
    // Component records start after the 10-byte glyph header.
    let mut pos = 10;

    while pos + 4 <= glyph_data.len() {
        let flags = u16::from_be_bytes([glyph_data[pos], glyph_data[pos + 1]]);
        let old = u16::from_be_bytes([glyph_data[pos + 2], glyph_data[pos + 3]]);

        // Patch the component GID in place.
        if let Some(&new) = gid_remap.get(&old) {
            let be = new.to_be_bytes();
            glyph_data[pos + 2] = be[0];
            glyph_data[pos + 3] = be[1];
        }

        pos += 4;
        // Skip argument bytes: 2 × i16 or 2 × i8.
        pos += if flags & 0x0001 != 0 { 4 } else { 2 };
        // Skip optional transform values (F2Dot14s).
        if flags & 0x0008 != 0 {
            pos += 2;
        } else if flags & 0x0040 != 0 {
            pos += 4;
        } else if flags & 0x0080 != 0 {
            pos += 8;
        }

        if flags & 0x0020 == 0 {
            break; // MORE_COMPONENTS not set — done
        }
    }
}
364
/// Serialize loca offsets in the requested format: format 0 packs each
/// offset as a u16 half-offset (offset / 2); anything else writes raw
/// big-endian u32 offsets.
fn build_loca(offsets: &[u32], format: i16) -> Vec<u8> {
    let entry_size = if format == 0 { 2 } else { 4 };
    let mut out = Vec::with_capacity(offsets.len() * entry_size);
    for &off in offsets {
        if format == 0 {
            out.extend_from_slice(&((off / 2) as u16).to_be_bytes());
        } else {
            out.extend_from_slice(&off.to_be_bytes());
        }
    }
    out
}
379
/// Build a new hmtx table containing one full (advanceWidth, lsb) metric
/// for every needed glyph, in new-GID order.
///
/// In the source font, glyphs at or past `num_h_metrics` share the last
/// advance width and store only an lsb; this flattens them back to full
/// metrics (the caller sets hhea.numberOfHMetrics to the new glyph
/// count). Out-of-range reads and a malformed `num_h_metrics == 0` fall
/// back to zero metrics instead of panicking.
fn rebuild_hmtx(hmtx: &[u8], needed_gids: &BTreeSet<u16>, num_h_metrics: usize) -> Vec<u8> {
    let mut data = Vec::with_capacity(needed_gids.len() * 4);

    for &old_gid in needed_gids {
        let idx = old_gid as usize;
        if idx < num_h_metrics {
            // Full metric: advance_width (u16) + lsb (i16)
            let offset = idx * 4;
            if offset + 4 <= hmtx.len() {
                data.extend_from_slice(&hmtx[offset..offset + 4]);
            } else {
                data.extend_from_slice(&[0, 0, 0, 0]);
            }
        } else {
            // Trailing glyph: reuse the last advance width + per-glyph lsb.
            // checked_sub guards num_h_metrics == 0 (malformed hhea), which
            // previously underflowed usize and panicked.
            let advance_width: &[u8] = match num_h_metrics.checked_sub(1) {
                Some(last) if last * 4 + 2 <= hmtx.len() => &hmtx[last * 4..last * 4 + 2],
                _ => &[0, 0],
            };
            let lsb_offset = num_h_metrics * 4 + (idx - num_h_metrics) * 2;
            let lsb: &[u8] = if lsb_offset + 2 <= hmtx.len() {
                &hmtx[lsb_offset..lsb_offset + 2]
            } else {
                &[0, 0]
            };
            data.extend_from_slice(advance_width);
            data.extend_from_slice(lsb);
        }
    }

    data
}
414
/// Build a complete `cmap` table containing a single Format 4 subtable
/// for platform 3 (Windows) / encoding 1 (Unicode BMP).
///
/// `char_to_gid` pairs BMP codepoints with (already remapped) glyph IDs.
/// Consecutive codepoints are merged into segments; single-codepoint
/// segments are encoded via idDelta, multi-codepoint segments via an
/// idRangeOffset into glyphIdArray.
fn build_cmap_format4(char_to_gid: &[(u16, u16)]) -> Vec<u8> {
    // Format 4 requires segments in increasing codepoint order.
    let mut sorted = char_to_gid.to_vec();
    sorted.sort_by_key(|(ch, _)| *ch);

    // Group into contiguous codepoint runs: (start, end, gids).
    // GIDs inside a run need not be contiguous — ranges go through
    // glyphIdArray below, so only the codepoints must be adjacent.
    let mut segments: Vec<(u16, u16, Vec<u16>)> = Vec::new(); // (start, end, gids)

    for &(ch, gid) in &sorted {
        if let Some(last) = segments.last_mut() {
            if ch == last.1 + 1 {
                last.1 = ch;
                last.2.push(gid);
                continue;
            }
        }
        segments.push((ch, ch, vec![gid]));
    }

    // The spec mandates a final 0xFFFF sentinel segment.
    segments.push((0xFFFF, 0xFFFF, vec![0]));

    let seg_count = segments.len() as u16;
    let seg_count_x2 = seg_count * 2;
    // Binary-search helper fields per the spec:
    // searchRange = 2 × 2^floor(log2(segCount)), entrySelector = that
    // exponent, rangeShift = segCount×2 − searchRange.
    let entry_selector = if seg_count > 0 {
        (seg_count as f64).log2().floor() as u16
    } else {
        0
    };
    let search_range = (1u16 << entry_selector) * 2;
    let range_shift = seg_count_x2.saturating_sub(search_range);

    // Encoding strategy: single-codepoint segments use idDelta
    // (gid = code + delta, mod 65536); multi-codepoint segments use an
    // idRangeOffset pointing into glyphIdArray.

    let mut glyph_id_array: Vec<u16> = Vec::new();
    let mut end_codes: Vec<u16> = Vec::new();
    let mut start_codes: Vec<u16> = Vec::new();
    let mut id_deltas: Vec<i16> = Vec::new();
    let mut id_range_offsets: Vec<u16> = Vec::new();

    for (i, (start, end, gids)) in segments.iter().enumerate() {
        start_codes.push(*start);
        end_codes.push(*end);

        if *start == 0xFFFF {
            // Sentinel: idDelta 1 maps 0xFFFF → 0 (mod 65536).
            id_deltas.push(1);
            id_range_offsets.push(0);
        } else if gids.len() == 1 {
            // Single codepoint — delta encoding, no glyphIdArray entry.
            let delta = gids[0] as i32 - *start as i32;
            id_deltas.push(delta as i16);
            id_range_offsets.push(0);
        } else {
            // Range — idRangeOffset is the byte distance from this
            // idRangeOffset[i] slot to the segment's first glyphIdArray
            // entry: (slots left in the array, incl. this one) plus the
            // glyph IDs already emitted, all × 2 bytes.
            id_deltas.push(0);
            let remaining_offsets = (segments.len() - i) as u16;
            let offset = (remaining_offsets + glyph_id_array.len() as u16) * 2;
            id_range_offsets.push(offset);
            glyph_id_array.extend_from_slice(gids);
        }
    }

    // Assemble the Format 4 subtable:
    // 14-byte header + 4 parallel u16 arrays (+ reservedPad) + glyphIdArray.
    // NOTE(review): length is serialized as u16; a subset with very many
    // segments could overflow it — confirm subsets stay small.
    let subtable_len = 14 + seg_count as usize * 8 + glyph_id_array.len() * 2;
    let mut subtable: Vec<u8> = Vec::new();
    subtable.extend_from_slice(&4u16.to_be_bytes()); // format
    subtable.extend_from_slice(&(subtable_len as u16).to_be_bytes()); // length
    subtable.extend_from_slice(&0u16.to_be_bytes()); // language (n/a for Windows)
    subtable.extend_from_slice(&seg_count_x2.to_be_bytes());
    subtable.extend_from_slice(&search_range.to_be_bytes());
    subtable.extend_from_slice(&entry_selector.to_be_bytes());
    subtable.extend_from_slice(&range_shift.to_be_bytes());

    for &ec in &end_codes {
        subtable.extend_from_slice(&ec.to_be_bytes());
    }
    subtable.extend_from_slice(&0u16.to_be_bytes()); // reservedPad

    for &sc in &start_codes {
        subtable.extend_from_slice(&sc.to_be_bytes());
    }
    for &d in &id_deltas {
        subtable.extend_from_slice(&d.to_be_bytes());
    }
    for &r in &id_range_offsets {
        subtable.extend_from_slice(&r.to_be_bytes());
    }
    for &g in &glyph_id_array {
        subtable.extend_from_slice(&g.to_be_bytes());
    }

    // Wrap in the cmap table header: version 0, one encoding record.
    let mut cmap: Vec<u8> = Vec::new();
    cmap.extend_from_slice(&0u16.to_be_bytes()); // version
    cmap.extend_from_slice(&1u16.to_be_bytes()); // numTables
                                                 // Encoding record: platform 3 (Windows), encoding 1 (Unicode BMP);
                                                 // the subtable immediately follows the 12-byte header.
    cmap.extend_from_slice(&3u16.to_be_bytes()); // platformID
    cmap.extend_from_slice(&1u16.to_be_bytes()); // encodingID
    cmap.extend_from_slice(&12u32.to_be_bytes()); // offset to subtable
    cmap.extend_from_slice(&subtable);

    cmap
}
524
/// Copy the head table, zeroing checkSumAdjustment and updating
/// indexToLocFormat for the rebuilt loca table.
///
/// The copy is padded to the full 54-byte head size first, so a
/// truncated source table cannot cause an out-of-bounds panic on the
/// fixed-offset writes (mirrors the padding in `rebuild_hhea`).
fn rebuild_head(head: &[u8], new_loca_format: i16) -> Vec<u8> {
    let mut new_head = head.to_vec();
    // head is defined as 54 bytes; pad defensively before fixed writes.
    new_head.resize(new_head.len().max(54), 0);
    // Zero checkSumAdjustment (offset 8, 4 bytes) — recomputed when the
    // final file is written.
    new_head[8..12].copy_from_slice(&0u32.to_be_bytes());
    // indexToLocFormat lives at offset 50.
    new_head[50..52].copy_from_slice(&new_loca_format.to_be_bytes());
    new_head
}
533
/// Copy the hhea table (padded to its minimum 36 bytes) and set
/// numberOfHMetrics to the new glyph count, since the rebuilt hmtx
/// stores a full metric for every glyph.
fn rebuild_hhea(hhea: &[u8], new_num_glyphs: u16) -> Vec<u8> {
    let mut out = hhea.to_vec();
    if out.len() < 36 {
        out.resize(36, 0);
    }
    // numberOfHMetrics sits at offset 34.
    out[34..36].copy_from_slice(&new_num_glyphs.to_be_bytes());
    out
}
544
/// Build a version-1.0 maxp table for `num_glyphs` glyphs, filling the
/// remaining limit fields with conservative defaults.
fn build_maxp(num_glyphs: u16) -> Vec<u8> {
    let mut data = Vec::with_capacity(32);
    data.extend_from_slice(&0x0001_0000u32.to_be_bytes()); // version 1.0
    // u16 fields in table order, starting with numGlyphs at offset 4.
    let fields: [u16; 14] = [
        num_glyphs, // numGlyphs
        256,        // maxPoints
        64,         // maxContours
        256,        // maxCompositePoints
        64,         // maxCompositeContours
        1,          // maxZones
        0,          // maxTwilightPoints
        64,         // maxStorage
        64,         // maxFunctionDefs
        64,         // maxInstructionDefs
        64,         // maxStackElements
        0,          // maxSizeOfInstructions
        64,         // maxComponentElements
        2,          // maxComponentDepth
    ];
    for v in fields {
        data.extend_from_slice(&v.to_be_bytes());
    }
    data
}
567
/// Build a minimal `post` table, version 3.0 — carries no glyph names,
/// so it is the smallest valid form. All remaining fields (italicAngle,
/// underlinePosition/Thickness, isFixedPitch, …) stay zero.
fn build_post_format3() -> Vec<u8> {
    let mut data = 0x0003_0000u32.to_be_bytes().to_vec(); // version 3.0
    data.resize(32, 0);
    data
}
575
576fn build_minimal_name(face: &ttf_parser::Face) -> Vec<u8> {
577    // Build a minimal name table with just the font family name
578    let family = face
579        .names()
580        .into_iter()
581        .find(|n| n.name_id == ttf_parser::name_id::FULL_NAME)
582        .and_then(|n| n.to_string())
583        .unwrap_or_else(|| "SubsetFont".to_string());
584
585    let name_bytes: Vec<u8> = family
586        .encode_utf16()
587        .flat_map(|c| c.to_be_bytes())
588        .collect();
589
590    let mut data = Vec::new();
591    // Name table header
592    data.extend_from_slice(&0u16.to_be_bytes()); // format
593    data.extend_from_slice(&1u16.to_be_bytes()); // count
594    let string_offset = 6 + 12; // header (6) + 1 record (12)
595    data.extend_from_slice(&(string_offset as u16).to_be_bytes()); // stringOffset
596
597    // Name record: platformID=3, encodingID=1, languageID=0x0409, nameID=4 (fullName)
598    data.extend_from_slice(&3u16.to_be_bytes()); // platformID
599    data.extend_from_slice(&1u16.to_be_bytes()); // encodingID
600    data.extend_from_slice(&0x0409u16.to_be_bytes()); // languageID
601    data.extend_from_slice(&4u16.to_be_bytes()); // nameID (full name)
602    data.extend_from_slice(&(name_bytes.len() as u16).to_be_bytes()); // length
603    data.extend_from_slice(&0u16.to_be_bytes()); // offset
604
605    // String data
606    data.extend_from_slice(&name_bytes);
607
608    data
609}
610
611// ─── TrueType File Writer ───────────────────────────────────────
612
/// Assemble a complete sfnt (TrueType) file from (tag, data) pairs.
///
/// Writes the 12-byte offset table, then the table directory (with
/// per-table checksums), then the table data, padding every table to a
/// 4-byte boundary. Tables must already be sorted by tag (the caller
/// does this). Finally patches head.checkSumAdjustment via
/// `fix_head_checksum`. Takes `&mut` because padding mutates the table
/// data in place.
fn write_ttf_file(tables: &mut [(u32, Vec<u8>)]) -> Vec<u8> {
    let num_tables = tables.len() as u16;
    // Binary-search helper fields: searchRange = 16 × 2^floor(log2(n)),
    // entrySelector = that exponent, rangeShift = n×16 − searchRange.
    let entry_selector = if num_tables > 0 {
        (num_tables as f64).log2().floor() as u16
    } else {
        0
    };
    let search_range = (1u16 << entry_selector) * 16;
    let range_shift = (num_tables * 16).saturating_sub(search_range);

    // Offset table (12 bytes)
    let mut output: Vec<u8> = Vec::new();
    output.extend_from_slice(&0x00010000u32.to_be_bytes()); // sfntVersion (TrueType)
    output.extend_from_slice(&num_tables.to_be_bytes());
    output.extend_from_slice(&search_range.to_be_bytes());
    output.extend_from_slice(&entry_selector.to_be_bytes());
    output.extend_from_slice(&range_shift.to_be_bytes());

    // First table begins immediately after the directory.
    let dir_size = 12 + num_tables as usize * 16;
    let mut table_offset = dir_size;

    // Pad each table to a 4-byte boundary BEFORE checksumming and before
    // recording offsets/lengths, so all three stay consistent.
    // NOTE(review): lengths are therefore recorded after padding; the
    // spec says length should be the unpadded size — most consumers
    // tolerate this, but worth confirming.
    for (_, data) in tables.iter_mut() {
        while data.len() % 4 != 0 {
            data.push(0);
        }
    }

    // Table directory: tag, checksum, offset, length per entry.
    for (tag, data) in tables.iter() {
        output.extend_from_slice(&tag.to_be_bytes());
        let checksum = calc_table_checksum(data);
        output.extend_from_slice(&checksum.to_be_bytes());
        output.extend_from_slice(&(table_offset as u32).to_be_bytes());
        output.extend_from_slice(&(data.len() as u32).to_be_bytes());
        table_offset += data.len();
    }

    // Table data, in directory order.
    for (_, data) in tables.iter() {
        output.extend_from_slice(data);
    }

    // Patch head.checkSumAdjustment now that the whole file is known.
    fix_head_checksum(&mut output, tables);

    output
}
662
/// Sum a table's bytes as big-endian u32 words with wrapping addition,
/// per the sfnt checksum algorithm. A trailing partial word is treated
/// as zero-padded on the right.
fn calc_table_checksum(data: &[u8]) -> u32 {
    let mut chunks = data.chunks_exact(4);
    let mut sum = (&mut chunks).fold(0u32, |acc, w| {
        acc.wrapping_add(u32::from_be_bytes([w[0], w[1], w[2], w[3]]))
    });

    // Fold in any trailing bytes as a zero-padded final word.
    let rest = chunks.remainder();
    if !rest.is_empty() {
        let mut word = [0u8; 4];
        word[..rest.len()].copy_from_slice(rest);
        sum = sum.wrapping_add(u32::from_be_bytes(word));
    }

    sum
}
680
681fn fix_head_checksum(output: &mut [u8], tables: &[(u32, Vec<u8>)]) {
682    // Find the head table offset in the directory
683    let num_tables = read_u16(output, 4) as usize;
684    let head_tag = tag_u32(b"head");
685
686    for i in 0..num_tables {
687        let dir_offset = 12 + i * 16;
688        let tag = read_u32(output, dir_offset);
689        if tag == head_tag {
690            let table_offset = read_u32(output, dir_offset + 8) as usize;
691
692            // Calculate file checksum
693            let file_checksum = calc_table_checksum(output);
694            let adjustment = 0xB1B0AFBAu32.wrapping_sub(file_checksum);
695
696            // Write checkSumAdjustment at head table offset + 8
697            if table_offset + 12 <= output.len() {
698                write_u32(output, table_offset + 8, adjustment);
699            }
700
701            // Update the head table checksum in the directory
702            // First, find the head table data to recalculate
703            let head_data_len = read_u32(output, dir_offset + 12) as usize;
704            if table_offset + head_data_len <= output.len() {
705                let checksum =
706                    calc_table_checksum(&output[table_offset..table_offset + head_data_len]);
707                write_u32(output, dir_offset + 4, checksum);
708            }
709
710            break;
711        }
712    }
713
714    // Suppress unused variable warning
715    let _ = tables;
716}
717
718// ─── Byte Helpers ───────────────────────────────────────────────
719
/// Read a big-endian u16 at `offset`. Panics if out of bounds.
fn read_u16(data: &[u8], offset: usize) -> u16 {
    ((data[offset] as u16) << 8) | data[offset + 1] as u16
}
723
/// Read a big-endian i16 at `offset`. Panics if out of bounds.
fn read_i16(data: &[u8], offset: usize) -> i16 {
    ((((data[offset] as u16) << 8) | data[offset + 1] as u16) as i16)
}
727
/// Read a big-endian u32 at `offset`. Panics if out of bounds.
fn read_u32(data: &[u8], offset: usize) -> u32 {
    data[offset..offset + 4]
        .iter()
        .fold(0u32, |acc, &b| (acc << 8) | b as u32)
}
736
/// Write `val` as a big-endian u16 at `offset`. Panics if out of bounds.
fn write_u16(data: &mut [u8], offset: usize, val: u16) {
    data[offset] = (val >> 8) as u8; // high byte
    data[offset + 1] = val as u8; // low byte (intentional truncation)
}
742
/// Write `val` as a big-endian i16 at `offset`. Panics if out of bounds.
fn write_i16(data: &mut [u8], offset: usize, val: i16) {
    data[offset..offset + 2].copy_from_slice(&val.to_be_bytes());
}
748
/// Write `val` as a big-endian u32 at `offset`. Panics if out of bounds.
fn write_u32(data: &mut [u8], offset: usize, val: u32) {
    data[offset..offset + 4].copy_from_slice(&val.to_be_bytes());
}
756
/// Pack a 4-byte table tag into its big-endian u32 form.
fn tag_u32(tag: &[u8; 4]) -> u32 {
    tag.iter().fold(0u32, |acc, &b| (acc << 8) | b as u32)
}
760
761// ─── Tests ──────────────────────────────────────────────────────
762
// Unit tests for the pure table-building helpers. The full subsetting
// path needs a real font file, so only the byte-level builders are
// covered here.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tag_u32() {
        // Tags pack big-endian: 'g' 'l' 'y' 'f' = 0x67 0x6C 0x79 0x66.
        assert_eq!(tag_u32(b"glyf"), 0x676C7966);
        assert_eq!(tag_u32(b"head"), 0x68656164);
    }

    #[test]
    fn test_calc_table_checksum() {
        // Known checksum for "ABCD" (0x41424344) — a single aligned
        // word sums to itself.
        let data = b"ABCD";
        assert_eq!(calc_table_checksum(data), 0x41424344);
    }

    #[test]
    fn test_build_post_format3() {
        // Version 3.0 post table is exactly 32 bytes.
        let data = build_post_format3();
        assert_eq!(data.len(), 32);
        assert_eq!(read_u32(&data, 0), 0x00030000);
    }

    #[test]
    fn test_build_maxp() {
        // Version 1.0 with numGlyphs at offset 4.
        let data = build_maxp(42);
        assert_eq!(read_u32(&data, 0), 0x00010000);
        assert_eq!(read_u16(&data, 4), 42);
    }

    #[test]
    fn test_build_loca_short() {
        let offsets = vec![0, 100, 200, 300];
        let data = build_loca(&offsets, 0);
        // Short format: each offset / 2 stored as u16
        assert_eq!(data.len(), 8); // 4 entries × 2 bytes
        assert_eq!(read_u16(&data, 0), 0);
        assert_eq!(read_u16(&data, 2), 50);
        assert_eq!(read_u16(&data, 4), 100);
        assert_eq!(read_u16(&data, 6), 150);
    }

    #[test]
    fn test_build_loca_long() {
        let offsets = vec![0, 100, 200, 300];
        let data = build_loca(&offsets, 1);
        assert_eq!(data.len(), 16); // 4 entries × 4 bytes
        assert_eq!(read_u32(&data, 0), 0);
        assert_eq!(read_u32(&data, 4), 100);
        assert_eq!(read_u32(&data, 8), 200);
        assert_eq!(read_u32(&data, 12), 300);
    }

    #[test]
    fn test_cmap_format4_single_char() {
        let entries = vec![(65u16, 1u16)]; // 'A' → gid 1
        let cmap = build_cmap_format4(&entries);

        // Should be a valid cmap table
        assert_eq!(read_u16(&cmap, 0), 0); // version
        assert_eq!(read_u16(&cmap, 2), 1); // numTables

        // Encoding record
        assert_eq!(read_u16(&cmap, 4), 3); // platformID = Windows
        assert_eq!(read_u16(&cmap, 6), 1); // encodingID = Unicode BMP

        // Subtable format should be 4
        let subtable_offset = read_u32(&cmap, 8) as usize;
        assert_eq!(read_u16(&cmap, subtable_offset), 4);
    }
}