harumi 1.5.3

Pure-Rust PDF — CJK font embedding (Chinese/Japanese/Korean), OCR text overlay, text extraction, HTML→PDF, page merge/split. WASM-ready, zero C deps.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
//! Internal pure-Rust TTF font subsetter.
//!
//! Replaces the `subsetter` crate to minimize dependencies.
//! Handles TrueType (glyf-based) fonts; CFF fonts are rejected as unsupported.

use std::collections::{BTreeMap, BTreeSet};
use ttf_parser::Face;

/// Records which glyphs (by original GID) should be preserved in the subset.
///
/// The remapper itself only stores the requested set; `subset` assigns the
/// new GIDs by enumerating the final kept set in ascending original-GID order
/// (original GID → new GID in `0..N`).
pub(super) struct GlyphRemapper {
    // Original GIDs requested so far; the `BTreeSet` keeps them unique and sorted.
    orig_gids: BTreeSet<u16>,
}

impl GlyphRemapper {
    /// Create a new empty remapper.
    pub fn new() -> Self {
        GlyphRemapper {
            orig_gids: BTreeSet::new(),
        }
    }

    /// Mark a glyph (by original GID) to be preserved in the subset.
    pub fn remap(&mut self, gid: u16) {
        self.orig_gids.insert(gid);
    }

    /// Get sorted list of original GIDs to preserve (for internal use).
    fn sorted_gids(&self) -> Vec<u16> {
        self.orig_gids.iter().copied().collect()
    }
}

/// Subset a TrueType font to include only the specified glyphs.
///
/// Returns the subsetted font bytes and the final sorted set of original GIDs
/// that were included (which may be larger than the remapper's set when
/// composite glyphs pull in additional component GIDs). New GIDs are assigned
/// by position in that set: the n-th smallest original GID becomes GID n.
///
/// `data` may be a standalone font or a TrueType Collection (`ttcf`), in which
/// case `face_index` selects the face. The output contains rebuilt `head`,
/// `hhea`, `maxp`, `glyf`, `loca` and `hmtx` tables plus the hinting tables
/// (`fpgm`, `prep`, `cvt `, `gasp`) copied verbatim; every other table is dropped.
///
/// # Errors
/// Returns an error if the font is malformed, CFF-based, or subsetting fails.
pub(super) fn subset(
    data: &[u8],
    face_index: u32,
    remapper: &GlyphRemapper,
) -> Result<(Vec<u8>, BTreeSet<u16>), Box<dyn std::error::Error>> {
    let face = Face::parse(data, face_index)?;

    // For TTC (TrueType Collection), find the font data at the specified face_index.
    let is_ttc = data.starts_with(b"ttcf");
    let font_data_start = if is_ttc {
        // TTC format: the per-face u32 offsets start at byte 12 of the TTC header.
        // Checked arithmetic keeps this panic-free on 32-bit targets.
        let entry = (face_index as usize)
            .checked_mul(4)
            .and_then(|rel| rel.checked_add(12))
            .ok_or("TTC header truncated")?;
        let raw = entry
            .checked_add(4)
            .and_then(|end| data.get(entry..end))
            .ok_or("TTC header truncated")?;
        u32::from_be_bytes([raw[0], raw[1], raw[2], raw[3]]) as usize
    } else {
        0
    };

    // Parse the font structure using the font data offset.
    // For TTC: font_data is the face data, table offsets are absolute from TTC file start.
    // For TTF: font_data is the same as data, table offsets are from file start.
    let font_data = data
        .get(font_data_start..)
        .ok_or("TTC face offset out of bounds")?;
    let offset_table = parse_offset_table(font_data)?;

    // Parse table records with raw offsets (not yet validated as slices).
    let table_recs_raw = parse_table_records_raw(font_data, &offset_table)?;

    // For TTC, offsets in the table directory are absolute from the TTC file start (offset 0 of full data).
    // When we read from font_data, the offsets are still absolute (they're just numbers in the directory).
    // For TTF, font_data_start == 0, so offsets are naturally correct.
    // Records whose range does not fit inside `data` are skipped; `checked_add`
    // prevents `offset + length` from wrapping where `usize` is 32 bits wide.
    let table_records: Vec<(String, &[u8])> = table_recs_raw
        .into_iter()
        .filter_map(|(tag, raw_offset, length)| {
            let end = raw_offset.checked_add(length)?;
            data.get(raw_offset..end).map(|slice| (tag, slice))
        })
        .collect();

    // Get required tables from the records.
    let mut tables = std::collections::HashMap::new();
    for (tag, slice) in table_records.iter() {
        tables.insert(tag.as_str(), *slice);
    }

    let head = *tables.get("head").ok_or("required table head not found")?;
    let hhea = *tables.get("hhea").ok_or("required table hhea not found")?;
    let maxp = *tables.get("maxp").ok_or("required table maxp not found")?;
    let glyf = *tables.get("glyf").ok_or("required table glyf not found")?;
    let loca = *tables.get("loca").ok_or("required table loca not found")?;
    let hmtx = *tables.get("hmtx").ok_or("required table hmtx not found")?;

    // Read head to determine loca format. indexToLocFormat is at bytes [50..52].
    let index_to_loc_format = if head.len() >= 52 {
        u16::from_be_bytes([head[50], head[51]]) & 0x0001
    } else {
        return Err("head table too short".into());
    };

    // Read number of metrics from hhea. numberOfHMetrics is at bytes [34..36].
    let num_h_metrics = if hhea.len() >= 36 {
        u16::from_be_bytes([hhea[34], hhea[35]]) as usize
    } else {
        return Err("hhea table too short".into());
    };

    // Read numGlyphs from maxp. numGlyphs is at bytes [4..6].
    let num_glyphs = if maxp.len() >= 6 {
        u16::from_be_bytes([maxp[4], maxp[5]]) as usize
    } else {
        return Err("maxp table too short".into());
    };

    // Parse loca to get glyph offsets.
    let glyph_offsets = parse_loca(loca, num_glyphs, index_to_loc_format as u8)?;

    // Collect all glyphs to preserve (including composite dependencies).
    let sorted_gids = remapper.sorted_gids();
    let mut gids_to_keep = BTreeSet::new();
    for &gid in &sorted_gids {
        gids_to_keep.insert(gid);
        collect_composite_deps(glyf, &glyph_offsets, gid as usize, &mut gids_to_keep)?;
    }

    // Build new glyf and loca tables.
    // Also rewrite composite glyph component GIDs to their new positions.
    let gid_remap: BTreeMap<u16, u16> = gids_to_keep
        .iter()
        .enumerate()
        .map(|(new_idx, &orig_gid)| (orig_gid, new_idx as u16))
        .collect();
    let (new_glyf, new_glyph_offsets) =
        build_glyf(&glyph_offsets, glyf, &gids_to_keep, &gid_remap)?;

    // Determine loca format. After 4-byte glyph alignment the total glyf size
    // might (rarely) exceed the short-format limit of 0xFFFF * 2 = 131070 bytes.
    let max_glyph_offset = new_glyph_offsets.last().copied().unwrap_or(0);
    let loca_format: u8 = if max_glyph_offset > 131070 {
        1
    } else {
        index_to_loc_format as u8
    };
    let new_loca = build_loca(&new_glyph_offsets, loca_format)?;

    // Build new hmtx table.
    let new_hmtx = build_hmtx(
        hmtx,
        num_h_metrics,
        face.units_per_em() as usize,
        &gids_to_keep,
    )?;

    // Patch head, hhea, maxp tables with new metrics.
    let mut new_head = head.to_vec();
    // Zero out checkSumAdjustment (bytes 8-11 of head) so assemble_font can
    // compute and write the correct value for the final subset binary.
    // The original font's value would otherwise corrupt the full-font checksum.
    if new_head.len() >= 12 {
        new_head[8..12].copy_from_slice(&[0u8; 4]);
    }
    let mut new_hhea = hhea.to_vec();
    let mut new_maxp = maxp.to_vec();

    // Update head.indexToLocFormat if we had to upgrade to long format.
    if loca_format as u16 != index_to_loc_format && new_head.len() >= 52 {
        new_head[50..52].copy_from_slice(&(loca_format as u16).to_be_bytes());
    }

    // Update hhea.numberOfHMetrics.
    // All subset glyphs are written as full 4-byte longHorMetric entries in
    // build_hmtx, so numberOfHMetrics must equal the subset glyph count.
    let num_new_metrics = gids_to_keep.len();
    if new_hhea.len() >= 36 {
        new_hhea[34..36].copy_from_slice(&(num_new_metrics as u16).to_be_bytes());
    }

    // Update hhea.advanceWidthMax and hhea.minLeftSideBearing from rebuilt hmtx.
    if new_hhea.len() >= 14 && !new_hmtx.is_empty() {
        let adv_max = new_hmtx
            .chunks_exact(4)
            .map(|c| u16::from_be_bytes([c[0], c[1]]))
            .max()
            .unwrap_or(0);
        new_hhea[10..12].copy_from_slice(&adv_max.to_be_bytes());
        let lsb_min = new_hmtx
            .chunks_exact(4)
            .map(|c| i16::from_be_bytes([c[2], c[3]]))
            .min()
            .unwrap_or(0);
        new_hhea[12..14].copy_from_slice(&lsb_min.to_be_bytes());
    }

    // Compute hhea.minRightSideBearing, hhea.xMaxExtent, and head.fontBBox
    // from the subset glyph bounding boxes in the new glyf table.
    {
        // Saturate a widened intermediate back into the i16 range of the hhea fields.
        let to_i16 = |v: i32| v.clamp(i32::from(i16::MIN), i32::from(i16::MAX)) as i16;

        let mut g_x_min = i16::MAX;
        let mut g_y_min = i16::MAX;
        let mut g_x_max = i16::MIN;
        let mut g_y_max = i16::MIN;
        // Side-bearing arithmetic is done in i32: extreme bounding boxes or
        // advances above i16::MAX would otherwise overflow 16-bit arithmetic.
        let mut min_rsb = i32::MAX;
        let mut max_extent = i32::MIN;
        let mut has_outlines = false;

        for new_idx in 0..gids_to_keep.len() {
            let start = new_glyph_offsets[new_idx] as usize;
            let end = new_glyph_offsets[new_idx + 1] as usize;
            // Glyphs without a full 10-byte header (e.g. empty glyphs) have no bbox.
            if end < start + 10 || end > new_glyf.len() {
                continue;
            }

            let hdr = &new_glyf[start..end];
            let x_min = i16::from_be_bytes([hdr[2], hdr[3]]);
            let y_min = i16::from_be_bytes([hdr[4], hdr[5]]);
            let x_max = i16::from_be_bytes([hdr[6], hdr[7]]);
            let y_max = i16::from_be_bytes([hdr[8], hdr[9]]);
            let adv = if new_idx * 4 + 2 <= new_hmtx.len() {
                i32::from(u16::from_be_bytes([
                    new_hmtx[new_idx * 4],
                    new_hmtx[new_idx * 4 + 1],
                ]))
            } else {
                0
            };
            let lsb = if new_idx * 4 + 4 <= new_hmtx.len() {
                i32::from(i16::from_be_bytes([
                    new_hmtx[new_idx * 4 + 2],
                    new_hmtx[new_idx * 4 + 3],
                ]))
            } else {
                0
            };
            let width = i32::from(x_max) - i32::from(x_min);

            g_x_min = g_x_min.min(x_min);
            g_y_min = g_y_min.min(y_min);
            g_x_max = g_x_max.max(x_max);
            g_y_max = g_y_max.max(y_max);
            min_rsb = min_rsb.min(adv - lsb - width);
            max_extent = max_extent.max(lsb + width);
            has_outlines = true;
        }

        if has_outlines {
            if new_hhea.len() >= 18 {
                new_hhea[14..16].copy_from_slice(&to_i16(min_rsb).to_be_bytes());
                new_hhea[16..18].copy_from_slice(&to_i16(max_extent).to_be_bytes());
            }
            if new_head.len() >= 44 {
                new_head[36..38].copy_from_slice(&g_x_min.to_be_bytes());
                new_head[38..40].copy_from_slice(&g_y_min.to_be_bytes());
                new_head[40..42].copy_from_slice(&g_x_max.to_be_bytes());
                new_head[42..44].copy_from_slice(&g_y_max.to_be_bytes());
            }
        }
    }

    // Update maxp.numGlyphs.
    if new_maxp.len() >= 6 {
        let bytes = (gids_to_keep.len() as u16).to_be_bytes();
        new_maxp[4..6].copy_from_slice(&bytes);
    }

    // Collect other tables to preserve (copy as-is).
    let mut output_tables: BTreeMap<String, Vec<u8>> = BTreeMap::new();
    output_tables.insert("head".into(), new_head);
    output_tables.insert("hhea".into(), new_hhea);
    output_tables.insert("maxp".into(), new_maxp);
    output_tables.insert("glyf".into(), new_glyf);
    output_tables.insert("loca".into(), new_loca);
    output_tables.insert("hmtx".into(), new_hmtx);

    // For PDF CIDFont embedding with Identity-H, only core TrueType tables and
    // hinting tables are safe to include. All other tables contain GID references
    // (GSUB, GPOS, gvar, etc.) or glyph counts (post, vhea/vmtx) that become
    // inconsistent after subsetting, causing Core Text / PDF viewers to reject
    // the embedded font and render all glyphs as replacement characters (●).
    for (tag, data_slice) in table_records.iter() {
        // Skip tables already rebuilt above.
        if matches!(tag.as_str(), "head" | "hhea" | "maxp" | "glyf" | "loca" | "hmtx") {
            continue;
        }
        // Include only hinting tables: safe (no GID references).
        if matches!(tag.as_str(), "fpgm" | "prep" | "cvt " | "gasp") {
            output_tables.insert(tag.clone(), data_slice.to_vec());
        }
        // Everything else is dropped:
        // - GSUB/GPOS/GDEF/BASE: OpenType layout with stale GID refs after subsetting
        // - gvar/fvar/avar/HVAR/STAT: variable-font tables with GID-indexed data
        // - post: numGlyphs field mismatches maxp.numGlyphs after subsetting
        // - vhea/vmtx: vertical metrics indexed by GID, not rebuilt for subset
        // - cmap/OS/2/VORG: PDF Identity-H handles encoding; not needed
        // - name/kern/morx/mort/JSTF/...: not used for PDF CIDFont rendering
    }

    // Assemble the new font binary.
    let font_bytes = assemble_font(&output_tables, offset_table.is_truetype)?;
    Ok((font_bytes, gids_to_keep))
}

// ──────────────────────────────────────────────────────────────────────────

/// The fields of the sfnt offset table (the 12-byte font header) used by this module.
struct OffsetTable {
    /// True when the sfnt version is 0x00010000 or the tag 'true'.
    is_truetype: bool,
    /// Number of table-directory records announced by the header.
    num_tables: usize,
}

/// Reads the sfnt version and table count from the first 12 bytes of `data`.
///
/// # Errors
/// Fails with "font too short for offset table" when fewer than 12 bytes are available.
fn parse_offset_table(data: &[u8]) -> Result<OffsetTable, Box<dyn std::error::Error>> {
    const SFNT_TRUETYPE: u32 = 0x0001_0000;
    const SFNT_APPLE_TRUE: u32 = 0x7472_7565; // 'true'

    let header = match data.get(..12) {
        Some(bytes) => bytes,
        None => return Err("font too short for offset table".into()),
    };

    let version = u32::from_be_bytes([header[0], header[1], header[2], header[3]]);
    let table_count = u16::from_be_bytes([header[4], header[5]]);

    Ok(OffsetTable {
        is_truetype: matches!(version, SFNT_TRUETYPE | SFNT_APPLE_TRUE),
        num_tables: usize::from(table_count),
    })
}

/// Reads the table directory that follows the 12-byte offset table in `data`.
///
/// Yields one `(tag, offset, length)` triple per directory record, in file
/// order. Offsets and lengths are returned exactly as stored; they are not
/// checked against the size of the font here.
///
/// # Errors
/// Fails with "table directory truncated" if `data` ends before all
/// `offset_table.num_tables` records (16 bytes each) have been read.
#[allow(clippy::type_complexity)]
fn parse_table_records_raw(
    data: &[u8],
    offset_table: &OffsetTable,
) -> Result<Vec<(String, usize, usize)>, Box<dyn std::error::Error>> {
    const DIRECTORY_START: usize = 12;
    const RECORD_LEN: usize = 16;

    let read_u32 =
        |bytes: &[u8]| u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize;

    let mut records = Vec::new();
    for index in 0..offset_table.num_tables {
        let start = DIRECTORY_START + index * RECORD_LEN;
        // Record layout: tag(4) | checksum(4) | offset(4) | length(4).
        let record = data
            .get(start..start + RECORD_LEN)
            .ok_or("table directory truncated")?;
        let tag = String::from_utf8_lossy(&record[..4]).into_owned();
        records.push((tag, read_u32(&record[8..12]), read_u32(&record[12..16])));
    }
    Ok(records)
}

// ──────────────────────────────────────────────────────────────────────────

/// Decodes the first `num_glyphs + 1` entries of a `loca` table into byte offsets.
///
/// `format == 0` selects the short layout (big-endian `u16` entries holding
/// offset / 2, so each value is doubled); any other value selects the long
/// layout (big-endian `u32` entries). Bytes past the needed entries are ignored.
///
/// # Errors
/// Fails with "loca table truncated" when `loca` holds fewer entries than required.
fn parse_loca(
    loca: &[u8],
    num_glyphs: usize,
    format: u8,
) -> Result<Vec<u32>, Box<dyn std::error::Error>> {
    let short_format = format == 0;
    let entry_size: usize = if short_format { 2 } else { 4 };

    // Exactly the bytes covering the `num_glyphs + 1` entries we need.
    let entries = num_glyphs
        .checked_add(1)
        .and_then(|count| count.checked_mul(entry_size))
        .and_then(|needed| loca.get(..needed))
        .ok_or("loca table truncated")?;

    let offsets: Vec<u32> = if short_format {
        entries
            .chunks_exact(2)
            .map(|e| u32::from(u16::from_be_bytes([e[0], e[1]])) * 2)
            .collect()
    } else {
        entries
            .chunks_exact(4)
            .map(|e| u32::from_be_bytes([e[0], e[1], e[2], e[3]]))
            .collect()
    };
    Ok(offsets)
}

// ──────────────────────────────────────────────────────────────────────────

/// Adds every glyph reachable through composite components of `gid` to `visited`.
///
/// `gid` itself is NOT inserted; the caller is expected to have done so.
/// Component GIDs already present in `visited` are not descended into again,
/// which also terminates on cyclic component references.
///
/// Malformed input is tolerated rather than reported: a GID outside
/// `glyph_offsets`, a glyph range that is reversed or outside `glyf`, or a
/// truncated component record simply ends the walk for that glyph. The
/// function currently always returns `Ok(())`.
fn collect_composite_deps(
    glyf: &[u8],
    glyph_offsets: &[u32],
    gid: usize,
    visited: &mut BTreeSet<u16>,
) -> Result<(), Box<dyn std::error::Error>> {
    // Composite glyph component flags.
    const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001;
    const WE_HAVE_A_SCALE: u16 = 0x0008;
    const MORE_COMPONENTS: u16 = 0x0020;
    const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040;
    const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080;

    // Explicit work list instead of recursion: nesting depth is controlled by
    // the font, so a long component chain must not be able to exhaust the stack.
    let mut pending = vec![gid];

    while let Some(current) = pending.pop() {
        // Glyph `current` occupies glyf[offsets[current]..offsets[current + 1]].
        let range = match (
            glyph_offsets.get(current),
            current.checked_add(1).and_then(|next| glyph_offsets.get(next)),
        ) {
            (Some(&start), Some(&end)) => start as usize..end as usize,
            _ => continue, // GID outside the loca table
        };
        // `get` rejects both out-of-bounds and reversed (start > end) ranges.
        let glyph_data = match glyf.get(range) {
            Some(bytes) if bytes.len() >= 2 => bytes,
            _ => continue, // Invalid or empty glyph
        };

        let num_contours = i16::from_be_bytes([glyph_data[0], glyph_data[1]]);
        if num_contours >= 0 {
            continue; // Simple glyph, no components
        }

        // Composite glyph: walk the component records after the 10-byte header
        // (numberOfContours + xMin, yMin, xMax, yMax).
        let mut offset = 10;
        while offset + 4 <= glyph_data.len() {
            let flags = u16::from_be_bytes([glyph_data[offset], glyph_data[offset + 1]]);
            let comp_gid = u16::from_be_bytes([glyph_data[offset + 2], glyph_data[offset + 3]]);

            if visited.insert(comp_gid) {
                pending.push(usize::from(comp_gid));
            }

            if flags & MORE_COMPONENTS == 0 {
                break; // No more components
            }

            // Skip flags + glyphIndex, the two arguments, and the optional transform.
            let args_len = if flags & ARG_1_AND_2_ARE_WORDS != 0 { 4 } else { 2 };
            let transform_len = if flags & WE_HAVE_A_SCALE != 0 {
                2 // F2Dot14
            } else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 {
                4 // Two F2Dot14
            } else if flags & WE_HAVE_A_TWO_BY_TWO != 0 {
                8 // 2x2 matrix
            } else {
                0
            };
            offset += 4 + args_len + transform_len;
        }
    }

    Ok(())
}

// ──────────────────────────────────────────────────────────────────────────

/// Builds the subset `glyf` table and the matching glyph offsets.
///
/// Glyphs are emitted in ascending original-GID order (the iteration order of
/// `gids_to_keep`). Composite glyphs (negative `numberOfContours`) have their
/// component GIDs rewritten through `gid_remap`; all other glyph data is copied
/// verbatim. Each glyph is zero-padded to a 4-byte boundary.
///
/// Returns `(glyf_bytes, offsets)` where `offsets` has one entry per kept glyph
/// plus a final end offset, i.e. `gids_to_keep.len() + 1` entries starting at 0.
///
/// # Errors
/// - "glyph index out of range" if a kept GID has no start/end pair in `glyph_offsets`.
/// - "glyph offset out of bounds" if a glyph's range lies outside `glyf` or is
///   reversed (start after end), as can happen with a malformed `loca` table.
fn build_glyf(
    glyph_offsets: &[u32],
    glyf: &[u8],
    gids_to_keep: &BTreeSet<u16>,
    gid_remap: &BTreeMap<u16, u16>,
) -> Result<(Vec<u8>, Vec<u32>), Box<dyn std::error::Error>> {
    let mut new_glyf = Vec::new();
    let mut new_offsets = Vec::with_capacity(gids_to_keep.len() + 1);
    new_offsets.push(0u32);

    for &gid in gids_to_keep {
        let index = usize::from(gid);
        let (start, end) = match (glyph_offsets.get(index), glyph_offsets.get(index + 1)) {
            (Some(&start), Some(&end)) => (start as usize, end as usize),
            _ => return Err("glyph index out of range".into()),
        };
        // `get` fails for out-of-bounds AND reversed ranges, so a non-monotonic
        // loca table yields an error instead of a slice-index panic.
        let glyph_data = glyf
            .get(start..end)
            .ok_or("glyph offset out of bounds")?;

        let is_composite =
            glyph_data.len() >= 2 && i16::from_be_bytes([glyph_data[0], glyph_data[1]]) < 0;
        if is_composite {
            // Composite glyph: copy with component GIDs rewritten to new positions.
            new_glyf.extend_from_slice(&rewrite_composite_gids(glyph_data, gid_remap));
        } else {
            new_glyf.extend_from_slice(glyph_data);
        }

        // Align next glyph to 4-byte boundary (TrueType spec requirement).
        let padded_len = (new_glyf.len() + 3) & !3;
        new_glyf.resize(padded_len, 0u8);
        new_offsets.push(new_glyf.len() as u32);
    }

    Ok((new_glyf, new_offsets))
}

/// Returns a copy of a composite glyph record with its component glyph indices
/// translated through `gid_remap` (original GID → new GID).
///
/// Component records are walked starting after the 10-byte glyph header.
/// A component whose GID has no entry in `gid_remap` is left unchanged, and
/// the walk stops at the first component without the "more components" flag
/// or when fewer than 4 bytes remain.
fn rewrite_composite_gids(glyph_data: &[u8], gid_remap: &BTreeMap<u16, u16>) -> Vec<u8> {
    const ARGS_ARE_WORDS: u16 = 0x0001;
    const HAS_SCALE: u16 = 0x0008;
    const MORE_COMPONENTS: u16 = 0x0020;
    const HAS_XY_SCALE: u16 = 0x0040;
    const HAS_TWO_BY_TWO: u16 = 0x0080;
    // numberOfContours + bounding box.
    const GLYPH_HEADER_LEN: usize = 10;

    let mut rewritten = glyph_data.to_vec();
    let mut cursor = GLYPH_HEADER_LEN;

    while cursor + 4 <= rewritten.len() {
        let flags = u16::from_be_bytes([rewritten[cursor], rewritten[cursor + 1]]);
        let old_gid = u16::from_be_bytes([rewritten[cursor + 2], rewritten[cursor + 3]]);

        // Patch the glyphIndex field in place when a new position is known.
        if let Some(new_gid) = gid_remap.get(&old_gid) {
            rewritten[cursor + 2..cursor + 4].copy_from_slice(&new_gid.to_be_bytes());
        }

        if flags & MORE_COMPONENTS == 0 {
            break; // Last component
        }

        // Size of the two arguments: i16 pair or i8 pair.
        let args_len = if flags & ARGS_ARE_WORDS != 0 { 4 } else { 2 };
        // Size of the optional transform that follows the arguments.
        let transform_len = if flags & HAS_SCALE != 0 {
            2 // F2Dot14
        } else if flags & HAS_XY_SCALE != 0 {
            4 // Two F2Dot14
        } else if flags & HAS_TWO_BY_TWO != 0 {
            8 // 2x2 matrix
        } else {
            0
        };
        cursor += 4 + args_len + transform_len;
    }

    rewritten
}

/// Serializes glyph offsets into a `loca` table.
///
/// `format == 0` writes the short layout (big-endian `u16` holding offset / 2);
/// any other value writes the long layout (big-endian `u32`).
///
/// # Errors
/// In short format, fails if an offset is odd or exceeds 131070 (0xFFFF * 2),
/// since such an offset cannot be stored and would otherwise be silently
/// truncated into a corrupt table. The long format never fails.
fn build_loca(glyph_offsets: &[u32], format: u8) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    if format == 0 {
        let mut loca = Vec::with_capacity(glyph_offsets.len() * 2);
        for &offset in glyph_offsets {
            let half = offset / 2;
            if offset % 2 != 0 || half > u32::from(u16::MAX) {
                return Err("glyph offset not representable in short loca format".into());
            }
            // Lossless: `half` was just checked to fit in 16 bits.
            loca.extend_from_slice(&(half as u16).to_be_bytes());
        }
        Ok(loca)
    } else {
        let mut loca = Vec::with_capacity(glyph_offsets.len() * 4);
        for &offset in glyph_offsets {
            loca.extend_from_slice(&offset.to_be_bytes());
        }
        Ok(loca)
    }
}

/// Builds the subset `hmtx` table: one 4-byte longHorMetric (advance width +
/// left side bearing) per kept glyph, in ascending original-GID order.
///
/// - GIDs below `num_h_metrics` copy their full entry from `hmtx`.
/// - GIDs at or above it take the advance of the last full entry and their
///   bearing from the lsb-only array that follows the full entries.
///
/// Where the source table is too short to supply a value, the advance falls
/// back to `units_per_em` (truncated to `u16`) and the bearing to 0. The
/// function currently always returns `Ok`.
fn build_hmtx(
    hmtx: &[u8],
    num_h_metrics: usize,
    units_per_em: usize,
    gids_to_keep: &BTreeSet<u16>,
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    let fallback_advance = (units_per_em as u16).to_be_bytes();
    let fallback_lsb = 0i16.to_be_bytes();
    // Start of the last longHorMetric entry; its advance width is shared by
    // every glyph in the lsb-only section.
    let shared_advance_at = num_h_metrics.saturating_sub(1) * 4;

    let mut out = Vec::new();
    for glyph in gids_to_keep.iter().map(|&gid| usize::from(gid)) {
        if glyph < num_h_metrics {
            // Full longHorMetric entry: 4 bytes (advance_width + lsb).
            let entry_at = glyph * 4;
            match hmtx.get(entry_at..entry_at + 4) {
                Some(entry) => out.extend_from_slice(entry),
                None => {
                    out.extend_from_slice(&fallback_advance);
                    out.extend_from_slice(&fallback_lsb);
                }
            }
            continue;
        }

        // "Mono" glyph: advance comes from the last longHorMetric entry...
        let advance = hmtx
            .get(shared_advance_at..shared_advance_at + 2)
            .unwrap_or(&fallback_advance[..]);
        out.extend_from_slice(advance);

        // ...and the bearing from the lsb-only array after the full entries.
        let lsb_at = num_h_metrics * 4 + (glyph - num_h_metrics) * 2;
        let lsb = hmtx.get(lsb_at..lsb_at + 2).unwrap_or(&fallback_lsb[..]);
        out.extend_from_slice(lsb);
    }

    Ok(out)
}

// ──────────────────────────────────────────────────────────────────────────

/// Computes the OpenType/TrueType table checksum of `data`: the wrapping sum
/// of its big-endian 32-bit words, where a trailing partial word (1-3 bytes)
/// is padded with zero bytes on the right before being added.
fn calc_checksum(data: &[u8]) -> u32 {
    let words = data.chunks_exact(4);
    let tail = words.remainder();

    let whole_words_sum = words
        .map(|w| u32::from_be_bytes([w[0], w[1], w[2], w[3]]))
        .fold(0u32, u32::wrapping_add);

    if tail.is_empty() {
        return whole_words_sum;
    }

    let mut last_word = [0u8; 4];
    last_word[..tail.len()].copy_from_slice(tail);
    whole_words_sum.wrapping_add(u32::from_be_bytes(last_word))
}

/// Serializes `tables` into a complete sfnt font binary.
///
/// Layout: 12-byte offset table, one 16-byte directory record per table (in
/// the map's ascending tag order), then the table data with each table starting
/// on a 4-byte boundary. Per-table checksums are filled in, and if a `head`
/// table of at least 12 bytes is present its `checkSumAdjustment` field
/// (bytes 8-11) is set to `0xB1B0AFBA - checksum(whole font)`.
///
/// `is_truetype` selects the sfnt version: `0x00010000` when true, `'OTTO'` otherwise.
///
/// # Errors
/// Fails if `tables` is empty, holds more than 4095 tables (the header's
/// 16-bit search fields would overflow), or contains a tag that is not
/// exactly 4 bytes long (the directory record would be malformed).
fn assemble_font(
    tables: &BTreeMap<String, Vec<u8>>,
    is_truetype: bool,
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    let num_tables = tables.len();
    if num_tables == 0 {
        return Err("cannot assemble a font with no tables".into());
    }
    if num_tables > 4095 {
        return Err("too many tables for sfnt table directory".into());
    }
    if tables.keys().any(|tag| tag.len() != 4) {
        return Err("table tag must be exactly 4 bytes".into());
    }

    // Binary-search helper fields of the offset table (all u16).
    // entrySelector = floor(log2(numTables)); searchRange = 2^entrySelector * 16.
    let table_count = num_tables as u16; // lossless: checked to be <= 4095 above
    let entry_selector: u16 = 15 - table_count.leading_zeros() as u16;
    let search_range: u16 = (1u16 << entry_selector) * 16;
    let range_shift: u16 = table_count * 16 - search_range;

    // Plan where each table will live: data starts right after the directory
    // and every table begins on a 4-byte boundary.
    let mut next_offset = 12 + num_tables * 16;
    let mut table_offsets: Vec<(&str, usize, usize)> = Vec::with_capacity(num_tables);
    for (tag, data) in tables {
        next_offset = (next_offset + 3) & !3;
        table_offsets.push((tag.as_str(), next_offset, data.len()));
        next_offset += data.len();
    }

    let mut font = Vec::with_capacity(next_offset);

    // Offset table: sfVersion(4) + numTables(2) + searchRange(2) + entrySelector(2) + rangeShift(2) = 12 bytes.
    let scaler = if is_truetype {
        0x00010000u32
    } else {
        0x4F544F54u32
    }; // 0x00010000 or 'OTTO'
    font.extend_from_slice(&scaler.to_be_bytes());
    font.extend_from_slice(&table_count.to_be_bytes());
    font.extend_from_slice(&search_range.to_be_bytes());
    font.extend_from_slice(&entry_selector.to_be_bytes());
    font.extend_from_slice(&range_shift.to_be_bytes());

    // Table directory (num_tables × 16 bytes): tag(4) | checksum(4) | offset(4) | length(4).
    for &(tag, offset, length) in &table_offsets {
        font.extend_from_slice(tag.as_bytes());
        // Placeholder checksum (0), filled in below.
        font.extend_from_slice(&0u32.to_be_bytes());
        font.extend_from_slice(&(offset as u32).to_be_bytes());
        font.extend_from_slice(&(length as u32).to_be_bytes());
    }

    // Table data, zero-padded up to each table's planned (4-byte aligned) offset.
    for (&(_, offset, _), data) in table_offsets.iter().zip(tables.values()) {
        font.resize(offset, 0);
        font.extend_from_slice(data);
    }

    // Locate head.checkSumAdjustment (bytes 8-11 of the head table). A head
    // table shorter than 12 bytes has no such field, and writing there would
    // clobber whatever follows it.
    let mut adjustment_pos = None;
    for &(tag, offset, length) in &table_offsets {
        if tag == "head" && length >= 12 {
            adjustment_pos = Some(offset + 8);
            break;
        }
    }

    // Both the head table checksum and the whole-font sum must be computed
    // with checkSumAdjustment = 0, so clear any value the caller left there.
    if let Some(pos) = adjustment_pos {
        font[pos..pos + 4].copy_from_slice(&[0u8; 4]);
    }

    // --- Fill in per-table checksums ---
    // The directory entries are at positions 12, 28, 44, ... (12 + i*16).
    for (i, &(_, offset, length)) in table_offsets.iter().enumerate() {
        let checksum = calc_checksum(&font[offset..offset + length]);
        let cs_pos = 12 + i * 16 + 4;
        font[cs_pos..cs_pos + 4].copy_from_slice(&checksum.to_be_bytes());
    }

    // --- Compute and write head.checkSumAdjustment ---
    // Per spec: sum the entire font as 32-bit big-endian words, then store
    // 0xB1B0AFBA - sum in the (currently zero) adjustment field.
    if let Some(pos) = adjustment_pos {
        let full_sum = calc_checksum(&font);
        let adjustment = 0xB1B0AFBAu32.wrapping_sub(full_sum);
        font[pos..pos + 4].copy_from_slice(&adjustment.to_be_bytes());
    }

    Ok(font)
}