// ttf_rs/subset.rs

1use crate::error::{Result, TtfError};
2use crate::font::Font;
3use crate::stream::FontWriter;
4use crate::tables::glyf::GlyphData;
5use crate::tables::cmap::{CmapSubtable, Format4};
6use std::collections::{HashMap, HashSet};
7
8/// FontSubset allows creating a subset of a font with only specified glyphs
9pub struct FontSubset {
10    font: Font,
11    glyph_ids: HashSet<u32>,
12    retain_tables: HashSet<[u8; 4]>,
13}
14
15impl FontSubset {
16    /// Create a new subset builder
17    pub fn new(font: Font) -> Self {
18        Self {
19            font,
20            glyph_ids: HashSet::new(),
21            retain_tables: HashSet::new(),
22        }
23    }
24
25    /// Add glyphs to keep in the subset
26    pub fn with_glyphs(&mut self, glyph_ids: &[u32]) -> &mut Self {
27        for &id in glyph_ids {
28            self.glyph_ids.insert(id);
29        }
30        self
31    }
32
33    /// Add characters to keep (will be mapped to their glyphs)
34    pub fn with_chars(&mut self, chars: &[char]) -> Result<&mut Self> {
35        for &c in chars {
36            let glyph_id = self.font.char_to_glyph(c)?;
37            self.glyph_ids.insert(glyph_id as u32);
38        }
39        Ok(self)
40    }
41
42    /// Retain specific tables (all tables are retained by default)
43    pub fn retain_tables(&mut self, tables: &[[u8; 4]]) -> &mut Self {
44        for &tag in tables {
45            self.retain_tables.insert(tag);
46        }
47        self
48    }
49
50    /// Build the subset font
51    pub fn build(&self) -> Result<Font> {
52        if self.glyph_ids.is_empty() {
53            return Err(TtfError::ParseError("No glyphs specified for subsetting".to_string()));
54        }
55
56        let mut subset_font = self.font.clone();
57
58        // Create glyph ID mapping (old -> new)
59        let mut glyph_map: HashMap<u32, u32> = HashMap::new();
60        let mut sorted_glyphs: Vec<u32> = self.glyph_ids.iter().cloned().collect();
61        sorted_glyphs.sort();
62
63        for (new_id, old_id) in sorted_glyphs.iter().enumerate() {
64            glyph_map.insert(*old_id, new_id as u32);
65        }
66
67        // Always include glyph 0 (.notdef)
68        if !self.glyph_ids.contains(&0) {
69            glyph_map.insert(0, 0);
70            sorted_glyphs.insert(0, 0);
71        }
72
73        // Create subset of tables
74        self.subset_glyf_table(&mut subset_font, &glyph_map)?;
75        self.subset_loca_table(&mut subset_font, &glyph_map)?;
76        self.subset_hmtx_table(&mut subset_font, &glyph_map)?;
77        self.subset_cmap_table(&mut subset_font, &glyph_map)?;
78
79        // Update maxp table
80        self.update_maxp_table(&mut subset_font)?;
81
82        // Remove unused tables if specified
83        if !self.retain_tables.is_empty() {
84            subset_font.table_records.retain(|r| self.retain_tables.contains(&r.table_tag));
85        }
86
87        Ok(subset_font)
88    }
89
90    /// Subset the glyf table
91    fn subset_glyf_table(&self, font: &mut Font, glyph_map: &HashMap<u32, u32>) -> Result<()> {
92        let glyf_table = self.font.glyf_table()?;
93        let mut new_glyphs = Vec::new();
94
95        for (old_id, new_id) in glyph_map {
96            if let Some(glyph) = glyf_table.get_glyph(*old_id as usize) {
97                new_glyphs.push((*new_id as usize, glyph.clone()));
98            }
99        }
100
101        // Sort by new glyph ID
102        new_glyphs.sort_by_key(|(id, _)| *id);
103
104        // Create new glyf data
105        let mut writer = FontWriter::new();
106
107        for (_, glyph) in &new_glyphs {
108            match &glyph.data {
109                GlyphData::Simple(simple) => {
110                    writer.write_i16(glyph.number_of_contours);
111                    writer.write_i16(glyph.x_min);
112                    writer.write_i16(glyph.y_min);
113                    writer.write_i16(glyph.x_max);
114                    writer.write_i16(glyph.y_max);
115
116                    for &end_pt in &simple.end_pts_of_contours {
117                        writer.write_u16(end_pt);
118                    }
119
120                    writer.write_u16(simple.instruction_length);
121                    for &instr in &simple.instructions {
122                        writer.write_u8(instr);
123                    }
124
125                    for &flag in &simple.flags {
126                        writer.write_u8(flag);
127                    }
128
129                    for &x in &simple.x_coordinates {
130                        writer.write_i16(x);
131                    }
132
133                    for &y in &simple.y_coordinates {
134                        writer.write_i16(y);
135                    }
136                }
137                GlyphData::Composite(composite) => {
138                    writer.write_i16(glyph.number_of_contours);
139                    writer.write_i16(glyph.x_min);
140                    writer.write_i16(glyph.y_min);
141                    writer.write_i16(glyph.x_max);
142                    writer.write_i16(glyph.y_max);
143
144                    for component in &composite.components {
145                        writer.write_u16(component.flags);
146                        let new_glyph_id = glyph_map.get(&(component.glyph_index as u32))
147                            .copied()
148                            .unwrap_or(component.glyph_index as u32);
149                        writer.write_u16(new_glyph_id as u16);
150                        writer.write_i16(component.arg1);
151                        writer.write_i16(component.arg2);
152                    }
153                }
154                GlyphData::Empty => {
155                    writer.write_i16(0);
156                    writer.write_i16(0);
157                    writer.write_i16(0);
158                    writer.write_i16(0);
159                    writer.write_i16(0);
160                }
161            }
162        }
163
164        // Update font data (simplified)
165        if let Some(record) = font.get_table_record(b"glyf") {
166            let data = writer.into_inner();
167            let offset = record.offset as usize;
168            if offset + data.len() <= font.data.len() {
169                font.data[offset..offset + data.len()].copy_from_slice(&data);
170                if let Some(record) = font.table_records.iter_mut().find(|r| r.table_tag == *b"glyf") {
171                    record.length = data.len() as u32;
172                }
173            }
174        }
175
176        Ok(())
177    }
178
179    /// Subset the loca table
180    fn subset_loca_table(&self, font: &mut Font, glyph_map: &HashMap<u32, u32>) -> Result<()> {
181        let head = self.font.head_table()?;
182        let num_glyphs = glyph_map.len();
183
184        let mut offsets = Vec::new();
185        let mut current_offset = 0u32;
186
187        for i in 0..num_glyphs {
188            offsets.push(current_offset);
189
190            // Get glyph size (simplified - in practice you'd calculate this from glyf data)
191            if let Some(glyph) = self.font.glyf_table()?.get_glyph(i) {
192                let size = match &glyph.data {
193                    GlyphData::Simple(simple) => {
194                        10 + (simple.end_pts_of_contours.len() * 2) as u32 +
195                        simple.instruction_length as u32 +
196                        simple.flags.len() as u32 +
197                        (simple.x_coordinates.len() * 2) as u32 +
198                        (simple.y_coordinates.len() * 2) as u32
199                    }
200                    GlyphData::Composite(composite) => {
201                        10 + (composite.components.len() * 8) as u32
202                    }
203                    GlyphData::Empty => 10,
204                };
205                current_offset += size;
206            }
207        }
208
209        let mut writer = FontWriter::new();
210        if head.is_long_loca_format() {
211            for &offset in &offsets {
212                writer.write_u32(offset);
213            }
214        } else {
215            for &offset in &offsets {
216                writer.write_u16((offset / 2) as u16);
217            }
218        }
219
220        // Update font data (simplified)
221        if let Some(record) = font.get_table_record(b"loca") {
222            let data = writer.into_inner();
223            let offset = record.offset as usize;
224            if offset + data.len() <= font.data.len() {
225                font.data[offset..offset + data.len()].copy_from_slice(&data);
226                if let Some(record) = font.table_records.iter_mut().find(|r| r.table_tag == *b"loca") {
227                    record.length = data.len() as u32;
228                }
229            }
230        }
231
232        Ok(())
233    }
234
235    /// Subset the hmtx table
236    fn subset_hmtx_table(&self, font: &mut Font, glyph_map: &HashMap<u32, u32>) -> Result<()> {
237        let hmtx = self.font.hmtx_table()?;
238        let _hhea = self.font.hhea_table()?;
239
240        let mut writer = FontWriter::new();
241
242        for i in 0..glyph_map.len() {
243            let advance = hmtx.get_advance_width(i as u16);
244            let lsb = hmtx.get_lsb(i as u16);
245
246            writer.write_u16(advance);
247            writer.write_i16(lsb);
248        }
249
250        // Update font data (simplified)
251        if let Some(record) = font.get_table_record(b"hmtx") {
252            let data = writer.into_inner();
253            let offset = record.offset as usize;
254            if offset + data.len() <= font.data.len() {
255                font.data[offset..offset + data.len()].copy_from_slice(&data);
256                if let Some(record) = font.table_records.iter_mut().find(|r| r.table_tag == *b"hmtx") {
257                    record.length = data.len() as u32;
258                }
259            }
260        }
261
262        Ok(())
263    }
264
265    /// Subset the cmap table
266    fn subset_cmap_table(&self, font: &mut Font, glyph_map: &HashMap<u32, u32>) -> Result<()> {
267        let cmap = self.font.cmap_table()?;
268
269        // Build character to new glyph mapping for all characters in the original cmap
270        let mut char_to_new_glyph: Vec<(u32, u32)> = Vec::new();
271
272        // Iterate through all possible characters and find mappings
273        for subtable in &cmap.subtables {
274            match subtable {
275                CmapSubtable::Format4(format4) => {
276                    // For Format 4, collect all character mappings
277                    for seg in 0..format4.seg_count() as usize {
278                        let start_code = format4.start_codes[seg];
279                        let end_code = format4.end_codes[seg];
280                        let id_delta = format4.id_deltas[seg];
281
282                        for char_code in start_code..=end_code {
283                            let old_glyph = if format4.id_range_offsets[seg] == 0 {
284                                ((char_code as i32 + id_delta as i32) as u16)
285                            } else {
286                                // For complex cases, use the lookup
287                                if let Some(g) = format4.get_glyph(char_code) {
288                                    g
289                                } else {
290                                    continue;
291                                }
292                            };
293
294                            // Check if this glyph is in our subset
295                            if let Some(&new_glyph) = glyph_map.get(&(old_glyph as u32)) {
296                                char_to_new_glyph.push((char_code as u32, new_glyph));
297                            }
298                        }
299                    }
300                }
301                CmapSubtable::Format6(format6) => {
302                    for (i, &old_glyph) in format6.glyph_id_array.iter().enumerate() {
303                        let char_code = format6.first_code as u32 + i as u32;
304                        if let Some(&new_glyph) = glyph_map.get(&(old_glyph as u32)) {
305                            char_to_new_glyph.push((char_code, new_glyph));
306                        }
307                    }
308                }
309                CmapSubtable::Format12(format12) => {
310                    for group in &format12.groups {
311                        for char_code in group.start_char_code..=group.end_char_code {
312                            let old_glyph = group.start_glyph_code + (char_code - group.start_char_code);
313                            if let Some(&new_glyph) = glyph_map.get(&old_glyph) {
314                                char_to_new_glyph.push((char_code, new_glyph));
315                            }
316                        }
317                    }
318                }
319                CmapSubtable::Format13(format13) => {
320                    for group in &format13.groups {
321                        for char_code in group.start_char_code..=group.end_char_code {
322                            if let Some(&new_glyph) = glyph_map.get(&group.glyph_code) {
323                                char_to_new_glyph.push((char_code, new_glyph));
324                            }
325                        }
326                    }
327                }
328                _ => {
329                    // Format 0 and Format 14 are less common, skip for now
330                }
331            }
332        }
333
334        // Remove duplicates and sort
335        char_to_new_glyph.sort();
336        char_to_new_glyph.dedup();
337
338        // Build a new Format 4 subtable with the subset
339        if !char_to_new_glyph.is_empty() {
340            let new_format4 = self.build_format4_subset(&char_to_new_glyph)?;
341            self.write_cmap_subset(font, &new_format4)?;
342        }
343
344        Ok(())
345    }
346
347    /// Build a Format 4 subtable from character mappings
348    fn build_format4_subset(&self, mappings: &[(u32, u32)]) -> Result<Format4> {
349        if mappings.is_empty() {
350            return Err(TtfError::ParseError("No mappings for subset".to_string()));
351        }
352
353        // Build segments from continuous character ranges with sequential glyphs
354        let mut segments: Vec<(u16, u16, i16)> = Vec::new(); // (start, end, id_delta)
355
356        let mut current_start = mappings[0].0 as u16;
357        let mut current_end = current_start;
358        let mut current_glyph_delta = (mappings[0].1 as i32 - mappings[0].0 as i32) as i16;
359
360        for &(char_code, new_glyph) in mappings.iter().skip(1) {
361            let char_code = char_code as u16;
362            let expected_glyph = (char_code as i32 + current_glyph_delta as i32) as u32;
363
364            if char_code == current_end + 1 && new_glyph == expected_glyph {
365                // Continue current segment
366                current_end = char_code;
367            } else {
368                // Start new segment
369                segments.push((current_start, current_end, current_glyph_delta));
370                current_start = char_code;
371                current_end = char_code;
372                current_glyph_delta = (new_glyph as i32 - char_code as i32) as i16;
373            }
374        }
375
376        // Add last segment
377        segments.push((current_start, current_end, current_glyph_delta));
378
379        // Add the final sentinel segment (0xFFFF, 0xFFFF, 1)
380        segments.push((0xFFFF, 0xFFFF, 1));
381
382        // Extract arrays from segments
383        let seg_count = segments.len() as u16;
384        let seg_count_x2 = seg_count * 2;
385        let end_codes: Vec<u16> = segments.iter().map(|(_, end, _)| *end).collect();
386        let start_codes: Vec<u16> = segments.iter().map(|(start, _, _)| *start).collect();
387        let id_deltas: Vec<i16> = segments.iter().map(|(_, _, delta)| *delta).collect();
388
389        // Calculate search range, entry selector, range_shift
390        let mut search_range = 1u16;
391        let mut entry_selector = 0u16;
392        while search_range * 2 <= seg_count_x2 {
393            search_range *= 2;
394            entry_selector += 1;
395        }
396        let range_shift = seg_count_x2 - search_range;
397
398        // For this simplified implementation, id_range_offsets are all 0
399        let id_range_offsets = vec![0u16; seg_count as usize];
400        let glyph_id_array = Vec::new(); // Empty since we use id_delta
401
402        let length = 2 + 2 + 2 + 2 + 2 + 2 + 2 // header
403            + (seg_count as u16 * 2) * 4 // end_codes, start_codes, id_deltas, id_range_offsets
404            + 2 // reserved
405            + (glyph_id_array.len() as u16 * 2);
406
407        Ok(Format4 {
408            format: 4,
409            length,
410            language: 0,
411            seg_count_x2,
412            search_range,
413            entry_selector,
414            range_shift,
415            end_codes,
416            start_codes,
417            id_deltas,
418            id_range_offsets,
419            glyph_id_array,
420        })
421    }
422
423    /// Write the subset cmap back to the font
424    fn write_cmap_subset(&self, font: &mut Font, format4: &Format4) -> Result<()> {
425        let mut writer = FontWriter::new();
426
427        // cmap header
428        writer.write_u16(0); // version
429        writer.write_u16(1); // num_tables (just one subtable)
430
431        // Encoding record (Unicode BMP)
432        writer.write_u16(3); // platform_id (Windows)
433        writer.write_u16(1); // encoding_id (Unicode BMP)
434        writer.write_u32(12); // offset (after header + encoding record)
435
436        // Format 4 subtable
437        writer.write_u16(format4.format);
438        writer.write_u16(format4.length);
439        writer.write_u16(format4.language);
440        writer.write_u16(format4.seg_count_x2);
441        writer.write_u16(format4.search_range);
442        writer.write_u16(format4.entry_selector);
443        writer.write_u16(format4.range_shift);
444
445        for &end_code in &format4.end_codes {
446            writer.write_u16(end_code);
447        }
448
449        writer.write_u16(0); // reservedPad
450
451        for &start_code in &format4.start_codes {
452            writer.write_u16(start_code);
453        }
454
455        for &id_delta in &format4.id_deltas {
456            writer.write_i16(id_delta);
457        }
458
459        for &id_range_offset in &format4.id_range_offsets {
460            writer.write_u16(id_range_offset);
461        }
462
463        for &glyph_id in &format4.glyph_id_array {
464            writer.write_u16(glyph_id);
465        }
466
467        // Update font data
468        if let Some(record) = font.get_table_record(b"cmap") {
469            let data = writer.into_inner();
470            let offset = record.offset as usize;
471            if offset + data.len() <= font.data.len() {
472                font.data[offset..offset + data.len()].copy_from_slice(&data);
473                if let Some(record) = font.table_records.iter_mut().find(|r| r.table_tag == *b"cmap") {
474                    record.length = data.len() as u32;
475                }
476            }
477        }
478
479        Ok(())
480    }
481
482    /// Update the maxp table with new glyph count
483    fn update_maxp_table(&self, font: &mut Font) -> Result<()> {
484        // Update num_glyphs in maxp table
485        let new_num_glyphs = self.glyph_ids.len() as u16;
486
487        // Update font data (simplified)
488        if let Some(record) = font.get_table_record(b"maxp") {
489            let offset = record.offset as usize;
490            // num_glyphs is at offset 4 in maxp table
491            let glyph_count_offset = offset + 4;
492            if glyph_count_offset + 2 <= font.data.len() {
493                font.data[glyph_count_offset..glyph_count_offset + 2]
494                    .copy_from_slice(&new_num_glyphs.to_be_bytes());
495            }
496        }
497
498        Ok(())
499    }
500}
501
502impl Font {
503    /// Create a subset of this font
504    pub fn subset(self) -> FontSubset {
505        FontSubset::new(self)
506    }
507}