Skip to main content

ttf_rs/tables/
cmap.rs

1use crate::error::{Result, TtfError};
2use crate::stream::FontReader;
3use crate::tables::TtfTable;
4
5/// CMAP table - Character to glyph mapping
6#[derive(Debug, Clone)]
7pub struct CmapTable {
8    pub version: u16,
9    pub encoding_records: Vec<EncodingRecord>,
10    pub subtables: Vec<CmapSubtable>,
11}
12
/// One entry of the `cmap` header: identifies an encoding and says where its
/// subtable is stored.
#[derive(Debug, Clone)]
pub struct EncodingRecord {
    /// Platform identifier (e.g. 0, 1 and 3 are the platforms this file ranks
    /// when choosing a subtable).
    pub platform_id: u16,
    /// Platform-specific encoding identifier.
    pub encoding_id: u16,
    /// Offset of the subtable; the parser seeks the reader to this value as is.
    pub offset: u32,
}
19
20#[derive(Debug, Clone)]
21pub enum CmapSubtable {
22    Format0(Format0),
23    Format4(Format4),
24    Format6(Format6),
25    Format12(Format12),
26    Format13(Format13),
27    Format14(Format14),
28}
29
/// Format 0 subtable: a byte-indexed glyph array for 8-bit character codes.
#[derive(Debug, Clone)]
pub struct Format0 {
    /// Format number as read from the stream (0 for this subtable).
    pub format: u16,
    /// `length` field of the subtable header.
    pub length: u16,
    /// `language` field of the subtable header.
    pub language: u16,
    /// Glyph index per character code; the parser requests 256 bytes for it.
    pub glyph_id_array: Vec<u8>,
}
37
/// Format 4 subtable: segment mapping to delta values for 16-bit codes.
///
/// The four per-segment vectors are parallel: index `i` in each of them
/// describes segment `i`.
#[derive(Debug, Clone)]
pub struct Format4 {
    /// Format number as read from the stream (4 for this subtable).
    pub format: u16,
    /// `length` field of the subtable header, in bytes.
    pub length: u16,
    /// `language` field of the subtable header.
    pub language: u16,
    /// Twice the number of segments.
    pub seg_count_x2: u16,
    /// `searchRange` header field, stored but not used by the lookup here.
    pub search_range: u16,
    /// `entrySelector` header field, stored but not used by the lookup here.
    pub entry_selector: u16,
    /// `rangeShift` header field, stored but not used by the lookup here.
    pub range_shift: u16,
    /// Last character code of each segment.
    pub end_codes: Vec<u16>,
    /// First character code of each segment.
    pub start_codes: Vec<u16>,
    /// Per-segment delta added to a character code to obtain its glyph.
    pub id_deltas: Vec<i16>,
    /// Per-segment offset into `glyph_id_array`, or 0 to use the delta alone.
    pub id_range_offsets: Vec<u16>,
    /// Glyph indices that follow the segment arrays in the stream.
    pub glyph_id_array: Vec<u16>,
}
53
/// Format 6 subtable: a dense glyph array covering one contiguous range of
/// 16-bit character codes.
#[derive(Debug, Clone)]
pub struct Format6 {
    /// Format number as read from the stream (6 for this subtable).
    pub format: u16,
    /// `length` field of the subtable header.
    pub length: u16,
    /// `language` field of the subtable header.
    pub language: u16,
    /// Character code that maps to `glyph_id_array[0]`.
    pub first_code: u16,
    /// Number of entries the parser reads into `glyph_id_array`.
    pub entry_count: u16,
    /// Glyph indices for `first_code`, `first_code + 1`, and so on.
    pub glyph_id_array: Vec<u16>,
}
63
64#[derive(Debug, Clone)]
65pub struct Format12 {
66    pub format: u32,
67    pub length: u32,
68    pub language: u32,
69    pub groups: Vec<SequentialMapGroup>,
70}
71
/// One format 12 group: an inclusive range of character codes whose glyph
/// indices increase in step with the character code.
#[derive(Debug, Clone)]
pub struct SequentialMapGroup {
    /// First character code in the group.
    pub start_char_code: u32,
    /// Last character code in the group (inclusive).
    pub end_char_code: u32,
    /// Glyph index assigned to `start_char_code`.
    pub start_glyph_code: u32,
}
78
79/// Format 13 - Many-to-one range mappings
80#[derive(Debug, Clone)]
81pub struct Format13 {
82    pub format: u32,
83    pub length: u32,
84    pub language: u32,
85    pub groups: Vec<ConstantMapGroup>,
86}
87
/// One format 13 group: an inclusive range of character codes that all map to
/// a single glyph.
#[derive(Debug, Clone)]
pub struct ConstantMapGroup {
    /// First character code in the group.
    pub start_char_code: u32,
    /// Last character code in the group (inclusive).
    pub end_char_code: u32,
    /// Glyph index shared by every character code in the group.
    pub glyph_code: u32,
}
94
95/// Format 14 - Unicode variation sequences
96#[derive(Debug, Clone)]
97pub struct Format14 {
98    pub format: u32,
99    pub length: u32,
100    pub num_var_selector_records: u32,
101    pub var_selector_records: Vec<VarSelectorRecord>,
102}
103
104#[derive(Debug, Clone)]
105pub struct VarSelectorRecord {
106    pub var_selector: U24,
107    pub default_uvsoffset: u32,
108    pub non_default_uvsoffset: u32,
109}
110
/// A 3-byte (24-bit) unsigned integer, carried in a `u32`.
///
/// The wrapper does not check the range: any `u32` can be stored in it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct U24(pub u32);
114
115impl Format0 {
116    pub fn get_glyph(&self, char_code: u8) -> Option<u16> {
117        Some(self.glyph_id_array[char_code as usize] as u16)
118    }
119}
120
121impl Format4 {
122    pub fn seg_count(&self) -> u16 {
123        self.seg_count_x2 / 2
124    }
125
126    pub fn get_glyph(&self, char_code: u16) -> Option<u16> {
127        let seg_count = self.seg_count();
128
129        // Binary search for the segment
130        let mut min = 0;
131        let mut max = (seg_count - 1) as usize;
132
133        loop {
134            let mid = (min + max) / 2;
135
136            if char_code > self.end_codes[mid] {
137                if min == max {
138                    return None;
139                }
140                min = mid + 1;
141            } else if char_code < self.start_codes[mid] {
142                if min == max {
143                    return None;
144                }
145                max = mid - 1;
146            } else {
147                // Found the segment
148                let start_code = self.start_codes[mid];
149                let id_delta = self.id_deltas[mid] as i32;
150                let id_range_offset = self.id_range_offsets[mid];
151
152                if id_range_offset == 0 {
153                    // Simple case
154                    return Some((char_code as i32 + id_delta) as u16);
155                } else {
156                    // Complex case with id_range_offset
157                    let offset_index =
158                        (id_range_offset as usize / 2 + (char_code as usize - start_code as usize))
159                            as usize;
160
161                    if offset_index >= self.glyph_id_array.len() {
162                        return None;
163                    }
164
165                    let glyph_id = self.glyph_id_array[offset_index];
166
167                    if glyph_id == 0 {
168                        return None;
169                    }
170
171                    return Some(glyph_id);
172                }
173            }
174        }
175    }
176}
177
178impl Format6 {
179    pub fn get_glyph(&self, char_code: u16) -> Option<u16> {
180        if char_code >= self.first_code {
181            let index = (char_code - self.first_code) as usize;
182            if index < self.glyph_id_array.len() {
183                return Some(self.glyph_id_array[index]);
184            }
185        }
186        None
187    }
188}
189
190impl Format12 {
191    pub fn get_glyph(&self, char_code: u32) -> Option<u32> {
192        // Binary search through groups
193        let mut min = 0;
194        let mut max = self.groups.len().saturating_sub(1);
195
196        while min <= max {
197            let mid = (min + max) / 2;
198            let group = &self.groups[mid];
199
200            if char_code < group.start_char_code {
201                if mid == 0 {
202                    return None;
203                }
204                max = mid - 1;
205            } else if char_code > group.end_char_code {
206                min = mid + 1;
207            } else {
208                // char_code is within this group
209                return Some(group.start_glyph_code + (char_code - group.start_char_code));
210            }
211        }
212
213        None
214    }
215}
216
217impl Format13 {
218    pub fn get_glyph(&self, char_code: u32) -> Option<u32> {
219        // Binary search through groups (all map to same glyph)
220        let mut min = 0;
221        let mut max = self.groups.len().saturating_sub(1);
222
223        while min <= max {
224            let mid = (min + max) / 2;
225            let group = &self.groups[mid];
226
227            if char_code < group.start_char_code {
228                if mid == 0 {
229                    return None;
230                }
231                max = mid - 1;
232            } else if char_code > group.end_char_code {
233                min = mid + 1;
234            } else {
235                // char_code is within this group, all map to the same glyph
236                return Some(group.glyph_code);
237            }
238        }
239
240        None
241    }
242}
243
244impl CmapTable {
245    pub fn get_best_subtable(&self) -> Option<&CmapSubtable> {
246        // Priority: Unicode BMP (3,1), Unicode (0,3), Windows Symbol (3,0), Mac Roman (1,0)
247        let preferred = [
248            (3, 1), // Unicode BMP
249            (0, 4), // Unicode 2.0+
250            (0, 3), // Unicode 1.1
251            (3, 0), // Symbol
252            (1, 0), // Roman
253        ];
254
255        for (platform_id, encoding_id) in &preferred {
256            for (i, record) in self.encoding_records.iter().enumerate() {
257                if record.platform_id == *platform_id && record.encoding_id == *encoding_id {
258                    return self.subtables.get(i);
259                }
260            }
261        }
262
263        self.subtables.first()
264    }
265
266    pub fn map_char(&self, c: char) -> Option<u32> {
267        let code = c as u32;
268
269        if let Some(subtable) = self.get_best_subtable() {
270            match subtable {
271                CmapSubtable::Format0(f) if code <= 0xFF => {
272                    f.get_glyph(code as u8).map(|g| g as u32)
273                }
274                CmapSubtable::Format4(f) if code <= 0xFFFF => f.get_glyph(code as u16).map(|g| g as u32),
275                CmapSubtable::Format6(f) if code <= 0xFFFF => f.get_glyph(code as u16).map(|g| g as u32),
276                CmapSubtable::Format12(f) => f.get_glyph(code),
277                CmapSubtable::Format13(f) => f.get_glyph(code),
278                _ => None,
279            }
280        } else {
281            None
282        }
283    }
284}
285
286impl TtfTable for CmapTable {
287    fn from_reader(reader: &mut FontReader, _length: u32) -> Result<Self> {
288        let version = reader.read_u16()?;
289        let num_tables = reader.read_u16()?;
290
291        let mut encoding_records = Vec::with_capacity(num_tables as usize);
292        for _ in 0..num_tables {
293            encoding_records.push(EncodingRecord {
294                platform_id: reader.read_u16()?,
295                encoding_id: reader.read_u16()?,
296                offset: reader.read_u32()?,
297            });
298        }
299
300        // Parse subtables
301        let mut subtables = Vec::with_capacity(num_tables as usize);
302        for record in &encoding_records {
303            let current_pos = reader.position();
304            reader.set_position(record.offset as usize)?;
305
306            let format = reader.read_u16()?;
307            let subtable = match format {
308                0 => {
309                    let length = reader.read_u16()?;
310                    let language = reader.read_u16()?;
311                    let glyph_id_array = reader.read_bytes(256)?;
312                    CmapSubtable::Format0(Format0 {
313                        format,
314                        length,
315                        language,
316                        glyph_id_array,
317                    })
318                }
319                4 => {
320                    let length = reader.read_u16()?;
321                    let language = reader.read_u16()?;
322                    let seg_count_x2 = reader.read_u16()?;
323                    let seg_count = seg_count_x2 / 2;
324                    let search_range = reader.read_u16()?;
325                    let entry_selector = reader.read_u16()?;
326                    let range_shift = reader.read_u16()?;
327
328                    let mut end_codes = Vec::with_capacity(seg_count as usize);
329                    for _ in 0..seg_count {
330                        end_codes.push(reader.read_u16()?);
331                    }
332
333                    let _reserved_pad = reader.read_u16()?;
334
335                    let mut start_codes = Vec::with_capacity(seg_count as usize);
336                    for _ in 0..seg_count {
337                        start_codes.push(reader.read_u16()?);
338                    }
339
340                    let mut id_deltas = Vec::with_capacity(seg_count as usize);
341                    for _ in 0..seg_count {
342                        id_deltas.push(reader.read_i16()?);
343                    }
344
345                    let mut id_range_offsets = Vec::with_capacity(seg_count as usize);
346                    for _ in 0..seg_count {
347                        id_range_offsets.push(reader.read_u16()?);
348                    }
349
350                    let remaining = (length as usize)
351                        .saturating_sub(2 + 2 + 2 + 2 + 2 + 2 + 2)
352                        .saturating_sub(seg_count as usize * 2)
353                        .saturating_sub(2)
354                        .saturating_sub(seg_count as usize * 2)
355                        .saturating_sub(seg_count as usize * 2)
356                        .saturating_sub(seg_count as usize * 2);
357                    let glyph_id_array_size = remaining / 2;
358                    let mut glyph_id_array = Vec::with_capacity(glyph_id_array_size);
359                    for _ in 0..glyph_id_array_size {
360                        glyph_id_array.push(reader.read_u16()?);
361                    }
362
363                    CmapSubtable::Format4(Format4 {
364                        format,
365                        length,
366                        language,
367                        seg_count_x2,
368                        search_range,
369                        entry_selector,
370                        range_shift,
371                        end_codes,
372                        start_codes,
373                        id_deltas,
374                        id_range_offsets,
375                        glyph_id_array,
376                    })
377                }
378                6 => {
379                    // Format 6 - Trimmed table mapping
380                    let length = reader.read_u16()?;
381                    let language = reader.read_u16()?;
382                    let first_code = reader.read_u16()?;
383                    let entry_count = reader.read_u16()?;
384
385                    let mut glyph_id_array = Vec::with_capacity(entry_count as usize);
386                    for _ in 0..entry_count {
387                        glyph_id_array.push(reader.read_u16()?);
388                    }
389
390                    CmapSubtable::Format6(Format6 {
391                        format,
392                        length,
393                        language,
394                        first_code,
395                        entry_count,
396                        glyph_id_array,
397                    })
398                }
399                12 => {
400                    // Format 12 - Segmented coverage (full Unicode)
401                    reader.skip(2)?; // format is already read, but it's u16 in the stream for formats < 8
402                    let length = reader.read_u32()?;
403                    let language = reader.read_u32()?;
404                    let num_groups = reader.read_u32()?;
405
406                    let mut groups = Vec::with_capacity(num_groups as usize);
407                    for _ in 0..num_groups {
408                        groups.push(SequentialMapGroup {
409                            start_char_code: reader.read_u32()?,
410                            end_char_code: reader.read_u32()?,
411                            start_glyph_code: reader.read_u32()?,
412                        });
413                    }
414
415                    CmapSubtable::Format12(Format12 {
416                        format: 12,
417                        length,
418                        language,
419                        groups,
420                    })
421                }
422                13 => {
423                    // Format 13 - Many-to-one range mappings
424                    reader.skip(2)?;
425                    let length = reader.read_u32()?;
426                    let language = reader.read_u32()?;
427                    let num_groups = reader.read_u32()?;
428
429                    let mut groups = Vec::with_capacity(num_groups as usize);
430                    for _ in 0..num_groups {
431                        groups.push(ConstantMapGroup {
432                            start_char_code: reader.read_u32()?,
433                            end_char_code: reader.read_u32()?,
434                            glyph_code: reader.read_u32()?,
435                        });
436                    }
437
438                    CmapSubtable::Format13(Format13 {
439                        format: 13,
440                        length,
441                        language,
442                        groups,
443                    })
444                }
445                14 => {
446                    // Format 14 - Unicode variation sequences
447                    reader.skip(2)?;
448                    let length = reader.read_u32()?;
449                    let num_var_selector_records = reader.read_u32()?;
450
451                    let mut var_selector_records = Vec::with_capacity(num_var_selector_records as usize);
452                    for _ in 0..num_var_selector_records {
453                        var_selector_records.push(VarSelectorRecord {
454                            var_selector: U24(reader.read_u24()?),
455                            default_uvsoffset: reader.read_u32()?,
456                            non_default_uvsoffset: reader.read_u32()?,
457                        });
458                    }
459
460                    CmapSubtable::Format14(Format14 {
461                        format: 14,
462                        length,
463                        num_var_selector_records,
464                        var_selector_records,
465                    })
466                }
467                _ => {
468                    return Err(TtfError::ParseError(format!(
469                        "Unsupported cmap subtable format: {}",
470                        format
471                    )));
472                }
473            };
474
475            subtables.push(subtable);
476            reader.set_position(current_pos)?;
477        }
478
479        Ok(CmapTable {
480            version,
481            encoding_records,
482            subtables,
483        })
484    }
485}