pinot/
cmap.rs

1//! Character mapping table.
2
3use crate::parse_prelude::*;
4use core::cmp::Ordering;
5
6/// Tag for the `cmap` table.
7pub const CMAP: Tag = Tag::new(b"cmap");
8
9/// Character to glyph index mapping table.
10///
11/// <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>
12#[derive(Copy, Clone)]
13pub struct Cmap<'a>(Buffer<'a>);
14
15impl<'a> Cmap<'a> {
16    /// Creates a new character to glyph index mapping table from a byte slice
17    /// containing the table data.
18    pub fn new(data: &'a [u8]) -> Self {
19        Self(Buffer::new(data))
20    }
21
22    /// Returns the version.
23    pub fn version(&self) -> u16 {
24        self.0.read(0).unwrap_or(0)
25    }
26
27    /// Returns the array of encoding records.
28    pub fn records(&self) -> Slice<'a, EncodingRecord> {
29        let len = self.0.read_u16(2).unwrap_or_default() as usize;
30        self.0.read_slice(4, len).unwrap_or_default()
31    }
32
33    /// Returns an iterator over the subtables.
34    pub fn subtables(self) -> impl Iterator<Item = Subtable<'a>> + 'a + Clone {
35        self.records().iter().map(move |encoding| Subtable {
36            cmap: self,
37            encoding,
38        })
39    }
40
41    /// Maps a codepoint to a glyph identifier.
42    pub fn map(&self, codepoint: u32) -> Option<GlyphId> {
43        self.subtables()
44            .filter_map(|subtable| subtable.map(codepoint))
45            .next()
46    }
47
48    /// Maps a codepoint with variation selector to a glyph identifier.
49    pub fn map_variant(&self, codepoint: u32, variation_selector: u32) -> Option<MapVariant> {
50        self.subtables()
51            .filter_map(|subtable| subtable.map_variant(codepoint, variation_selector))
52            .next()
53    }
54}
55
/// Encoding record: a platform/encoding pair plus the location of the
/// subtable that serves it.
#[derive(Clone, Copy, Debug)]
pub struct EncodingRecord {
    /// Identifier of the platform.
    pub platform_id: u16,
    /// Identifier of the encoding; its meaning depends on the platform.
    pub encoding_id: u16,
    /// Byte offset of the subtable, measured from the start of the `cmap`
    /// table.
    pub offset: u32,
}
66
67impl ReadData for EncodingRecord {
68    unsafe fn read_data_unchecked(buf: &[u8], offset: usize) -> Self {
69        Self {
70            platform_id: u16::read_data_unchecked(buf, offset),
71            encoding_id: u16::read_data_unchecked(buf, offset + 2),
72            offset: u32::read_data_unchecked(buf, offset + 4),
73        }
74    }
75}
76
77/// Character to glyph index mapping subtable.
78#[derive(Copy, Clone)]
79pub struct Subtable<'a> {
80    /// Parent table.
81    pub cmap: Cmap<'a>,
82    /// Encoding record.
83    pub encoding: EncodingRecord,
84}
85
86impl<'a> Subtable<'a> {
87    /// Returns the subtable format.
88    pub fn format(&self) -> u16 {
89        self.cmap
90            .0
91            .read_u16(self.encoding.offset as usize)
92            .unwrap_or_default()
93    }
94
95    /// Maps a codepoint to a glyph identifier.
96    pub fn map(&self, codepoint: u32) -> Option<GlyphId> {
97        map(
98            self.cmap.0.data(),
99            self.encoding.offset,
100            self.format(),
101            codepoint,
102        )
103    }
104
105    /// Maps a codepoint with variation selector to a glyph identifier.
106    pub fn map_variant(&self, codepoint: u32, variation_selector: u32) -> Option<MapVariant> {
107        if self.format() == 14 {
108            map_variant(
109                self.cmap.0.data(),
110                self.encoding.offset,
111                codepoint,
112                variation_selector,
113            )
114        } else {
115            None
116        }
117    }
118}
119
120/// Maps a codepoint to a glyph identifer using the subtable of the given
121/// format at the specified offset in data.
122///
123/// Supports the following formats:
124/// - Format 4: <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-4-segment-mapping-to-delta-values>
125/// - Format 12: <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-12-segmented-coverage>
126/// - Format 13: <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-13-many-to-one-range-mappings>
127pub fn map(data: &[u8], offset: u32, format: u16, codepoint: u32) -> Option<GlyphId> {
128    match format {
129        4 => map_format4(data, offset, codepoint),
130        12 => map_format12(data, offset, codepoint),
131        13 => map_format13(data, offset, codepoint),
132        _ => None,
133    }
134}
135
136fn map_format4(data: &[u8], offset: u32, codepoint: u32) -> Option<GlyphId> {
137    if codepoint >= 65535 {
138        return None;
139    }
140    let codepoint = codepoint as u16;
141    let b = Buffer::with_offset(data, offset as usize)?;
142    let segcount_x2 = b.read_u16(6)? as usize;
143    let segcount = segcount_x2 / 2;
144    b.ensure_range(0, 16 + segcount_x2 * 4)?;
145    let end_codes_offset = 14;
146    let start_codes_offset = end_codes_offset + segcount_x2 + 2;
147    let mut lo = 0;
148    let mut hi = segcount;
149    while lo < hi {
150        let i = (lo + hi) / 2;
151        let i2 = i * 2;
152        let start = unsafe { b.read_unchecked::<u16>(start_codes_offset + i2) };
153        if codepoint < start {
154            hi = i;
155        } else if codepoint > unsafe { b.read_unchecked::<u16>(end_codes_offset + i2) } {
156            lo = i + 1;
157        } else {
158            let deltas_offset = start_codes_offset + segcount_x2;
159            let ranges_offset = deltas_offset + segcount_x2;
160            let mut range_base = ranges_offset + i2;
161            let range = unsafe { b.read_unchecked::<u16>(range_base) as usize };
162            let delta = unsafe { b.read_unchecked::<i16>(deltas_offset + i2) as i32 };
163            if range == 0 {
164                return Some((codepoint as i32 + delta) as u16);
165            }
166            range_base += range;
167            let diff = (codepoint - start) as usize * 2;
168            let id = b.read::<u16>(range_base + diff).unwrap_or(0);
169            return if id != 0 {
170                Some((id as i32 + delta as i32) as u16)
171            } else {
172                Some(0)
173            };
174        }
175    }
176    None
177}
178
179fn map_format12(data: &[u8], offset: u32, codepoint: u32) -> Option<GlyphId> {
180    let (start, delta) = map_format12_13(data, offset, codepoint)?;
181    Some((codepoint.wrapping_sub(start).wrapping_add(delta)) as u16)
182}
183
184fn map_format13(data: &[u8], offset: u32, codepoint: u32) -> Option<GlyphId> {
185    let (_, glyph_id) = map_format12_13(data, offset, codepoint)?;
186    Some(glyph_id as u16)
187}
188
189/// Common code for formats 12 and 13.
190fn map_format12_13(data: &[u8], offset: u32, codepoint: u32) -> Option<(u32, u32)> {
191    let b = Buffer::with_offset(data, offset as usize)?;
192    let base = 16;
193    let len = b.read_u32(base - 4)? as usize;
194    b.ensure_range(base, len * 12)?;
195    let mut lo = 0;
196    let mut hi = len;
197    while lo < hi {
198        let i = (lo + hi) / 2;
199        let rec = base + i * 12;
200        let start = unsafe { b.read_unchecked::<u32>(rec) };
201        if codepoint < start {
202            hi = i;
203        } else if codepoint > unsafe { b.read_unchecked::<u32>(rec + 4) } {
204            lo = i + 1;
205        } else {
206            return Some((start, unsafe { b.read_unchecked::<u32>(rec + 8) }));
207        }
208    }
209    None
210}
211
212/// Result of the mapping a codepoint with a variation selector.
213#[derive(Copy, Clone, PartialEq, Eq, Debug)]
214pub enum MapVariant {
215    /// Use the default glyph mapping.
216    UseDefault,
217    /// Use the specified variant.
218    Variant(GlyphId),
219}
220
221/// Maps a codepoint with variation selector to a glyph identifer using the
222/// format 14 subtable at the specified offset in data.
223///
224/// <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-14-unicode-variation-sequences>
225pub fn map_variant(
226    data: &[u8],
227    offset: u32,
228    codepoint: u32,
229    variation_selector: u32,
230) -> Option<MapVariant> {
231    let b = Buffer::with_offset(data, offset as usize)?;
232    let len = b.read_u32(6)? as usize;
233    let base = 10;
234    let mut lo = 0;
235    let mut hi = len;
236    let mut default_uvs_offset = 0;
237    let mut non_default_uvs_offset = 0;
238    while lo < hi {
239        let i = (lo + hi) / 2;
240        let rec = base + i * 11;
241        let vs = b.read_u24(rec)?;
242        match variation_selector.cmp(&vs) {
243            Ordering::Less => hi = i,
244            Ordering::Greater => lo = i + 1,
245            Ordering::Equal => {
246                default_uvs_offset = b.read_u32(rec + 3)? as usize;
247                non_default_uvs_offset = b.read_u32(rec + 7)? as usize;
248                break;
249            }
250        }
251    }
252    if default_uvs_offset != 0 {
253        let base = default_uvs_offset;
254        let len = b.read_u32(base)? as usize;
255        let mut lo = 0;
256        let mut hi = len;
257        while lo < hi {
258            let i = (lo + hi) / 2;
259            let rec = base + 4 + i * 4;
260            let start = b.read_u24(rec)?;
261            if codepoint < start {
262                hi = i;
263            } else if codepoint > (start + b.read_u8(rec + 3)? as u32) {
264                lo = i + 1;
265            } else {
266                // Fallback to standard mapping.
267                return Some(MapVariant::UseDefault);
268            }
269        }
270    }
271    if non_default_uvs_offset != 0 {
272        let base = non_default_uvs_offset;
273        let len = b.read_u32(base)? as usize;
274        let mut lo = 0;
275        let mut hi = len;
276        while lo < hi {
277            let i = (lo + hi) / 2;
278            let rec = base + 4 + i * 5;
279            let value = b.read_u24(rec)?;
280            match codepoint.cmp(&value) {
281                Ordering::Less => hi = i,
282                Ordering::Greater => lo = i + 1,
283                Ordering::Equal => {
284                    return Some(MapVariant::Variant(b.read_u16(rec + 3)?));
285                }
286            }
287        }
288    }
289    None
290}