1include!("../../generated/generated_name.rs");
4
5pub use types::NameId;
6
7impl<'a> Name<'a> {
8 pub fn string_data(&self) -> FontData<'a> {
10 let base = self.offset_data();
11 let off = self.storage_offset();
12 base.split_off(off as usize).unwrap_or_default()
13 }
14}
15
16impl NameRecord {
17 pub fn string<'a>(&self, data: FontData<'a>) -> Result<NameString<'a>, ReadError> {
22 let start = self.string_offset().non_null().unwrap_or(0);
23 let end = start + self.length() as usize;
24
25 let data = data
26 .as_bytes()
27 .get(start..end)
28 .ok_or(ReadError::OutOfBounds)?;
29
30 let encoding = Encoding::new(self.platform_id(), self.encoding_id());
31 Ok(NameString { data, encoding })
32 }
33
34 pub fn is_unicode(&self) -> bool {
37 self.platform_id() == 0
38 || (self.platform_id() == 3 && [0, 1, 10].contains(&self.encoding_id()))
39 }
40}
41
42impl LangTagRecord {
43 pub fn lang_tag<'a>(&self, data: FontData<'a>) -> Result<NameString<'a>, ReadError> {
45 let start = self.lang_tag_offset().non_null().unwrap_or(0);
46 let end = start + self.length() as usize;
47
48 let data = data
49 .as_bytes()
50 .get(start..end)
51 .ok_or(ReadError::OutOfBounds)?;
52
53 let encoding = Encoding::Utf16Be;
54 Ok(NameString { data, encoding })
55 }
56}
57
58#[derive(Copy, Clone, PartialEq, Eq)]
63pub struct NameString<'a> {
64 data: &'a [u8],
65 encoding: Encoding,
66}
67
68impl<'a> NameString<'a> {
69 pub fn chars(&self) -> CharIter<'a> {
71 CharIter {
72 data: self.data,
73 encoding: self.encoding,
74 pos: 0,
75 }
76 }
77}
78
#[cfg(feature = "experimental_traverse")]
impl<'a> traversal::SomeString<'a> for NameString<'a> {
    /// Returns a boxed iterator over the decoded characters of this string.
    fn iter_chars(&self) -> Box<dyn Iterator<Item = char> + 'a> {
        let decoded: CharIter<'a> = self.chars();
        Box::new(decoded)
    }
}
85
#[cfg(feature = "experimental_traverse")]
impl NameRecord {
    /// Builds the traversal field for this record's string.
    ///
    /// The field carries the record's string offset together with the result
    /// of resolving the string in `data` (boxed on success, the read error
    /// otherwise).
    fn traverse_string<'a>(&self, data: FontData<'a>) -> traversal::FieldType<'a> {
        let string_offset = traversal::StringOffset {
            offset: self.string_offset().into(),
            target: self.string(data).map(|s| Box::new(s) as _),
        };
        FieldType::StringOffset(string_offset)
    }
}
95
#[cfg(feature = "experimental_traverse")]
impl LangTagRecord {
    /// Builds the traversal field for this record's language tag.
    ///
    /// The field carries the record's language-tag offset together with the
    /// result of resolving the tag in `data` (boxed on success, the read error
    /// otherwise).
    fn traverse_lang_tag<'a>(&self, data: FontData<'a>) -> traversal::FieldType<'a> {
        let string_offset = traversal::StringOffset {
            offset: self.lang_tag_offset().into(),
            target: self.lang_tag(data).map(|s| Box::new(s) as _),
        };
        FieldType::StringOffset(string_offset)
    }
}
105
106impl<'a> IntoIterator for NameString<'a> {
107 type Item = char;
108 type IntoIter = CharIter<'a>;
109 fn into_iter(self) -> Self::IntoIter {
110 self.chars()
111 }
112}
113
114impl std::fmt::Display for NameString<'_> {
115 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
116 for c in self.chars() {
117 c.fmt(f)?;
118 }
119 Ok(())
120 }
121}
122
123impl std::fmt::Debug for NameString<'_> {
124 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
125 write!(f, "\"{self}\"")
126 }
127}
128
129#[derive(Clone)]
131pub struct CharIter<'a> {
132 data: &'a [u8],
133 encoding: Encoding,
134 pos: usize,
135}
136
137impl CharIter<'_> {
138 fn bump_u16(&mut self) -> Option<u16> {
139 let result = self
140 .data
141 .get(self.pos..self.pos + 2)
142 .map(|x| u16::from_be_bytes(x.try_into().unwrap()))?;
143 self.pos += 2;
144 Some(result)
145 }
146
147 fn bump_u8(&mut self) -> Option<u8> {
148 let result = self.data.get(self.pos)?;
149 self.pos += 1;
150 Some(*result)
151 }
152}
153
154impl Iterator for CharIter<'_> {
155 type Item = char;
156
157 fn next(&mut self) -> Option<Self::Item> {
158 if self.pos >= self.data.len() {
159 return None;
160 }
161 let rep = core::char::REPLACEMENT_CHARACTER;
162 let raw_c = match self.encoding {
163 Encoding::Utf16Be => {
164 let c1 = self.bump_u16()? as u32;
165 if (0xD800..0xDC00).contains(&c1) {
166 let Some(c2) = self.bump_u16() else {
167 return Some(rep);
168 };
169 if !(0xDC00..=0xDFFF).contains(&c2) {
170 self.pos -= 2;
174 return Some(rep);
175 }
176 ((c1 & 0x3FF) << 10) + (c2 as u32 & 0x3FF) + 0x10000
177 } else {
178 c1
179 }
180 }
181 Encoding::MacRoman => {
182 let c = self.bump_u8()?;
183 MacRomanMapping.decode(c) as u32
184 }
185 _ => return None,
186 };
187 Some(std::char::from_u32(raw_c).unwrap_or(rep))
188 }
189}
190
/// The character encoding of a string stored in the name table, as far as this
/// module knows how to decode it.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// Big-endian UTF-16.
    Utf16Be,
    /// The Mac OS Roman single-byte encoding.
    MacRoman,
    /// Any platform/encoding combination without a decoder here; such strings
    /// decode to no characters.
    Unknown,
}

impl Encoding {
    /// Determines the encoding for a platform ID / encoding ID pair.
    ///
    /// | platform | encoding  | result      |
    /// |----------|-----------|-------------|
    /// | 0        | any       | `Utf16Be`   |
    /// | 1        | 0         | `MacRoman`  |
    /// | 3        | 0, 1, 10  | `Utf16Be`   |
    /// | anything else        || `Unknown`   |
    ///
    /// In the OpenType `name` table, platform 0 is Unicode, 1 is Macintosh and
    /// 3 is Windows.
    pub fn new(platform_id: u16, encoding_id: u16) -> Encoding {
        match (platform_id, encoding_id) {
            (0, _) | (3, 0 | 1 | 10) => Encoding::Utf16Be,
            (1, 0) => Encoding::MacRoman,
            _ => Encoding::Unknown,
        }
    }
}
212
213pub struct MacRomanMapping;
215
216impl MacRomanMapping {
217 const START_REMAP: u8 = 128;
218 pub fn decode(self, raw: u8) -> char {
220 if raw < Self::START_REMAP {
221 raw as char
222 } else {
223 let idx = raw - Self::START_REMAP;
224 char::from_u32(MAC_ROMAN_DECODE[idx as usize] as u32).unwrap()
225 }
226 }
227
228 pub fn encode(self, c: char) -> Option<u8> {
230 let raw_c = c as u32;
231 let raw_c: u16 = raw_c.try_into().ok()?;
232 if raw_c < Self::START_REMAP as u16 {
233 Some(raw_c as u8)
234 } else {
235 match MAC_ROMAN_ENCODE.binary_search_by_key(&raw_c, |(unic, _)| *unic) {
236 Ok(idx) => Some(MAC_ROMAN_ENCODE[idx].1),
237 Err(_) => None,
238 }
239 }
240 }
241}
242
/// Unicode code points for the Mac OS Roman bytes 128 through 255.
///
/// The entry for byte `b` is at index `b - 128`. Each row below holds eight
/// consecutive bytes; the comment gives the byte value of the row's first entry.
#[rustfmt::skip]
static MAC_ROMAN_DECODE: [u16; 128] = [
    // 0x80
    196, 197, 199, 201, 209, 214, 220, 225,
    // 0x88
    224, 226, 228, 227, 229, 231, 233, 232,
    // 0x90
    234, 235, 237, 236, 238, 239, 241, 243,
    // 0x98
    242, 244, 246, 245, 250, 249, 251, 252,
    // 0xA0
    8224, 176, 162, 163, 167, 8226, 182, 223,
    // 0xA8
    174, 169, 8482, 180, 168, 8800, 198, 216,
    // 0xB0
    8734, 177, 8804, 8805, 165, 181, 8706, 8721,
    // 0xB8
    8719, 960, 8747, 170, 186, 937, 230, 248,
    // 0xC0
    191, 161, 172, 8730, 402, 8776, 8710, 171,
    // 0xC8
    187, 8230, 160, 192, 195, 213, 338, 339,
    // 0xD0
    8211, 8212, 8220, 8221, 8216, 8217, 247, 9674,
    // 0xD8
    255, 376, 8260, 8364, 8249, 8250, 64257, 64258,
    // 0xE0
    8225, 183, 8218, 8222, 8240, 194, 202, 193,
    // 0xE8
    203, 200, 205, 206, 207, 204, 211, 212,
    // 0xF0
    63743, 210, 218, 219, 217, 305, 710, 732,
    // 0xF8
    175, 728, 729, 730, 184, 733, 731, 711,
];
258
/// `(Unicode code point, Mac OS Roman byte)` pairs for every byte from 128 to 255.
///
/// The entries are sorted by code point, which `MacRomanMapping::encode`
/// relies on for its binary search. The comments name the Unicode block each
/// group of keys falls in.
#[rustfmt::skip]
static MAC_ROMAN_ENCODE: [(u16, u8); 128] = [
    // Latin-1 Supplement
    (160, 202), (161, 193), (162, 162), (163, 163),
    (165, 180), (167, 164), (168, 172), (169, 169),
    (170, 187), (171, 199), (172, 194), (174, 168),
    (175, 248), (176, 161), (177, 177), (180, 171),
    (181, 181), (182, 166), (183, 225), (184, 252),
    (186, 188), (187, 200), (191, 192), (192, 203),
    (193, 231), (194, 229), (195, 204), (196, 128),
    (197, 129), (198, 174), (199, 130), (200, 233),
    (201, 131), (202, 230), (203, 232), (204, 237),
    (205, 234), (206, 235), (207, 236), (209, 132),
    (210, 241), (211, 238), (212, 239), (213, 205),
    (214, 133), (216, 175), (217, 244), (218, 242),
    (219, 243), (220, 134), (223, 167), (224, 136),
    (225, 135), (226, 137), (227, 139), (228, 138),
    (229, 140), (230, 190), (231, 141), (232, 143),
    (233, 142), (234, 144), (235, 145), (236, 147),
    (237, 146), (238, 148), (239, 149), (241, 150),
    (242, 152), (243, 151), (244, 153), (245, 155),
    (246, 154), (247, 214), (248, 191), (249, 157),
    (250, 156), (251, 158), (252, 159), (255, 216),
    // Latin Extended-A and Latin Extended-B
    (305, 245), (338, 206), (339, 207), (376, 217), (402, 196),
    // Spacing Modifier Letters
    (710, 246), (711, 255), (728, 249), (729, 250),
    (730, 251), (731, 254), (732, 247), (733, 253),
    // Greek
    (937, 189), (960, 185),
    // General Punctuation
    (8211, 208), (8212, 209), (8216, 212), (8217, 213),
    (8218, 226), (8220, 210), (8221, 211), (8222, 227),
    (8224, 160), (8225, 224), (8226, 165), (8230, 201),
    (8240, 228), (8249, 220), (8250, 221), (8260, 218),
    // Currency Symbols
    (8364, 219),
    // Letterlike Symbols
    (8482, 170),
    // Mathematical Operators
    (8706, 182), (8710, 198), (8719, 184), (8721, 183),
    (8730, 195), (8734, 176), (8747, 186), (8776, 197),
    (8800, 173), (8804, 178), (8805, 179),
    // Geometric Shapes
    (9674, 215),
    // Private Use Area
    (63743, 240),
    // Alphabetic Presentation Forms
    (64257, 222), (64258, 223),
];
295
#[cfg(test)]
mod tests {
    use super::*;

    const REP: char = std::char::REPLACEMENT_CHARACTER;

    /// Decodes `data` as UTF-16BE from its first byte and collects every
    /// character the iterator yields.
    fn decode_utf16be(data: &[u8]) -> Vec<char> {
        let chars = CharIter {
            data,
            encoding: Encoding::Utf16Be,
            pos: 0,
        };
        chars.collect()
    }

    /// Every character of a name with non-ASCII letters must survive an
    /// encode/decode round trip.
    #[test]
    fn mac_roman() {
        for expected in "Joachim Müller-Lancé".chars() {
            let byte = MacRomanMapping.encode(expected).unwrap();
            assert_eq!(MacRomanMapping.decode(byte), expected);
        }
    }

    /// A high surrogate with nothing after it becomes a replacement character.
    #[test]
    fn lone_surrogate_at_end() {
        assert_eq!(decode_utf16be(&[0x09, 0x04, 0xD8, 0x00]), ['ऄ', REP]);
    }

    /// A high surrogate followed by something other than a low surrogate is
    /// replaced, and the following unit is still decoded on its own.
    #[test]
    fn high_surrogate_followed_by_non_low() {
        // Followed by an ordinary BMP character.
        assert_eq!(decode_utf16be(&[0xD8, 0x00, 0x00, 0x41]), [REP, 'A']);

        // Followed by a complete, valid surrogate pair.
        assert_eq!(
            decode_utf16be(&[0xD8, 0x00, 0xD8, 0x3D, 0xDE, 0x00]),
            [REP, '😀']
        );
    }
}