1use std::cmp::Reverse;
2use std::collections::BTreeMap;
3use std::fmt::{self, Debug, Formatter};
4
5use serde::{Deserialize, Serialize};
6use ttf_parser::{name_id, PlatformId, Tag};
7use unicode_segmentation::UnicodeSegmentation;
8
9use super::exceptions::find_exception;
10use crate::text::{Font, FontStretch, FontStyle, FontVariant, FontWeight};
11
12#[derive(Debug, Default, Clone, Hash)]
14pub struct FontBook {
15 families: BTreeMap<String, Vec<usize>>,
17 infos: Vec<FontInfo>,
19}
20
21impl FontBook {
22 pub fn new() -> Self {
24 Self { families: BTreeMap::new(), infos: vec![] }
25 }
26
27 pub fn from_infos(infos: impl IntoIterator<Item = FontInfo>) -> Self {
29 let mut book = Self::new();
30 for info in infos {
31 book.push(info);
32 }
33 book
34 }
35
36 pub fn from_fonts<'a>(fonts: impl IntoIterator<Item = &'a Font>) -> Self {
38 Self::from_infos(fonts.into_iter().map(|font| font.info().clone()))
39 }
40
41 pub fn push(&mut self, info: FontInfo) {
43 let index = self.infos.len();
44 let family = info.family.to_lowercase();
45 self.families.entry(family).or_default().push(index);
46 self.infos.push(info);
47 }
48
49 pub fn info(&self, index: usize) -> Option<&FontInfo> {
51 self.infos.get(index)
52 }
53
54 pub fn contains_family(&self, family: &str) -> bool {
56 self.families.contains_key(family)
57 }
58
59 pub fn families(
62 &self,
63 ) -> impl Iterator<Item = (&str, impl Iterator<Item = &FontInfo>)> + '_ {
64 self.families.values().map(|ids| {
67 let family = self.infos[ids[0]].family.as_str();
68 let infos = ids.iter().map(|&id| &self.infos[id]);
69 (family, infos)
70 })
71 }
72
73 pub fn select(&self, family: &str, variant: FontVariant) -> Option<usize> {
78 let ids = self.families.get(family)?;
79 self.find_best_variant(None, variant, ids.iter().copied())
80 }
81
82 pub fn select_family(&self, family: &str) -> impl Iterator<Item = usize> + '_ {
84 self.families
85 .get(family)
86 .map(|vec| vec.as_slice())
87 .unwrap_or_default()
88 .iter()
89 .copied()
90 }
91
92 pub fn select_fallback(
97 &self,
98 like: Option<&FontInfo>,
99 variant: FontVariant,
100 text: &str,
101 ) -> Option<usize> {
102 let c = text.chars().find(|c| !c.is_whitespace())?;
104 let ids = self
105 .infos
106 .iter()
107 .enumerate()
108 .filter(|(_, info)| info.coverage.contains(c as u32))
109 .map(|(index, _)| index);
110
111 self.find_best_variant(like, variant, ids)
113 }
114
115 fn find_best_variant(
137 &self,
138 like: Option<&FontInfo>,
139 variant: FontVariant,
140 ids: impl IntoIterator<Item = usize>,
141 ) -> Option<usize> {
142 let mut best = None;
143 let mut best_key = None;
144
145 for id in ids {
146 let current = &self.infos[id];
147 let key = (
148 like.map(|like| {
149 (
150 current.flags.contains(FontFlags::MONOSPACE)
151 != like.flags.contains(FontFlags::MONOSPACE),
152 current.flags.contains(FontFlags::SERIF)
153 != like.flags.contains(FontFlags::SERIF),
154 Reverse(shared_prefix_words(¤t.family, &like.family)),
155 current.family.len(),
156 )
157 }),
158 current.variant.style.distance(variant.style),
159 current.variant.stretch.distance(variant.stretch),
160 current.variant.weight.distance(variant.weight),
161 );
162
163 if best_key.map_or(true, |b| key < b) {
164 best = Some(id);
165 best_key = Some(key);
166 }
167 }
168
169 best
170 }
171}
172
173#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
175pub struct FontInfo {
176 pub family: String,
178 pub variant: FontVariant,
181 pub flags: FontFlags,
183 pub coverage: Coverage,
185}
186
187bitflags::bitflags! {
188 #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
190 #[derive(Serialize, Deserialize)]
191 #[serde(transparent)]
192 pub struct FontFlags: u32 {
193 const MONOSPACE = 1 << 0;
195 const SERIF = 1 << 1;
197 }
198}
199
200impl FontInfo {
201 pub fn new(data: &[u8], index: u32) -> Option<Self> {
203 let ttf = ttf_parser::Face::parse(data, index).ok()?;
204 Self::from_ttf(&ttf)
205 }
206
207 pub fn iter(data: &[u8]) -> impl Iterator<Item = FontInfo> + '_ {
209 let count = ttf_parser::fonts_in_collection(data).unwrap_or(1);
210 (0..count).filter_map(move |index| Self::new(data, index))
211 }
212
213 pub(super) fn from_ttf(ttf: &ttf_parser::Face) -> Option<Self> {
215 let ps_name = find_name(ttf, name_id::POST_SCRIPT_NAME);
216 let exception = ps_name.as_deref().and_then(find_exception);
217 let family =
229 exception.and_then(|c| c.family.map(str::to_string)).or_else(|| {
230 let family = find_name(ttf, name_id::FAMILY)?;
231 Some(typographic_family(&family).to_string())
232 })?;
233
234 let variant = {
235 let style = exception.and_then(|c| c.style).unwrap_or_else(|| {
236 let mut full = find_name(ttf, name_id::FULL_NAME).unwrap_or_default();
237 full.make_ascii_lowercase();
238
239 let italic = ttf.is_italic() || full.contains("italic");
242 let oblique = ttf.is_oblique()
243 || full.contains("oblique")
244 || full.contains("slanted");
245
246 match (italic, oblique) {
247 (false, false) => FontStyle::Normal,
248 (true, _) => FontStyle::Italic,
249 (_, true) => FontStyle::Oblique,
250 }
251 });
252
253 let weight = exception.and_then(|c| c.weight).unwrap_or_else(|| {
254 let number = ttf.weight().to_number();
255 FontWeight::from_number(number)
256 });
257
258 let stretch = exception
259 .and_then(|c| c.stretch)
260 .unwrap_or_else(|| FontStretch::from_number(ttf.width().to_number()));
261
262 FontVariant { style, weight, stretch }
263 };
264
265 let mut codepoints = vec![];
267 for subtable in ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) {
268 if subtable.is_unicode() {
269 subtable.codepoints(|c| codepoints.push(c));
270 }
271 }
272
273 let mut flags = FontFlags::empty();
274 flags.set(FontFlags::MONOSPACE, ttf.is_monospaced());
275
276 if let Some(panose) = ttf
278 .raw_face()
279 .table(Tag::from_bytes(b"OS/2"))
280 .and_then(|os2| os2.get(32..45))
281 {
282 if matches!(panose, [2, 2..=10, ..]) {
283 flags.insert(FontFlags::SERIF);
284 }
285 }
286
287 Some(FontInfo {
288 family,
289 variant,
290 flags,
291 coverage: Coverage::from_vec(codepoints),
292 })
293 }
294
295 pub fn is_last_resort(&self) -> bool {
298 self.family == "LastResort"
299 }
300}
301
302pub(super) fn find_name(ttf: &ttf_parser::Face, name_id: u16) -> Option<String> {
304 ttf.names().into_iter().find_map(|entry| {
305 if entry.name_id == name_id {
306 if let Some(string) = entry.to_string() {
307 return Some(string);
308 }
309
310 if entry.platform_id == PlatformId::Macintosh && entry.encoding_id == 0 {
311 return Some(decode_mac_roman(entry.name));
312 }
313 }
314
315 None
316 })
317}
318
/// Decode Mac Roman encoded bytes into a string.
///
/// Bytes below 128 are ASCII and map to themselves; bytes from 128 upwards are
/// looked up in the table below. Every byte value has a mapping, so decoding
/// never fails.
fn decode_mac_roman(coded: &[u8]) -> String {
    // Mapping for the upper half (0x80..=0xFF) of the encoding, one row per 16
    // byte values. The `fi`/`fl` ligatures (0xDE, 0xDF) are written as escapes
    // so that they cannot be silently expanded into two letters, which would
    // not be a valid `char` literal.
    #[rustfmt::skip]
    const TABLE: [char; 128] = [
        'Ä', 'Å', 'Ç', 'É', 'Ñ', 'Ö', 'Ü', 'á', 'à', 'â', 'ä', 'ã', 'å', 'ç', 'é', 'è',
        'ê', 'ë', 'í', 'ì', 'î', 'ï', 'ñ', 'ó', 'ò', 'ô', 'ö', 'õ', 'ú', 'ù', 'û', 'ü',
        '†', '°', '¢', '£', '§', '•', '¶', 'ß', '®', '©', '™', '´', '¨', '≠', 'Æ', 'Ø',
        '∞', '±', '≤', '≥', '¥', 'µ', '∂', '∑', '∏', 'π', '∫', 'ª', 'º', 'Ω', 'æ', 'ø',
        '¿', '¡', '¬', '√', 'ƒ', '≈', '∆', '«', '»', '…', '\u{a0}', 'À', 'Ã', 'Õ', 'Œ', 'œ',
        '–', '—', '“', '”', '‘', '’', '÷', '◊', 'ÿ', 'Ÿ', '⁄', '€', '‹', '›', '\u{fb01}', '\u{fb02}',
        '‡', '·', '‚', '„', '‰', 'Â', 'Ê', 'Á', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'Ó', 'Ô',
        '\u{f8ff}', 'Ò', 'Ú', 'Û', 'Ù', 'ı', 'ˆ', '˜', '¯', '˘', '˙', '˚', '¸', '˝', '˛', 'ˇ',
    ];

    /// Map a single Mac Roman byte to its Unicode character.
    fn char_from_mac_roman(code: u8) -> char {
        if code.is_ascii() {
            char::from(code)
        } else {
            // `code` is at least 128 here, so the index is within 0..128.
            TABLE[usize::from(code - 128)]
        }
    }

    coded.iter().copied().map(char_from_mac_roman).collect()
}
343
/// Trim style naming from a family name and fix bad names.
///
/// Surrounding whitespace and leading dots are removed first. Then trailing
/// style words (such as "bold" or "condensed"), optionally preceded by a
/// modifier (such as "semi" or "extra"), are cut off repeatedly. Matching is
/// ASCII-case-insensitive, but the returned slice keeps the input's casing.
fn typographic_family(mut family: &str) -> &str {
    // Separators between names, modifiers and styles.
    const SEPARATORS: [char; 3] = [' ', '-', '_'];

    // Modifiers that can appear in combination with suffixes.
    const MODIFIERS: &[&str] =
        &["extra", "ext", "ex", "x", "semi", "sem", "sm", "demi", "dem", "ultra"];

    // Style suffixes.
    #[rustfmt::skip]
    const SUFFIXES: &[&str] = &[
        "normal", "italic", "oblique", "slanted",
        "thin", "th", "hairline", "light", "lt", "regular", "medium", "med",
        "md", "bold", "bd", "demi", "extb", "black", "blk", "bk", "heavy",
        "narrow", "condensed", "cond", "cn", "cd", "compressed", "expanded", "exp"
    ];

    // Trim spacing and leading dots.
    family = family.trim().trim_start_matches('.');

    // Work on an ASCII-lowercased copy so that suffixes match regardless of
    // case. ASCII lowercasing keeps byte offsets intact, so lengths measured on
    // the copy are valid for slicing the original.
    let lower = family.to_ascii_lowercase();
    let mut kept = lower.as_str();

    loop {
        let before = kept.len();

        // Peel off as many directly adjacent style suffixes as possible.
        let mut rest = kept;
        let mut stripped_any = false;
        while let Some(shorter) =
            SUFFIXES.iter().find_map(|suffix| rest.strip_suffix(suffix))
        {
            stripped_any = true;
            rest = shorter;
        }

        // Without a style suffix there is nothing more to trim.
        if !stripped_any {
            break;
        }

        // The suffixes only count if a separator sets them apart from the name.
        if let Some(shorter) = rest.strip_suffix(SEPARATORS) {
            kept = shorter;
            rest = shorter;
        }

        // Also allow an extra modifier, but apply it only if it is separated
        // from the text before it (to prevent false positives). Only the first
        // matching modifier is considered.
        let before_modifier = MODIFIERS
            .iter()
            .find_map(|modifier| rest.strip_suffix(modifier))
            .and_then(|head| head.strip_suffix(SEPARATORS));
        if let Some(head) = before_modifier {
            kept = head;
        }

        // Stop once a full round no longer shortens the name.
        if kept.len() == before {
            break;
        }
    }

    // Apply the trimming to the original (non-lowercased) name.
    &family[..kept.len()]
}
406
407fn shared_prefix_words(left: &str, right: &str) -> usize {
409 left.unicode_words()
410 .zip(right.unicode_words())
411 .take_while(|(l, r)| l == r)
412 .count()
413}
414
415#[derive(Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
432#[serde(transparent)]
433pub struct Coverage(Vec<u32>);
434
435impl Coverage {
436 pub fn from_vec(mut codepoints: Vec<u32>) -> Self {
438 codepoints.sort();
439 codepoints.dedup();
440
441 let mut runs = Vec::new();
442 let mut next = 0;
443
444 for c in codepoints {
445 if let Some(run) = runs.last_mut().filter(|_| c == next) {
446 *run += 1;
447 } else {
448 runs.push(c - next);
449 runs.push(1);
450 }
451
452 next = c + 1;
453 }
454
455 Self(runs)
456 }
457
458 pub fn contains(&self, c: u32) -> bool {
460 let mut inside = false;
461 let mut cursor = 0;
462
463 for &run in &self.0 {
464 if (cursor..cursor + run).contains(&c) {
465 return inside;
466 }
467 cursor += run;
468 inside = !inside;
469 }
470
471 false
472 }
473
474 pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
476 let mut inside = false;
477 let mut cursor = 0;
478 self.0.iter().flat_map(move |run| {
479 let range = if inside { cursor..cursor + run } else { 0..0 };
480 inside = !inside;
481 cursor += run;
482 range
483 })
484 }
485}
486
487impl Debug for Coverage {
488 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
489 f.pad("Coverage(..)")
490 }
491}
492
#[cfg(test)]
mod tests {
    use super::*;

    /// Style suffixes and modifiers are trimmed from family names, while names
    /// without a separated style suffix stay untouched.
    #[test]
    fn test_trim_styles() {
        // Pairs of (family name as found in a font, expected trimmed name).
        const CASES: &[(&str, &str)] = &[
            ("Atma Light", "Atma"),
            ("eras bold", "eras"),
            ("footlight mt light", "footlight mt"),
            ("times new roman", "times new roman"),
            ("noto sans mono cond sembd", "noto sans mono"),
            ("noto serif SEMCOND sembd", "noto serif"),
            ("crimson text", "crimson text"),
            ("footlight light", "footlight"),
            ("Noto Sans", "Noto Sans"),
            ("Noto Sans Light", "Noto Sans"),
            ("Noto Sans Semicondensed Heavy", "Noto Sans"),
            ("Familx", "Familx"),
            ("Font Ultra", "Font Ultra"),
            ("Font Ultra Bold", "Font"),
        ];

        for &(name, expected) in CASES {
            assert_eq!(typographic_family(name), expected);
        }
    }

    /// The run-length encoding matches the expected runs and membership
    /// queries agree with the plain set.
    #[test]
    fn test_coverage() {
        #[track_caller]
        fn check(set: &[u32], runs: &[u32]) {
            let coverage = Coverage::from_vec(set.to_vec());
            assert_eq!(coverage.0, runs);

            // Also probe a few codepoints past the largest one in the set.
            let limit = 5 + set.iter().copied().max().unwrap_or_default();
            for c in 0..limit {
                assert_eq!(set.contains(&c), coverage.contains(c));
            }
        }

        check(&[], &[]);
        check(&[0], &[0, 1]);
        check(&[1], &[1, 1]);
        check(&[0, 1], &[0, 2]);
        check(&[0, 1, 3], &[0, 2, 1, 1]);
        // Unsorted input with a duplicate:
        // {2, 3, 4, 9, 10, 11, 15, 18, 19}
        check(
            &[18, 19, 2, 4, 9, 11, 15, 3, 3, 10],
            &[2, 3, 4, 3, 3, 1, 2, 2],
        )
    }

    /// Iterating a coverage yields the original sorted codepoints.
    #[test]
    fn test_coverage_iter() {
        let codepoints = vec![2, 3, 7, 8, 9, 14, 15, 19, 21];
        let coverage = Coverage::from_vec(codepoints.clone());
        let decoded: Vec<u32> = coverage.iter().collect();
        assert_eq!(decoded, codepoints);
    }
}