// typst_library/text/font/info.rs

1use std::fmt::{self, Debug, Formatter};
2
3use serde::{Deserialize, Serialize};
4use ttf_parser::{PlatformId, name_id};
5
6use super::find_exception;
7use crate::text::{
8    AxisValue, FontAxis, FontStretch, FontStyle, FontVariant, FontWeight, Tag,
9};
10
11/// Properties of a single font.
12#[derive(Debug, Clone, PartialEq, Hash, Serialize, Deserialize)]
13pub struct FontInfo {
14    /// The typographic font family this font is part of.
15    pub family: String,
16    /// Properties that distinguish this font from other fonts in the same
17    /// family. For a variable font, this designates the default instance.
18    pub variant: FontVariant,
19    /// Properties of the font.
20    pub flags: FontFlags,
21    /// Variation axes this font supports.
22    #[serde(default, skip_serializing_if = "Vec::is_empty")]
23    pub axes: Vec<FontAxis>,
24    /// The unicode coverage of the font.
25    pub coverage: Coverage,
26}
27
28bitflags::bitflags! {
29    /// Bitflags describing characteristics of a font.
30    #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
31    #[derive(Serialize, Deserialize)]
32    #[serde(transparent)]
33    pub struct FontFlags: u32 {
34        /// All glyphs have the same width.
35        const MONOSPACE = 1 << 0;
36        /// Glyphs have short strokes at their stems.
37        const SERIF = 1 << 1;
38        /// Font face has a MATH table
39        const MATH = 1 << 2;
40        /// Font face has an fvar table
41        const VARIABLE = 1 << 3;
42    }
43}
44
45impl FontInfo {
46    /// Compute metadata for font at the `index` of the given data.
47    pub fn new(data: &[u8], index: u32) -> Option<Self> {
48        let ttf = ttf_parser::Face::parse(data, index).ok()?;
49        Self::from_ttf(&ttf)
50    }
51
52    /// Compute metadata for all fonts in the given data.
53    pub fn iter(data: &[u8]) -> impl Iterator<Item = FontInfo> + '_ {
54        let count = ttf_parser::fonts_in_collection(data).unwrap_or(1);
55        (0..count).filter_map(move |index| Self::new(data, index))
56    }
57
58    /// Compute metadata for a single ttf-parser face.
59    pub(super) fn from_ttf(ttf: &ttf_parser::Face) -> Option<Self> {
60        let ps_name = find_name(ttf, name_id::POST_SCRIPT_NAME);
61        let exception = ps_name.as_deref().and_then(find_exception);
62        // We cannot use Name ID 16 "Typographic Family", because for some
63        // fonts it groups together more than just Style / Weight / Stretch
64        // variants (e.g. Display variants of Noto fonts) and then some
65        // variants become inaccessible from Typst. And even though the
66        // fsSelection bit WWS should help us decide whether that is the
67        // case, it's wrong for some fonts (e.g. for certain variants of "Noto
68        // Sans Display").
69        //
70        // So, instead we use Name ID 1 "Family" and trim many common
71        // suffixes for which know that they just describe styling (e.g.
72        // "ExtraBold").
73        let family =
74            exception.and_then(|c| c.family.map(str::to_string)).or_else(|| {
75                let family = find_name(ttf, name_id::FAMILY)?;
76                Some(typographic_family(&family).to_string())
77            })?;
78
79        let variant = {
80            let style = exception.and_then(|c| c.style).unwrap_or_else(|| {
81                let mut full = find_name(ttf, name_id::FULL_NAME).unwrap_or_default();
82                full.make_ascii_lowercase();
83
84                // Some fonts miss the relevant bits for italic or oblique, so
85                // we also try to infer that from the full name.
86                //
87                // We do not use `ttf.is_italic()` because that also checks the
88                // italic angle which leads to false positives for some oblique
89                // fonts.
90                //
91                // See <https://github.com/typst/typst/issues/7479>.
92                let italic =
93                    ttf.style() == ttf_parser::Style::Italic || full.contains("italic");
94                let oblique = ttf.is_oblique()
95                    || full.contains("oblique")
96                    || full.contains("slanted");
97
98                match (italic, oblique) {
99                    (false, false) => FontStyle::Normal,
100                    (true, _) => FontStyle::Italic,
101                    (_, true) => FontStyle::Oblique,
102                }
103            });
104
105            let weight = exception.and_then(|c| c.weight).unwrap_or_else(|| {
106                let number = ttf.weight().to_number();
107                FontWeight::from_number(number)
108            });
109
110            let stretch = exception
111                .and_then(|c| c.stretch)
112                .unwrap_or_else(|| FontStretch::from_number(ttf.width().to_number()));
113
114            FontVariant { style, weight, stretch }
115        };
116
117        // Determine the unicode coverage.
118        let mut codepoints = vec![];
119        for subtable in ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) {
120            if subtable.is_unicode() {
121                subtable.codepoints(|c| codepoints.push(c));
122            }
123        }
124
125        let mut flags = FontFlags::empty();
126        flags.set(FontFlags::MONOSPACE, ttf.is_monospaced());
127        flags.set(FontFlags::MATH, ttf.tables().math.is_some());
128        flags.set(FontFlags::VARIABLE, ttf.is_variable());
129
130        // Determine whether this is a serif or sans-serif font.
131        if let Some(panose) = ttf
132            .raw_face()
133            .table(ttf_parser::Tag::from_bytes(b"OS/2"))
134            .and_then(|os2| os2.get(32..45))
135            && matches!(panose, [2, 2..=10, ..])
136        {
137            flags.insert(FontFlags::SERIF);
138        }
139
140        let axes = ttf
141            .variation_axes()
142            .into_iter()
143            .map(|axis| FontAxis {
144                tag: Tag::from_bytes(&axis.tag.to_bytes()),
145                min: AxisValue(axis.min_value),
146                max: AxisValue(axis.max_value),
147                default: AxisValue(axis.def_value),
148            })
149            .collect();
150
151        Some(FontInfo {
152            family,
153            variant,
154            axes,
155            flags,
156            coverage: Coverage::from_vec(codepoints),
157        })
158    }
159
160    /// Whether this is the macOS LastResort font. It can yield tofus with
161    /// glyph ID != 0.
162    pub fn is_last_resort(&self) -> bool {
163        self.family == "LastResort"
164    }
165}
166
167/// Try to find and decode the name with the given id.
168pub(super) fn find_name(ttf: &ttf_parser::Face, name_id: u16) -> Option<String> {
169    ttf.names().into_iter().find_map(|entry| {
170        if entry.name_id == name_id {
171            if let Some(string) = entry.to_string() {
172                return Some(string);
173            }
174
175            if entry.platform_id == PlatformId::Macintosh && entry.encoding_id == 0 {
176                return Some(decode_mac_roman(entry.name));
177            }
178        }
179
180        None
181    })
182}
183
/// Decode mac roman encoded bytes into a string.
///
/// Bytes below 128 are ASCII and map to the character with the same value.
/// Bytes from 128 upwards are translated through the lookup table below, so
/// every input byte yields exactly one character and decoding cannot fail.
fn decode_mac_roman(coded: &[u8]) -> String {
    // Characters for the byte values 0x80..=0xFF, in order.
    //
    // The entries for 0xDE and 0xDF are the "fi" and "fl" ligatures. They are
    // written as escapes because the ligature glyphs are easily mangled into
    // two separate letters, which is not a valid `char` literal.
    #[rustfmt::skip]
    const TABLE: [char; 128] = [
        'Ä', 'Å', 'Ç', 'É', 'Ñ', 'Ö', 'Ü', 'á', 'à', 'â', 'ä', 'ã', 'å', 'ç', 'é', 'è',
        'ê', 'ë', 'í', 'ì', 'î', 'ï', 'ñ', 'ó', 'ò', 'ô', 'ö', 'õ', 'ú', 'ù', 'û', 'ü',
        '†', '°', '¢', '£', '§', '•', '¶', 'ß', '®', '©', '™', '´', '¨', '≠', 'Æ', 'Ø',
        '∞', '±', '≤', '≥', '¥', 'µ', '∂', '∑', '∏', 'π', '∫', 'ª', 'º', 'Ω', 'æ', 'ø',
        '¿', '¡', '¬', '√', 'ƒ', '≈', '∆', '«', '»', '…', '\u{a0}', 'À', 'Ã', 'Õ', 'Œ', 'œ',
        '–', '—', '“', '”', '‘', '’', '÷', '◊', 'ÿ', 'Ÿ', '⁄', '€', '‹', '›', '\u{fb01}', '\u{fb02}',
        '‡', '·', '‚', '„', '‰', 'Â', 'Ê', 'Á', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'Ó', 'Ô',
        '\u{f8ff}', 'Ò', 'Ú', 'Û', 'Ù', 'ı', 'ˆ', '˜', '¯', '˘', '˙', '˚', '¸', '˝', '˛', 'ˇ',
    ];

    coded
        .iter()
        .map(|&code| match code.checked_sub(128) {
            // Upper half: the offset is at most 127 and thus always in bounds.
            Some(offset) => TABLE[usize::from(offset)],
            // Lower half: plain ASCII.
            None => char::from(code),
        })
        .collect()
}
204
/// Strip style naming from a family name and clean up bad names.
///
/// Surrounding whitespace and leading dots are removed first. After that,
/// trailing style words (matched ASCII-case-insensitively) are cut off as
/// long as they are set apart from the rest of the name by a separator. The
/// result is a slice of the input that keeps the original casing.
fn typographic_family(family: &str) -> &str {
    // Characters that separate names, modifiers and styles.
    const SEPARATORS: [char; 3] = [' ', '-', '_'];

    // Modifiers that may precede a style suffix.
    const MODIFIERS: &[&str] =
        &["extra", "ext", "ex", "x", "semi", "sem", "sm", "demi", "dem", "ultra"];

    // Style suffixes.
    #[rustfmt::skip]
    const SUFFIXES: &[&str] = &[
        "normal", "italic", "oblique", "slanted",
        "thin", "th", "hairline", "light", "lt", "regular", "medium", "med",
        "md", "bold", "bd", "demi", "extb", "black", "blk", "bk", "heavy",
        "narrow", "condensed", "cond", "cn", "cd", "compressed", "expanded", "exp",
        "vf", "var", "variable",
    ];

    // Drop surrounding whitespace and the odd leading dots of Apple fonts.
    let family = family.trim().trim_start_matches('.');

    // Matching happens on an ASCII-lowercased copy. ASCII lowercasing keeps
    // all byte offsets intact, so a length found in the copy can be applied
    // to the original name afterwards.
    let lower = family.to_ascii_lowercase();
    let mut rest = lower.as_str();

    loop {
        // Peel off all style suffixes that directly follow one another.
        let mut head = rest;
        let mut found = false;
        while let Some(shorter) =
            SUFFIXES.iter().find_map(|suffix| head.strip_suffix(suffix))
        {
            head = shorter;
            found = true;
        }

        if !found {
            break;
        }

        let before = rest.len();

        // The cut is only committed if a separator precedes the suffixes.
        if let Some(shorter) = head.strip_suffix(SEPARATORS) {
            rest = shorter;
            head = shorter;
        }

        // A modifier in front of the suffixes is dropped as well, but only
        // when a separator sets it apart from the text before it. This
        // guards against false positives.
        let without_modifier = MODIFIERS
            .iter()
            .find_map(|modifier| head.strip_suffix(modifier))
            .and_then(|shorter| shorter.strip_suffix(SEPARATORS));
        if let Some(shorter) = without_modifier {
            rest = shorter;
        }

        // Stop as soon as a round did not shorten the name.
        if rest.len() == before {
            break;
        }
    }

    // Carry the trimmed length over to the original spelling.
    &family[..rest.len()]
}
268
269/// A compactly encoded set of codepoints.
270///
271/// The set is represented by alternating specifications of how many codepoints
272/// are not in the set and how many are in the set.
273///
274/// For example, for the set `{2, 3, 4, 9, 10, 11, 15, 18, 19}`, there are:
275/// - 2 codepoints not inside (0, 1)
276/// - 3 codepoints inside (2, 3, 4)
277/// - 4 codepoints not inside (5, 6, 7, 8)
278/// - 3 codepoints inside (9, 10, 11)
279/// - 3 codepoints not inside (12, 13, 14)
280/// - 1 codepoint inside (15)
281/// - 2 codepoints not inside (16, 17)
282/// - 2 codepoints inside (18, 19)
283///
284/// So the resulting encoding is `[2, 3, 4, 3, 3, 1, 2, 2]`.
285#[derive(Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
286#[serde(transparent)]
287pub struct Coverage(Vec<u32>);
288
289impl Coverage {
290    /// Encode a vector of codepoints.
291    pub fn from_vec(mut codepoints: Vec<u32>) -> Self {
292        codepoints.sort();
293        codepoints.dedup();
294
295        let mut runs = Vec::new();
296        let mut next = 0;
297
298        for c in codepoints {
299            if let Some(run) = runs.last_mut().filter(|_| c == next) {
300                *run += 1;
301            } else {
302                runs.push(c - next);
303                runs.push(1);
304            }
305
306            next = c + 1;
307        }
308
309        Self(runs)
310    }
311
312    /// Whether the codepoint is covered.
313    pub fn contains(&self, c: u32) -> bool {
314        let mut inside = false;
315        let mut cursor = 0;
316
317        for &run in &self.0 {
318            if (cursor..cursor + run).contains(&c) {
319                return inside;
320            }
321            cursor += run;
322            inside = !inside;
323        }
324
325        false
326    }
327
328    /// Iterate over all covered codepoints.
329    pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
330        let mut inside = false;
331        let mut cursor = 0;
332        self.0.iter().flat_map(move |run| {
333            let range = if inside { cursor..cursor + run } else { 0..0 };
334            inside = !inside;
335            cursor += run;
336            range
337        })
338    }
339}
340
341impl Debug for Coverage {
342    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
343        f.pad("Coverage(..)")
344    }
345}
346
#[cfg(test)]
mod tests {
    use super::*;

    /// Style words are trimmed from family names, real name parts are kept.
    #[test]
    fn test_trim_styles() {
        // Pairs of (raw family name, expected typographic family).
        const CASES: &[(&str, &str)] = &[
            ("Atma Light", "Atma"),
            ("eras bold", "eras"),
            ("footlight mt light", "footlight mt"),
            ("times new roman", "times new roman"),
            ("noto sans mono cond sembd", "noto sans mono"),
            ("noto serif SEMCOND sembd", "noto serif"),
            ("crimson text", "crimson text"),
            ("footlight light", "footlight"),
            ("Noto Sans", "Noto Sans"),
            ("Noto Sans Light", "Noto Sans"),
            ("Noto Sans Semicondensed Heavy", "Noto Sans"),
            ("Familx", "Familx"),
            ("Font Ultra", "Font Ultra"),
            ("Font Ultra Bold", "Font"),
        ];

        for &(raw, expected) in CASES {
            assert_eq!(typographic_family(raw), expected);
        }
    }

    /// Sets are encoded into the expected runs and queried correctly.
    #[test]
    fn test_coverage() {
        #[track_caller]
        fn check(set: &[u32], runs: &[u32]) {
            let coverage = Coverage::from_vec(set.to_vec());
            assert_eq!(coverage.0, runs);

            // Also probe a few codepoints beyond the largest member.
            let limit = set.iter().copied().max().unwrap_or_default() + 5;
            for c in 0..limit {
                assert_eq!(set.contains(&c), coverage.contains(c));
            }
        }

        check(&[], &[]);
        check(&[0], &[0, 1]);
        check(&[1], &[1, 1]);
        check(&[0, 1], &[0, 2]);
        check(&[0, 1, 3], &[0, 2, 1, 1]);
        check(
            // {2, 3, 4, 9, 10, 11, 15, 18, 19}
            &[18, 19, 2, 4, 9, 11, 15, 3, 3, 10],
            &[2, 3, 4, 3, 3, 1, 2, 2],
        );
    }

    /// Iterating a coverage yields the encoded codepoints back.
    #[test]
    fn test_coverage_iter() {
        let codepoints = vec![2, 3, 7, 8, 9, 14, 15, 19, 21];
        let coverage = Coverage::from_vec(codepoints.clone());
        let decoded: Vec<u32> = coverage.iter().collect();
        assert_eq!(decoded, codepoints);
    }
}