otl_normalizer/
glyph_names.rs

1//! Human readable names for glyphs
2//!
3//! Much of this code was originally part of runebender.
4
5use std::collections::{BTreeMap, HashMap};
6
7use fontdrasil::types::GlyphName;
8use write_fonts::read::{
9    FontRef, TableProvider,
10    tables::cmap::{CmapSubtable, EncodingRecord, PlatformId},
11    types::{GlyphId16, Tag},
12};
13
14use crate::error::Error;
15
16/// A map for gids to human-readable names
17#[derive(Clone, Debug, Default)]
18pub struct NameMap(pub(crate) BTreeMap<GlyphId16, GlyphName>);
19
20impl NameMap {
21    /// Create a new name mapping for the glyphs in the provided font
22    pub fn from_font(font: &FontRef) -> Result<NameMap, Error> {
23        let num_glyphs = font
24            .maxp()
25            .map_err(|_| Error::MissingTable(Tag::new(b"maxp")))?
26            .num_glyphs();
27        let reverse_cmap = reverse_cmap(font)?;
28        let post = font.post().ok();
29        let mut name_map = (1..num_glyphs)
30            .map(|gid| {
31                let gid = GlyphId16::new(gid);
32                // first check post, then do fallback
33                if let Some(name) = post
34                    .as_ref()
35                    .and_then(|post| post.glyph_name(gid).map(GlyphName::from))
36                {
37                    return (gid, name);
38                }
39                // fallback to unicode or gid
40                let name = match reverse_cmap.get(&gid).and_then(|cp| char::from_u32(*cp)) {
41                    Some(codepoint) => match glyph_name_for_char(codepoint) {
42                        Some(name) => name,
43                        // we have a codepoint but it doesn't have a name:
44                        None => {
45                            let raw = codepoint as u32;
46                            if raw <= 0xFFFF {
47                                smol_str::format_smolstr!("uni{raw:04X}")
48                            } else {
49                                smol_str::format_smolstr!("u{raw:X}")
50                            }
51                            .into()
52                        }
53                    },
54                    // we have no codepoint, just use glyph ID
55                    None => smol_str::format_smolstr!("glyph.{:05}", gid.to_u16()).into(),
56                };
57                (gid, name)
58            })
59            .collect::<BTreeMap<_, _>>();
60        name_map.insert(GlyphId16::NOTDEF, ".notdef".into());
61
62        Ok(NameMap(name_map))
63    }
64
65    /// Returns a human readable name for this gid.
66    ///
67    /// This will panic if the gid is not in the font used to create this map.
68    pub fn get(&self, gid: GlyphId16) -> &GlyphName {
69        // map contains a name for every gid in the font
70        self.0.get(&gid).unwrap()
71    }
72
73    #[allow(dead_code)]
74    pub(crate) fn iter(&self) -> impl Iterator<Item = &GlyphName> + '_ {
75        self.0.values()
76    }
77}
78
79fn reverse_cmap(font: &FontRef) -> Result<HashMap<GlyphId16, u32>, Error> {
80    // <https://github.com/fonttools/fonttools/blob/6fa1a76e061c2e84243d8cac/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L334>
81    fn is_unicode(record: &&EncodingRecord) -> bool {
82        record.platform_id() == PlatformId::Unicode
83            || record.platform_id() == PlatformId::Unicode
84                && [0, 1, 10].contains(&record.encoding_id())
85    }
86
87    let cmap = font
88        .cmap()
89        .map_err(|_| Error::MissingTable(Tag::new(b"cmap")))?;
90    let offset_data = cmap.offset_data();
91
92    let mut reverse_cmap = HashMap::new();
93
94    let mut add_to_map = |args: (u32, GlyphId16)| {
95        // because multiple glyphs may map to the same codepoint,
96        // we always use the lowest codepoint to determine the name.
97        let val = reverse_cmap.entry(args.1).or_insert(args.0);
98        *val = args.0.min(*val);
99    };
100
101    for subtable in cmap
102        .encoding_records()
103        .iter()
104        .filter(is_unicode)
105        .map(|rec| rec.subtable(offset_data).unwrap())
106    {
107        match subtable {
108            CmapSubtable::Format4(subtable) => subtable
109                .iter()
110                .map(|(unicode, gid)| (unicode, GlyphId16::try_from(gid).unwrap()))
111                .for_each(&mut add_to_map),
112            CmapSubtable::Format12(subtable) => subtable
113                .iter()
114                .map(|(unicode, gid)| (unicode, GlyphId16::try_from(gid).unwrap()))
115                .for_each(&mut add_to_map),
116            _ => (),
117        }
118    }
119
120    Ok(reverse_cmap)
121}
122
123impl FromIterator<GlyphName> for NameMap {
124    fn from_iter<T: IntoIterator<Item = GlyphName>>(iter: T) -> Self {
125        Self(
126            iter.into_iter()
127                .enumerate()
128                .map(|(i, name)| (GlyphId16::new(i as _), name))
129                .collect(),
130        )
131    }
132}
133
134/// Given a `char`, returns the postscript name for that `char`s glyph,
135/// if one exists in the aglfn.
136fn glyph_name_for_char(chr: char) -> Option<GlyphName> {
137    fontdrasil::agl::agl_name_for_char(chr).map(Into::into)
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `glyph_name_for_char` on a few characters with known
    /// names, and on one character for which no name is expected.
    #[test]
    fn smoke_test() {
        let named = [
            ('c', "c"),
            ('C', "C"),
            ('é', "eacute"),
            ('<', "less"),
            ('!', "exclam"),
        ];
        for (chr, expected) in named {
            assert_eq!(glyph_name_for_char(chr).unwrap(), expected);
        }

        assert!(glyph_name_for_char('ء').is_none());
    }
}
155}