glyphs_reader/
glyphdata.rs

1//! determining glyph properties
2//!
3//! This module provides access to glyph info extracted from bundled
4//! (and potentially user-provided) data files.
5
6use quick_xml::{
7    events::{BytesStart, Event},
8    Reader,
9};
10use std::{
11    borrow::Cow,
12    collections::{BTreeSet, HashMap},
13    fmt::Display,
14    num::ParseIntError,
15    path::{Path, PathBuf},
16    str::FromStr,
17};
18
19use icu_properties::props::GeneralCategory;
20
21use smol_str::SmolStr;
22
23use crate::glyphdata_bundled::{self as bundled, find_pos_by_prod_name};
24
/// The primary category for a given glyph
///
/// These categories are not the same as the unicode character categories.
///
/// Values are parsed from the `category` attribute of a glyph data entry via
/// the [`FromStr`] impl, which accepts each variant's name verbatim.
// NOTE: the declaration order is significant: it defines both the derived
// `PartialOrd`/`Ord` ordering and the `repr(u8)` discriminant of each variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Category {
    Mark,
    Space,
    Separator,
    Letter,
    Number,
    Symbol,
    Punctuation,
    Other,
}
40
/// The subcategory of a given glyph
///
/// Values are parsed from the `subCategory` attribute of a glyph data entry
/// via the [`FromStr`] impl. Most variants are spelled exactly like the
/// variant name; [`Subcategory::DecimalDigit`] and
/// [`Subcategory::SpacingCombining`] are spelled with a space
/// (`"Decimal Digit"`, `"Spacing Combining"`).
// NOTE: the declaration order is significant: it defines both the derived
// `PartialOrd`/`Ord` ordering and the `repr(u8)` discriminant of each variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Subcategory {
    Spacing,
    Radical,
    Math,
    Superscript,
    Geometry,
    Dash,
    DecimalDigit,
    Currency,
    Fraction,
    Halfform,
    Small,
    Number,
    Quote,
    Space,
    Letter,
    Jamo,
    Format,
    Parenthesis,
    Matra,
    Arrow,
    Nonspacing,
    Compatibility,
    Syllable,
    Ligature,
    Modifier,
    SpacingCombining,
    Emoji,
    Enclosing,
    Composition,
    Lowercase,
    Uppercase,
    Smallcaps,
    Conjunct,
    Other,
}
80
/// The script of a given glyph
///
/// Values are parsed from the optional `script` attribute of a glyph data
/// entry via the [`FromStr`] impl.
// NOTE: the declaration order is significant: it defines both the derived
// `PartialOrd`/`Ord` ordering and the `repr(u8)` discriminant of each variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Script {
    Adlam,
    Alchemical,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Batak,
    Bengali,
    BlackLetter,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Canadian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Coptic,
    Cyrillic,
    Dentistry,
    Deseret,
    Devanagari,
    Divesakuru,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Greek,
    Gujarati,
    Gurmukhi,
    Han,
    Hangul,
    Hebrew,
    Javanese,
    Kana,
    Kannada,
    Kawi,
    Kayahli,
    Khmer,
    Khojki,
    Lao,
    Latin,
    Lepcha,
    Lue,
    Mahjong,
    Malayalam,
    Mandaic,
    Math,
    Mongolian,
    Musical,
    Myanmar,
    Nko,
    NyiakengPuachueHmong,
    Ogham,
    Oriya,
    Osage,
    Osmanya,
    PahawhHmong,
    PhaistosDisc,
    Rovas,
    Runic,
    Samaritan,
    Shavian,
    Sinhala,
    Syriac,
    Tamil,
    Telugu,
    Thaana,
    Thai,
    Tham,
    Tibet,
    Tifinagh,
    Vai,
    Yezidi,
    Yi,
}
165
/// Production name of a glyph.
///
/// Per [khaled](https://github.com/googlefonts/fontc/pull/1354#pullrequestreview-2707517748)
/// the overwhelming majority follow simple patterns.
///
/// The two patterned variants store only the codepoint; the textual name is
/// regenerated by the [`Display`] impl. Names that fit neither pattern are
/// stored as text.
///
/// See also <https://github.com/adobe-type-tools/agl-specification?tab=readme-ov-file#2-the-mapping>
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ProductionName {
    /// `uniHEX`, e.g. `uni004A`; printed as `uni` plus the codepoint as
    /// zero-padded (to four digits) uppercase hex.
    Bmp(u32),
    /// `uHEX`, e.g. `uE007D`; printed as `u` plus the codepoint as unpadded
    /// uppercase hex.
    NonBmp(u32),
    /// I reject your patterns and choose my own: the name is stored verbatim.
    Custom(SmolStr),
}
181
182impl From<&str> for ProductionName {
183    fn from(v: &str) -> ProductionName {
184        fn try_parse(
185            v: &str,
186            lbound: u32,
187            ubound: u32,
188            f: impl Fn(u32) -> ProductionName,
189        ) -> Option<ProductionName> {
190            if let Ok(v) = u32::from_str_radix(v, 16) {
191                if v >= lbound && v <= ubound {
192                    return Some(f(v));
193                }
194            }
195            None
196        }
197
198        match v {
199            _ if v.starts_with("uni") => try_parse(&v[3..], 0, 0xFFFF, ProductionName::Bmp),
200            _ if v.starts_with("u") => {
201                try_parse(&v[1..], 0xFFFF + 1, 0x10FFFF, ProductionName::NonBmp)
202            }
203            _ => None,
204        }
205        .unwrap_or_else(|| ProductionName::Custom(v.into()))
206    }
207}
208
209impl From<u32> for ProductionName {
210    fn from(v: u32) -> ProductionName {
211        if v <= 0xFFFF {
212            ProductionName::Bmp(v)
213        } else {
214            ProductionName::NonBmp(v)
215        }
216    }
217}
218
219impl Display for ProductionName {
220    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
221        match self {
222            ProductionName::Bmp(cp) => write!(f, "uni{cp:04X}"),
223            ProductionName::NonBmp(cp) => write!(f, "u{cp:X}"),
224            ProductionName::Custom(s) => write!(f, "{s}"),
225        }
226    }
227}
228
229impl From<ProductionName> for SmolStr {
230    fn from(v: ProductionName) -> SmolStr {
231        match v {
232            ProductionName::Bmp(cp) => smol_str::format_smolstr!("uni{cp:04X}"),
233            ProductionName::NonBmp(cp) => smol_str::format_smolstr!("u{cp:X}"),
234            ProductionName::Custom(s) => s,
235        }
236    }
237}
238
/// A queryable set of glyph data
///
/// Always queries static data from glyphsLib. Optionally includes a set of override values as well.
///
/// Default/no overrides instances are cheap. Instances created with overrides are more expensive.
#[derive(Default)]
pub struct GlyphData {
    // override-names are preferred to names in data
    // (keyed by glyph name; `None` when no overrides were supplied)
    overrides: Option<HashMap<SmolStr, QueryResult>>,
    // codepoint -> override glyph name; built in `GlyphData::new` from the
    // entries of `overrides` that carry a codepoint.
    // (the three-'r' spelling is the field's actual name)
    overrrides_by_codepoint: Option<HashMap<u32, SmolStr>>,
}
250
251impl GlyphData {
252    /// Overrides, if provided, explicitly assign the result for a given query
253    pub(crate) fn new(overrides: Option<HashMap<SmolStr, QueryResult>>) -> Self {
254        let overrrides_by_codepoint = overrides.as_ref().map(|overrides| {
255            overrides
256                .iter()
257                .filter_map(|(k, v)| v.codepoint.map(|cp| (cp, k.clone())))
258                .collect()
259        });
260        Self {
261            overrides,
262            overrrides_by_codepoint,
263        }
264    }
265
266    /// Create a new data set with user provided overrides
267    pub fn with_override_file(override_file: &Path) -> Result<Self, GlyphDataError> {
268        let bytes = std::fs::read(override_file).map_err(|err| GlyphDataError::UserFile {
269            path: override_file.to_owned(),
270            reason: err.kind(),
271        })?;
272        let overrides = parse_entries(&bytes)?;
273        Ok(GlyphData::new(Some(overrides)))
274    }
275}
276
/// The category and subcategory to use
///
/// Used for overrides and as the result of [`GlyphData::query`]
///
/// Besides the category information this also carries the optional
/// codepoint, script and production name recorded for the glyph. Only
/// `category` is always present.
#[derive(Debug, Clone, PartialEq)]
pub struct QueryResult {
    pub category: Category,
    pub subcategory: Option<Subcategory>,
    // `None` for entries registered under an alternate name when parsing XML,
    // and for results synthesized from the glyph name
    pub codepoint: Option<u32>,
    pub script: Option<Script>,
    pub production_name: Option<ProductionName>,
}
288
289#[derive(Clone, Debug, thiserror::Error)]
290pub enum GlyphDataError {
291    #[error("Couldn't read user file at '{path}': '{reason}'")]
292    UserFile {
293        path: PathBuf,
294        reason: std::io::ErrorKind,
295    },
296    #[error("Error parsing XML: '{0}'")]
297    ReaderError(#[from] quick_xml::Error),
298    #[error("Error parsing XML attribute: '{0}'")]
299    XmlAttributeError(#[from] quick_xml::events::attributes::AttrError),
300    #[error("Unknown category '{0}'")]
301    InvalidCategory(SmolStr),
302    #[error("Unknown subcategory '{0}'")]
303    InvalidSubcategory(SmolStr),
304    #[error("Unknown script '{0}'")]
305    InvalidScript(SmolStr),
306    #[error("the XML input did not start with a <glyphdata> tag")]
307    WrongFirstElement,
308    #[error("Missing required attribute '{missing}' in '{attributes}'")]
309    MissingRequiredAttribute {
310        attributes: String,
311        missing: &'static str,
312    },
313    #[error("Invalid unicode value '{raw}': '{inner}'")]
314    InvalidUnicode { raw: String, inner: ParseIntError },
315    #[error("Unexpected attribute '{0}'")]
316    UnknownAttribute(String),
317}
318
319impl GlyphDataError {
320    // a little helper here makes our parsing code cleaner
321    fn missing_attr(name: &'static str, raw_attrs: &[u8]) -> Self {
322        let attributes = String::from_utf8_lossy(raw_attrs).into_owned();
323        Self::MissingRequiredAttribute {
324            attributes,
325            missing: name,
326        }
327    }
328}
329
330/// Parse glyph info entries out of a GlyphData xml file.
331pub(crate) fn parse_entries(xml: &[u8]) -> Result<HashMap<SmolStr, QueryResult>, GlyphDataError> {
332    fn check_and_advance_past_preamble(reader: &mut Reader<&[u8]>) -> Result<(), GlyphDataError> {
333        loop {
334            let event = reader.read_event()?;
335            match event {
336                Event::Comment(_) => (),
337                Event::Decl(_) => (),
338                Event::DocType(_) => (),
339                Event::Start(start) if start.name().as_ref() == b"glyphData" => return Ok(()),
340                _other => {
341                    return Err(GlyphDataError::WrongFirstElement);
342                }
343            }
344        }
345    }
346
347    let mut reader = Reader::from_reader(xml);
348    reader.config_mut().trim_text(true);
349
350    check_and_advance_past_preamble(&mut reader)?;
351
352    let mut by_name = HashMap::new();
353    let mut alt_names = Vec::new();
354    for result in
355        iter_rows(&mut reader).map(|row| row.map_err(Into::into).and_then(parse_glyph_xml))
356    {
357        let info = result?;
358        by_name.insert(
359            info.name.clone(),
360            QueryResult {
361                category: info.category,
362                subcategory: info.subcategory,
363                codepoint: info.codepoint,
364                script: info.script,
365                production_name: info.production_name.clone(),
366            },
367        );
368        for alt in info.alt_names {
369            alt_names.push((
370                alt,
371                QueryResult {
372                    category: info.category,
373                    subcategory: info.subcategory,
374                    codepoint: None,
375                    script: info.script,
376                    production_name: info.production_name.clone(),
377                },
378            ));
379        }
380    }
381
382    // apply alts after to ensure they can't steal "real" names
383    for (name, value) in alt_names {
384        by_name.entry(name).or_insert(value);
385    }
386
387    Ok(by_name)
388}
389
390fn iter_rows<'a, 'b: 'a>(
391    reader: &'b mut Reader<&'a [u8]>,
392) -> impl Iterator<Item = Result<BytesStart<'a>, quick_xml::Error>> + 'a {
393    std::iter::from_fn(|| match reader.read_event() {
394        Err(e) => Some(Err(e)),
395        Ok(Event::Empty(start)) => Some(Ok(start)),
396        _ => None,
397    })
398}
399
/// The information extracted from a single glyph element of the XML data.
///
/// Produced by `parse_glyph_xml`; `name` and `category` are required
/// attributes, everything else is optional.
struct GlyphInfoFromXml {
    name: SmolStr,
    // the comma separated `altNames` attribute, split and trimmed
    alt_names: Vec<SmolStr>,
    category: Category,
    subcategory: Option<Subcategory>,
    // the `unicode` attribute, parsed as hexadecimal
    codepoint: Option<u32>,
    script: Option<Script>,
    // the `production` attribute
    production_name: Option<ProductionName>,
}
409
410fn parse_glyph_xml(item: BytesStart) -> Result<GlyphInfoFromXml, GlyphDataError> {
411    let mut name = None;
412    let mut category = None;
413    let mut subcategory = None;
414    let mut unicode = None;
415    let mut alt_names = None;
416    let mut script = None;
417    let mut production_name = None;
418
419    for attr in item.attributes() {
420        let attr = attr?;
421        let value = attr.unescape_value()?;
422        match attr.key.as_ref() {
423            b"name" => name = Some(value),
424            b"category" => category = Some(value),
425            b"subCategory" => subcategory = Some(value),
426            b"unicode" => unicode = Some(value),
427            b"altNames" => alt_names = Some(value),
428            b"script" => script = Some(value),
429            b"production" => production_name = Some(value.as_ref().into()),
430            b"unicodeLegacy" | b"case" | b"direction" | b"description" => (),
431            other => {
432                return Err(GlyphDataError::UnknownAttribute(
433                    String::from_utf8_lossy(other).into_owned(),
434                ))
435            }
436        }
437    }
438
439    // now we've found some values, let's finalize them
440    let name = name
441        .map(SmolStr::new)
442        .ok_or_else(|| GlyphDataError::missing_attr("name", item.attributes_raw()))?;
443    let category = category
444        .ok_or_else(|| GlyphDataError::missing_attr("category", item.attributes_raw()))
445        .and_then(|cat| {
446            Category::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidCategory)
447        })?;
448    let subcategory = subcategory
449        .map(|cat| Subcategory::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidSubcategory))
450        .transpose()?;
451    let script = script
452        .map(|cat| Script::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidScript))
453        .transpose()?;
454    let codepoint = unicode
455        .map(|s| {
456            u32::from_str_radix(&s, 16).map_err(|inner| GlyphDataError::InvalidUnicode {
457                raw: s.into_owned(),
458                inner,
459            })
460        })
461        .transpose()?;
462    let alt_names = alt_names
463        .map(|names| {
464            names
465                .as_ref()
466                .split(',')
467                .map(|name| SmolStr::from(name.trim()))
468                .collect()
469        })
470        .unwrap_or_default();
471
472    Ok(GlyphInfoFromXml {
473        name,
474        alt_names,
475        category,
476        subcategory,
477        codepoint,
478        script,
479        production_name,
480    })
481}
482
483impl GlyphData {
484    /// Get the info for the given name/codepoints, attempting to synthesize it if necessary
485    ///
486    /// Returns, from most to least preferred:
487    ///
488    /// 1. The matching override value
489    /// 1. The matching value from bundled data
490    /// 1. A computed value based on name heuristics
491    ///
492    // See https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L94
493    pub fn query(&self, name: &str, codepoints: Option<&BTreeSet<u32>>) -> Option<QueryResult> {
494        self.query_no_synthesis(name, codepoints)
495            // we don't have info for this glyph: can we synthesize it?
496            .or_else(|| self.construct_result(name))
497    }
498
499    /// As [`Self::query`] but without a fallback to computed values.
500    ///
501    /// Exists to enable result synthesis to query.
502    fn query_no_synthesis(
503        &self,
504        name: &str,
505        codepoints: Option<&BTreeSet<u32>>,
506    ) -> Option<QueryResult> {
507        // Override?
508        if let (Some(overrides), Some(overrides_by_codepoint)) = (
509            self.overrides.as_ref(),
510            self.overrrides_by_codepoint.as_ref(),
511        ) {
512            let override_result = overrides.get(name).or_else(|| {
513                codepoints
514                    .into_iter()
515                    .flat_map(|cps| cps.iter())
516                    .find_map(|cp: &u32| {
517                        overrides_by_codepoint
518                            .get(cp)
519                            .and_then(|n| overrides.get(n))
520                    })
521            });
522            if let Some(override_result) = override_result {
523                return Some(QueryResult {
524                    category: override_result.category,
525                    subcategory: override_result.subcategory,
526                    codepoint: override_result.codepoint,
527                    script: override_result.script,
528                    production_name: override_result.production_name.clone(),
529                });
530            }
531        }
532
533        // No override, perhaps we have a direct answer?
534        bundled::find_pos_by_name(name)
535            .or_else(|| {
536                codepoints
537                    .into_iter()
538                    .flat_map(|cps| cps.iter())
539                    .find_map(|cp| bundled::find_pos_by_codepoint(*cp))
540            })
541            .or_else(|| find_pos_by_prod_name(name.into()))
542            .map(|i| {
543                bundled::get(i).unwrap_or_else(|| panic!("We found invalid index {i} somehow"))
544            })
545    }
546
547    fn contains_name(&self, name: &str) -> bool {
548        if let Some(overrides) = self.overrides.as_ref() {
549            let name: SmolStr = name.into();
550            if overrides.contains_key(&name) {
551                return true;
552            }
553        }
554        bundled::find_pos_by_name(name).is_some()
555    }
556
557    fn construct_result(&self, name: &str) -> Option<QueryResult> {
558        let category_subcategory = self.construct_category(name);
559        let production_name = self.construct_production_name(name);
560        if category_subcategory.is_none() && production_name.is_none() {
561            return None;
562        }
563        // if we have a production name but no category, 'Other' is good enough
564        let (category, subcategory) = category_subcategory.unwrap_or((Category::Other, None));
565        Some(QueryResult {
566            category,
567            subcategory,
568            codepoint: None,
569            script: None,
570            production_name,
571        })
572    }
573
574    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L199
575    fn construct_category(&self, name: &str) -> Option<(Category, Option<Subcategory>)> {
576        // in glyphs.app '_' prefix means "no export"
577        if name.starts_with('_') {
578            return None;
579        }
580        let (base_name, _) = self.split_glyph_suffix(name);
581        if let Some(result) = self.query_no_synthesis(base_name, None) {
582            return Some((result.category, result.subcategory));
583        }
584
585        if let Some(base_names) = self.split_ligature_glyph_name(base_name) {
586            let base_names_attributes: Vec<_> = base_names
587                .iter()
588                .filter_map(|name| self.query_no_synthesis(name, None))
589                .collect();
590            if let Some(first_attr) = base_names_attributes.first() {
591                // if first is mark, we're a mark
592                if first_attr.category == Category::Mark {
593                    return Some((Category::Mark, first_attr.subcategory));
594                } else if first_attr.category == Category::Letter {
595                    // if first is letter and rest are marks/separators, we use info from first
596                    if base_names_attributes
597                        .iter()
598                        .skip(1)
599                        .map(|result| result.category)
600                        .all(|cat| matches!(cat, Category::Mark | Category::Separator))
601                    {
602                        return Some((first_attr.category, first_attr.subcategory));
603                    } else {
604                        return Some((Category::Letter, Some(Subcategory::Ligature)));
605                    }
606                }
607            }
608        };
609
610        // finally fall back to checking the AGLFN for the base name:
611        Self::construct_category_via_agl(base_name)
612    }
613
614    // https://github.com/googlefonts/glyphsLib/blob/c4db6b981d5/Lib/glyphsLib/glyphdata.py#L351
615    fn construct_production_name(&self, name: &str) -> Option<ProductionName> {
616        fn append_suffix(base_name: &mut String, suffix: Option<&str>) {
617            if let Some(suffix) = suffix {
618                base_name.push('.');
619                base_name.push_str(suffix);
620            }
621        }
622
623        fn is_u_name(name: &str) -> bool {
624            name.starts_with("u") && name[1..].bytes().all(|b| b.is_ascii_hexdigit())
625        }
626
627        let (base_name, suffix) = self.split_glyph_suffix(name);
628
629        // if we have a production name for the base name, append the suffix and go home
630        let prod_name_with_suffix = suffix.and_then(|_| {
631            self.query_no_synthesis(base_name, None)
632                .and_then(|result| result.production_name)
633                .map(|base_prod_name| {
634                    let mut prod_name = base_prod_name.to_string();
635                    append_suffix(&mut prod_name, suffix);
636                    prod_name.as_str().into()
637                })
638        });
639        if prod_name_with_suffix.is_some() {
640            return prod_name_with_suffix;
641        }
642
643        let base_names = self
644            .split_ligature_glyph_name(base_name)
645            .unwrap_or_else(|| vec![base_name.into()]);
646        // Attempt to find a production name for each ligature component (or the whole base name).
647        // Return early if any such names have no GlyphData entry
648        // OR the entry doesn't specify a production name AND they aren't already AGLFN names...
649        let prod_names: Vec<SmolStr> = base_names
650            .into_iter()
651            .map(|name| {
652                self.query_no_synthesis(&name, None).and_then(|result| {
653                    result.production_name.map(Into::into).or_else(|| {
654                        // if no production name, return the name itself if already in AGLFN
655                        fontdrasil::agl::char_for_agl_name(name.as_ref()).map(|_| name)
656                    })
657                })
658            })
659            .collect::<Option<_>>()?;
660
661        // only (uniXXXX, uniYYYY, etc.) names with 4 hex digits can be concatenated using the
662        // more compact format uniXXXXYYYY... uXXXXX names for characters beyond BMP are joined
663        // in ligatures using the usual '_'.
664        let any_characters_outside_bmp = prod_names
665            .iter()
666            .any(|name| name.len() > 5 && is_u_name(name.as_ref()));
667        let any_uni_names = prod_names.iter().any(|name| name.starts_with("uni"));
668
669        if !any_characters_outside_bmp && any_uni_names {
670            let mut uni_names: Vec<Cow<str>> = Vec::new();
671            for part in &prod_names {
672                if let Some(stripped) = part.strip_prefix("uni") {
673                    uni_names.push(Cow::Borrowed(stripped));
674                } else if part.len() == 5 && is_u_name(part.as_ref()) {
675                    uni_names.push(Cow::Borrowed(&part.as_ref()[1..]));
676                } else if let Some(ch) = fontdrasil::agl::char_for_agl_name(part.as_ref()) {
677                    uni_names.push(Cow::Owned(format!("{:04X}", ch as u32)));
678                } else {
679                    panic!("Unexpected part while constructing production name: {part}");
680                }
681            }
682            let mut result = String::from("uni");
683            for segment in uni_names {
684                result.push_str(segment.as_ref());
685            }
686            append_suffix(&mut result, suffix);
687            return Some(result.as_str().into());
688        }
689
690        let mut result = prod_names.join("_");
691        append_suffix(&mut result, suffix);
692        Some(result.as_str().into())
693    }
694
695    // this doesn't need a &self param, but we want it locally close to the
696    // code that calls it, so we'll make it a type method :shrug:
697    fn construct_category_via_agl(base_name: &str) -> Option<(Category, Option<Subcategory>)> {
698        if let Some(first_char) = fontdrasil::agl::glyph_name_to_unicode(base_name)
699            .chars()
700            .next()
701        {
702            let (category, subcategory) = category_from_icu(first_char);
703
704            // Exception: Something like "one_two" should be a (_, Ligature),
705            // "acutecomb_brevecomb" should however stay (Mark, Nonspacing).
706            if base_name.contains('_') && category != Category::Mark {
707                return Some((category, Some(Subcategory::Ligature)));
708            } else {
709                return Some((category, subcategory));
710            }
711        }
712        None
713    }
714
715    fn split_glyph_suffix<'n>(&self, name: &'n str) -> (&'n str, Option<&'n str>) {
716        let multi_suffix = name.bytes().filter(|b| *b == b'.').count() > 1;
717        if multi_suffix {
718            // with multiple suffixes, try adding them one at a time and seeing if
719            // we find a known name.
720            // basically: for 'char.bottom.alt' we want to return (char.bottom, alt)
721            // if
722            for idx in name
723                .bytes()
724                .enumerate()
725                .filter_map(|(i, b)| (b == b'.').then_some(i))
726                .skip(1)
727            {
728                let (base, suffix) = name.split_at(idx);
729                if self.contains_name(base) {
730                    // suffix starts with '.' so we strip it to match split_once below
731                    return (base, Some(&suffix[1..]));
732                }
733            }
734        }
735        // finally just split at the first dot, or the whole name if no suffix
736        name.split_once('.')
737            .map_or_else(|| (name, None), |(base, suffix)| (base, Some(suffix)))
738    }
739
740    /// Split a ligature glyph into component parts
741    ///
742    /// Returns `None` if the name does not contain the '_' character, otherwise
743    /// returns a list of names of components, derived from the glyph name.
744    /// See
745    /// <https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L307>
746    fn split_ligature_glyph_name(&self, name: &str) -> Option<Vec<SmolStr>> {
747        // if last part has a script suffix, grab it
748        let script_suffix = name.rsplit_once('_')?.1.rsplit_once('-').map(|(_, x)| x);
749
750        let mut parts: Vec<_> = name
751            .trim_end_matches(script_suffix.unwrap_or_default())
752            // after trimming script we also need to trim the '-'!
753            .trim_end_matches('-')
754            .split('_')
755            .map(SmolStr::new)
756            .collect();
757
758        let script = match script_suffix {
759            // if there was no suffix, we're done
760            None => return Some(parts),
761            Some(script) => script,
762        };
763
764        // otherwise we try adding the script suffix to each part, and see if
765        // that's a known glyph name:
766        for part in parts.iter_mut() {
767            // if the part already has a script, continue
768            if part.contains('-') {
769                continue;
770            }
771
772            let new_part = smol_str::format_smolstr!("{part}-{script}");
773            // if non-suffixed exists but suffixed doesn't, keep non-suffixed
774            if self.contains_name(part.as_ref()) && !self.contains_name(&new_part) {
775                continue;
776            }
777            *part = new_part;
778        }
779        Some(parts)
780    }
781}
782
783// https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L261
784fn category_from_icu(c: char) -> (Category, Option<Subcategory>) {
785    match icu_properties::CodePointMapData::<GeneralCategory>::new().get(c) {
786        GeneralCategory::Unassigned | GeneralCategory::OtherSymbol => (Category::Symbol, None),
787        GeneralCategory::UppercaseLetter
788        | GeneralCategory::LowercaseLetter
789        | GeneralCategory::TitlecaseLetter
790        | GeneralCategory::OtherLetter => (Category::Letter, None),
791        GeneralCategory::ModifierLetter => (Category::Letter, Some(Subcategory::Modifier)),
792        GeneralCategory::NonspacingMark => (Category::Mark, Some(Subcategory::Nonspacing)),
793        GeneralCategory::SpacingMark => (Category::Mark, Some(Subcategory::SpacingCombining)),
794        GeneralCategory::EnclosingMark => (Category::Mark, Some(Subcategory::Enclosing)),
795        GeneralCategory::DecimalNumber | GeneralCategory::OtherNumber => {
796            (Category::Number, Some(Subcategory::DecimalDigit))
797        }
798        GeneralCategory::LetterNumber => (Category::Number, None),
799        GeneralCategory::SpaceSeparator => (Category::Separator, Some(Subcategory::Space)),
800        GeneralCategory::LineSeparator
801        | GeneralCategory::ParagraphSeparator
802        | GeneralCategory::Control => (Category::Separator, None),
803        GeneralCategory::Format => (Category::Separator, Some(Subcategory::Format)),
804        GeneralCategory::PrivateUse => (Category::Letter, Some(Subcategory::Compatibility)),
805        GeneralCategory::DashPunctuation => (Category::Punctuation, Some(Subcategory::Dash)),
806        GeneralCategory::OpenPunctuation | GeneralCategory::ClosePunctuation => {
807            (Category::Punctuation, Some(Subcategory::Parenthesis))
808        }
809        GeneralCategory::ConnectorPunctuation | GeneralCategory::OtherPunctuation => {
810            (Category::Punctuation, None)
811        }
812        GeneralCategory::InitialPunctuation | GeneralCategory::FinalPunctuation => {
813            (Category::Punctuation, Some(Subcategory::Quote))
814        }
815        GeneralCategory::MathSymbol => (Category::Symbol, Some(Subcategory::Math)),
816        GeneralCategory::CurrencySymbol => (Category::Symbol, Some(Subcategory::Currency)),
817        GeneralCategory::ModifierSymbol => (Category::Mark, Some(Subcategory::Spacing)),
818        GeneralCategory::Surrogate => unreachable!("char cannot represent surrogate code points"),
819    }
820}
821
822impl FromStr for Category {
823    type Err = SmolStr;
824
825    fn from_str(s: &str) -> Result<Self, Self::Err> {
826        match s {
827            "Mark" => Ok(Self::Mark),
828            "Space" => Ok(Self::Space),
829            "Separator" => Ok(Self::Separator),
830            "Letter" => Ok(Self::Letter),
831            "Number" => Ok(Self::Number),
832            "Symbol" => Ok(Self::Symbol),
833            "Punctuation" => Ok(Self::Punctuation),
834            "Other" => Ok(Self::Other),
835            _ => Err(s.into()),
836        }
837    }
838}
839
840impl FromStr for Subcategory {
841    type Err = SmolStr;
842
843    fn from_str(s: &str) -> Result<Self, Self::Err> {
844        match s {
845            "Spacing" => Ok(Self::Spacing),
846            "Radical" => Ok(Self::Radical),
847            "Math" => Ok(Self::Math),
848            "Superscript" => Ok(Self::Superscript),
849            "Geometry" => Ok(Self::Geometry),
850            "Dash" => Ok(Self::Dash),
851            "Decimal Digit" => Ok(Self::DecimalDigit),
852            "Currency" => Ok(Self::Currency),
853            "Fraction" => Ok(Self::Fraction),
854            "Halfform" => Ok(Self::Halfform),
855            "Small" => Ok(Self::Small),
856            "Number" => Ok(Self::Number),
857            "Quote" => Ok(Self::Quote),
858            "Space" => Ok(Self::Space),
859            "Letter" => Ok(Self::Letter),
860            "Jamo" => Ok(Self::Jamo),
861            "Format" => Ok(Self::Format),
862            "Parenthesis" => Ok(Self::Parenthesis),
863            "Matra" => Ok(Self::Matra),
864            "Arrow" => Ok(Self::Arrow),
865            "Nonspacing" => Ok(Self::Nonspacing),
866            "Compatibility" => Ok(Self::Compatibility),
867            "Syllable" => Ok(Self::Syllable),
868            "Ligature" => Ok(Self::Ligature),
869            "Modifier" => Ok(Self::Modifier),
870            "Spacing Combining" => Ok(Self::SpacingCombining),
871            "Emoji" => Ok(Self::Emoji),
872            "Enclosing" => Ok(Self::Enclosing),
873            "Composition" => Ok(Self::Composition),
874            "Other" => Ok(Self::Other),
875            "Lowercase" => Ok(Self::Lowercase),
876            "Uppercase" => Ok(Self::Uppercase),
877            "Smallcaps" => Ok(Self::Smallcaps),
878            "Conjunct" => Ok(Self::Conjunct),
879
880            _ => Err(s.into()),
881        }
882    }
883}
884
885impl FromStr for Script {
886    type Err = SmolStr;
887
888    fn from_str(s: &str) -> Result<Self, Self::Err> {
889        match s {
890            "adlam" => Ok(Self::Adlam),
891            "alchemical" => Ok(Self::Alchemical),
892            "arabic" => Ok(Self::Arabic),
893            "armenian" => Ok(Self::Armenian),
894            "avestan" => Ok(Self::Avestan),
895            "balinese" => Ok(Self::Balinese),
896            "bamum" => Ok(Self::Bamum),
897            "batak" => Ok(Self::Batak),
898            "bengali" => Ok(Self::Bengali),
899            "blackLetter" => Ok(Self::BlackLetter),
900            "bopomofo" => Ok(Self::Bopomofo),
901            "brahmi" => Ok(Self::Brahmi),
902            "braille" => Ok(Self::Braille),
903            "buginese" => Ok(Self::Buginese),
904            "canadian" => Ok(Self::Canadian),
905            "chakma" => Ok(Self::Chakma),
906            "cham" => Ok(Self::Cham),
907            "cherokee" => Ok(Self::Cherokee),
908            "chorasmian" => Ok(Self::Chorasmian),
909            "coptic" => Ok(Self::Coptic),
910            "cyrillic" => Ok(Self::Cyrillic),
911            "dentistry" => Ok(Self::Dentistry),
912            "deseret" => Ok(Self::Deseret),
913            "devanagari" => Ok(Self::Devanagari),
914            "divesakuru" => Ok(Self::Divesakuru),
915            "elbasan" => Ok(Self::Elbasan),
916            "elymaic" => Ok(Self::Elymaic),
917            "ethiopic" => Ok(Self::Ethiopic),
918            "georgian" => Ok(Self::Georgian),
919            "glagolitic" => Ok(Self::Glagolitic),
920            "gothic" => Ok(Self::Gothic),
921            "greek" => Ok(Self::Greek),
922            "gujarati" => Ok(Self::Gujarati),
923            "gurmukhi" => Ok(Self::Gurmukhi),
924            "han" => Ok(Self::Han),
925            "hangul" => Ok(Self::Hangul),
926            "hebrew" => Ok(Self::Hebrew),
927            "javanese" => Ok(Self::Javanese),
928            "kana" => Ok(Self::Kana),
929            "kannada" => Ok(Self::Kannada),
930            "kawi" => Ok(Self::Kawi),
931            "kayahli" => Ok(Self::Kayahli),
932            "khmer" => Ok(Self::Khmer),
933            "khojki" => Ok(Self::Khojki),
934            "lao" => Ok(Self::Lao),
935            "latin" => Ok(Self::Latin),
936            "lepcha" => Ok(Self::Lepcha),
937            "lue" => Ok(Self::Lue),
938            "mahjong" => Ok(Self::Mahjong),
939            "malayalam" => Ok(Self::Malayalam),
940            "mandaic" => Ok(Self::Mandaic),
941            "math" => Ok(Self::Math),
942            "mongolian" => Ok(Self::Mongolian),
943            "musical" => Ok(Self::Musical),
944            "myanmar" => Ok(Self::Myanmar),
945            "nko" => Ok(Self::Nko),
946            "nyiakeng puachue hmong" => Ok(Self::NyiakengPuachueHmong),
947            "ogham" => Ok(Self::Ogham),
948            "oriya" => Ok(Self::Oriya),
949            "osage" => Ok(Self::Osage),
950            "osmanya" => Ok(Self::Osmanya),
951            "pahawh hmong" => Ok(Self::PahawhHmong),
952            "phaistosDisc" => Ok(Self::PhaistosDisc),
953            "rovas" => Ok(Self::Rovas),
954            "runic" => Ok(Self::Runic),
955            "samaritan" => Ok(Self::Samaritan),
956            "shavian" => Ok(Self::Shavian),
957            "sinhala" => Ok(Self::Sinhala),
958            "syriac" => Ok(Self::Syriac),
959            "tamil" => Ok(Self::Tamil),
960            "telugu" => Ok(Self::Telugu),
961            "thaana" => Ok(Self::Thaana),
962            "thai" => Ok(Self::Thai),
963            "tham" => Ok(Self::Tham),
964            "tibet" => Ok(Self::Tibet),
965            "tifinagh" => Ok(Self::Tifinagh),
966            "vai" => Ok(Self::Vai),
967            "yi" => Ok(Self::Yi),
968            _ => Err(s.into()),
969        }
970    }
971}
972
973impl Display for Category {
974    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
975        match self {
976            Self::Mark => write!(f, "Mark"),
977            Self::Space => write!(f, "Space"),
978            Self::Separator => write!(f, "Separator"),
979            Self::Letter => write!(f, "Letter"),
980            Self::Number => write!(f, "Number"),
981            Self::Symbol => write!(f, "Symbol"),
982            Self::Punctuation => write!(f, "Punctuation"),
983            Self::Other => write!(f, "Other"),
984        }
985    }
986}
987
988impl Display for Subcategory {
989    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
990        match self {
991            Self::Spacing => write!(f, "Spacing"),
992            Self::Radical => write!(f, "Radical"),
993            Self::Math => write!(f, "Math"),
994            Self::Superscript => write!(f, "Superscript"),
995            Self::Geometry => write!(f, "Geometry"),
996            Self::Dash => write!(f, "Dash"),
997            Self::DecimalDigit => write!(f, "Decimal Digit"),
998            Self::Currency => write!(f, "Currency"),
999            Self::Fraction => write!(f, "Fraction"),
1000            Self::Halfform => write!(f, "Halfform"),
1001            Self::Small => write!(f, "Small"),
1002            Self::Number => write!(f, "Number"),
1003            Self::Quote => write!(f, "Quote"),
1004            Self::Space => write!(f, "Space"),
1005            Self::Letter => write!(f, "Letter"),
1006            Self::Jamo => write!(f, "Jamo"),
1007            Self::Format => write!(f, "Format"),
1008            Self::Parenthesis => write!(f, "Parenthesis"),
1009            Self::Matra => write!(f, "Matra"),
1010            Self::Arrow => write!(f, "Arrow"),
1011            Self::Nonspacing => write!(f, "Nonspacing"),
1012            Self::Compatibility => write!(f, "Compatibility"),
1013            Self::Syllable => write!(f, "Syllable"),
1014            Self::Ligature => write!(f, "Ligature"),
1015            Self::Modifier => write!(f, "Modifier"),
1016            Self::SpacingCombining => write!(f, "Spacing Combining"),
1017            Self::Emoji => write!(f, "Emoji"),
1018            Self::Enclosing => write!(f, "Enclosing"),
1019            Self::Composition => write!(f, "Composition"),
1020            Self::Lowercase => write!(f, "Lowercase"),
1021            Self::Uppercase => write!(f, "Uppercase"),
1022            Self::Smallcaps => write!(f, "Smallcaps"),
1023            Self::Conjunct => write!(f, "Conjunct"),
1024            Self::Other => write!(f, "Other"),
1025        }
1026    }
1027}
1028
#[cfg(test)]
mod tests {

    use super::*;
    use rstest::rstest;

    /// What a category lookup yields: `None` when the query finds nothing,
    /// otherwise the category plus an optional subcategory.
    type CategoryPair = Option<(Category, Option<Subcategory>)>;

    /// Expectation for a glyph that has a category but no subcategory.
    fn bare(category: Category) -> CategoryPair {
        Some((category, None))
    }

    /// Expectation for a glyph that has both a category and a subcategory.
    fn with_sub(category: Category, subcategory: Subcategory) -> CategoryPair {
        Some((category, Some(subcategory)))
    }

    /// Queries a fresh, override-free `GlyphData` for `name` (with the given
    /// codepoints) and keeps only the category information.
    fn get_category(name: &str, codepoints: &[u32]) -> CategoryPair {
        let codepoints = codepoints.iter().copied().collect();
        let data = GlyphData::new(None);
        let result = data.query(name, Some(&codepoints))?;
        Some((result.category, result.subcategory))
    }

    // An override entry must be returned as-is for every field.
    #[test]
    fn simple_overrides() {
        let magic_a = QueryResult {
            category: Category::Mark,
            subcategory: Some(Subcategory::SpacingCombining),
            codepoint: Some(b'A' as u32),
            script: Some(Script::Alchemical),
            production_name: Some(ProductionName::Custom("MagicA".into())),
        };
        let overrides = HashMap::from([("A".into(), magic_a)]);
        let data = GlyphData::new(Some(overrides));

        let result = data.query("A", None).unwrap();
        assert_eq!(result.category, Category::Mark);
        assert_eq!(result.subcategory, Some(Subcategory::SpacingCombining));
        assert_eq!(result.codepoint, Some(b'A' as u32));
        assert_eq!(result.script, Some(Script::Alchemical));
        assert_eq!(result.production_name, Some("MagicA".into()));
    }

    // Overrides loaded from an XML file take effect for the glyphs they name.
    #[test]
    fn overrides_from_file() {
        let override_file = Path::new("./data/GlyphData_override_test.xml");
        let data = GlyphData::with_override_file(override_file).unwrap();

        let zero = data.query("zero", None).unwrap();
        assert_eq!(zero.category, Category::Other);

        let c = data.query("C", None).unwrap();
        assert_eq!(c.category, Category::Number);

        let yogh = data.query("Yogh", None).unwrap();
        assert_eq!(yogh.production_name, Some("Yolo".into()));
    }

    // from python glyphsLib: https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d5/tests/glyphdata_test.py#L106
    #[test]
    fn py_test_category() {
        // the expectations that recur throughout the table
        let separator = bare(Category::Separator);
        let letter = bare(Category::Letter);
        let ligature = with_sub(Category::Letter, Subcategory::Ligature);
        let nonspacing = with_sub(Category::Mark, Subcategory::Nonspacing);

        let cases = [
            (".notdef", separator),
            // this test case requires AGL lookup:
            ("uni000D", separator),
            ("boxHeavyUp", with_sub(Category::Symbol, Subcategory::Geometry)),
            ("eacute", letter),
            ("Abreveacute", letter),
            ("C-fraktur", letter),
            ("fi", ligature),
            ("fi.alt", ligature),
            ("hib-ko", with_sub(Category::Letter, Subcategory::Syllable)),
            ("one.foo", with_sub(Category::Number, Subcategory::DecimalDigit)),
            ("one_two.foo", with_sub(Category::Number, Subcategory::Ligature)),
            ("o_f_f_i", ligature),
            ("o_f_f_i.foo", ligature),
            ("ain_alefMaksura-ar.fina", ligature),
            ("brevecomb", nonspacing),
            ("brevecomb.case", nonspacing),
            ("brevecomb_acutecomb", nonspacing),
            ("brevecomb_acutecomb.case", nonspacing),
            ("caroncomb_dotaccentcomb", nonspacing),
            ("dieresiscomb_caroncomb", nonspacing),
            ("dieresiscomb_macroncomb", nonspacing),
            ("dotaccentcomb_macroncomb", nonspacing),
            ("macroncomb_dieresiscomb", nonspacing),
            ("dotaccentcomb_o", nonspacing),
            ("macronlowmod_O", with_sub(Category::Mark, Subcategory::Modifier)),
            ("O_o", ligature),
            ("O_dotaccentcomb_o", ligature),
            ("O_dotaccentcomb", letter),
            ("O_period", ligature),
            ("O_nbspace", letter),
            ("_a", None),
            ("_aaa", None),
            ("dal_alef-ar", ligature),
            ("dal_lam-ar.dlig", ligature),
            ("po-khmer", letter),
            ("po-khmer.below", nonspacing),
            ("po-khmer.below.ro", nonspacing),
        ];

        for (name, expected) in cases {
            let result = get_category(name, &[]);
            assert_eq!(result, expected, "{name}: {result:?} != {expected:?}");
        }
    }

    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L145C5-L153C76
    #[test]
    fn py_category_by_unicode() {
        // "SignU.bn" is a non-standard name that GlyphData.xml does not define,
        // so the lookup has to succeed through the codepoint 0x09C1 instead.
        let result = get_category("SignU.bn", &[0x09C1]);
        assert_eq!(result, with_sub(Category::Mark, Subcategory::Nonspacing));
    }

    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L155C5-L162C1
    // https://github.com/googlefonts/glyphsLib/issues/232
    #[test]
    fn py_bug_232() {
        let nonspacing_mark = with_sub(Category::Mark, Subcategory::Nonspacing);
        for name in ["uni07F0", "longlowtonecomb-nko"] {
            assert_eq!(get_category(name, &[]), nonspacing_mark);
        }
    }

    #[test]
    fn match_prod_name_with_suffix() {
        // https://github.com/googlefonts/fontc/issues/780#issuecomment-2674853729
        // "uni17BF.b" should match against production name uni17BF
        let expected = bare(Category::Letter);
        assert_eq!(expected, get_category("uni17BF.b", &[]));
    }

    #[rstest(name, expected,
        case("A", None),  // AGLFN names *are* production names
        case("z", None),
        case("nbspace", Some("uni00A0")),
        case("nonbreakingspace", Some("uni00A0")),  // altNames map to the same prod name
        case("uni00A0", Some("uni00A0")),  // prod names are already prod
        // the «» punctuation marks are spelled 'guillemets' in French, but for
        // some reason the AGLFN has 'guillemot' (that's actually a bird! :shrug:)
        case("guillemetleft", Some("guillemotleft")),
        case("twosevenths", Some("two_fraction_seven")),
        case("idotaccent", Some("i.loclTRK")),
        case("idotless", Some("dotlessi")),
        case("Jacute", Some("uni004A0301")),
        case("scurl", Some("u1DF1E")),
        // In the old AGL, Delta was confused with increment 0x2206 so now it's banned
        // from the Greek alphabet.
        case("Delta", Some("uni0394")),
        case("increment", Some("uni2206")),
        case("dog-ko", Some("uniB3C5")),
        case("bau-kannada", Some("uni0CAC0CCC")),
        case("EnglandFlag", Some("u1F3F4E0067E0062E0065E006EE0067E007F")),
        case("pileOfPoo", Some("u1F4A9")),
        case("lam_alef-ar.fina", Some("uni06440627.fina")),
    )]
    fn query_production_names(name: &str, expected: Option<&str>) {
        let data = GlyphData::new(None);
        let found = data.query_no_synthesis(name, None).unwrap();
        let production_name = found.production_name.map(|p| p.to_string());
        assert_eq!(
            production_name,
            expected.map(String::from),
            "{name}: {production_name:?} != {expected:?}"
        );
    }

    // Python original test cases for synthetic production names:
    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d59bec0c9437da3a748c58f2999911/tests/glyphdata_test.py#L196-L409
    // Only a subset is kept here: the full list is long and highly repetitive.
    #[rstest(
        name,
        expected,
        case("Ech_Vew-arm.liga", "uni0535054E.liga"),
        case("aiMatra_anusvara-deva", "uni09480902"),
        case("aiMatra_reph_anusvara-deva", "uni09480930094D0902"),
        case("ca_iMatra-tamil", "uni0B9A0BBF"),
        case("ch_ya-deva", "uni091B094D092F"),
        case("d_dh_ya-deva", "uni0926094D0927094D092F"),
        case("da-khmer.below.ro", "uni17D2178A.ro"),
        case("da_rVocalicMatra-deva", "uni09260943"),
        case("dd_dda-deva", "uni0921094D0921"),
        case("eShortMatra_reph_anusvara-deva", "uni09460930094D0902"),
        case("ech_vew-arm.liga.sc", "uni0565057E.liga.sc"),
        case("finalkaf_qamats-hb", "uni05DA05B8"),
        case("finalkaf_sheva-hb", "uni05DA05B0"),
        case("finalkafdagesh_qamats-hb", "uniFB3A05B8"),
        case("finalkafdagesh_sheva-hb", "uniFB3A05B0"),
        case("h_la-deva", "uni0939094D0932"),
        case("ha_iMatra-tamil", "uni0BB90BBF"),
        case("hatafpatah_siluqleft-hb", "uni05B205BD"),
        case("iMark_toandakhiat-khmer.narrow", "uni17B717CD.narrow"),
        case("idotaccent.sc", "i.loclTRK.sc"),
        case("iiMatra_reph-deva", "uni09400930094D"),
        case("iiMatra_reph-deva.alt2", "uni09400930094D.alt2"),
        case("j_ny-deva", "uni091C094D091E094D"),
        case("j_ny-deva.alt2", "uni091C094D091E094D.alt2"),
        case("mo-khmer.below.ro", "uni17D21798.ro"),
        case("moMa_underscore-thai", "uni0E21005F"),
        case("nno-khmer.below.narrow1", "uni17D2178E.narrow1"),
        case("nyo-khmer.full.below.narrow", "uni17D21789.full.below.narrow"),
        case("sh_ra_iiMatra-tamil", "uni0BB60BCD0BB00BC0"),
        // plus some more tests that are not in glyphsLib
        case("A_A", "A_A"),
        case("a_a.sc", "a_a.sc"),
        case("brevecomb_acutecomb", "uni03060301"),
        case("brevecomb_acutecomb.case", "uni03060301.case"),
        case("pileOfPoo_pileOfPoo", "u1F4A9_u1F4A9"),
        case("pileOfPoo.ss01", "u1F4A9.ss01"),
        case("lam_alef-ar.fina.ss02", "uni06440627.fina.ss02"),
    )]
    fn synthetic_production_names(name: &str, expected: &str) {
        let data = GlyphData::new(None);
        let found = data.query(name, None).unwrap();
        let production_name = found.production_name.unwrap().to_string();
        assert_eq!(
            production_name.as_str(),
            expected,
            "{name}: {production_name:?} != {expected:?}"
        );
    }
}