glyphs_reader/
glyphdata.rs

1//! determining glyph properties
2//!
3//! This module provides access to glyph info extracted from bundled
4//! (and potentially user-provided) data files.
5
6use quick_xml::{
7    events::{BytesStart, Event},
8    Reader,
9};
10use std::{
11    borrow::Cow,
12    collections::{BTreeSet, HashMap},
13    fmt::Display,
14    num::ParseIntError,
15    path::{Path, PathBuf},
16    str::FromStr,
17};
18
19use icu_properties::props::GeneralCategory;
20
21use smol_str::SmolStr;
22
23use crate::glyphdata_bundled::{self as bundled, find_pos_by_prod_name};
24
/// The primary category for a given glyph
///
/// These categories are not the same as the unicode character categories.
///
/// When read from glyph data XML, the `category` attribute is matched
/// case-sensitively against the variant names (see the `FromStr` impl).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Category {
    Mark,
    Space,
    Separator,
    Letter,
    Number,
    Symbol,
    Punctuation,
    // also used as the fallback when a production name can be synthesized
    // for a glyph but no category can
    Other,
}
40
/// The subcategory of a given glyph
///
/// When read from glyph data XML, the `subCategory` attribute is matched
/// case-sensitively (see the `FromStr` impl). Most values are spelled exactly
/// like the variant name; the exceptions are `DecimalDigit`, written
/// `"Decimal Digit"`, and `SpacingCombining`, written `"Spacing Combining"`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Subcategory {
    Spacing,
    Radical,
    Math,
    Superscript,
    Geometry,
    Dash,
    // spelled "Decimal Digit" in XML
    DecimalDigit,
    Currency,
    Fraction,
    Halfform,
    Small,
    Number,
    Quote,
    Space,
    Letter,
    Jamo,
    Format,
    Parenthesis,
    Matra,
    Arrow,
    Nonspacing,
    Compatibility,
    Syllable,
    Ligature,
    Modifier,
    // spelled "Spacing Combining" in XML
    SpacingCombining,
    Emoji,
    Enclosing,
    Composition,
    Other,
}
76
/// The script of a given glyph
///
/// When read from glyph data XML, this comes from the optional `script`
/// attribute, whose values start with a lowercase letter (for example
/// `"arabic"` or `"blackLetter"`; see the `FromStr` impl).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Script {
    Adlam,
    Alchemical,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Batak,
    Bengali,
    BlackLetter,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Canadian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Coptic,
    Cyrillic,
    Dentistry,
    Deseret,
    Devanagari,
    Divesakuru,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Greek,
    Gujarati,
    Gurmukhi,
    Han,
    Hangul,
    Hebrew,
    Javanese,
    Kana,
    Kannada,
    Kawi,
    Kayahli,
    Khmer,
    Khojki,
    Lao,
    Latin,
    Lepcha,
    Lue,
    Mahjong,
    Malayalam,
    Mandaic,
    Math,
    Mongolian,
    Musical,
    Myanmar,
    Nko,
    NyiakengPuachueHmong,
    Ogham,
    Oriya,
    Osage,
    Osmanya,
    PahawhHmong,
    PhaistosDisc,
    Rovas,
    Runic,
    Samaritan,
    Shavian,
    Sinhala,
    Syriac,
    Tamil,
    Telugu,
    Thaana,
    Thai,
    Tham,
    Tibet,
    Tifinagh,
    Vai,
    Yezidi,
    Yi,
}
161
/// Production name of a glyph.
///
/// Per [khaled](https://github.com/googlefonts/fontc/pull/1354#pullrequestreview-2707517748)
/// the overwhelming majority follow simple patterns.
///
/// The two codepoint-based variants store only the codepoint; the textual
/// form is produced on demand by the `Display` impl (`uni{:04X}` and `u{:X}`
/// respectively).
///
/// See also <https://github.com/adobe-type-tools/agl-specification?tab=readme-ov-file#2-the-mapping>
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ProductionName {
    // uniHEX, e.g. uni004A
    // (the value is at most 0xFFFF when built via the `From` impls)
    Bmp(u32),
    // uHEX, e.g. uE007D
    // (the value is above 0xFFFF when built via the `From` impls)
    NonBmp(u32),
    // I reject your patterns and choose my own
    // (any name that does not parse as one of the patterns above)
    Custom(SmolStr),
}
177
178impl From<&str> for ProductionName {
179    fn from(v: &str) -> ProductionName {
180        fn try_parse(
181            v: &str,
182            lbound: u32,
183            ubound: u32,
184            f: impl Fn(u32) -> ProductionName,
185        ) -> Option<ProductionName> {
186            if let Ok(v) = u32::from_str_radix(v, 16) {
187                if v >= lbound && v <= ubound {
188                    return Some(f(v));
189                }
190            }
191            None
192        }
193
194        match v {
195            _ if v.starts_with("uni") => try_parse(&v[3..], 0, 0xFFFF, ProductionName::Bmp),
196            _ if v.starts_with("u") => {
197                try_parse(&v[1..], 0xFFFF + 1, 0x10FFFF, ProductionName::NonBmp)
198            }
199            _ => None,
200        }
201        .unwrap_or_else(|| ProductionName::Custom(v.into()))
202    }
203}
204
205impl From<u32> for ProductionName {
206    fn from(v: u32) -> ProductionName {
207        if v <= 0xFFFF {
208            ProductionName::Bmp(v)
209        } else {
210            ProductionName::NonBmp(v)
211        }
212    }
213}
214
215impl Display for ProductionName {
216    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
217        match self {
218            ProductionName::Bmp(cp) => write!(f, "uni{:04X}", cp),
219            ProductionName::NonBmp(cp) => write!(f, "u{:X}", cp),
220            ProductionName::Custom(s) => write!(f, "{}", s),
221        }
222    }
223}
224
225impl From<ProductionName> for SmolStr {
226    fn from(v: ProductionName) -> SmolStr {
227        match v {
228            ProductionName::Bmp(cp) => smol_str::format_smolstr!("uni{:04X}", cp),
229            ProductionName::NonBmp(cp) => smol_str::format_smolstr!("u{:X}", cp),
230            ProductionName::Custom(s) => s,
231        }
232    }
233}
234
/// A queryable set of glyph data
///
/// Always queries static data from glyphsLib. Optionally includes a set of override values as well.
///
/// Default/no overrides instances are cheap. Instances created with overrides are more expensive.
#[derive(Default)]
pub struct GlyphData {
    // override-names are preferred to names in data
    overrides: Option<HashMap<SmolStr, QueryResult>>,
    // maps a codepoint to the name of the override entry declaring it, so
    // that overrides can also be found by codepoint; `Some` exactly when
    // `overrides` is `Some`.
    // NOTE(review): the field name is misspelled (three 'r's); renaming it
    // means touching every use in this file.
    overrrides_by_codepoint: Option<HashMap<u32, SmolStr>>,
}
246
247impl GlyphData {
248    /// Overrides, if provided, explicitly assign the result for a given query
249    pub(crate) fn new(overrides: Option<HashMap<SmolStr, QueryResult>>) -> Self {
250        let overrrides_by_codepoint = overrides.as_ref().map(|overrides| {
251            overrides
252                .iter()
253                .filter_map(|(k, v)| v.codepoint.map(|cp| (cp, k.clone())))
254                .collect()
255        });
256        Self {
257            overrides,
258            overrrides_by_codepoint,
259        }
260    }
261
262    /// Create a new data set with user provided overrides
263    pub fn with_override_file(override_file: &Path) -> Result<Self, GlyphDataError> {
264        let bytes = std::fs::read(override_file).map_err(|err| GlyphDataError::UserFile {
265            path: override_file.to_owned(),
266            reason: err.kind(),
267        })?;
268        let overrides = parse_entries(&bytes)?;
269        Ok(GlyphData::new(Some(overrides)))
270    }
271}
272
/// The category and subcategory to use
///
/// Used for overrides and as the result of [`GlyphData::query`]
#[derive(Debug, Clone, PartialEq)]
pub struct QueryResult {
    // always present; synthesized results fall back to `Category::Other`
    pub category: Category,
    pub subcategory: Option<Subcategory>,
    // `None` for synthesized results and for entries registered under an
    // alternate name when parsing XML
    pub codepoint: Option<u32>,
    pub script: Option<Script>,
    pub production_name: Option<ProductionName>,
}
284
285#[derive(Clone, Debug, thiserror::Error)]
286pub enum GlyphDataError {
287    #[error("Couldn't read user file at '{path}': '{reason}'")]
288    UserFile {
289        path: PathBuf,
290        reason: std::io::ErrorKind,
291    },
292    #[error("Error parsing XML: '{0}'")]
293    ReaderError(#[from] quick_xml::Error),
294    #[error("Error parsing XML attribute: '{0}'")]
295    XmlAttributeError(#[from] quick_xml::events::attributes::AttrError),
296    #[error("Unknown category '{0}'")]
297    InvalidCategory(SmolStr),
298    #[error("Unknown subcategory '{0}'")]
299    InvalidSubcategory(SmolStr),
300    #[error("Unknown script '{0}'")]
301    InvalidScript(SmolStr),
302    #[error("the XML input did not start with a <glyphdata> tag")]
303    WrongFirstElement,
304    #[error("Missing required attribute '{missing}' in '{attributes}'")]
305    MissingRequiredAttribute {
306        attributes: String,
307        missing: &'static str,
308    },
309    #[error("Invalid unicode value '{raw}': '{inner}'")]
310    InvalidUnicode { raw: String, inner: ParseIntError },
311    #[error("Unexpected attribute '{0}'")]
312    UnknownAttribute(String),
313}
314
315impl GlyphDataError {
316    // a little helper here makes our parsing code cleaner
317    fn missing_attr(name: &'static str, raw_attrs: &[u8]) -> Self {
318        let attributes = String::from_utf8_lossy(raw_attrs).into_owned();
319        Self::MissingRequiredAttribute {
320            attributes,
321            missing: name,
322        }
323    }
324}
325
326/// Parse glyph info entries out of a GlyphData xml file.
327pub(crate) fn parse_entries(xml: &[u8]) -> Result<HashMap<SmolStr, QueryResult>, GlyphDataError> {
328    fn check_and_advance_past_preamble(reader: &mut Reader<&[u8]>) -> Result<(), GlyphDataError> {
329        loop {
330            let event = reader.read_event()?;
331            match event {
332                Event::Comment(_) => (),
333                Event::Decl(_) => (),
334                Event::DocType(_) => (),
335                Event::Start(start) if start.name().as_ref() == b"glyphData" => return Ok(()),
336                _other => {
337                    return Err(GlyphDataError::WrongFirstElement);
338                }
339            }
340        }
341    }
342
343    let mut reader = Reader::from_reader(xml);
344    reader.config_mut().trim_text(true);
345
346    check_and_advance_past_preamble(&mut reader)?;
347
348    let mut by_name = HashMap::new();
349    let mut alt_names = Vec::new();
350    for result in
351        iter_rows(&mut reader).map(|row| row.map_err(Into::into).and_then(parse_glyph_xml))
352    {
353        let info = result?;
354        by_name.insert(
355            info.name.clone(),
356            QueryResult {
357                category: info.category,
358                subcategory: info.subcategory,
359                codepoint: info.codepoint,
360                script: info.script,
361                production_name: info.production_name.clone(),
362            },
363        );
364        for alt in info.alt_names {
365            alt_names.push((
366                alt,
367                QueryResult {
368                    category: info.category,
369                    subcategory: info.subcategory,
370                    codepoint: None,
371                    script: info.script,
372                    production_name: info.production_name.clone(),
373                },
374            ));
375        }
376    }
377
378    // apply alts after to ensure they can't steal "real" names
379    for (name, value) in alt_names {
380        by_name.entry(name).or_insert(value);
381    }
382
383    Ok(by_name)
384}
385
386fn iter_rows<'a, 'b: 'a>(
387    reader: &'b mut Reader<&'a [u8]>,
388) -> impl Iterator<Item = Result<BytesStart<'a>, quick_xml::Error>> + 'a {
389    std::iter::from_fn(|| match reader.read_event() {
390        Err(e) => Some(Err(e)),
391        Ok(Event::Empty(start)) => Some(Ok(start)),
392        _ => None,
393    })
394}
395
// The values extracted from the attributes of one glyph element.
struct GlyphInfoFromXml {
    // `name` attribute (required)
    name: SmolStr,
    // `altNames` attribute, split on ',' with whitespace trimmed
    alt_names: Vec<SmolStr>,
    // `category` attribute (required)
    category: Category,
    // `subCategory` attribute
    subcategory: Option<Subcategory>,
    // `unicode` attribute, parsed as hexadecimal
    codepoint: Option<u32>,
    // `script` attribute
    script: Option<Script>,
    // `production` attribute
    production_name: Option<ProductionName>,
}
405
406fn parse_glyph_xml(item: BytesStart) -> Result<GlyphInfoFromXml, GlyphDataError> {
407    let mut name = None;
408    let mut category = None;
409    let mut subcategory = None;
410    let mut unicode = None;
411    let mut alt_names = None;
412    let mut script = None;
413    let mut production_name = None;
414
415    for attr in item.attributes() {
416        let attr = attr?;
417        let value = attr.unescape_value()?;
418        match attr.key.as_ref() {
419            b"name" => name = Some(value),
420            b"category" => category = Some(value),
421            b"subCategory" => subcategory = Some(value),
422            b"unicode" => unicode = Some(value),
423            b"altNames" => alt_names = Some(value),
424            b"script" => script = Some(value),
425            b"production" => production_name = Some(value.as_ref().into()),
426            b"unicodeLegacy" | b"case" | b"direction" | b"description" => (),
427            other => {
428                return Err(GlyphDataError::UnknownAttribute(
429                    String::from_utf8_lossy(other).into_owned(),
430                ))
431            }
432        }
433    }
434
435    // now we've found some values, let's finalize them
436    let name = name
437        .map(SmolStr::new)
438        .ok_or_else(|| GlyphDataError::missing_attr("name", item.attributes_raw()))?;
439    let category = category
440        .ok_or_else(|| GlyphDataError::missing_attr("category", item.attributes_raw()))
441        .and_then(|cat| {
442            Category::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidCategory)
443        })?;
444    let subcategory = subcategory
445        .map(|cat| Subcategory::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidSubcategory))
446        .transpose()?;
447    let script = script
448        .map(|cat| Script::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidScript))
449        .transpose()?;
450    let codepoint = unicode
451        .map(|s| {
452            u32::from_str_radix(&s, 16).map_err(|inner| GlyphDataError::InvalidUnicode {
453                raw: s.into_owned(),
454                inner,
455            })
456        })
457        .transpose()?;
458    let alt_names = alt_names
459        .map(|names| {
460            names
461                .as_ref()
462                .split(',')
463                .map(|name| SmolStr::from(name.trim()))
464                .collect()
465        })
466        .unwrap_or_default();
467
468    Ok(GlyphInfoFromXml {
469        name,
470        alt_names,
471        category,
472        subcategory,
473        codepoint,
474        script,
475        production_name,
476    })
477}
478
479impl GlyphData {
480    /// Get the info for the given name/codepoints, attempting to synthesize it if necessary
481    ///
482    /// Returns, from most to least preferred:
483    ///
484    /// 1. The matching override value
485    /// 1. The matching value from bundled data
486    /// 1. A computed value based on name heuristics
487    ///
488    // See https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L94
489    pub fn query(&self, name: &str, codepoints: Option<&BTreeSet<u32>>) -> Option<QueryResult> {
490        self.query_no_synthesis(name, codepoints)
491            // we don't have info for this glyph: can we synthesize it?
492            .or_else(|| self.construct_result(name))
493    }
494
495    /// As [`Self::query`] but without a fallback to computed values.
496    ///
497    /// Exists to enable result synthesis to query.
498    fn query_no_synthesis(
499        &self,
500        name: &str,
501        codepoints: Option<&BTreeSet<u32>>,
502    ) -> Option<QueryResult> {
503        // Override?
504        if let (Some(overrides), Some(overrides_by_codepoint)) = (
505            self.overrides.as_ref(),
506            self.overrrides_by_codepoint.as_ref(),
507        ) {
508            let override_result = overrides.get(name).or_else(|| {
509                codepoints
510                    .into_iter()
511                    .flat_map(|cps| cps.iter())
512                    .find_map(|cp: &u32| {
513                        overrides_by_codepoint
514                            .get(cp)
515                            .and_then(|n| overrides.get(n))
516                    })
517            });
518            if let Some(override_result) = override_result {
519                return Some(QueryResult {
520                    category: override_result.category,
521                    subcategory: override_result.subcategory,
522                    codepoint: override_result.codepoint,
523                    script: override_result.script,
524                    production_name: override_result.production_name.clone(),
525                });
526            }
527        }
528
529        // No override, perhaps we have a direct answer?
530        bundled::find_pos_by_name(name)
531            .or_else(|| {
532                codepoints
533                    .into_iter()
534                    .flat_map(|cps| cps.iter())
535                    .find_map(|cp| bundled::find_pos_by_codepoint(*cp))
536            })
537            .or_else(|| find_pos_by_prod_name(name.into()))
538            .map(|i| {
539                bundled::get(i).unwrap_or_else(|| panic!("We found invalid index {i} somehow"))
540            })
541    }
542
543    fn contains_name(&self, name: &str) -> bool {
544        if let Some(overrides) = self.overrides.as_ref() {
545            let name: SmolStr = name.into();
546            if overrides.contains_key(&name) {
547                return true;
548            }
549        }
550        bundled::find_pos_by_name(name).is_some()
551    }
552
553    fn construct_result(&self, name: &str) -> Option<QueryResult> {
554        let category_subcategory = self.construct_category(name);
555        let production_name = self.construct_production_name(name);
556        if category_subcategory.is_none() && production_name.is_none() {
557            return None;
558        }
559        // if we have a production name but no category, 'Other' is good enough
560        let (category, subcategory) = category_subcategory.unwrap_or((Category::Other, None));
561        Some(QueryResult {
562            category,
563            subcategory,
564            codepoint: None,
565            script: None,
566            production_name,
567        })
568    }
569
570    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L199
571    fn construct_category(&self, name: &str) -> Option<(Category, Option<Subcategory>)> {
572        // in glyphs.app '_' prefix means "no export"
573        if name.starts_with('_') {
574            return None;
575        }
576        let (base_name, _) = self.split_glyph_suffix(name);
577        if let Some(result) = self.query_no_synthesis(base_name, None) {
578            return Some((result.category, result.subcategory));
579        }
580
581        if let Some(base_names) = self.split_ligature_glyph_name(base_name) {
582            let base_names_attributes: Vec<_> = base_names
583                .iter()
584                .filter_map(|name| self.query_no_synthesis(name, None))
585                .collect();
586            if let Some(first_attr) = base_names_attributes.first() {
587                // if first is mark, we're a mark
588                if first_attr.category == Category::Mark {
589                    return Some((Category::Mark, first_attr.subcategory));
590                } else if first_attr.category == Category::Letter {
591                    // if first is letter and rest are marks/separators, we use info from first
592                    if base_names_attributes
593                        .iter()
594                        .skip(1)
595                        .map(|result| result.category)
596                        .all(|cat| matches!(cat, Category::Mark | Category::Separator))
597                    {
598                        return Some((first_attr.category, first_attr.subcategory));
599                    } else {
600                        return Some((Category::Letter, Some(Subcategory::Ligature)));
601                    }
602                }
603            }
604        };
605
606        // finally fall back to checking the AGLFN for the base name:
607        Self::construct_category_via_agl(base_name)
608    }
609
610    // https://github.com/googlefonts/glyphsLib/blob/c4db6b981d5/Lib/glyphsLib/glyphdata.py#L351
611    fn construct_production_name(&self, name: &str) -> Option<ProductionName> {
612        fn append_suffix(base_name: &mut String, suffix: Option<&str>) {
613            if let Some(suffix) = suffix {
614                base_name.push('.');
615                base_name.push_str(suffix);
616            }
617        }
618
619        fn is_u_name(name: &str) -> bool {
620            name.starts_with("u") && name[1..].bytes().all(|b| b.is_ascii_hexdigit())
621        }
622
623        let (base_name, suffix) = self.split_glyph_suffix(name);
624
625        // if we have a production name for the base name, append the suffix and go home
626        let prod_name_with_suffix = suffix.and_then(|_| {
627            self.query_no_synthesis(base_name, None)
628                .and_then(|result| result.production_name)
629                .map(|base_prod_name| {
630                    let mut prod_name = base_prod_name.to_string();
631                    append_suffix(&mut prod_name, suffix);
632                    prod_name.as_str().into()
633                })
634        });
635        if prod_name_with_suffix.is_some() {
636            return prod_name_with_suffix;
637        }
638
639        let base_names = self
640            .split_ligature_glyph_name(base_name)
641            .unwrap_or_else(|| vec![base_name.into()]);
642        // Attempt to find a production name for each ligature component (or the whole base name).
643        // Return early if any such names have no GlyphData entry
644        // OR the entry doesn't specify a production name AND they aren't already AGLFN names...
645        let prod_names: Vec<SmolStr> = base_names
646            .into_iter()
647            .map(|name| {
648                self.query_no_synthesis(&name, None).and_then(|result| {
649                    result.production_name.map(Into::into).or_else(|| {
650                        // if no production name, return the name itself if already in AGLFN
651                        fontdrasil::agl::char_for_agl_name(name.as_ref()).map(|_| name)
652                    })
653                })
654            })
655            .collect::<Option<_>>()?;
656
657        // only (uniXXXX, uniYYYY, etc.) names with 4 hex digits can be concatenated using the
658        // more compact format uniXXXXYYYY... uXXXXX names for characters beyond BMP are joined
659        // in ligatures using the usual '_'.
660        let any_characters_outside_bmp = prod_names
661            .iter()
662            .any(|name| name.len() > 5 && is_u_name(name.as_ref()));
663        let any_uni_names = prod_names.iter().any(|name| name.starts_with("uni"));
664
665        if !any_characters_outside_bmp && any_uni_names {
666            let mut uni_names: Vec<Cow<str>> = Vec::new();
667            for part in &prod_names {
668                if let Some(stripped) = part.strip_prefix("uni") {
669                    uni_names.push(Cow::Borrowed(stripped));
670                } else if part.len() == 5 && is_u_name(part.as_ref()) {
671                    uni_names.push(Cow::Borrowed(&part.as_ref()[1..]));
672                } else if let Some(ch) = fontdrasil::agl::char_for_agl_name(part.as_ref()) {
673                    uni_names.push(Cow::Owned(format!("{:04X}", ch as u32)));
674                } else {
675                    panic!("Unexpected part while constructing production name: {part}");
676                }
677            }
678            let mut result = String::from("uni");
679            for segment in uni_names {
680                result.push_str(segment.as_ref());
681            }
682            append_suffix(&mut result, suffix);
683            return Some(result.as_str().into());
684        }
685
686        let mut result = prod_names.join("_");
687        append_suffix(&mut result, suffix);
688        Some(result.as_str().into())
689    }
690
691    // this doesn't need a &self param, but we want it locally close to the
692    // code that calls it, so we'll make it a type method :shrug:
693    fn construct_category_via_agl(base_name: &str) -> Option<(Category, Option<Subcategory>)> {
694        if let Some(first_char) = fontdrasil::agl::glyph_name_to_unicode(base_name)
695            .chars()
696            .next()
697        {
698            let (category, subcategory) = category_from_icu(first_char);
699
700            // Exception: Something like "one_two" should be a (_, Ligature),
701            // "acutecomb_brevecomb" should however stay (Mark, Nonspacing).
702            if base_name.contains('_') && category != Category::Mark {
703                return Some((category, Some(Subcategory::Ligature)));
704            } else {
705                return Some((category, subcategory));
706            }
707        }
708        None
709    }
710
711    fn split_glyph_suffix<'n>(&self, name: &'n str) -> (&'n str, Option<&'n str>) {
712        let multi_suffix = name.bytes().filter(|b| *b == b'.').count() > 1;
713        if multi_suffix {
714            // with multiple suffixes, try adding them one at a time and seeing if
715            // we find a known name.
716            // basically: for 'char.bottom.alt' we want to return (char.bottom, alt)
717            // if
718            for idx in name
719                .bytes()
720                .enumerate()
721                .filter_map(|(i, b)| (b == b'.').then_some(i))
722                .skip(1)
723            {
724                let (base, suffix) = name.split_at(idx);
725                if self.contains_name(base) {
726                    // suffix starts with '.' so we strip it to match split_once below
727                    return (base, Some(&suffix[1..]));
728                }
729            }
730        }
731        // finally just split at the first dot, or the whole name if no suffix
732        name.split_once('.')
733            .map_or_else(|| (name, None), |(base, suffix)| (base, Some(suffix)))
734    }
735
736    /// Split a ligature glyph into component parts
737    ///
738    /// Returns `None` if the name does not contain the '_' character, otherwise
739    /// returns a list of names of components, derived from the glyph name.
740    /// See
741    /// <https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L307>
742    fn split_ligature_glyph_name(&self, name: &str) -> Option<Vec<SmolStr>> {
743        // if last part has a script suffix, grab it
744        let script_suffix = name.rsplit_once('_')?.1.rsplit_once('-').map(|(_, x)| x);
745
746        let mut parts: Vec<_> = name
747            .trim_end_matches(script_suffix.unwrap_or_default())
748            // after trimming script we also need to trim the '-'!
749            .trim_end_matches('-')
750            .split('_')
751            .map(SmolStr::new)
752            .collect();
753
754        let script = match script_suffix {
755            // if there was no suffix, we're done
756            None => return Some(parts),
757            Some(script) => script,
758        };
759
760        // otherwise we try adding the script suffix to each part, and see if
761        // that's a known glyph name:
762        for part in parts.iter_mut() {
763            // if the part already has a script, continue
764            if part.contains('-') {
765                continue;
766            }
767
768            let new_part = smol_str::format_smolstr!("{part}-{script}");
769            // if non-suffixed exists but suffixed doesn't, keep non-suffixed
770            if self.contains_name(part.as_ref()) && !self.contains_name(&new_part) {
771                continue;
772            }
773            *part = new_part;
774        }
775        Some(parts)
776    }
777}
778
779// https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L261
780fn category_from_icu(c: char) -> (Category, Option<Subcategory>) {
781    match icu_properties::CodePointMapData::<GeneralCategory>::new().get(c) {
782        GeneralCategory::Unassigned | GeneralCategory::OtherSymbol => (Category::Symbol, None),
783        GeneralCategory::UppercaseLetter
784        | GeneralCategory::LowercaseLetter
785        | GeneralCategory::TitlecaseLetter
786        | GeneralCategory::OtherLetter => (Category::Letter, None),
787        GeneralCategory::ModifierLetter => (Category::Letter, Some(Subcategory::Modifier)),
788        GeneralCategory::NonspacingMark => (Category::Mark, Some(Subcategory::Nonspacing)),
789        GeneralCategory::SpacingMark => (Category::Mark, Some(Subcategory::SpacingCombining)),
790        GeneralCategory::EnclosingMark => (Category::Mark, Some(Subcategory::Enclosing)),
791        GeneralCategory::DecimalNumber | GeneralCategory::OtherNumber => {
792            (Category::Number, Some(Subcategory::DecimalDigit))
793        }
794        GeneralCategory::LetterNumber => (Category::Number, None),
795        GeneralCategory::SpaceSeparator => (Category::Separator, Some(Subcategory::Space)),
796        GeneralCategory::LineSeparator
797        | GeneralCategory::ParagraphSeparator
798        | GeneralCategory::Control => (Category::Separator, None),
799        GeneralCategory::Format => (Category::Separator, Some(Subcategory::Format)),
800        GeneralCategory::PrivateUse => (Category::Letter, Some(Subcategory::Compatibility)),
801        GeneralCategory::DashPunctuation => (Category::Punctuation, Some(Subcategory::Dash)),
802        GeneralCategory::OpenPunctuation | GeneralCategory::ClosePunctuation => {
803            (Category::Punctuation, Some(Subcategory::Parenthesis))
804        }
805        GeneralCategory::ConnectorPunctuation | GeneralCategory::OtherPunctuation => {
806            (Category::Punctuation, None)
807        }
808        GeneralCategory::InitialPunctuation | GeneralCategory::FinalPunctuation => {
809            (Category::Punctuation, Some(Subcategory::Quote))
810        }
811        GeneralCategory::MathSymbol => (Category::Symbol, Some(Subcategory::Math)),
812        GeneralCategory::CurrencySymbol => (Category::Symbol, Some(Subcategory::Currency)),
813        GeneralCategory::ModifierSymbol => (Category::Mark, Some(Subcategory::Spacing)),
814        GeneralCategory::Surrogate => unreachable!("char cannot represent surrogate code points"),
815    }
816}
817
818impl FromStr for Category {
819    type Err = SmolStr;
820
821    fn from_str(s: &str) -> Result<Self, Self::Err> {
822        match s {
823            "Mark" => Ok(Self::Mark),
824            "Space" => Ok(Self::Space),
825            "Separator" => Ok(Self::Separator),
826            "Letter" => Ok(Self::Letter),
827            "Number" => Ok(Self::Number),
828            "Symbol" => Ok(Self::Symbol),
829            "Punctuation" => Ok(Self::Punctuation),
830            "Other" => Ok(Self::Other),
831            _ => Err(s.into()),
832        }
833    }
834}
835
836impl FromStr for Subcategory {
837    type Err = SmolStr;
838
839    fn from_str(s: &str) -> Result<Self, Self::Err> {
840        match s {
841            "Spacing" => Ok(Self::Spacing),
842            "Radical" => Ok(Self::Radical),
843            "Math" => Ok(Self::Math),
844            "Superscript" => Ok(Self::Superscript),
845            "Geometry" => Ok(Self::Geometry),
846            "Dash" => Ok(Self::Dash),
847            "Decimal Digit" => Ok(Self::DecimalDigit),
848            "Currency" => Ok(Self::Currency),
849            "Fraction" => Ok(Self::Fraction),
850            "Halfform" => Ok(Self::Halfform),
851            "Small" => Ok(Self::Small),
852            "Number" => Ok(Self::Number),
853            "Quote" => Ok(Self::Quote),
854            "Space" => Ok(Self::Space),
855            "Letter" => Ok(Self::Letter),
856            "Jamo" => Ok(Self::Jamo),
857            "Format" => Ok(Self::Format),
858            "Parenthesis" => Ok(Self::Parenthesis),
859            "Matra" => Ok(Self::Matra),
860            "Arrow" => Ok(Self::Arrow),
861            "Nonspacing" => Ok(Self::Nonspacing),
862            "Compatibility" => Ok(Self::Compatibility),
863            "Syllable" => Ok(Self::Syllable),
864            "Ligature" => Ok(Self::Ligature),
865            "Modifier" => Ok(Self::Modifier),
866            "Spacing Combining" => Ok(Self::SpacingCombining),
867            "Emoji" => Ok(Self::Emoji),
868            "Enclosing" => Ok(Self::Enclosing),
869            "Composition" => Ok(Self::Composition),
870            "Other" => Ok(Self::Other),
871            _ => Err(s.into()),
872        }
873    }
874}
875
876impl FromStr for Script {
877    type Err = SmolStr;
878
879    fn from_str(s: &str) -> Result<Self, Self::Err> {
880        match s {
881            "adlam" => Ok(Self::Adlam),
882            "alchemical" => Ok(Self::Alchemical),
883            "arabic" => Ok(Self::Arabic),
884            "armenian" => Ok(Self::Armenian),
885            "avestan" => Ok(Self::Avestan),
886            "balinese" => Ok(Self::Balinese),
887            "bamum" => Ok(Self::Bamum),
888            "batak" => Ok(Self::Batak),
889            "bengali" => Ok(Self::Bengali),
890            "blackLetter" => Ok(Self::BlackLetter),
891            "bopomofo" => Ok(Self::Bopomofo),
892            "brahmi" => Ok(Self::Brahmi),
893            "braille" => Ok(Self::Braille),
894            "buginese" => Ok(Self::Buginese),
895            "canadian" => Ok(Self::Canadian),
896            "chakma" => Ok(Self::Chakma),
897            "cham" => Ok(Self::Cham),
898            "cherokee" => Ok(Self::Cherokee),
899            "chorasmian" => Ok(Self::Chorasmian),
900            "coptic" => Ok(Self::Coptic),
901            "cyrillic" => Ok(Self::Cyrillic),
902            "dentistry" => Ok(Self::Dentistry),
903            "deseret" => Ok(Self::Deseret),
904            "devanagari" => Ok(Self::Devanagari),
905            "divesakuru" => Ok(Self::Divesakuru),
906            "elbasan" => Ok(Self::Elbasan),
907            "elymaic" => Ok(Self::Elymaic),
908            "ethiopic" => Ok(Self::Ethiopic),
909            "georgian" => Ok(Self::Georgian),
910            "glagolitic" => Ok(Self::Glagolitic),
911            "gothic" => Ok(Self::Gothic),
912            "greek" => Ok(Self::Greek),
913            "gujarati" => Ok(Self::Gujarati),
914            "gurmukhi" => Ok(Self::Gurmukhi),
915            "han" => Ok(Self::Han),
916            "hangul" => Ok(Self::Hangul),
917            "hebrew" => Ok(Self::Hebrew),
918            "javanese" => Ok(Self::Javanese),
919            "kana" => Ok(Self::Kana),
920            "kannada" => Ok(Self::Kannada),
921            "kawi" => Ok(Self::Kawi),
922            "kayahli" => Ok(Self::Kayahli),
923            "khmer" => Ok(Self::Khmer),
924            "khojki" => Ok(Self::Khojki),
925            "lao" => Ok(Self::Lao),
926            "latin" => Ok(Self::Latin),
927            "lepcha" => Ok(Self::Lepcha),
928            "lue" => Ok(Self::Lue),
929            "mahjong" => Ok(Self::Mahjong),
930            "malayalam" => Ok(Self::Malayalam),
931            "mandaic" => Ok(Self::Mandaic),
932            "math" => Ok(Self::Math),
933            "mongolian" => Ok(Self::Mongolian),
934            "musical" => Ok(Self::Musical),
935            "myanmar" => Ok(Self::Myanmar),
936            "nko" => Ok(Self::Nko),
937            "nyiakeng puachue hmong" => Ok(Self::NyiakengPuachueHmong),
938            "ogham" => Ok(Self::Ogham),
939            "oriya" => Ok(Self::Oriya),
940            "osage" => Ok(Self::Osage),
941            "osmanya" => Ok(Self::Osmanya),
942            "pahawh hmong" => Ok(Self::PahawhHmong),
943            "phaistosDisc" => Ok(Self::PhaistosDisc),
944            "rovas" => Ok(Self::Rovas),
945            "runic" => Ok(Self::Runic),
946            "samaritan" => Ok(Self::Samaritan),
947            "shavian" => Ok(Self::Shavian),
948            "sinhala" => Ok(Self::Sinhala),
949            "syriac" => Ok(Self::Syriac),
950            "tamil" => Ok(Self::Tamil),
951            "telugu" => Ok(Self::Telugu),
952            "thaana" => Ok(Self::Thaana),
953            "thai" => Ok(Self::Thai),
954            "tham" => Ok(Self::Tham),
955            "tibet" => Ok(Self::Tibet),
956            "tifinagh" => Ok(Self::Tifinagh),
957            "vai" => Ok(Self::Vai),
958            "yi" => Ok(Self::Yi),
959            _ => Err(s.into()),
960        }
961    }
962}
963
964impl Display for Category {
965    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
966        match self {
967            Self::Mark => write!(f, "Mark"),
968            Self::Space => write!(f, "Space"),
969            Self::Separator => write!(f, "Separator"),
970            Self::Letter => write!(f, "Letter"),
971            Self::Number => write!(f, "Number"),
972            Self::Symbol => write!(f, "Symbol"),
973            Self::Punctuation => write!(f, "Punctuation"),
974            Self::Other => write!(f, "Other"),
975        }
976    }
977}
978
979impl Display for Subcategory {
980    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
981        match self {
982            Self::Spacing => write!(f, "Spacing"),
983            Self::Radical => write!(f, "Radical"),
984            Self::Math => write!(f, "Math"),
985            Self::Superscript => write!(f, "Superscript"),
986            Self::Geometry => write!(f, "Geometry"),
987            Self::Dash => write!(f, "Dash"),
988            Self::DecimalDigit => write!(f, "Decimal Digit"),
989            Self::Currency => write!(f, "Currency"),
990            Self::Fraction => write!(f, "Fraction"),
991            Self::Halfform => write!(f, "Halfform"),
992            Self::Small => write!(f, "Small"),
993            Self::Number => write!(f, "Number"),
994            Self::Quote => write!(f, "Quote"),
995            Self::Space => write!(f, "Space"),
996            Self::Letter => write!(f, "Letter"),
997            Self::Jamo => write!(f, "Jamo"),
998            Self::Format => write!(f, "Format"),
999            Self::Parenthesis => write!(f, "Parenthesis"),
1000            Self::Matra => write!(f, "Matra"),
1001            Self::Arrow => write!(f, "Arrow"),
1002            Self::Nonspacing => write!(f, "Nonspacing"),
1003            Self::Compatibility => write!(f, "Compatibility"),
1004            Self::Syllable => write!(f, "Syllable"),
1005            Self::Ligature => write!(f, "Ligature"),
1006            Self::Modifier => write!(f, "Modifier"),
1007            Self::SpacingCombining => write!(f, "Spacing Combining"),
1008            Self::Emoji => write!(f, "Emoji"),
1009            Self::Enclosing => write!(f, "Enclosing"),
1010            Self::Composition => write!(f, "Composition"),
1011            Self::Other => write!(f, "Other"),
1012        }
1013    }
1014}
1015
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    // Result shape shared by the category tests: `None` when the lookup finds
    // nothing, otherwise the category plus an optional subcategory.
    type CategoryPair = Option<(Category, Option<Subcategory>)>;

    // An override passed directly to the constructor replaces the data
    // reported for that glyph name.
    #[test]
    fn simple_overrides() {
        let category = Category::Mark;
        let subcategory = Some(Subcategory::SpacingCombining);
        let codepoint = Some(u32::from(b'A'));
        let script = Some(Script::Alchemical);

        let entry = QueryResult {
            category,
            subcategory,
            codepoint,
            script,
            production_name: Some(ProductionName::Custom("MagicA".into())),
        };
        let data = GlyphData::new(Some(HashMap::from([("A".into(), entry)])));

        let result = data.query("A", None).unwrap();
        assert_eq!(result.category, category);
        assert_eq!(result.subcategory, subcategory);
        assert_eq!(result.codepoint, codepoint);
        assert_eq!(result.script, script);
        assert_eq!(result.production_name, Some("MagicA".into()));
    }

    // Overrides can also be loaded from an XML file on disk.
    #[test]
    fn overrides_from_file() {
        let override_path = Path::new("./data/GlyphData_override_test.xml");
        let data = GlyphData::with_override_file(override_path).unwrap();
        let category_of = |name: &str| data.query(name, None).unwrap().category;

        assert_eq!(category_of("zero"), Category::Other);
        assert_eq!(category_of("C"), Category::Number);

        let yogh = data.query("Yogh", None).unwrap();
        assert_eq!(yogh.production_name, Some("Yolo".into()));
    }

    // Queries a fresh `GlyphData` (no overrides) and keeps only the category
    // and subcategory of the result.
    fn get_category(name: &str, codepoints: &[u32]) -> CategoryPair {
        let unicode_set = codepoints.iter().copied().collect();
        let data = GlyphData::new(None);
        let info = data.query(name, Some(&unicode_set))?;
        Some((info.category, info.subcategory))
    }

    // from python glyphsLib: https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d5/tests/glyphdata_test.py#L106
    #[test]
    fn py_test_category() {
        // Named expectations keep the table below to one line per glyph.
        const UNKNOWN: CategoryPair = None;
        const SEPARATOR: CategoryPair = Some((Category::Separator, None));
        const LETTER: CategoryPair = Some((Category::Letter, None));
        const LETTER_LIGATURE: CategoryPair =
            Some((Category::Letter, Some(Subcategory::Ligature)));
        const LETTER_SYLLABLE: CategoryPair =
            Some((Category::Letter, Some(Subcategory::Syllable)));
        const NUMBER_DIGIT: CategoryPair =
            Some((Category::Number, Some(Subcategory::DecimalDigit)));
        const NUMBER_LIGATURE: CategoryPair =
            Some((Category::Number, Some(Subcategory::Ligature)));
        const SYMBOL_GEOMETRY: CategoryPair =
            Some((Category::Symbol, Some(Subcategory::Geometry)));
        const MARK_NONSPACING: CategoryPair =
            Some((Category::Mark, Some(Subcategory::Nonspacing)));
        const MARK_MODIFIER: CategoryPair =
            Some((Category::Mark, Some(Subcategory::Modifier)));

        let cases = [
            (".notdef", SEPARATOR),
            // this test case requires AGL lookup:
            ("uni000D", SEPARATOR),
            ("boxHeavyUp", SYMBOL_GEOMETRY),
            ("eacute", LETTER),
            ("Abreveacute", LETTER),
            ("C-fraktur", LETTER),
            ("fi", LETTER_LIGATURE),
            ("fi.alt", LETTER_LIGATURE),
            ("hib-ko", LETTER_SYLLABLE),
            ("one.foo", NUMBER_DIGIT),
            ("one_two.foo", NUMBER_LIGATURE),
            ("o_f_f_i", LETTER_LIGATURE),
            ("o_f_f_i.foo", LETTER_LIGATURE),
            ("ain_alefMaksura-ar.fina", LETTER_LIGATURE),
            ("brevecomb", MARK_NONSPACING),
            ("brevecomb.case", MARK_NONSPACING),
            ("brevecomb_acutecomb", MARK_NONSPACING),
            ("brevecomb_acutecomb.case", MARK_NONSPACING),
            ("caroncomb_dotaccentcomb", MARK_NONSPACING),
            ("dieresiscomb_caroncomb", MARK_NONSPACING),
            ("dieresiscomb_macroncomb", MARK_NONSPACING),
            ("dotaccentcomb_macroncomb", MARK_NONSPACING),
            ("macroncomb_dieresiscomb", MARK_NONSPACING),
            ("dotaccentcomb_o", MARK_NONSPACING),
            ("macronlowmod_O", MARK_MODIFIER),
            ("O_o", LETTER_LIGATURE),
            ("O_dotaccentcomb_o", LETTER_LIGATURE),
            ("O_dotaccentcomb", LETTER),
            ("O_period", LETTER_LIGATURE),
            ("O_nbspace", LETTER),
            ("_a", UNKNOWN),
            ("_aaa", UNKNOWN),
            ("dal_alef-ar", LETTER_LIGATURE),
            ("dal_lam-ar.dlig", LETTER_LIGATURE),
            ("po-khmer", LETTER),
            ("po-khmer.below", MARK_NONSPACING),
            ("po-khmer.below.ro", MARK_NONSPACING),
        ];

        for (name, expected) in cases {
            let result = get_category(name, &[]);
            assert_eq!(result, expected, "{name}: {result:?} != {expected:?}");
        }
    }

    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L145C5-L153C76
    #[test]
    fn py_category_by_unicode() {
        // "SignU.bn" is a non-standard name that GlyphData.xml does not define,
        // so the lookup is expected to match on the codepoint 0x09C1 instead.
        let expected = Some((Category::Mark, Some(Subcategory::Nonspacing)));
        assert_eq!(get_category("SignU.bn", &[0x09C1]), expected);
    }

    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L155C5-L162C1
    // https://github.com/googlefonts/glyphsLib/issues/232
    #[test]
    fn py_bug_232() {
        let nonspacing_mark = Some((Category::Mark, Some(Subcategory::Nonspacing)));
        assert_eq!(get_category("uni07F0", &[]), nonspacing_mark);
        assert_eq!(get_category("longlowtonecomb-nko", &[]), nonspacing_mark);
    }

    #[test]
    fn match_prod_name_with_suffix() {
        // https://github.com/googlefonts/fontc/issues/780#issuecomment-2674853729
        // "uni17BF.b" should match against production name uni17BF
        let category = get_category("uni17BF.b", &[]);
        assert_eq!(Some((Category::Letter, None)), category);
    }

    #[rstest(
        name,
        expected,
        // AGLFN names *are* production names
        case("A", None),
        case("z", None),
        case("nbspace", Some("uni00A0")),
        // altNames map to the same prod name
        case("nonbreakingspace", Some("uni00A0")),
        // prod names are already prod
        case("uni00A0", Some("uni00A0")),
        // The «» punctuation marks are called 'guillemets' in French, but for
        // some reason the AGLFN spells them 'guillemot' (that's actually a
        // bird! :shrug:)
        case("guillemetleft", Some("guillemotleft")),
        case("twosevenths", Some("two_fraction_seven")),
        case("idotaccent", Some("i.loclTRK")),
        case("idotless", Some("dotlessi")),
        case("Jacute", Some("uni004A0301")),
        case("scurl", Some("u1DF1E")),
        // In the old AGL, Delta was confused with increment (0x2206), so it is
        // now banned from the Greek alphabet.
        case("Delta", Some("uni0394")),
        case("increment", Some("uni2206")),
        case("dog-ko", Some("uniB3C5")),
        case("bau-kannada", Some("uni0CAC0CCC")),
        case("EnglandFlag", Some("u1F3F4E0067E0062E0065E006EE0067E007F")),
        case("pileOfPoo", Some("u1F4A9")),
        case("lam_alef-ar.fina", Some("uni06440627.fina"))
    )]
    fn query_production_names(name: &str, expected: Option<&str>) {
        let data = GlyphData::new(None);
        let info = data.query_no_synthesis(name, None).unwrap();
        let production_name = info.production_name.map(|p| p.to_string());
        assert_eq!(
            production_name,
            expected.map(String::from),
            "{name}: {production_name:?} != {expected:?}"
        );
    }

    // The original Python test cases for synthetic production names:
    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d59bec0c9437da3a748c58f2999911/tests/glyphdata_test.py#L196-L409
    // Many of them were dropped here because the full list is long and repetitive.
    #[rstest(
        name,
        expected,
        case("Ech_Vew-arm.liga", "uni0535054E.liga"),
        case("aiMatra_anusvara-deva", "uni09480902"),
        case("aiMatra_reph_anusvara-deva", "uni09480930094D0902"),
        case("ca_iMatra-tamil", "uni0B9A0BBF"),
        case("ch_ya-deva", "uni091B094D092F"),
        case("d_dh_ya-deva", "uni0926094D0927094D092F"),
        case("da-khmer.below.ro", "uni17D2178A.ro"),
        case("da_rVocalicMatra-deva", "uni09260943"),
        case("dd_dda-deva", "uni0921094D0921"),
        case("eShortMatra_reph_anusvara-deva", "uni09460930094D0902"),
        case("ech_vew-arm.liga.sc", "uni0565057E.liga.sc"),
        case("finalkaf_qamats-hb", "uni05DA05B8"),
        case("finalkaf_sheva-hb", "uni05DA05B0"),
        case("finalkafdagesh_qamats-hb", "uniFB3A05B8"),
        case("finalkafdagesh_sheva-hb", "uniFB3A05B0"),
        case("h_la-deva", "uni0939094D0932"),
        case("ha_iMatra-tamil", "uni0BB90BBF"),
        case("hatafpatah_siluqleft-hb", "uni05B205BD"),
        case("iMark_toandakhiat-khmer.narrow", "uni17B717CD.narrow"),
        case("idotaccent.sc", "i.loclTRK.sc"),
        case("iiMatra_reph-deva", "uni09400930094D"),
        case("iiMatra_reph-deva.alt2", "uni09400930094D.alt2"),
        case("j_ny-deva", "uni091C094D091E094D"),
        case("j_ny-deva.alt2", "uni091C094D091E094D.alt2"),
        case("mo-khmer.below.ro", "uni17D21798.ro"),
        case("moMa_underscore-thai", "uni0E21005F"),
        case("nno-khmer.below.narrow1", "uni17D2178E.narrow1"),
        case("nyo-khmer.full.below.narrow", "uni17D21789.full.below.narrow"),
        case("sh_ra_iiMatra-tamil", "uni0BB60BCD0BB00BC0"),
        // plus some more tests that are not in glyphsLib
        case("A_A", "A_A"),
        case("a_a.sc", "a_a.sc"),
        case("brevecomb_acutecomb", "uni03060301"),
        case("brevecomb_acutecomb.case", "uni03060301.case"),
        case("pileOfPoo_pileOfPoo", "u1F4A9_u1F4A9"),
        case("pileOfPoo.ss01", "u1F4A9.ss01"),
        case("lam_alef-ar.fina.ss02", "uni06440627.fina.ss02")
    )]
    fn synthetic_production_names(name: &str, expected: &str) {
        let data = GlyphData::new(None);
        let info = data.query(name, None).unwrap();
        let production_name = info.production_name.unwrap().to_string();
        assert_eq!(
            production_name.as_str(),
            expected,
            "{name}: {production_name:?} != {expected:?}"
        );
    }
}