glyphs_reader/
glyphdata.rs

1//! determining glyph properties
2//!
3//! This module provides access to glyph info extracted from bundled
4//! (and potentially user-provided) data files.
5
6use quick_xml::{
7    Reader,
8    events::{BytesStart, Event},
9};
10use std::{
11    borrow::Cow,
12    collections::{BTreeSet, HashMap},
13    fmt::Display,
14    num::ParseIntError,
15    path::{Path, PathBuf},
16    str::FromStr,
17};
18
19use icu_properties::props::GeneralCategory;
20
21use smol_str::SmolStr;
22
23use crate::glyphdata_bundled::{self as bundled, find_pos_by_prod_name};
24
/// The primary category for a given glyph
///
/// These categories are not the same as the unicode character categories.
///
/// Each variant can be parsed from the identically spelled string (e.g. `"Mark"`)
/// via this type's [`FromStr`] implementation.
// The derived `PartialOrd`/`Ord` and the `u8` discriminants both follow declaration
// order. NOTE(review): confirm nothing depends on the numeric values before reordering.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Category {
    Mark,
    Space,
    Separator,
    Letter,
    Number,
    Symbol,
    Punctuation,
    /// Also used as the fallback when a production name can be synthesized
    /// for a glyph but no category can.
    Other,
}
40
/// The subcategory of a given glyph
///
/// Variants are parsed by this type's [`FromStr`] implementation. Most use the
/// variant name as-is; the two multi-word variants are spelled with a space
/// (`"Decimal Digit"` and `"Spacing Combining"`).
// The derived `PartialOrd`/`Ord` and the `u8` discriminants both follow declaration
// order. NOTE(review): confirm nothing depends on the numeric values before reordering.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Subcategory {
    Spacing,
    Radical,
    Math,
    Superscript,
    Geometry,
    Dash,
    /// Parsed from the string `"Decimal Digit"`.
    DecimalDigit,
    Currency,
    Fraction,
    Halfform,
    Small,
    Number,
    Quote,
    Space,
    Letter,
    Jamo,
    Format,
    Parenthesis,
    Matra,
    Arrow,
    Nonspacing,
    Compatibility,
    Syllable,
    Ligature,
    Modifier,
    /// Parsed from the string `"Spacing Combining"`.
    SpacingCombining,
    Emoji,
    Enclosing,
    Composition,
    Lowercase,
    Uppercase,
    Smallcaps,
    Conjunct,
    Other,
}
80
/// The script of a given glyph
///
/// Parsed from the `script` attribute of a glyph data entry via this type's
/// [`FromStr`] implementation, which expects names starting with a lowercase
/// letter (e.g. `"adlam"`, `"blackLetter"`).
// The derived `PartialOrd`/`Ord` and the `u8` discriminants both follow declaration
// order. NOTE(review): confirm nothing depends on the numeric values before reordering.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Script {
    Adlam,
    Alchemical,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Batak,
    Bengali,
    BlackLetter,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Canadian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Coptic,
    Cyrillic,
    Dentistry,
    Deseret,
    Devanagari,
    Divesakuru,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Greek,
    Gujarati,
    Gurmukhi,
    Han,
    Hangul,
    Hebrew,
    Javanese,
    Kana,
    Kannada,
    Kawi,
    Kayahli,
    Khmer,
    Khojki,
    Lao,
    Latin,
    Lepcha,
    Lue,
    Mahjong,
    Malayalam,
    Mandaic,
    Math,
    Mongolian,
    Musical,
    Myanmar,
    Nko,
    NyiakengPuachueHmong,
    Ogham,
    Oriya,
    Osage,
    Osmanya,
    PahawhHmong,
    PhaistosDisc,
    Rovas,
    Runic,
    Samaritan,
    Shavian,
    Sinhala,
    Syriac,
    Tamil,
    Telugu,
    Thaana,
    Thai,
    Tham,
    Tibet,
    Tifinagh,
    Vai,
    Yezidi,
    Yi,
}
165
/// Production name of a glyph.
///
/// Per [khaled](https://github.com/googlefonts/fontc/pull/1354#pullrequestreview-2707517748)
/// the overwhelming majority follow simple patterns.
///
/// The two numeric variants store only the codepoint; the textual form is
/// regenerated on demand by the `Display` and `SmolStr` conversions.
///
/// See also <https://github.com/adobe-type-tools/agl-specification?tab=readme-ov-file#2-the-mapping>
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ProductionName {
    /// `uniHEX`, e.g. `uni004A`.
    ///
    /// Formatted as `uni` followed by at least four uppercase hex digits.
    // uniHEX, e.g. uni004A
    Bmp(u32),
    /// `uHEX`, e.g. `uE007D`.
    ///
    /// Formatted as `u` followed by uppercase hex digits without padding.
    // uHEX, e.g. uE007D
    NonBmp(u32),
    /// Any name that does not follow one of the two patterns above, stored verbatim.
    // I reject your patterns and choose my own
    Custom(SmolStr),
}
181
182impl From<&str> for ProductionName {
183    fn from(v: &str) -> ProductionName {
184        fn try_parse(
185            v: &str,
186            lbound: u32,
187            ubound: u32,
188            f: impl Fn(u32) -> ProductionName,
189        ) -> Option<ProductionName> {
190            if let Ok(v) = u32::from_str_radix(v, 16)
191                && v >= lbound
192                && v <= ubound
193            {
194                return Some(f(v));
195            }
196            None
197        }
198
199        match v {
200            _ if v.starts_with("uni") => try_parse(&v[3..], 0, 0xFFFF, ProductionName::Bmp),
201            _ if v.starts_with("u") => {
202                try_parse(&v[1..], 0xFFFF + 1, 0x10FFFF, ProductionName::NonBmp)
203            }
204            _ => None,
205        }
206        .unwrap_or_else(|| ProductionName::Custom(v.into()))
207    }
208}
209
210impl From<u32> for ProductionName {
211    fn from(v: u32) -> ProductionName {
212        if v <= 0xFFFF {
213            ProductionName::Bmp(v)
214        } else {
215            ProductionName::NonBmp(v)
216        }
217    }
218}
219
220impl Display for ProductionName {
221    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222        match self {
223            ProductionName::Bmp(cp) => write!(f, "uni{cp:04X}"),
224            ProductionName::NonBmp(cp) => write!(f, "u{cp:X}"),
225            ProductionName::Custom(s) => write!(f, "{s}"),
226        }
227    }
228}
229
230impl From<ProductionName> for SmolStr {
231    fn from(v: ProductionName) -> SmolStr {
232        match v {
233            ProductionName::Bmp(cp) => smol_str::format_smolstr!("uni{cp:04X}"),
234            ProductionName::NonBmp(cp) => smol_str::format_smolstr!("u{cp:X}"),
235            ProductionName::Custom(s) => s,
236        }
237    }
238}
239
/// A queryable set of glyph data
///
/// Always queries static data from glyphsLib. Optionally includes a set of override values as well.
///
/// Default/no overrides instances are cheap. Instances created with overrides are more expensive.
#[derive(Default)]
pub struct GlyphData {
    // override-names are preferred to names in data
    // Keyed by glyph name; `None` when no overrides were supplied.
    overrides: Option<HashMap<SmolStr, QueryResult>>,
    // Maps a codepoint to the name of the override entry that declares it, so
    // that overrides can also be found by codepoint. Derived from `overrides`
    // when the instance is constructed.
    // (sic: the field name is spelled with three 'r's throughout this file)
    overrrides_by_codepoint: Option<HashMap<u32, SmolStr>>,
}
251
252impl GlyphData {
253    /// Overrides, if provided, explicitly assign the result for a given query
254    pub(crate) fn new(overrides: Option<HashMap<SmolStr, QueryResult>>) -> Self {
255        let overrrides_by_codepoint = overrides.as_ref().map(|overrides| {
256            overrides
257                .iter()
258                .filter_map(|(k, v)| v.codepoint.map(|cp| (cp, k.clone())))
259                .collect()
260        });
261        Self {
262            overrides,
263            overrrides_by_codepoint,
264        }
265    }
266
267    /// Create a new data set with user provided overrides
268    pub fn with_override_file(override_file: &Path) -> Result<Self, GlyphDataError> {
269        let bytes = std::fs::read(override_file).map_err(|err| GlyphDataError::UserFile {
270            path: override_file.to_owned(),
271            reason: err.kind(),
272        })?;
273        let overrides = parse_entries(&bytes)?;
274        Ok(GlyphData::new(Some(overrides)))
275    }
276}
277
/// The category and subcategory to use
///
/// Used for overrides and as the result of [`GlyphData::query`]
#[derive(Debug, Clone, PartialEq)]
pub struct QueryResult {
    /// Primary category; the only field that is always present.
    pub category: Category,
    /// Subcategory, if one is known.
    pub subcategory: Option<Subcategory>,
    /// Codepoint declared for the glyph. `None` for entries registered under
    /// an alternate name and for results synthesized from name heuristics.
    pub codepoint: Option<u32>,
    /// Script, if one is known. Synthesized results never carry a script.
    pub script: Option<Script>,
    /// Production name, if one is known or could be constructed.
    pub production_name: Option<ProductionName>,
}
289
/// Errors that can occur while loading or parsing glyph data.
#[derive(Clone, Debug, thiserror::Error)]
pub enum GlyphDataError {
    /// The user-provided override file could not be read.
    #[error("Couldn't read user file at '{path}': '{reason}'")]
    UserFile {
        path: PathBuf,
        // only the kind is kept, not the full `std::io::Error`
        reason: std::io::ErrorKind,
    },
    /// The XML reader reported an error.
    #[error("Error parsing XML: '{0}'")]
    ReaderError(#[from] quick_xml::Error),
    /// An attribute of an XML element was malformed.
    #[error("Error parsing XML attribute: '{0}'")]
    XmlAttributeError(#[from] quick_xml::events::attributes::AttrError),
    /// The `category` attribute held a value that is not a known [`Category`].
    #[error("Unknown category '{0}'")]
    InvalidCategory(SmolStr),
    /// The `subCategory` attribute held a value that is not a known [`Subcategory`].
    #[error("Unknown subcategory '{0}'")]
    InvalidSubcategory(SmolStr),
    /// The `script` attribute held a value that is not a known [`Script`].
    #[error("Unknown script '{0}'")]
    InvalidScript(SmolStr),
    /// Something other than a `<glyphData>` start tag followed the XML preamble.
    #[error("the XML input did not start with a <glyphdata> tag")]
    WrongFirstElement,
    /// A glyph entry lacked a required attribute (`name` or `category`).
    #[error("Missing required attribute '{missing}' in '{attributes}'")]
    MissingRequiredAttribute {
        // the raw attribute text of the offending element, lossily decoded
        attributes: String,
        missing: &'static str,
    },
    /// The `unicode` attribute could not be parsed as a hexadecimal number.
    #[error("Invalid unicode value '{raw}': '{inner}'")]
    InvalidUnicode { raw: String, inner: ParseIntError },
    /// A glyph entry carried an attribute this parser does not know about.
    #[error("Unexpected attribute '{0}'")]
    UnknownAttribute(String),
}
319
320impl GlyphDataError {
321    // a little helper here makes our parsing code cleaner
322    fn missing_attr(name: &'static str, raw_attrs: &[u8]) -> Self {
323        let attributes = String::from_utf8_lossy(raw_attrs).into_owned();
324        Self::MissingRequiredAttribute {
325            attributes,
326            missing: name,
327        }
328    }
329}
330
331/// Parse glyph info entries out of a GlyphData xml file.
332pub(crate) fn parse_entries(xml: &[u8]) -> Result<HashMap<SmolStr, QueryResult>, GlyphDataError> {
333    fn check_and_advance_past_preamble(reader: &mut Reader<&[u8]>) -> Result<(), GlyphDataError> {
334        loop {
335            let event = reader.read_event()?;
336            match event {
337                Event::Comment(_) => (),
338                Event::Decl(_) => (),
339                Event::DocType(_) => (),
340                Event::Start(start) if start.name().as_ref() == b"glyphData" => return Ok(()),
341                _other => {
342                    return Err(GlyphDataError::WrongFirstElement);
343                }
344            }
345        }
346    }
347
348    let mut reader = Reader::from_reader(xml);
349    reader.config_mut().trim_text(true);
350
351    check_and_advance_past_preamble(&mut reader)?;
352
353    let mut by_name = HashMap::new();
354    let mut alt_names = Vec::new();
355    for result in
356        iter_rows(&mut reader).map(|row| row.map_err(Into::into).and_then(parse_glyph_xml))
357    {
358        let info = result?;
359        by_name.insert(
360            info.name.clone(),
361            QueryResult {
362                category: info.category,
363                subcategory: info.subcategory,
364                codepoint: info.codepoint,
365                script: info.script,
366                production_name: info.production_name.clone(),
367            },
368        );
369        for alt in info.alt_names {
370            alt_names.push((
371                alt,
372                QueryResult {
373                    category: info.category,
374                    subcategory: info.subcategory,
375                    codepoint: None,
376                    script: info.script,
377                    production_name: info.production_name.clone(),
378                },
379            ));
380        }
381    }
382
383    // apply alts after to ensure they can't steal "real" names
384    for (name, value) in alt_names {
385        by_name.entry(name).or_insert(value);
386    }
387
388    Ok(by_name)
389}
390
391fn iter_rows<'a, 'b: 'a>(
392    reader: &'b mut Reader<&'a [u8]>,
393) -> impl Iterator<Item = Result<BytesStart<'a>, quick_xml::Error>> + 'a {
394    std::iter::from_fn(|| match reader.read_event() {
395        Err(e) => Some(Err(e)),
396        Ok(Event::Empty(start)) => Some(Ok(start)),
397        _ => None,
398    })
399}
400
/// The validated contents of a single glyph element from a GlyphData xml file.
struct GlyphInfoFromXml {
    // from the required `name` attribute
    name: SmolStr,
    // from the comma separated `altNames` attribute; empty if it is absent
    alt_names: Vec<SmolStr>,
    // from the required `category` attribute
    category: Category,
    // from the optional `subCategory` attribute
    subcategory: Option<Subcategory>,
    // from the optional `unicode` attribute, parsed as hexadecimal
    codepoint: Option<u32>,
    // from the optional `script` attribute
    script: Option<Script>,
    // from the optional `production` attribute
    production_name: Option<ProductionName>,
}
410
411fn parse_glyph_xml(item: BytesStart) -> Result<GlyphInfoFromXml, GlyphDataError> {
412    let mut name = None;
413    let mut category = None;
414    let mut subcategory = None;
415    let mut unicode = None;
416    let mut alt_names = None;
417    let mut script = None;
418    let mut production_name = None;
419
420    for attr in item.attributes() {
421        let attr = attr?;
422        let value = attr.unescape_value()?;
423        match attr.key.as_ref() {
424            b"name" => name = Some(value),
425            b"category" => category = Some(value),
426            b"subCategory" => subcategory = Some(value),
427            b"unicode" => unicode = Some(value),
428            b"altNames" => alt_names = Some(value),
429            b"script" => script = Some(value),
430            b"production" => production_name = Some(value.as_ref().into()),
431            b"unicodeLegacy" | b"case" | b"direction" | b"description" => (),
432            other => {
433                return Err(GlyphDataError::UnknownAttribute(
434                    String::from_utf8_lossy(other).into_owned(),
435                ));
436            }
437        }
438    }
439
440    // now we've found some values, let's finalize them
441    let name = name
442        .map(SmolStr::new)
443        .ok_or_else(|| GlyphDataError::missing_attr("name", item.attributes_raw()))?;
444    let category = category
445        .ok_or_else(|| GlyphDataError::missing_attr("category", item.attributes_raw()))
446        .and_then(|cat| {
447            Category::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidCategory)
448        })?;
449    let subcategory = subcategory
450        .map(|cat| Subcategory::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidSubcategory))
451        .transpose()?;
452    let script = script
453        .map(|cat| Script::from_str(cat.as_ref()).map_err(GlyphDataError::InvalidScript))
454        .transpose()?;
455    let codepoint = unicode
456        .map(|s| {
457            u32::from_str_radix(&s, 16).map_err(|inner| GlyphDataError::InvalidUnicode {
458                raw: s.into_owned(),
459                inner,
460            })
461        })
462        .transpose()?;
463    let alt_names = alt_names
464        .map(|names| {
465            names
466                .as_ref()
467                .split(',')
468                .map(|name| SmolStr::from(name.trim()))
469                .collect()
470        })
471        .unwrap_or_default();
472
473    Ok(GlyphInfoFromXml {
474        name,
475        alt_names,
476        category,
477        subcategory,
478        codepoint,
479        script,
480        production_name,
481    })
482}
483
484impl GlyphData {
485    /// Get the info for the given name/codepoints, attempting to synthesize it if necessary
486    ///
487    /// Returns, from most to least preferred:
488    ///
489    /// 1. The matching override value
490    /// 1. The matching value from bundled data
491    /// 1. A computed value based on name heuristics
492    ///
493    // See https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L94
494    pub fn query(&self, name: &str, codepoints: Option<&BTreeSet<u32>>) -> Option<QueryResult> {
495        self.query_no_synthesis(name, codepoints)
496            // we don't have info for this glyph: can we synthesize it?
497            .or_else(|| self.construct_result(name))
498    }
499
500    /// As [`Self::query`] but without a fallback to computed values.
501    ///
502    /// Exists to enable result synthesis to query.
503    fn query_no_synthesis(
504        &self,
505        name: &str,
506        codepoints: Option<&BTreeSet<u32>>,
507    ) -> Option<QueryResult> {
508        // Override?
509        if let (Some(overrides), Some(overrides_by_codepoint)) = (
510            self.overrides.as_ref(),
511            self.overrrides_by_codepoint.as_ref(),
512        ) {
513            let override_result = overrides.get(name).or_else(|| {
514                codepoints
515                    .into_iter()
516                    .flat_map(|cps| cps.iter())
517                    .find_map(|cp: &u32| {
518                        overrides_by_codepoint
519                            .get(cp)
520                            .and_then(|n| overrides.get(n))
521                    })
522            });
523            if let Some(override_result) = override_result {
524                return Some(QueryResult {
525                    category: override_result.category,
526                    subcategory: override_result.subcategory,
527                    codepoint: override_result.codepoint,
528                    script: override_result.script,
529                    production_name: override_result.production_name.clone(),
530                });
531            }
532        }
533
534        // No override, perhaps we have a direct answer?
535        bundled::find_pos_by_name(name)
536            .or_else(|| {
537                codepoints
538                    .into_iter()
539                    .flat_map(|cps| cps.iter())
540                    .find_map(|cp| bundled::find_pos_by_codepoint(*cp))
541            })
542            .or_else(|| find_pos_by_prod_name(name.into()))
543            .map(|i| {
544                bundled::get(i).unwrap_or_else(|| panic!("We found invalid index {i} somehow"))
545            })
546    }
547
548    fn contains_name(&self, name: &str) -> bool {
549        if let Some(overrides) = self.overrides.as_ref() {
550            let name: SmolStr = name.into();
551            if overrides.contains_key(&name) {
552                return true;
553            }
554        }
555        bundled::find_pos_by_name(name).is_some()
556    }
557
558    fn construct_result(&self, name: &str) -> Option<QueryResult> {
559        let category_subcategory = self.construct_category(name);
560        let production_name = self.construct_production_name(name);
561        if category_subcategory.is_none() && production_name.is_none() {
562            return None;
563        }
564        // if we have a production name but no category, 'Other' is good enough
565        let (category, subcategory) = category_subcategory.unwrap_or((Category::Other, None));
566        Some(QueryResult {
567            category,
568            subcategory,
569            codepoint: None,
570            script: None,
571            production_name,
572        })
573    }
574
575    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L199
576    fn construct_category(&self, name: &str) -> Option<(Category, Option<Subcategory>)> {
577        // in glyphs.app '_' prefix means "no export"
578        if name.starts_with('_') {
579            return None;
580        }
581        let (base_name, _) = self.split_glyph_suffix(name);
582        if let Some(result) = self.query_no_synthesis(base_name, None) {
583            return Some((result.category, result.subcategory));
584        }
585
586        if let Some(base_names) = self.split_ligature_glyph_name(base_name) {
587            let base_names_attributes: Vec<_> = base_names
588                .iter()
589                .map(|name| self.query_no_synthesis(name, None))
590                .collect();
591            if let Some(first_attr) = base_names_attributes
592                .first()
593                .expect("if we have base_names it is non-empty")
594            {
595                // if first is mark, we're a mark
596                if first_attr.category == Category::Mark {
597                    return Some((Category::Mark, first_attr.subcategory));
598                } else if first_attr.category == Category::Letter {
599                    // if first is letter and rest are marks/separators, we use info from first
600                    if base_names_attributes
601                        .iter()
602                        .skip(1)
603                        .map(|result| result.as_ref().map(|r| r.category))
604                        .all(|cat| matches!(cat, None | Some(Category::Mark | Category::Separator)))
605                    {
606                        return Some((first_attr.category, first_attr.subcategory));
607                    } else {
608                        return Some((Category::Letter, Some(Subcategory::Ligature)));
609                    }
610                }
611            }
612        };
613
614        // finally fall back to checking the AGLFN for the base name:
615        Self::construct_category_via_agl(base_name)
616    }
617
618    // https://github.com/googlefonts/glyphsLib/blob/c4db6b981d5/Lib/glyphsLib/glyphdata.py#L351
619    fn construct_production_name(&self, name: &str) -> Option<ProductionName> {
620        fn append_suffix(base_name: &mut String, suffix: Option<&str>) {
621            if let Some(suffix) = suffix {
622                base_name.push('.');
623                base_name.push_str(suffix);
624            }
625        }
626
627        fn is_u_name(name: &str) -> bool {
628            name.starts_with("u") && name[1..].bytes().all(|b| b.is_ascii_hexdigit())
629        }
630
631        let (base_name, suffix) = self.split_glyph_suffix(name);
632
633        // if we have a production name for the base name, append the suffix and go home
634        let prod_name_with_suffix = suffix.and_then(|_| {
635            self.query_no_synthesis(base_name, None)
636                .and_then(|result| result.production_name)
637                .map(|base_prod_name| {
638                    let mut prod_name = base_prod_name.to_string();
639                    append_suffix(&mut prod_name, suffix);
640                    prod_name.as_str().into()
641                })
642        });
643        if prod_name_with_suffix.is_some() {
644            return prod_name_with_suffix;
645        }
646
647        let base_names = self
648            .split_ligature_glyph_name(base_name)
649            .unwrap_or_else(|| vec![base_name.into()]);
650        // Attempt to find a production name for each ligature component (or the whole base name).
651        // Return early if any such names have no GlyphData entry
652        // OR the entry doesn't specify a production name AND they aren't already AGLFN names...
653        let prod_names: Vec<SmolStr> = base_names
654            .into_iter()
655            .map(|name| {
656                self.query_no_synthesis(&name, None).and_then(|result| {
657                    result.production_name.map(Into::into).or_else(|| {
658                        // if no production name, return the name itself if already in AGLFN
659                        fontdrasil::agl::char_for_agl_name(name.as_ref()).map(|_| name)
660                    })
661                })
662            })
663            .collect::<Option<_>>()?;
664
665        // only (uniXXXX, uniYYYY, etc.) names with 4 hex digits can be concatenated using the
666        // more compact format uniXXXXYYYY... uXXXXX names for characters beyond BMP are joined
667        // in ligatures using the usual '_'.
668        let any_characters_outside_bmp = prod_names
669            .iter()
670            .any(|name| name.len() > 5 && is_u_name(name.as_ref()));
671        let any_uni_names = prod_names.iter().any(|name| name.starts_with("uni"));
672
673        if !any_characters_outside_bmp && any_uni_names {
674            let mut uni_names: Vec<Cow<str>> = Vec::new();
675            for part in &prod_names {
676                if let Some(stripped) = part.strip_prefix("uni") {
677                    uni_names.push(Cow::Borrowed(stripped));
678                } else if part.len() == 5 && is_u_name(part.as_ref()) {
679                    uni_names.push(Cow::Borrowed(&part.as_str()[1..]));
680                } else if let Some(ch) = fontdrasil::agl::char_for_agl_name(part.as_ref()) {
681                    uni_names.push(Cow::Owned(format!("{:04X}", ch as u32)));
682                } else {
683                    panic!("Unexpected part while constructing production name: {part}");
684                }
685            }
686            let mut result = String::from("uni");
687            for segment in uni_names {
688                result.push_str(segment.as_ref());
689            }
690            append_suffix(&mut result, suffix);
691            return Some(result.as_str().into());
692        }
693
694        let mut result = prod_names.join("_");
695        append_suffix(&mut result, suffix);
696        Some(result.as_str().into())
697    }
698
699    // this doesn't need a &self param, but we want it locally close to the
700    // code that calls it, so we'll make it a type method :shrug:
701    fn construct_category_via_agl(base_name: &str) -> Option<(Category, Option<Subcategory>)> {
702        if let Some(first_char) = fontdrasil::agl::glyph_name_to_unicode(base_name)
703            .chars()
704            .next()
705        {
706            let (category, subcategory) = category_from_icu(first_char);
707
708            // Exception: Something like "one_two" should be a (_, Ligature),
709            // "acutecomb_brevecomb" should however stay (Mark, Nonspacing).
710            if base_name.contains('_') && category != Category::Mark {
711                return Some((category, Some(Subcategory::Ligature)));
712            } else {
713                return Some((category, subcategory));
714            }
715        }
716        None
717    }
718
719    fn split_glyph_suffix<'n>(&self, name: &'n str) -> (&'n str, Option<&'n str>) {
720        let multi_suffix = name.bytes().filter(|b| *b == b'.').count() > 1;
721        if multi_suffix {
722            // with multiple suffixes, try adding them one at a time and seeing if
723            // we find a known name.
724            // basically: for 'char.bottom.alt' we want to return (char.bottom, alt)
725            // if
726            for idx in name
727                .bytes()
728                .enumerate()
729                .filter_map(|(i, b)| (b == b'.').then_some(i))
730                .skip(1)
731            {
732                let (base, suffix) = name.split_at(idx);
733                if self.contains_name(base) {
734                    // suffix starts with '.' so we strip it to match split_once below
735                    return (base, Some(&suffix[1..]));
736                }
737            }
738        }
739        // finally just split at the first dot, or the whole name if no suffix
740        name.split_once('.')
741            .map_or_else(|| (name, None), |(base, suffix)| (base, Some(suffix)))
742    }
743
744    /// Split a ligature glyph into component parts
745    ///
746    /// Returns `None` if the name does not contain the '_' character, otherwise
747    /// returns a list of names of components, derived from the glyph name.
748    /// See
749    /// <https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L307>
750    fn split_ligature_glyph_name(&self, name: &str) -> Option<Vec<SmolStr>> {
751        // if last part has a script suffix, grab it
752        let script_suffix = name.rsplit_once('_')?.1.rsplit_once('-').map(|(_, x)| x);
753
754        let mut parts: Vec<_> = name
755            .trim_end_matches(script_suffix.unwrap_or_default())
756            // after trimming script we also need to trim the '-'!
757            .trim_end_matches('-')
758            .split('_')
759            .map(SmolStr::new)
760            .collect();
761
762        let script = match script_suffix {
763            // if there was no suffix, we're done
764            None => return Some(parts),
765            Some(script) => script,
766        };
767
768        // otherwise we try adding the script suffix to each part, and see if
769        // that's a known glyph name:
770        for part in parts.iter_mut() {
771            // if the part already has a script, continue
772            if part.contains('-') {
773                continue;
774            }
775
776            let new_part = smol_str::format_smolstr!("{part}-{script}");
777            // if non-suffixed exists but suffixed doesn't, keep non-suffixed
778            if self.contains_name(part.as_ref()) && !self.contains_name(&new_part) {
779                continue;
780            }
781            *part = new_part;
782        }
783        Some(parts)
784    }
785}
786
787// https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/Lib/glyphsLib/glyphdata.py#L261
788fn category_from_icu(c: char) -> (Category, Option<Subcategory>) {
789    match icu_properties::CodePointMapData::<GeneralCategory>::new().get(c) {
790        GeneralCategory::Unassigned | GeneralCategory::OtherSymbol => (Category::Symbol, None),
791        GeneralCategory::UppercaseLetter
792        | GeneralCategory::LowercaseLetter
793        | GeneralCategory::TitlecaseLetter
794        | GeneralCategory::OtherLetter => (Category::Letter, None),
795        GeneralCategory::ModifierLetter => (Category::Letter, Some(Subcategory::Modifier)),
796        GeneralCategory::NonspacingMark => (Category::Mark, Some(Subcategory::Nonspacing)),
797        GeneralCategory::SpacingMark => (Category::Mark, Some(Subcategory::SpacingCombining)),
798        GeneralCategory::EnclosingMark => (Category::Mark, Some(Subcategory::Enclosing)),
799        GeneralCategory::DecimalNumber | GeneralCategory::OtherNumber => {
800            (Category::Number, Some(Subcategory::DecimalDigit))
801        }
802        GeneralCategory::LetterNumber => (Category::Number, None),
803        GeneralCategory::SpaceSeparator => (Category::Separator, Some(Subcategory::Space)),
804        GeneralCategory::LineSeparator
805        | GeneralCategory::ParagraphSeparator
806        | GeneralCategory::Control => (Category::Separator, None),
807        GeneralCategory::Format => (Category::Separator, Some(Subcategory::Format)),
808        GeneralCategory::PrivateUse => (Category::Letter, Some(Subcategory::Compatibility)),
809        GeneralCategory::DashPunctuation => (Category::Punctuation, Some(Subcategory::Dash)),
810        GeneralCategory::OpenPunctuation | GeneralCategory::ClosePunctuation => {
811            (Category::Punctuation, Some(Subcategory::Parenthesis))
812        }
813        GeneralCategory::ConnectorPunctuation | GeneralCategory::OtherPunctuation => {
814            (Category::Punctuation, None)
815        }
816        GeneralCategory::InitialPunctuation | GeneralCategory::FinalPunctuation => {
817            (Category::Punctuation, Some(Subcategory::Quote))
818        }
819        GeneralCategory::MathSymbol => (Category::Symbol, Some(Subcategory::Math)),
820        GeneralCategory::CurrencySymbol => (Category::Symbol, Some(Subcategory::Currency)),
821        GeneralCategory::ModifierSymbol => (Category::Mark, Some(Subcategory::Spacing)),
822        GeneralCategory::Surrogate => unreachable!("char cannot represent surrogate code points"),
823    }
824}
825
826impl FromStr for Category {
827    type Err = SmolStr;
828
829    fn from_str(s: &str) -> Result<Self, Self::Err> {
830        match s {
831            "Mark" => Ok(Self::Mark),
832            "Space" => Ok(Self::Space),
833            "Separator" => Ok(Self::Separator),
834            "Letter" => Ok(Self::Letter),
835            "Number" => Ok(Self::Number),
836            "Symbol" => Ok(Self::Symbol),
837            "Punctuation" => Ok(Self::Punctuation),
838            "Other" => Ok(Self::Other),
839            _ => Err(s.into()),
840        }
841    }
842}
843
844impl FromStr for Subcategory {
845    type Err = SmolStr;
846
847    fn from_str(s: &str) -> Result<Self, Self::Err> {
848        match s {
849            "Spacing" => Ok(Self::Spacing),
850            "Radical" => Ok(Self::Radical),
851            "Math" => Ok(Self::Math),
852            "Superscript" => Ok(Self::Superscript),
853            "Geometry" => Ok(Self::Geometry),
854            "Dash" => Ok(Self::Dash),
855            "Decimal Digit" => Ok(Self::DecimalDigit),
856            "Currency" => Ok(Self::Currency),
857            "Fraction" => Ok(Self::Fraction),
858            "Halfform" => Ok(Self::Halfform),
859            "Small" => Ok(Self::Small),
860            "Number" => Ok(Self::Number),
861            "Quote" => Ok(Self::Quote),
862            "Space" => Ok(Self::Space),
863            "Letter" => Ok(Self::Letter),
864            "Jamo" => Ok(Self::Jamo),
865            "Format" => Ok(Self::Format),
866            "Parenthesis" => Ok(Self::Parenthesis),
867            "Matra" => Ok(Self::Matra),
868            "Arrow" => Ok(Self::Arrow),
869            "Nonspacing" => Ok(Self::Nonspacing),
870            "Compatibility" => Ok(Self::Compatibility),
871            "Syllable" => Ok(Self::Syllable),
872            "Ligature" => Ok(Self::Ligature),
873            "Modifier" => Ok(Self::Modifier),
874            "Spacing Combining" => Ok(Self::SpacingCombining),
875            "Emoji" => Ok(Self::Emoji),
876            "Enclosing" => Ok(Self::Enclosing),
877            "Composition" => Ok(Self::Composition),
878            "Other" => Ok(Self::Other),
879            "Lowercase" => Ok(Self::Lowercase),
880            "Uppercase" => Ok(Self::Uppercase),
881            "Smallcaps" => Ok(Self::Smallcaps),
882            "Conjunct" => Ok(Self::Conjunct),
883
884            _ => Err(s.into()),
885        }
886    }
887}
888
889impl FromStr for Script {
890    type Err = SmolStr;
891
892    fn from_str(s: &str) -> Result<Self, Self::Err> {
893        match s {
894            "adlam" => Ok(Self::Adlam),
895            "alchemical" => Ok(Self::Alchemical),
896            "arabic" => Ok(Self::Arabic),
897            "armenian" => Ok(Self::Armenian),
898            "avestan" => Ok(Self::Avestan),
899            "balinese" => Ok(Self::Balinese),
900            "bamum" => Ok(Self::Bamum),
901            "batak" => Ok(Self::Batak),
902            "bengali" => Ok(Self::Bengali),
903            "blackLetter" => Ok(Self::BlackLetter),
904            "bopomofo" => Ok(Self::Bopomofo),
905            "brahmi" => Ok(Self::Brahmi),
906            "braille" => Ok(Self::Braille),
907            "buginese" => Ok(Self::Buginese),
908            "canadian" => Ok(Self::Canadian),
909            "chakma" => Ok(Self::Chakma),
910            "cham" => Ok(Self::Cham),
911            "cherokee" => Ok(Self::Cherokee),
912            "chorasmian" => Ok(Self::Chorasmian),
913            "coptic" => Ok(Self::Coptic),
914            "cyrillic" => Ok(Self::Cyrillic),
915            "dentistry" => Ok(Self::Dentistry),
916            "deseret" => Ok(Self::Deseret),
917            "devanagari" => Ok(Self::Devanagari),
918            "divesakuru" => Ok(Self::Divesakuru),
919            "elbasan" => Ok(Self::Elbasan),
920            "elymaic" => Ok(Self::Elymaic),
921            "ethiopic" => Ok(Self::Ethiopic),
922            "georgian" => Ok(Self::Georgian),
923            "glagolitic" => Ok(Self::Glagolitic),
924            "gothic" => Ok(Self::Gothic),
925            "greek" => Ok(Self::Greek),
926            "gujarati" => Ok(Self::Gujarati),
927            "gurmukhi" => Ok(Self::Gurmukhi),
928            "han" => Ok(Self::Han),
929            "hangul" => Ok(Self::Hangul),
930            "hebrew" => Ok(Self::Hebrew),
931            "javanese" => Ok(Self::Javanese),
932            "kana" => Ok(Self::Kana),
933            "kannada" => Ok(Self::Kannada),
934            "kawi" => Ok(Self::Kawi),
935            "kayahli" => Ok(Self::Kayahli),
936            "khmer" => Ok(Self::Khmer),
937            "khojki" => Ok(Self::Khojki),
938            "lao" => Ok(Self::Lao),
939            "latin" => Ok(Self::Latin),
940            "lepcha" => Ok(Self::Lepcha),
941            "lue" => Ok(Self::Lue),
942            "mahjong" => Ok(Self::Mahjong),
943            "malayalam" => Ok(Self::Malayalam),
944            "mandaic" => Ok(Self::Mandaic),
945            "math" => Ok(Self::Math),
946            "mongolian" => Ok(Self::Mongolian),
947            "musical" => Ok(Self::Musical),
948            "myanmar" => Ok(Self::Myanmar),
949            "nko" => Ok(Self::Nko),
950            "nyiakeng puachue hmong" => Ok(Self::NyiakengPuachueHmong),
951            "ogham" => Ok(Self::Ogham),
952            "oriya" => Ok(Self::Oriya),
953            "osage" => Ok(Self::Osage),
954            "osmanya" => Ok(Self::Osmanya),
955            "pahawh hmong" => Ok(Self::PahawhHmong),
956            "phaistosDisc" => Ok(Self::PhaistosDisc),
957            "rovas" => Ok(Self::Rovas),
958            "runic" => Ok(Self::Runic),
959            "samaritan" => Ok(Self::Samaritan),
960            "shavian" => Ok(Self::Shavian),
961            "sinhala" => Ok(Self::Sinhala),
962            "syriac" => Ok(Self::Syriac),
963            "tamil" => Ok(Self::Tamil),
964            "telugu" => Ok(Self::Telugu),
965            "thaana" => Ok(Self::Thaana),
966            "thai" => Ok(Self::Thai),
967            "tham" => Ok(Self::Tham),
968            "tibet" => Ok(Self::Tibet),
969            "tifinagh" => Ok(Self::Tifinagh),
970            "vai" => Ok(Self::Vai),
971            "yi" => Ok(Self::Yi),
972            _ => Err(s.into()),
973        }
974    }
975}
976
977impl Display for Category {
978    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
979        match self {
980            Self::Mark => write!(f, "Mark"),
981            Self::Space => write!(f, "Space"),
982            Self::Separator => write!(f, "Separator"),
983            Self::Letter => write!(f, "Letter"),
984            Self::Number => write!(f, "Number"),
985            Self::Symbol => write!(f, "Symbol"),
986            Self::Punctuation => write!(f, "Punctuation"),
987            Self::Other => write!(f, "Other"),
988        }
989    }
990}
991
992impl Display for Subcategory {
993    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
994        match self {
995            Self::Spacing => write!(f, "Spacing"),
996            Self::Radical => write!(f, "Radical"),
997            Self::Math => write!(f, "Math"),
998            Self::Superscript => write!(f, "Superscript"),
999            Self::Geometry => write!(f, "Geometry"),
1000            Self::Dash => write!(f, "Dash"),
1001            Self::DecimalDigit => write!(f, "Decimal Digit"),
1002            Self::Currency => write!(f, "Currency"),
1003            Self::Fraction => write!(f, "Fraction"),
1004            Self::Halfform => write!(f, "Halfform"),
1005            Self::Small => write!(f, "Small"),
1006            Self::Number => write!(f, "Number"),
1007            Self::Quote => write!(f, "Quote"),
1008            Self::Space => write!(f, "Space"),
1009            Self::Letter => write!(f, "Letter"),
1010            Self::Jamo => write!(f, "Jamo"),
1011            Self::Format => write!(f, "Format"),
1012            Self::Parenthesis => write!(f, "Parenthesis"),
1013            Self::Matra => write!(f, "Matra"),
1014            Self::Arrow => write!(f, "Arrow"),
1015            Self::Nonspacing => write!(f, "Nonspacing"),
1016            Self::Compatibility => write!(f, "Compatibility"),
1017            Self::Syllable => write!(f, "Syllable"),
1018            Self::Ligature => write!(f, "Ligature"),
1019            Self::Modifier => write!(f, "Modifier"),
1020            Self::SpacingCombining => write!(f, "Spacing Combining"),
1021            Self::Emoji => write!(f, "Emoji"),
1022            Self::Enclosing => write!(f, "Enclosing"),
1023            Self::Composition => write!(f, "Composition"),
1024            Self::Lowercase => write!(f, "Lowercase"),
1025            Self::Uppercase => write!(f, "Uppercase"),
1026            Self::Smallcaps => write!(f, "Smallcaps"),
1027            Self::Conjunct => write!(f, "Conjunct"),
1028            Self::Other => write!(f, "Other"),
1029        }
1030    }
1031}
1032
1033#[cfg(test)]
1034mod tests {
1035
1036    use super::*;
1037    use rstest::rstest;
1038
1039    #[test]
1040    fn simple_overrides() {
1041        let overrides = HashMap::from([(
1042            "A".into(),
1043            QueryResult {
1044                category: Category::Mark,
1045                subcategory: Some(Subcategory::SpacingCombining),
1046                codepoint: Some(b'A' as u32),
1047                script: Some(Script::Alchemical),
1048                production_name: Some(ProductionName::Custom("MagicA".into())),
1049            },
1050        )]);
1051        let data = GlyphData::new(Some(overrides));
1052
1053        let result = data.query("A", None).unwrap();
1054        assert_eq!(result.category, Category::Mark);
1055        assert_eq!(result.subcategory, Some(Subcategory::SpacingCombining));
1056        assert_eq!(result.codepoint, Some(b'A' as u32));
1057        assert_eq!(result.script, Some(Script::Alchemical));
1058        assert_eq!(result.production_name, Some("MagicA".into()));
1059    }
1060
1061    #[test]
1062    fn overrides_from_file() {
1063        let data =
1064            GlyphData::with_override_file(Path::new("./data/GlyphData_override_test.xml")).unwrap();
1065        assert_eq!(data.query("zero", None).unwrap().category, Category::Other);
1066        assert_eq!(data.query("C", None).unwrap().category, Category::Number);
1067        assert_eq!(
1068            data.query("Yogh", None).unwrap().production_name,
1069            Some("Yolo".into())
1070        );
1071    }
1072
1073    fn get_category(name: &str, codepoints: &[u32]) -> Option<(Category, Option<Subcategory>)> {
1074        let codepoints = codepoints.iter().copied().collect();
1075        GlyphData::new(None)
1076            .query(name, Some(&codepoints))
1077            .map(|result| (result.category, result.subcategory))
1078    }
1079
1080    // from python glyphsLib: https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d5/tests/glyphdata_test.py#L106
1081    #[test]
1082    fn py_test_category() {
1083        for (name, expected) in [
1084            (".notdef", Some((Category::Separator, None))),
1085            // this test case requires AGL lookup:
1086            ("uni000D", Some((Category::Separator, None))),
1087            (
1088                "boxHeavyUp",
1089                Some((Category::Symbol, Some(Subcategory::Geometry))),
1090            ),
1091            ("eacute", Some((Category::Letter, None))),
1092            ("Abreveacute", Some((Category::Letter, None))),
1093            ("C-fraktur", Some((Category::Letter, None))),
1094            ("fi", Some((Category::Letter, Some(Subcategory::Ligature)))),
1095            (
1096                "fi.alt",
1097                Some((Category::Letter, Some(Subcategory::Ligature))),
1098            ),
1099            (
1100                "hib-ko",
1101                Some((Category::Letter, Some(Subcategory::Syllable))),
1102            ),
1103            (
1104                "one.foo",
1105                Some((Category::Number, Some(Subcategory::DecimalDigit))),
1106            ),
1107            (
1108                "one_two.foo",
1109                Some((Category::Number, Some(Subcategory::Ligature))),
1110            ),
1111            (
1112                "o_f_f_i",
1113                Some((Category::Letter, Some(Subcategory::Ligature))),
1114            ),
1115            (
1116                "o_f_f_i.foo",
1117                Some((Category::Letter, Some(Subcategory::Ligature))),
1118            ),
1119            (
1120                "ain_alefMaksura-ar.fina",
1121                Some((Category::Letter, Some(Subcategory::Ligature))),
1122            ),
1123            (
1124                "brevecomb",
1125                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1126            ),
1127            (
1128                "brevecomb.case",
1129                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1130            ),
1131            (
1132                "brevecomb_acutecomb",
1133                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1134            ),
1135            (
1136                "brevecomb_acutecomb.case",
1137                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1138            ),
1139            (
1140                "caroncomb_dotaccentcomb",
1141                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1142            ),
1143            (
1144                "dieresiscomb_caroncomb",
1145                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1146            ),
1147            (
1148                "dieresiscomb_macroncomb",
1149                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1150            ),
1151            (
1152                "dotaccentcomb_macroncomb",
1153                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1154            ),
1155            (
1156                "macroncomb_dieresiscomb",
1157                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1158            ),
1159            (
1160                "dotaccentcomb_o",
1161                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1162            ),
1163            (
1164                "macronlowmod_O",
1165                Some((Category::Mark, Some(Subcategory::Modifier))),
1166            ),
1167            ("O_o", Some((Category::Letter, Some(Subcategory::Ligature)))),
1168            (
1169                "O_dotaccentcomb_o",
1170                Some((Category::Letter, Some(Subcategory::Ligature))),
1171            ),
1172            ("O_dotaccentcomb", Some((Category::Letter, None))),
1173            (
1174                "O_period",
1175                Some((Category::Letter, Some(Subcategory::Ligature))),
1176            ),
1177            ("O_nbspace", Some((Category::Letter, None))),
1178            ("_a", None),
1179            ("_aaa", None),
1180            (
1181                "dal_alef-ar",
1182                Some((Category::Letter, Some(Subcategory::Ligature))),
1183            ),
1184            (
1185                "dal_lam-ar.dlig",
1186                Some((Category::Letter, Some(Subcategory::Ligature))),
1187            ),
1188            ("po-khmer", Some((Category::Letter, None))),
1189            (
1190                "po-khmer.below",
1191                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1192            ),
1193            (
1194                "po-khmer.below.ro",
1195                Some((Category::Mark, Some(Subcategory::Nonspacing))),
1196            ),
1197        ] {
1198            let result = get_category(name, &[]);
1199            assert_eq!(result, expected, "{name}: {result:?} != {expected:?}");
1200        }
1201    }
1202
1203    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L145C5-L153C76
1204    #[test]
1205    fn py_category_by_unicode() {
1206        // "SignU.bn" is a non-standard name not defined in GlyphData.xml
1207        // 0x09C1 should match
1208        let result = get_category("SignU.bn", &[0x09C1]);
1209        assert_eq!(
1210            result,
1211            Some((Category::Mark, Some(Subcategory::Nonspacing)))
1212        )
1213    }
1214
1215    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d/tests/glyphdata_test.py#L155C5-L162C1
1216    // https://github.com/googlefonts/glyphsLib/issues/232
1217    #[test]
1218    fn py_bug_232() {
1219        let u = get_category("uni07F0", &[]);
1220        assert_eq!(u, Some((Category::Mark, Some(Subcategory::Nonspacing))));
1221        let g = get_category("longlowtonecomb-nko", &[]);
1222        assert_eq!(g, Some((Category::Mark, Some(Subcategory::Nonspacing))));
1223    }
1224
1225    #[test]
1226    fn unknown_name_combined_with_mark() {
1227        // if first part is unknown, we don't assign a category
1228        assert_eq!(get_category("Whata-WEIRDNameLOL_brevecomb", &[]), None)
1229    }
1230
1231    #[test]
1232    fn known_name_with_unknown_mark() {
1233        // if first part is a letter and rest is unknown, we use categories of
1234        // first part
1235        assert_eq!(
1236            get_category("i_acutecombcombcy", &[]),
1237            get_category("i", &[])
1238        )
1239    }
1240
1241    #[test]
1242    fn match_prod_name_with_suffix() {
1243        // https://github.com/googlefonts/fontc/issues/780#issuecomment-2674853729
1244        // "uni17BF.b" should match against production name uni17BF
1245        assert_eq!(
1246            Some((Category::Letter, None)),
1247            get_category("uni17BF.b", &[]),
1248        )
1249    }
1250
1251    #[rstest(name, expected,
1252        case("A", None),  // AGLFN names *are* production names
1253        case("z", None),
1254        case("nbspace", Some("uni00A0")),
1255        case("nonbreakingspace", Some("uni00A0")),  // altNames map to the same prod name
1256        case("uni00A0", Some("uni00A0")),  // prod names are already prod
1257        // the «» punctuation marks are spelled with an 'guillemets' in French, but for
1258        // some reasons the AGLFN has 'guillemot' (that's actually a bird! :shrug:)
1259        case("guillemetleft", Some("guillemotleft")),
1260        case("twosevenths", Some("two_fraction_seven")),
1261        case("idotaccent", Some("i.loclTRK")),
1262        case("idotless", Some("dotlessi")),
1263        case("Jacute", Some("uni004A0301")),
1264        case("scurl", Some("u1DF1E")),
1265        // In the old AGL, Delta was confused with increment 0x2206 so now it's banned
1266        // from the Greek alphabet.
1267        case("Delta", Some("uni0394")),
1268        case("increment", Some("uni2206")),
1269        case("dog-ko", Some("uniB3C5")),
1270        case("bau-kannada", Some("uni0CAC0CCC")),
1271        case("EnglandFlag", Some("u1F3F4E0067E0062E0065E006EE0067E007F")),
1272        case("pileOfPoo", Some("u1F4A9")),
1273        case("lam_alef-ar.fina", Some("uni06440627.fina")),
1274    )]
1275    fn query_production_names(name: &str, expected: Option<&str>) {
1276        let production_name = GlyphData::new(None)
1277            .query_no_synthesis(name, None)
1278            .unwrap()
1279            .production_name
1280            .map(|p| p.to_string());
1281        assert_eq!(
1282            production_name,
1283            expected.map(Into::into),
1284            "{name}: {production_name:?} != {expected:?}"
1285        );
1286    }
1287
1288    // Python original test cases for synthetic production names:
1289    // https://github.com/googlefonts/glyphsLib/blob/e2ebf5b517d59bec0c9437da3a748c58f2999911/tests/glyphdata_test.py#L196-L409
1290    // Note that I removed a bunch of them as they were too many and repetitive
1291    #[rstest(
1292        name,
1293        expected,
1294        case("Ech_Vew-arm.liga", "uni0535054E.liga"),
1295        case("aiMatra_anusvara-deva", "uni09480902"),
1296        case("aiMatra_reph_anusvara-deva", "uni09480930094D0902"),
1297        case("ca_iMatra-tamil", "uni0B9A0BBF"),
1298        case("ch_ya-deva", "uni091B094D092F"),
1299        case("d_dh_ya-deva", "uni0926094D0927094D092F"),
1300        case("da-khmer.below.ro", "uni17D2178A.ro"),
1301        case("da_rVocalicMatra-deva", "uni09260943"),
1302        case("dd_dda-deva", "uni0921094D0921"),
1303        case("eShortMatra_reph_anusvara-deva", "uni09460930094D0902"),
1304        case("ech_vew-arm.liga.sc", "uni0565057E.liga.sc"),
1305        case("finalkaf_qamats-hb", "uni05DA05B8"),
1306        case("finalkaf_sheva-hb", "uni05DA05B0"),
1307        case("finalkafdagesh_qamats-hb", "uniFB3A05B8"),
1308        case("finalkafdagesh_sheva-hb", "uniFB3A05B0"),
1309        case("h_la-deva", "uni0939094D0932"),
1310        case("ha_iMatra-tamil", "uni0BB90BBF"),
1311        case("hatafpatah_siluqleft-hb", "uni05B205BD"),
1312        case("iMark_toandakhiat-khmer.narrow", "uni17B717CD.narrow"),
1313        case("idotaccent.sc", "i.loclTRK.sc"),
1314        case("iiMatra_reph-deva", "uni09400930094D"),
1315        case("iiMatra_reph-deva.alt2", "uni09400930094D.alt2"),
1316        case("j_ny-deva", "uni091C094D091E094D"),
1317        case("j_ny-deva.alt2", "uni091C094D091E094D.alt2"),
1318        case("mo-khmer.below.ro", "uni17D21798.ro"),
1319        case("moMa_underscore-thai", "uni0E21005F"),
1320        case("nno-khmer.below.narrow1", "uni17D2178E.narrow1"),
1321        case("nyo-khmer.full.below.narrow", "uni17D21789.full.below.narrow"),
1322        case("sh_ra_iiMatra-tamil", "uni0BB60BCD0BB00BC0"),
1323        // plus some more tests that are not in glyphsLib
1324        case("A_A", "A_A"),
1325        case("a_a.sc", "a_a.sc"),
1326        case("brevecomb_acutecomb", "uni03060301"),
1327        case("brevecomb_acutecomb.case", "uni03060301.case"),
1328        case("pileOfPoo_pileOfPoo", "u1F4A9_u1F4A9"),
1329        case("pileOfPoo.ss01", "u1F4A9.ss01"),
1330        case("lam_alef-ar.fina.ss02", "uni06440627.fina.ss02"),
1331    )]
1332    fn synthetic_production_names(name: &str, expected: &str) {
1333        let production_name = GlyphData::new(None)
1334            .query(name, None)
1335            .unwrap()
1336            .production_name
1337            .unwrap()
1338            .to_string();
1339        assert_eq!(
1340            &production_name, expected,
1341            "{name}: {production_name:?} != {expected:?}"
1342        );
1343    }
1344}