gf_metadata/
lib.rs

1mod axes;
2mod designers;
3mod fonts_public;
4
5use std::{
6    cell::OnceCell,
7    collections::HashMap,
8    fs::{self, File},
9    io::{BufRead, BufReader, Error, ErrorKind},
10    path::{Path, PathBuf},
11    str::FromStr,
12};
13
14pub use axes::{AxisProto, FallbackProto};
15pub use designers::{AvatarProto, DesignerInfoProto};
16pub use fonts_public::*;
17use google_fonts_languages::LANGUAGES;
18pub use google_fonts_languages::{
19    ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
20};
21use protobuf::text_format::ParseError;
22use regex::Regex;
23use walkdir::WalkDir;
24
25/// Read a FamilyProto from a METADATA.pb file content.
26///
27/// This function handles undocumented fields by stripping them out before parsing.
28pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
29    if s.contains("position") {
30        let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
31        let s = re.replace_all(s, "");
32        protobuf::text_format::parse_from_str(&s)
33    } else {
34        protobuf::text_format::parse_from_str(s)
35    }
36}
37
38fn exemplar_score(font: &FontProto, preferred_style: FontStyle, preferred_weight: i32) -> i32 {
39    let mut score = 0;
40    // prefer preferred_style
41    if font.style() == preferred_style.style() {
42        score += 16;
43    }
44
45    // prefer closer to preferred_weight
46    score -= (font.weight() - preferred_weight).abs() / 100;
47
48    // prefer more weight to less weight
49    if font.weight() > preferred_weight {
50        score += 1;
51    }
52
53    // prefer variable
54    if font.filename().contains("].") {
55        score += 2;
56    }
57
58    score
59}
60
61/// Pick the exemplar font from a family.
62///
63/// This is the font file that is most likely to be a representative choice for
64/// the family. The heuristic is to prefer normal style, weight as close to 400
65/// as possible, and a variable font if present.
66pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
67    fn score(font: &FontProto) -> i32 {
68        exemplar_score(font, FontStyle::Normal, 400)
69    }
70    family
71        .fonts
72        .iter()
73        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
74}
75
/// Font style preference for font selection (normal or italic)
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum FontStyle {
    /// The regular, non-italic style
    Normal,
    /// The italic style
    Italic,
}

impl FontStyle {
    /// The string this preference is compared against a font's `style` value.
    fn style(&self) -> &str {
        match *self {
            Self::Normal => "normal",
            Self::Italic => "italic",
        }
    }
}
91
92/// Select the best matching font from a family given style and weight preferences.
93pub fn select_font(
94    family: &FamilyProto,
95    preferred_style: FontStyle,
96    preferred_weight: i32,
97) -> Option<&FontProto> {
98    let score =
99        |font: &FontProto| -> i32 { exemplar_score(font, preferred_style, preferred_weight) };
100    family
101        .fonts
102        .iter()
103        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
104}
105
106fn iter_families(
107    root: &Path,
108    filter: Option<&Regex>,
109) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
110    WalkDir::new(root)
111        .into_iter()
112        .filter_map(|d| d.ok())
113        .filter(|d| d.file_name() == "METADATA.pb")
114        .filter(move |d| {
115            filter
116                .map(|r| r.find(&d.path().to_string_lossy()).is_some())
117                .unwrap_or(true)
118        })
119        .map(|d| {
120            (
121                d.path().to_path_buf(),
122                read_family(&fs::read_to_string(d.path()).expect("To read files!")),
123            )
124        })
125}
126
127/// Iterate over all known languages.
128pub fn iter_languages(_root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
129    LANGUAGES.values().map(|l| Ok(*l.clone()))
130}
131
132/// Read tag entries from the tags/all directory.
133pub fn read_tags(root: &Path) -> Result<Vec<Tagging>, Error> {
134    let mut tag_dir = root.to_path_buf();
135    tag_dir.push("tags/all");
136    let mut tags = Vec::new();
137    for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
138        let entry = entry.expect("To access tag dir entries");
139        if entry
140            .path()
141            .extension()
142            .expect("To have extensions")
143            .to_str()
144            .expect("utf-8")
145            != "csv"
146        {
147            continue;
148        }
149        let fd = File::open(entry.path())?;
150        let rdr = BufReader::new(fd);
151        tags.extend(
152            rdr.lines()
153                .map(|s| s.expect("Valid tag lines"))
154                .map(|s| Tagging::from_str(&s).expect("Valid tag lines")),
155        );
156    }
157    Ok(tags)
158}
159
160/// Read tag metadata from tags/tags_metadata.csv
161pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
162    let mut tag_metadata_file = root.to_path_buf();
163    tag_metadata_file.push("tags/tags_metadata.csv");
164    let mut metadata = Vec::new();
165
166    let fd = File::open(&tag_metadata_file)?;
167    let rdr = BufReader::new(fd);
168    metadata.extend(
169        rdr.lines()
170            .map(|s| s.expect("Valid tag lines"))
171            .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
172    );
173
174    Ok(metadata)
175}
176
/// Split one CSV line into its comma-separated values.
///
/// Values are trimmed of surrounding whitespace. A value that starts with `"` runs
/// at least to its closing `"`, so commas inside the quotes do not split it; the
/// quotes themselves are kept in the returned value. An empty input yields no
/// values, and a trailing comma does not produce a trailing empty value.
///
/// # Panics
///
/// Panics if a value opens with `"` and the rest of the line has no closing `"`.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();
    while !rest.is_empty() {
        rest = rest.trim();

        // Where the search for the separating comma may begin. For a quoted value
        // that is the closing quote; the `+ 1` converts the index found in the
        // quote-stripped text back into an index into `rest`.
        let search_from = match rest.strip_prefix('"') {
            Some(unquoted) => unquoted.find('"').expect("Close quote") + 1,
            None => 0,
        };

        match rest[search_from..].find(',') {
            Some(offset) => {
                let comma = search_from + offset;
                values.push(rest[..comma].trim());
                rest = &rest[comma + 1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
200
201/// A tag entry for a family
202///
203/// A tagging is an association of a family (and optionally a specific
204/// designspace location within that family) with a tag and a numeric value for that tag.
205#[derive(Clone, Debug)]
206pub struct Tagging {
207    /// Font family name
208    pub family: String,
209    /// Optional designspace location within the family
210    ///
211    /// This is given in the form used in the fonts web API; for example, `ital,wght@1,700`
212    /// refers to the italic style at weight 700.
213    pub loc: String,
214    /// Tag name
215    pub tag: String,
216    /// Tag value
217    pub value: f32,
218}
219
220impl FromStr for Tagging {
221    type Err = Error;
222
223    fn from_str(s: &str) -> Result<Self, Self::Err> {
224        let values = csv_values(s);
225        let (family, loc, tag, value) = match values[..] {
226            [family, tag, value] => (family, "", tag, value),
227            [family, loc, tag, value] => (family, loc, tag, value),
228            _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
229        };
230        Ok(Tagging {
231            family: family.to_string(),
232            loc: loc.to_string(),
233            tag: tag.to_string(),
234            value: f32::from_str(value)
235                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
236        })
237    }
238}
239
240/// Metadata for a tag
241#[derive(Clone, Debug)]
242pub struct TagMetadata {
243    /// Tag name (e.g. "/Quality/Drawing")
244    pub tag: String,
245    /// Minimum tag value
246    pub min_value: f32,
247    /// Maximum tag value
248    pub max_value: f32,
249    /// User friendly name for the tag (e.g. "drawing quality")
250    pub prompt_name: String,
251}
252
253impl FromStr for TagMetadata {
254    type Err = Error;
255
256    fn from_str(s: &str) -> Result<Self, Self::Err> {
257        let values = csv_values(s);
258        let [tag, min, max, prompt_name] = values[..] else {
259            return Err(Error::new(
260                ErrorKind::InvalidData,
261                "Unparseable tag metadata, wrong number of values",
262            ));
263        };
264        Ok(TagMetadata {
265            tag: tag.into(),
266            min_value: f32::from_str(min)
267                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
268            max_value: f32::from_str(max)
269                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
270            prompt_name: prompt_name.into(),
271        })
272    }
273}
274
275/// A view into the Google Fonts library.
276///
277/// This struct holds a path to a local checkout of the Google Fonts repo and
278/// provides cached, read-only accessors for families, tags and language
279/// metadata. All accessors return borrowed references where possible so callers
280/// should hold the `GoogleFonts` value for as long as they need the returned
281/// references.
282pub struct GoogleFonts {
283    repo_dir: PathBuf,
284    family_filter: Option<Regex>,
285    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
286    family_by_font_file: OnceCell<HashMap<String, usize>>,
287    tags: OnceCell<Result<Vec<Tagging>, Error>>,
288    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
289}
290
291impl GoogleFonts {
292    /// Create a new `GoogleFonts` view.
293    ///
294    /// `p` should be the path to the root of a local Google Fonts repository
295    /// checkout (the directory containing `METADATA.pb` files and the
296    /// `tags/` directory). `family_filter`, if present, is a regular
297    /// expression used to filter which families are exposed by the
298    /// `families()` iterator.
299    ///
300    /// This constructor does not perform I/O; metadata is read lazily when
301    /// the corresponding accessor is called.
302    pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
303        Self {
304            repo_dir: p,
305            family_filter,
306            families: OnceCell::new(),
307            family_by_font_file: OnceCell::new(),
308            tags: OnceCell::new(),
309            tag_metadata: OnceCell::new(),
310        }
311    }
312    /// Return the parsed tag entries for the repository.
313    ///
314    /// On first call this will read and parse the CSV files from the repo's
315    /// `tags/all` directory. Returns `Ok(&[Tag])` when parsing succeeded, or
316    /// `Err(&Error)` if an I/O or parse error occurred. The returned slice is
317    /// borrowed from internal storage and remains valid for the lifetime of
318    /// `self`.
319    pub fn tags(&self) -> Result<&[Tagging], &Error> {
320        self.tags
321            .get_or_init(|| read_tags(&self.repo_dir))
322            .as_ref()
323            .map(|tags| tags.as_slice())
324    }
325    /// Return tag metadata (min/max and prompt names) for tags defined in
326    /// the repository.
327    ///
328    /// This reads `tags/tags_metadata.csv` on first access and returns a
329    /// borrowed slice on success. Errors are returned as `Err(&Error)`.
330    pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
331        self.tag_metadata
332            .get_or_init(|| read_tag_metadata(&self.repo_dir))
333            .as_ref()
334            .map(|metadata| metadata.as_slice())
335    }
336    /// Return a list of discovered families and their parsed metadata.
337    ///
338    /// Each entry is a tuple `(PathBuf, Result<FamilyProto, ParseError>)`.
339    /// The `PathBuf` is the path to the `METADATA.pb` file for the family.
340    /// The `Result` contains the parsed `FamilyProto` on success or a
341    /// `ParseError` if the metadata could not be parsed. Families are
342    /// discovered lazily by scanning the repository and applying the
343    /// `family_filter` provided at construction (if any).
344    ///
345    /// The returned slice is borrowed from internal storage and stays valid
346    /// for the lifetime of `self`.
347    pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
348        self.families
349            .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
350            .as_slice()
351    }
352    /// Lookup a language by its identifier.
353    ///
354    /// The `lang_id` should be the language identifier used by the
355    /// `google-fonts-languages` crate (for example "en_Latn"). Returns
356    /// `Some(&LanguageProto)` if the language is known, otherwise `None`.
357    /// This is a simple passthrough to the bundled `LANGUAGES` map.
358    pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
359        LANGUAGES.get(lang_id).map(|l| &**l)
360    }
361
362    fn family_by_font_file(&self) -> &HashMap<String, usize> {
363        self.family_by_font_file.get_or_init(|| {
364            self.families()
365                .iter()
366                .enumerate()
367                .filter(|(_, (_, f))| f.is_ok())
368                .flat_map(|(i, (_, f))| {
369                    f.as_ref()
370                        .unwrap()
371                        .fonts
372                        .iter()
373                        .map(move |f| (f.filename().to_string(), i))
374                })
375                .collect()
376        })
377    }
378
379    /// Given a `FontProto`, return the family it belongs to.
380    ///
381    /// If the provided font is known (by filename) this returns `Some((path, family))`
382    /// where `path` is the path to the family's `METADATA.pb` and `family` is
383    /// a borrowed `FamilyProto`. Returns `None` if the font is not present in
384    /// the discovered families.
385    pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
386        self.family_by_font_file()
387            .get(font.filename())
388            .copied()
389            .map(|i| {
390                let (p, f) = &self.families()[i];
391                (p.as_path(), f.as_ref().unwrap())
392            })
393    }
394    /// Find the path to the font binary for a `FontProto`.
395    ///
396    /// This resolves the font's family, then constructs the filesystem path
397    /// to the font file (sibling to the family's `METADATA.pb`). If the
398    /// resulting file exists its `PathBuf` is returned. If the file cannot
399    /// be found `None` is returned. A diagnostic is printed to stderr when
400    /// the expected file is missing.
401    pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
402        let (family_path, _) = self.family(font)?;
403        let mut font_file = family_path.parent().unwrap().to_path_buf();
404        font_file.push(font.filename());
405        if !font_file.exists() {
406            eprintln!("No such file as {font_file:?}");
407        }
408        font_file.exists().then_some(font_file)
409    }
410
411    /// Our best guess at the primary language for this family
412    ///
413    /// Meant to be a good choice for things like rendering a sample string
414    /// Guess the primary language for a family.
415    ///
416    /// The heuristic is:
417    /// 1. If the family declares a `primary_language` that maps to a known
418    ///    language, return that.
419    /// 2. Otherwise if the family declares a `primary_script`, pick the most
420    ///    populous language using that script.
421    /// 3. Fall back to `en_Latn` if nothing else matches.
422    ///
423    /// This is intended as a best-effort choice to select a reasonable
424    /// language for rendering sample text, not as an authoritative mapping.
425    pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
426        // Probe primary lang, primary script, then default baselessly to latin
427        let mut primary_language: Option<&LanguageProto> = None;
428        if primary_language.is_none() && family.has_primary_language() {
429            if let Some(lang) = self.language(family.primary_language()) {
430                primary_language = Some(lang);
431            } else {
432                eprintln!(
433                    "{} specifies invalid primary_language {}",
434                    family.name(),
435                    family.primary_language()
436                );
437            }
438        }
439        if primary_language.is_none() && family.has_primary_script() {
440            // If our script matches many languages pick the one with the highest population
441            let lang = LANGUAGES
442                .values()
443                .filter(|l| l.script.is_some() && l.script() == family.primary_script())
444                .reduce(|acc, e| {
445                    if acc.population() > e.population() {
446                        acc
447                    } else {
448                        e
449                    }
450                });
451            if let Some(lang) = lang {
452                primary_language = Some(lang);
453            } else {
454                eprintln!(
455                    "{} specifies a primary_script that matches no languages {}",
456                    family.name(),
457                    family.primary_script()
458                );
459            }
460        }
461        if primary_language.is_none() {
462            primary_language = self.language("en_Latn");
463        }
464        primary_language
465            .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
466    }
467}
468
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locate the test data directory.
    ///
    /// `cargo test` seems to run in the project directory while the VSCode test
    /// runner seems to use the workspace directory, so both relative locations
    /// are probed.
    fn testdata_dir() -> std::path::PathBuf {
        let candidates = ["./resources/testdata", "../resources/testdata"];
        candidates
            .into_iter()
            .map(std::path::PathBuf::from)
            .find(|dir| dir.exists())
            .unwrap()
    }

    /// Read a test data file, given its path relative to the test data directory.
    fn testdata_file_content(relative_path: &str) -> String {
        let file = testdata_dir().join(relative_path);
        fs::read_to_string(file).unwrap()
    }

    /// Parse a test data file as a family, panicking on failure.
    fn load_family(relative_path: &str) -> FamilyProto {
        read_family(&testdata_file_content(relative_path)).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let family = load_family("roboto-metadata.pb");
        let picked = exemplar(&family).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", picked.filename());
    }

    #[test]
    fn wix_exemplar() {
        let family = load_family("wixmadefortext-metadata.pb");
        let picked = exemplar(&family).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", picked.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        load_family("roboto-metadata.pb");
    }

    #[test]
    fn parse_wix_metadata() {
        // Has the undocumented position field
        load_family("wixmadefortext-metadata.pb");
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let family = load_family("kosugimaru-metadata.pb");
        let declared = (family.primary_script(), family.primary_language());
        assert_eq!(("Jpan", "Invalid"), declared);
    }

    #[test]
    fn parse_tag3() {
        let line = "Roboto Slab, /quant/stroke_width_min, 26.31";
        Tagging::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag4() {
        let line = "Roboto Slab, wght@100, /quant/stroke_width_min, 26.31";
        Tagging::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted() {
        let line = "Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97";
        Tagging::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted2() {
        let line = "\"\",t,1";
        Tagging::from_str(line).expect("To parse");
    }
}