Skip to main content

gf_metadata/
lib.rs

1mod axes;
2mod designers;
3mod fonts_public;
4pub mod knowledge; // Don't think anyone uses this yet, but just in case
5// We don't use languages_public because we want to re-export from google_fonts_languages
6
7use std::{
8    cell::OnceCell,
9    collections::HashMap,
10    fs::{self, File},
11    io::{BufRead, BufReader, Error, ErrorKind},
12    path::{Path, PathBuf},
13    str::FromStr,
14};
15
16pub use axes::{AxisProto, FallbackProto};
17pub use designers::{AvatarProto, DesignerInfoProto};
18pub use fonts_public::*;
19use google_fonts_languages::LANGUAGES;
20pub use google_fonts_languages::{
21    ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
22};
23use protobuf::text_format::ParseError;
24use regex::Regex;
25use walkdir::WalkDir;
26
27/// Read a FamilyProto from a METADATA.pb file content.
28///
29/// This function handles undocumented fields by stripping them out before parsing.
30pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
31    if s.contains("position") {
32        let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
33        let s = re.replace_all(s, "");
34        protobuf::text_format::parse_from_str(&s)
35    } else {
36        protobuf::text_format::parse_from_str(s)
37    }
38}
39
40fn exemplar_score(font: &FontProto, preferred_style: FontStyle, preferred_weight: i32) -> i32 {
41    let mut score = 0;
42    // prefer preferred_style
43    if font.style() == preferred_style.style() {
44        score += 16;
45    }
46
47    // prefer closer to preferred_weight
48    score -= (font.weight() - preferred_weight).abs() / 100;
49
50    // prefer more weight to less weight
51    if font.weight() > preferred_weight {
52        score += 1;
53    }
54
55    // prefer variable
56    if font.filename().contains("].") {
57        score += 2;
58    }
59
60    score
61}
62
63/// Pick the exemplar font from a family.
64///
65/// This is the font file that is most likely to be a representative choice for
66/// the family. The heuristic is to prefer normal style, weight as close to 400
67/// as possible, and a variable font if present.
68pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
69    fn score(font: &FontProto) -> i32 {
70        exemplar_score(font, FontStyle::Normal, 400)
71    }
72    family
73        .fonts
74        .iter()
75        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
76}
77
/// Font style preference for font selection (normal or italic)
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FontStyle {
    /// Upright style.
    Normal,
    /// Italic style.
    Italic,
}
84
85impl FontStyle {
86    fn style(&self) -> &str {
87        match self {
88            FontStyle::Normal => "normal",
89            FontStyle::Italic => "italic",
90        }
91    }
92}
93
94/// Select the best matching font from a family given style and weight preferences.
95pub fn select_font(
96    family: &FamilyProto,
97    preferred_style: FontStyle,
98    preferred_weight: i32,
99) -> Option<&FontProto> {
100    let score =
101        |font: &FontProto| -> i32 { exemplar_score(font, preferred_style, preferred_weight) };
102    family
103        .fonts
104        .iter()
105        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
106}
107
108fn iter_families(
109    root: &Path,
110    filter: Option<&Regex>,
111) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
112    WalkDir::new(root)
113        .into_iter()
114        .filter_map(|d| d.ok())
115        .filter(|d| d.file_name() == "METADATA.pb")
116        .filter(move |d| {
117            filter
118                .map(|r| r.find(&d.path().to_string_lossy()).is_some())
119                .unwrap_or(true)
120        })
121        .map(|d| {
122            (
123                d.path().to_path_buf(),
124                read_family(&fs::read_to_string(d.path()).expect("To read files!")),
125            )
126        })
127}
128
129/// Iterate over all known languages.
130pub fn iter_languages(_root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
131    LANGUAGES.values().map(|l| Ok(*l.clone()))
132}
133
134/// Read tag entries from the tags/all directory.
135pub fn read_tags(root: &Path) -> Result<Vec<Tagging>, Error> {
136    let mut tag_dir = root.to_path_buf();
137    tag_dir.push("tags/all");
138    let mut tags = Vec::new();
139    for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
140        let entry = entry.expect("To access tag dir entries");
141        if entry
142            .path()
143            .extension()
144            .expect("To have extensions")
145            .to_str()
146            .expect("utf-8")
147            != "csv"
148        {
149            continue;
150        }
151        let fd = File::open(entry.path())?;
152        let rdr = BufReader::new(fd);
153        tags.extend(
154            rdr.lines()
155                .map(|s| s.expect("Valid tag lines"))
156                .map(|s| Tagging::from_str(&s).expect("Valid tag lines")),
157        );
158    }
159    Ok(tags)
160}
161
162/// Read tag metadata from tags/tags_metadata.csv
163pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
164    let mut tag_metadata_file = root.to_path_buf();
165    tag_metadata_file.push("tags/tags_metadata.csv");
166    let mut metadata = Vec::new();
167
168    let fd = File::open(&tag_metadata_file)?;
169    let rdr = BufReader::new(fd);
170    metadata.extend(
171        rdr.lines()
172            .map(|s| s.expect("Valid tag lines"))
173            .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
174    );
175
176    Ok(metadata)
177}
178
/// Split one CSV line into trimmed field slices borrowed from `s`.
///
/// Fields are separated by commas. A field whose first non-blank character is
/// `"` is treated as quoted: commas up to and including its closing quote do
/// not end the field. The surrounding quotes are kept in the returned slice,
/// and doubled quotes inside a quoted field are not interpreted as escapes.
/// A quote that is never closed makes the rest of the line a single field.
///
/// An empty input yields no fields, and no empty field is produced for a
/// comma that is the very last character of the input.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();
    while !rest.is_empty() {
        rest = rest.trim();

        // Byte offset in `rest` from which to look for the separating comma.
        let search_from = match rest.strip_prefix('"') {
            Some(inner) => match inner.find('"') {
                // `close` is relative to `inner`: +1 for the opening quote,
                // +1 more to step past the closing quote itself.
                Some(close) => close + 2,
                // Unclosed quote: nothing after it can be a separator.
                None => rest.len(),
            },
            None => 0,
        };

        match rest[search_from..].find(',') {
            Some(offset) => {
                let comma = search_from + offset;
                values.push(rest[..comma].trim());
                rest = &rest[comma + 1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
202
/// A tag entry for a family
///
/// A tagging is an association of a family (and optionally a specific
/// designspace location within that family) with a tag and a numeric value for that tag.
///
/// Instances are parsed via `FromStr` from a single CSV line holding either
/// `family, tag, value` or `family, loc, tag, value`.
#[derive(Clone, Debug)]
pub struct Tagging {
    /// Font family name
    pub family: String,
    /// Optional designspace location within the family
    ///
    /// This is given in the form used in the fonts web API; for example, `ital,wght@1,700`
    /// refers to the italic style at weight 700.
    ///
    /// Left as the empty string when the parsed line has no location column.
    pub loc: String,
    /// Tag name
    pub tag: String,
    /// Tag value
    pub value: f32,
}
221
222impl FromStr for Tagging {
223    type Err = Error;
224
225    fn from_str(s: &str) -> Result<Self, Self::Err> {
226        let values = csv_values(s);
227        let (family, loc, tag, value) = match values[..] {
228            [family, tag, value] => (family, "", tag, value),
229            [family, loc, tag, value] => (family, loc, tag, value),
230            _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
231        };
232        Ok(Tagging {
233            family: family.to_string(),
234            loc: loc.to_string(),
235            tag: tag.to_string(),
236            value: f32::from_str(value)
237                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
238        })
239    }
240}
241
/// Metadata for a tag
///
/// Instances are parsed via `FromStr` from a single CSV line of the form
/// `tag, min, max, prompt_name`.
#[derive(Clone, Debug)]
pub struct TagMetadata {
    /// Tag name (e.g. "/Quality/Drawing")
    pub tag: String,
    /// Minimum tag value
    pub min_value: f32,
    /// Maximum tag value
    pub max_value: f32,
    /// User friendly name for the tag (e.g. "drawing quality")
    pub prompt_name: String,
}
254
255impl FromStr for TagMetadata {
256    type Err = Error;
257
258    fn from_str(s: &str) -> Result<Self, Self::Err> {
259        let values = csv_values(s);
260        let [tag, min, max, prompt_name] = values[..] else {
261            return Err(Error::new(
262                ErrorKind::InvalidData,
263                "Unparseable tag metadata, wrong number of values",
264            ));
265        };
266        Ok(TagMetadata {
267            tag: tag.into(),
268            min_value: f32::from_str(min)
269                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
270            max_value: f32::from_str(max)
271                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
272            prompt_name: prompt_name.into(),
273        })
274    }
275}
276
/// A view into the Google Fonts library.
///
/// This struct holds a path to a local checkout of the Google Fonts repo and
/// provides cached, read-only accessors for families, tags and language
/// metadata. All accessors return borrowed references where possible so callers
/// should hold the `GoogleFonts` value for as long as they need the returned
/// references.
pub struct GoogleFonts {
    /// Root of the local repository checkout that is scanned for metadata.
    repo_dir: PathBuf,
    /// Optional regex matched against `METADATA.pb` paths to limit which families load.
    family_filter: Option<Regex>,
    /// Lazily filled list of (metadata path, parse result), one per discovered family.
    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
    /// Lazily built map from font filename to the index of its family in `families`.
    family_by_font_file: OnceCell<HashMap<String, usize>>,
    /// Lazily read result of loading the CSV files under `tags/all`.
    tags: OnceCell<Result<Vec<Tagging>, Error>>,
    /// Lazily read result of loading `tags/tags_metadata.csv`.
    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
}
292
293impl GoogleFonts {
294    /// Create a new `GoogleFonts` view.
295    ///
296    /// `p` should be the path to the root of a local Google Fonts repository
297    /// checkout (the directory containing `METADATA.pb` files and the
298    /// `tags/` directory). `family_filter`, if present, is a regular
299    /// expression used to filter which families are exposed by the
300    /// `families()` iterator.
301    ///
302    /// This constructor does not perform I/O; metadata is read lazily when
303    /// the corresponding accessor is called.
304    pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
305        Self {
306            repo_dir: p,
307            family_filter,
308            families: OnceCell::new(),
309            family_by_font_file: OnceCell::new(),
310            tags: OnceCell::new(),
311            tag_metadata: OnceCell::new(),
312        }
313    }
314    /// Return the parsed tag entries for the repository.
315    ///
316    /// On first call this will read and parse the CSV files from the repo's
317    /// `tags/all` directory. Returns `Ok(&[Tag])` when parsing succeeded, or
318    /// `Err(&Error)` if an I/O or parse error occurred. The returned slice is
319    /// borrowed from internal storage and remains valid for the lifetime of
320    /// `self`.
321    pub fn tags(&self) -> Result<&[Tagging], &Error> {
322        self.tags
323            .get_or_init(|| read_tags(&self.repo_dir))
324            .as_ref()
325            .map(|tags| tags.as_slice())
326    }
327    /// Return tag metadata (min/max and prompt names) for tags defined in
328    /// the repository.
329    ///
330    /// This reads `tags/tags_metadata.csv` on first access and returns a
331    /// borrowed slice on success. Errors are returned as `Err(&Error)`.
332    pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
333        self.tag_metadata
334            .get_or_init(|| read_tag_metadata(&self.repo_dir))
335            .as_ref()
336            .map(|metadata| metadata.as_slice())
337    }
338    /// Return a list of discovered families and their parsed metadata.
339    ///
340    /// Each entry is a tuple `(PathBuf, Result<FamilyProto, ParseError>)`.
341    /// The `PathBuf` is the path to the `METADATA.pb` file for the family.
342    /// The `Result` contains the parsed `FamilyProto` on success or a
343    /// `ParseError` if the metadata could not be parsed. Families are
344    /// discovered lazily by scanning the repository and applying the
345    /// `family_filter` provided at construction (if any).
346    ///
347    /// The returned slice is borrowed from internal storage and stays valid
348    /// for the lifetime of `self`.
349    pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
350        self.families
351            .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
352            .as_slice()
353    }
354    /// Lookup a language by its identifier.
355    ///
356    /// The `lang_id` should be the language identifier used by the
357    /// `google-fonts-languages` crate (for example "en_Latn"). Returns
358    /// `Some(&LanguageProto)` if the language is known, otherwise `None`.
359    /// This is a simple passthrough to the bundled `LANGUAGES` map.
360    pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
361        LANGUAGES.get(lang_id).map(|l| &**l)
362    }
363
364    fn family_by_font_file(&self) -> &HashMap<String, usize> {
365        self.family_by_font_file.get_or_init(|| {
366            self.families()
367                .iter()
368                .enumerate()
369                .filter(|(_, (_, f))| f.is_ok())
370                .flat_map(|(i, (_, f))| {
371                    f.as_ref()
372                        .unwrap()
373                        .fonts
374                        .iter()
375                        .map(move |f| (f.filename().to_string(), i))
376                })
377                .collect()
378        })
379    }
380
381    /// Given a `FontProto`, return the family it belongs to.
382    ///
383    /// If the provided font is known (by filename) this returns `Some((path, family))`
384    /// where `path` is the path to the family's `METADATA.pb` and `family` is
385    /// a borrowed `FamilyProto`. Returns `None` if the font is not present in
386    /// the discovered families.
387    pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
388        self.family_by_font_file()
389            .get(font.filename())
390            .copied()
391            .map(|i| {
392                let (p, f) = &self.families()[i];
393                (p.as_path(), f.as_ref().unwrap())
394            })
395    }
396    /// Find the path to the font binary for a `FontProto`.
397    ///
398    /// This resolves the font's family, then constructs the filesystem path
399    /// to the font file (sibling to the family's `METADATA.pb`). If the
400    /// resulting file exists its `PathBuf` is returned. If the file cannot
401    /// be found `None` is returned. A diagnostic is printed to stderr when
402    /// the expected file is missing.
403    pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
404        let (family_path, _) = self.family(font)?;
405        let mut font_file = family_path.parent().unwrap().to_path_buf();
406        font_file.push(font.filename());
407        if !font_file.exists() {
408            eprintln!("No such file as {font_file:?}");
409        }
410        font_file.exists().then_some(font_file)
411    }
412
413    /// Our best guess at the primary language for this family
414    ///
415    /// Meant to be a good choice for things like rendering a sample string
416    /// Guess the primary language for a family.
417    ///
418    /// The heuristic is:
419    /// 1. If the family declares a `primary_language` that maps to a known
420    ///    language, return that.
421    /// 2. Otherwise if the family declares a `primary_script`, pick the most
422    ///    populous language using that script.
423    /// 3. Fall back to `en_Latn` if nothing else matches.
424    ///
425    /// This is intended as a best-effort choice to select a reasonable
426    /// language for rendering sample text, not as an authoritative mapping.
427    pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
428        // Probe primary lang, primary script, then default baselessly to latin
429        let mut primary_language: Option<&LanguageProto> = None;
430        if primary_language.is_none() && family.has_primary_language() {
431            if let Some(lang) = self.language(family.primary_language()) {
432                primary_language = Some(lang);
433            } else {
434                eprintln!(
435                    "{} specifies invalid primary_language {}",
436                    family.name(),
437                    family.primary_language()
438                );
439            }
440        }
441        if primary_language.is_none() && family.has_primary_script() {
442            // If our script matches many languages pick the one with the highest population
443            let lang = LANGUAGES
444                .values()
445                .filter(|l| l.script.is_some() && l.script() == family.primary_script())
446                .reduce(|acc, e| {
447                    if acc.population() > e.population() {
448                        acc
449                    } else {
450                        e
451                    }
452                });
453            if let Some(lang) = lang {
454                primary_language = Some(lang);
455            } else {
456                eprintln!(
457                    "{} specifies a primary_script that matches no languages {}",
458                    family.name(),
459                    family.primary_script()
460                );
461            }
462        }
463        if primary_language.is_none() {
464            primary_language = self.language("en_Latn");
465        }
466        primary_language
467            .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
468    }
469}
470
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locate the directory holding the test fixtures.
    fn testdata_dir() -> std::path::PathBuf {
        // cargo test seems to run in the project directory
        // VSCode test seems to run in the workspace directory
        // probe for the file we want in hopes of finding it regardless

        ["./resources/testdata", "../resources/testdata"]
            .iter()
            .map(std::path::PathBuf::from)
            .find(|pb| pb.exists())
            .expect("resources/testdata to exist in or above the working directory")
    }

    /// Read a fixture, given its path relative to the test data directory.
    fn testdata_file_content(relative_path: &str) -> String {
        let mut p = testdata_dir();
        p.push(relative_path);
        fs::read_to_string(p).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let roboto = read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
        let exemplar = exemplar(&roboto).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", exemplar.filename());
    }

    #[test]
    fn wix_exemplar() {
        let wix = read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
        let exemplar = exemplar(&wix).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", exemplar.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
    }

    #[test]
    fn parse_wix_metadata() {
        // Has the undocumented position field
        read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let family = read_family(&testdata_file_content("kosugimaru-metadata.pb")).unwrap();
        assert_eq!(
            ("Jpan", "Invalid"),
            (family.primary_script(), family.primary_language())
        );
    }

    #[test]
    fn parse_tag3() {
        let tagging =
            Tagging::from_str("Roboto Slab, /quant/stroke_width_min, 26.31").expect("To parse");
        assert_eq!("Roboto Slab", tagging.family);
        // Three columns means no designspace location.
        assert_eq!("", tagging.loc);
        assert_eq!("/quant/stroke_width_min", tagging.tag);
        assert!((tagging.value - 26.31).abs() < f32::EPSILON);
    }

    #[test]
    fn parse_tag4() {
        let tagging = Tagging::from_str("Roboto Slab, wght@100, /quant/stroke_width_min, 26.31")
            .expect("To parse");
        assert_eq!("Roboto Slab", tagging.family);
        assert_eq!("wght@100", tagging.loc);
        assert_eq!("/quant/stroke_width_min", tagging.tag);
        assert!((tagging.value - 26.31).abs() < f32::EPSILON);
    }

    #[test]
    fn parse_tag_quoted() {
        let tagging =
            Tagging::from_str("Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97")
                .expect("To parse");
        assert_eq!("Georama", tagging.family);
        // The commas inside the quotes must not split the location.
        assert!(tagging.loc.contains("ital,wght@1,100"));
        assert_eq!("/quant/stroke_width_min", tagging.tag);
        assert!((tagging.value - 16.97).abs() < f32::EPSILON);
    }

    #[test]
    fn parse_tag_quoted2() {
        let tagging = Tagging::from_str("\"\",t,1").expect("To parse");
        assert_eq!("t", tagging.tag);
        assert!((tagging.value - 1.0).abs() < f32::EPSILON);
    }
}
551}