Skip to main content

gf_metadata/
lib.rs

1mod axes;
2mod designers;
3mod fonts_public;
4pub mod knowledge; // Don't think anyone uses this yet, but just in case
5// We don't use languages_public because we want to re-export from google_fonts_languages
6
7use std::{
8    cell::OnceCell,
9    collections::HashMap,
10    fs::{self, File},
11    io::{BufRead, BufReader, Error, ErrorKind},
12    path::{Path, PathBuf},
13    str::FromStr,
14};
15
16pub use axes::{AxisProto, FallbackProto};
17pub use designers::{AvatarProto, DesignerInfoProto};
18pub use fonts_public::*;
19use google_fonts_languages::LANGUAGES;
20pub use google_fonts_languages::{
21    ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
22};
23use protobuf::text_format::ParseError;
24use regex::Regex;
25use walkdir::WalkDir;
26
27/// Read a FamilyProto from a METADATA.pb file content.
28///
29/// This function handles undocumented fields by stripping them out before parsing.
30pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
31    if s.contains("position") {
32        let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
33        let s = re.replace_all(s, "");
34        protobuf::text_format::parse_from_str(&s)
35    } else {
36        protobuf::text_format::parse_from_str(s)
37    }
38}
39
40// Export parse_from_str for all messages, so user doesn't need a
41// dependency on protobuf crate.
42impl AxisProto {
43    /// Parse an AxisProto from its text format representation.
44    pub fn parse_from_str(s: &str) -> Result<AxisProto, ParseError> {
45        protobuf::text_format::parse_from_str(s)
46    }
47}
48impl DesignerInfoProto {
49    /// Parse a DesignerInfoProto from its text format representation.
50    pub fn parse_from_str(s: &str) -> Result<DesignerInfoProto, ParseError> {
51        protobuf::text_format::parse_from_str(s)
52    }
53}
54
55fn exemplar_score(font: &FontProto, preferred_style: FontStyle, preferred_weight: i32) -> i32 {
56    let mut score = 0;
57    // prefer preferred_style
58    if font.style() == preferred_style.style() {
59        score += 16;
60    }
61
62    // prefer closer to preferred_weight
63    score -= (font.weight() - preferred_weight).abs() / 100;
64
65    // prefer more weight to less weight
66    if font.weight() > preferred_weight {
67        score += 1;
68    }
69
70    // prefer variable
71    if font.filename().contains("].") {
72        score += 2;
73    }
74
75    score
76}
77
78/// Pick the exemplar font from a family.
79///
80/// This is the font file that is most likely to be a representative choice for
81/// the family. The heuristic is to prefer normal style, weight as close to 400
82/// as possible, and a variable font if present.
83pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
84    fn score(font: &FontProto) -> i32 {
85        exemplar_score(font, FontStyle::Normal, 400)
86    }
87    family
88        .fonts
89        .iter()
90        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
91}
92
/// Style preference used when selecting a font: upright or italic.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum FontStyle {
    /// Upright style, compared against the style string `"normal"`.
    Normal,
    /// Italic style, compared against the style string `"italic"`.
    Italic,
}

impl FontStyle {
    /// The style string this preference is compared against.
    fn style(&self) -> &str {
        match *self {
            Self::Italic => "italic",
            Self::Normal => "normal",
        }
    }
}
108
109/// Select the best matching font from a family given style and weight preferences.
110pub fn select_font(
111    family: &FamilyProto,
112    preferred_style: FontStyle,
113    preferred_weight: i32,
114) -> Option<&FontProto> {
115    let score =
116        |font: &FontProto| -> i32 { exemplar_score(font, preferred_style, preferred_weight) };
117    family
118        .fonts
119        .iter()
120        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
121}
122
123fn iter_families(
124    root: &Path,
125    filter: Option<&Regex>,
126) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
127    WalkDir::new(root)
128        .into_iter()
129        .filter_map(|d| d.ok())
130        .filter(|d| d.file_name() == "METADATA.pb")
131        .filter(move |d| {
132            filter
133                .map(|r| r.find(&d.path().to_string_lossy()).is_some())
134                .unwrap_or(true)
135        })
136        .map(|d| {
137            (
138                d.path().to_path_buf(),
139                read_family(&fs::read_to_string(d.path()).expect("To read files!")),
140            )
141        })
142}
143
144/// Iterate over all known languages.
145pub fn iter_languages(_root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
146    LANGUAGES.values().map(|l| Ok(*l.clone()))
147}
148
149/// Read tag entries from the tags/all directory.
150pub fn read_tags(root: &Path) -> Result<Vec<Tagging>, Error> {
151    let mut tag_dir = root.to_path_buf();
152    tag_dir.push("tags/all");
153    let mut tags = Vec::new();
154    for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
155        let entry = entry.expect("To access tag dir entries");
156        if entry
157            .path()
158            .extension()
159            .expect("To have extensions")
160            .to_str()
161            .expect("utf-8")
162            != "csv"
163        {
164            continue;
165        }
166        let fd = File::open(entry.path())?;
167        let rdr = BufReader::new(fd);
168        tags.extend(
169            rdr.lines()
170                .map(|s| s.expect("Valid tag lines"))
171                .map(|s| Tagging::from_str(&s).expect("Valid tag lines")),
172        );
173    }
174    Ok(tags)
175}
176
177/// Read tag metadata from tags/tags_metadata.csv
178pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
179    let mut tag_metadata_file = root.to_path_buf();
180    tag_metadata_file.push("tags/tags_metadata.csv");
181    let mut metadata = Vec::new();
182
183    let fd = File::open(&tag_metadata_file)?;
184    let rdr = BufReader::new(fd);
185    metadata.extend(
186        rdr.lines()
187            .map(|s| s.expect("Valid tag lines"))
188            .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
189    );
190
191    Ok(metadata)
192}
193
/// Split one CSV line into trimmed values.
///
/// A value that starts with `"` runs to the matching closing quote, so commas
/// inside the quotes do not split it. The surrounding quotes are kept in the
/// returned value. If a quoted value is never closed, the rest of the line is
/// returned as a single value.
///
/// A trailing comma with nothing after it does not produce an empty last
/// value, and an empty input yields no values.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();
    while !rest.is_empty() {
        rest = rest.trim();
        // Byte offset in `rest` from which to look for the separating comma.
        let search_from = match rest.strip_prefix('"') {
            Some(unquoted) => match unquoted.find('"') {
                // `close` is relative to the text after the opening quote:
                // +1 for the opening quote, +1 to step past the closing one.
                Some(close) => close + 2,
                // Unterminated quote: treat everything left as one value.
                None => rest.len(),
            },
            None => 0,
        };
        match rest[search_from..].find(',') {
            Some(offset) => {
                let comma = search_from + offset;
                values.push(rest[..comma].trim());
                rest = &rest[comma + 1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
217
/// A tag entry for a family
///
/// A tagging is an association of a family (and optionally a specific
/// designspace location within that family) with a tag and a numeric value for that tag.
///
/// Parsed from a CSV line with either three values (`family, tag, value`) or
/// four values (`family, loc, tag, value`).
#[derive(Clone, Debug)]
pub struct Tagging {
    /// Font family name
    pub family: String,
    /// Optional designspace location within the family
    ///
    /// This is given in the form used in the fonts web API; for example, `ital,wght@1,700`
    /// refers to the italic style at weight 700.
    ///
    /// This is the empty string when the parsed line had no location value.
    pub loc: String,
    /// Tag name
    pub tag: String,
    /// Tag value
    pub value: f32,
}
236
237impl FromStr for Tagging {
238    type Err = Error;
239
240    fn from_str(s: &str) -> Result<Self, Self::Err> {
241        let values = csv_values(s);
242        let (family, loc, tag, value) = match values[..] {
243            [family, tag, value] => (family, "", tag, value),
244            [family, loc, tag, value] => (family, loc, tag, value),
245            _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
246        };
247        Ok(Tagging {
248            family: family.to_string(),
249            loc: loc.to_string(),
250            tag: tag.to_string(),
251            value: f32::from_str(value)
252                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
253        })
254    }
255}
256
/// Metadata for a tag
///
/// Parsed from a CSV line with exactly four values, in the order
/// tag, minimum value, maximum value, prompt name.
#[derive(Clone, Debug)]
pub struct TagMetadata {
    /// Tag name (e.g. "/Quality/Drawing")
    pub tag: String,
    /// Minimum tag value
    pub min_value: f32,
    /// Maximum tag value
    pub max_value: f32,
    /// User friendly name for the tag (e.g. "drawing quality")
    pub prompt_name: String,
}
269
270impl FromStr for TagMetadata {
271    type Err = Error;
272
273    fn from_str(s: &str) -> Result<Self, Self::Err> {
274        let values = csv_values(s);
275        let [tag, min, max, prompt_name] = values[..] else {
276            return Err(Error::new(
277                ErrorKind::InvalidData,
278                "Unparseable tag metadata, wrong number of values",
279            ));
280        };
281        Ok(TagMetadata {
282            tag: tag.into(),
283            min_value: f32::from_str(min)
284                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
285            max_value: f32::from_str(max)
286                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
287            prompt_name: prompt_name.into(),
288        })
289    }
290}
291
/// A view into the Google Fonts library.
///
/// This struct holds a path to a local checkout of the Google Fonts repo and
/// provides cached, read-only accessors for families, tags and language
/// metadata. All accessors return borrowed references where possible so callers
/// should hold the `GoogleFonts` value for as long as they need the returned
/// references.
pub struct GoogleFonts {
    /// Root of the local repository checkout that is scanned for metadata.
    repo_dir: PathBuf,
    /// Optional regex matched against each `METADATA.pb` path to limit which
    /// families are loaded.
    family_filter: Option<Regex>,
    /// Lazily filled list of `(METADATA.pb path, parse result)` pairs.
    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
    /// Lazily built map from font filename to the index of its family in `families`.
    family_by_font_file: OnceCell<HashMap<String, usize>>,
    /// Lazily loaded result of reading the `tags/all` directory.
    tags: OnceCell<Result<Vec<Tagging>, Error>>,
    /// Lazily loaded result of reading `tags/tags_metadata.csv`.
    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
}
307
308impl GoogleFonts {
309    /// Create a new `GoogleFonts` view.
310    ///
311    /// `p` should be the path to the root of a local Google Fonts repository
312    /// checkout (the directory containing `METADATA.pb` files and the
313    /// `tags/` directory). `family_filter`, if present, is a regular
314    /// expression used to filter which families are exposed by the
315    /// `families()` iterator.
316    ///
317    /// This constructor does not perform I/O; metadata is read lazily when
318    /// the corresponding accessor is called.
319    pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
320        Self {
321            repo_dir: p,
322            family_filter,
323            families: OnceCell::new(),
324            family_by_font_file: OnceCell::new(),
325            tags: OnceCell::new(),
326            tag_metadata: OnceCell::new(),
327        }
328    }
329    /// Return the parsed tag entries for the repository.
330    ///
331    /// On first call this will read and parse the CSV files from the repo's
332    /// `tags/all` directory. Returns `Ok(&[Tag])` when parsing succeeded, or
333    /// `Err(&Error)` if an I/O or parse error occurred. The returned slice is
334    /// borrowed from internal storage and remains valid for the lifetime of
335    /// `self`.
336    pub fn tags(&self) -> Result<&[Tagging], &Error> {
337        self.tags
338            .get_or_init(|| read_tags(&self.repo_dir))
339            .as_ref()
340            .map(|tags| tags.as_slice())
341    }
342    /// Return tag metadata (min/max and prompt names) for tags defined in
343    /// the repository.
344    ///
345    /// This reads `tags/tags_metadata.csv` on first access and returns a
346    /// borrowed slice on success. Errors are returned as `Err(&Error)`.
347    pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
348        self.tag_metadata
349            .get_or_init(|| read_tag_metadata(&self.repo_dir))
350            .as_ref()
351            .map(|metadata| metadata.as_slice())
352    }
353    /// Return a list of discovered families and their parsed metadata.
354    ///
355    /// Each entry is a tuple `(PathBuf, Result<FamilyProto, ParseError>)`.
356    /// The `PathBuf` is the path to the `METADATA.pb` file for the family.
357    /// The `Result` contains the parsed `FamilyProto` on success or a
358    /// `ParseError` if the metadata could not be parsed. Families are
359    /// discovered lazily by scanning the repository and applying the
360    /// `family_filter` provided at construction (if any).
361    ///
362    /// The returned slice is borrowed from internal storage and stays valid
363    /// for the lifetime of `self`.
364    pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
365        self.families
366            .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
367            .as_slice()
368    }
    /// Lookup a language by its identifier.
    ///
    /// The `lang_id` should be the language identifier used by the
    /// `google-fonts-languages` crate (for example "en_Latn"). Returns
    /// `Some(&LanguageProto)` if the language is known, otherwise `None`.
    /// This is a simple passthrough to the bundled `LANGUAGES` map; no state
    /// of `self` is consulted.
    pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
        // Deref through the map's stored value to hand out a plain `&LanguageProto`.
        LANGUAGES.get(lang_id).map(|l| &**l)
    }
378
379    fn family_by_font_file(&self) -> &HashMap<String, usize> {
380        self.family_by_font_file.get_or_init(|| {
381            self.families()
382                .iter()
383                .enumerate()
384                .filter(|(_, (_, f))| f.is_ok())
385                .flat_map(|(i, (_, f))| {
386                    f.as_ref()
387                        .unwrap()
388                        .fonts
389                        .iter()
390                        .map(move |f| (f.filename().to_string(), i))
391                })
392                .collect()
393        })
394    }
395
396    /// Given a `FontProto`, return the family it belongs to.
397    ///
398    /// If the provided font is known (by filename) this returns `Some((path, family))`
399    /// where `path` is the path to the family's `METADATA.pb` and `family` is
400    /// a borrowed `FamilyProto`. Returns `None` if the font is not present in
401    /// the discovered families.
402    pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
403        self.family_by_font_file()
404            .get(font.filename())
405            .copied()
406            .map(|i| {
407                let (p, f) = &self.families()[i];
408                (p.as_path(), f.as_ref().unwrap())
409            })
410    }
411    /// Find the path to the font binary for a `FontProto`.
412    ///
413    /// This resolves the font's family, then constructs the filesystem path
414    /// to the font file (sibling to the family's `METADATA.pb`). If the
415    /// resulting file exists its `PathBuf` is returned. If the file cannot
416    /// be found `None` is returned. A diagnostic is printed to stderr when
417    /// the expected file is missing.
418    pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
419        let (family_path, _) = self.family(font)?;
420        let mut font_file = family_path.parent().unwrap().to_path_buf();
421        font_file.push(font.filename());
422        if !font_file.exists() {
423            eprintln!("No such file as {font_file:?}");
424        }
425        font_file.exists().then_some(font_file)
426    }
427
428    /// Our best guess at the primary language for this family
429    ///
430    /// Meant to be a good choice for things like rendering a sample string
431    /// Guess the primary language for a family.
432    ///
433    /// The heuristic is:
434    /// 1. If the family declares a `primary_language` that maps to a known
435    ///    language, return that.
436    /// 2. Otherwise if the family declares a `primary_script`, pick the most
437    ///    populous language using that script.
438    /// 3. Fall back to `en_Latn` if nothing else matches.
439    ///
440    /// This is intended as a best-effort choice to select a reasonable
441    /// language for rendering sample text, not as an authoritative mapping.
442    pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
443        // Probe primary lang, primary script, then default baselessly to latin
444        let mut primary_language: Option<&LanguageProto> = None;
445        if primary_language.is_none() && family.has_primary_language() {
446            if let Some(lang) = self.language(family.primary_language()) {
447                primary_language = Some(lang);
448            } else {
449                eprintln!(
450                    "{} specifies invalid primary_language {}",
451                    family.name(),
452                    family.primary_language()
453                );
454            }
455        }
456        if primary_language.is_none() && family.has_primary_script() {
457            // If our script matches many languages pick the one with the highest population
458            let lang = LANGUAGES
459                .values()
460                .filter(|l| l.script.is_some() && l.script() == family.primary_script())
461                .reduce(|acc, e| {
462                    if acc.population() > e.population() {
463                        acc
464                    } else {
465                        e
466                    }
467                });
468            if let Some(lang) = lang {
469                primary_language = Some(lang);
470            } else {
471                eprintln!(
472                    "{} specifies a primary_script that matches no languages {}",
473                    family.name(),
474                    family.primary_script()
475                );
476            }
477        }
478        if primary_language.is_none() {
479            primary_language = self.language("en_Latn");
480        }
481        primary_language
482            .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
483    }
484}
485
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locate the test data directory regardless of the working directory.
    fn testdata_dir() -> std::path::PathBuf {
        // cargo test seems to run in the project directory
        // VSCode test seems to run in the workspace directory
        // probe for the file we want in hopes of finding it regardless

        ["./resources/testdata", "../resources/testdata"]
            .iter()
            .map(std::path::PathBuf::from)
            .find(|pb| pb.exists())
            .expect("resources/testdata should exist relative to the working directory")
    }

    /// Read a file from the test data directory into a string.
    fn testdata_file_content(relative_path: &str) -> String {
        let mut p = testdata_dir();
        p.push(relative_path);
        fs::read_to_string(p).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let roboto = read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
        let exemplar = exemplar(&roboto).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", exemplar.filename());
    }

    #[test]
    fn wix_exemplar() {
        let wix = read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
        let exemplar = exemplar(&wix).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", exemplar.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
    }

    #[test]
    fn parse_wix_metadata() {
        // Has the undocumented position field
        read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let family = read_family(&testdata_file_content("kosugimaru-metadata.pb")).unwrap();
        assert_eq!(
            ("Jpan", "Invalid"),
            (family.primary_script(), family.primary_language())
        );
    }

    #[test]
    fn parse_tag3() {
        let tagging =
            Tagging::from_str("Roboto Slab, /quant/stroke_width_min, 26.31").expect("To parse");
        assert_eq!("Roboto Slab", tagging.family);
        // No location value on a three-value line
        assert_eq!("", tagging.loc);
        assert_eq!("/quant/stroke_width_min", tagging.tag);
        assert_eq!(26.31_f32, tagging.value);
    }

    #[test]
    fn parse_tag4() {
        let tagging = Tagging::from_str("Roboto Slab, wght@100, /quant/stroke_width_min, 26.31")
            .expect("To parse");
        assert_eq!("Roboto Slab", tagging.family);
        assert_eq!("wght@100", tagging.loc);
        assert_eq!("/quant/stroke_width_min", tagging.tag);
        assert_eq!(26.31_f32, tagging.value);
    }

    #[test]
    fn parse_tag_quoted() {
        let tagging =
            Tagging::from_str("Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97")
                .expect("To parse");
        // The commas inside the quotes must not produce extra values
        assert_eq!("Georama", tagging.family);
        assert_eq!("/quant/stroke_width_min", tagging.tag);
        assert_eq!(16.97_f32, tagging.value);
    }

    #[test]
    fn parse_tag_quoted2() {
        let tagging = Tagging::from_str("\"\",t,1").expect("To parse");
        assert_eq!("t", tagging.tag);
        assert_eq!(1.0_f32, tagging.value);
    }

    #[test]
    fn parse_tag_wrong_number_of_values() {
        assert!(Tagging::from_str("only, two").is_err());
    }
}
566}