gf_metadata/
lib.rs

1mod fonts_public;
2mod languages_public;
3
4use std::{
5    cell::OnceCell,
6    collections::HashMap,
7    fs::{self, File},
8    io::{BufRead, BufReader, Error, ErrorKind},
9    path::{Path, PathBuf},
10    str::FromStr,
11};
12
13pub use fonts_public::*;
14pub use languages_public::{
15    ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
16};
17use protobuf::text_format::ParseError;
18use regex::Regex;
19use walkdir::WalkDir;
20
21pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
22    if s.contains("position") {
23        let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
24        let s = re.replace_all(s, "");
25        protobuf::text_format::parse_from_str(&s)
26    } else {
27        protobuf::text_format::parse_from_str(s)
28    }
29}
30
31pub fn read_language(s: &str) -> Result<LanguageProto, ParseError> {
32    protobuf::text_format::parse_from_str(s)
33}
34
35fn exemplar_score(font: &FontProto) -> i32 {
36    let mut score = 0;
37    // prefer regular
38    if font.style() == "normal" {
39        score += 16;
40    }
41
42    // prefer closer to 400
43    score -= (font.weight() - 400) / 100;
44
45    // prefer variable
46    if font.filename().contains("].") {
47        score += 1;
48    }
49
50    score
51}
52
53pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
54    family.fonts.iter().reduce(|acc, e| {
55        if exemplar_score(acc) >= exemplar_score(e) {
56            acc
57        } else {
58            e
59        }
60    })
61}
62
63fn iter_families(
64    root: &Path,
65    filter: Option<&Regex>,
66) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
67    WalkDir::new(root)
68        .into_iter()
69        .filter_map(|d| d.ok())
70        .filter(|d| d.file_name() == "METADATA.pb")
71        .filter(move |d| {
72            filter
73                .map(|r| r.find(&d.path().to_string_lossy()).is_some())
74                .unwrap_or(true)
75        })
76        .map(|d| {
77            (
78                d.path().to_path_buf(),
79                read_family(&fs::read_to_string(d.path()).expect("To read files!")),
80            )
81        })
82}
83
84pub fn iter_languages(root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
85    WalkDir::new(root)
86        .into_iter()
87        .filter_map(|d| d.ok())
88        .filter(|d| {
89            d.path()
90                .canonicalize()
91                .unwrap()
92                .to_str()
93                .unwrap()
94                .contains("gflanguages/data/languages")
95                && d.file_name().to_string_lossy().ends_with(".textproto")
96        })
97        .map(|d| read_language(&fs::read_to_string(d.path()).expect("To read files!")))
98}
99
100pub fn read_tags(root: &Path) -> Result<Vec<Tag>, Error> {
101    let mut tag_dir = root.to_path_buf();
102    tag_dir.push("tags/all");
103    let mut tags = Vec::new();
104    for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
105        let entry = entry.expect("To access tag dir entries");
106        if entry
107            .path()
108            .extension()
109            .expect("To have extensions")
110            .to_str()
111            .expect("utf-8")
112            != "csv"
113        {
114            continue;
115        }
116        let fd = File::open(&entry.path())?;
117        let rdr = BufReader::new(fd);
118        tags.extend(
119            rdr.lines()
120                .map(|s| s.expect("Valid tag lines"))
121                .map(|s| Tag::from_str(&s).expect("Valid tag lines")),
122        );
123    }
124    Ok(tags)
125}
126
127pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
128    let mut tag_metadata_file = root.to_path_buf();
129    tag_metadata_file.push("tags/tags_metadata.csv");
130    let mut metadata = Vec::new();
131
132    let fd = File::open(&tag_metadata_file)?;
133    let rdr = BufReader::new(fd);
134    metadata.extend(
135        rdr.lines()
136            .map(|s| s.expect("Valid tag lines"))
137            .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
138    );
139
140    Ok(metadata)
141}
142
/// Splits one csv line into its comma-separated values.
///
/// * Surrounding whitespace is trimmed from every value.
/// * A value that starts with `"` runs to its closing `"`, so commas inside
///   the quotes do not split it. The quotes are kept in the returned value.
/// * If a quoted value has no closing quote, the rest of the line is returned
///   as a single value.
/// * An empty input yields no values, and a trailing comma with nothing after
///   it does not produce a final empty value.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();
    while !rest.is_empty() {
        rest = rest.trim();

        // Where the search for the separating comma may begin. For a quoted value
        // that is just past the closing quote: `close` is relative to the text after
        // the opening quote, so +1 for the opening quote and +1 for the closing one.
        let search_from = match rest.strip_prefix('"') {
            Some(after_open) => match after_open.find('"') {
                Some(close) => close + 2,
                None => rest.len(),
            },
            None => 0,
        };

        match rest[search_from..].find(',') {
            Some(offset) => {
                let comma = search_from + offset;
                values.push(rest[..comma].trim());
                rest = &rest[comma + 1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
166
167#[derive(Clone, Debug)]
168pub struct Tag {
169    pub family: String,
170    pub loc: String,
171    pub tag: String,
172    pub value: f32,
173}
174
175impl FromStr for Tag {
176    type Err = Error;
177
178    fn from_str(s: &str) -> Result<Self, Self::Err> {
179        let values = csv_values(s);
180        let (family, loc, tag, value) = match values[..] {
181            [family, tag, value] => (family, "", tag, value),
182            [family, loc, tag, value] => (family, loc, tag, value),
183            _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
184        };
185        Ok(Tag {
186            family: family.to_string(),
187            loc: loc.to_string(),
188            tag: tag.to_string(),
189            value: f32::from_str(value)
190                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
191        })
192    }
193}
194
195#[derive(Clone, Debug)]
196pub struct TagMetadata {
197    pub tag: String,
198    pub min_value: f32,
199    pub max_value: f32,
200    pub prompt_name: String,
201}
202
203impl FromStr for TagMetadata {
204    type Err = Error;
205
206    fn from_str(s: &str) -> Result<Self, Self::Err> {
207        let values = csv_values(s);
208        let [tag, min, max, prompt_name] = values[..] else {
209            return Err(Error::new(
210                ErrorKind::InvalidData,
211                "Unparseable tag metadata, wrong number of values",
212            ));
213        };
214        Ok(TagMetadata {
215            tag: tag.into(),
216            min_value: f32::from_str(min)
217                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
218            max_value: f32::from_str(max)
219                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
220            prompt_name: prompt_name.into(),
221        })
222    }
223}
224
225pub struct GoogleFonts {
226    repo_dir: PathBuf,
227    family_filter: Option<Regex>,
228    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
229    languages: OnceCell<Vec<Result<LanguageProto, ParseError>>>,
230    family_by_font_file: OnceCell<HashMap<String, usize>>,
231    tags: OnceCell<Result<Vec<Tag>, Error>>,
232    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
233}
234
235impl GoogleFonts {
236    pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
237        Self {
238            repo_dir: p,
239            family_filter,
240            families: OnceCell::new(),
241            languages: OnceCell::new(),
242            family_by_font_file: OnceCell::new(),
243            tags: OnceCell::new(),
244            tag_metadata: OnceCell::new(),
245        }
246    }
247
248    pub fn tags(&self) -> Result<&[Tag], &Error> {
249        self.tags
250            .get_or_init(|| read_tags(&self.repo_dir))
251            .as_ref()
252            .map(|tags| tags.as_slice())
253    }
254
255    pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
256        self.tag_metadata
257            .get_or_init(|| read_tag_metadata(&self.repo_dir))
258            .as_ref()
259            .map(|metadata| metadata.as_slice())
260    }
261
262    pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
263        self.families
264            .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
265            .as_slice()
266    }
267
268    pub fn languages(&self) -> &[Result<LanguageProto, ParseError>] {
269        self.languages
270            .get_or_init(|| iter_languages(&self.repo_dir).collect())
271            .as_slice()
272    }
273
274    pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
275        self.languages()
276            .iter()
277            .filter_map(|l| l.as_ref().ok())
278            .find(|l| l.id() == lang_id)
279    }
280
281    fn family_by_font_file(&self) -> &HashMap<String, usize> {
282        self.family_by_font_file.get_or_init(|| {
283            self.families()
284                .iter()
285                .enumerate()
286                .filter(|(_, (_, f))| f.is_ok())
287                .flat_map(|(i, (_, f))| {
288                    f.as_ref()
289                        .unwrap()
290                        .fonts
291                        .iter()
292                        .map(move |f| (f.filename().to_string(), i))
293                })
294                .collect()
295        })
296    }
297
298    pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
299        self.family_by_font_file()
300            .get(font.filename())
301            .copied()
302            .map(|i| {
303                let (p, f) = &self.families()[i];
304                (p.as_path(), f.as_ref().unwrap())
305            })
306    }
307
308    pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
309        let Some((family_path, _)) = self.family(font) else {
310            return None;
311        };
312        let mut font_file = family_path.parent().unwrap().to_path_buf();
313        font_file.push(font.filename());
314        if !font_file.exists() {
315            eprintln!("No such file as {font_file:?}");
316        }
317        font_file.exists().then_some(font_file)
318    }
319
320    /// Our best guess at the primary language for this family
321    ///
322    /// Meant to be a good choice for things like rendering a sample string
323    pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
324        // Probe primary lang, primary script, then default baselessly to latin
325        let mut primary_language: Option<&LanguageProto> = None;
326        if primary_language.is_none() && family.has_primary_language() {
327            if let Some(lang) = self.language(family.primary_language()) {
328                primary_language = Some(lang);
329            } else {
330                eprintln!(
331                    "{} specifies invalid primary_language {}",
332                    family.name(),
333                    family.primary_language()
334                );
335            }
336        }
337        if primary_language.is_none() && family.has_primary_script() {
338            // If our script matches many languages pick the one with the highest population
339            let lang = self
340                .languages()
341                .iter()
342                .filter_map(|r| r.as_ref().ok())
343                .filter(|l| l.has_script() && l.script() == family.primary_script())
344                .reduce(|acc, e| {
345                    if acc.population() > e.population() {
346                        acc
347                    } else {
348                        e
349                    }
350                });
351            if let Some(lang) = lang {
352                primary_language = Some(lang);
353            } else {
354                eprintln!(
355                    "{} specifies a primary_script that matches no languages {}",
356                    family.name(),
357                    family.primary_script()
358                );
359            }
360        }
361        if primary_language.is_none() {
362            primary_language = self.language("en_Latn");
363        }
364        primary_language
365            .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
366    }
367}
368
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Finds the test data directory regardless of the working directory in use.
    fn testdata_dir() -> std::path::PathBuf {
        // cargo test seems to run in the project directory
        // VSCode test seems to run in the workspace directory
        // probe for the file we want in hopes of finding it regardless
        let candidates = ["./resources/testdata", "../resources/testdata"];
        candidates
            .iter()
            .map(std::path::PathBuf::from)
            .find(|dir| dir.exists())
            .unwrap()
    }

    /// Reads a file, given relative to the test data directory, into a string.
    fn testdata_file_content(relative_path: &str) -> String {
        let path = testdata_dir().join(relative_path);
        fs::read_to_string(path).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let content = testdata_file_content("roboto-metadata.pb");
        let family = read_family(&content).unwrap();
        let chosen = exemplar(&family).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", chosen.filename());
    }

    #[test]
    fn wix_exemplar() {
        let content = testdata_file_content("wixmadefortext-metadata.pb");
        let family = read_family(&content).unwrap();
        let chosen = exemplar(&family).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", chosen.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        let content = testdata_file_content("roboto-metadata.pb");
        read_family(&content).unwrap();
    }

    #[test]
    fn parse_wix_metadata() {
        // Has the undocumented position field
        let content = testdata_file_content("wixmadefortext-metadata.pb");
        read_family(&content).unwrap();
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let content = testdata_file_content("kosugimaru-metadata.pb");
        let family = read_family(&content).unwrap();
        let actual = (family.primary_script(), family.primary_language());
        assert_eq!(("Jpan", "Invalid"), actual);
    }

    #[test]
    fn parse_tag3() {
        let line = "Roboto Slab, /quant/stroke_width_min, 26.31";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag4() {
        let line = "Roboto Slab, wght@100, /quant/stroke_width_min, 26.31";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted() {
        let line = "Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted2() {
        let line = "\"\",t,1";
        Tag::from_str(line).expect("To parse");
    }
}