gf_metadata/
lib.rs

1mod fonts_public;
2mod languages_public;
3
4use std::{
5    cell::OnceCell,
6    collections::HashMap,
7    fs::{self, File},
8    io::{BufRead, BufReader, Error, ErrorKind},
9    path::{Path, PathBuf},
10    str::FromStr,
11};
12
13pub use fonts_public::*;
14pub use languages_public::{
15    ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
16};
17use protobuf::text_format::ParseError;
18use regex::Regex;
19use walkdir::WalkDir;
20
21pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
22    if s.contains("position") {
23        let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
24        let s = re.replace_all(s, "");
25        protobuf::text_format::parse_from_str(&s)
26    } else {
27        protobuf::text_format::parse_from_str(s)
28    }
29}
30
31pub fn read_language(s: &str) -> Result<LanguageProto, ParseError> {
32    protobuf::text_format::parse_from_str(s)
33}
34
35fn exemplar_score(font: &FontProto) -> i32 {
36    let mut score = 0;
37    // prefer regular
38    if font.style() == "normal" {
39        score += 16;
40    }
41
42    // prefer closer to 400
43    score -= (font.weight() - 400) / 100;
44
45    // prefer variable
46    if font.filename().contains("].") {
47        score += 1;
48    }
49
50    score
51}
52
53pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
54    family.fonts.iter().reduce(|acc, e| {
55        if exemplar_score(acc) >= exemplar_score(e) {
56            acc
57        } else {
58            e
59        }
60    })
61}
62
63fn iter_families(
64    root: &Path,
65    filter: Option<&Regex>,
66) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
67    WalkDir::new(root)
68        .into_iter()
69        .filter_map(|d| d.ok())
70        .filter(|d| d.file_name() == "METADATA.pb")
71        .filter(move |d| {
72            filter
73                .map(|r| r.find(&d.path().to_string_lossy()).is_some())
74                .unwrap_or(true)
75        })
76        .map(|d| {
77            (
78                d.path().to_path_buf(),
79                read_family(&fs::read_to_string(d.path()).expect("To read files!")),
80            )
81        })
82}
83
84pub fn iter_languages(root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
85    WalkDir::new(root)
86        .into_iter()
87        .filter_map(|d| d.ok())
88        .filter(|d| {
89            d.path()
90                .canonicalize()
91                .unwrap()
92                .to_str()
93                .unwrap()
94                .contains("gflanguages/data/languages")
95                && d.file_name().to_string_lossy().ends_with(".textproto")
96        })
97        .map(|d| read_language(&fs::read_to_string(d.path()).expect("To read files!")))
98}
99
100pub fn read_tags(root: &Path) -> Result<Vec<Tag>, Error> {
101    let mut tag_dir = root.to_path_buf();
102    tag_dir.push("tags/all");
103    let mut tags = Vec::new();
104    for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
105        let entry = entry.expect("To access tag dir entries");
106        if entry
107            .path()
108            .extension()
109            .expect("To have extensions")
110            .to_str()
111            .expect("utf-8")
112            != "csv"
113        {
114            continue;
115        }
116        let fd = File::open(&entry.path())?;
117        let rdr = BufReader::new(fd);
118        tags.extend(
119            rdr.lines()
120                .map(|s| s.expect("Valid tag lines"))
121                .map(|s| Tag::from_str(&s).expect("Valid tag lines")),
122        );
123    }
124    Ok(tags)
125}
126
127pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
128    let mut tag_metadata_file = root.to_path_buf();
129    tag_metadata_file.push("tags/tags_metadata.csv");
130    let mut metadata = Vec::new();
131
132    let fd = File::open(&tag_metadata_file)?;
133    let rdr = BufReader::new(fd);
134    metadata.extend(
135        rdr.lines()
136            .map(|s| s.expect("Valid tag lines"))
137            .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
138    );
139
140    Ok(metadata)
141}
142
/// Splits one csv line into its comma-separated fields.
///
/// Fields are trimmed of surrounding whitespace. A field that starts with `"` runs to
/// its closing quote, so commas inside the quotes do not split it; the quotes
/// themselves are kept in the returned value. An empty input, or nothing after a
/// trailing comma, yields no field.
///
/// # Panics
///
/// Panics if a field opens a quote that is never closed.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();
    while !rest.is_empty() {
        rest = rest.trim();

        // Where to start looking for the separator. For a quoted field that is the
        // closing quote; `find` runs on `rest[1..]`, hence the `+ 1` to turn its
        // result back into an index into `rest`.
        let search_from = if rest.starts_with('"') {
            rest[1..].find('"').expect("Close quote") + 1
        } else {
            0
        };

        match rest[search_from..].find(',') {
            Some(offset) => {
                let (value, tail) = rest.split_at(search_from + offset);
                values.push(value.trim());
                // Skip the separator itself.
                rest = &tail[1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
166
167#[derive(Clone, Debug)]
168pub struct Tag {
169    pub family: String,
170    pub loc: String,
171    pub tag: String,
172    pub value: f32,
173}
174
175impl FromStr for Tag {
176    type Err = Error;
177
178    fn from_str(s: &str) -> Result<Self, Self::Err> {
179        let values = csv_values(s);
180        let (family, loc, tag, value) = match values[..] {
181            [family, tag, value] => (family, "", tag, value),
182            [family, loc, tag, value] => (family, loc, tag, value),
183            _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
184        };
185        Ok(Tag {
186            family: family.to_string(),
187            loc: loc.to_string(),
188            tag: tag.to_string(),
189            value: f32::from_str(value)
190                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
191        })
192    }
193}
194
195#[derive(Clone, Debug)]
196pub struct TagMetadata {
197    pub tag: String,
198    pub min_value: f32,
199    pub max_value: f32,
200    pub prompt_name: String,
201}
202
203impl FromStr for TagMetadata {
204    type Err = Error;
205
206    fn from_str(s: &str) -> Result<Self, Self::Err> {
207        let values = csv_values(s);
208        eprintln!("{s} => {values:?}");
209        let [tag, min, max, prompt_name] = values[..] else {
210            return Err(Error::new(
211                ErrorKind::InvalidData,
212                "Unparseable tag metadata, wrong number of values",
213            ));
214        };
215        Ok(TagMetadata {
216            tag: tag.into(),
217            min_value: f32::from_str(min)
218                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
219            max_value: f32::from_str(max)
220                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
221            prompt_name: prompt_name.into(),
222        })
223    }
224}
225
226pub struct GoogleFonts {
227    repo_dir: PathBuf,
228    family_filter: Option<Regex>,
229    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
230    languages: OnceCell<Vec<Result<LanguageProto, ParseError>>>,
231    family_by_font_file: OnceCell<HashMap<String, usize>>,
232    tags: OnceCell<Result<Vec<Tag>, Error>>,
233    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
234}
235
236impl GoogleFonts {
237    pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
238        Self {
239            repo_dir: p,
240            family_filter,
241            families: OnceCell::new(),
242            languages: OnceCell::new(),
243            family_by_font_file: OnceCell::new(),
244            tags: OnceCell::new(),
245            tag_metadata: OnceCell::new(),
246        }
247    }
248
249    pub fn tags(&self) -> Result<&[Tag], &Error> {
250        self.tags
251            .get_or_init(|| read_tags(&self.repo_dir))
252            .as_ref()
253            .map(|tags| tags.as_slice())
254    }
255
256    pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
257        self.tag_metadata
258            .get_or_init(|| read_tag_metadata(&self.repo_dir))
259            .as_ref()
260            .map(|metadata| metadata.as_slice())
261    }
262
263    pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
264        self.families
265            .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
266            .as_slice()
267    }
268
269    pub fn languages(&self) -> &[Result<LanguageProto, ParseError>] {
270        self.languages
271            .get_or_init(|| iter_languages(&self.repo_dir).collect())
272            .as_slice()
273    }
274
275    pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
276        self.languages()
277            .iter()
278            .filter_map(|l| l.as_ref().ok())
279            .find(|l| l.id() == lang_id)
280    }
281
282    fn family_by_font_file(&self) -> &HashMap<String, usize> {
283        self.family_by_font_file.get_or_init(|| {
284            self.families()
285                .iter()
286                .enumerate()
287                .filter(|(_, (_, f))| f.is_ok())
288                .flat_map(|(i, (_, f))| {
289                    f.as_ref()
290                        .unwrap()
291                        .fonts
292                        .iter()
293                        .map(move |f| (f.filename().to_string(), i))
294                })
295                .collect()
296        })
297    }
298
299    pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
300        self.family_by_font_file()
301            .get(font.filename())
302            .copied()
303            .map(|i| {
304                let (p, f) = &self.families()[i];
305                (p.as_path(), f.as_ref().unwrap())
306            })
307    }
308
309    pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
310        let Some((family_path, _)) = self.family(font) else {
311            return None;
312        };
313        let mut font_file = family_path.parent().unwrap().to_path_buf();
314        font_file.push(font.filename());
315        if !font_file.exists() {
316            eprintln!("No such file as {font_file:?}");
317        }
318        font_file.exists().then_some(font_file)
319    }
320
321    /// Our best guess at the primary language for this family
322    ///
323    /// Meant to be a good choice for things like rendering a sample string
324    pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
325        // Probe primary lang, primary script, then default baselessly to latin
326        let mut primary_language: Option<&LanguageProto> = None;
327        if primary_language.is_none() && family.has_primary_language() {
328            if let Some(lang) = self.language(family.primary_language()) {
329                primary_language = Some(lang);
330            } else {
331                eprintln!(
332                    "{} specifies invalid primary_language {}",
333                    family.name(),
334                    family.primary_language()
335                );
336            }
337        }
338        if primary_language.is_none() && family.has_primary_script() {
339            // If our script matches many languages pick the one with the highest population
340            let lang = self
341                .languages()
342                .iter()
343                .filter_map(|r| r.as_ref().ok())
344                .filter(|l| l.has_script() && l.script() == family.primary_script())
345                .reduce(|acc, e| {
346                    if acc.population() > e.population() {
347                        acc
348                    } else {
349                        e
350                    }
351                });
352            if let Some(lang) = lang {
353                primary_language = Some(lang);
354            } else {
355                eprintln!(
356                    "{} specifies a primary_script that matches no languages {}",
357                    family.name(),
358                    family.primary_script()
359                );
360            }
361        }
362        if primary_language.is_none() {
363            primary_language = self.language("en_Latn");
364        }
365        primary_language
366            .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
367    }
368}
369
#[cfg(test)]
mod tests {

    use std::{fs, path::PathBuf};

    use super::*;

    /// Locates the test data directory.
    ///
    /// cargo test seems to run in the project directory while VSCode seems to run in
    /// the workspace directory, so both relative locations are probed.
    fn testdata_dir() -> PathBuf {
        let candidates = ["./resources/testdata", "../resources/testdata"];
        candidates
            .iter()
            .map(PathBuf::from)
            .find(|dir| dir.exists())
            .unwrap()
    }

    /// Reads a test data file, given its path relative to the test data directory.
    fn testdata_file_content(relative_path: &str) -> String {
        let file = testdata_dir().join(relative_path);
        fs::read_to_string(file).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let family = read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
        let best = exemplar(&family).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", best.filename());
    }

    #[test]
    fn wix_exemplar() {
        let family = read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
        let best = exemplar(&family).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", best.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        let content = testdata_file_content("roboto-metadata.pb");
        read_family(&content).unwrap();
    }

    #[test]
    fn parse_wix_metadata() {
        // Has the undocumented position field
        let content = testdata_file_content("wixmadefortext-metadata.pb");
        read_family(&content).unwrap();
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let family = read_family(&testdata_file_content("kosugimaru-metadata.pb")).unwrap();
        let actual = (family.primary_script(), family.primary_language());
        assert_eq!(("Jpan", "Invalid"), actual);
    }

    #[test]
    fn parse_tag3() {
        let line = "Roboto Slab, /quant/stroke_width_min, 26.31";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag4() {
        let line = "Roboto Slab, wght@100, /quant/stroke_width_min, 26.31";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted() {
        let line = "Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted2() {
        let line = "\"\",t,1";
        Tag::from_str(line).expect("To parse");
    }
}
447        Tag::from_str("\"\",t,1").expect("To parse");
448    }
449}