gf_metadata/
lib.rs

1mod fonts_public;
2mod languages_public;
3
4use std::{
5    cell::OnceCell,
6    collections::HashMap,
7    fs::{self, File},
8    io::{BufRead, BufReader, Error, ErrorKind},
9    path::{Path, PathBuf},
10    str::FromStr,
11};
12
13pub use fonts_public::*;
14pub use languages_public::{
15    ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
16};
17use protobuf::text_format::ParseError;
18use regex::Regex;
19use walkdir::WalkDir;
20
21pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
22    if s.contains("position") {
23        let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
24        let s = re.replace_all(s, "");
25        protobuf::text_format::parse_from_str(&s)
26    } else {
27        protobuf::text_format::parse_from_str(s)
28    }
29}
30
31pub fn read_language(s: &str) -> Result<LanguageProto, ParseError> {
32    protobuf::text_format::parse_from_str(s)
33}
34
35fn exemplar_score(font: &FontProto) -> i32 {
36    let mut score = 0;
37    // prefer regular
38    if font.style() == "normal" {
39        score += 16;
40    }
41
42    // prefer closer to 400
43    score -= (font.weight() - 400) / 100;
44
45    // prefer variable
46    if font.filename().contains("].") {
47        score += 1;
48    }
49
50    score
51}
52
53pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
54    family.fonts.iter().reduce(|acc, e| {
55        if exemplar_score(acc) >= exemplar_score(e) {
56            acc
57        } else {
58            e
59        }
60    })
61}
62
63fn iter_families(
64    root: &Path,
65    filter: Option<&Regex>,
66) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
67    WalkDir::new(root)
68        .into_iter()
69        .filter_map(|d| d.ok())
70        .filter(|d| d.file_name() == "METADATA.pb")
71        .filter(move |d| {
72            filter
73                .map(|r| r.find(&d.path().to_string_lossy()).is_some())
74                .unwrap_or(true)
75        })
76        .map(|d| {
77            (
78                d.path().to_path_buf(),
79                read_family(&fs::read_to_string(d.path()).expect("To read files!")),
80            )
81        })
82}
83
84pub fn iter_languages(root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
85    WalkDir::new(root)
86        .into_iter()
87        .filter_map(|d| d.ok())
88        .filter(|d| {
89            d.path()
90                .canonicalize()
91                .unwrap()
92                .to_str()
93                .unwrap()
94                .contains("gflanguages/data/languages")
95                && d.file_name().to_string_lossy().ends_with(".textproto")
96        })
97        .map(|d| read_language(&fs::read_to_string(d.path()).expect("To read files!")))
98}
99
100pub fn read_tags(root: &Path) -> Result<Vec<Tag>, Error> {
101    let mut tag_file = root.to_path_buf();
102    tag_file.push("tags/all/families.csv");
103    let fd = File::open(tag_file)?;
104    let rdr = BufReader::new(fd);
105    Ok(rdr
106        .lines()
107        .map(|s| s.expect("Valid tag lines"))
108        .map(|s| Tag::from_str(&s).expect("Valid tag lines"))
109        .collect())
110}
111
/// One row of the family tag data: a family, a tag, and the value that
/// associates them.
#[derive(Clone, Debug, PartialEq)]
pub struct Tag {
    /// First comma-separated field of the row.
    pub family: String,
    /// Second comma-separated field of the row.
    pub tag: String,
    /// Third comma-separated field of the row, parsed as a float.
    pub value: f32,
}
118
119impl FromStr for Tag {
120    type Err = Error;
121
122    fn from_str(s: &str) -> Result<Self, Self::Err> {
123        let mut it = s.split(",");
124        let Some(family) = it.next() else {
125            return Err(Error::new(ErrorKind::InvalidData, "Too few tag parts"));
126        };
127        let Some(tag) = it.next() else {
128            return Err(Error::new(ErrorKind::InvalidData, "Too few tag parts"));
129        };
130        let Some(value) = it.next() else {
131            return Err(Error::new(ErrorKind::InvalidData, "Too few tag parts"));
132        };
133        let Ok(value) = f32::from_str(value) else {
134            return Err(Error::new(ErrorKind::InvalidData, "Invalid tag value"));
135        };
136        Ok(Tag {
137            family: family.to_string(),
138            tag: tag.to_string(),
139            value,
140        })
141    }
142}
143
144pub struct GoogleFonts {
145    repo_dir: PathBuf,
146    family_filter: Option<Regex>,
147    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
148    languages: OnceCell<Vec<Result<LanguageProto, ParseError>>>,
149    family_by_font_file: OnceCell<HashMap<String, usize>>,
150    tags: OnceCell<Result<Vec<Tag>, Error>>,
151}
152
153impl GoogleFonts {
154    pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
155        Self {
156            repo_dir: p,
157            family_filter,
158            families: OnceCell::new(),
159            languages: OnceCell::new(),
160            family_by_font_file: OnceCell::new(),
161            tags: OnceCell::new(),
162        }
163    }
164
165    pub fn tags(&self) -> Result<&[Tag], &Error> {
166        self.tags
167            .get_or_init(|| read_tags(&self.repo_dir))
168            .as_ref()
169            .map(|tags| tags.as_slice())
170    }
171
172    pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
173        self.families
174            .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
175            .as_slice()
176    }
177
178    pub fn languages(&self) -> &[Result<LanguageProto, ParseError>] {
179        self.languages
180            .get_or_init(|| iter_languages(&self.repo_dir).collect())
181            .as_slice()
182    }
183
184    pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
185        self.languages()
186            .iter()
187            .filter_map(|l| l.as_ref().ok())
188            .find(|l| l.id() == lang_id)
189    }
190
191    fn family_by_font_file(&self) -> &HashMap<String, usize> {
192        self.family_by_font_file.get_or_init(|| {
193            self.families()
194                .iter()
195                .enumerate()
196                .filter(|(_, (_, f))| f.is_ok())
197                .flat_map(|(i, (_, f))| {
198                    f.as_ref()
199                        .unwrap()
200                        .fonts
201                        .iter()
202                        .map(move |f| (f.filename().to_string(), i))
203                })
204                .collect()
205        })
206    }
207
208    pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
209        self.family_by_font_file()
210            .get(font.filename())
211            .copied()
212            .map(|i| {
213                let (p, f) = &self.families()[i];
214                (p.as_path(), f.as_ref().unwrap())
215            })
216    }
217
218    pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
219        let Some((family_path, _)) = self.family(font) else {
220            return None;
221        };
222        let mut font_file = family_path.parent().unwrap().to_path_buf();
223        font_file.push(font.filename());
224        if !font_file.exists() {
225            eprintln!("No such file as {font_file:?}");
226        }
227        font_file.exists().then_some(font_file)
228    }
229
230    /// Our best guess at the primary language for this family
231    ///
232    /// Meant to be a good choice for things like rendering a sample string
233    pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
234        // Probe primary lang, primary script, then default baselessly to latin
235        let mut primary_language: Option<&LanguageProto> = None;
236        if primary_language.is_none() && family.has_primary_language() {
237            if let Some(lang) = self.language(family.primary_language()) {
238                primary_language = Some(lang);
239            } else {
240                eprintln!(
241                    "{} specifies invalid primary_language {}",
242                    family.name(),
243                    family.primary_language()
244                );
245            }
246        }
247        if primary_language.is_none() && family.has_primary_script() {
248            // If our script matches many languages pick the one with the highest population
249            let lang = self
250                .languages()
251                .iter()
252                .filter_map(|r| r.as_ref().ok())
253                .filter(|l| l.has_script() && l.script() == family.primary_script())
254                .reduce(|acc, e| {
255                    if acc.population() > e.population() {
256                        acc
257                    } else {
258                        e
259                    }
260                });
261            if let Some(lang) = lang {
262                primary_language = Some(lang);
263            } else {
264                eprintln!(
265                    "{} specifies a primary_script that matches no languages {}",
266                    family.name(),
267                    family.primary_script()
268                );
269            }
270        }
271        if primary_language.is_none() {
272            primary_language = self.language("en_Latn");
273        }
274        primary_language
275            .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
276    }
277}
278
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locates the shared test data directory.
    fn testdata_dir() -> std::path::PathBuf {
        // cargo test seems to run in the project directory
        // VSCode test seems to run in the workspace directory
        // probe for the file we want in hopes of finding it regardless
        let candidates = ["./resources/testdata", "../resources/testdata"];
        candidates
            .iter()
            .map(std::path::PathBuf::from)
            .find(|dir| dir.exists())
            .unwrap()
    }

    /// Reads a test data file, given its path relative to the test data directory.
    fn testdata_file_content(relative_path: &str) -> String {
        let full_path = testdata_dir().join(relative_path);
        fs::read_to_string(full_path).unwrap()
    }

    /// Loads and parses a family metadata fixture, failing the test on any error.
    fn load_family(fixture: &str) -> FamilyProto {
        read_family(&testdata_file_content(fixture)).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let roboto = load_family("roboto-metadata.pb");
        let chosen = exemplar(&roboto).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", chosen.filename());
    }

    #[test]
    fn wix_exemplar() {
        let wix = load_family("wixmadefortext-metadata.pb");
        let chosen = exemplar(&wix).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", chosen.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        load_family("roboto-metadata.pb");
    }

    #[test]
    fn parse_wix_metadata() {
        // Has the undocumented position field
        load_family("wixmadefortext-metadata.pb");
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let family = load_family("kosugimaru-metadata.pb");
        assert_eq!("Jpan", family.primary_script());
        assert_eq!("Invalid", family.primary_language());
    }
}