gf_metadata/
lib.rs

1mod fonts_public;
2mod languages_public;
3
4use std::{
5    cell::OnceCell,
6    collections::HashMap,
7    fs::{self, File},
8    io::{BufRead, BufReader, Error, ErrorKind},
9    path::{Path, PathBuf},
10    str::FromStr,
11};
12
13pub use fonts_public::*;
14pub use languages_public::{
15    ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
16};
17use protobuf::text_format::ParseError;
18use regex::Regex;
19use walkdir::WalkDir;
20
21pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
22    if s.contains("position") {
23        let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
24        let s = re.replace_all(s, "");
25        protobuf::text_format::parse_from_str(&s)
26    } else {
27        protobuf::text_format::parse_from_str(s)
28    }
29}
30
31pub fn read_language(s: &str) -> Result<LanguageProto, ParseError> {
32    protobuf::text_format::parse_from_str(s)
33}
34
35fn exemplar_score(font: &FontProto, preferred_style: FontStyle, preferred_weight: i32) -> i32 {
36    let mut score = 0;
37    // prefer preferred_style
38    if font.style() == preferred_style.style() {
39        score += 16;
40    }
41
42    // prefer closer to preferred_weight
43    score -= (font.weight() - preferred_weight).abs() / 100;
44
45    // prefer more weight to less weight
46    if font.weight() > preferred_weight {
47        score += 1;
48    }
49
50    // prefer variable
51    if font.filename().contains("].") {
52        score += 2;
53    }
54
55    score
56}
57
58pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
59    fn score(font: &FontProto) -> i32 {
60        exemplar_score(font, FontStyle::Normal, 400)
61    }
62    family
63        .fonts
64        .iter()
65        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
66}
67
/// The style requested when selecting a font from a family.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FontStyle {
    /// Upright; compared against fonts whose style string is `"normal"`.
    Normal,
    /// Italic; compared against fonts whose style string is `"italic"`.
    Italic,
}

impl FontStyle {
    /// Returns the style string this variant is compared against.
    ///
    /// The result is a string literal, so it is `'static` and does not borrow
    /// from `self`.
    fn style(&self) -> &'static str {
        match self {
            FontStyle::Normal => "normal",
            FontStyle::Italic => "italic",
        }
    }
}
82
83pub fn select_font(
84    family: &FamilyProto,
85    preferred_style: FontStyle,
86    preferred_weight: i32,
87) -> Option<&FontProto> {
88    let score =
89        |font: &FontProto| -> i32 { exemplar_score(font, preferred_style, preferred_weight) };
90    family
91        .fonts
92        .iter()
93        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
94}
95
96fn iter_families(
97    root: &Path,
98    filter: Option<&Regex>,
99) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
100    WalkDir::new(root)
101        .into_iter()
102        .filter_map(|d| d.ok())
103        .filter(|d| d.file_name() == "METADATA.pb")
104        .filter(move |d| {
105            filter
106                .map(|r| r.find(&d.path().to_string_lossy()).is_some())
107                .unwrap_or(true)
108        })
109        .map(|d| {
110            (
111                d.path().to_path_buf(),
112                read_family(&fs::read_to_string(d.path()).expect("To read files!")),
113            )
114        })
115}
116
117pub fn iter_languages(root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
118    WalkDir::new(root)
119        .into_iter()
120        .filter_map(|d| d.ok())
121        .filter(|d| {
122            d.path()
123                .canonicalize()
124                .unwrap()
125                .to_str()
126                .unwrap()
127                .contains("gflanguages/data/languages")
128                && d.file_name().to_string_lossy().ends_with(".textproto")
129        })
130        .map(|d| read_language(&fs::read_to_string(d.path()).expect("To read files!")))
131}
132
133pub fn read_tags(root: &Path) -> Result<Vec<Tag>, Error> {
134    let mut tag_dir = root.to_path_buf();
135    tag_dir.push("tags/all");
136    let mut tags = Vec::new();
137    for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
138        let entry = entry.expect("To access tag dir entries");
139        if entry
140            .path()
141            .extension()
142            .expect("To have extensions")
143            .to_str()
144            .expect("utf-8")
145            != "csv"
146        {
147            continue;
148        }
149        let fd = File::open(&entry.path())?;
150        let rdr = BufReader::new(fd);
151        tags.extend(
152            rdr.lines()
153                .map(|s| s.expect("Valid tag lines"))
154                .map(|s| Tag::from_str(&s).expect("Valid tag lines")),
155        );
156    }
157    Ok(tags)
158}
159
160pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
161    let mut tag_metadata_file = root.to_path_buf();
162    tag_metadata_file.push("tags/tags_metadata.csv");
163    let mut metadata = Vec::new();
164
165    let fd = File::open(&tag_metadata_file)?;
166    let rdr = BufReader::new(fd);
167    metadata.extend(
168        rdr.lines()
169            .map(|s| s.expect("Valid tag lines"))
170            .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
171    );
172
173    Ok(metadata)
174}
175
/// Splits one csv line into trimmed fields, borrowing from `s`.
///
/// A field that starts with `"` runs to the first following `"`, and commas
/// inside the quotes do not split it. The quotes are kept in the returned value.
/// An empty field after a trailing comma is not reported.
///
/// # Panics
///
/// Panics if a field opens a quote that is never closed.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();
    while !rest.is_empty() {
        rest = rest.trim();

        // Byte offset from which to look for the comma that ends this field.
        let search_from = if rest.starts_with('"') {
            // `find` reports an index into `rest[1..]`: add 1 to turn it into an
            // index into `rest`, and 1 more to step past the closing quote so
            // that nothing inside the quotes can be mistaken for the separator.
            rest[1..].find('"').expect("Close quote") + 2
        } else {
            0
        };

        match rest[search_from..].find(',') {
            Some(offset) => {
                let comma = search_from + offset;
                values.push(rest[..comma].trim());
                rest = &rest[comma + 1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
199
200#[derive(Clone, Debug)]
201pub struct Tag {
202    pub family: String,
203    pub loc: String,
204    pub tag: String,
205    pub value: f32,
206}
207
208impl FromStr for Tag {
209    type Err = Error;
210
211    fn from_str(s: &str) -> Result<Self, Self::Err> {
212        let values = csv_values(s);
213        let (family, loc, tag, value) = match values[..] {
214            [family, tag, value] => (family, "", tag, value),
215            [family, loc, tag, value] => (family, loc, tag, value),
216            _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
217        };
218        Ok(Tag {
219            family: family.to_string(),
220            loc: loc.to_string(),
221            tag: tag.to_string(),
222            value: f32::from_str(value)
223                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
224        })
225    }
226}
227
228#[derive(Clone, Debug)]
229pub struct TagMetadata {
230    pub tag: String,
231    pub min_value: f32,
232    pub max_value: f32,
233    pub prompt_name: String,
234}
235
236impl FromStr for TagMetadata {
237    type Err = Error;
238
239    fn from_str(s: &str) -> Result<Self, Self::Err> {
240        let values = csv_values(s);
241        let [tag, min, max, prompt_name] = values[..] else {
242            return Err(Error::new(
243                ErrorKind::InvalidData,
244                "Unparseable tag metadata, wrong number of values",
245            ));
246        };
247        Ok(TagMetadata {
248            tag: tag.into(),
249            min_value: f32::from_str(min)
250                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
251            max_value: f32::from_str(max)
252                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
253            prompt_name: prompt_name.into(),
254        })
255    }
256}
257
258pub struct GoogleFonts {
259    repo_dir: PathBuf,
260    family_filter: Option<Regex>,
261    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
262    languages: OnceCell<Vec<Result<LanguageProto, ParseError>>>,
263    family_by_font_file: OnceCell<HashMap<String, usize>>,
264    tags: OnceCell<Result<Vec<Tag>, Error>>,
265    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
266}
267
268impl GoogleFonts {
269    pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
270        Self {
271            repo_dir: p,
272            family_filter,
273            families: OnceCell::new(),
274            languages: OnceCell::new(),
275            family_by_font_file: OnceCell::new(),
276            tags: OnceCell::new(),
277            tag_metadata: OnceCell::new(),
278        }
279    }
280
281    pub fn tags(&self) -> Result<&[Tag], &Error> {
282        self.tags
283            .get_or_init(|| read_tags(&self.repo_dir))
284            .as_ref()
285            .map(|tags| tags.as_slice())
286    }
287
288    pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
289        self.tag_metadata
290            .get_or_init(|| read_tag_metadata(&self.repo_dir))
291            .as_ref()
292            .map(|metadata| metadata.as_slice())
293    }
294
295    pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
296        self.families
297            .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
298            .as_slice()
299    }
300
301    pub fn languages(&self) -> &[Result<LanguageProto, ParseError>] {
302        self.languages
303            .get_or_init(|| iter_languages(&self.repo_dir).collect())
304            .as_slice()
305    }
306
307    pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
308        self.languages()
309            .iter()
310            .filter_map(|l| l.as_ref().ok())
311            .find(|l| l.id() == lang_id)
312    }
313
314    fn family_by_font_file(&self) -> &HashMap<String, usize> {
315        self.family_by_font_file.get_or_init(|| {
316            self.families()
317                .iter()
318                .enumerate()
319                .filter(|(_, (_, f))| f.is_ok())
320                .flat_map(|(i, (_, f))| {
321                    f.as_ref()
322                        .unwrap()
323                        .fonts
324                        .iter()
325                        .map(move |f| (f.filename().to_string(), i))
326                })
327                .collect()
328        })
329    }
330
331    pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
332        self.family_by_font_file()
333            .get(font.filename())
334            .copied()
335            .map(|i| {
336                let (p, f) = &self.families()[i];
337                (p.as_path(), f.as_ref().unwrap())
338            })
339    }
340
341    pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
342        let Some((family_path, _)) = self.family(font) else {
343            return None;
344        };
345        let mut font_file = family_path.parent().unwrap().to_path_buf();
346        font_file.push(font.filename());
347        if !font_file.exists() {
348            eprintln!("No such file as {font_file:?}");
349        }
350        font_file.exists().then_some(font_file)
351    }
352
353    /// Our best guess at the primary language for this family
354    ///
355    /// Meant to be a good choice for things like rendering a sample string
356    pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
357        // Probe primary lang, primary script, then default baselessly to latin
358        let mut primary_language: Option<&LanguageProto> = None;
359        if primary_language.is_none() && family.has_primary_language() {
360            if let Some(lang) = self.language(family.primary_language()) {
361                primary_language = Some(lang);
362            } else {
363                eprintln!(
364                    "{} specifies invalid primary_language {}",
365                    family.name(),
366                    family.primary_language()
367                );
368            }
369        }
370        if primary_language.is_none() && family.has_primary_script() {
371            // If our script matches many languages pick the one with the highest population
372            let lang = self
373                .languages()
374                .iter()
375                .filter_map(|r| r.as_ref().ok())
376                .filter(|l| l.has_script() && l.script() == family.primary_script())
377                .reduce(|acc, e| {
378                    if acc.population() > e.population() {
379                        acc
380                    } else {
381                        e
382                    }
383                });
384            if let Some(lang) = lang {
385                primary_language = Some(lang);
386            } else {
387                eprintln!(
388                    "{} specifies a primary_script that matches no languages {}",
389                    family.name(),
390                    family.primary_script()
391                );
392            }
393        }
394        if primary_language.is_none() {
395            primary_language = self.language("en_Latn");
396        }
397        primary_language
398            .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
399    }
400}
401
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locates the test data directory.
    fn testdata_dir() -> std::path::PathBuf {
        // cargo test seems to run in the project directory
        // VSCode test seems to run in the workspace directory
        // probe for the file we want in hopes of finding it regardless
        const CANDIDATES: [&str; 2] = ["./resources/testdata", "../resources/testdata"];

        CANDIDATES
            .iter()
            .map(std::path::PathBuf::from)
            .find(|dir| dir.exists())
            .unwrap()
    }

    /// Reads a file from the test data directory into a string.
    fn testdata_file_content(relative_path: &str) -> String {
        fs::read_to_string(testdata_dir().join(relative_path)).unwrap()
    }

    /// Parses the named metadata file and returns the filename of its exemplar font.
    fn exemplar_filename(metadata_file: &str) -> String {
        let family = read_family(&testdata_file_content(metadata_file)).unwrap();
        exemplar(&family).unwrap().filename().to_string()
    }

    #[test]
    fn roboto_exemplar() {
        assert_eq!(
            "Roboto[wdth,wght].ttf",
            exemplar_filename("roboto-metadata.pb")
        );
    }

    #[test]
    fn wix_exemplar() {
        assert_eq!(
            "WixMadeforText[wght].ttf",
            exemplar_filename("wixmadefortext-metadata.pb")
        );
    }

    #[test]
    fn parse_roboto_metadata() {
        let content = testdata_file_content("roboto-metadata.pb");
        read_family(&content).unwrap();
    }

    #[test]
    fn parse_wix_metadata() {
        // Has the undocumented position field
        let content = testdata_file_content("wixmadefortext-metadata.pb");
        read_family(&content).unwrap();
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let content = testdata_file_content("kosugimaru-metadata.pb");
        let family = read_family(&content).unwrap();
        assert_eq!(
            ("Jpan", "Invalid"),
            (family.primary_script(), family.primary_language())
        );
    }

    #[test]
    fn parse_tag3() {
        let line = "Roboto Slab, /quant/stroke_width_min, 26.31";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag4() {
        let line = "Roboto Slab, wght@100, /quant/stroke_width_min, 26.31";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted() {
        let line = "Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted2() {
        let line = "\"\",t,1";
        Tag::from_str(line).expect("To parse");
    }
}
479        Tag::from_str("\"\",t,1").expect("To parse");
480    }
481}