gf_metadata/
lib.rs

1mod fonts_public;
2mod languages_public;
3
4use std::{
5    cell::OnceCell,
6    collections::HashMap,
7    fs::{self, File},
8    io::{BufRead, BufReader, Error, ErrorKind},
9    path::{Path, PathBuf},
10    str::FromStr,
11};
12
13pub use fonts_public::*;
14pub use languages_public::{
15    ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
16};
17use protobuf::text_format::ParseError;
18use regex::Regex;
19use walkdir::WalkDir;
20
21pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
22    if s.contains("position") {
23        let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
24        let s = re.replace_all(s, "");
25        protobuf::text_format::parse_from_str(&s)
26    } else {
27        protobuf::text_format::parse_from_str(s)
28    }
29}
30
31pub fn read_language(s: &str) -> Result<LanguageProto, ParseError> {
32    protobuf::text_format::parse_from_str(s)
33}
34
35fn exemplar_score(font: &FontProto, preferred_style: FontStyle, preferred_weight: i32) -> i32 {
36    let mut score = 0;
37    // prefer preferred_style
38    if font.style() == preferred_style.style() {
39        score += 16;
40    }
41
42    // prefer closer to preferred_weight
43    score -= (font.weight() - preferred_weight) / 100;
44
45    // prefer variable
46    if font.filename().contains("].") {
47        score += 1;
48    }
49
50    score
51}
52
53pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
54    fn score(font: &FontProto) -> i32 {
55        exemplar_score(font, FontStyle::Normal, 400)
56    }
57    family
58        .fonts
59        .iter()
60        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
61}
62
/// The slant of a font, used when choosing which font of a family to prefer.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum FontStyle {
    Normal,
    Italic,
}

impl FontStyle {
    /// The string this style is compared against a font's `style()` value with.
    fn style(&self) -> &str {
        match *self {
            Self::Normal => "normal",
            Self::Italic => "italic",
        }
    }
}
77
78pub fn select_font(
79    family: &FamilyProto,
80    preferred_style: FontStyle,
81    preferred_weight: i32,
82) -> Option<&FontProto> {
83    let score =
84        |font: &FontProto| -> i32 { exemplar_score(font, preferred_style, preferred_weight) };
85    family
86        .fonts
87        .iter()
88        .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
89}
90
91fn iter_families(
92    root: &Path,
93    filter: Option<&Regex>,
94) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
95    WalkDir::new(root)
96        .into_iter()
97        .filter_map(|d| d.ok())
98        .filter(|d| d.file_name() == "METADATA.pb")
99        .filter(move |d| {
100            filter
101                .map(|r| r.find(&d.path().to_string_lossy()).is_some())
102                .unwrap_or(true)
103        })
104        .map(|d| {
105            (
106                d.path().to_path_buf(),
107                read_family(&fs::read_to_string(d.path()).expect("To read files!")),
108            )
109        })
110}
111
112pub fn iter_languages(root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
113    WalkDir::new(root)
114        .into_iter()
115        .filter_map(|d| d.ok())
116        .filter(|d| {
117            d.path()
118                .canonicalize()
119                .unwrap()
120                .to_str()
121                .unwrap()
122                .contains("gflanguages/data/languages")
123                && d.file_name().to_string_lossy().ends_with(".textproto")
124        })
125        .map(|d| read_language(&fs::read_to_string(d.path()).expect("To read files!")))
126}
127
128pub fn read_tags(root: &Path) -> Result<Vec<Tag>, Error> {
129    let mut tag_dir = root.to_path_buf();
130    tag_dir.push("tags/all");
131    let mut tags = Vec::new();
132    for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
133        let entry = entry.expect("To access tag dir entries");
134        if entry
135            .path()
136            .extension()
137            .expect("To have extensions")
138            .to_str()
139            .expect("utf-8")
140            != "csv"
141        {
142            continue;
143        }
144        let fd = File::open(&entry.path())?;
145        let rdr = BufReader::new(fd);
146        tags.extend(
147            rdr.lines()
148                .map(|s| s.expect("Valid tag lines"))
149                .map(|s| Tag::from_str(&s).expect("Valid tag lines")),
150        );
151    }
152    Ok(tags)
153}
154
155pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
156    let mut tag_metadata_file = root.to_path_buf();
157    tag_metadata_file.push("tags/tags_metadata.csv");
158    let mut metadata = Vec::new();
159
160    let fd = File::open(&tag_metadata_file)?;
161    let rdr = BufReader::new(fd);
162    metadata.extend(
163        rdr.lines()
164            .map(|s| s.expect("Valid tag lines"))
165            .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
166    );
167
168    Ok(metadata)
169}
170
/// Splits one CSV line into its trimmed fields.
///
/// A field that starts with `"` runs to at least its closing quote, so commas inside
/// the quotes do not split it. The quotes themselves are kept in the returned value.
/// An empty input yields no fields, and a trailing comma does not produce a final
/// empty field.
///
/// # Panics
///
/// Panics if a field opens with `"` and the rest of the line has no closing quote.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();
    while !rest.is_empty() {
        rest = rest.trim();
        // Offset in `rest` from which to look for the separating comma. For a quoted
        // field this is the closing quote: the index found in the text after the
        // opening quote is shifted by one to make it an index into `rest` itself.
        let search_from = match rest.strip_prefix('"') {
            Some(after_open) => 1 + after_open.find('"').expect("Close quote"),
            None => 0,
        };
        match rest[search_from..].find(',') {
            Some(offset) => {
                let (value, tail) = rest.split_at(search_from + offset);
                values.push(value.trim());
                // Skip the comma itself
                rest = &tail[1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
194
195#[derive(Clone, Debug)]
196pub struct Tag {
197    pub family: String,
198    pub loc: String,
199    pub tag: String,
200    pub value: f32,
201}
202
203impl FromStr for Tag {
204    type Err = Error;
205
206    fn from_str(s: &str) -> Result<Self, Self::Err> {
207        let values = csv_values(s);
208        let (family, loc, tag, value) = match values[..] {
209            [family, tag, value] => (family, "", tag, value),
210            [family, loc, tag, value] => (family, loc, tag, value),
211            _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
212        };
213        Ok(Tag {
214            family: family.to_string(),
215            loc: loc.to_string(),
216            tag: tag.to_string(),
217            value: f32::from_str(value)
218                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
219        })
220    }
221}
222
223#[derive(Clone, Debug)]
224pub struct TagMetadata {
225    pub tag: String,
226    pub min_value: f32,
227    pub max_value: f32,
228    pub prompt_name: String,
229}
230
231impl FromStr for TagMetadata {
232    type Err = Error;
233
234    fn from_str(s: &str) -> Result<Self, Self::Err> {
235        let values = csv_values(s);
236        let [tag, min, max, prompt_name] = values[..] else {
237            return Err(Error::new(
238                ErrorKind::InvalidData,
239                "Unparseable tag metadata, wrong number of values",
240            ));
241        };
242        Ok(TagMetadata {
243            tag: tag.into(),
244            min_value: f32::from_str(min)
245                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
246            max_value: f32::from_str(max)
247                .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
248            prompt_name: prompt_name.into(),
249        })
250    }
251}
252
253pub struct GoogleFonts {
254    repo_dir: PathBuf,
255    family_filter: Option<Regex>,
256    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
257    languages: OnceCell<Vec<Result<LanguageProto, ParseError>>>,
258    family_by_font_file: OnceCell<HashMap<String, usize>>,
259    tags: OnceCell<Result<Vec<Tag>, Error>>,
260    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
261}
262
263impl GoogleFonts {
264    pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
265        Self {
266            repo_dir: p,
267            family_filter,
268            families: OnceCell::new(),
269            languages: OnceCell::new(),
270            family_by_font_file: OnceCell::new(),
271            tags: OnceCell::new(),
272            tag_metadata: OnceCell::new(),
273        }
274    }
275
276    pub fn tags(&self) -> Result<&[Tag], &Error> {
277        self.tags
278            .get_or_init(|| read_tags(&self.repo_dir))
279            .as_ref()
280            .map(|tags| tags.as_slice())
281    }
282
283    pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
284        self.tag_metadata
285            .get_or_init(|| read_tag_metadata(&self.repo_dir))
286            .as_ref()
287            .map(|metadata| metadata.as_slice())
288    }
289
290    pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
291        self.families
292            .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
293            .as_slice()
294    }
295
296    pub fn languages(&self) -> &[Result<LanguageProto, ParseError>] {
297        self.languages
298            .get_or_init(|| iter_languages(&self.repo_dir).collect())
299            .as_slice()
300    }
301
302    pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
303        self.languages()
304            .iter()
305            .filter_map(|l| l.as_ref().ok())
306            .find(|l| l.id() == lang_id)
307    }
308
309    fn family_by_font_file(&self) -> &HashMap<String, usize> {
310        self.family_by_font_file.get_or_init(|| {
311            self.families()
312                .iter()
313                .enumerate()
314                .filter(|(_, (_, f))| f.is_ok())
315                .flat_map(|(i, (_, f))| {
316                    f.as_ref()
317                        .unwrap()
318                        .fonts
319                        .iter()
320                        .map(move |f| (f.filename().to_string(), i))
321                })
322                .collect()
323        })
324    }
325
326    pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
327        self.family_by_font_file()
328            .get(font.filename())
329            .copied()
330            .map(|i| {
331                let (p, f) = &self.families()[i];
332                (p.as_path(), f.as_ref().unwrap())
333            })
334    }
335
336    pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
337        let Some((family_path, _)) = self.family(font) else {
338            return None;
339        };
340        let mut font_file = family_path.parent().unwrap().to_path_buf();
341        font_file.push(font.filename());
342        if !font_file.exists() {
343            eprintln!("No such file as {font_file:?}");
344        }
345        font_file.exists().then_some(font_file)
346    }
347
348    /// Our best guess at the primary language for this family
349    ///
350    /// Meant to be a good choice for things like rendering a sample string
351    pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
352        // Probe primary lang, primary script, then default baselessly to latin
353        let mut primary_language: Option<&LanguageProto> = None;
354        if primary_language.is_none() && family.has_primary_language() {
355            if let Some(lang) = self.language(family.primary_language()) {
356                primary_language = Some(lang);
357            } else {
358                eprintln!(
359                    "{} specifies invalid primary_language {}",
360                    family.name(),
361                    family.primary_language()
362                );
363            }
364        }
365        if primary_language.is_none() && family.has_primary_script() {
366            // If our script matches many languages pick the one with the highest population
367            let lang = self
368                .languages()
369                .iter()
370                .filter_map(|r| r.as_ref().ok())
371                .filter(|l| l.has_script() && l.script() == family.primary_script())
372                .reduce(|acc, e| {
373                    if acc.population() > e.population() {
374                        acc
375                    } else {
376                        e
377                    }
378                });
379            if let Some(lang) = lang {
380                primary_language = Some(lang);
381            } else {
382                eprintln!(
383                    "{} specifies a primary_script that matches no languages {}",
384                    family.name(),
385                    family.primary_script()
386                );
387            }
388        }
389        if primary_language.is_none() {
390            primary_language = self.language("en_Latn");
391        }
392        primary_language
393            .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
394    }
395}
396
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locates the testdata directory regardless of the working directory in use.
    fn testdata_dir() -> std::path::PathBuf {
        // cargo test seems to run in the project directory, while VSCode test seems to
        // run in the workspace directory, so probe both in hopes of finding the data
        let candidates = ["./resources/testdata", "../resources/testdata"];
        candidates
            .iter()
            .map(std::path::PathBuf::from)
            .find(|dir| dir.exists())
            .unwrap()
    }

    /// Reads a testdata file, given its path relative to the testdata directory.
    fn testdata_file_content(relative_path: &str) -> String {
        fs::read_to_string(testdata_dir().join(relative_path)).unwrap()
    }

    /// Loads and parses a family metadata fixture, failing the test if it is invalid.
    fn testdata_family(relative_path: &str) -> FamilyProto {
        read_family(&testdata_file_content(relative_path)).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let roboto = testdata_family("roboto-metadata.pb");
        let best = exemplar(&roboto).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", best.filename());
    }

    #[test]
    fn wix_exemplar() {
        let wix = testdata_family("wixmadefortext-metadata.pb");
        let best = exemplar(&wix).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", best.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        testdata_family("roboto-metadata.pb");
    }

    #[test]
    fn parse_wix_metadata() {
        // Has the undocumented position field
        testdata_family("wixmadefortext-metadata.pb");
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let family = testdata_family("kosugimaru-metadata.pb");
        assert_eq!(
            ("Jpan", "Invalid"),
            (family.primary_script(), family.primary_language())
        );
    }

    #[test]
    fn parse_tag3() {
        let line = "Roboto Slab, /quant/stroke_width_min, 26.31";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag4() {
        let line = "Roboto Slab, wght@100, /quant/stroke_width_min, 26.31";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted() {
        let line = "Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97";
        Tag::from_str(line).expect("To parse");
    }

    #[test]
    fn parse_tag_quoted2() {
        let line = "\"\",t,1";
        Tag::from_str(line).expect("To parse");
    }
}
476}