1mod fonts_public;
2mod languages_public;
3
4use std::{
5 cell::OnceCell,
6 collections::HashMap,
7 fs::{self, File},
8 io::{BufRead, BufReader, Error, ErrorKind},
9 path::{Path, PathBuf},
10 str::FromStr,
11};
12
13pub use fonts_public::*;
14pub use languages_public::{
15 ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
16};
17use protobuf::text_format::ParseError;
18use regex::Regex;
19use walkdir::WalkDir;
20
21pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
22 if s.contains("position") {
23 let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
24 let s = re.replace_all(s, "");
25 protobuf::text_format::parse_from_str(&s)
26 } else {
27 protobuf::text_format::parse_from_str(s)
28 }
29}
30
31pub fn read_language(s: &str) -> Result<LanguageProto, ParseError> {
32 protobuf::text_format::parse_from_str(s)
33}
34
35fn exemplar_score(font: &FontProto) -> i32 {
36 let mut score = 0;
37 if font.style() == "normal" {
39 score += 16;
40 }
41
42 score -= (font.weight() - 400) / 100;
44
45 if font.filename().contains("].") {
47 score += 1;
48 }
49
50 score
51}
52
53pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
54 family.fonts.iter().reduce(|acc, e| {
55 if exemplar_score(acc) >= exemplar_score(e) {
56 acc
57 } else {
58 e
59 }
60 })
61}
62
63fn iter_families(
64 root: &Path,
65 filter: Option<&Regex>,
66) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
67 WalkDir::new(root)
68 .into_iter()
69 .filter_map(|d| d.ok())
70 .filter(|d| d.file_name() == "METADATA.pb")
71 .filter(move |d| {
72 filter
73 .map(|r| r.find(&d.path().to_string_lossy()).is_some())
74 .unwrap_or(true)
75 })
76 .map(|d| {
77 (
78 d.path().to_path_buf(),
79 read_family(&fs::read_to_string(d.path()).expect("To read files!")),
80 )
81 })
82}
83
84pub fn iter_languages(root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
85 WalkDir::new(root)
86 .into_iter()
87 .filter_map(|d| d.ok())
88 .filter(|d| {
89 d.path()
90 .canonicalize()
91 .unwrap()
92 .to_str()
93 .unwrap()
94 .contains("gflanguages/data/languages")
95 && d.file_name().to_string_lossy().ends_with(".textproto")
96 })
97 .map(|d| read_language(&fs::read_to_string(d.path()).expect("To read files!")))
98}
99
100pub fn read_tags(root: &Path) -> Result<Vec<Tag>, Error> {
101 let mut tag_dir = root.to_path_buf();
102 tag_dir.push("tags/all");
103 let mut tags = Vec::new();
104 for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
105 let entry = entry.expect("To access tag dir entries");
106 if entry
107 .path()
108 .extension()
109 .expect("To have extensions")
110 .to_str()
111 .expect("utf-8")
112 != "csv"
113 {
114 continue;
115 }
116 let fd = File::open(&entry.path())?;
117 let rdr = BufReader::new(fd);
118 tags.extend(
119 rdr.lines()
120 .map(|s| s.expect("Valid tag lines"))
121 .map(|s| Tag::from_str(&s).expect("Valid tag lines")),
122 );
123 }
124 Ok(tags)
125}
126
127pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
128 let mut tag_metadata_file = root.to_path_buf();
129 tag_metadata_file.push("tags/tags_metadata.csv");
130 let mut metadata = Vec::new();
131
132 let fd = File::open(&tag_metadata_file)?;
133 let rdr = BufReader::new(fd);
134 metadata.extend(
135 rdr.lines()
136 .map(|s| s.expect("Valid tag lines"))
137 .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
138 );
139
140 Ok(metadata)
141}
142
/// Splits one CSV line into its comma-separated fields.
///
/// * Fields are trimmed of surrounding whitespace.
/// * A field that starts with `"` runs up to the first comma *after* its
///   closing quote, so commas inside the quotes do not split the field. The
///   quotes themselves are kept in the returned value.
/// * An empty input yields no fields, and nothing is emitted after a comma
///   that ends the input (`"a,"` yields `["a"]`).
///
/// # Panics
///
/// Panics if a field opens with `"` and there is no closing quote.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();
    while !rest.is_empty() {
        rest = rest.trim();
        // Byte offset from which to look for the field separator. For a
        // quoted field this is the position of the closing quote itself;
        // `find` is relative to the text after the opening quote, hence `1 +`.
        // Starting at the quote (always a char boundary) keeps commas inside
        // the quotes from being treated as separators.
        let search_from = match rest.strip_prefix('"') {
            Some(after_open) => 1 + after_open.find('"').expect("Close quote"),
            None => 0,
        };
        match rest[search_from..].find(',') {
            Some(offset) => {
                let comma = search_from + offset;
                values.push(rest[..comma].trim());
                rest = &rest[comma + 1..];
            }
            None => {
                // Last field: everything that is left.
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
166
/// One tag assignment, parsed from a single line of a tags csv file.
///
/// See the `FromStr` implementation for the accepted line formats.
#[derive(Clone, Debug)]
pub struct Tag {
    /// First field of the line (e.g. `Roboto Slab`).
    pub family: String,
    /// Second field of a four-field line (e.g. `wght@100`); empty when the
    /// line has only three fields.
    /// NOTE(review): presumably a location in the family's design space; confirm.
    pub loc: String,
    /// Tag identifier (e.g. `/quant/stroke_width_min`).
    pub tag: String,
    /// Numeric value from the last field of the line.
    pub value: f32,
}
174
175impl FromStr for Tag {
176 type Err = Error;
177
178 fn from_str(s: &str) -> Result<Self, Self::Err> {
179 let values = csv_values(s);
180 let (family, loc, tag, value) = match values[..] {
181 [family, tag, value] => (family, "", tag, value),
182 [family, loc, tag, value] => (family, loc, tag, value),
183 _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
184 };
185 Ok(Tag {
186 family: family.to_string(),
187 loc: loc.to_string(),
188 tag: tag.to_string(),
189 value: f32::from_str(value)
190 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
191 })
192 }
193}
194
/// Metadata describing one tag, parsed from a single four-field csv line.
#[derive(Clone, Debug)]
pub struct TagMetadata {
    /// Tag identifier (first field).
    pub tag: String,
    /// Second field, parsed as `f32`.
    /// NOTE(review): presumably the smallest value the tag may take; confirm.
    pub min_value: f32,
    /// Third field, parsed as `f32`.
    /// NOTE(review): presumably the largest value the tag may take; confirm.
    pub max_value: f32,
    /// Fourth field, stored verbatim.
    pub prompt_name: String,
}
202
203impl FromStr for TagMetadata {
204 type Err = Error;
205
206 fn from_str(s: &str) -> Result<Self, Self::Err> {
207 let values = csv_values(s);
208 eprintln!("{s} => {values:?}");
209 let [tag, min, max, prompt_name] = values[..] else {
210 return Err(Error::new(
211 ErrorKind::InvalidData,
212 "Unparseable tag metadata, wrong number of values",
213 ));
214 };
215 Ok(TagMetadata {
216 tag: tag.into(),
217 min_value: f32::from_str(min)
218 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
219 max_value: f32::from_str(max)
220 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
221 prompt_name: prompt_name.into(),
222 })
223 }
224}
225
/// Lazily loaded view of the data under a repository directory.
///
/// Every data set is read on first access and then cached for the lifetime
/// of the value.
pub struct GoogleFonts {
    /// Root directory that families, languages and tags are read from.
    repo_dir: PathBuf,
    /// When set, only `METADATA.pb` files whose path matches are loaded.
    family_filter: Option<Regex>,
    /// Cache of every metadata file found, paired with its parse outcome.
    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
    /// Cache of the parse outcome of every language definition found.
    languages: OnceCell<Vec<Result<LanguageProto, ParseError>>>,
    /// Cache mapping a font filename to the index of its family in `families`.
    family_by_font_file: OnceCell<HashMap<String, usize>>,
    /// Cache of the outcome of reading all tags.
    tags: OnceCell<Result<Vec<Tag>, Error>>,
    /// Cache of the outcome of reading the tag metadata.
    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
}
235
236impl GoogleFonts {
237 pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
238 Self {
239 repo_dir: p,
240 family_filter,
241 families: OnceCell::new(),
242 languages: OnceCell::new(),
243 family_by_font_file: OnceCell::new(),
244 tags: OnceCell::new(),
245 tag_metadata: OnceCell::new(),
246 }
247 }
248
249 pub fn tags(&self) -> Result<&[Tag], &Error> {
250 self.tags
251 .get_or_init(|| read_tags(&self.repo_dir))
252 .as_ref()
253 .map(|tags| tags.as_slice())
254 }
255
256 pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
257 self.tag_metadata
258 .get_or_init(|| read_tag_metadata(&self.repo_dir))
259 .as_ref()
260 .map(|metadata| metadata.as_slice())
261 }
262
263 pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
264 self.families
265 .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
266 .as_slice()
267 }
268
269 pub fn languages(&self) -> &[Result<LanguageProto, ParseError>] {
270 self.languages
271 .get_or_init(|| iter_languages(&self.repo_dir).collect())
272 .as_slice()
273 }
274
275 pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
276 self.languages()
277 .iter()
278 .filter_map(|l| l.as_ref().ok())
279 .find(|l| l.id() == lang_id)
280 }
281
282 fn family_by_font_file(&self) -> &HashMap<String, usize> {
283 self.family_by_font_file.get_or_init(|| {
284 self.families()
285 .iter()
286 .enumerate()
287 .filter(|(_, (_, f))| f.is_ok())
288 .flat_map(|(i, (_, f))| {
289 f.as_ref()
290 .unwrap()
291 .fonts
292 .iter()
293 .map(move |f| (f.filename().to_string(), i))
294 })
295 .collect()
296 })
297 }
298
299 pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
300 self.family_by_font_file()
301 .get(font.filename())
302 .copied()
303 .map(|i| {
304 let (p, f) = &self.families()[i];
305 (p.as_path(), f.as_ref().unwrap())
306 })
307 }
308
309 pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
310 let Some((family_path, _)) = self.family(font) else {
311 return None;
312 };
313 let mut font_file = family_path.parent().unwrap().to_path_buf();
314 font_file.push(font.filename());
315 if !font_file.exists() {
316 eprintln!("No such file as {font_file:?}");
317 }
318 font_file.exists().then_some(font_file)
319 }
320
321 pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
325 let mut primary_language: Option<&LanguageProto> = None;
327 if primary_language.is_none() && family.has_primary_language() {
328 if let Some(lang) = self.language(family.primary_language()) {
329 primary_language = Some(lang);
330 } else {
331 eprintln!(
332 "{} specifies invalid primary_language {}",
333 family.name(),
334 family.primary_language()
335 );
336 }
337 }
338 if primary_language.is_none() && family.has_primary_script() {
339 let lang = self
341 .languages()
342 .iter()
343 .filter_map(|r| r.as_ref().ok())
344 .filter(|l| l.has_script() && l.script() == family.primary_script())
345 .reduce(|acc, e| {
346 if acc.population() > e.population() {
347 acc
348 } else {
349 e
350 }
351 });
352 if let Some(lang) = lang {
353 primary_language = Some(lang);
354 } else {
355 eprintln!(
356 "{} specifies a primary_script that matches no languages {}",
357 family.name(),
358 family.primary_script()
359 );
360 }
361 }
362 if primary_language.is_none() {
363 primary_language = self.language("en_Latn");
364 }
365 primary_language
366 .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
367 }
368}
369
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locates the test data directory, trying the current directory first
    /// and then its parent.
    fn testdata_dir() -> std::path::PathBuf {
        ["./resources/testdata", "../resources/testdata"]
            .iter()
            .map(std::path::PathBuf::from)
            .find(|pb| pb.exists())
            .expect("resources/testdata not found relative to the working directory")
    }

    /// Reads a test data file, given its path relative to the test data directory.
    fn testdata_file_content(relative_path: &str) -> String {
        let mut p = testdata_dir();
        p.push(relative_path);
        fs::read_to_string(p).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let roboto = read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
        let font = exemplar(&roboto).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", font.filename());
    }

    #[test]
    fn wix_exemplar() {
        let wix = read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
        let font = exemplar(&wix).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", font.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
    }

    #[test]
    fn parse_wix_metadata() {
        read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let family = read_family(&testdata_file_content("kosugimaru-metadata.pb")).unwrap();
        assert_eq!(
            ("Jpan", "Invalid"),
            (family.primary_script(), family.primary_language())
        );
    }

    #[test]
    fn parse_tag3() {
        let tag = Tag::from_str("Roboto Slab, /quant/stroke_width_min, 26.31").expect("To parse");
        assert_eq!(tag.family, "Roboto Slab");
        assert_eq!(tag.loc, "");
        assert_eq!(tag.tag, "/quant/stroke_width_min");
        assert_eq!(tag.value, 26.31);
    }

    #[test]
    fn parse_tag4() {
        let tag = Tag::from_str("Roboto Slab, wght@100, /quant/stroke_width_min, 26.31")
            .expect("To parse");
        assert_eq!(tag.family, "Roboto Slab");
        assert_eq!(tag.loc, "wght@100");
        assert_eq!(tag.tag, "/quant/stroke_width_min");
        assert_eq!(tag.value, 26.31);
    }

    #[test]
    fn parse_tag_quoted() {
        let tag = Tag::from_str("Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97")
            .expect("To parse");
        assert_eq!(tag.family, "Georama");
        // The commas inside the quotes must not split the field.
        assert!(tag.loc.contains("ital,wght@1,100"));
        assert_eq!(tag.tag, "/quant/stroke_width_min");
        assert_eq!(tag.value, 16.97);
    }

    #[test]
    fn parse_tag_quoted2() {
        let tag = Tag::from_str("\"\",t,1").expect("To parse");
        assert_eq!(tag.tag, "t");
        assert_eq!(tag.value, 1.0);
    }
}
449}