1mod fonts_public;
2mod languages_public;
3
4use std::{
5 cell::OnceCell,
6 collections::HashMap,
7 fs::{self, File},
8 io::{BufRead, BufReader, Error, ErrorKind},
9 path::{Path, PathBuf},
10 str::FromStr,
11};
12
13pub use fonts_public::*;
14pub use languages_public::{
15 ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
16};
17use protobuf::text_format::ParseError;
18use regex::Regex;
19use walkdir::WalkDir;
20
21pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
22 if s.contains("position") {
23 let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
24 let s = re.replace_all(s, "");
25 protobuf::text_format::parse_from_str(&s)
26 } else {
27 protobuf::text_format::parse_from_str(s)
28 }
29}
30
31pub fn read_language(s: &str) -> Result<LanguageProto, ParseError> {
32 protobuf::text_format::parse_from_str(s)
33}
34
35fn exemplar_score(font: &FontProto) -> i32 {
36 let mut score = 0;
37 if font.style() == "normal" {
39 score += 16;
40 }
41
42 score -= (font.weight() - 400) / 100;
44
45 if font.filename().contains("].") {
47 score += 1;
48 }
49
50 score
51}
52
53pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
54 family.fonts.iter().reduce(|acc, e| {
55 if exemplar_score(acc) >= exemplar_score(e) {
56 acc
57 } else {
58 e
59 }
60 })
61}
62
63fn iter_families(
64 root: &Path,
65 filter: Option<&Regex>,
66) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
67 WalkDir::new(root)
68 .into_iter()
69 .filter_map(|d| d.ok())
70 .filter(|d| d.file_name() == "METADATA.pb")
71 .filter(move |d| {
72 filter
73 .map(|r| r.find(&d.path().to_string_lossy()).is_some())
74 .unwrap_or(true)
75 })
76 .map(|d| {
77 (
78 d.path().to_path_buf(),
79 read_family(&fs::read_to_string(d.path()).expect("To read files!")),
80 )
81 })
82}
83
84pub fn iter_languages(root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
85 WalkDir::new(root)
86 .into_iter()
87 .filter_map(|d| d.ok())
88 .filter(|d| {
89 d.path()
90 .canonicalize()
91 .unwrap()
92 .to_str()
93 .unwrap()
94 .contains("gflanguages/data/languages")
95 && d.file_name().to_string_lossy().ends_with(".textproto")
96 })
97 .map(|d| read_language(&fs::read_to_string(d.path()).expect("To read files!")))
98}
99
100pub fn read_tags(root: &Path) -> Result<Vec<Tag>, Error> {
101 let mut tag_dir = root.to_path_buf();
102 tag_dir.push("tags/all");
103 let mut tags = Vec::new();
104 for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
105 let entry = entry.expect("To access tag dir entries");
106 if entry
107 .path()
108 .extension()
109 .expect("To have extensions")
110 .to_str()
111 .expect("utf-8")
112 != "csv"
113 {
114 continue;
115 }
116 let fd = File::open(&entry.path())?;
117 let rdr = BufReader::new(fd);
118 tags.extend(
119 rdr.lines()
120 .map(|s| s.expect("Valid tag lines"))
121 .map(|s| Tag::from_str(&s).expect("Valid tag lines")),
122 );
123 }
124 Ok(tags)
125}
126
127pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
128 let mut tag_metadata_file = root.to_path_buf();
129 tag_metadata_file.push("tags/tags_metadata.csv");
130 let mut metadata = Vec::new();
131
132 let fd = File::open(&tag_metadata_file)?;
133 let rdr = BufReader::new(fd);
134 metadata.extend(
135 rdr.lines()
136 .map(|s| s.expect("Valid tag lines"))
137 .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
138 );
139
140 Ok(metadata)
141}
142
/// Splits one CSV line into its comma-separated fields.
///
/// Each field is trimmed of surrounding whitespace. A field that starts with
/// `"` extends to the first comma *after* its closing quote, so commas inside
/// the quotes do not split it; the quotes themselves are kept in the returned
/// value. An empty input yields no fields, and nothing is emitted after a
/// trailing comma.
///
/// # Panics
///
/// Panics if a field starts with `"` but has no closing quote.
fn csv_values(s: &str) -> Vec<&str> {
    let mut rest = s;
    let mut values = Vec::new();

    while !rest.is_empty() {
        rest = rest.trim();

        // Where to start looking for the separator. For a quoted field this is
        // the byte index of the closing quote *within `rest`*: `find` runs on
        // the text after the opening quote, hence the `+ 1`. Starting any
        // earlier could match a comma inside the quotes or land in the middle
        // of a multi-byte character.
        let search_from = match rest.strip_prefix('"') {
            Some(after_open) => after_open.find('"').expect("Close quote") + 1,
            None => 0,
        };

        match rest[search_from..].find(',') {
            Some(offset) => {
                let (value, tail) = rest.split_at(search_from + offset);
                values.push(value.trim());
                // `tail` starts with the separator itself; step over it.
                rest = &tail[1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }

    values
}
166
167#[derive(Clone, Debug)]
168pub struct Tag {
169 pub family: String,
170 pub loc: String,
171 pub tag: String,
172 pub value: f32,
173}
174
175impl FromStr for Tag {
176 type Err = Error;
177
178 fn from_str(s: &str) -> Result<Self, Self::Err> {
179 let values = csv_values(s);
180 let (family, loc, tag, value) = match values[..] {
181 [family, tag, value] => (family, "", tag, value),
182 [family, loc, tag, value] => (family, loc, tag, value),
183 _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
184 };
185 Ok(Tag {
186 family: family.to_string(),
187 loc: loc.to_string(),
188 tag: tag.to_string(),
189 value: f32::from_str(value)
190 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
191 })
192 }
193}
194
195#[derive(Clone, Debug)]
196pub struct TagMetadata {
197 pub tag: String,
198 pub min_value: f32,
199 pub max_value: f32,
200 pub prompt_name: String,
201}
202
203impl FromStr for TagMetadata {
204 type Err = Error;
205
206 fn from_str(s: &str) -> Result<Self, Self::Err> {
207 let values = csv_values(s);
208 let [tag, min, max, prompt_name] = values[..] else {
209 return Err(Error::new(
210 ErrorKind::InvalidData,
211 "Unparseable tag metadata, wrong number of values",
212 ));
213 };
214 Ok(TagMetadata {
215 tag: tag.into(),
216 min_value: f32::from_str(min)
217 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
218 max_value: f32::from_str(max)
219 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
220 prompt_name: prompt_name.into(),
221 })
222 }
223}
224
225pub struct GoogleFonts {
226 repo_dir: PathBuf,
227 family_filter: Option<Regex>,
228 families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
229 languages: OnceCell<Vec<Result<LanguageProto, ParseError>>>,
230 family_by_font_file: OnceCell<HashMap<String, usize>>,
231 tags: OnceCell<Result<Vec<Tag>, Error>>,
232 tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
233}
234
235impl GoogleFonts {
236 pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
237 Self {
238 repo_dir: p,
239 family_filter,
240 families: OnceCell::new(),
241 languages: OnceCell::new(),
242 family_by_font_file: OnceCell::new(),
243 tags: OnceCell::new(),
244 tag_metadata: OnceCell::new(),
245 }
246 }
247
248 pub fn tags(&self) -> Result<&[Tag], &Error> {
249 self.tags
250 .get_or_init(|| read_tags(&self.repo_dir))
251 .as_ref()
252 .map(|tags| tags.as_slice())
253 }
254
255 pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
256 self.tag_metadata
257 .get_or_init(|| read_tag_metadata(&self.repo_dir))
258 .as_ref()
259 .map(|metadata| metadata.as_slice())
260 }
261
262 pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
263 self.families
264 .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
265 .as_slice()
266 }
267
268 pub fn languages(&self) -> &[Result<LanguageProto, ParseError>] {
269 self.languages
270 .get_or_init(|| iter_languages(&self.repo_dir).collect())
271 .as_slice()
272 }
273
274 pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
275 self.languages()
276 .iter()
277 .filter_map(|l| l.as_ref().ok())
278 .find(|l| l.id() == lang_id)
279 }
280
281 fn family_by_font_file(&self) -> &HashMap<String, usize> {
282 self.family_by_font_file.get_or_init(|| {
283 self.families()
284 .iter()
285 .enumerate()
286 .filter(|(_, (_, f))| f.is_ok())
287 .flat_map(|(i, (_, f))| {
288 f.as_ref()
289 .unwrap()
290 .fonts
291 .iter()
292 .map(move |f| (f.filename().to_string(), i))
293 })
294 .collect()
295 })
296 }
297
298 pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
299 self.family_by_font_file()
300 .get(font.filename())
301 .copied()
302 .map(|i| {
303 let (p, f) = &self.families()[i];
304 (p.as_path(), f.as_ref().unwrap())
305 })
306 }
307
308 pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
309 let Some((family_path, _)) = self.family(font) else {
310 return None;
311 };
312 let mut font_file = family_path.parent().unwrap().to_path_buf();
313 font_file.push(font.filename());
314 if !font_file.exists() {
315 eprintln!("No such file as {font_file:?}");
316 }
317 font_file.exists().then_some(font_file)
318 }
319
320 pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
324 let mut primary_language: Option<&LanguageProto> = None;
326 if primary_language.is_none() && family.has_primary_language() {
327 if let Some(lang) = self.language(family.primary_language()) {
328 primary_language = Some(lang);
329 } else {
330 eprintln!(
331 "{} specifies invalid primary_language {}",
332 family.name(),
333 family.primary_language()
334 );
335 }
336 }
337 if primary_language.is_none() && family.has_primary_script() {
338 let lang = self
340 .languages()
341 .iter()
342 .filter_map(|r| r.as_ref().ok())
343 .filter(|l| l.has_script() && l.script() == family.primary_script())
344 .reduce(|acc, e| {
345 if acc.population() > e.population() {
346 acc
347 } else {
348 e
349 }
350 });
351 if let Some(lang) = lang {
352 primary_language = Some(lang);
353 } else {
354 eprintln!(
355 "{} specifies a primary_script that matches no languages {}",
356 family.name(),
357 family.primary_script()
358 );
359 }
360 }
361 if primary_language.is_none() {
362 primary_language = self.language("en_Latn");
363 }
364 primary_language
365 .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
366 }
367}
368
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locates the test data directory, trying the two relative locations the
    /// tests may be run from.
    fn testdata_dir() -> std::path::PathBuf {
        ["./resources/testdata", "../resources/testdata"]
            .iter()
            .map(std::path::PathBuf::from)
            .find(|pb| pb.exists())
            .unwrap()
    }

    /// Reads a file below the test data directory into a string.
    fn testdata_file_content(relative_path: &str) -> String {
        let mut p = testdata_dir();
        p.push(relative_path);
        fs::read_to_string(p).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let roboto = read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
        let font = exemplar(&roboto).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", font.filename());
    }

    #[test]
    fn wix_exemplar() {
        let wix = read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
        let font = exemplar(&wix).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", font.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
    }

    #[test]
    fn parse_wix_metadata() {
        read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let family = read_family(&testdata_file_content("kosugimaru-metadata.pb")).unwrap();
        assert_eq!(
            ("Jpan", "Invalid"),
            (family.primary_script(), family.primary_language())
        );
    }

    // The tag tests check the parsed fields, not merely that parsing succeeded.

    #[test]
    fn parse_tag3() {
        let tag = Tag::from_str("Roboto Slab, /quant/stroke_width_min, 26.31").expect("To parse");
        assert_eq!("Roboto Slab", tag.family);
        assert_eq!("", tag.loc);
        assert_eq!("/quant/stroke_width_min", tag.tag);
        assert_eq!(26.31, tag.value);
    }

    #[test]
    fn parse_tag4() {
        let tag = Tag::from_str("Roboto Slab, wght@100, /quant/stroke_width_min, 26.31")
            .expect("To parse");
        assert_eq!("Roboto Slab", tag.family);
        assert_eq!("wght@100", tag.loc);
        assert_eq!("/quant/stroke_width_min", tag.tag);
        assert_eq!(26.31, tag.value);
    }

    #[test]
    fn parse_tag_quoted() {
        let tag = Tag::from_str("Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97")
            .expect("To parse");
        assert_eq!("Georama", tag.family);
        // The commas inside the quotes must not split the location column.
        assert!(tag.loc.contains("ital,wght@1,100"));
        assert_eq!("/quant/stroke_width_min", tag.tag);
        assert_eq!(16.97, tag.value);
    }

    #[test]
    fn parse_tag_quoted2() {
        let tag = Tag::from_str("\"\",t,1").expect("To parse");
        assert_eq!("t", tag.tag);
        assert_eq!(1.0, tag.value);
    }
}