1mod fonts_public;
2mod languages_public;
3
4use std::{
5 cell::OnceCell,
6 collections::HashMap,
7 fs::{self, File},
8 io::{BufRead, BufReader, Error, ErrorKind},
9 path::{Path, PathBuf},
10 str::FromStr,
11};
12
13pub use fonts_public::*;
14pub use languages_public::{
15 ExemplarCharsProto, LanguageProto, RegionProto, SampleTextProto, ScriptProto,
16};
17use protobuf::text_format::ParseError;
18use regex::Regex;
19use walkdir::WalkDir;
20
21pub fn read_family(s: &str) -> Result<FamilyProto, ParseError> {
22 if s.contains("position") {
23 let re = Regex::new(r"(?m)position\s+\{[^}]*\}").expect("Valid re");
24 let s = re.replace_all(s, "");
25 protobuf::text_format::parse_from_str(&s)
26 } else {
27 protobuf::text_format::parse_from_str(s)
28 }
29}
30
31pub fn read_language(s: &str) -> Result<LanguageProto, ParseError> {
32 protobuf::text_format::parse_from_str(s)
33}
34
35fn exemplar_score(font: &FontProto, preferred_style: FontStyle, preferred_weight: i32) -> i32 {
36 let mut score = 0;
37 if font.style() == preferred_style.style() {
39 score += 16;
40 }
41
42 score -= (font.weight() - preferred_weight).abs() / 100;
44
45 if font.weight() > preferred_weight {
47 score += 1;
48 }
49
50 if font.filename().contains("].") {
52 score += 2;
53 }
54
55 score
56}
57
58pub fn exemplar(family: &FamilyProto) -> Option<&FontProto> {
59 fn score(font: &FontProto) -> i32 {
60 exemplar_score(font, FontStyle::Normal, 400)
61 }
62 family
63 .fonts
64 .iter()
65 .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
66}
67
/// The slant of a font, used when expressing a style preference.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum FontStyle {
    /// Upright; compared against the style string `"normal"`.
    Normal,
    /// Slanted; compared against the style string `"italic"`.
    Italic,
}

impl FontStyle {
    /// Returns the string that is compared against a font's `style()` value
    /// when scoring.
    fn style(&self) -> &str {
        match *self {
            Self::Normal => "normal",
            Self::Italic => "italic",
        }
    }
}
82
83pub fn select_font(
84 family: &FamilyProto,
85 preferred_style: FontStyle,
86 preferred_weight: i32,
87) -> Option<&FontProto> {
88 let score =
89 |font: &FontProto| -> i32 { exemplar_score(font, preferred_style, preferred_weight) };
90 family
91 .fonts
92 .iter()
93 .reduce(|acc, e| if score(acc) >= score(e) { acc } else { e })
94}
95
96fn iter_families(
97 root: &Path,
98 filter: Option<&Regex>,
99) -> impl Iterator<Item = (PathBuf, Result<FamilyProto, ParseError>)> {
100 WalkDir::new(root)
101 .into_iter()
102 .filter_map(|d| d.ok())
103 .filter(|d| d.file_name() == "METADATA.pb")
104 .filter(move |d| {
105 filter
106 .map(|r| r.find(&d.path().to_string_lossy()).is_some())
107 .unwrap_or(true)
108 })
109 .map(|d| {
110 (
111 d.path().to_path_buf(),
112 read_family(&fs::read_to_string(d.path()).expect("To read files!")),
113 )
114 })
115}
116
117pub fn iter_languages(root: &Path) -> impl Iterator<Item = Result<LanguageProto, ParseError>> {
118 WalkDir::new(root)
119 .into_iter()
120 .filter_map(|d| d.ok())
121 .filter(|d| {
122 d.path()
123 .canonicalize()
124 .unwrap()
125 .to_str()
126 .unwrap()
127 .contains("gflanguages/data/languages")
128 && d.file_name().to_string_lossy().ends_with(".textproto")
129 })
130 .map(|d| read_language(&fs::read_to_string(d.path()).expect("To read files!")))
131}
132
133pub fn read_tags(root: &Path) -> Result<Vec<Tag>, Error> {
134 let mut tag_dir = root.to_path_buf();
135 tag_dir.push("tags/all");
136 let mut tags = Vec::new();
137 for entry in fs::read_dir(&tag_dir).expect("To read tag dir") {
138 let entry = entry.expect("To access tag dir entries");
139 if entry
140 .path()
141 .extension()
142 .expect("To have extensions")
143 .to_str()
144 .expect("utf-8")
145 != "csv"
146 {
147 continue;
148 }
149 let fd = File::open(&entry.path())?;
150 let rdr = BufReader::new(fd);
151 tags.extend(
152 rdr.lines()
153 .map(|s| s.expect("Valid tag lines"))
154 .map(|s| Tag::from_str(&s).expect("Valid tag lines")),
155 );
156 }
157 Ok(tags)
158}
159
160pub fn read_tag_metadata(root: &Path) -> Result<Vec<TagMetadata>, Error> {
161 let mut tag_metadata_file = root.to_path_buf();
162 tag_metadata_file.push("tags/tags_metadata.csv");
163 let mut metadata = Vec::new();
164
165 let fd = File::open(&tag_metadata_file)?;
166 let rdr = BufReader::new(fd);
167 metadata.extend(
168 rdr.lines()
169 .map(|s| s.expect("Valid tag lines"))
170 .map(|s| TagMetadata::from_str(&s).expect("Valid tag metadata lines")),
171 );
172
173 Ok(metadata)
174}
175
/// Splits one CSV line into its trimmed values.
///
/// A value that starts with `"` runs to the first comma *after* its closing
/// quote, so commas inside the quotes do not split it. The surrounding quotes
/// are kept in the returned value.
///
/// Notes on edge cases:
/// * An empty input yields no values, and a trailing comma does not produce a
///   trailing empty value.
/// * If a quoted value has no closing quote, the rest of the line is returned
///   as the final value; it is up to the caller to reject it.
fn csv_values(s: &str) -> Vec<&str> {
    let mut values = Vec::new();
    let mut rest = s;
    while !rest.is_empty() {
        rest = rest.trim();

        // Byte offset from which to look for the separating comma: the start
        // for a bare value, just past the closing quote for a quoted one.
        let search_from = match rest.strip_prefix('"') {
            Some(after_open) => match after_open.find('"') {
                // `close` is relative to the text after the opening quote:
                // +1 for the opening quote, +1 to step past the closing one.
                Some(close) => close + 2,
                None => rest.len(),
            },
            None => 0,
        };

        match rest[search_from..].find(',') {
            Some(offset) => {
                let comma = search_from + offset;
                values.push(rest[..comma].trim());
                rest = &rest[comma + 1..];
            }
            None => {
                values.push(rest);
                rest = "";
            }
        }
    }
    values
}
199
/// One row of a tag CSV file, as parsed by `Tag::from_str`.
#[derive(Clone, Debug)]
pub struct Tag {
    /// First CSV column (presumably the family name, e.g. `Roboto Slab`).
    pub family: String,
    /// Optional second column; empty when the row has only three values.
    /// Rows seen in tests use values such as `wght@100`.
    pub loc: String,
    /// The tag column, e.g. `/quant/stroke_width_min`.
    pub tag: String,
    /// The last column, parsed as a float.
    pub value: f32,
}
207
208impl FromStr for Tag {
209 type Err = Error;
210
211 fn from_str(s: &str) -> Result<Self, Self::Err> {
212 let values = csv_values(s);
213 let (family, loc, tag, value) = match values[..] {
214 [family, tag, value] => (family, "", tag, value),
215 [family, loc, tag, value] => (family, loc, tag, value),
216 _ => return Err(Error::new(ErrorKind::InvalidData, "Unparseable tag")),
217 };
218 Ok(Tag {
219 family: family.to_string(),
220 loc: loc.to_string(),
221 tag: tag.to_string(),
222 value: f32::from_str(value)
223 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid tag value"))?,
224 })
225 }
226}
227
/// One row of the tag metadata CSV file, as parsed by `TagMetadata::from_str`.
#[derive(Clone, Debug)]
pub struct TagMetadata {
    /// First CSV column: the tag this row describes.
    pub tag: String,
    /// Second CSV column, parsed as a float (presumably the smallest value the
    /// tag may take — TODO confirm).
    pub min_value: f32,
    /// Third CSV column, parsed as a float (presumably the largest value the
    /// tag may take — TODO confirm).
    pub max_value: f32,
    /// Fourth CSV column, stored verbatim.
    pub prompt_name: String,
}
235
236impl FromStr for TagMetadata {
237 type Err = Error;
238
239 fn from_str(s: &str) -> Result<Self, Self::Err> {
240 let values = csv_values(s);
241 let [tag, min, max, prompt_name] = values[..] else {
242 return Err(Error::new(
243 ErrorKind::InvalidData,
244 "Unparseable tag metadata, wrong number of values",
245 ));
246 };
247 Ok(TagMetadata {
248 tag: tag.into(),
249 min_value: f32::from_str(min)
250 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
251 max_value: f32::from_str(max)
252 .map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid min value"))?,
253 prompt_name: prompt_name.into(),
254 })
255 }
256}
257
/// Lazily loaded view of a fonts repository checkout.
///
/// Nothing is read from disk at construction; each `OnceCell` below is filled
/// on first use by the matching accessor and then reused.
pub struct GoogleFonts {
    /// Root directory that all walks and tag lookups start from.
    repo_dir: PathBuf,
    /// Optional regex restricting which `METADATA.pb` paths are loaded.
    family_filter: Option<Regex>,
    /// Cached `(METADATA.pb path, parse result)` pairs.
    families: OnceCell<Vec<(PathBuf, Result<FamilyProto, ParseError>)>>,
    /// Cached parse results of the language definitions.
    languages: OnceCell<Vec<Result<LanguageProto, ParseError>>>,
    /// Cached map from font filename to an index into `families`.
    family_by_font_file: OnceCell<HashMap<String, usize>>,
    /// Cached outcome of `read_tags`.
    tags: OnceCell<Result<Vec<Tag>, Error>>,
    /// Cached outcome of `read_tag_metadata`.
    tag_metadata: OnceCell<Result<Vec<TagMetadata>, Error>>,
}
267
268impl GoogleFonts {
269 pub fn new(p: PathBuf, family_filter: Option<Regex>) -> Self {
270 Self {
271 repo_dir: p,
272 family_filter,
273 families: OnceCell::new(),
274 languages: OnceCell::new(),
275 family_by_font_file: OnceCell::new(),
276 tags: OnceCell::new(),
277 tag_metadata: OnceCell::new(),
278 }
279 }
280
281 pub fn tags(&self) -> Result<&[Tag], &Error> {
282 self.tags
283 .get_or_init(|| read_tags(&self.repo_dir))
284 .as_ref()
285 .map(|tags| tags.as_slice())
286 }
287
288 pub fn tag_metadata(&self) -> Result<&[TagMetadata], &Error> {
289 self.tag_metadata
290 .get_or_init(|| read_tag_metadata(&self.repo_dir))
291 .as_ref()
292 .map(|metadata| metadata.as_slice())
293 }
294
295 pub fn families(&self) -> &[(PathBuf, Result<FamilyProto, ParseError>)] {
296 self.families
297 .get_or_init(|| iter_families(&self.repo_dir, self.family_filter.as_ref()).collect())
298 .as_slice()
299 }
300
301 pub fn languages(&self) -> &[Result<LanguageProto, ParseError>] {
302 self.languages
303 .get_or_init(|| iter_languages(&self.repo_dir).collect())
304 .as_slice()
305 }
306
307 pub fn language(&self, lang_id: &str) -> Option<&LanguageProto> {
308 self.languages()
309 .iter()
310 .filter_map(|l| l.as_ref().ok())
311 .find(|l| l.id() == lang_id)
312 }
313
314 fn family_by_font_file(&self) -> &HashMap<String, usize> {
315 self.family_by_font_file.get_or_init(|| {
316 self.families()
317 .iter()
318 .enumerate()
319 .filter(|(_, (_, f))| f.is_ok())
320 .flat_map(|(i, (_, f))| {
321 f.as_ref()
322 .unwrap()
323 .fonts
324 .iter()
325 .map(move |f| (f.filename().to_string(), i))
326 })
327 .collect()
328 })
329 }
330
331 pub fn family(&self, font: &FontProto) -> Option<(&Path, &FamilyProto)> {
332 self.family_by_font_file()
333 .get(font.filename())
334 .copied()
335 .map(|i| {
336 let (p, f) = &self.families()[i];
337 (p.as_path(), f.as_ref().unwrap())
338 })
339 }
340
341 pub fn find_font_binary(&self, font: &FontProto) -> Option<PathBuf> {
342 let Some((family_path, _)) = self.family(font) else {
343 return None;
344 };
345 let mut font_file = family_path.parent().unwrap().to_path_buf();
346 font_file.push(font.filename());
347 if !font_file.exists() {
348 eprintln!("No such file as {font_file:?}");
349 }
350 font_file.exists().then_some(font_file)
351 }
352
353 pub fn primary_language(&self, family: &FamilyProto) -> &LanguageProto {
357 let mut primary_language: Option<&LanguageProto> = None;
359 if primary_language.is_none() && family.has_primary_language() {
360 if let Some(lang) = self.language(family.primary_language()) {
361 primary_language = Some(lang);
362 } else {
363 eprintln!(
364 "{} specifies invalid primary_language {}",
365 family.name(),
366 family.primary_language()
367 );
368 }
369 }
370 if primary_language.is_none() && family.has_primary_script() {
371 let lang = self
373 .languages()
374 .iter()
375 .filter_map(|r| r.as_ref().ok())
376 .filter(|l| l.has_script() && l.script() == family.primary_script())
377 .reduce(|acc, e| {
378 if acc.population() > e.population() {
379 acc
380 } else {
381 e
382 }
383 });
384 if let Some(lang) = lang {
385 primary_language = Some(lang);
386 } else {
387 eprintln!(
388 "{} specifies a primary_script that matches no languages {}",
389 family.name(),
390 family.primary_script()
391 );
392 }
393 }
394 if primary_language.is_none() {
395 primary_language = self.language("en_Latn");
396 }
397 primary_language
398 .unwrap_or_else(|| panic!("Not even our final fallback worked for {}", family.name()))
399 }
400}
401
#[cfg(test)]
mod tests {

    use std::fs;

    use super::*;

    /// Locates the test fixture directory, trying the current directory first
    /// and then its parent.
    fn testdata_dir() -> std::path::PathBuf {
        let candidates = ["./resources/testdata", "../resources/testdata"];
        candidates
            .iter()
            .map(std::path::PathBuf::from)
            .find(|candidate| candidate.exists())
            .unwrap()
    }

    /// Reads a fixture file, given its path relative to the fixture directory.
    fn testdata_file_content(relative_path: &str) -> String {
        let path = testdata_dir().join(relative_path);
        fs::read_to_string(path).unwrap()
    }

    #[test]
    fn roboto_exemplar() {
        let family = read_family(&testdata_file_content("roboto-metadata.pb")).unwrap();
        let chosen = exemplar(&family).unwrap();
        assert_eq!("Roboto[wdth,wght].ttf", chosen.filename());
    }

    #[test]
    fn wix_exemplar() {
        let family = read_family(&testdata_file_content("wixmadefortext-metadata.pb")).unwrap();
        let chosen = exemplar(&family).unwrap();
        assert_eq!("WixMadeforText[wght].ttf", chosen.filename());
    }

    #[test]
    fn parse_roboto_metadata() {
        let text = testdata_file_content("roboto-metadata.pb");
        read_family(&text).unwrap();
    }

    #[test]
    fn parse_wix_metadata() {
        let text = testdata_file_content("wixmadefortext-metadata.pb");
        read_family(&text).unwrap();
    }

    #[test]
    fn parse_primary_lang_script_metadata() {
        let kosugi = read_family(&testdata_file_content("kosugimaru-metadata.pb")).unwrap();
        assert_eq!(
            ("Jpan", "Invalid"),
            (kosugi.primary_script(), kosugi.primary_language())
        );
    }

    /// Three-value form: family, tag, value.
    #[test]
    fn parse_tag3() {
        "Roboto Slab, /quant/stroke_width_min, 26.31"
            .parse::<Tag>()
            .expect("To parse");
    }

    /// Four-value form: family, loc, tag, value.
    #[test]
    fn parse_tag4() {
        "Roboto Slab, wght@100, /quant/stroke_width_min, 26.31"
            .parse::<Tag>()
            .expect("To parse");
    }

    /// A quoted value may contain commas.
    #[test]
    fn parse_tag_quoted() {
        "Georama, \"ital,wght@1,100\", /quant/stroke_width_min, 16.97"
            .parse::<Tag>()
            .expect("To parse");
    }

    /// An empty quoted value is accepted.
    #[test]
    fn parse_tag_quoted2() {
        "\"\",t,1".parse::<Tag>().expect("To parse");
    }
}