google_fonts_sources/
lib.rs

1//! Finding sources for Google Fonts fonts
2//!
3//! # basic usage:
4//!
5//! ```no_run
6//! # use std::path::Path;
7//! use google_fonts_sources as gfsources;
8//! // get a list of repositories:
9//!
10//! let repo_cache = Path::new("~/where_i_want_to_checkout_fonts");
11//! let font_repos = gfsources::discover_sources(repo_cache).unwrap();
12//!
13//! // for each repo we find, do something with each source:
14//!
15//! for repo in &font_repos.sources {
16//!     let sources = match repo.get_sources(repo_cache) {
17//!         Ok(sources) => sources,
18//!         Err(e) => {
19//!             eprintln!("skipping repo '{}': '{e}'", repo.repo_name());
20//!             continue;
21//!         }
22//!     };
23//!
24//!     println!("repo '{}' contains sources {sources:?}", repo.repo_name());
25//! }
26//! ```
27
28use std::{
29    collections::{BTreeMap, BTreeSet, HashMap, HashSet},
30    path::{Path, PathBuf},
31};
32
33use serde::{de, Deserialize, Serialize};
34
35mod args;
36mod config;
37mod error;
38mod font_source;
39mod metadata;
40
41pub use args::Args;
42pub use config::Config;
43use error::UnwrapOrDie;
44pub use error::{BadConfig, Error, GitFail, LoadRepoError};
45pub use font_source::FontSource;
46use metadata::Metadata;
47
// URL the google/fonts repository is cloned from.
static GF_REPO_URL: &str = "https://github.com/google/fonts";
// Name of the metadata file looked up inside each font directory.
static METADATA_FILE: &str = "METADATA.pb";
// Name of an optional config file that may sit next to a font's metadata file.
static EXTERNAL_CONFIG_FILE: &str = "config.yaml";
// Path of the google/fonts checkout, relative to the git cache directory
// (mirrors github.com/google/fonts).
static GOOGLE_FONTS_REPO: &str = "google/fonts";

// The version stamped on every `SourceSet` produced by `discover_sources`.
const CURRENT_VERSION: Version = Version { major: 1, minor: 1 };
55
/// A (major, minor) version number.
///
/// Serializes as the string `"<major>.<minor>"`. Deserialization only accepts
/// major version 1.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Version {
    /// The major component of the version.
    pub major: u16,
    /// The minor component of the version.
    pub minor: u16,
}
62
/// A versioned file format representing a set of font sources
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[non_exhaustive]
pub struct SourceSet {
    /// The (major, minor) version. Serializes as a string.
    version: Version,
    /// Sha of the google/fonts repository
    ///
    /// When this field is missing from the serialized input, a fixed
    /// fallback sha is used instead.
    #[serde(default = "sha_of_google_slash_fonts_repo_as_of_writing")]
    fonts_repo_sha: String,
    /// The list of discovered sources.
    pub sources: Vec<FontSource>,
}
75
// Serde fallback for source sets that do not record a google/fonts sha: the
// HEAD of that repository at the time the field was introduced.
fn sha_of_google_slash_fonts_repo_as_of_writing() -> String {
    String::from("072f204fd0fce2f1cd6551a0c280684ed73b49f9")
}
80
81impl SourceSet {
82    /// Update the google/fonts repo to the correct commit for these sources.
83    ///
84    /// Will clone the repo if it does not exist in the cache dir. Returns an
85    /// error if a git operation fails.
86    pub fn update_fonts_repo(&self, git_cache_dir: &Path) -> Result<(), Error> {
87        let repo_path = git_cache_dir.join(GOOGLE_FONTS_REPO);
88        if !repo_path.exists() {
89            std::fs::create_dir_all(&repo_path)?;
90            clone_repo(GF_REPO_URL, &repo_path)?;
91        }
92        checkout_rev(&repo_path, &self.fonts_repo_sha)?;
93        Ok(())
94    }
95}
96
97/// entry point for the cli tool
98#[doc(hidden)] // only intended to be used from our binary
99pub fn run(args: &Args) {
100    let repos = discover_sources(&args.fonts_dir).unwrap_or_die(|e| eprintln!("{e}"));
101    let output = if args.list {
102        let urls = repos
103            .sources
104            .iter()
105            .map(|r| r.repo_url.as_str())
106            .collect::<Vec<_>>();
107        urls.join("\n")
108    } else {
109        serde_json::to_string_pretty(&repos)
110            .unwrap_or_die(|e| eprintln!("failed to serialize repo info: '{e}'"))
111    };
112
113    if let Some(out) = args.out.as_ref() {
114        std::fs::write(out, output).unwrap_or_die(|e| eprintln!("failed to write output: '{e}'"));
115    } else {
116        println!("{output}")
117    }
118}
119
120/// Discover repositories containing font source files.
121///
122/// Returns a vec of `FontSource` structs describing repositories containing
123/// known font sources.
124///
125/// This looks at every font in the [google/fonts] github repo, looks to see if
126/// we have a known upstream repository for that font, and then looks to see if
127/// that repo contains a config.yaml file.
128///
129/// The 'git_cache_dir' is the path to a directory where repositories will be
130/// checked out, if necessary. Because we check out lots of repos (and it is
131/// likely that the caller will want to check these out again later) it makes
132/// sense to cache these in most cases.
133///
134/// [google/fonts]: https://github.com/google/fonts
135pub fn discover_sources(git_cache_dir: &Path) -> Result<SourceSet, Error> {
136    let google_slash_fonts = git_cache_dir.join(GOOGLE_FONTS_REPO);
137    let fonts_repo_sha = update_google_fonts_checkout(&google_slash_fonts)?;
138    let candidates = find_metadata_files(&google_slash_fonts);
139    log::info!("found {} metadata files", candidates.len());
140    let sources: BTreeSet<_> = candidates
141        .into_iter()
142        .filter_map(|(meta, path)| {
143            let external_config_path = path.with_file_name(EXTERNAL_CONFIG_FILE);
144            let external_config = external_config_path
145                .exists()
146                .then(|| external_config_path.strip_prefix(git_cache_dir).unwrap());
147
148            let src = match external_config {
149                Some(config) => FontSource::with_external_config(meta.clone(), config),
150                None => FontSource::try_from(meta.clone()),
151            };
152            match src {
153                Ok(item) => Some(item),
154                Err(e) => {
155                    log::warn!("bad metadata for '{}': {e}", meta.name);
156                    None
157                }
158            }
159        })
160        .collect();
161
162    log::info!(
163        "found {} fonts with repo/commit/config fields",
164        sources.len()
165    );
166    let sources = sources.into_iter().collect();
167    let sources = mark_rev_conflicts(sources);
168    Ok(SourceSet {
169        version: CURRENT_VERSION,
170        sources,
171        fonts_repo_sha,
172    })
173}
174
175fn mark_rev_conflicts(mut sources: Vec<FontSource>) -> Vec<FontSource> {
176    let mut revs = HashMap::new();
177
178    for source in &sources {
179        *revs
180            .entry(source.repo_url.clone())
181            .or_insert(BTreeMap::new())
182            .entry(source.git_rev().to_owned())
183            .or_insert(0u32) += 1;
184    }
185
186    revs.retain(|_k, v| v.len() > 1);
187    // in some cases several sources will share the same rev, while another
188    // source has a specific rev; so we want the most common rev to be the 'default'.
189    // In the case of ties, we choose the (lexicographic) `max` rev. (This is
190    // arbitrary, but deterministic.)
191    let has_conflict = revs
192        .iter()
193        .flat_map(|(repo, v)| {
194            let most_common = v.iter().max_by_key(|(rev, v)| (**v, *rev)).unwrap().0;
195            v.keys()
196                // only mark repos that don't use the most common rev
197                .filter_map(move |rev| {
198                    (rev != most_common).then_some((repo.as_str(), rev.as_str()))
199                })
200        })
201        .collect::<HashSet<_>>();
202
203    // finally mark the repos we consider a conflict
204    for source in &mut sources {
205        if has_conflict.contains(&(source.repo_url.as_str(), source.git_rev())) {
206            source.has_rev_conflict = true;
207        }
208    }
209    sources
210}
211
212fn update_google_fonts_checkout(path: &Path) -> Result<String, Error> {
213    if !path.exists() {
214        log::info!("cloning {GF_REPO_URL} to {}", path.display());
215        std::fs::create_dir_all(path)?;
216        clone_repo(GF_REPO_URL, path)?;
217    } else {
218        log::info!("fetching latest from {GF_REPO_URL}");
219        fetch_latest(path)?;
220    }
221    get_git_rev(path).map_err(Into::into)
222}
223
224fn find_metadata_files(path: &Path) -> BTreeSet<(Metadata, PathBuf)> {
225    let licenses = ["ofl", "ufl", "apache"];
226    let mut result = BTreeSet::new();
227    for license in licenses {
228        let license_dir = path.join(license);
229        log::debug!("searching for candidates in {}", license_dir.display());
230        for font_dir in iter_license_subdirectories(&license_dir) {
231            let metadata_path = font_dir.join(METADATA_FILE);
232            let metadata = match Metadata::load(&metadata_path) {
233                Ok(metadata) => (metadata, metadata_path),
234                Err(e) => {
235                    log::debug!("no metadata for font {}: '{}'", font_dir.display(), e);
236                    continue;
237                }
238            };
239            result.insert(metadata);
240        }
241    }
242    result
243}
244
245/// Get the short sha of the current commit in the provided repository.
246///
247/// If no repo provided, run in current directory
248///
249/// returns `None` if the `git` command fails (for instance if the path is not
250/// a git repository)
251fn get_git_rev(repo_path: &Path) -> Result<String, GitFail> {
252    let mut cmd = std::process::Command::new("git");
253    cmd.args(["rev-parse", "HEAD"]).current_dir(repo_path);
254    let output = cmd.output()?;
255
256    if !output.status.success() {
257        let stderr = String::from_utf8_lossy(&output.stderr);
258        return Err(GitFail::GitError {
259            path: repo_path.to_owned(),
260            stderr: stderr.into_owned(),
261        });
262    }
263
264    Ok(std::str::from_utf8(&output.stdout)
265        .expect("rev is always ascii/hex string")
266        .trim()
267        .to_owned())
268}
269
270// try to checkout this rev.
271//
272// returns `true` if successful, `false` otherwise (indicating a git error)
273fn checkout_rev(repo_dir: &Path, rev: &str) -> Result<bool, GitFail> {
274    let sha = get_git_rev(repo_dir)?;
275    // the longer str is on the left, so we check if shorter str is a prefix
276    let (left, right) = if sha.len() > rev.len() {
277        (sha.as_str(), rev)
278    } else {
279        (rev, sha.as_str())
280    };
281    if left.starts_with(right) {
282        return Ok(true);
283    }
284    log::info!(
285        "repo {} needs fetch for {rev} (at {sha})",
286        repo_dir.display()
287    );
288    // checkouts might be shallow, so unshallow before looking for a rev:
289    let _ = std::process::Command::new("git")
290        .current_dir(repo_dir)
291        .args(["fetch", "--unshallow"])
292        .output();
293
294    // but if they're _not_ shallow, we need normal fetch :/
295    let _ = std::process::Command::new("git")
296        .current_dir(repo_dir)
297        .args(["fetch"])
298        .output();
299
300    let result = std::process::Command::new("git")
301        .current_dir(repo_dir)
302        .arg("checkout")
303        .arg(rev)
304        .output()?;
305
306    if result.status.success() {
307        Ok(true)
308    } else {
309        log::warn!("failed to find rev {rev} for {}", repo_dir.display());
310        Ok(false)
311    }
312}
313
314fn iter_license_subdirectories(path: &Path) -> impl Iterator<Item = PathBuf> {
315    let path_str = path.display();
316    let contents = std::fs::read_dir(path)
317        .unwrap_or_die(|e| eprintln!("failed to read '{path_str}' directory: '{e}'"));
318    contents.filter_map(|entry| entry.ok().map(|d| d.path()).filter(|p| p.is_dir()))
319}
320
321fn clone_repo(url: &str, to_dir: &Path) -> Result<(), GitFail> {
322    assert!(to_dir.exists());
323    let output = std::process::Command::new("git")
324        // if a repo requires credentials fail instead of waiting
325        .env("GIT_TERMINAL_PROMPT", "0")
326        .arg("clone")
327        .args(["--depth", "1"])
328        .arg(url)
329        .arg(to_dir)
330        .output()?;
331
332    if !output.status.success() {
333        let stderr = String::from_utf8_lossy(&output.stderr);
334        return Err(GitFail::GitError {
335            path: to_dir.to_owned(),
336            stderr: stderr.into_owned(),
337        });
338    }
339    Ok(())
340}
341
342/// On success returns whether there were any changes
343fn fetch_latest(path: &Path) -> Result<(), GitFail> {
344    let mut output = std::process::Command::new("git")
345        // if a repo requires credentials fail instead of waiting
346        .env("GIT_TERMINAL_PROMPT", "0")
347        .arg("fetch")
348        .current_dir(path)
349        .output()?;
350    if output.status.success() {
351        output = std::process::Command::new("git")
352            .arg("checkout")
353            .arg("origin/HEAD")
354            .current_dir(path)
355            .output()?;
356    }
357    if !output.status.success() {
358        let stderr = String::from_utf8_lossy(&output.stderr);
359        return Err(GitFail::GitError {
360            path: path.to_owned(),
361            stderr: stderr.into_owned(),
362        });
363    }
364    Ok(())
365}
366
367impl Serialize for Version {
368    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
369    where
370        S: serde::Serializer,
371    {
372        format!("{}.{}", self.major, self.minor).serialize(serializer)
373    }
374}
375
376// we currently only have one version, so let's keep this simple, we'll need
377// to figure out a better approach if we add more stuff in the future.
378impl<'de> Deserialize<'de> for Version {
379    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
380    where
381        D: serde::Deserializer<'de>,
382    {
383        let raw: &str = Deserialize::deserialize(deserializer)?;
384        let (major, minor) = raw
385            .split_once('.')
386            .ok_or(de::Error::custom("invalid version"))?;
387        let major = major.parse();
388        let minor = minor.parse();
389        match (major, minor) {
390            (Ok(major), Ok(minor)) if major != 1 => Err(de::Error::custom(format!(
391                "unsupported version {major}.{minor}"
392            ))),
393            (Ok(major), Ok(minor)) => Ok(Version { major, minor }),
394            _ => Err(de::Error::custom("invalid version")),
395        }
396    }
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    // Within a repo the most common rev wins; on a tie the max rev wins, and
    // every other rev of that repo is flagged as a conflict.
    #[test]
    fn mark_conflicts() {
        // (repo, rev, config, expected conflict)
        let cases = [
            ("hello", "abc", "config.yaml", false),
            ("hi", "abc", "config_one.yaml", false),
            ("hi", "def", "config_two.yaml", true),
            ("hi", "abc", "config_three.yaml", false),
            ("oopsy", "123", "config.yaml", true),
            ("oopsy", "456", "config_hi.yaml", false),
        ];

        let items: Vec<_> = cases
            .iter()
            .map(|&(repo, rev, config, _)| FontSource::for_test(repo, rev, config))
            .collect();
        let expected: Vec<bool> = cases.iter().map(|case| case.3).collect();

        let marked = mark_rev_conflicts(items);
        let actual: Vec<bool> = marked.iter().map(|item| item.has_rev_conflict).collect();

        assert_eq!(actual, expected);
    }

    // A source set survives a trip through JSON unchanged.
    #[test]
    fn roundtrip() {
        let original = SourceSet {
            version: Version { major: 1, minor: 0 },
            sources: vec![FontSource::for_test("hi", "abc", "config.yaml")],
            fonts_repo_sha: "abcdefg".into(),
        };

        let json = serde_json::to_string(&original).unwrap();
        let decoded: SourceSet = serde_json::from_str(&json).unwrap();

        assert_eq!(original, decoded);
    }

    // Source sets with a major version we do not know are rejected on load.
    #[test]
    #[should_panic(expected = "unsupported version")]
    fn deny_unknown_version() {
        let from_the_future = SourceSet {
            version: Version { major: 2, minor: 0 },
            sources: Vec::new(),
            fonts_repo_sha: "ohno".into(),
        };

        let json = serde_json::to_string(&from_the_future).unwrap();
        let _decoded: SourceSet = serde_json::from_str(&json).unwrap();
    }
}
459}