google_fonts_subsets/
lib.rs

1//! Google Fonts Subsets
2//!
3//! This is the collection of nam files (codepoint subsets) that are used
4//! to subset fonts before serving on the Google Fonts CSS API.
5//!
6//! It exports the subsets as arrays of codepoints - for example,
7//! the `latin` subset is exported as `LATIN: [u32; ...] = [0x0, 0x0d, ...];`.
8//!
9//! It also exports the `SUBSETS` array, which is a list of all subsets in
10//! the form of `(&str, &[u32])` tuples.
11//! This is useful for iterating over all subsets.
12use std::collections::HashSet;
13
14include!(concat!(env!("OUT_DIR"), "/subsets.rs"));
15
// Codepoints that `subsets_in_font` removes from the font's codepoint set
// before measuring subset coverage: U+0000 (NUL), U+000D (carriage return),
// U+0020 (space) and U+00A0 (no-break space).
const CONTROL_CHARS: [u32; 4] = [0x0000, 0x000D, 0x0020, 0x00A0];
17
18pub fn subsets_in_font(
19    codepoints: &HashSet<u32>,
20    min_pct: f32,
21    ext_min_pct: Option<f32>,
22) -> Vec<&'static str> {
23    let active_codepoints = codepoints
24        .iter()
25        .filter(|cp| !CONTROL_CHARS.contains(cp))
26        .collect::<HashSet<_>>();
27    let mut subsets = vec![];
28    for (subset, subset_codepoints) in SUBSETS.iter() {
29        let mut subset_codepoints = subset_codepoints.iter().collect::<HashSet<_>>();
30        if subset == &"Khmer" {
31            // Remove LATIN
32            subset_codepoints.retain(|cp| !LATIN.contains(cp));
33        }
34        let target_pct = if subset.ends_with("-ext") {
35            ext_min_pct.unwrap_or(min_pct)
36        } else {
37            min_pct
38        };
39
40        let overlap = active_codepoints.intersection(&subset_codepoints).count() as f32;
41        if 100.0 * overlap / subset_codepoints.len() as f32 >= target_pct {
42            subsets.push(*subset);
43        }
44    }
45    subsets
46}