Skip to main content

google_fonts_subsets/
lib.rs

1//! Google Fonts Subsets
2//!
3//! This is the collection of nam files (codepoint subsets) that are used
4//! to subset fonts before serving on the Google Fonts CSS API.
5//!
6//! It exports the subsets as arrays of codepoints - for example,
7//! the `latin` subset is exported as `LATIN: [u32; ...] = [0x0, 0x0d, ...];`.
8//!
9//! It also exports the `SUBSETS` array, which is a list of all subsets in
10//! the form of `(&str, &[u32])` tuples.
11//! This is useful for iterating over all subsets.
12use std::collections::HashSet;
13
14include!(concat!(env!("OUT_DIR"), "/subsets.rs"));
15
/// Codepoints that `subsets_in_font` removes from a font's codepoint set
/// before measuring subset coverage: U+0000, U+000D, U+0020 and U+00A0.
16const CONTROL_CHARS: [u32; 4] = [0x0000, 0x000D, 0x0020, 0x00A0];
17
18/// Determines which subsets are present in a font based on the codepoints it contains.
19///
20/// # Arguments
21/// * `codepoints` - A set of codepoints present in the font.
22/// * `min_pct` - The minimum percentage of codepoints from a subset that must be present for it to be considered included.
23/// * `ext_min_pct` - An optional minimum percentage for subsets that end with "-ext
24pub fn subsets_in_font(
25    codepoints: &HashSet<u32>,
26    min_pct: f32,
27    ext_min_pct: Option<f32>,
28) -> Vec<&'static str> {
29    let active_codepoints = codepoints
30        .iter()
31        .filter(|cp| !CONTROL_CHARS.contains(cp))
32        .collect::<HashSet<_>>();
33    let mut subsets = vec![];
34    for (subset, subset_codepoints) in SUBSETS.iter() {
35        let mut subset_codepoints = subset_codepoints.iter().collect::<HashSet<_>>();
36        if subset == &"Khmer" {
37            // Remove LATIN
38            subset_codepoints.retain(|cp| !LATIN.contains(cp));
39        }
40        let target_pct = if subset.ends_with("-ext") {
41            ext_min_pct.unwrap_or(min_pct)
42        } else {
43            min_pct
44        };
45
46        let overlap = active_codepoints.intersection(&subset_codepoints).count() as f32;
47        if 100.0 * overlap / subset_codepoints.len() as f32 >= target_pct {
48            subsets.push(*subset);
49        }
50    }
51    subsets
52}
53
54/// Returns the subsets that a given codepoint belongs to.
55pub fn subsets_for_codepoint(codepoint: u32) -> impl Iterator<Item = &'static str> {
56    SUBSETS
57        .iter()
58        .filter_map(move |(subset, subset_codepoints)| {
59            if subset_codepoints.contains(&codepoint) {
60                Some(*subset)
61            } else {
62                None
63            }
64        })
65}