google_fonts_glyphsets/
lib.rs1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4
5use thiserror::Error;
6
7#[derive(Debug, Error)]
8pub enum GlyphsetError {
9 #[error("Glyphset not found")]
10 GlyphsetNotFound,
11 #[error("Bug in glyphset definitions")]
12 InternalInconsistency,
13}
14
/// Maps a four-letter script tag to the script word used in glyphset names.
///
/// Only `Latn`, `Arab` and `Cyrl` are translated; every other input is
/// returned unchanged.
fn script_name(s: &str) -> String {
    let display_name = match s {
        "Latn" => "Latin",
        "Arab" => "Arabic",
        "Cyrl" => "Cyrillic",
        unmapped => unmapped,
    };
    display_name.to_string()
}
24
25include!(concat!(env!("OUT_DIR"), "/data.rs"));
26
27#[derive(Default, Debug, Clone, Deserialize, PartialEq)]
28pub struct Glyphset {
30 #[serde(skip)]
31 name: String,
32 #[serde(default)]
33 description: String,
34 #[serde(default)]
35 include_glyphsets: Vec<String>,
36 #[serde(default)]
37 language_codes: Vec<String>,
38 #[serde(skip)]
39 codepoints: HashSet<u32>,
40 use_auxiliary: Option<bool>,
41 historical: Option<bool>,
42 #[serde(default)]
43 regions: Vec<String>,
44 #[serde(default)]
45 exclude_language_codes: Vec<String>,
46 population: Option<u32>,
47}
48
49impl Glyphset {
50 fn script(&self) -> String {
51 self.name
52 .split("_")
53 .skip(1)
54 .take(1)
55 .next()
56 .expect("Malformed glyphset name - no script")
57 .to_string()
58 }
59
60 pub fn iter_codepoints(&self) -> impl Iterator<Item = u32> {
63 let mut seen_glyphsets = HashSet::new();
69 let mut codepoints = HashSet::new();
70 let mut to_process = vec![self.name.clone()];
71 while let Some(glyphset_name) = to_process.pop() {
72 if seen_glyphsets.contains(&glyphset_name) {
73 continue;
74 }
75 seen_glyphsets.insert(glyphset_name.clone());
76 let glyphset = GLYPHSETS
77 .get(&glyphset_name)
78 .expect("Malformed glyphset name in include_glyphsets");
79 for cp in &glyphset.codepoints {
80 codepoints.insert(*cp);
81 }
82 }
83 codepoints.into_iter()
84 }
85}
86
/// How well a set of codepoints covers one glyphset.
///
/// Derives `PartialEq` so results can be compared directly (for example with
/// `assert_eq!`). Note that `fraction` is a float, so two values holding NaN
/// never compare equal.
#[derive(Default, Debug, Clone, PartialEq)]
pub struct Coverage {
    /// Codepoints of the glyphset that are present in the checked set.
    pub has: HashSet<u32>,
    /// Codepoints of the glyphset that are absent from the checked set.
    pub missing: HashSet<u32>,
    /// Coverage score; `1.0` means nothing is missing.
    pub fraction: f32,
}
97
98pub fn get_coverage<T>(codepoints: &T, glyphset: &str) -> Option<Coverage>
99where
100 for<'a> &'a T: IntoIterator<Item = &'a u32>,
101{
102 let codepoints_set: HashSet<u32> = codepoints.into_iter().copied().collect();
103 GLYPHSETS.get(glyphset).map(|gs| {
104 let our_codepoints = &gs.codepoints;
105 let mut coverage = Coverage::default();
106 let our_codepoints_set: HashSet<u32> = our_codepoints.iter().copied().collect();
107 coverage.has = our_codepoints_set
108 .intersection(&codepoints_set)
109 .copied()
110 .collect();
111 coverage.missing = our_codepoints_set
112 .difference(&codepoints_set)
113 .copied()
114 .collect();
115 if !coverage.has.is_empty() && coverage.missing.is_empty() {
117 coverage.fraction = 1.0;
118 } else if glyphset == "GF_Latin_Core" || glyphset == "GF_Latin_Kernel" {
119 coverage.fraction = coverage.has.len() as f32 / our_codepoints.len() as f32;
120 } else {
121 let our_codepoints_without_core = our_codepoints_set
122 .difference(&GF_LATIN_CORE_CODEPOINTS.into())
123 .copied()
124 .collect::<HashSet<_>>();
125 let our_codepoints_without_kernel = our_codepoints_set
126 .difference(&GF_LATIN_KERNEL_CODEPOINTS.into())
127 .copied()
128 .collect::<HashSet<_>>();
129 let unicodes_unique_in_glyphset =
130 if our_codepoints_without_core.is_superset(&our_codepoints_without_kernel) {
131 our_codepoints_without_core
132 } else {
133 our_codepoints_without_kernel
134 };
135 if unicodes_unique_in_glyphset.is_empty() {
136 coverage.fraction = 0.0;
137 } else {
138 let has = coverage
139 .has
140 .intersection(&unicodes_unique_in_glyphset)
141 .count();
142 coverage.fraction = has as f32 / unicodes_unique_in_glyphset.len() as f32;
143 }
144 }
145 coverage
146 })
147}
148
149pub fn get_glyphset_coverage<T>(codepoints: &T) -> HashMap<String, Coverage>
150where
151 for<'a> &'a T: IntoIterator<Item = &'a u32>,
152{
153 GLYPHSETS
154 .keys()
155 .map(|glyphset| {
156 let coverage = get_coverage(codepoints, glyphset).unwrap();
157 (glyphset.to_string(), coverage)
158 })
159 .collect()
160}
161
162pub fn languages_per_glyphset(gs: &str) -> Result<Vec<String>, GlyphsetError> {
163 let glyphset = GLYPHSETS.get(gs).ok_or(GlyphsetError::GlyphsetNotFound)?;
164 let mut codes = glyphset.language_codes.clone();
165 for include in &glyphset.include_glyphsets {
166 let include_glyphset = GLYPHSETS
167 .get(include)
168 .ok_or(GlyphsetError::InternalInconsistency)?;
169 codes.extend(include_glyphset.language_codes.clone());
170 }
171 if !glyphset.regions.is_empty() {
172 for language in google_fonts_languages::LANGUAGES.values() {
173 if glyphset
174 .exclude_language_codes
175 .contains(&language.id().to_string())
176 {
177 continue;
178 }
179 if language.historical() && !glyphset.historical.unwrap_or(false) {
180 continue;
181 }
182 if glyphset.population.unwrap_or(0) as i32 > language.population() {
184 continue;
185 }
186 if glyphset.script() == script_name(language.script()) {
187 let language_region_set: HashSet<_> = language.region.iter().collect();
188 if glyphset
189 .regions
190 .iter()
191 .any(|region| language_region_set.contains(region))
192 {
193 codes.push(language.id().to_string());
194 }
195 }
196 }
197 }
198 Ok(codes)
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects everything `iter_codepoints` yields for a registered glyphset.
    fn codepoints_of(glyphset: &str) -> HashSet<u32> {
        GLYPHSETS
            .get(glyphset)
            .unwrap()
            .iter_codepoints()
            .collect()
    }

    #[test]
    fn test_name() {
        let arabic_plus = languages_per_glyphset("GF_Arabic_Plus").unwrap();
        assert!(arabic_plus.len() >= 8);

        let latin_african = languages_per_glyphset("GF_Latin_African").unwrap();
        assert!(latin_african.len() >= 617);
    }

    #[test]
    fn test_codepoints() {
        // Latin Kernel has U+0041 (A) but not U+00E9 (e with acute).
        let kernel_cps = codepoints_of("GF_Latin_Kernel");
        assert!(kernel_cps.contains(&0x0041));
        assert!(!kernel_cps.contains(&0x00E9));

        // Arabic Core contains the whole kernel plus U+0627 (alef).
        let arabic_core_cps = codepoints_of("GF_Arabic_Core");
        assert!(arabic_core_cps.is_superset(&kernel_cps));
        assert!(arabic_core_cps.contains(&0x0627));

        // Arabic Plus contains Arabic Core, the kernel, and U+06B3 (gueh).
        let arabic_plus_cps = codepoints_of("GF_Arabic_Plus");
        assert!(arabic_plus_cps.is_superset(&arabic_core_cps));
        assert!(arabic_plus_cps.is_superset(&kernel_cps));
        assert!(arabic_plus_cps.contains(&0x06B3));
    }
}