Skip to main content

typg_core/
query.rs

//! Query construction and evaluation for font search.
//!
//! A [`Query`] holds filter criteria (tag lists, name regexes, codepoint sets,
//! weight/width ranges, family class, variable-only flag). Call
//! [`Query::matches`] to test a [`TypgFontFaceMeta`] against the query.
//!
//! Made by FontLab <https://www.fontlab.com/>
8use std::collections::{HashMap, HashSet};
9use std::ops::RangeInclusive;
10
11use anyhow::{anyhow, Result};
12use read_fonts::types::Tag;
13use regex::Regex;
14
15use crate::search::TypgFontFaceMeta;
16use crate::tags::tag4;
17
/// Filter criteria for font search.
///
/// Built via chained `with_*` methods. An empty query matches all fonts.
/// Every criterion that is set must be satisfied for a font to match.
#[derive(Debug, Clone, Default)]
pub struct Query {
    /// Required variation axis tags; the font must have all of them.
    axes: Vec<Tag>,
    /// Required OpenType feature tags; the font must have all of them.
    features: Vec<Tag>,
    /// Required script tags; the font must have all of them.
    scripts: Vec<Tag>,
    /// Required table tags; the font must have all of them.
    tables: Vec<Tag>,
    /// Regex patterns for name strings. When non-empty, at least one pattern
    /// must match at least one name string.
    name_patterns: Vec<Regex>,
    /// Unicode codepoints the font must cover (all of them).
    codepoints: Vec<char>,
    /// If true, only match variable fonts.
    variable_only: bool,
    /// Required OS/2 weight class range. When set, fonts without a weight
    /// class do not match.
    weight_range: Option<RangeInclusive<u16>>,
    /// Required OS/2 width class range. When set, fonts without a width
    /// class do not match.
    width_range: Option<RangeInclusive<u16>>,
    /// Required OS/2 family class filter. When set, fonts without a family
    /// class do not match.
    family_class: Option<FamilyClassFilter>,
    /// Regex patterns for creator-related name strings. When non-empty, at
    /// least one pattern must match at least one creator string.
    creator_patterns: Vec<Regex>,
    /// Regex patterns for license-related name strings. When non-empty, at
    /// least one pattern must match at least one license string.
    license_patterns: Vec<Regex>,
}
48
49impl Query {
50    pub fn new() -> Self {
51        Self::default()
52    }
53
54    pub fn with_axes(mut self, axes: Vec<Tag>) -> Self {
55        self.axes = axes;
56        self
57    }
58
59    pub fn with_features(mut self, features: Vec<Tag>) -> Self {
60        self.features = features;
61        self
62    }
63
64    pub fn with_scripts(mut self, scripts: Vec<Tag>) -> Self {
65        self.scripts = scripts;
66        self
67    }
68
69    pub fn with_tables(mut self, tables: Vec<Tag>) -> Self {
70        self.tables = tables;
71        self
72    }
73
74    pub fn with_name_patterns(mut self, patterns: Vec<Regex>) -> Self {
75        self.name_patterns = patterns;
76        self
77    }
78
79    pub fn with_codepoints(mut self, cps: Vec<char>) -> Self {
80        self.codepoints = cps;
81        self
82    }
83
84    pub fn require_variable(mut self, yes: bool) -> Self {
85        self.variable_only = yes;
86        self
87    }
88
89    pub fn with_weight_range(mut self, range: Option<RangeInclusive<u16>>) -> Self {
90        self.weight_range = range;
91        self
92    }
93
94    pub fn with_width_range(mut self, range: Option<RangeInclusive<u16>>) -> Self {
95        self.width_range = range;
96        self
97    }
98
99    pub fn with_family_class(mut self, class: Option<FamilyClassFilter>) -> Self {
100        self.family_class = class;
101        self
102    }
103
104    pub fn with_creator_patterns(mut self, patterns: Vec<Regex>) -> Self {
105        self.creator_patterns = patterns;
106        self
107    }
108
109    pub fn with_license_patterns(mut self, patterns: Vec<Regex>) -> Self {
110        self.license_patterns = patterns;
111        self
112    }
113
114    // Accessor methods for use by the high-performance index module.
115
116    /// Get the required axis tags.
117    pub fn axes(&self) -> &[Tag] {
118        &self.axes
119    }
120
121    /// Get the required feature tags.
122    pub fn features(&self) -> &[Tag] {
123        &self.features
124    }
125
126    /// Get the required script tags.
127    pub fn scripts(&self) -> &[Tag] {
128        &self.scripts
129    }
130
131    /// Get the required table tags.
132    pub fn tables(&self) -> &[Tag] {
133        &self.tables
134    }
135
136    /// Get the name patterns.
137    pub fn name_patterns(&self) -> &[Regex] {
138        &self.name_patterns
139    }
140
141    /// Get the required codepoints.
142    pub fn codepoints(&self) -> &[char] {
143        &self.codepoints
144    }
145
146    /// Check if variable fonts are required.
147    pub fn requires_variable(&self) -> bool {
148        self.variable_only
149    }
150
151    /// Get the weight range filter.
152    pub fn weight_range(&self) -> Option<&RangeInclusive<u16>> {
153        self.weight_range.as_ref()
154    }
155
156    /// Get the width range filter.
157    pub fn width_range(&self) -> Option<&RangeInclusive<u16>> {
158        self.width_range.as_ref()
159    }
160
161    /// Get the family class filter.
162    pub fn family_class(&self) -> Option<&FamilyClassFilter> {
163        self.family_class.as_ref()
164    }
165
166    /// Get the creator patterns.
167    pub fn creator_patterns(&self) -> &[Regex] {
168        &self.creator_patterns
169    }
170
171    /// Get the license patterns.
172    pub fn license_patterns(&self) -> &[Regex] {
173        &self.license_patterns
174    }
175
176    /// Returns true if `meta` satisfies all criteria in this query.
177    pub fn matches(&self, meta: &TypgFontFaceMeta) -> bool {
178        if self.variable_only && !meta.is_variable {
179            return false;
180        }
181
182        if !contains_all_tags(&meta.axis_tags, &self.axes) {
183            return false;
184        }
185
186        if !contains_all_tags(&meta.feature_tags, &self.features) {
187            return false;
188        }
189
190        if !contains_all_tags(&meta.script_tags, &self.scripts) {
191            return false;
192        }
193
194        if !contains_all_tags(&meta.table_tags, &self.tables) {
195            return false;
196        }
197
198        if let Some(range) = &self.weight_range {
199            match meta.weight_class {
200                Some(weight) if range.contains(&weight) => {}
201                _ => return false,
202            }
203        }
204
205        if let Some(range) = &self.width_range {
206            match meta.width_class {
207                Some(width) if range.contains(&width) => {}
208                _ => return false,
209            }
210        }
211
212        if let Some(filter) = &self.family_class {
213            match meta.family_class {
214                Some((class, subclass)) => {
215                    if class != filter.major {
216                        return false;
217                    }
218                    if let Some(expected_subclass) = filter.subclass {
219                        if subclass != expected_subclass {
220                            return false;
221                        }
222                    }
223                }
224                None => return false,
225            }
226        }
227
228        if !self.codepoints.is_empty() {
229            let available: HashSet<char> = meta.codepoints.iter().copied().collect();
230            if !self.codepoints.iter().all(|cp| available.contains(cp)) {
231                return false;
232            }
233        }
234
235        if !self.name_patterns.is_empty() {
236            let matched = meta
237                .names
238                .iter()
239                .any(|name| self.name_patterns.iter().any(|re| re.is_match(name)));
240            if !matched {
241                return false;
242            }
243        }
244
245        if !self.creator_patterns.is_empty() {
246            let matched = meta
247                .creator_names
248                .iter()
249                .any(|name| self.creator_patterns.iter().any(|re| re.is_match(name)));
250            if !matched {
251                return false;
252            }
253        }
254
255        if !self.license_patterns.is_empty() {
256            let matched = meta
257                .license_names
258                .iter()
259                .any(|name| self.license_patterns.iter().any(|re| re.is_match(name)));
260            if !matched {
261                return false;
262            }
263        }
264
265        true
266    }
267}
268
269fn contains_all_tags(haystack: &[Tag], needles: &[Tag]) -> bool {
270    if needles.is_empty() {
271        return true;
272    }
273    let set: HashSet<Tag> = haystack.iter().copied().collect();
274    needles.iter().all(|tag| set.contains(tag))
275}
276
277/// Parse a comma-separated list of codepoints or ranges into a `Vec<char>`.
278///
279/// Accepts single characters ("A"), Unicode escapes ("U+0041"), and ranges
280/// ("A-Z", "U+0041-U+005A"), or any comma-separated combination thereof.
281pub fn parse_codepoint_list(input: &str) -> Result<Vec<char>> {
282    let mut result = Vec::new();
283    if input.trim().is_empty() {
284        return Ok(result);
285    }
286
287    for part in input.split(',') {
288        if part.contains('-') {
289            let pieces: Vec<&str> = part.split('-').collect();
290            if pieces.len() != 2 {
291                return Err(anyhow!("invalid range: {part}"));
292            }
293            let start = parse_codepoint(pieces[0])? as u32;
294            let end = parse_codepoint(pieces[1])? as u32;
295            let (lo, hi) = if start <= end {
296                (start, end)
297            } else {
298                (end, start)
299            };
300            for cp in lo..=hi {
301                if let Some(ch) = char::from_u32(cp) {
302                    result.push(ch);
303                }
304            }
305        } else {
306            result.push(parse_codepoint(part)?);
307        }
308    }
309
310    Ok(result)
311}
312
313fn parse_codepoint(token: &str) -> Result<char> {
314    if token.chars().count() == 1 {
315        return Ok(token.chars().next().unwrap());
316    }
317
318    let trimmed = token.trim_start_matches("U+").trim_start_matches("u+");
319    let cp = u32::from_str_radix(trimmed, 16).map_err(|_| anyhow!("invalid codepoint: {token}"))?;
320    char::from_u32(cp).ok_or_else(|| anyhow!("invalid Unicode scalar: U+{cp:04X}"))
321}
322
323/// Parse a slice of tag strings (e.g. `"wght"`, `"smcp"`) into `Tag` values.
324///
325/// Each string must be 1–4 printable ASCII characters.
326pub fn parse_tag_list(raw: &[String]) -> Result<Vec<Tag>> {
327    raw.iter().map(|s| tag4(s)).collect()
328}
329
/// Filter on a font's OS/2 family class: a required major class and,
/// optionally, a required subclass.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FamilyClassFilter {
    /// Major class value the font must have.
    pub major: u8,
    /// Subclass value the font must have; `None` accepts any subclass.
    pub subclass: Option<u8>,
}
335
336/// Parse an OS/2 family class specifier into a [`FamilyClassFilter`].
337///
338/// Accepts numeric values ("8"), hex values ("0x0800"), major.subclass pairs
339/// ("8.11"), and named aliases ("sans", "serif", "script", etc.).
340pub fn parse_family_class(input: &str) -> Result<FamilyClassFilter> {
341    let trimmed = input.trim();
342    if trimmed.is_empty() {
343        return Err(anyhow!("family class cannot be empty"));
344    }
345
346    let lower = trimmed.to_ascii_lowercase();
347    if let Some(major) = lookup_family_class_by_name(&lower) {
348        return Ok(FamilyClassFilter {
349            major,
350            subclass: None,
351        });
352    }
353
354    if let Some((major, subclass)) = parse_major_and_subclass(&lower) {
355        return Ok(FamilyClassFilter {
356            major,
357            subclass: Some(subclass),
358        });
359    }
360
361    let value = if let Some(stripped) = lower.strip_prefix("0x") {
362        u16::from_str_radix(stripped, 16)
363            .map_err(|_| anyhow!("invalid hex family class: {trimmed}"))?
364    } else {
365        lower
366            .parse::<u16>()
367            .map_err(|_| anyhow!("invalid family class: {trimmed}"))?
368    };
369
370    if value <= 0x00FF {
371        return Ok(FamilyClassFilter {
372            major: value as u8,
373            subclass: None,
374        });
375    }
376
377    let major = (value >> 8) as u8;
378    let subclass = (value & 0x00FF) as u8;
379
380    Ok(FamilyClassFilter {
381        major,
382        subclass: Some(subclass),
383    })
384}
385
/// Map a family class alias to its major class number.
///
/// The lookup is an exact string match against the alias table below, so
/// callers are expected to pass an already lower-cased name. Returns `None`
/// for names not in the table.
fn lookup_family_class_by_name(name: &str) -> Option<u8> {
    const ALIASES: &[(&str, u8)] = &[
        ("none", 0),
        ("no-class", 0),
        ("uncategorized", 0),
        ("oldstyle", 1),
        ("old-style", 1),
        ("oldstyle-serif", 1),
        ("transitional", 2),
        ("modern", 3),
        ("clarendon", 4),
        ("slab", 5),
        ("slab-serif", 5),
        ("egyptian", 5),
        ("freeform", 7),
        ("freeform-serif", 7),
        ("sans", 8),
        ("sans-serif", 8),
        ("gothic", 8),
        ("ornamental", 9),
        ("decorative", 9),
        ("script", 10),
        ("symbolic", 12),
    ];

    let by_name: HashMap<&str, u8> = ALIASES.iter().copied().collect();
    by_name.get(name).copied()
}
411
/// Split a `major.subclass` or `major:subclass` specifier into its two
/// numeric parts.
///
/// The string is split at the first `.` if there is one, otherwise at the
/// first `:`. Returns `None` when neither separator is present or when either
/// side fails to parse as `u8`.
fn parse_major_and_subclass(raw: &str) -> Option<(u8, u8)> {
    let (major_text, subclass_text) = raw.split_once('.').or_else(|| raw.split_once(':'))?;
    let major = major_text.parse::<u8>().ok()?;
    let subclass = subclass_text.parse::<u8>().ok()?;
    Some((major, subclass))
}
422
423/// Parse a single value or range of u16 numbers (e.g., "400" or "300-500").
424pub fn parse_u16_range(input: &str) -> Result<RangeInclusive<u16>> {
425    let trimmed = input.trim();
426    if trimmed.is_empty() {
427        return Err(anyhow!("range cannot be empty"));
428    }
429
430    if let Some((lo, hi)) = trimmed.split_once('-') {
431        let start: u16 = lo.trim().parse()?;
432        let end: u16 = hi.trim().parse()?;
433        let (min, max) = if start <= end {
434            (start, end)
435        } else {
436            (end, start)
437        };
438        Ok(min..=max)
439    } else {
440        let value: u16 = trimmed.parse()?;
441        Ok(value..=value)
442    }
443}