Skip to main content

typg_core/
query.rs

1//! Query construction and evaluation for typg.
2//!
3//! A [`Query`] describes the font you want. An empty query matches every font.
4//! When multiple filters are set, they combine with AND logic: a match must
5//! satisfy every active criterion.
6//!
7//! The same query model is reused for live scans, cached searches, and indexed
8//! searches, so filter behavior stays consistent across the CLI, HTTP server,
9//! and Python bindings.
10//!
11//! Made by FontLab <https://www.fontlab.com/>
12use std::collections::{HashMap, HashSet};
13use std::ops::RangeInclusive;
14
15use anyhow::{anyhow, Result};
16use read_fonts::types::Tag;
17use regex::Regex;
18
19use crate::search::TypgFontFaceMeta;
20use crate::tags::tag4;
21
/// Filter criteria for a font search, assembled through chained `with_*` calls.
///
/// Every field is optional and defaults to "no constraint", so an empty
/// `Query` matches all fonts. Each criterion you add narrows the result:
/// a font matches only when it satisfies *every* active criterion (AND logic).
///
/// A query never touches the filesystem; it is evaluated purely against
/// in-memory [`TypgFontFaceMeta`] values. This makes it reusable across live
/// search, cached search, and indexed search without modification.
#[derive(Debug, Clone, Default)]
pub struct Query {
    /// Variation axis tags the font must define (e.g., `wght`, `wdth`).
    /// A font matches only if it has *all* listed axes. Empty = no constraint.
    axes: Vec<Tag>,

    /// OpenType feature tags the font must support (e.g., `liga`, `smcp`).
    /// Checked against features from both GSUB and GPOS tables.
    /// A font matches only if it has *all* listed features.
    features: Vec<Tag>,

    /// Script tags the font must declare support for (e.g., `latn`, `arab`).
    /// Checked against script lists in GSUB and GPOS.
    /// A font matches only if it has *all* listed scripts.
    scripts: Vec<Tag>,

    /// Top-level table tags the font must contain (e.g., `GSUB`, `CFF `).
    /// A font matches only if it has *all* listed tables.
    tables: Vec<Tag>,

    /// Regex patterns tested against the font's name strings.
    /// At least one name must match at least one pattern.
    /// Use for searches like "find fonts whose name contains 'Mono'."
    /// An empty list disables the check.
    name_patterns: Vec<Regex>,

    /// Unicode codepoints the font must cover (via its `cmap` table).
    /// The font must have glyphs for *all* listed codepoints.
    /// An empty list disables the check.
    codepoints: Vec<char>,

    /// When `true`, only variable fonts (those with an `fvar` table) match.
    /// When `false` (default), both static and variable fonts can match.
    variable_only: bool,

    /// OS/2 `usWeightClass` must fall within this range.
    /// Standard range: 100 (Thin) to 900 (Black). `None` = no constraint.
    /// While a range is set, a font with no recorded weight class never matches.
    weight_range: Option<RangeInclusive<u16>>,

    /// OS/2 `usWidthClass` must fall within this range.
    /// Standard range: 1 (UltraCondensed) to 9 (UltraExpanded). `None` = no constraint.
    /// While a range is set, a font with no recorded width class never matches.
    width_range: Option<RangeInclusive<u16>>,

    /// OS/2 family class (major, optionally subclass) the font must match.
    /// Example: major=8 matches all sans-serif fonts; major=8, subclass=1
    /// matches only IBM Neo-Grotesque Gothic. `None` = no constraint.
    /// While a filter is set, a font with no recorded family class never matches.
    family_class: Option<FamilyClassFilter>,

    /// Regex patterns tested against creator/provenance name strings
    /// (copyright, trademark, manufacturer, designer, description, URLs,
    /// license text). At least one creator string must match at least one
    /// pattern. An empty list disables the check.
    creator_patterns: Vec<Regex>,

    /// Regex patterns tested against license-specific name strings
    /// (copyright, license description, license URL). At least one license
    /// string must match at least one pattern. An empty list disables the check.
    license_patterns: Vec<Regex>,
}
85
86impl Query {
87    /// Create an empty query that matches all fonts.
88    pub fn new() -> Self {
89        Self::default()
90    }
91
92    /// Require these variation axes. A font must define *all* of them.
93    /// Example: `vec![tag4("wght")?, tag4("wdth")?]`
94    pub fn with_axes(mut self, axes: Vec<Tag>) -> Self {
95        self.axes = axes;
96        self
97    }
98
99    /// Require these OpenType features. The font must list *all* of them.
100    /// Example: `vec![tag4("liga")?, tag4("smcp")?]`
101    pub fn with_features(mut self, features: Vec<Tag>) -> Self {
102        self.features = features;
103        self
104    }
105
106    /// Require these script tags. The font must declare support for *all*.
107    /// Example: `vec![tag4("latn")?, tag4("cyrl")?]`
108    pub fn with_scripts(mut self, scripts: Vec<Tag>) -> Self {
109        self.scripts = scripts;
110        self
111    }
112
113    /// Require these top-level tables. The font file must contain *all*.
114    /// Example: `vec![tag4("GSUB")?, tag4("GPOS")?]`
115    pub fn with_tables(mut self, tables: Vec<Tag>) -> Self {
116        self.tables = tables;
117        self
118    }
119
120    /// Require at least one font name to match at least one regex pattern.
121    /// Patterns are tested against all name strings (family, full, PostScript, etc.).
122    pub fn with_name_patterns(mut self, patterns: Vec<Regex>) -> Self {
123        self.name_patterns = patterns;
124        self
125    }
126
127    /// Require the font to have glyphs for *all* of these Unicode codepoints.
128    /// Example: `vec!['A', 'B', 'ñ']`
129    pub fn with_codepoints(mut self, cps: Vec<char>) -> Self {
130        self.codepoints = cps;
131        self
132    }
133
134    /// When `true`, only variable fonts match. Default: `false` (both match).
135    pub fn require_variable(mut self, yes: bool) -> Self {
136        self.variable_only = yes;
137        self
138    }
139
140    /// Require OS/2 weight class within this range. Example: `Some(300..=700)`.
141    pub fn with_weight_range(mut self, range: Option<RangeInclusive<u16>>) -> Self {
142        self.weight_range = range;
143        self
144    }
145
146    /// Require OS/2 width class within this range. Example: `Some(3..=7)`.
147    pub fn with_width_range(mut self, range: Option<RangeInclusive<u16>>) -> Self {
148        self.width_range = range;
149        self
150    }
151
152    /// Require a specific OS/2 family class (and optionally subclass).
153    pub fn with_family_class(mut self, class: Option<FamilyClassFilter>) -> Self {
154        self.family_class = class;
155        self
156    }
157
158    /// Require at least one creator string to match at least one regex.
159    pub fn with_creator_patterns(mut self, patterns: Vec<Regex>) -> Self {
160        self.creator_patterns = patterns;
161        self
162    }
163
164    /// Require at least one license string to match at least one regex.
165    pub fn with_license_patterns(mut self, patterns: Vec<Regex>) -> Self {
166        self.license_patterns = patterns;
167        self
168    }
169
170    /// The required axis tags, if any.
171    pub fn axes(&self) -> &[Tag] {
172        &self.axes
173    }
174
175    /// The required feature tags, if any.
176    pub fn features(&self) -> &[Tag] {
177        &self.features
178    }
179
180    /// The required script tags, if any.
181    pub fn scripts(&self) -> &[Tag] {
182        &self.scripts
183    }
184
185    /// The required table tags, if any.
186    pub fn tables(&self) -> &[Tag] {
187        &self.tables
188    }
189
190    /// The name regex patterns, if any.
191    pub fn name_patterns(&self) -> &[Regex] {
192        &self.name_patterns
193    }
194
195    /// The required codepoints, if any.
196    pub fn codepoints(&self) -> &[char] {
197        &self.codepoints
198    }
199
200    /// Whether only variable fonts are accepted.
201    pub fn requires_variable(&self) -> bool {
202        self.variable_only
203    }
204
205    /// The weight class range constraint, if set.
206    pub fn weight_range(&self) -> Option<&RangeInclusive<u16>> {
207        self.weight_range.as_ref()
208    }
209
210    /// The width class range constraint, if set.
211    pub fn width_range(&self) -> Option<&RangeInclusive<u16>> {
212        self.width_range.as_ref()
213    }
214
215    /// The family class constraint, if set.
216    pub fn family_class(&self) -> Option<&FamilyClassFilter> {
217        self.family_class.as_ref()
218    }
219
220    /// The creator/provenance regex patterns, if any.
221    pub fn creator_patterns(&self) -> &[Regex] {
222        &self.creator_patterns
223    }
224
225    /// The license regex patterns, if any.
226    pub fn license_patterns(&self) -> &[Regex] {
227        &self.license_patterns
228    }
229
230    /// Test a font's metadata against every criterion in this query.
231    ///
232    /// Returns `true` only if *all* active criteria are satisfied.
233    /// Criteria that aren't set (empty vecs, `None` ranges) are skipped.
234    ///
235    /// Evaluation order is roughly cheapest-first: boolean checks, then
236    /// tag set intersections, then numeric ranges, then codepoint coverage,
237    /// then regex matching (most expensive). Short-circuits on the first
238    /// failure.
239    pub fn matches(&self, meta: &TypgFontFaceMeta) -> bool {
240        if self.variable_only && !meta.is_variable {
241            return false;
242        }
243
244        if !contains_all_tags(&meta.axis_tags, &self.axes) {
245            return false;
246        }
247
248        if !contains_all_tags(&meta.feature_tags, &self.features) {
249            return false;
250        }
251
252        if !contains_all_tags(&meta.script_tags, &self.scripts) {
253            return false;
254        }
255
256        if !contains_all_tags(&meta.table_tags, &self.tables) {
257            return false;
258        }
259
260        if let Some(range) = &self.weight_range {
261            match meta.weight_class {
262                Some(weight) if range.contains(&weight) => {}
263                _ => return false,
264            }
265        }
266
267        if let Some(range) = &self.width_range {
268            match meta.width_class {
269                Some(width) if range.contains(&width) => {}
270                _ => return false,
271            }
272        }
273
274        if let Some(filter) = &self.family_class {
275            match meta.family_class {
276                Some((class, subclass)) => {
277                    if class != filter.major {
278                        return false;
279                    }
280                    if let Some(expected_subclass) = filter.subclass {
281                        if subclass != expected_subclass {
282                            return false;
283                        }
284                    }
285                }
286                None => return false,
287            }
288        }
289
290        if !self.codepoints.is_empty() {
291            let available: HashSet<char> = meta.codepoints.iter().copied().collect();
292            if !self.codepoints.iter().all(|cp| available.contains(cp)) {
293                return false;
294            }
295        }
296
297        if !self.name_patterns.is_empty() {
298            let matched = meta
299                .names
300                .iter()
301                .any(|name| self.name_patterns.iter().any(|re| re.is_match(name)));
302            if !matched {
303                return false;
304            }
305        }
306
307        if !self.creator_patterns.is_empty() {
308            let matched = meta
309                .creator_names
310                .iter()
311                .any(|name| self.creator_patterns.iter().any(|re| re.is_match(name)));
312            if !matched {
313                return false;
314            }
315        }
316
317        if !self.license_patterns.is_empty() {
318            let matched = meta
319                .license_names
320                .iter()
321                .any(|name| self.license_patterns.iter().any(|re| re.is_match(name)));
322            if !matched {
323                return false;
324            }
325        }
326
327        true
328    }
329}
330
331/// Check that `haystack` contains every tag in `needles` (set subset check).
332/// Returns `true` if `needles` is empty (vacuous truth — no requirements).
333fn contains_all_tags(haystack: &[Tag], needles: &[Tag]) -> bool {
334    if needles.is_empty() {
335        return true;
336    }
337    let set: HashSet<Tag> = haystack.iter().copied().collect();
338    needles.iter().all(|tag| set.contains(tag))
339}
340
341/// Parse a comma-separated list of codepoints or ranges into a `Vec<char>`.
342///
343/// Accepts single characters ("A"), Unicode escapes ("U+0041"), and ranges
344/// ("A-Z", "U+0041-U+005A"), or any comma-separated combination thereof.
345pub fn parse_codepoint_list(input: &str) -> Result<Vec<char>> {
346    let mut result = Vec::new();
347    if input.trim().is_empty() {
348        return Ok(result);
349    }
350
351    for part in input.split(',') {
352        if part.contains('-') {
353            let pieces: Vec<&str> = part.split('-').collect();
354            if pieces.len() != 2 {
355                return Err(anyhow!("invalid range: {part}"));
356            }
357            let start = parse_codepoint(pieces[0])? as u32;
358            let end = parse_codepoint(pieces[1])? as u32;
359            let (lo, hi) = if start <= end {
360                (start, end)
361            } else {
362                (end, start)
363            };
364            for cp in lo..=hi {
365                if let Some(ch) = char::from_u32(cp) {
366                    result.push(ch);
367                }
368            }
369        } else {
370            result.push(parse_codepoint(part)?);
371        }
372    }
373
374    Ok(result)
375}
376
377fn parse_codepoint(token: &str) -> Result<char> {
378    if token.chars().count() == 1 {
379        return Ok(token.chars().next().unwrap());
380    }
381
382    let trimmed = token.trim_start_matches("U+").trim_start_matches("u+");
383    let cp = u32::from_str_radix(trimmed, 16).map_err(|_| anyhow!("invalid codepoint: {token}"))?;
384    char::from_u32(cp).ok_or_else(|| anyhow!("invalid Unicode scalar: U+{cp:04X}"))
385}
386
387/// Parse a slice of tag strings (e.g. `"wght"`, `"smcp"`) into `Tag` values.
388///
389/// Each string must be 1–4 printable ASCII characters.
390pub fn parse_tag_list(raw: &[String]) -> Result<Vec<Tag>> {
391    raw.iter().map(|s| tag4(s)).collect()
392}
393
/// Filter for the OS/2 family-class field.
///
/// `major` selects a broad class such as serif, sans-serif, or script.
/// `subclass`, when present, narrows the match to one subclass inside that
/// major class.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FamilyClassFilter {
    /// Major class value; a font matches only if its major class equals this.
    pub major: u8,
    /// Subclass the font must also have. `None` accepts every subclass of
    /// `major`.
    pub subclass: Option<u8>,
}
404
405/// Parse an OS/2 family class specifier into a [`FamilyClassFilter`].
406///
407/// Accepts numeric values ("8"), hex values ("0x0800"), major.subclass pairs
408/// ("8.11"), and named aliases ("sans", "serif", "script", etc.).
409pub fn parse_family_class(input: &str) -> Result<FamilyClassFilter> {
410    let trimmed = input.trim();
411    if trimmed.is_empty() {
412        return Err(anyhow!("family class cannot be empty"));
413    }
414
415    let lower = trimmed.to_ascii_lowercase();
416    if let Some(major) = lookup_family_class_by_name(&lower) {
417        return Ok(FamilyClassFilter {
418            major,
419            subclass: None,
420        });
421    }
422
423    if let Some((major, subclass)) = parse_major_and_subclass(&lower) {
424        return Ok(FamilyClassFilter {
425            major,
426            subclass: Some(subclass),
427        });
428    }
429
430    let value = if let Some(stripped) = lower.strip_prefix("0x") {
431        u16::from_str_radix(stripped, 16)
432            .map_err(|_| anyhow!("invalid hex family class: {trimmed}"))?
433    } else {
434        lower
435            .parse::<u16>()
436            .map_err(|_| anyhow!("invalid family class: {trimmed}"))?
437    };
438
439    if value <= 0x00FF {
440        return Ok(FamilyClassFilter {
441            major: value as u8,
442            subclass: None,
443        });
444    }
445
446    let major = (value >> 8) as u8;
447    let subclass = (value & 0x00FF) as u8;
448
449    Ok(FamilyClassFilter {
450        major,
451        subclass: Some(subclass),
452    })
453}
454
/// Map a family-class alias to its OS/2 major class number.
///
/// The lookup is exact: `name` must already be lower-case and use the
/// spellings listed below. Returns `None` for anything else.
///
/// The alias table is built once, on first use, and shared by all later
/// calls instead of being rebuilt on every lookup.
fn lookup_family_class_by_name(name: &str) -> Option<u8> {
    static ALIASES: std::sync::OnceLock<HashMap<&'static str, u8>> = std::sync::OnceLock::new();

    ALIASES
        .get_or_init(|| {
            HashMap::from([
                ("none", 0),
                ("no-class", 0),
                ("uncategorized", 0),
                ("oldstyle", 1),
                ("old-style", 1),
                ("oldstyle-serif", 1),
                ("transitional", 2),
                ("modern", 3),
                ("clarendon", 4),
                ("slab", 5),
                ("slab-serif", 5),
                ("egyptian", 5),
                ("freeform", 7),
                ("freeform-serif", 7),
                ("sans", 8),
                ("sans-serif", 8),
                ("gothic", 8),
                ("ornamental", 9),
                ("decorative", 9),
                ("script", 10),
                ("symbolic", 12),
            ])
        })
        .get(name)
        .copied()
}
480
/// Split `raw` into a `(major, subclass)` pair of `u8` values.
///
/// The text is cut at the first `.`; only when it contains no `.` at all is
/// the first `:` used instead. Returns `None` when neither separator is
/// present or when either half is not a valid `u8`.
fn parse_major_and_subclass(raw: &str) -> Option<(u8, u8)> {
    let (major_text, subclass_text) = raw.split_once('.').or_else(|| raw.split_once(':'))?;
    let major = major_text.parse::<u8>().ok()?;
    let subclass = subclass_text.parse::<u8>().ok()?;
    Some((major, subclass))
}
491
492/// Parse a single value or range of u16 numbers (e.g., "400" or "300-500").
493pub fn parse_u16_range(input: &str) -> Result<RangeInclusive<u16>> {
494    let trimmed = input.trim();
495    if trimmed.is_empty() {
496        return Err(anyhow!("range cannot be empty"));
497    }
498
499    if let Some((lo, hi)) = trimmed.split_once('-') {
500        let start: u16 = lo.trim().parse()?;
501        let end: u16 = hi.trim().parse()?;
502        let (min, max) = if start <= end {
503            (start, end)
504        } else {
505            (end, start)
506        };
507        Ok(min..=max)
508    } else {
509        let value: u16 = trimmed.parse()?;
510        Ok(value..=value)
511    }
512}