Skip to main content

typg_core/
query.rs

1//! Query construction and evaluation for typg.
2//!
3//! A [`Query`] describes the font you want. An empty query matches every font.
4//! When multiple filters are set, they combine with AND logic: a match must
5//! satisfy every active criterion.
6//!
7//! The same query model is reused for live scans, cached searches, and indexed
8//! searches, so filter behavior stays consistent across the CLI, HTTP server,
9//! and Python bindings.
10//!
11//! Made by FontLab <https://www.fontlab.com/>
12use std::collections::{HashMap, HashSet};
13use std::ops::RangeInclusive;
14
15use anyhow::{anyhow, Result};
16use read_fonts::types::Tag;
17use regex::Regex;
18
19use crate::scriptmap::ScriptRequirement;
20use crate::search::TypgFontFaceMeta;
21use crate::tags::tag4;
22
23/// Filter criteria for font search. Built with chained `with_*` methods.
24///
25/// Every field is optional and defaults to "no constraint." An empty `Query`
26/// matches all fonts. As you add criteria, the filter becomes more selective —
27/// all criteria must be satisfied (AND logic).
28///
29/// The query doesn't touch the filesystem; it only evaluates in-memory
30/// [`TypgFontFaceMeta`] structs. This makes it reusable across live search,
31/// cached search, and indexed search without modification.
32#[derive(Debug, Clone, Default)]
33pub struct Query {
34    /// Variation axis tags the font must define (e.g., `wght`, `wdth`).
35    /// A font matches only if it has *all* listed axes.
36    axes: Vec<Tag>,
37
38    /// OpenType feature tags the font must support (e.g., `liga`, `smcp`).
39    /// Checked against features from both GSUB and GPOS tables.
40    features: Vec<Tag>,
41
42    /// Script requirements the font must satisfy (e.g., `latn`, `Deva`).
43    ///
44    /// Each entry resolves an ISO 15924 or OpenType input into an OpenType tag
45    /// group plus a Unicode script. The font matches if it satisfies *every*
46    /// requirement through the OpenType path (a group tag in GSUB/GPOS), **or**
47    /// satisfies *every* requirement through the Unicode path (cmap coverage).
48    /// See [`ScriptRequirement`] and [`Query::matches`].
49    script_reqs: Vec<ScriptRequirement>,
50
51    /// Top-level table tags the font must contain (e.g., `GSUB`, `CFF `).
52    tables: Vec<Tag>,
53
54    /// Regex patterns tested against the font's name strings.
55    /// At least one name must match at least one pattern.
56    /// Use for searches like "find fonts whose name contains 'Mono'."
57    name_patterns: Vec<Regex>,
58
59    /// Unicode codepoints the font must cover (via its `cmap` table).
60    /// The font must have glyphs for *all* listed codepoints.
61    codepoints: Vec<char>,
62
63    /// When `true`, only variable fonts (those with an `fvar` table) match.
64    /// When `false` (default), both static and variable fonts can match.
65    variable_only: bool,
66
67    /// OS/2 `usWeightClass` must fall within this range.
68    /// Standard range: 100 (Thin) to 900 (Black). `None` = no constraint.
69    weight_range: Option<RangeInclusive<u16>>,
70
71    /// OS/2 `usWidthClass` must fall within this range.
72    /// Standard range: 1 (UltraCondensed) to 9 (UltraExpanded). `None` = no constraint.
73    width_range: Option<RangeInclusive<u16>>,
74
75    /// OS/2 family class (major, optionally subclass) the font must match.
76    /// Example: major=8 matches all sans-serif fonts; major=8, subclass=1
77    /// matches only IBM Neo-Grotesque Gothic. `None` = no constraint.
78    family_class: Option<FamilyClassFilter>,
79
80    /// Regex patterns tested against creator/provenance name strings
81    /// (copyright, trademark, manufacturer, designer, description, URLs,
82    /// license text). At least one creator string must match at least one
83    /// pattern.
84    creator_patterns: Vec<Regex>,
85
86    /// Regex patterns tested against license-specific name strings
87    /// (copyright, license description, license URL). At least one license
88    /// string must match at least one pattern.
89    license_patterns: Vec<Regex>,
90}
91
92impl Query {
93    /// Create an empty query that matches all fonts.
94    pub fn new() -> Self {
95        Self::default()
96    }
97
98    /// Require these variation axes. A font must define *all* of them.
99    /// Example: `vec![tag4("wght")?, tag4("wdth")?]`
100    pub fn with_axes(mut self, axes: Vec<Tag>) -> Self {
101        self.axes = axes;
102        self
103    }
104
105    /// Require these OpenType features. The font must list *all* of them.
106    /// Example: `vec![tag4("liga")?, tag4("smcp")?]`
107    pub fn with_features(mut self, features: Vec<Tag>) -> Self {
108        self.features = features;
109        self
110    }
111
112    /// Require these script requirements. The font must satisfy *all* of them
113    /// (via OpenType tags, or via Unicode cmap coverage — see [`Query::matches`]).
114    /// Build the requirements with [`crate::scriptmap::resolve_scripts`].
115    pub fn with_scripts(mut self, scripts: Vec<ScriptRequirement>) -> Self {
116        self.script_reqs = scripts;
117        self
118    }
119
120    /// Require these top-level tables. The font file must contain *all*.
121    /// Example: `vec![tag4("GSUB")?, tag4("GPOS")?]`
122    pub fn with_tables(mut self, tables: Vec<Tag>) -> Self {
123        self.tables = tables;
124        self
125    }
126
127    /// Require at least one font name to match at least one regex pattern.
128    /// Patterns are tested against all name strings (family, full, PostScript, etc.).
129    pub fn with_name_patterns(mut self, patterns: Vec<Regex>) -> Self {
130        self.name_patterns = patterns;
131        self
132    }
133
134    /// Require the font to have glyphs for *all* of these Unicode codepoints.
135    /// Example: `vec!['A', 'B', 'ñ']`
136    pub fn with_codepoints(mut self, cps: Vec<char>) -> Self {
137        self.codepoints = cps;
138        self
139    }
140
141    /// When `true`, only variable fonts match. Default: `false` (both match).
142    pub fn require_variable(mut self, yes: bool) -> Self {
143        self.variable_only = yes;
144        self
145    }
146
147    /// Require OS/2 weight class within this range. Example: `Some(300..=700)`.
148    pub fn with_weight_range(mut self, range: Option<RangeInclusive<u16>>) -> Self {
149        self.weight_range = range;
150        self
151    }
152
153    /// Require OS/2 width class within this range. Example: `Some(3..=7)`.
154    pub fn with_width_range(mut self, range: Option<RangeInclusive<u16>>) -> Self {
155        self.width_range = range;
156        self
157    }
158
159    /// Require a specific OS/2 family class (and optionally subclass).
160    pub fn with_family_class(mut self, class: Option<FamilyClassFilter>) -> Self {
161        self.family_class = class;
162        self
163    }
164
165    /// Require at least one creator string to match at least one regex.
166    pub fn with_creator_patterns(mut self, patterns: Vec<Regex>) -> Self {
167        self.creator_patterns = patterns;
168        self
169    }
170
171    /// Require at least one license string to match at least one regex.
172    pub fn with_license_patterns(mut self, patterns: Vec<Regex>) -> Self {
173        self.license_patterns = patterns;
174        self
175    }
176
177    /// The required axis tags, if any.
178    pub fn axes(&self) -> &[Tag] {
179        &self.axes
180    }
181
182    /// The required feature tags, if any.
183    pub fn features(&self) -> &[Tag] {
184        &self.features
185    }
186
187    /// The resolved script requirements, if any.
188    pub fn scripts(&self) -> &[ScriptRequirement] {
189        &self.script_reqs
190    }
191
192    /// The required table tags, if any.
193    pub fn tables(&self) -> &[Tag] {
194        &self.tables
195    }
196
197    /// The name regex patterns, if any.
198    pub fn name_patterns(&self) -> &[Regex] {
199        &self.name_patterns
200    }
201
202    /// The required codepoints, if any.
203    pub fn codepoints(&self) -> &[char] {
204        &self.codepoints
205    }
206
207    /// Whether only variable fonts are accepted.
208    pub fn requires_variable(&self) -> bool {
209        self.variable_only
210    }
211
212    /// The weight class range constraint, if set.
213    pub fn weight_range(&self) -> Option<&RangeInclusive<u16>> {
214        self.weight_range.as_ref()
215    }
216
217    /// The width class range constraint, if set.
218    pub fn width_range(&self) -> Option<&RangeInclusive<u16>> {
219        self.width_range.as_ref()
220    }
221
222    /// The family class constraint, if set.
223    pub fn family_class(&self) -> Option<&FamilyClassFilter> {
224        self.family_class.as_ref()
225    }
226
227    /// The creator/provenance regex patterns, if any.
228    pub fn creator_patterns(&self) -> &[Regex] {
229        &self.creator_patterns
230    }
231
232    /// The license regex patterns, if any.
233    pub fn license_patterns(&self) -> &[Regex] {
234        &self.license_patterns
235    }
236
237    /// Test a font's metadata against every criterion in this query.
238    ///
239    /// Returns `true` only if *all* active criteria are satisfied.
240    /// Criteria that aren't set (empty vecs, `None` ranges) are skipped.
241    ///
242    /// Evaluation order is roughly cheapest-first: boolean checks, then
243    /// tag set intersections, then numeric ranges, then codepoint coverage,
244    /// then regex matching (most expensive). Short-circuits on the first
245    /// failure.
246    pub fn matches(&self, meta: &TypgFontFaceMeta) -> bool {
247        if self.variable_only && !meta.is_variable {
248            return false;
249        }
250
251        if !contains_all_tags(&meta.axis_tags, &self.axes) {
252            return false;
253        }
254
255        if !contains_all_tags(&meta.feature_tags, &self.features) {
256            return false;
257        }
258
259        if !self.script_reqs.is_empty() {
260            let font_scripts: HashSet<Tag> = meta.script_tags.iter().copied().collect();
261            // Whole-OR: the font supports the requested list if *every* script is
262            // present in GSUB/GPOS (OpenType path), OR *every* script is covered
263            // by the cmap (Unicode path). The two paths are not mixed per-script.
264            let all_ot = self
265                .script_reqs
266                .iter()
267                .all(|req| req.ot_satisfied(&font_scripts));
268            let all_unicode = || {
269                self.script_reqs
270                    .iter()
271                    .all(|req| req.unicode_satisfied(meta.codepoints.iter().copied()))
272            };
273            if !(all_ot || all_unicode()) {
274                return false;
275            }
276        }
277
278        if !contains_all_tags(&meta.table_tags, &self.tables) {
279            return false;
280        }
281
282        if let Some(range) = &self.weight_range {
283            match meta.weight_class {
284                Some(weight) if range.contains(&weight) => {}
285                _ => return false,
286            }
287        }
288
289        if let Some(range) = &self.width_range {
290            match meta.width_class {
291                Some(width) if range.contains(&width) => {}
292                _ => return false,
293            }
294        }
295
296        if let Some(filter) = &self.family_class {
297            match meta.family_class {
298                Some((class, subclass)) => {
299                    if class != filter.major {
300                        return false;
301                    }
302                    if let Some(expected_subclass) = filter.subclass {
303                        if subclass != expected_subclass {
304                            return false;
305                        }
306                    }
307                }
308                None => return false,
309            }
310        }
311
312        if !self.codepoints.is_empty() {
313            let available: HashSet<char> = meta.codepoints.iter().copied().collect();
314            if !self.codepoints.iter().all(|cp| available.contains(cp)) {
315                return false;
316            }
317        }
318
319        if !self.name_patterns.is_empty() {
320            let matched = meta
321                .names
322                .iter()
323                .any(|name| self.name_patterns.iter().any(|re| re.is_match(name)));
324            if !matched {
325                return false;
326            }
327        }
328
329        if !self.creator_patterns.is_empty() {
330            let matched = meta
331                .creator_names
332                .iter()
333                .any(|name| self.creator_patterns.iter().any(|re| re.is_match(name)));
334            if !matched {
335                return false;
336            }
337        }
338
339        if !self.license_patterns.is_empty() {
340            let matched = meta
341                .license_names
342                .iter()
343                .any(|name| self.license_patterns.iter().any(|re| re.is_match(name)));
344            if !matched {
345                return false;
346            }
347        }
348
349        true
350    }
351}
352
353/// Check that `haystack` contains every tag in `needles` (set subset check).
354/// Returns `true` if `needles` is empty (vacuous truth — no requirements).
355fn contains_all_tags(haystack: &[Tag], needles: &[Tag]) -> bool {
356    if needles.is_empty() {
357        return true;
358    }
359    let set: HashSet<Tag> = haystack.iter().copied().collect();
360    needles.iter().all(|tag| set.contains(tag))
361}
362
363/// Parse a comma-separated list of codepoints or ranges into a `Vec<char>`.
364///
365/// Accepts single characters ("A"), Unicode escapes ("U+0041"), and ranges
366/// ("A-Z", "U+0041-U+005A"), or any comma-separated combination thereof.
367pub fn parse_codepoint_list(input: &str) -> Result<Vec<char>> {
368    let mut result = Vec::new();
369    if input.trim().is_empty() {
370        return Ok(result);
371    }
372
373    for part in input.split(',') {
374        if part.contains('-') {
375            let pieces: Vec<&str> = part.split('-').collect();
376            if pieces.len() != 2 {
377                return Err(anyhow!("invalid range: {part}"));
378            }
379            let start = parse_codepoint(pieces[0])? as u32;
380            let end = parse_codepoint(pieces[1])? as u32;
381            let (lo, hi) = if start <= end {
382                (start, end)
383            } else {
384                (end, start)
385            };
386            for cp in lo..=hi {
387                if let Some(ch) = char::from_u32(cp) {
388                    result.push(ch);
389                }
390            }
391        } else {
392            result.push(parse_codepoint(part)?);
393        }
394    }
395
396    Ok(result)
397}
398
399fn parse_codepoint(token: &str) -> Result<char> {
400    if token.chars().count() == 1 {
401        return Ok(token.chars().next().unwrap());
402    }
403
404    let trimmed = token.trim_start_matches("U+").trim_start_matches("u+");
405    let cp = u32::from_str_radix(trimmed, 16).map_err(|_| anyhow!("invalid codepoint: {token}"))?;
406    char::from_u32(cp).ok_or_else(|| anyhow!("invalid Unicode scalar: U+{cp:04X}"))
407}
408
409/// Parse a slice of tag strings (e.g. `"wght"`, `"smcp"`) into `Tag` values.
410///
411/// Each string must be 1–4 printable ASCII characters.
412pub fn parse_tag_list(raw: &[String]) -> Result<Vec<Tag>> {
413    raw.iter().map(|s| tag4(s)).collect()
414}
415
/// Constraint on a font's OS/2 family class.
///
/// The `major` class (serif, sans-serif, script, ...) must always be equal.
/// When `subclass` is `Some`, the font's subclass must be equal as well;
/// when it is `None`, any subclass within the major class is accepted.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct FamilyClassFilter {
    /// Major family class the font must have.
    pub major: u8,
    /// Subclass the font must have, or `None` to accept every subclass.
    pub subclass: Option<u8>,
}
426
427/// Parse an OS/2 family class specifier into a [`FamilyClassFilter`].
428///
429/// Accepts numeric values ("8"), hex values ("0x0800"), major.subclass pairs
430/// ("8.11"), and named aliases ("sans", "serif", "script", etc.).
431pub fn parse_family_class(input: &str) -> Result<FamilyClassFilter> {
432    let trimmed = input.trim();
433    if trimmed.is_empty() {
434        return Err(anyhow!("family class cannot be empty"));
435    }
436
437    let lower = trimmed.to_ascii_lowercase();
438    if let Some(major) = lookup_family_class_by_name(&lower) {
439        return Ok(FamilyClassFilter {
440            major,
441            subclass: None,
442        });
443    }
444
445    if let Some((major, subclass)) = parse_major_and_subclass(&lower) {
446        return Ok(FamilyClassFilter {
447            major,
448            subclass: Some(subclass),
449        });
450    }
451
452    let value = if let Some(stripped) = lower.strip_prefix("0x") {
453        u16::from_str_radix(stripped, 16)
454            .map_err(|_| anyhow!("invalid hex family class: {trimmed}"))?
455    } else {
456        lower
457            .parse::<u16>()
458            .map_err(|_| anyhow!("invalid family class: {trimmed}"))?
459    };
460
461    if value <= 0x00FF {
462        return Ok(FamilyClassFilter {
463            major: value as u8,
464            subclass: None,
465        });
466    }
467
468    let major = (value >> 8) as u8;
469    let subclass = (value & 0x00FF) as u8;
470
471    Ok(FamilyClassFilter {
472        major,
473        subclass: Some(subclass),
474    })
475}
476
/// Resolve a family-class alias to its major class number.
///
/// The comparison is exact, so callers are expected to lowercase the name
/// first. Unknown names yield `None`.
fn lookup_family_class_by_name(name: &str) -> Option<u8> {
    // (alias, major class) pairs; several aliases may share one class.
    const ALIASES: &[(&str, u8)] = &[
        ("none", 0),
        ("no-class", 0),
        ("uncategorized", 0),
        ("oldstyle", 1),
        ("old-style", 1),
        ("oldstyle-serif", 1),
        ("transitional", 2),
        ("modern", 3),
        ("clarendon", 4),
        ("slab", 5),
        ("slab-serif", 5),
        ("egyptian", 5),
        ("freeform", 7),
        ("freeform-serif", 7),
        ("sans", 8),
        ("sans-serif", 8),
        ("gothic", 8),
        ("ornamental", 9),
        ("decorative", 9),
        ("script", 10),
        ("symbolic", 12),
    ];

    let by_name: HashMap<&str, u8> = ALIASES.iter().copied().collect();
    by_name.get(name).copied()
}
502
/// Split a `major.subclass` or `major:subclass` specifier into two `u8`s.
///
/// The text is split at the first `.` if there is one, otherwise at the first
/// `:`. Returns `None` when neither separator is present or when either side
/// fails to parse as a `u8`.
fn parse_major_and_subclass(raw: &str) -> Option<(u8, u8)> {
    let (major_text, subclass_text) = raw.split_once('.').or_else(|| raw.split_once(':'))?;
    let major = major_text.parse::<u8>().ok()?;
    let subclass = subclass_text.parse::<u8>().ok()?;
    Some((major, subclass))
}
513
514/// Parse a single value or range of u16 numbers (e.g., "400" or "300-500").
515pub fn parse_u16_range(input: &str) -> Result<RangeInclusive<u16>> {
516    let trimmed = input.trim();
517    if trimmed.is_empty() {
518        return Err(anyhow!("range cannot be empty"));
519    }
520
521    if let Some((lo, hi)) = trimmed.split_once('-') {
522        let start: u16 = lo.trim().parse()?;
523        let end: u16 = hi.trim().parse()?;
524        let (min, max) = if start <= end {
525            (start, end)
526        } else {
527            (end, start)
528        };
529        Ok(min..=max)
530    } else {
531        let value: u16 = trimmed.parse()?;
532        Ok(value..=value)
533    }
534}