Skip to main content

typg_core/
query.rs

//! The charming matchmaker who knows exactly which fonts you're looking for
//!
//! Think of this as your personal font dating service - you tell us your
//! preferences, your must-haves, your deal-breakers, and we'll find the
//! fonts that make your heart skip a beat. Whether you're looking for
//! something variable, something that speaks Arabic, or something with
//! just the right weight, we ask the right questions to find your perfect match.
//!
//! We're fluent in the language of fonts - tags, ranges, patterns, and more.
//! Tell us what you want in human terms, and we'll translate it into the
//! precise criteria that make our index purr with satisfaction.
//!
//! Crafted with matchmaking expertise at FontLab <https://www.fontlab.com/>
14use std::collections::{HashMap, HashSet};
15use std::ops::RangeInclusive;
16
17use anyhow::{anyhow, Result};
18use read_fonts::types::Tag;
19use regex::Regex;
20
21use crate::search::TypgFontFaceMeta;
22use crate::tags::tag4;
23
24/// Your personal font shopping list with very specific tastes
25///
26/// We remember every detail of what you're looking for - the features you
27/// need, the scripts that are non-negotiable, the weight range that feels
28/// just right, and whether you need a font that can shape-shift like a
29/// superhero. This is your complete shopping manifest that tells our
30/// index exactly what to hunt for.
31#[derive(Debug, Clone, Default)]
32pub struct Query {
33    /// Variable font capabilities you can't live without
34    axes: Vec<Tag>,
35    /// OpenType features that make you swoon
36    features: Vec<Tag>,
37    /// Languages and scripts your font must speak
38    scripts: Vec<Tag>,
39    /// Table requirements for your typographic adventures
40    tables: Vec<Tag>,
41    /// Name patterns that catch your eye
42    name_patterns: Vec<Regex>,
43    /// Specific characters your font must know how to draw
44    codepoints: Vec<char>,
45    /// Are you only looking for fonts that can shape-shift?
46    variable_only: bool,
47    /// The perfect weight range (from delicate whispers to bold declarations)
48    weight_range: Option<RangeInclusive<u16>>,
49    /// How wide or narrow you like your fonts to stretch
50    width_range: Option<RangeInclusive<u16>>,
51    /// What typographic family you belong to
52    family_class: Option<FamilyClassFilter>,
53    /// Regex patterns to match against creator-related name strings
54    creator_patterns: Vec<Regex>,
55    /// Regex patterns to match against license-related name strings
56    license_patterns: Vec<Regex>,
57}
58
59impl Query {
60    pub fn new() -> Self {
61        Self::default()
62    }
63
64    pub fn with_axes(mut self, axes: Vec<Tag>) -> Self {
65        self.axes = axes;
66        self
67    }
68
69    pub fn with_features(mut self, features: Vec<Tag>) -> Self {
70        self.features = features;
71        self
72    }
73
74    pub fn with_scripts(mut self, scripts: Vec<Tag>) -> Self {
75        self.scripts = scripts;
76        self
77    }
78
79    pub fn with_tables(mut self, tables: Vec<Tag>) -> Self {
80        self.tables = tables;
81        self
82    }
83
84    pub fn with_name_patterns(mut self, patterns: Vec<Regex>) -> Self {
85        self.name_patterns = patterns;
86        self
87    }
88
89    pub fn with_codepoints(mut self, cps: Vec<char>) -> Self {
90        self.codepoints = cps;
91        self
92    }
93
94    pub fn require_variable(mut self, yes: bool) -> Self {
95        self.variable_only = yes;
96        self
97    }
98
99    pub fn with_weight_range(mut self, range: Option<RangeInclusive<u16>>) -> Self {
100        self.weight_range = range;
101        self
102    }
103
104    pub fn with_width_range(mut self, range: Option<RangeInclusive<u16>>) -> Self {
105        self.width_range = range;
106        self
107    }
108
109    pub fn with_family_class(mut self, class: Option<FamilyClassFilter>) -> Self {
110        self.family_class = class;
111        self
112    }
113
114    pub fn with_creator_patterns(mut self, patterns: Vec<Regex>) -> Self {
115        self.creator_patterns = patterns;
116        self
117    }
118
119    pub fn with_license_patterns(mut self, patterns: Vec<Regex>) -> Self {
120        self.license_patterns = patterns;
121        self
122    }
123
124    // Accessor methods for use by the high-performance index module.
125
126    /// Get the required axis tags.
127    pub fn axes(&self) -> &[Tag] {
128        &self.axes
129    }
130
131    /// Get the required feature tags.
132    pub fn features(&self) -> &[Tag] {
133        &self.features
134    }
135
136    /// Get the required script tags.
137    pub fn scripts(&self) -> &[Tag] {
138        &self.scripts
139    }
140
141    /// Get the required table tags.
142    pub fn tables(&self) -> &[Tag] {
143        &self.tables
144    }
145
146    /// Get the name patterns.
147    pub fn name_patterns(&self) -> &[Regex] {
148        &self.name_patterns
149    }
150
151    /// Get the required codepoints.
152    pub fn codepoints(&self) -> &[char] {
153        &self.codepoints
154    }
155
156    /// Check if variable fonts are required.
157    pub fn requires_variable(&self) -> bool {
158        self.variable_only
159    }
160
161    /// Get the weight range filter.
162    pub fn weight_range(&self) -> Option<&RangeInclusive<u16>> {
163        self.weight_range.as_ref()
164    }
165
166    /// Get the width range filter.
167    pub fn width_range(&self) -> Option<&RangeInclusive<u16>> {
168        self.width_range.as_ref()
169    }
170
171    /// Get the family class filter.
172    pub fn family_class(&self) -> Option<&FamilyClassFilter> {
173        self.family_class.as_ref()
174    }
175
176    /// Get the creator patterns.
177    pub fn creator_patterns(&self) -> &[Regex] {
178        &self.creator_patterns
179    }
180
181    /// Get the license patterns.
182    pub fn license_patterns(&self) -> &[Regex] {
183        &self.license_patterns
184    }
185
186    /// The moment of truth - does this font make your heart flutter?
187    ///
188    /// We gently interview each font against your complete wishlist.
189    /// Every requirement gets checked - no corner cutting, no compromises.
190    /// Only fonts that truly match your vision get the coveted "yes" that
191    /// makes them part of your search results.
192    ///
193    /// Returns true if this font is worthy of your affection, false otherwise.
194    pub fn matches(&self, meta: &TypgFontFaceMeta) -> bool {
195        if self.variable_only && !meta.is_variable {
196            return false;
197        }
198
199        if !contains_all_tags(&meta.axis_tags, &self.axes) {
200            return false;
201        }
202
203        if !contains_all_tags(&meta.feature_tags, &self.features) {
204            return false;
205        }
206
207        if !contains_all_tags(&meta.script_tags, &self.scripts) {
208            return false;
209        }
210
211        if !contains_all_tags(&meta.table_tags, &self.tables) {
212            return false;
213        }
214
215        if let Some(range) = &self.weight_range {
216            match meta.weight_class {
217                Some(weight) if range.contains(&weight) => {}
218                _ => return false,
219            }
220        }
221
222        if let Some(range) = &self.width_range {
223            match meta.width_class {
224                Some(width) if range.contains(&width) => {}
225                _ => return false,
226            }
227        }
228
229        if let Some(filter) = &self.family_class {
230            match meta.family_class {
231                Some((class, subclass)) => {
232                    if class != filter.major {
233                        return false;
234                    }
235                    if let Some(expected_subclass) = filter.subclass {
236                        if subclass != expected_subclass {
237                            return false;
238                        }
239                    }
240                }
241                None => return false,
242            }
243        }
244
245        if !self.codepoints.is_empty() {
246            let available: HashSet<char> = meta.codepoints.iter().copied().collect();
247            if !self.codepoints.iter().all(|cp| available.contains(cp)) {
248                return false;
249            }
250        }
251
252        if !self.name_patterns.is_empty() {
253            let matched = meta
254                .names
255                .iter()
256                .any(|name| self.name_patterns.iter().any(|re| re.is_match(name)));
257            if !matched {
258                return false;
259            }
260        }
261
262        if !self.creator_patterns.is_empty() {
263            let matched = meta
264                .creator_names
265                .iter()
266                .any(|name| self.creator_patterns.iter().any(|re| re.is_match(name)));
267            if !matched {
268                return false;
269            }
270        }
271
272        if !self.license_patterns.is_empty() {
273            let matched = meta
274                .license_names
275                .iter()
276                .any(|name| self.license_patterns.iter().any(|re| re.is_match(name)));
277            if !matched {
278                return false;
279            }
280        }
281
282        true
283    }
284}
285
286fn contains_all_tags(haystack: &[Tag], needles: &[Tag]) -> bool {
287    if needles.is_empty() {
288        return true;
289    }
290    let set: HashSet<Tag> = haystack.iter().copied().collect();
291    needles.iter().all(|tag| set.contains(tag))
292}
293
294/// Translates your character wishes into Unicode reality
295///
296/// We understand when you say "A-D" or "U+0041-U+0044" or just "A,B,C".
297/// We'll happily parse comma-separated values, ranges, single characters,
298/// or those fancy U+ codes that Unicode professionals love. Just give us
299/// your character shopping list and we'll return it in a format our system
300/// can understand.
301///
302/// Accepts: "A-Z", "U+0041-U+005A", "A,B,C", or any combination thereof.
303pub fn parse_codepoint_list(input: &str) -> Result<Vec<char>> {
304    let mut result = Vec::new();
305    if input.trim().is_empty() {
306        return Ok(result);
307    }
308
309    for part in input.split(',') {
310        if part.contains('-') {
311            let pieces: Vec<&str> = part.split('-').collect();
312            if pieces.len() != 2 {
313                return Err(anyhow!("invalid range: {part}"));
314            }
315            let start = parse_codepoint(pieces[0])? as u32;
316            let end = parse_codepoint(pieces[1])? as u32;
317            let (lo, hi) = if start <= end {
318                (start, end)
319            } else {
320                (end, start)
321            };
322            for cp in lo..=hi {
323                if let Some(ch) = char::from_u32(cp) {
324                    result.push(ch);
325                }
326            }
327        } else {
328            result.push(parse_codepoint(part)?);
329        }
330    }
331
332    Ok(result)
333}
334
335fn parse_codepoint(token: &str) -> Result<char> {
336    if token.chars().count() == 1 {
337        return Ok(token.chars().next().unwrap());
338    }
339
340    let trimmed = token.trim_start_matches("U+").trim_start_matches("u+");
341    let cp = u32::from_str_radix(trimmed, 16).map_err(|_| anyhow!("invalid codepoint: {token}"))?;
342    char::from_u32(cp).ok_or_else(|| anyhow!("invalid Unicode scalar: U+{cp:04X}"))
343}
344
345/// Translates human-readable tag strings into the cryptic codes fonts speak
346///
347/// You give us friendly strings like "wght", "smcp", or "GSUB" and we
348/// convert them into the proper 4-byte tags that fonts actually understand.
349/// We're picky about formatting - no cheating with tags that are too long
350/// or contain mysterious characters. Only the finest tags make it through.
351///
352/// Each string must be 1-4 characters of printable ASCII goodness.
353pub fn parse_tag_list(raw: &[String]) -> Result<Vec<Tag>> {
354    raw.iter().map(|s| tag4(s)).collect()
355}
356
/// Family class criterion used by `Query` to filter faces.
///
/// Presumably mirrors the two bytes of the OpenType OS/2 `sFamilyClass`
/// field (class in the high byte, subclass in the low byte) — confirm
/// against the code that fills in the face metadata.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct FamilyClassFilter {
    /// Class ID the face must report.
    pub major: u8,
    /// Subclass ID the face must report; `None` accepts any subclass.
    pub subclass: Option<u8>,
}
362
363/// Decodes the secret family language of typographic classification
364///
365/// Font families speak in mysterious codes that tell us where they belong
366/// in the grand typographic taxonomy. We understand their native tongue
367/// whether you speak in numbers ("8"), hex ("0x0800"), decimal with subclass
368/// ("8.11"), or human-friendly names like "sans", "serif", or "script".
369///
370/// Each font family has a story to tell, and we're fluent in their dialect.
371pub fn parse_family_class(input: &str) -> Result<FamilyClassFilter> {
372    let trimmed = input.trim();
373    if trimmed.is_empty() {
374        return Err(anyhow!("family class cannot be empty"));
375    }
376
377    let lower = trimmed.to_ascii_lowercase();
378    if let Some(major) = lookup_family_class_by_name(&lower) {
379        return Ok(FamilyClassFilter {
380            major,
381            subclass: None,
382        });
383    }
384
385    if let Some((major, subclass)) = parse_major_and_subclass(&lower) {
386        return Ok(FamilyClassFilter {
387            major,
388            subclass: Some(subclass),
389        });
390    }
391
392    let value = if let Some(stripped) = lower.strip_prefix("0x") {
393        u16::from_str_radix(stripped, 16)
394            .map_err(|_| anyhow!("invalid hex family class: {trimmed}"))?
395    } else {
396        lower
397            .parse::<u16>()
398            .map_err(|_| anyhow!("invalid family class: {trimmed}"))?
399    };
400
401    if value <= 0x00FF {
402        return Ok(FamilyClassFilter {
403            major: value as u8,
404            subclass: None,
405        });
406    }
407
408    let major = (value >> 8) as u8;
409    let subclass = (value & 0x00FF) as u8;
410
411    Ok(FamilyClassFilter {
412        major,
413        subclass: Some(subclass),
414    })
415}
416
/// Maps a family class alias to its numeric class ID.
///
/// Matching is exact and case-sensitive, so callers should lowercase the
/// input first. No alias maps to IDs 6 or 11. Returns `None` for an
/// unrecognized name.
///
/// A `match` is used rather than a lookup table so that no map is allocated
/// and filled on each call.
fn lookup_family_class_by_name(name: &str) -> Option<u8> {
    let class = match name {
        "none" | "no-class" | "uncategorized" => 0,
        "oldstyle" | "old-style" | "oldstyle-serif" => 1,
        "transitional" => 2,
        "modern" => 3,
        "clarendon" => 4,
        "slab" | "slab-serif" | "egyptian" => 5,
        "freeform" | "freeform-serif" => 7,
        "sans" | "sans-serif" | "gothic" => 8,
        "ornamental" | "decorative" => 9,
        "script" => 10,
        "symbolic" => 12,
        _ => return None,
    };
    Some(class)
}
442
/// Splits `major.subclass` or `major:subclass` into its two decimal parts.
///
/// Returns `None` when there is no separator, when either side is not a
/// valid `u8`, or when more than one separator is present (the extra one
/// makes one side fail to parse).
fn parse_major_and_subclass(raw: &str) -> Option<(u8, u8)> {
    let (major_text, subclass_text) = raw.split_once(&['.', ':'][..])?;
    let major = major_text.parse::<u8>().ok()?;
    let subclass = subclass_text.parse::<u8>().ok()?;
    Some((major, subclass))
}
453
454/// Parse a single value or range of u16 numbers (e.g., "400" or "300-500").
455pub fn parse_u16_range(input: &str) -> Result<RangeInclusive<u16>> {
456    let trimmed = input.trim();
457    if trimmed.is_empty() {
458        return Err(anyhow!("range cannot be empty"));
459    }
460
461    if let Some((lo, hi)) = trimmed.split_once('-') {
462        let start: u16 = lo.trim().parse()?;
463        let end: u16 = hi.trim().parse()?;
464        let (min, max) = if start <= end {
465            (start, end)
466        } else {
467            (end, start)
468        };
469        Ok(min..=max)
470    } else {
471        let value: u16 = trimmed.parse()?;
472        Ok(value..=value)
473    }
474}