Skip to main content

typg_core/
search.rs

//! Font metadata extraction and search.
//!
//! Reads font files via `read-fonts` and `skrifa`, extracts metadata (names,
//! axes, features, scripts, tables, codepoints, OS/2 fields), and filters
//! results against a [`Query`]. Uses `rayon` for parallel processing.
//!
//! Unreadable or unparseable files are skipped with a warning to stderr.
//!
//! Made by FontLab <https://www.fontlab.com/>
10use std::fs;
11use std::path::{Path, PathBuf};
12use std::sync::mpsc::Sender;
13
14use anyhow::{Context, Result};
15use rayon::prelude::*;
16use rayon::ThreadPoolBuilder;
17use read_fonts::tables::name::NameId;
18use read_fonts::types::Tag;
19use read_fonts::{FontRef, TableProvider};
20use serde::{Deserialize, Serialize};
21use skrifa::{FontRef as SkrifaFontRef, MetadataProvider};
22
23use crate::discovery::{FontDiscovery, PathDiscovery};
24use crate::query::Query;
25use crate::tags::{tag4, tag_to_string};
26
/// Extracted metadata for a single font face.
///
/// The four tag lists are (de)serialized as lists of strings via
/// `serialize_tags` / `deserialize_tags`. Fields marked `#[serde(default)]`
/// fall back to their `Default` value when missing from serialized input.
///
/// When produced by `load_metadata`, every list in this struct is sorted and
/// de-duplicated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypgFontFaceMeta {
    /// Name strings: family, typographic family, subfamily, typographic
    /// subfamily, full and PostScript names, plus the file stem as a fallback.
    /// Entries are trimmed; empty strings are dropped.
    pub names: Vec<String>,
    /// Variation axis tags (wght, wdth, opsz, ...). Empty for static fonts.
    #[serde(
        serialize_with = "serialize_tags",
        deserialize_with = "deserialize_tags"
    )]
    pub axis_tags: Vec<Tag>,
    /// OpenType feature tags from GSUB and GPOS tables.
    #[serde(
        serialize_with = "serialize_tags",
        deserialize_with = "deserialize_tags"
    )]
    pub feature_tags: Vec<Tag>,
    /// Script tags from GSUB and GPOS tables.
    #[serde(
        serialize_with = "serialize_tags",
        deserialize_with = "deserialize_tags"
    )]
    pub script_tags: Vec<Tag>,
    /// Top-level table tags present in the font.
    #[serde(
        serialize_with = "serialize_tags",
        deserialize_with = "deserialize_tags"
    )]
    pub table_tags: Vec<Tag>,
    /// Unicode codepoints covered by the font's cmap.
    pub codepoints: Vec<char>,
    /// True if the font contains an `fvar` table (variable font).
    pub is_variable: bool,
    /// OS/2 usWeightClass (typically 100-900). `None` when the OS/2 table
    /// could not be read.
    #[serde(default)]
    pub weight_class: Option<u16>,
    /// OS/2 usWidthClass (1-9). `None` when the OS/2 table could not be read.
    #[serde(default)]
    pub width_class: Option<u16>,
    /// OS/2 sFamilyClass split into (class, subclass): the high byte and the
    /// low byte of the raw value. `None` when the OS/2 table could not be read.
    #[serde(default)]
    pub family_class: Option<(u8, u8)>,
    /// Creator-related name strings (copyright, trademark, manufacturer, designer, description, vendor URL, designer URL, license, license URL).
    #[serde(default)]
    pub creator_names: Vec<String>,
    /// License-related name strings (copyright, license description, license URL).
    #[serde(default)]
    pub license_names: Vec<String>,
}
76
/// Location of a font face on disk.
///
/// A face is identified by the file that contains it and, for faces stored in
/// a collection, its position inside that collection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypgFontSource {
    /// Path to the font file.
    pub path: PathBuf,
    /// Index within a TTC/OTC collection, or `None` for single-face files.
    pub ttc_index: Option<u32>,
}
85
86impl TypgFontSource {
87    /// Format as `path#index` for collection members, plain path otherwise.
88    pub fn path_with_index(&self) -> String {
89        if let Some(idx) = self.ttc_index {
90            format!("{}#{idx}", self.path.display())
91        } else {
92            self.path.display().to_string()
93        }
94    }
95}
96
/// A search result: font metadata paired with its file location.
///
/// One value is produced per face, so a collection file yields several
/// matches that share a path and differ in `source.ttc_index`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypgFontFaceMatch {
    /// File location and collection index.
    pub source: TypgFontSource,
    /// Extracted metadata for this face.
    pub metadata: TypgFontFaceMeta,
}
105
/// Controls search parallelism and traversal behavior.
///
/// The derived `Default` leaves symlink following off and `jobs` unset.
#[derive(Debug, Default, Clone)]
pub struct SearchOptions {
    /// Follow symlinks during directory traversal.
    pub follow_symlinks: bool,
    /// Worker thread count. `None` uses the rayon default (CPU count);
    /// `Some(n)` runs the search inside a dedicated pool built with
    /// `num_threads(n)`.
    pub jobs: Option<usize>,
}
114
115/// Search filesystem paths for fonts matching a query. Returns all results sorted by path.
116///
117/// Files that can't be read or parsed are skipped with a warning to stderr.
118pub fn search(
119    paths: &[PathBuf],
120    query: &Query,
121    opts: &SearchOptions,
122) -> Result<Vec<TypgFontFaceMatch>> {
123    let discovery = PathDiscovery::new(paths.iter().cloned()).follow_symlinks(opts.follow_symlinks);
124    let candidates = discovery.discover()?;
125
126    let run_search = || -> Vec<TypgFontFaceMatch> {
127        let mut matches: Vec<TypgFontFaceMatch> = candidates
128            .par_iter()
129            .flat_map_iter(|loc| match load_metadata(&loc.path) {
130                Ok(faces) => faces,
131                Err(_) => Vec::new(),
132            })
133            .filter(|face| query.matches(&face.metadata))
134            .collect();
135
136        sort_matches(&mut matches);
137        matches
138    };
139
140    let matches = if let Some(jobs) = opts.jobs {
141        let pool = ThreadPoolBuilder::new().num_threads(jobs).build()?;
142        pool.install(run_search)
143    } else {
144        run_search()
145    };
146
147    Ok(matches)
148}
149
150/// Search filesystem paths and stream each matching font through `tx` as found.
151///
152/// Results are not sorted -- they arrive in processing order. Use this for
153/// line-oriented output formats (plain text, paths, NDJSON) where the user
154/// benefits from seeing results immediately.
155///
156/// Files that can't be read or parsed are skipped with a warning to stderr.
157pub fn search_streaming(
158    paths: &[PathBuf],
159    query: &Query,
160    opts: &SearchOptions,
161    tx: Sender<TypgFontFaceMatch>,
162) -> Result<()> {
163    let discovery = PathDiscovery::new(paths.iter().cloned()).follow_symlinks(opts.follow_symlinks);
164    let candidates = discovery.discover()?;
165
166    let run_search = || {
167        candidates
168            .par_iter()
169            .for_each_with(tx, |tx, loc| match load_metadata(&loc.path) {
170                Ok(faces) => {
171                    for face in faces {
172                        if query.matches(&face.metadata) {
173                            let _ = tx.send(face);
174                        }
175                    }
176                }
177                Err(_) => {}
178            });
179    };
180
181    if let Some(jobs) = opts.jobs {
182        let pool = ThreadPoolBuilder::new().num_threads(jobs).build()?;
183        pool.install(run_search);
184    } else {
185        run_search();
186    }
187
188    Ok(())
189}
190
191/// Filter pre-loaded entries against a query without file I/O.
192pub fn filter_cached(entries: &[TypgFontFaceMatch], query: &Query) -> Vec<TypgFontFaceMatch> {
193    let mut matches: Vec<TypgFontFaceMatch> = entries
194        .iter()
195        .filter(|entry| query.matches(&entry.metadata))
196        .cloned()
197        .collect();
198
199    sort_matches(&mut matches);
200    matches
201}
202
203/// Read a font file and extract metadata for each face it contains.
204fn load_metadata(path: &Path) -> Result<Vec<TypgFontFaceMatch>> {
205    let data = fs::read(path).with_context(|| format!("reading {}", path.display()))?;
206    let mut metas = Vec::new();
207
208    for font in FontRef::fonts(&data) {
209        let font = font?;
210        let ttc_index = font.ttc_index();
211        let sfont = if let Some(idx) = ttc_index {
212            SkrifaFontRef::from_index(&data, idx)?
213        } else {
214            SkrifaFontRef::new(&data)?
215        };
216
217        let names = collect_names(&font);
218        let mut axis_tags = collect_axes(&font);
219        let mut feature_tags = collect_features(&font);
220        let mut script_tags = collect_scripts(&font);
221        let mut table_tags = collect_tables(&font);
222        let mut codepoints = collect_codepoints(&sfont);
223        let fvar_tag = Tag::new(b"fvar");
224        let is_variable = table_tags.contains(&fvar_tag);
225        let (weight_class, width_class, family_class) = collect_classification(&font);
226        let mut creator_names = collect_creator_names(&font);
227        let mut license_names = collect_license_names(&font);
228
229        dedup_tags(&mut axis_tags);
230        dedup_tags(&mut feature_tags);
231        dedup_tags(&mut script_tags);
232        dedup_tags(&mut table_tags);
233        dedup_codepoints(&mut codepoints);
234        creator_names.sort_unstable();
235        creator_names.dedup();
236        license_names.sort_unstable();
237        license_names.dedup();
238
239        metas.push(TypgFontFaceMatch {
240            source: TypgFontSource {
241                path: path.to_path_buf(),
242                ttc_index,
243            },
244            metadata: TypgFontFaceMeta {
245                names: dedup_names(names, path),
246                axis_tags,
247                feature_tags,
248                script_tags,
249                table_tags,
250                codepoints,
251                is_variable,
252                weight_class,
253                width_class,
254                family_class,
255                creator_names,
256                license_names,
257            },
258        });
259    }
260
261    Ok(metas)
262}
263
264fn collect_tables(font: &FontRef) -> Vec<Tag> {
265    font.table_directory
266        .table_records()
267        .iter()
268        .map(|rec| rec.tag())
269        .collect()
270}
271
272fn collect_axes(font: &FontRef) -> Vec<Tag> {
273    if let Ok(fvar) = font.fvar() {
274        if let Ok(axes) = fvar.axes() {
275            return axes.iter().map(|axis| axis.axis_tag()).collect();
276        }
277    }
278    Vec::new()
279}
280
281fn collect_features(font: &FontRef) -> Vec<Tag> {
282    let mut tags = Vec::new();
283    if let Ok(gsub) = font.gsub() {
284        if let Ok(list) = gsub.feature_list() {
285            tags.extend(list.feature_records().iter().map(|rec| rec.feature_tag()));
286        }
287    }
288    if let Ok(gpos) = font.gpos() {
289        if let Ok(list) = gpos.feature_list() {
290            tags.extend(list.feature_records().iter().map(|rec| rec.feature_tag()));
291        }
292    }
293    tags
294}
295
296fn collect_scripts(font: &FontRef) -> Vec<Tag> {
297    let mut tags = Vec::new();
298    if let Ok(gsub) = font.gsub() {
299        if let Ok(list) = gsub.script_list() {
300            tags.extend(list.script_records().iter().map(|rec| rec.script_tag()));
301        }
302    }
303    if let Ok(gpos) = font.gpos() {
304        if let Ok(list) = gpos.script_list() {
305            tags.extend(list.script_records().iter().map(|rec| rec.script_tag()));
306        }
307    }
308    tags
309}
310
311fn collect_codepoints(font: &SkrifaFontRef) -> Vec<char> {
312    let mut cps = Vec::new();
313    for (cp, _) in font.charmap().mappings() {
314        if let Some(ch) = char::from_u32(cp) {
315            cps.push(ch);
316        }
317    }
318    cps
319}
320
321fn collect_names(font: &FontRef) -> Vec<String> {
322    let mut names = Vec::new();
323
324    if let Ok(name_table) = font.name() {
325        let data = name_table.string_data();
326        let wanted = [
327            NameId::FAMILY_NAME,
328            NameId::TYPOGRAPHIC_FAMILY_NAME,
329            NameId::SUBFAMILY_NAME,
330            NameId::TYPOGRAPHIC_SUBFAMILY_NAME,
331            NameId::FULL_NAME,
332            NameId::POSTSCRIPT_NAME,
333        ];
334
335        for record in name_table.name_record() {
336            if !record.is_unicode() {
337                continue;
338            }
339            if !wanted.contains(&record.name_id()) {
340                continue;
341            }
342            if let Ok(entry) = record.string(data) {
343                let rendered = entry.to_string();
344                if !rendered.trim().is_empty() {
345                    names.push(rendered);
346                }
347            }
348        }
349    }
350
351    names
352}
353
354fn collect_creator_names(font: &FontRef) -> Vec<String> {
355    let mut names = Vec::new();
356
357    if let Ok(name_table) = font.name() {
358        let data = name_table.string_data();
359        let wanted = [
360            NameId::COPYRIGHT_NOTICE,
361            NameId::TRADEMARK,
362            NameId::MANUFACTURER,
363            NameId::DESIGNER,
364            NameId::DESCRIPTION,
365            NameId::VENDOR_URL,
366            NameId::DESIGNER_URL,
367            NameId::LICENSE_DESCRIPTION,
368            NameId::LICENSE_URL,
369        ];
370
371        for record in name_table.name_record() {
372            if !record.is_unicode() {
373                continue;
374            }
375            if !wanted.contains(&record.name_id()) {
376                continue;
377            }
378            if let Ok(entry) = record.string(data) {
379                let rendered = entry.to_string();
380                if !rendered.trim().is_empty() {
381                    names.push(rendered);
382                }
383            }
384        }
385    }
386
387    names
388}
389
390fn collect_license_names(font: &FontRef) -> Vec<String> {
391    let mut names = Vec::new();
392
393    if let Ok(name_table) = font.name() {
394        let data = name_table.string_data();
395        let wanted = [
396            NameId::COPYRIGHT_NOTICE,
397            NameId::LICENSE_DESCRIPTION,
398            NameId::LICENSE_URL,
399        ];
400
401        for record in name_table.name_record() {
402            if !record.is_unicode() {
403                continue;
404            }
405            if !wanted.contains(&record.name_id()) {
406                continue;
407            }
408            if let Ok(entry) = record.string(data) {
409                let rendered = entry.to_string();
410                if !rendered.trim().is_empty() {
411                    names.push(rendered);
412                }
413            }
414        }
415    }
416
417    names
418}
419
420fn collect_classification(font: &FontRef) -> (Option<u16>, Option<u16>, Option<(u8, u8)>) {
421    match font.os2() {
422        Ok(table) => {
423            let raw_family = table.s_family_class() as u16;
424            let class = (raw_family >> 8) as u8;
425            let subclass = (raw_family & 0x00FF) as u8;
426            (
427                Some(table.us_weight_class()),
428                Some(table.us_width_class()),
429                Some((class, subclass)),
430            )
431        }
432        Err(_) => (None, None, None),
433    }
434}
435
436fn sort_matches(matches: &mut [TypgFontFaceMatch]) {
437    matches.sort_by(|a, b| {
438        a.source
439            .path
440            .cmp(&b.source.path)
441            .then_with(|| a.source.ttc_index.cmp(&b.source.ttc_index))
442    });
443}
444
445fn dedup_tags(tags: &mut Vec<Tag>) {
446    tags.sort_unstable();
447    tags.dedup();
448}
449
/// Sort codepoints ascending and remove repeated values in place.
fn dedup_codepoints(codepoints: &mut Vec<char>) {
    codepoints.sort_unstable_by(|lhs, rhs| lhs.cmp(rhs));
    codepoints.dedup_by(|later, earlier| later == earlier);
}
454
/// Normalize a face's name list.
///
/// Appends the file stem of `path` (or the displayed path when there is no
/// stem) as a fallback name, trims every entry, drops entries that end up
/// empty, and returns the remainder sorted with duplicates removed.
fn dedup_names(mut names: Vec<String>, path: &Path) -> Vec<String> {
    let fallback = match path.file_stem() {
        Some(stem) => stem.to_string_lossy().to_string(),
        None => path.display().to_string(),
    };
    names.push(fallback);

    let mut cleaned: Vec<String> = names
        .iter()
        .map(|name| name.trim())
        .filter(|name| !name.is_empty())
        .map(str::to_owned)
        .collect();

    cleaned.sort_unstable();
    cleaned.dedup();
    cleaned
}
471
472fn serialize_tags<S>(tags: &[Tag], serializer: S) -> Result<S::Ok, S::Error>
473where
474    S: serde::Serializer,
475{
476    let as_strings: Vec<String> = tags.iter().copied().map(tag_to_string).collect();
477    as_strings.serialize(serializer)
478}
479
480fn deserialize_tags<'de, D>(deserializer: D) -> Result<Vec<Tag>, D::Error>
481where
482    D: serde::Deserializer<'de>,
483{
484    let raw: Vec<String> = Vec::<String>::deserialize(deserializer)?;
485    raw.into_iter()
486        .map(|s| tag4(&s).map_err(serde::de::Error::custom))
487        .collect()
488}
489
#[cfg(test)]
mod tests {
    use super::*;

    /// The file stem joins the list, and trimmed duplicates collapse into one entry.
    #[test]
    fn dedup_names_adds_fallback_and_trims() {
        let deduped = dedup_names(
            vec![String::from("  Alpha  "), String::from("Alpha")],
            Path::new("/fonts/Beta.ttf"),
        );

        assert!(
            deduped.iter().any(|name| name == "Alpha"),
            "original names should be trimmed and kept"
        );
        assert!(
            deduped.iter().any(|name| name == "Beta"),
            "file stem should be added as fallback name"
        );
        assert_eq!(
            deduped.len(),
            2,
            "dedup should remove duplicate entries and empty strings"
        );
    }

    /// Repeated tags collapse and the result is sorted.
    #[test]
    fn dedup_tags_sorts_and_dedups() {
        let wght = tag4("wght").unwrap();
        let gsub = tag4("GSUB").unwrap();

        let mut tags = vec![wght, wght, gsub];
        dedup_tags(&mut tags);

        assert_eq!(tags, vec![gsub, wght]);
    }

    /// Repeated codepoints collapse and the result is sorted.
    #[test]
    fn dedup_codepoints_sorts_and_dedups() {
        let mut cps = vec!['b', 'a', 'b'];
        dedup_codepoints(&mut cps);

        let expected = vec!['a', 'b'];
        assert_eq!(cps, expected);
    }
}
533}