Skip to main content

typg_core/
search.rs

//! Font metadata extraction and search.
//!
//! Reads font files via `read-fonts` and `skrifa`, extracts metadata (names,
//! axes, features, scripts, tables, codepoints, OS/2 fields), and filters
//! results against a [`Query`]. Uses `rayon` for parallel processing.
//!
//! Unreadable or unparseable files are skipped with a warning to stderr.
//!
//! Made by FontLab <https://www.fontlab.com/>
10use std::fs;
11use std::path::{Path, PathBuf};
12use std::sync::mpsc::Sender;
13
14use anyhow::{Context, Result};
15use rayon::prelude::*;
16use rayon::ThreadPoolBuilder;
17use read_fonts::tables::name::NameId;
18use read_fonts::types::Tag;
19use read_fonts::{FontRef, TableProvider};
20use serde::{Deserialize, Serialize};
21use skrifa::{FontRef as SkrifaFontRef, MetadataProvider};
22
23use crate::discovery::{FontDiscovery, PathDiscovery};
24use crate::query::Query;
25use crate::tags::{tag4, tag_to_string};
26
27/// Extracted metadata for a single font face.
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct TypgFontFaceMeta {
30    /// Name strings: family, full, postscript, subfamily, plus file stem as fallback.
31    pub names: Vec<String>,
32    /// Variation axis tags (wght, wdth, opsz, ...). Empty for static fonts.
33    #[serde(
34        serialize_with = "serialize_tags",
35        deserialize_with = "deserialize_tags"
36    )]
37    pub axis_tags: Vec<Tag>,
38    /// OpenType feature tags from GSUB and GPOS tables.
39    #[serde(
40        serialize_with = "serialize_tags",
41        deserialize_with = "deserialize_tags"
42    )]
43    pub feature_tags: Vec<Tag>,
44    /// Script tags from GSUB and GPOS tables.
45    #[serde(
46        serialize_with = "serialize_tags",
47        deserialize_with = "deserialize_tags"
48    )]
49    pub script_tags: Vec<Tag>,
50    /// Top-level table tags present in the font.
51    #[serde(
52        serialize_with = "serialize_tags",
53        deserialize_with = "deserialize_tags"
54    )]
55    pub table_tags: Vec<Tag>,
56    /// Unicode codepoints covered by the font's cmap.
57    pub codepoints: Vec<char>,
58    /// True if the font contains an `fvar` table (variable font).
59    pub is_variable: bool,
60    /// OS/2 usWeightClass (typically 100-900).
61    #[serde(default)]
62    pub weight_class: Option<u16>,
63    /// OS/2 usWidthClass (1-9).
64    #[serde(default)]
65    pub width_class: Option<u16>,
66    /// OS/2 sFamilyClass split into (class, subclass).
67    #[serde(default)]
68    pub family_class: Option<(u8, u8)>,
69    /// Creator-related name strings (copyright, trademark, manufacturer, designer, description, vendor URL, designer URL, license, license URL).
70    #[serde(default)]
71    pub creator_names: Vec<String>,
72    /// License-related name strings (copyright, license description, license URL).
73    #[serde(default)]
74    pub license_names: Vec<String>,
75}
76
77/// Location of a font face on disk.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct TypgFontSource {
80    /// Path to the font file.
81    pub path: PathBuf,
82    /// Index within a TTC/OTC collection, or `None` for single-face files.
83    pub ttc_index: Option<u32>,
84}
85
86impl TypgFontSource {
87    /// Format as `path#index` for collection members, plain path otherwise.
88    pub fn path_with_index(&self) -> String {
89        if let Some(idx) = self.ttc_index {
90            format!("{}#{idx}", self.path.display())
91        } else {
92            self.path.display().to_string()
93        }
94    }
95}
96
97/// A search result: font metadata paired with its file location.
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct TypgFontFaceMatch {
100    /// File location and collection index.
101    pub source: TypgFontSource,
102    /// Extracted metadata for this face.
103    pub metadata: TypgFontFaceMeta,
104}
105
/// Knobs for directory traversal and search parallelism.
#[derive(Debug, Default, Clone)]
pub struct SearchOptions {
    /// Whether directory traversal follows symlinks.
    pub follow_symlinks: bool,
    /// Number of worker threads for a dedicated pool. `None` runs on rayon's
    /// default pool instead.
    pub jobs: Option<usize>,
}
114
115/// Search filesystem paths for fonts matching a query. Returns all results sorted by path.
116///
117/// Files that can't be read or parsed are skipped with a warning to stderr.
118pub fn search(
119    paths: &[PathBuf],
120    query: &Query,
121    opts: &SearchOptions,
122) -> Result<Vec<TypgFontFaceMatch>> {
123    let discovery = PathDiscovery::new(paths.iter().cloned()).follow_symlinks(opts.follow_symlinks);
124    let candidates = discovery.discover()?;
125
126    let run_search = || -> Vec<TypgFontFaceMatch> {
127        let mut matches: Vec<TypgFontFaceMatch> = candidates
128            .par_iter()
129            .flat_map_iter(|loc| match load_metadata(&loc.path) {
130                Ok(faces) => faces,
131                Err(e) => {
132                    eprintln!("warning: {e}");
133                    Vec::new()
134                }
135            })
136            .filter(|face| query.matches(&face.metadata))
137            .collect();
138
139        sort_matches(&mut matches);
140        matches
141    };
142
143    let matches = if let Some(jobs) = opts.jobs {
144        let pool = ThreadPoolBuilder::new().num_threads(jobs).build()?;
145        pool.install(run_search)
146    } else {
147        run_search()
148    };
149
150    Ok(matches)
151}
152
153/// Search filesystem paths and stream each matching font through `tx` as found.
154///
155/// Results are not sorted -- they arrive in processing order. Use this for
156/// line-oriented output formats (plain text, paths, NDJSON) where the user
157/// benefits from seeing results immediately.
158///
159/// Files that can't be read or parsed are skipped with a warning to stderr.
160pub fn search_streaming(
161    paths: &[PathBuf],
162    query: &Query,
163    opts: &SearchOptions,
164    tx: Sender<TypgFontFaceMatch>,
165) -> Result<()> {
166    let discovery = PathDiscovery::new(paths.iter().cloned()).follow_symlinks(opts.follow_symlinks);
167    let candidates = discovery.discover()?;
168
169    let run_search = || {
170        candidates
171            .par_iter()
172            .for_each_with(tx, |tx, loc| match load_metadata(&loc.path) {
173                Ok(faces) => {
174                    for face in faces {
175                        if query.matches(&face.metadata) {
176                            let _ = tx.send(face);
177                        }
178                    }
179                }
180                Err(e) => {
181                    eprintln!("warning: {e}");
182                }
183            });
184    };
185
186    if let Some(jobs) = opts.jobs {
187        let pool = ThreadPoolBuilder::new().num_threads(jobs).build()?;
188        pool.install(run_search);
189    } else {
190        run_search();
191    }
192
193    Ok(())
194}
195
196/// Filter pre-loaded entries against a query without file I/O.
197pub fn filter_cached(entries: &[TypgFontFaceMatch], query: &Query) -> Vec<TypgFontFaceMatch> {
198    let mut matches: Vec<TypgFontFaceMatch> = entries
199        .iter()
200        .filter(|entry| query.matches(&entry.metadata))
201        .cloned()
202        .collect();
203
204    sort_matches(&mut matches);
205    matches
206}
207
208/// Read a font file and extract metadata for each face it contains.
209fn load_metadata(path: &Path) -> Result<Vec<TypgFontFaceMatch>> {
210    let data = fs::read(path).with_context(|| format!("reading {}", path.display()))?;
211    let mut metas = Vec::new();
212
213    for font in FontRef::fonts(&data) {
214        let font = font?;
215        let ttc_index = font.ttc_index();
216        let sfont = if let Some(idx) = ttc_index {
217            SkrifaFontRef::from_index(&data, idx)?
218        } else {
219            SkrifaFontRef::new(&data)?
220        };
221
222        let names = collect_names(&font);
223        let mut axis_tags = collect_axes(&font);
224        let mut feature_tags = collect_features(&font);
225        let mut script_tags = collect_scripts(&font);
226        let mut table_tags = collect_tables(&font);
227        let mut codepoints = collect_codepoints(&sfont);
228        let fvar_tag = Tag::new(b"fvar");
229        let is_variable = table_tags.contains(&fvar_tag);
230        let (weight_class, width_class, family_class) = collect_classification(&font);
231        let mut creator_names = collect_creator_names(&font);
232        let mut license_names = collect_license_names(&font);
233
234        dedup_tags(&mut axis_tags);
235        dedup_tags(&mut feature_tags);
236        dedup_tags(&mut script_tags);
237        dedup_tags(&mut table_tags);
238        dedup_codepoints(&mut codepoints);
239        creator_names.sort_unstable();
240        creator_names.dedup();
241        license_names.sort_unstable();
242        license_names.dedup();
243
244        metas.push(TypgFontFaceMatch {
245            source: TypgFontSource {
246                path: path.to_path_buf(),
247                ttc_index,
248            },
249            metadata: TypgFontFaceMeta {
250                names: dedup_names(names, path),
251                axis_tags,
252                feature_tags,
253                script_tags,
254                table_tags,
255                codepoints,
256                is_variable,
257                weight_class,
258                width_class,
259                family_class,
260                creator_names,
261                license_names,
262            },
263        });
264    }
265
266    Ok(metas)
267}
268
269fn collect_tables(font: &FontRef) -> Vec<Tag> {
270    font.table_directory
271        .table_records()
272        .iter()
273        .map(|rec| rec.tag())
274        .collect()
275}
276
277fn collect_axes(font: &FontRef) -> Vec<Tag> {
278    if let Ok(fvar) = font.fvar() {
279        if let Ok(axes) = fvar.axes() {
280            return axes.iter().map(|axis| axis.axis_tag()).collect();
281        }
282    }
283    Vec::new()
284}
285
286fn collect_features(font: &FontRef) -> Vec<Tag> {
287    let mut tags = Vec::new();
288    if let Ok(gsub) = font.gsub() {
289        if let Ok(list) = gsub.feature_list() {
290            tags.extend(list.feature_records().iter().map(|rec| rec.feature_tag()));
291        }
292    }
293    if let Ok(gpos) = font.gpos() {
294        if let Ok(list) = gpos.feature_list() {
295            tags.extend(list.feature_records().iter().map(|rec| rec.feature_tag()));
296        }
297    }
298    tags
299}
300
301fn collect_scripts(font: &FontRef) -> Vec<Tag> {
302    let mut tags = Vec::new();
303    if let Ok(gsub) = font.gsub() {
304        if let Ok(list) = gsub.script_list() {
305            tags.extend(list.script_records().iter().map(|rec| rec.script_tag()));
306        }
307    }
308    if let Ok(gpos) = font.gpos() {
309        if let Ok(list) = gpos.script_list() {
310            tags.extend(list.script_records().iter().map(|rec| rec.script_tag()));
311        }
312    }
313    tags
314}
315
316fn collect_codepoints(font: &SkrifaFontRef) -> Vec<char> {
317    let mut cps = Vec::new();
318    for (cp, _) in font.charmap().mappings() {
319        if let Some(ch) = char::from_u32(cp) {
320            cps.push(ch);
321        }
322    }
323    cps
324}
325
326fn collect_names(font: &FontRef) -> Vec<String> {
327    let mut names = Vec::new();
328
329    if let Ok(name_table) = font.name() {
330        let data = name_table.string_data();
331        let wanted = [
332            NameId::FAMILY_NAME,
333            NameId::TYPOGRAPHIC_FAMILY_NAME,
334            NameId::SUBFAMILY_NAME,
335            NameId::TYPOGRAPHIC_SUBFAMILY_NAME,
336            NameId::FULL_NAME,
337            NameId::POSTSCRIPT_NAME,
338        ];
339
340        for record in name_table.name_record() {
341            if !record.is_unicode() {
342                continue;
343            }
344            if !wanted.contains(&record.name_id()) {
345                continue;
346            }
347            if let Ok(entry) = record.string(data) {
348                let rendered = entry.to_string();
349                if !rendered.trim().is_empty() {
350                    names.push(rendered);
351                }
352            }
353        }
354    }
355
356    names
357}
358
359fn collect_creator_names(font: &FontRef) -> Vec<String> {
360    let mut names = Vec::new();
361
362    if let Ok(name_table) = font.name() {
363        let data = name_table.string_data();
364        let wanted = [
365            NameId::COPYRIGHT_NOTICE,
366            NameId::TRADEMARK,
367            NameId::MANUFACTURER,
368            NameId::DESIGNER,
369            NameId::DESCRIPTION,
370            NameId::VENDOR_URL,
371            NameId::DESIGNER_URL,
372            NameId::LICENSE_DESCRIPTION,
373            NameId::LICENSE_URL,
374        ];
375
376        for record in name_table.name_record() {
377            if !record.is_unicode() {
378                continue;
379            }
380            if !wanted.contains(&record.name_id()) {
381                continue;
382            }
383            if let Ok(entry) = record.string(data) {
384                let rendered = entry.to_string();
385                if !rendered.trim().is_empty() {
386                    names.push(rendered);
387                }
388            }
389        }
390    }
391
392    names
393}
394
395fn collect_license_names(font: &FontRef) -> Vec<String> {
396    let mut names = Vec::new();
397
398    if let Ok(name_table) = font.name() {
399        let data = name_table.string_data();
400        let wanted = [
401            NameId::COPYRIGHT_NOTICE,
402            NameId::LICENSE_DESCRIPTION,
403            NameId::LICENSE_URL,
404        ];
405
406        for record in name_table.name_record() {
407            if !record.is_unicode() {
408                continue;
409            }
410            if !wanted.contains(&record.name_id()) {
411                continue;
412            }
413            if let Ok(entry) = record.string(data) {
414                let rendered = entry.to_string();
415                if !rendered.trim().is_empty() {
416                    names.push(rendered);
417                }
418            }
419        }
420    }
421
422    names
423}
424
425fn collect_classification(font: &FontRef) -> (Option<u16>, Option<u16>, Option<(u8, u8)>) {
426    match font.os2() {
427        Ok(table) => {
428            let raw_family = table.s_family_class() as u16;
429            let class = (raw_family >> 8) as u8;
430            let subclass = (raw_family & 0x00FF) as u8;
431            (
432                Some(table.us_weight_class()),
433                Some(table.us_width_class()),
434                Some((class, subclass)),
435            )
436        }
437        Err(_) => (None, None, None),
438    }
439}
440
441fn sort_matches(matches: &mut [TypgFontFaceMatch]) {
442    matches.sort_by(|a, b| {
443        a.source
444            .path
445            .cmp(&b.source.path)
446            .then_with(|| a.source.ttc_index.cmp(&b.source.ttc_index))
447    });
448}
449
450fn dedup_tags(tags: &mut Vec<Tag>) {
451    tags.sort_unstable();
452    tags.dedup();
453}
454
/// Sort `codepoints` in place and drop repeated entries.
fn dedup_codepoints(codepoints: &mut Vec<char>) {
    // `dedup` only removes adjacent repeats, so the list must be sorted first.
    codepoints.sort_unstable();
    codepoints.dedup();
}
459
/// Normalize a face's name list.
///
/// Appends the file stem of `path` (or the displayed path when there is no
/// stem), trims surrounding whitespace from every entry, discards entries that
/// end up empty, and returns the remainder sorted with duplicates removed.
fn dedup_names(mut names: Vec<String>, path: &Path) -> Vec<String> {
    let fallback = match path.file_stem() {
        Some(stem) => stem.to_string_lossy().to_string(),
        None => path.display().to_string(),
    };
    names.push(fallback);

    let mut cleaned: Vec<String> = names
        .iter()
        .map(|raw| raw.trim().to_string())
        .filter(|trimmed| !trimmed.is_empty())
        .collect();

    cleaned.sort_unstable();
    cleaned.dedup();
    cleaned
}
476
477fn serialize_tags<S>(tags: &[Tag], serializer: S) -> Result<S::Ok, S::Error>
478where
479    S: serde::Serializer,
480{
481    let as_strings: Vec<String> = tags.iter().copied().map(tag_to_string).collect();
482    as_strings.serialize(serializer)
483}
484
485fn deserialize_tags<'de, D>(deserializer: D) -> Result<Vec<Tag>, D::Error>
486where
487    D: serde::Deserializer<'de>,
488{
489    let raw: Vec<String> = Vec::<String>::deserialize(deserializer)?;
490    raw.into_iter()
491        .map(|s| tag4(&s).map_err(serde::de::Error::custom))
492        .collect()
493}
494
#[cfg(test)]
mod tests {
    use super::*;

    /// `dedup_names` trims entries, merges duplicates and appends the file stem.
    #[test]
    fn dedup_names_adds_fallback_and_trims() {
        let deduped = dedup_names(
            vec!["  Alpha  ".to_string(), "Alpha".to_string()],
            Path::new("/fonts/Beta.ttf"),
        );

        assert!(
            deduped.iter().any(|name| name == "Alpha"),
            "original names should be trimmed and kept"
        );
        assert!(
            deduped.iter().any(|name| name == "Beta"),
            "file stem should be added as fallback name"
        );
        assert_eq!(
            deduped.len(),
            2,
            "dedup should remove duplicate entries and empty strings"
        );
    }

    /// `dedup_tags` leaves a sorted list without repeats.
    #[test]
    fn dedup_tags_sorts_and_dedups() {
        let gsub = tag4("GSUB").unwrap();
        let wght = tag4("wght").unwrap();

        let mut tags = vec![wght, wght, gsub];
        dedup_tags(&mut tags);

        assert_eq!(tags, vec![gsub, wght]);
    }

    /// `dedup_codepoints` leaves a sorted list without repeats.
    #[test]
    fn dedup_codepoints_sorts_and_dedups() {
        let mut cps: Vec<char> = "bab".chars().collect();
        dedup_codepoints(&mut cps);
        assert_eq!(cps, vec!['a', 'b']);
    }
}