Skip to main content

rust_fontconfig/
utils.rs

1use alloc::string::String;
2
/// File extensions (lowercase, without the leading dot) that are
/// treated as font files.
pub const FONT_EXTENSIONS: &[&str] = &[
    "ttf",   // TrueType
    "otf",   // OpenType
    "ttc",   // TrueType / OpenType collection
    "woff",  // Web Open Font Format
    "woff2", // Web Open Font Format 2
    "dfont", // macOS data-fork font suitcase
];
5
/// Number of bytes sampled from each end of the input by
/// [`content_dedup_hash_u64`].
///
/// That hash is derived from the input length, a leading window and a
/// trailing window of at most this many bytes each. Inputs that agree
/// on all three always receive the same key; inputs that differ in any
/// of them are expected to receive different keys. This is sufficient
/// for recognising the same `.ttc` read under different paths without
/// walking every page of a memory-mapped file.
pub const CONTENT_DEDUP_SAMPLE_BYTES: usize = 4 * 1024;
13
14/// Deterministic 64-bit "cheap" content hash derived from
15/// `(file_size, first 4 KiB, last 4 KiB)`.
16///
17/// Same guarantees as [`content_hash_u64`] — stable across process
18/// runs, usable for the on-disk font cache — but avoids materialising
19/// every page of a multi-megabyte `.ttc` into RSS just to compute a
20/// dedup key. Callers typically have the scout's mmap open and have
21/// already faulted-in the header tables anyway, so the head sample is
22/// free; the tail sample costs at most one extra page fault.
23pub fn content_dedup_hash_u64(bytes: &[u8]) -> u64 {
24    let len = bytes.len();
25    let head_len = len.min(CONTENT_DEDUP_SAMPLE_BYTES);
26    let tail_len = (len - head_len).min(CONTENT_DEDUP_SAMPLE_BYTES);
27    let tail_start = len - tail_len;
28    // Mix size first so two equal head+tail samples with different
29    // lengths produce different hashes.
30    let mut seed_buf = [0u8; 8];
31    seed_buf.copy_from_slice(&(len as u64).to_le_bytes());
32    let seed = content_hash_u64(&seed_buf);
33    let head = content_hash_u64(&bytes[..head_len]);
34    let tail = content_hash_u64(&bytes[tail_start..tail_start + tail_len]);
35    // Combine — wrapping_mul + xor avalanches the three sub-hashes
36    // reasonably without needing a separate mixing function.
37    const K: u64 = 0x9E3779B97F4A7C15;
38    let mut h = seed;
39    h ^= head;
40    h = h.wrapping_mul(K);
41    h ^= tail;
42    h = h.wrapping_mul(K);
43    h ^= h >> 33;
44    h
45}
46
/// Deterministic, non-cryptographic 64-bit hash of a whole byte slice.
///
/// Every byte of `bytes` is read, eight at a time as little-endian
/// words. For very large inputs (for example a memory-mapped `.ttc`)
/// that means touching the entire buffer; callers that only need a
/// dedup key should prefer the sampled [`content_dedup_hash_u64`].
/// This function remains as its building block and for cases that
/// need every byte to contribute.
///
/// The result depends only on the input bytes and a fixed constant —
/// there is no per-process random seed as with
/// `std::collections::hash_map::DefaultHasher` — so it can be written
/// to a disk cache and compared across runs. No external dependencies.
pub fn content_hash_u64(bytes: &[u8]) -> u64 {
    // Odd multiplier derived from the golden ratio; multiplying by it
    // spreads low-order input bits across the whole word.
    const K: u64 = 0x9E3779B97F4A7C15;

    // One absorption round: add the word, multiply, xor-shift.
    let absorb = |state: u64, word: u64| -> u64 {
        let s = state.wrapping_add(word).wrapping_mul(K);
        s ^ (s >> 33)
    };

    // Seed with the length so inputs that differ only by trailing zero
    // bytes do not start from the same state.
    let mut state = K ^ (bytes.len() as u64);

    let mut words = bytes.chunks_exact(8);
    for word in words.by_ref() {
        let mut le = [0u8; 8];
        le.copy_from_slice(word);
        state = absorb(state, u64::from_le_bytes(le));
    }

    // Pack the 0..=7 leftover bytes little-endian into a single word.
    // This word is absorbed even when it is zero (no leftover bytes).
    let trailing = words
        .remainder()
        .iter()
        .rev()
        .fold(0u64, |acc, &b| (acc << 8) | u64::from(b));
    state = absorb(state, trailing);

    // Final extra multiply + xor-shift round.
    state = state.wrapping_mul(K);
    state ^ (state >> 33)
}
85
/// Normalize a family/font name for comparison: lowercase it and strip
/// every non-alphanumeric character.
///
/// This gives consistent matching regardless of spaces, hyphens,
/// underscores, or casing. Both steps are Unicode-aware: characters are
/// kept according to [`char::is_alphanumeric`] and lowercased with
/// [`char::to_lowercase`], so non-ASCII names such as `"École"` and
/// `"ÉCOLE"` normalize to the same string.
///
/// Returns a newly allocated `String`; an input with no alphanumeric
/// characters yields an empty string.
pub fn normalize_family_name(name: &str) -> String {
    name.chars()
        // Lowercase first: a single character may expand to several,
        // and anything non-alphanumeric produced by that expansion is
        // removed by the filter below.
        .flat_map(char::to_lowercase)
        .filter(|c| c.is_alphanumeric())
        .collect()
}
95
/// Check whether `path` ends in one of the recognised font extensions.
///
/// The extension is compared against [`FONT_EXTENSIONS`] ignoring ASCII
/// case, so `Font.TTC` and `font.ttc` are both accepted. The comparison
/// is done in place, without allocating a lowercased copy of the
/// extension.
///
/// Returns `false` when the path has no extension or the extension is
/// not valid UTF-8.
#[cfg(feature = "std")]
pub fn is_font_file(path: &std::path::Path) -> bool {
    path.extension()
        .and_then(|ext| ext.to_str())
        .map_or(false, |ext| {
            FONT_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
107
#[cfg(test)]
mod tests {
    use super::*;

    /// The extensions of the most common desktop and web font formats
    /// must all be listed in `FONT_EXTENSIONS`.
    #[test]
    fn font_extensions_covers_common_formats() {
        let expected = ["ttf", "otf", "ttc", "woff", "woff2"];
        for ext in expected.iter() {
            assert!(FONT_EXTENSIONS.contains(ext), "missing extension: {}", ext);
        }
    }

    /// Paths with a known font extension are accepted, whatever the
    /// case of the extension.
    #[cfg(feature = "std")]
    #[test]
    fn is_font_file_recognizes_fonts() {
        use std::path::Path;
        let accepted = |name: &str| is_font_file(Path::new(name));

        assert!(accepted("Arial.ttf"));
        assert!(accepted("NotoSans.otf"));
        assert!(accepted("Font.TTC")); // upper-case extension still matches
        assert!(accepted("web.woff2"));
    }

    /// Paths with an unrelated extension, or with none at all, are
    /// rejected.
    #[cfg(feature = "std")]
    #[test]
    fn is_font_file_rejects_non_fonts() {
        use std::path::Path;
        let accepted = |name: &str| is_font_file(Path::new(name));

        assert!(!accepted("readme.txt"));
        assert!(!accepted("image.png"));
        assert!(!accepted("no_extension"));
    }
}