1use std::collections::BTreeSet;
5use std::path::Path;
6
7use allsorts::binary::read::ReadScope;
8use allsorts::font_data::FontData;
9use allsorts::tables::{FontTableProvider, NameTable, OpenTypeData};
10use ttf_parser::{Face, Permissions, fonts_in_collection, name_id};
11
12use crate::parsers::metadata::ParserMetadata;
13
/// File extensions (without the leading dot) that this module treats as font files.
///
/// `is_supported_font_extension` compares against these entries ASCII
/// case-insensitively.
pub(crate) const SUPPORTED_FONT_EXTENSIONS: &[&str] =
    &["ttf", "otf", "woff", "woff2", "eot", "ttc", "otc"];
/// Recursive glob patterns, one per entry of `SUPPORTED_FONT_EXTENSIONS`.
///
/// Used as the `file_patterns` of `FONT_METADATA`.
pub(crate) const SUPPORTED_FONT_FILE_GLOBS: &[&str] = &[
    "**/*.ttf",
    "**/*.otf",
    "**/*.woff",
    "**/*.woff2",
    "**/*.eot",
    "**/*.ttc",
    "**/*.otc",
];
/// `(variant, canonical)` URL pairs applied in order by
/// `canonicalize_ofl_license_reference_urls`.
///
/// Every variant maps to `http://scripts.sil.org/OFL`. The trailing-slash form
/// of each host is listed before the bare form so the slash is consumed by the
/// replacement instead of being left behind.
const OFL_URL_CANONICALIZATIONS: &[(&str, &str)] = &[
    ("https://scripts.sil.org/OFL/", "http://scripts.sil.org/OFL"),
    ("https://scripts.sil.org/OFL", "http://scripts.sil.org/OFL"),
    ("https://openfontlicense.org/", "http://scripts.sil.org/OFL"),
    ("https://openfontlicense.org", "http://scripts.sil.org/OFL"),
];
/// The four ASCII bytes `name` packed big-endian into a `u32`; passed to
/// `read_table_data` to fetch a face's `name` table.
const ALLSORTS_NAME_TABLE_TAG: u32 = u32::from_be_bytes(*b"name");
32
/// Parser metadata entry describing the font legal-metadata extractor.
///
/// `package_type` and `primary_language` are left empty here; the file
/// patterns are the font globs declared in `SUPPORTED_FONT_FILE_GLOBS`.
pub(crate) static FONT_METADATA: &[ParserMetadata] = &[ParserMetadata {
    description: "Embedded font legal metadata (native fonts, webfonts, and collections)",
    file_patterns: SUPPORTED_FONT_FILE_GLOBS,
    package_type: "",
    primary_language: "",
    documentation_url: Some("https://learn.microsoft.com/en-us/typography/opentype/spec/name"),
}];
40
41pub(crate) fn is_supported_font_extension(extension: &str) -> bool {
42 SUPPORTED_FONT_EXTENSIONS
43 .iter()
44 .any(|supported| supported.eq_ignore_ascii_case(extension))
45}
46
47pub(crate) fn is_supported_font_path(path: &Path) -> bool {
48 path.extension()
49 .and_then(|ext| ext.to_str())
50 .is_some_and(is_supported_font_extension)
51}
52
53pub(crate) fn extract_font_metadata_text(path: &Path, bytes: &[u8]) -> Option<String> {
54 let extension = path.extension().and_then(|ext| ext.to_str())?;
55 let extension = extension.to_ascii_lowercase();
56 if !is_supported_font_extension(&extension) {
57 return None;
58 }
59
60 match extension.as_str() {
61 "ttf" | "otf" | "woff" | "woff2" | "ttc" | "otc" => extract_sfnt_font_metadata_text(
62 bytes,
63 matches!(extension.as_str(), "ttf" | "otf" | "ttc" | "otc"),
64 ),
65 "eot" => extract_eot_metadata_text(bytes),
66 _ => None,
67 }
68}
69
70fn extract_sfnt_font_metadata_text(bytes: &[u8], include_permissions: bool) -> Option<String> {
71 let mut lines = Vec::new();
72 let mut seen = BTreeSet::new();
73
74 for line in extract_allsorts_name_table_lines(bytes) {
75 if seen.insert(line.clone()) {
76 lines.push(line);
77 }
78 }
79
80 if include_permissions {
81 let face_count = fonts_in_collection(bytes).unwrap_or(1);
82 for face_index in 0..face_count {
83 let Some(permissions) = Face::parse(bytes, face_index).ok()?.permissions() else {
84 continue;
85 };
86 let line = format!(
87 "Embedding permissions: {}",
88 font_permission_label(permissions)
89 );
90 if seen.insert(line.clone()) {
91 lines.push(line);
92 }
93 }
94 }
95
96 (!lines.is_empty()).then(|| lines.join("\n"))
97}
98
99fn extract_allsorts_name_table_lines(bytes: &[u8]) -> Vec<String> {
100 let Some(font_data) = ReadScope::new(bytes).read::<FontData<'_>>().ok() else {
101 return Vec::new();
102 };
103
104 let mut lines = Vec::new();
105 let mut seen = BTreeSet::new();
106 for face_index in 0..allsorts_face_count(&font_data) {
107 let Ok(provider) = font_data.table_provider(face_index) else {
108 continue;
109 };
110 let Ok(name_table_data) = provider.read_table_data(ALLSORTS_NAME_TABLE_TAG) else {
111 continue;
112 };
113 let Ok(name_table) = ReadScope::new(name_table_data.as_ref()).read::<NameTable<'_>>()
114 else {
115 continue;
116 };
117
118 for (source_name_id, target_name_id) in [
119 (NameTable::COPYRIGHT_NOTICE, name_id::COPYRIGHT_NOTICE),
120 (NameTable::LICENSE_DESCRIPTION, name_id::LICENSE),
121 (NameTable::LICENSE_INFO_URL, name_id::LICENSE_URL),
122 ] {
123 let Some(value) = name_table.string_for_id(source_name_id) else {
124 continue;
125 };
126 let Some(line) = build_font_metadata_line(target_name_id, value) else {
127 continue;
128 };
129 if seen.insert(line.clone()) {
130 lines.push(line);
131 }
132 }
133 }
134
135 lines
136}
137
138fn allsorts_face_count(font_data: &FontData<'_>) -> usize {
139 match font_data {
140 FontData::OpenType(font) => match &font.data {
141 OpenTypeData::Single(_) => 1,
142 OpenTypeData::Collection(ttc) => ttc.offset_tables.len(),
143 },
144 FontData::Woff(_) => 1,
145 FontData::Woff2(font) => font
146 .collection_directory
147 .as_ref()
148 .map(|directory| directory.fonts().count())
149 .unwrap_or(1),
150 }
151}
152
153fn extract_eot_metadata_text(bytes: &[u8]) -> Option<String> {
154 let text = extract_eot_utf16le_marker_text(bytes).join("\n");
155 if text.is_empty() {
156 return None;
157 }
158
159 let mut lines = Vec::new();
160 let mut seen = BTreeSet::new();
161 for segment in split_eot_legal_metadata_segments(&text) {
162 let normalized = normalize_eot_metadata_segment(&segment);
163 if normalized.is_empty() {
164 continue;
165 }
166 if seen.insert(normalized.clone()) {
167 lines.push(normalized);
168 }
169 }
170
171 (!lines.is_empty()).then(|| lines.join("\n"))
172}
173
174fn extract_eot_utf16le_marker_text(bytes: &[u8]) -> Vec<String> {
175 let mut lines = Vec::new();
176 let mut seen = BTreeSet::new();
177 for marker in [
178 "Copyright",
179 "This Font Software is licensed under",
180 "http://",
181 "https://",
182 ] {
183 let encoded = marker.encode_utf16().collect::<Vec<_>>();
184 let marker_bytes = encoded
185 .iter()
186 .flat_map(|unit| unit.to_le_bytes())
187 .collect::<Vec<_>>();
188 let mut search_start = 0;
189 while let Some(relative_start) = bytes[search_start..]
190 .windows(marker_bytes.len())
191 .position(|window| window == marker_bytes.as_slice())
192 {
193 let start = search_start + relative_start;
194 let decoded = decode_utf16le_ascii_from_offset(bytes, start);
195 if !decoded.is_empty() && seen.insert(decoded.clone()) {
196 lines.push(decoded);
197 }
198 search_start = start + marker_bytes.len();
199 }
200 }
201 lines
202}
203
/// Decodes the run of UTF-16LE code units starting at byte offset `start`
/// for as long as each unit is a printable ASCII character (0x20..=0x7E with
/// a zero high byte).
///
/// Decoding stops at the first unit outside that range or when fewer than two
/// bytes remain; an out-of-range `start` yields an empty string.
fn decode_utf16le_ascii_from_offset(bytes: &[u8], start: usize) -> String {
    let tail: &[u8] = bytes.get(start..).unwrap_or(&[]);
    tail.chunks_exact(2)
        .take_while(|unit| unit[1] == 0 && matches!(unit[0], 0x20..=0x7E))
        .map(|unit| char::from(unit[0]))
        .collect()
}
219
220fn split_eot_legal_metadata_segments(text: &str) -> Vec<String> {
221 let mut segments = Vec::new();
222
223 if let Some(segment) = extract_text_between_markers(
224 text,
225 "Copyright",
226 &["All Rights Reserved.", "All rights reserved."],
227 ) {
228 segments.push(segment);
229 }
230 if let Some(segment) = extract_text_between_markers(
231 text,
232 "This Font Software is licensed under",
233 &[
234 "governing your use of this Font Software.",
235 "This Font Software.",
236 ],
237 ) {
238 segments.push(segment);
239 }
240 segments.extend(extract_http_segments(text));
241
242 segments
243}
244
/// Returns the slice of `text` that begins at the first occurrence of
/// `start_marker` and ends just after the earliest-ending `end_markers` match
/// found at or after that point.
///
/// When none of the end markers occurs, the slice runs to the end of `text`.
/// Returns `None` when `start_marker` is absent.
fn extract_text_between_markers(
    text: &str,
    start_marker: &str,
    end_markers: &[&str],
) -> Option<String> {
    let tail = text.find(start_marker).map(|begin| &text[begin..])?;

    let mut cut = tail.len();
    for marker in end_markers {
        if let Some(position) = tail.find(marker) {
            cut = cut.min(position + marker.len());
        }
    }

    Some(tail[..cut].to_owned())
}
259
/// Extracts URL-like segments from `text`.
///
/// All `http://` occurrences are collected first, then all `https://`
/// occurrences. A segment runs from the scheme to whichever comes first after
/// it: another scheme, one of the stop words, any whitespace, or the end of
/// the text. Trailing `.`, `,`, `;` and `:` characters are removed.
fn extract_http_segments(text: &str) -> Vec<String> {
    const SCHEMES: [&str; 2] = ["http://", "https://"];
    const STOP_WORDS: [&str; 5] = [
        "http://",
        "https://",
        "This Font Software",
        "Copyright",
        "Version ",
    ];
    const TRAILING_PUNCTUATION: [char; 4] = ['.', ',', ';', ':'];

    let mut urls = Vec::new();
    for scheme in SCHEMES {
        let mut cursor = 0;
        while let Some(offset) = text[cursor..].find(scheme) {
            let url_start = cursor + offset;
            let body_start = url_start + scheme.len();
            let body = &text[body_start..];

            // Length of the part after the scheme, cut at the nearest boundary.
            let body_len = STOP_WORDS
                .iter()
                .filter_map(|stop| body.find(stop))
                .chain(body.find(char::is_whitespace))
                .min()
                .unwrap_or(body.len());
            let url_end = body_start + body_len;

            let url = text[url_start..url_end].trim_end_matches(&TRAILING_PUNCTUATION[..]);
            if !url.is_empty() {
                urls.push(url.to_string());
            }
            cursor = url_end.max(body_start);
        }
    }
    urls
}
294
295fn normalize_eot_metadata_segment(segment: &str) -> String {
296 let normalized = segment
297 .split_whitespace()
298 .collect::<Vec<_>>()
299 .join(" ")
300 .trim()
301 .to_string();
302
303 if normalized.is_empty() {
304 return normalized;
305 }
306
307 let lowered = normalized.to_ascii_lowercase();
308 if lowered.starts_with("http://") || lowered.starts_with("https://") {
309 return canonicalize_ofl_license_reference_urls(normalized);
310 }
311
312 if lowered.contains("font software") || lowered.contains("open font license") {
313 return canonicalize_ofl_license_reference_urls(normalized);
314 }
315
316 normalized
317}
318
319fn build_font_metadata_line(name_id_value: u16, value: String) -> Option<String> {
320 let value = normalize_font_value(name_id_value, value);
321 if value.is_empty() {
322 return None;
323 }
324
325 if name_id_value == name_id::COPYRIGHT_NOTICE {
326 return Some(value);
327 }
328
329 let label = font_name_label(name_id_value)?;
330 Some(format!("{label}: {value}"))
331}
332
333fn font_name_label(name_id_value: u16) -> Option<&'static str> {
334 match name_id_value {
335 name_id::LICENSE => Some("License Description"),
336 name_id::LICENSE_URL => Some("License Info URL"),
337 _ => None,
338 }
339}
340
341fn normalize_font_value(name_id_value: u16, value: String) -> String {
342 let normalized = value
343 .split_whitespace()
344 .collect::<Vec<_>>()
345 .join(" ")
346 .trim()
347 .to_string();
348
349 match name_id_value {
350 name_id::COPYRIGHT_NOTICE => strip_reserved_font_name_clause(normalized),
351 name_id::LICENSE | name_id::LICENSE_URL => {
352 canonicalize_ofl_license_reference_urls(normalized)
353 }
354 _ => normalized,
355 }
356}
357
/// Cuts a trailing "with (no) Reserved Font Name …" clause off a copyright
/// notice.
///
/// Matching is ASCII case-insensitive. The clause markers are tried in a
/// fixed order and the first marker that occurs decides the cut position.
/// The kept prefix has trailing `,`, `;`, `:`, space and `(` characters
/// removed and is then trimmed. Values without such a clause are returned
/// unchanged.
fn strip_reserved_font_name_clause(value: String) -> String {
    const CLAUSE_MARKERS: [&str; 4] = [
        ", with reserved font name",
        ", with no reserved font name",
        " with reserved font name",
        " with no reserved font name",
    ];
    const TRAILING_NOISE: [char; 5] = [',', ';', ':', ' ', '('];

    // ASCII lower-casing keeps byte offsets identical to `value`.
    let lowered = value.to_ascii_lowercase();
    let cut = CLAUSE_MARKERS
        .iter()
        .find_map(|marker| lowered.find(marker));

    match cut {
        Some(cut) => value[..cut]
            .trim_end_matches(&TRAILING_NOISE[..])
            .trim()
            .to_string(),
        None => value,
    }
}
376
377fn canonicalize_ofl_license_reference_urls(mut value: String) -> String {
378 for (from, to) in OFL_URL_CANONICALIZATIONS {
379 value = value.replace(from, to);
380 }
381 value
382}
383
384fn font_permission_label(permission: Permissions) -> &'static str {
385 match permission {
386 Permissions::Installable => "Installable",
387 Permissions::Restricted => "Restricted",
388 Permissions::PreviewAndPrint => "Preview and Print",
389 Permissions::Editable => "Editable",
390 }
391}
392
// Tests for font legal-metadata extraction. Most of them read binary fixtures
// from `testdata/font-fixtures/` relative to the working directory.
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::Path;

    use crate::copyright::detect_copyrights;
    use crate::license_detection::LicenseDetectionEngine;
    use ttf_parser::name_id;

    use super::{
        build_font_metadata_line, canonicalize_ofl_license_reference_urls,
        extract_font_metadata_text,
    };

    /// A TTF fixture yields a labelled license description mentioning the OFL.
    #[test]
    fn extracts_ofl_metadata_from_lato_font_fixture() {
        let bytes =
            fs::read("testdata/font-fixtures/Lato-Bold.ttf").expect("read lato font fixture");

        let text = extract_font_metadata_text(Path::new("Lato-Bold.ttf"), &bytes)
            .expect("font metadata text");

        assert!(text.contains("License Description:"), "{text}");
        assert!(
            text.contains("Open Font License") || text.contains("OFL"),
            "{text}"
        );
    }

    /// A TTF fixture yields text that references the Apache license.
    #[test]
    fn extracts_apache_metadata_from_underline_test_font_fixture() {
        let bytes = fs::read("testdata/font-fixtures/UnderlineTest-Close.ttf")
            .expect("read apache font fixture");

        let text = extract_font_metadata_text(Path::new("UnderlineTest-Close.ttf"), &bytes)
            .expect("font metadata text");

        assert!(
            text.contains("License Description:") || text.contains("Copyright"),
            "{text}"
        );
        assert!(
            text.contains("Apache") || text.contains("http://www.apache.org/licenses"),
            "{text}"
        );
    }

    /// The openfontlicense.org URL variant is rewritten to the canonical OFL URL.
    #[test]
    fn canonicalizes_ofl_url_variants_in_font_license_metadata() {
        let canonical = canonicalize_ofl_license_reference_urls(
            "This license is available with a FAQ at: https://openfontlicense.org/".to_string(),
        );

        assert_eq!(
            canonical,
            "This license is available with a FAQ at: http://scripts.sil.org/OFL"
        );
    }

    /// Lines built from Noto-style name strings drop the trademark entry and
    /// are still recognised by the license and copyright detectors.
    #[test]
    fn font_metadata_lines_detect_noto_ofl_text_without_trademark_noise() {
        // The trademark id has no label, so `build_font_metadata_line` returns
        // `None` for it and `flatten` removes it from the joined text.
        let metadata_text = [
            build_font_metadata_line(
                name_id::COPYRIGHT_NOTICE,
                "Copyright 2022 The Noto Project Authors (https://github.com/notofonts/latin-greek-cyrillic)".to_string(),
            ),
            build_font_metadata_line(
                name_id::TRADEMARK,
                "Noto is a trademark of Google LLC.".to_string(),
            ),
            build_font_metadata_line(
                name_id::LICENSE,
                "This Font Software is licensed under the SIL Open Font License, Version 1.1. This license is available with a FAQ at: https://scripts.sil.org/OFL".to_string(),
            ),
            build_font_metadata_line(
                name_id::LICENSE_URL,
                "https://scripts.sil.org/OFL".to_string(),
            ),
        ]
        .into_iter()
        .flatten()
        .collect::<Vec<_>>()
        .join("\n");

        assert!(!metadata_text.contains("Trademark:"), "{metadata_text}");
        assert!(
            metadata_text.contains("Copyright 2022 The Noto Project Authors"),
            "{metadata_text}"
        );
        assert!(
            metadata_text.contains("http://scripts.sil.org/OFL"),
            "{metadata_text}"
        );

        let engine = LicenseDetectionEngine::from_embedded().expect("initialize license engine");
        let detections = engine
            .detect_with_kind_and_source_with_score(&metadata_text, false, false, "font.ttf", 0.0)
            .expect("detect licenses from font metadata text");

        assert!(
            detections.iter().any(|detection| {
                detection
                    .license_expression_spdx
                    .as_deref()
                    .is_some_and(|expression| expression.contains("OFL-1.1"))
            }),
            "detections: {detections:#?}"
        );

        let (copyrights, holders, _authors) = detect_copyrights(&metadata_text, None);
        assert!(
            copyrights.iter().any(|detection| {
                detection.copyright
                    == "Copyright 2022 The Noto Project Authors (https://github.com/notofonts/latin-greek-cyrillic)"
            }),
            "copyrights: {copyrights:#?}"
        );
        assert!(
            holders
                .iter()
                .any(|detection| detection.holder == "The Noto Project Authors"),
            "holders: {holders:#?}"
        );
    }

    /// A WOFF-wrapped font yields the same kind of name-table metadata.
    #[test]
    fn extracts_metadata_from_sourcecodepro_woff_fixture() {
        let bytes = fs::read("testdata/font-fixtures/SourceCodePro-Regular.otf.woff")
            .expect("read woff font fixture");

        let text = extract_font_metadata_text(Path::new("SourceCodePro-Regular.otf.woff"), &bytes)
            .expect("woff font metadata text");

        assert!(text.contains("Adobe"), "{text}");
        assert!(
            text.contains("Open Font License") || text.contains("OFL"),
            "{text}"
        );
        assert!(text.contains("http://scripts.sil.org/OFL"), "{text}");
    }

    /// A WOFF2-wrapped font yields the same kind of name-table metadata.
    #[test]
    fn extracts_metadata_from_sourcecodepro_woff2_fixture() {
        let bytes = fs::read("testdata/font-fixtures/SourceCodePro-Regular.otf.woff2")
            .expect("read woff2 font fixture");

        let text = extract_font_metadata_text(Path::new("SourceCodePro-Regular.otf.woff2"), &bytes)
            .expect("woff2 font metadata text");

        assert!(text.contains("Adobe"), "{text}");
        assert!(
            text.contains("Open Font License") || text.contains("OFL"),
            "{text}"
        );
        assert!(text.contains("http://scripts.sil.org/OFL"), "{text}");
    }

    /// The EOT path recovers copyright, license statement and canonical URL.
    #[test]
    fn extracts_legal_strings_from_notosans_eot_fixture() {
        let bytes =
            fs::read("testdata/font-fixtures/NotoSans-Regular.eot").expect("read eot font fixture");

        let text = extract_font_metadata_text(Path::new("NotoSans-Regular.eot"), &bytes)
            .expect("eot font metadata text");

        assert!(text.contains("Copyright 2015 Google Inc."), "{text}");
        assert!(
            text.contains("This Font Software is licensed under the SIL Open Font License"),
            "{text}"
        );
        assert!(text.contains("http://scripts.sil.org/OFL"), "{text}");
    }

    /// Text extracted from the WOFF fixture is detected as OFL-1.1, and the
    /// detected copyright is the bare Adobe notice.
    #[test]
    fn wrapped_font_metadata_detects_sourcecodepro_ofl_without_reserved_font_tail() {
        let bytes = fs::read("testdata/font-fixtures/SourceCodePro-Regular.otf.woff")
            .expect("read woff font fixture");
        let metadata_text =
            extract_font_metadata_text(Path::new("SourceCodePro-Regular.otf.woff"), &bytes)
                .expect("wrapped font metadata text");

        let engine = LicenseDetectionEngine::from_embedded().expect("initialize license engine");
        let detections = engine
            .detect_with_kind_and_source_with_score(&metadata_text, false, false, "font.woff", 0.0)
            .expect("detect licenses from wrapped font metadata text");
        assert!(
            detections.iter().any(|detection| {
                detection
                    .license_expression_spdx
                    .as_deref()
                    .is_some_and(|expression| expression.contains("OFL-1.1"))
            }),
            "detections: {detections:#?}"
        );

        let (copyrights, holders, _authors) = detect_copyrights(&metadata_text, None);
        assert!(
            copyrights.iter().any(|detection| {
                detection.copyright == "(c) 2023 Adobe (http://www.adobe.com/)"
            }),
            "copyrights: {copyrights:#?}"
        );
        assert!(
            holders.iter().any(|detection| detection.holder == "Adobe"),
            "holders: {holders:#?}"
        );
    }

    /// A TrueType collection fixture yields legal metadata text.
    #[test]
    fn extracts_metadata_from_ttc_fixture() {
        let bytes = fs::read("testdata/font-fixtures/TTC.ttc").expect("read ttc font fixture");

        let text = extract_font_metadata_text(Path::new("TTC.ttc"), &bytes)
            .expect("ttc font metadata text");

        assert!(
            text.contains("Copyright") || text.contains("License"),
            "{text}"
        );
        assert!(text.contains("No rights reserved"), "{text}");
    }
}