1use std::collections::BTreeSet;
5use std::path::Path;
6
7use allsorts::binary::read::ReadScope;
8use allsorts::font_data::FontData;
9use allsorts::tables::{FontTableProvider, NameTable, OpenTypeData};
10use ttf_parser::{Face, Permissions, fonts_in_collection, name_id};
11
/// Lowercase file extensions (without the leading dot) that this module
/// recognises as font files. Lookups against this list are ASCII
/// case-insensitive (see `is_supported_font_extension`).
pub(crate) const SUPPORTED_FONT_EXTENSIONS: &[&str] =
    &["ttf", "otf", "woff", "woff2", "eot", "ttc", "otc"];
/// Recursive glob patterns, one per entry of `SUPPORTED_FONT_EXTENSIONS`,
/// handed to `register_detection_surface!` below.
///
/// NOTE(review): this list mirrors `SUPPORTED_FONT_EXTENSIONS` by hand; keep
/// the two in sync when adding or removing a format.
pub(crate) const SUPPORTED_FONT_FILE_GLOBS: &[&str] = &[
    "**/*.ttf",
    "**/*.otf",
    "**/*.woff",
    "**/*.woff2",
    "**/*.eot",
    "**/*.ttc",
    "**/*.otc",
];
/// `(variant, canonical)` URL pairs applied in order by
/// `canonicalize_ofl_license_reference_urls`.
///
/// Each trailing-slash variant is listed before its slash-less form so the
/// slash is consumed by the first replacement instead of being left behind
/// after the canonical URL.
const OFL_URL_CANONICALIZATIONS: &[(&str, &str)] = &[
    ("https://scripts.sil.org/OFL/", "http://scripts.sil.org/OFL"),
    ("https://scripts.sil.org/OFL", "http://scripts.sil.org/OFL"),
    ("https://openfontlicense.org/", "http://scripts.sil.org/OFL"),
    ("https://openfontlicense.org", "http://scripts.sil.org/OFL"),
];
/// The four ASCII bytes `name` packed big-endian into a `u32`; passed to the
/// allsorts table provider to request a face's `name` table.
const ALLSORTS_NAME_TABLE_TAG: u32 = u32::from_be_bytes(*b"name");
30
// Registers this module's detection surface: a human-readable label, the glob
// patterns of the files it covers, and a reference URL (the OpenType `name`
// table specification).
// NOTE(review): the meaning of the two empty-string arguments is defined by
// the macro, which is not visible in this file — confirm against its
// definition before filling them in.
crate::register_detection_surface!(
    "Embedded font legal metadata (native fonts, webfonts, and collections)",
    SUPPORTED_FONT_FILE_GLOBS,
    "",
    "",
    Some("https://learn.microsoft.com/en-us/typography/opentype/spec/name"),
);
38
39pub(crate) fn is_supported_font_extension(extension: &str) -> bool {
40 SUPPORTED_FONT_EXTENSIONS
41 .iter()
42 .any(|supported| supported.eq_ignore_ascii_case(extension))
43}
44
45pub(crate) fn is_supported_font_path(path: &Path) -> bool {
46 path.extension()
47 .and_then(|ext| ext.to_str())
48 .is_some_and(is_supported_font_extension)
49}
50
51pub(crate) fn extract_font_metadata_text(path: &Path, bytes: &[u8]) -> Option<String> {
52 let extension = path.extension().and_then(|ext| ext.to_str())?;
53 let extension = extension.to_ascii_lowercase();
54 if !is_supported_font_extension(&extension) {
55 return None;
56 }
57
58 match extension.as_str() {
59 "ttf" | "otf" | "woff" | "woff2" | "ttc" | "otc" => extract_sfnt_font_metadata_text(
60 bytes,
61 matches!(extension.as_str(), "ttf" | "otf" | "ttc" | "otc"),
62 ),
63 "eot" => extract_eot_metadata_text(bytes),
64 _ => None,
65 }
66}
67
68fn extract_sfnt_font_metadata_text(bytes: &[u8], include_permissions: bool) -> Option<String> {
69 let mut lines = Vec::new();
70 let mut seen = BTreeSet::new();
71
72 for line in extract_allsorts_name_table_lines(bytes) {
73 if seen.insert(line.clone()) {
74 lines.push(line);
75 }
76 }
77
78 if include_permissions {
79 let face_count = fonts_in_collection(bytes).unwrap_or(1);
80 for face_index in 0..face_count {
81 let Some(permissions) = Face::parse(bytes, face_index).ok()?.permissions() else {
82 continue;
83 };
84 let line = format!(
85 "Embedding permissions: {}",
86 font_permission_label(permissions)
87 );
88 if seen.insert(line.clone()) {
89 lines.push(line);
90 }
91 }
92 }
93
94 (!lines.is_empty()).then(|| lines.join("\n"))
95}
96
97fn extract_allsorts_name_table_lines(bytes: &[u8]) -> Vec<String> {
98 let Some(font_data) = ReadScope::new(bytes).read::<FontData<'_>>().ok() else {
99 return Vec::new();
100 };
101
102 let mut lines = Vec::new();
103 let mut seen = BTreeSet::new();
104 for face_index in 0..allsorts_face_count(&font_data) {
105 let Ok(provider) = font_data.table_provider(face_index) else {
106 continue;
107 };
108 let Ok(name_table_data) = provider.read_table_data(ALLSORTS_NAME_TABLE_TAG) else {
109 continue;
110 };
111 let Ok(name_table) = ReadScope::new(name_table_data.as_ref()).read::<NameTable<'_>>()
112 else {
113 continue;
114 };
115
116 for (source_name_id, target_name_id) in [
117 (NameTable::COPYRIGHT_NOTICE, name_id::COPYRIGHT_NOTICE),
118 (NameTable::LICENSE_DESCRIPTION, name_id::LICENSE),
119 (NameTable::LICENSE_INFO_URL, name_id::LICENSE_URL),
120 ] {
121 let Some(value) = name_table.string_for_id(source_name_id) else {
122 continue;
123 };
124 let Some(line) = build_font_metadata_line(target_name_id, value) else {
125 continue;
126 };
127 if seen.insert(line.clone()) {
128 lines.push(line);
129 }
130 }
131 }
132
133 lines
134}
135
136fn allsorts_face_count(font_data: &FontData<'_>) -> usize {
137 match font_data {
138 FontData::OpenType(font) => match &font.data {
139 OpenTypeData::Single(_) => 1,
140 OpenTypeData::Collection(ttc) => ttc.offset_tables.len(),
141 },
142 FontData::Woff(_) => 1,
143 FontData::Woff2(font) => font
144 .collection_directory
145 .as_ref()
146 .map(|directory| directory.fonts().count())
147 .unwrap_or(1),
148 }
149}
150
151fn extract_eot_metadata_text(bytes: &[u8]) -> Option<String> {
152 let text = extract_eot_utf16le_marker_text(bytes).join("\n");
153 if text.is_empty() {
154 return None;
155 }
156
157 let mut lines = Vec::new();
158 let mut seen = BTreeSet::new();
159 for segment in split_eot_legal_metadata_segments(&text) {
160 let normalized = normalize_eot_metadata_segment(&segment);
161 if normalized.is_empty() {
162 continue;
163 }
164 if seen.insert(normalized.clone()) {
165 lines.push(normalized);
166 }
167 }
168
169 (!lines.is_empty()).then(|| lines.join("\n"))
170}
171
172fn extract_eot_utf16le_marker_text(bytes: &[u8]) -> Vec<String> {
173 let mut lines = Vec::new();
174 let mut seen = BTreeSet::new();
175 for marker in [
176 "Copyright",
177 "This Font Software is licensed under",
178 "http://",
179 "https://",
180 ] {
181 let encoded = marker.encode_utf16().collect::<Vec<_>>();
182 let marker_bytes = encoded
183 .iter()
184 .flat_map(|unit| unit.to_le_bytes())
185 .collect::<Vec<_>>();
186 let mut search_start = 0;
187 while let Some(relative_start) = bytes[search_start..]
188 .windows(marker_bytes.len())
189 .position(|window| window == marker_bytes.as_slice())
190 {
191 let start = search_start + relative_start;
192 let decoded = decode_utf16le_ascii_from_offset(bytes, start);
193 if !decoded.is_empty() && seen.insert(decoded.clone()) {
194 lines.push(decoded);
195 }
196 search_start = start + marker_bytes.len();
197 }
198 }
199 lines
200}
201
/// Decodes the run of UTF-16LE code units starting at byte offset `start`
/// for as long as each unit is a printable ASCII character
/// (`0x20..=0x7E` with a zero high byte).
///
/// Decoding stops at the first unit outside that range or when fewer than
/// two bytes remain. An out-of-range `start` yields an empty string.
fn decode_utf16le_ascii_from_offset(bytes: &[u8], start: usize) -> String {
    bytes
        .get(start..)
        .unwrap_or_default()
        .chunks_exact(2)
        .take_while(|unit| unit[1] == 0 && matches!(unit[0], 0x20..=0x7E))
        .map(|unit| char::from(unit[0]))
        .collect()
}
217
218fn split_eot_legal_metadata_segments(text: &str) -> Vec<String> {
219 let mut segments = Vec::new();
220
221 if let Some(segment) = extract_text_between_markers(
222 text,
223 "Copyright",
224 &["All Rights Reserved.", "All rights reserved."],
225 ) {
226 segments.push(segment);
227 }
228 if let Some(segment) = extract_text_between_markers(
229 text,
230 "This Font Software is licensed under",
231 &[
232 "governing your use of this Font Software.",
233 "This Font Software.",
234 ],
235 ) {
236 segments.push(segment);
237 }
238 segments.extend(extract_http_segments(text));
239
240 segments
241}
242
/// Returns the slice of `text` that begins at the first `start_marker` and
/// runs through the end of whichever `end_markers` entry finishes earliest
/// after that point.
///
/// If no end marker occurs, the rest of `text` is returned. Returns `None`
/// when `start_marker` is absent.
fn extract_text_between_markers(
    text: &str,
    start_marker: &str,
    end_markers: &[&str],
) -> Option<String> {
    let begin = text.find(start_marker)?;
    let remainder = &text[begin..];

    let mut cut = remainder.len();
    for end_marker in end_markers {
        if let Some(position) = remainder.find(*end_marker) {
            cut = cut.min(position + end_marker.len());
        }
    }

    Some(remainder[..cut].to_owned())
}
257
/// Extracts URL-like segments from `text`.
///
/// All `http://` matches are returned first (in text order), followed by all
/// `https://` matches. A segment ends at the earliest of: the next scheme,
/// one of the legal-text boundaries below, the first whitespace character,
/// or the end of `text`. Trailing `.`, `,`, `;` and `:` are stripped.
fn extract_http_segments(text: &str) -> Vec<String> {
    const SCHEMES: [&str; 2] = ["http://", "https://"];
    // Strings that commonly follow a URL without a separator in decoded text.
    const BOUNDARIES: [&str; 5] = [
        "http://",
        "https://",
        "This Font Software",
        "Copyright",
        "Version ",
    ];

    let mut found = Vec::new();
    for scheme in SCHEMES {
        let mut cursor = 0;
        while let Some(offset) = text[cursor..].find(scheme) {
            let url_start = cursor + offset;
            let body_start = url_start + scheme.len();
            let rest = &text[body_start..];

            let cut = BOUNDARIES
                .iter()
                .filter_map(|boundary| rest.find(*boundary))
                .chain(rest.find(char::is_whitespace))
                .min()
                .unwrap_or(rest.len());
            let url_end = body_start + cut;

            let url = text[url_start..url_end].trim_end_matches(['.', ',', ';', ':']);
            if !url.is_empty() {
                found.push(url.to_string());
            }
            // `url_end` is never before `body_start`, so the scan always advances.
            cursor = url_end;
        }
    }
    found
}
292
293fn normalize_eot_metadata_segment(segment: &str) -> String {
294 let normalized = segment
295 .split_whitespace()
296 .collect::<Vec<_>>()
297 .join(" ")
298 .trim()
299 .to_string();
300
301 if normalized.is_empty() {
302 return normalized;
303 }
304
305 let lowered = normalized.to_ascii_lowercase();
306 if lowered.starts_with("http://") || lowered.starts_with("https://") {
307 return canonicalize_ofl_license_reference_urls(normalized);
308 }
309
310 if lowered.contains("font software") || lowered.contains("open font license") {
311 return canonicalize_ofl_license_reference_urls(normalized);
312 }
313
314 normalized
315}
316
317fn build_font_metadata_line(name_id_value: u16, value: String) -> Option<String> {
318 let value = normalize_font_value(name_id_value, value);
319 if value.is_empty() {
320 return None;
321 }
322
323 if name_id_value == name_id::COPYRIGHT_NOTICE {
324 return Some(value);
325 }
326
327 let label = font_name_label(name_id_value)?;
328 Some(format!("{label}: {value}"))
329}
330
331fn font_name_label(name_id_value: u16) -> Option<&'static str> {
332 match name_id_value {
333 name_id::LICENSE => Some("License Description"),
334 name_id::LICENSE_URL => Some("License Info URL"),
335 _ => None,
336 }
337}
338
339fn normalize_font_value(name_id_value: u16, value: String) -> String {
340 let normalized = value
341 .split_whitespace()
342 .collect::<Vec<_>>()
343 .join(" ")
344 .trim()
345 .to_string();
346
347 match name_id_value {
348 name_id::COPYRIGHT_NOTICE => strip_reserved_font_name_clause(normalized),
349 name_id::LICENSE | name_id::LICENSE_URL => {
350 canonicalize_ofl_license_reference_urls(normalized)
351 }
352 _ => normalized,
353 }
354}
355
/// Cuts a copyright notice at its "with [no] Reserved Font Name" clause.
///
/// Markers are matched ASCII case-insensitively and tried in list order; the
/// first marker that occurs anywhere in `value` decides the cut. The part
/// kept has trailing `,` `;` `:` space and `(` removed and is then trimmed.
/// `value` is returned unchanged when no marker is present.
fn strip_reserved_font_name_clause(value: String) -> String {
    const CLAUSE_MARKERS: [&str; 4] = [
        ", with reserved font name",
        ", with no reserved font name",
        " with reserved font name",
        " with no reserved font name",
    ];
    const TRAILING_NOISE: [char; 5] = [',', ';', ':', ' ', '('];

    // ASCII lowercasing keeps byte offsets aligned with `value`.
    let folded = value.to_ascii_lowercase();
    let cut = CLAUSE_MARKERS
        .iter()
        .find_map(|marker| folded.find(*marker));

    match cut {
        Some(index) => value[..index]
            .trim_end_matches(TRAILING_NOISE)
            .trim()
            .to_owned(),
        None => value,
    }
}
374
375fn canonicalize_ofl_license_reference_urls(mut value: String) -> String {
376 for (from, to) in OFL_URL_CANONICALIZATIONS {
377 value = value.replace(from, to);
378 }
379 value
380}
381
382fn font_permission_label(permission: Permissions) -> &'static str {
383 match permission {
384 Permissions::Installable => "Installable",
385 Permissions::Restricted => "Restricted",
386 Permissions::PreviewAndPrint => "Preview and Print",
387 Permissions::Editable => "Editable",
388 }
389}
390
// Tests for font metadata extraction. The fixture-based tests read font files
// from `testdata/font-fixtures/` using paths relative to the working
// directory the tests run in.
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::Path;

    use crate::copyright::detect_copyrights;
    use crate::license_detection::LicenseDetectionEngine;
    use ttf_parser::name_id;

    use super::{
        build_font_metadata_line, canonicalize_ofl_license_reference_urls,
        extract_font_metadata_text,
    };

    // A plain TTF fixture must yield a labelled license description that
    // mentions the Open Font License.
    #[test]
    fn extracts_ofl_metadata_from_lato_font_fixture() {
        let bytes =
            fs::read("testdata/font-fixtures/Lato-Bold.ttf").expect("read lato font fixture");

        let text = extract_font_metadata_text(Path::new("Lato-Bold.ttf"), &bytes)
            .expect("font metadata text");

        assert!(text.contains("License Description:"), "{text}");
        assert!(
            text.contains("Open Font License") || text.contains("OFL"),
            "{text}"
        );
    }

    // A TTF fixture carrying Apache licensing must surface either a license
    // description or a copyright line, plus an Apache reference.
    #[test]
    fn extracts_apache_metadata_from_underline_test_font_fixture() {
        let bytes = fs::read("testdata/font-fixtures/UnderlineTest-Close.ttf")
            .expect("read apache font fixture");

        let text = extract_font_metadata_text(Path::new("UnderlineTest-Close.ttf"), &bytes)
            .expect("font metadata text");

        assert!(
            text.contains("License Description:") || text.contains("Copyright"),
            "{text}"
        );
        assert!(
            text.contains("Apache") || text.contains("http://www.apache.org/licenses"),
            "{text}"
        );
    }

    // The openfontlicense.org URL (with trailing slash) is rewritten to the
    // canonical scripts.sil.org URL and the slash is consumed.
    #[test]
    fn canonicalizes_ofl_url_variants_in_font_license_metadata() {
        let canonical = canonicalize_ofl_license_reference_urls(
            "This license is available with a FAQ at: https://openfontlicense.org/".to_string(),
        );

        assert_eq!(
            canonical,
            "This license is available with a FAQ at: http://scripts.sil.org/OFL"
        );
    }

    // Builds metadata text from hand-written name-table values and checks
    // that the trademark entry is dropped, the OFL URL is canonicalised, and
    // both license and copyright detection succeed on the result.
    #[test]
    fn font_metadata_lines_detect_noto_ofl_text_without_trademark_noise() {
        // `build_font_metadata_line` returns `None` for ids without a label
        // (the trademark here); `flatten` below drops those entries.
        let metadata_text = [
            build_font_metadata_line(
                name_id::COPYRIGHT_NOTICE,
                "Copyright 2022 The Noto Project Authors (https://github.com/notofonts/latin-greek-cyrillic)".to_string(),
            ),
            build_font_metadata_line(
                name_id::TRADEMARK,
                "Noto is a trademark of Google LLC.".to_string(),
            ),
            build_font_metadata_line(
                name_id::LICENSE,
                "This Font Software is licensed under the SIL Open Font License, Version 1.1. This license is available with a FAQ at: https://scripts.sil.org/OFL".to_string(),
            ),
            build_font_metadata_line(
                name_id::LICENSE_URL,
                "https://scripts.sil.org/OFL".to_string(),
            ),
        ]
        .into_iter()
        .flatten()
        .collect::<Vec<_>>()
        .join("\n");

        assert!(!metadata_text.contains("Trademark:"), "{metadata_text}");
        assert!(
            metadata_text.contains("Copyright 2022 The Noto Project Authors"),
            "{metadata_text}"
        );
        assert!(
            metadata_text.contains("http://scripts.sil.org/OFL"),
            "{metadata_text}"
        );

        let engine = LicenseDetectionEngine::from_embedded().expect("initialize license engine");
        let detections = engine
            .detect_with_kind_and_source_with_score(&metadata_text, false, false, "font.ttf", 0.0)
            .expect("detect licenses from font metadata text");

        assert!(
            detections.iter().any(|detection| {
                detection
                    .license_expression_spdx
                    .as_deref()
                    .is_some_and(|expression| expression.contains("OFL-1.1"))
            }),
            "detections: {detections:#?}"
        );

        let (copyrights, holders, _authors) = detect_copyrights(&metadata_text, None);
        assert!(
            copyrights.iter().any(|detection| {
                detection.copyright
                    == "Copyright 2022 The Noto Project Authors (https://github.com/notofonts/latin-greek-cyrillic)"
            }),
            "copyrights: {copyrights:#?}"
        );
        assert!(
            holders
                .iter()
                .any(|detection| detection.holder == "The Noto Project Authors"),
            "holders: {holders:#?}"
        );
    }

    // WOFF-wrapped font: name-table strings must still be extracted, with
    // the OFL URL in canonical form.
    #[test]
    fn extracts_metadata_from_sourcecodepro_woff_fixture() {
        let bytes = fs::read("testdata/font-fixtures/SourceCodePro-Regular.otf.woff")
            .expect("read woff font fixture");

        let text = extract_font_metadata_text(Path::new("SourceCodePro-Regular.otf.woff"), &bytes)
            .expect("woff font metadata text");

        assert!(text.contains("Adobe"), "{text}");
        assert!(
            text.contains("Open Font License") || text.contains("OFL"),
            "{text}"
        );
        assert!(text.contains("http://scripts.sil.org/OFL"), "{text}");
    }

    // Same expectations as the WOFF test, for the WOFF2 container.
    #[test]
    fn extracts_metadata_from_sourcecodepro_woff2_fixture() {
        let bytes = fs::read("testdata/font-fixtures/SourceCodePro-Regular.otf.woff2")
            .expect("read woff2 font fixture");

        let text = extract_font_metadata_text(Path::new("SourceCodePro-Regular.otf.woff2"), &bytes)
            .expect("woff2 font metadata text");

        assert!(text.contains("Adobe"), "{text}");
        assert!(
            text.contains("Open Font License") || text.contains("OFL"),
            "{text}"
        );
        assert!(text.contains("http://scripts.sil.org/OFL"), "{text}");
    }

    // EOT files go through the marker-based byte scan; the copyright notice,
    // license statement, and canonical OFL URL must all be recovered.
    #[test]
    fn extracts_legal_strings_from_notosans_eot_fixture() {
        let bytes =
            fs::read("testdata/font-fixtures/NotoSans-Regular.eot").expect("read eot font fixture");

        let text = extract_font_metadata_text(Path::new("NotoSans-Regular.eot"), &bytes)
            .expect("eot font metadata text");

        assert!(text.contains("Copyright 2015 Google Inc."), "{text}");
        assert!(
            text.contains("This Font Software is licensed under the SIL Open Font License"),
            "{text}"
        );
        assert!(text.contains("http://scripts.sil.org/OFL"), "{text}");
    }

    // End-to-end check on a WOFF fixture: the extracted text must detect as
    // OFL-1.1, and the detected copyright must not carry a reserved-font-name
    // tail.
    #[test]
    fn wrapped_font_metadata_detects_sourcecodepro_ofl_without_reserved_font_tail() {
        let bytes = fs::read("testdata/font-fixtures/SourceCodePro-Regular.otf.woff")
            .expect("read woff font fixture");
        let metadata_text =
            extract_font_metadata_text(Path::new("SourceCodePro-Regular.otf.woff"), &bytes)
                .expect("wrapped font metadata text");

        let engine = LicenseDetectionEngine::from_embedded().expect("initialize license engine");
        let detections = engine
            .detect_with_kind_and_source_with_score(&metadata_text, false, false, "font.woff", 0.0)
            .expect("detect licenses from wrapped font metadata text");
        assert!(
            detections.iter().any(|detection| {
                detection
                    .license_expression_spdx
                    .as_deref()
                    .is_some_and(|expression| expression.contains("OFL-1.1"))
            }),
            "detections: {detections:#?}"
        );

        let (copyrights, holders, _authors) = detect_copyrights(&metadata_text, None);
        assert!(
            copyrights.iter().any(|detection| {
                detection.copyright == "(c) 2023 Adobe (http://www.adobe.com/)"
            }),
            "copyrights: {copyrights:#?}"
        );
        assert!(
            holders.iter().any(|detection| detection.holder == "Adobe"),
            "holders: {holders:#?}"
        );
    }

    // TrueType collection: metadata must be extracted from a `.ttc` file.
    #[test]
    fn extracts_metadata_from_ttc_fixture() {
        let bytes = fs::read("testdata/font-fixtures/TTC.ttc").expect("read ttc font fixture");

        let text = extract_font_metadata_text(Path::new("TTC.ttc"), &bytes)
            .expect("ttc font metadata text");

        assert!(
            text.contains("Copyright") || text.contains("License"),
            "{text}"
        );
        assert!(text.contains("No rights reserved"), "{text}");
    }
}