use super::*;
use std::fs;
use std::path::PathBuf;
/// A notice written with the U+00A9 symbol must yield at least one detection
/// whose text starts with the word "Copyright".
#[test]
fn test_copyright_prefix_preserved_with_unicode_symbol() {
    let (detections, _, _) = detect_copyrights_from_text("Copyright \u{00A9} 1998 Tom Tromey");
    let texts: Vec<&str> = detections.iter().map(|d| d.copyright.as_str()).collect();
    let prefix_kept = texts.iter().any(|text| text.starts_with("Copyright"));
    assert!(
        prefix_kept,
        "Should preserve 'Copyright' prefix with \u{00A9} symbol, got: {:?}",
        texts
    );
}
/// Given two detections on the same line where one is only the year prefix of
/// the other, the year-only entry must be gone after the drop pass.
#[test]
fn test_drop_shadowed_year_only_prefix_same_start_line() {
    let mut copyrights: Vec<CopyrightDetection> = ["(c) 2001", "(c) 2001 Foo Bar"]
        .iter()
        .map(|text| CopyrightDetection {
            copyright: (*text).to_string(),
            start_line: 5,
            end_line: 5,
        })
        .collect();

    drop_shadowed_year_only_copyright_prefixes_same_start_line(&mut copyrights);

    let year_only_survived = copyrights.iter().any(|c| c.copyright == "(c) 2001");
    assert!(
        !year_only_survived,
        "should drop year-only prefix when longer exists: {copyrights:?}"
    );
}
/// A holder name wrapped over several `*`-prefixed lines must be joined into a
/// single copyright statement and a single holder, neither of which includes
/// the "All rights reserved" tail.
#[test]
fn test_multiline_c_style_holder_name_not_truncated() {
    const EXPECTED_COPYRIGHT: &str = "Copyright (c) The International Cooperation for the Integration of Processes in Prepress, Press and Postpress (CIP4)";
    const EXPECTED_HOLDER: &str = "The International Cooperation for the Integration of Processes in Prepress, Press and Postpress (CIP4)";

    let input = concat!(
        "*\n",
        "* Copyright (c) The International Cooperation for the Integration of \n",
        "* Processes in Prepress, Press and Postpress (CIP4). All rights \n",
        "* reserved.\n",
    );
    let (copyrights, holders, _) = detect_copyrights_from_text(input);

    let copyright_texts: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(
        copyright_texts.contains(&EXPECTED_COPYRIGHT),
        "copyrights: {:?}",
        copyright_texts
    );

    let holder_texts: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        holder_texts.contains(&EXPECTED_HOLDER),
        "holders: {:?}",
        holder_texts
    );
}
/// A continuation line that begins with " - " must be appended to both the
/// copyright statement and the holder from the preceding line.
#[test]
fn test_multiline_leading_dash_suffix_is_extended() {
    let (copyrights, holders, _) =
        detect_copyrights_from_text("Copyright 1998-2010 AOL Inc.\n - Apache\n");

    let copyright_texts: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(
        copyright_texts.contains(&"Copyright 1998-2010 AOL Inc. - Apache"),
        "copyrights: {:?}",
        copyright_texts
    );

    let holder_texts: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        holder_texts.contains(&"AOL Inc. - Apache"),
        "holders: {:?}",
        holder_texts
    );
}
/// An "at/dot" obfuscated e-mail inside angle brackets stays in the copyright
/// text (without the brackets), while the expected holder has no e-mail.
#[test]
fn test_obfuscated_angle_email_is_kept_in_copyright() {
    let (copyrights, holders, _) = detect_copyrights_from_text(
        "(C)opyright MMIV-MMV Anselm R. Garbe <garbeam at gmail dot com>",
    );

    let copyright_texts: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(
        copyright_texts
            .contains(&"Copyright (c) MMIV-MMV Anselm R. Garbe garbeam at gmail dot com"),
        "copyrights: {:?}",
        copyright_texts
    );

    let holder_texts: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        holder_texts.contains(&"MMIV-MMV Anselm R. Garbe"),
        "holders: {:?}",
        holder_texts
    );
}
/// A "[at]/[dot]" obfuscated e-mail after a double dash is expected in the
/// copyright text in normalised form; the expected holder is the name only.
#[test]
fn test_dash_obfuscated_email_is_kept_in_copyright() {
    let (copyrights, holders, _) = detect_copyrights_from_text(
        "Copyright (c) 2005, 2006 Nick Galbreath -- nickg [at] modp [dot] com",
    );

    let copyright_texts: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(
        copyright_texts
            .contains(&"Copyright (c) 2005, 2006 Nick Galbreath - nickg at modp dot com"),
        "copyrights: {:?}",
        copyright_texts
    );

    let holder_texts: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        holder_texts.contains(&"Nick Galbreath"),
        "holders: {:?}",
        holder_texts
    );
}
/// A "(c) YEAR" suffix that follows the holder stays in the copyright text,
/// while the expected holder excludes it.
#[test]
fn test_trailing_copy_year_suffix_is_kept() {
    let (copyrights, holders, _) =
        detect_copyrights_from_text("Copyright base-x contributors (c) 2016");

    let copyright_texts: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(
        copyright_texts.contains(&"Copyright base-x contributors (c) 2016"),
        "copyrights: {:?}",
        copyright_texts
    );

    let holder_texts: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        holder_texts.contains(&"base-x contributors"),
        "holders: {:?}",
        holder_texts
    );
}
/// Both the short author list and the longer contributor list that extends it
/// must be reported as authors.
#[test]
fn test_author_prefix_dedup_keeps_short_email_list() {
    let input = concat!(
        "Author(s): gthomas, sorin@netappi.com\n",
        "Contributors: gthomas, sorin@netappi.com, andrew.lunn@ascom.ch\n",
    );
    let (_, _, authors) = detect_copyrights_from_text(input);
    let vals: Vec<&str> = authors.iter().map(|a| a.author.as_str()).collect();

    for expected in [
        "gthomas, sorin@netappi.com",
        "gthomas, sorin@netappi.com, andrew.lunn@ascom.ch",
    ] {
        assert!(vals.iter().any(|v| *v == expected), "authors: {vals:?}");
    }
}
/// A changelog-like sentence "Added the Copyright year (2020) for X" is
/// expected to yield a copyright starting at "Copyright" and X as the holder.
#[test]
fn test_added_copyright_year_for_line_is_extracted() {
    let (copyrights, holders, _) =
        detect_copyrights_from_text("Added the Copyright year (2020) for A11yance");

    let copyright_texts: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(
        copyright_texts.contains(&"Copyright year (2020) for A11yance"),
        "copyrights: {:?}",
        copyright_texts
    );

    let holder_texts: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        holder_texts.contains(&"A11yance"),
        "holders: {:?}",
        holder_texts
    );
}
/// An organisation named after "was developed by" and wrapped over three
/// lines must be reported as one author, ending before "with participation".
#[test]
fn test_was_developed_by_multiline_author_is_extracted() {
    let input = concat!(
        "1. GOST R 34.11-2012 was developed by the Center for Information\n",
        "Protection and Special Communications of the Federal Security\n",
        "Service of the Russian Federation with participation of the Open\n",
    );
    let (_, _, authors) = detect_copyrights_from_text(input);

    let author_texts: Vec<&str> = authors.iter().map(|a| a.author.as_str()).collect();
    assert!(
        author_texts.contains(
            &"the Center for Information Protection and Special Communications of the Federal Security Service of the Russian Federation"
        ),
        "authors: {:?}",
        author_texts
    );
}
/// "Written by NAME <email> for the ... project." must yield the name with its
/// angle-bracket e-mail as an author, without the "for the ..." tail.
#[test]
fn test_written_by_author_email_for_project_is_extracted() {
    let (_, _, authors) = detect_copyrights_from_text(
        "Written by Andy Polyakov <appro@openssl.org> for the OpenSSL\nproject.",
    );

    let author_texts: Vec<&str> = authors.iter().map(|a| a.author.as_str()).collect();
    assert!(
        author_texts.contains(&"Andy Polyakov <appro@openssl.org>"),
        "authors: {:?}",
        author_texts
    );
}
/// The W3C notice template is expected to come out with the symbols rendered
/// as "(c)" and "(r)", the brackets around YEAR removed, and the parenthesised
/// member list kept in both the copyright and the holder.
#[test]
fn test_w3c_registered_holder_is_extracted() {
    let input = concat!(
        "This software includes material\n",
        "copied from [title]. Copyright ©\n",
        "[YEAR] W3C® (MIT, ERCIM, Keio, Beihang).",
    );
    let (copyrights, holders, _) = detect_copyrights_from_text(input);

    let copyright_texts: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(
        copyright_texts.contains(&"Copyright (c) YEAR W3C(r) (MIT, ERCIM, Keio, Beihang)"),
        "copyrights: {:?}",
        copyright_texts
    );

    let holder_texts: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        holder_texts.contains(&"W3C(r) (MIT, ERCIM, Keio, Beihang)"),
        "holders: {:?}",
        holder_texts
    );
}
/// Pins the author extracted from a "developed by ..." sentence to the exact
/// span "the U.S. Government. BAE Systems".
#[test]
fn test_developed_by_sentence_author_is_extracted() {
    let (_, _, authors) = detect_copyrights_from_text(
        "developed by the U.S. Government. BAE Systems is enhancing and supporting the SMP",
    );

    let author_texts: Vec<&str> = authors.iter().map(|a| a.author.as_str()).collect();
    assert!(
        author_texts.contains(&"the U.S. Government. BAE Systems"),
        "authors: {:?}",
        author_texts
    );
}
/// A "developed by ORGANISATION and to credit ..." phrase must yield the
/// organisation alone as the author.
#[test]
fn test_developed_by_phrase_author_is_extracted() {
    let input = concat!(
        "to acknowledge that it was\n",
        "developed by the National Center for Supercomputing Applications at the University of Illinois at Urbana-Champaign and to credit the\n",
        "contributors.",
    );
    let (_, _, authors) = detect_copyrights_from_text(input);

    let author_texts: Vec<&str> = authors.iter().map(|a| a.author.as_str()).collect();
    assert!(
        author_texts.contains(
            &"the National Center for Supercomputing Applications at the University of Illinois at Urbana-Champaign"
        ),
        "authors: {:?}",
        author_texts
    );
}
/// Every detection produced from a "Copyright:" header followed by several
/// notices must start with the word "Copyright".
#[test]
fn test_copyright_prefix_preserved_multiline_debian() {
    let input = "Copyright:\n\n Copyright \u{00A9} 1999-2009 Red Hat, Inc.\n Copyright \u{00A9} 1998 Tom Tromey\n Copyright \u{00A9} 1999 Free Software Foundation, Inc.";
    let (detections, _, _) = detect_copyrights_from_text(input);

    let mut missing: Vec<&String> = Vec::new();
    for detection in &detections {
        if !detection.copyright.starts_with("Copyright") {
            missing.push(&detection.copyright);
        }
    }
    assert!(
        missing.is_empty(),
        "All copyrights should start with 'Copyright', but these don't: {:?}",
        missing
    );
}
/// Notices whose holders are wrapped in `<s>` tags must still produce
/// detections that all start with the word "Copyright".
#[test]
fn test_copyright_prefix_preserved_with_html_tags() {
    let input = " Copyright \u{00A9} 1998 <s>Tom Tromey</s>\n Copyright \u{00A9} 1999 <s>Free Software Foundation, Inc.</s>";
    let (detections, _, _) = detect_copyrights_from_text(input);

    let mut missing: Vec<&String> = Vec::new();
    for detection in &detections {
        if !detection.copyright.starts_with("Copyright") {
            missing.push(&detection.copyright);
        }
    }
    assert!(
        missing.is_empty(),
        "All copyrights should start with 'Copyright', but these don't: {:?}",
        missing
    );
}
/// A notice that follows a bare "Copyright:" header line must yield at least
/// one detection that starts with the word "Copyright".
#[test]
fn test_copyright_prefix_preserved_debian_copyright_header() {
    let (detections, _, _) = detect_copyrights_from_text(
        "Copyright:\n\n\tCopyright (C) 1998-2005 <s>Oliver Rauch</s>",
    );
    let texts: Vec<&str> = detections.iter().map(|d| d.copyright.as_str()).collect();
    let prefix_kept = texts.iter().any(|text| text.starts_with("Copyright"));
    assert!(
        prefix_kept,
        "Should preserve 'Copyright' prefix after 'Copyright:' header, got: {:?}",
        texts
    );
}
/// A header followed by four `<s>`-tagged notices must produce only
/// detections that start with the word "Copyright".
#[test]
fn test_copyright_prefix_preserved_multi_copyright_block() {
    let input = "Copyright:\n Copyright \u{00A9} 1999-2009 <s>Red Hat, Inc.</s>\n Copyright \u{00A9} 1998 <s>Tom Tromey</s>\n Copyright \u{00A9} 1999 <s>Free Software Foundation, Inc.</s>\n Copyright \u{00A9} 2003 <s>Sun Microsystems, Inc.</s>";
    let (detections, _, _) = detect_copyrights_from_text(input);

    let mut missing: Vec<&String> = Vec::new();
    for detection in &detections {
        if !detection.copyright.starts_with("Copyright") {
            missing.push(&detection.copyright);
        }
    }
    assert!(
        missing.is_empty(),
        "All copyrights should start with 'Copyright', but these don't: {:?}",
        missing
    );
}
/// An HTML list item whose notice is split across a line break must be merged
/// into the single statement "Copyright (c) 2002-2009 Charlie Poole".
#[test]
fn test_detect_html_multiline_copyright_keeps_copyright_word() {
    let input = "<li><p class=\"Legal\" style=\"margin-left: 0pt;\">Copyright \u{00A9} 2002-2009 \n\t Charlie Poole</p></li>";
    let (detections, _, _) = detect_copyrights_from_text(input);

    let texts: Vec<&str> = detections.iter().map(|d| d.copyright.as_str()).collect();
    assert!(
        texts.contains(&"Copyright (c) 2002-2009 Charlie Poole"),
        "Expected merged Copyright (c) statement, got: {:?}",
        texts
    );
}
/// Both halves of the holder "Lua.org, PUC-Rio" must appear together in some
/// copyright and in some holder.
#[test]
fn test_detect_lua_org_puc_rio_not_truncated() {
    fn mentions_both(text: &str) -> bool {
        text.contains("Lua.org") && text.contains("PUC-Rio")
    }

    let (copyrights, holders, _) =
        detect_copyrights_from_text("Copyright © 1994-2011 Lua.org, PUC-Rio\n");
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    assert!(cr.iter().any(|s| mentions_both(s)), "copyrights: {cr:#?}");
    assert!(hs.iter().any(|s| mentions_both(s)), "holders: {hs:#?}");
}
/// "Copyright or Copr. HOLDER" without a year must be detected verbatim, with
/// HOLDER reported as the holder.
#[test]
fn test_detect_copyright_or_copr_without_year() {
    let (copyrights, holders, _) = detect_copyrights_from_text("Copyright or Copr. CNRS\n");
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    assert!(cr.contains(&"Copyright or Copr. CNRS"), "copyrights: {cr:#?}");
    assert!(hs.contains(&"CNRS"), "holders: {hs:#?}");
}
/// A holder made of three dash-separated segments must be reported in full;
/// the two-segment truncation must not appear as a copyright or a holder.
#[test]
fn test_detect_copr_with_multiple_dash_segments_not_truncated() {
    let (copyrights, holders, _) =
        detect_copyrights_from_text("Copyright or Copr. 2006 INRIA - CIRAD - INRA\n");
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    assert!(
        cr.contains(&"Copr. 2006 INRIA - CIRAD - INRA"),
        "copyrights: {cr:#?}"
    );
    assert!(
        !cr.contains(&"Copr. 2006 INRIA - CIRAD"),
        "copyrights: {cr:#?}"
    );
    assert!(hs.contains(&"INRIA - CIRAD - INRA"), "holders: {hs:#?}");
    assert!(!hs.contains(&"INRIA - CIRAD"), "holders: {hs:#?}");
}
/// A single "Copyright YEAR Name" line must be detected as-is. The candidate
/// groups, tokens and parse tree are computed only so they can be printed if
/// the copyright assertion fails.
#[test]
fn test_detect_lppl_single_copyright_line() {
    let content = "Copyright 2003 Name\n";

    // Intermediate pipeline stages, used for the failure message only.
    let numbered_lines: Vec<(usize, String)> =
        (1..).zip(content.lines().map(String::from)).collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);
    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    let tree = match tokens.is_empty() {
        true => Vec::new(),
        false => parse(tokens.clone()),
    };

    let (copyrights, holders, _) = detect_copyrights_from_text(content);
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    assert!(
        cr.contains(&"Copyright 2003 Name"),
        "groups: {groups:#?}\n\ntokens: {tokens:#?}\n\ntree: {tree:#?}\n\ncopyrights: {cr:#?}"
    );
    assert!(hs.contains(&"Name"), "holders: {hs:#?}");
}
/// A personal name with a middle initial must be reported as one complete
/// holder.
#[test]
fn test_detect_person_name_with_middle_initial() {
    let (_, holders, _) = detect_copyrights_from_text("Copyright (c) 2004, Richard S. Hall\n");
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(hs.contains(&"Richard S. Hall"), "holders: {hs:#?}");
}
/// The `hall-copyright.txt` fixture must yield "Richard S. Hall" as a holder.
#[test]
fn test_detect_hall_copyright_fixture_contains_richard_s_hall_holder() {
    let content = fs::read_to_string("testdata/copyright-golden/copyrights/hall-copyright.txt")
        .expect("read fixture");
    let (copyrights, holders, _) = detect_copyrights_from_text(&content);
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        hs.contains(&"Richard S. Hall"),
        "copyrights: {cr:#?}\n\nholders: {hs:#?}"
    );
}
/// The `math.c` fixture must report two holders together with their
/// angle-bracket e-mail addresses.
#[test]
fn test_math_c_fixture_restores_angle_email_holders_for_modified_by_lines() {
    let content = fs::read_to_string("testdata/copyright-golden/copyrights/math.c")
        .expect("read fixture");
    let (_, holders, _) = detect_copyrights_from_text(&content);

    // Holder text with its line span, printed when an assertion fails.
    let debug: Vec<String> = holders
        .iter()
        .map(|h| format!("{} [{}-{}]", h.holder, h.start_line, h.end_line))
        .collect();

    for expected in [
        "Paul Mundt <lethal@linux-sh.org>",
        "Vladimir Oleynik <dzo@simtreas.ru>",
    ] {
        assert!(
            holders.iter().any(|h| h.holder == expected),
            "holders: {debug:#?}"
        );
    }
}
/// The `andre_darcy-c.c` fixture must yield the full "copyright ... by ..."
/// statement and its holder, and must not also yield the bare year-only
/// statement.
#[test]
fn test_andre_darcy_fixture_extracts_modifications_copyright_by_line() {
    let content = fs::read_to_string("testdata/copyright-golden/copyrights/andre_darcy-c.c")
        .expect("read fixture");
    let (copyrights, holders, _) = detect_copyrights_from_text(&content);
    let copyright_texts: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let holder_texts: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    assert!(
        copyright_texts
            .contains(&"copyright 1997, 1998, 1999 by D'Arcy J.M. Cain (darcy@druid.net)"),
        "copyrights: {:#?}",
        copyright_texts
    );
    assert!(
        holder_texts.contains(&"D'Arcy J.M. Cain"),
        "holders: {:#?}",
        holder_texts
    );
    assert!(
        !copyright_texts.contains(&"copyright 1997, 1998, 1999"),
        "copyrights: {:#?}",
        copyright_texts
    );
}
/// The `licco.txt` fixture must report the author and the "Author-email"
/// field as one merged author entry, with neither half reported on its own.
#[test]
fn test_licco_fixture_merges_author_and_author_email_metadata() {
    let content = fs::read_to_string("testdata/copyright-golden/copyrights/licco.txt")
        .expect("read fixture");
    let (_, _, authors) = detect_copyrights_from_text(&content);
    let author_texts: Vec<&str> = authors.iter().map(|a| a.author.as_str()).collect();

    assert!(
        author_texts.contains(&"Hartmut Goebel Author-email h.goebel@crazy-compilers.com"),
        "authors: {:#?}",
        author_texts
    );
    for unmerged in [
        "Hartmut Goebel",
        "Author-email h.goebel@crazy-compilers.com",
    ] {
        assert!(
            !author_texts.iter().any(|a| *a == unmerged),
            "authors: {:#?}",
            author_texts
        );
    }
}
/// Two consecutive notices in the Debian copyright fixture must stay separate:
/// each is reported on its own and the merged form is absent.
#[test]
fn test_libcompress_raw_zlib_perl_fixture_does_not_merge_debian_copyright_lines() {
    let content = fs::read_to_string(
        "testdata/copyright-golden/copyrights/libcompress_raw_zlib_perl-libcompress_raw_zlib_perl.copyright",
    )
    .expect("read fixture");
    let (copyrights, holders, _) = detect_copyrights_from_text(&content);
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    for expected in [
        "Copyright 1995-2005, Jean-loup Gailly <jloup@gzip.org>",
        "Copyright 1995-2005, Mark Adler <madler@alumni.caltech.edu>",
    ] {
        assert!(cr.iter().any(|s| *s == expected), "copyrights: {cr:#?}");
    }

    let merged = "Jean-loup Gailly <jloup@gzip.org> Copyright 1995-2005, Mark Adler <madler@alumni.caltech.edu>";
    assert!(!cr.iter().any(|s| *s == merged), "copyrights: {cr:#?}");

    for expected in ["Jean-loup Gailly", "Mark Adler"] {
        assert!(hs.iter().any(|s| *s == expected), "holders: {hs:#?}");
    }
}
/// The libopenraw fixture must yield three independent copyright statements
/// and their holders, and must not glue the first holder onto the later ones.
#[test]
fn test_libopenraw_fixture_does_not_merge_multiple_debian_copyrights() {
    let content =
        fs::read_to_string("testdata/copyright-golden/copyrights/libopenraw1-libopenraw.label")
            .expect("read fixture");
    let (copyrights, holders, _) = detect_copyrights_from_text(&content);
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    for expected in [
        "(c) 1994, Kongji Huang and Brian C. Smith, Cornell University",
        "(c) 2001, Lutz M\u{00fc}ller <lutz@users.sourceforge.net>",
        "Copyright (c) 2006, Hubert Figuiere <hub@figuiere.net>",
    ] {
        assert!(cr.iter().any(|s| *s == expected), "copyrights: {cr:#?}");
    }

    for merged in [
        "Hubert Figuiere <hub@figuiere.net> (c) 1994, Kongji Huang and Brian C. Smith, Cornell University",
        "Hubert Figuiere <hub@figuiere.net> (c) 2001, Lutz M\u{00fc}ller <lutz@users.sourceforge.net>",
    ] {
        assert!(!cr.iter().any(|s| *s == merged), "copyrights: {cr:#?}");
    }

    for expected in [
        "Kongji Huang and Brian C. Smith, Cornell University",
        "Lutz M\u{00fc}ller",
        "Hubert Figuiere",
    ] {
        assert!(hs.iter().any(|s| *s == expected), "holders: {hs:#?}");
    }
}
/// For the `pre-name.txt` fixture the holders must be the bare names; no
/// holder may contain either angle-bracket e-mail address.
#[test]
fn test_pre_name_fixture_does_not_restore_angle_email_holders() {
    let content =
        fs::read_to_string("testdata/copyright-golden/copyrights/misco4/linux3/pre-name.txt")
            .expect("read fixture");
    let (_, holders, _) = detect_copyrights_from_text(&content);
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    for expected in ["Paul Mundt", "Vladimir Oleynik"] {
        assert!(hs.iter().any(|h| *h == expected), "holders: {hs:#?}");
    }
    for email in ["<lethal@linux-sh.org>", "<dzo@simtreas.ru>"] {
        assert!(!hs.iter().any(|h| h.contains(email)), "holders: {hs:#?}");
    }
}
/// For the `with_trailing_software.txt` fixture the holder must be
/// "Ian F. Darwin", without a trailing "Software" word.
#[test]
fn test_with_trailing_software_fixture_does_not_append_software_to_holder() {
    let content = fs::read_to_string(
        "testdata/copyright-golden/copyrights/copytest/with_trailing_software.txt",
    )
    .expect("read fixture");
    let (_, holders, _) = detect_copyrights_from_text(&content);
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    assert!(hs.contains(&"Ian F. Darwin"), "holders: {hs:#?}");
    assert!(!hs.contains(&"Ian F. Darwin Software"), "holders: {hs:#?}");
}
/// A "Software written by ..." line directly after a copyright line must not
/// contribute its first word to the holder.
#[test]
fn test_copyright_span_does_not_absorb_following_author_line() {
    let (_, holders, _) = detect_copyrights_from_text(
        "Copyright (c) Ian F. Darwin 1986\nSoftware written by Ian F. Darwin and others;",
    );
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    assert!(hs.contains(&"Ian F. Darwin"), "holders: {hs:#?}");
    assert!(!hs.contains(&"Ian F. Darwin Software"), "holders: {hs:#?}");
}
/// The `multilines.txt` fixture must yield the "copyright by ..." statement,
/// its holder, and the author string "Philip Hazel, and".
#[test]
fn test_multilines_fixture_detects_split_copyright_by_holder() {
    let content =
        fs::read_to_string("testdata/copyright-golden/copyrights/misco4/linux4/multilines.txt")
            .expect("read fixture");
    let (copyrights, holders, authors) = detect_copyrights_from_text(&content);
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    let as_: Vec<&str> = authors.iter().map(|a| a.author.as_str()).collect();

    assert!(
        cr.contains(&"copyright by the University of Cambridge, England"),
        "copyrights: {cr:#?}\n\nholders: {hs:#?}"
    );
    assert!(
        hs.contains(&"the University of Cambridge, England"),
        "holders: {hs:#?}"
    );
    assert!(as_.contains(&"Philip Hazel, and"), "authors: {as_:#?}");
}
/// Rebuilds the holder string for the "Richard ... Hall" notice of the
/// `hall-copyright.txt` fixture step by step (candidate group, tokens, parse
/// tree, holder leaves plus trailing orphan tokens) and checks that the
/// refined holder is "Richard S. Hall".
#[test]
fn test_extract_from_tree_nodes_builds_hall_holder_tokens() {
    let content = fs::read_to_string("testdata/copyright-golden/copyrights/hall-copyright.txt")
        .expect("read fixture");
    let numbered_lines: Vec<(usize, String)> =
        (1..).zip(content.lines().map(String::from)).collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);

    // Candidate group whose lines mention both "Richard" and "Hall".
    let group = groups
        .iter()
        .find(|g| {
            g.iter()
                .any(|(_ln, l)| l.contains("Richard") && l.contains("Hall"))
        })
        .expect("group containing Richard Hall");
    let tree = parse(get_tokens(group));

    // One summary line per node that has a year token starting with "2004";
    // only shown if the node lookup below fails.
    let debug_lines: Vec<String> = tree
        .iter()
        .enumerate()
        .filter_map(|(i, node)| {
            let leaves = collect_all_leaves(node);
            let mentions_2004 = leaves
                .iter()
                .any(|t| t.tag == PosTag::Yr && t.value.starts_with("2004"));
            if !mentions_2004 {
                return None;
            }
            let line = leaves.first().map(|t| t.start_line).unwrap_or(0);
            let preview = leaves
                .iter()
                .take(8)
                .map(|t| t.value.as_str())
                .collect::<Vec<_>>()
                .join(" ");
            Some(format!(
                "idx={i} label={:?} line={line} preview={preview:?}",
                node.label()
            ))
        })
        .collect();

    // First node that has the 2004 year token and both name tokens.
    let hall_idx = tree
        .iter()
        .position(|node| {
            let leaves = collect_all_leaves(node);
            leaves
                .iter()
                .any(|t| t.tag == PosTag::Yr && t.value.starts_with("2004"))
                && leaves.iter().any(|t| t.value == "Richard")
                && leaves.iter().any(|t| t.value == "Hall")
        })
        .unwrap_or_else(|| panic!("hall node not found. nodes-with-2004: {debug_lines:#?}"));
    let hall_node = &tree[hall_idx];

    let (trailing_tokens, _skip) = collect_trailing_orphan_tokens(hall_node, &tree, hall_idx + 1);

    // Smallest line number carrying a "copyright" keyword token in the node.
    let copy_line = collect_all_leaves(hall_node)
        .iter()
        .filter_map(|t| {
            (t.tag == PosTag::Copy && t.value.eq_ignore_ascii_case("copyright"))
                .then(|| t.start_line)
        })
        .min();
    let keep_prefix_lines = match copy_line {
        Some(cl) => signal_lines_before_copy_line(hall_node, cl),
        None => Default::default(),
    };

    let mut node_holder_leaves = strip_all_rights_reserved(collect_holder_filtered_leaves(
        hall_node,
        NON_HOLDER_LABELS,
        NON_HOLDER_POS_TAGS,
    ));
    if let Some(copy_line) = copy_line {
        // Drop leaves above the copyright line unless their line is in the keep set.
        node_holder_leaves
            .retain(|t| keep_prefix_lines.contains(&t.start_line) || t.start_line >= copy_line);
    }

    let mut holder_tokens: Vec<&Token> = Vec::new();
    holder_tokens.extend(node_holder_leaves);
    holder_tokens.extend(&trailing_tokens);

    let holder_string = normalize_whitespace(&tokens_to_string(&holder_tokens));
    let refined = refine_holder_in_copyright_context(&holder_string);
    assert_eq!(
        refined.as_deref(),
        Some("Richard S. Hall"),
        "idx={hall_idx} holder_string={holder_string:?} trailing={:?} node={hall_node:#?}",
        trailing_tokens
            .iter()
            .map(|t| t.value.as_str())
            .collect::<Vec<_>>()
    );
}
/// Pins the holder detected in the `hisax_debug.h` fixture to the exact
/// phrase "Frode Isaksen by Frode Isaksen".
#[test]
fn test_detect_hisax_debug_fixture_holder_phrase() {
    let content = fs::read_to_string(
        "testdata/copyright-golden/copyrights/misco4/linux-copyrights/drivers/isdn/hisax/hisax_debug.h",
    )
    .expect("read fixture");
    let (_, holders, _) = detect_copyrights_from_text(&content);
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        hs.contains(&"Frode Isaksen by Frode Isaksen"),
        "holders: {hs:#?}"
    );
}
/// For the `holder_super_c-c.c` fixture the person and company must be joined
/// into one holder without a comma between them.
#[test]
fn test_holder_super_fixture_drops_trailing_comma_before_company_line() {
    let content = fs::read_to_string("testdata/copyright-golden/holders/holder_super_c-c.c")
        .expect("read fixture");
    let (_, holders, _) = detect_copyrights_from_text(&content);
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();

    assert!(
        hs.contains(&"Benjamin Herrenschmuidt IBM Corp."),
        "holders: {hs:#?}"
    );
    assert!(
        !hs.contains(&"Benjamin Herrenschmuidt, IBM Corp."),
        "holders: {hs:#?}"
    );
}
/// The `holder_somefile_cpp-somefile_cpp.cpp` fixture must yield the
/// statement "Foobar Company, (c) 2005".
#[test]
fn test_somefile_cpp_fixture_extracts_licensed_material_copyright() {
    let content = fs::read_to_string(
        "testdata/copyright-golden/holders/holder_somefile_cpp-somefile_cpp.cpp",
    )
    .expect("read fixture");
    let (copyrights, _, _) = detect_copyrights_from_text(&content);
    let cs: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(cs.contains(&"Foobar Company, (c) 2005"), "copyrights: {cs:#?}");
}
/// The `device_tree.c` fixture must report its two authors, e-mails included,
/// as one combined author entry.
#[test]
fn test_device_tree_fixture_extracts_authors_block() {
    let content = fs::read_to_string("testdata/copyright-golden/authors/device_tree.c")
        .expect("read fixture");
    let (c, _h, detected_authors) = detect_copyrights_from_text(&content);
    let authors: Vec<&str> = detected_authors.iter().map(|a| a.author.as_str()).collect();

    let expected = "Jerone Young <jyoung5@us.ibm.com> Hollis Blanchard <hollisb@us.ibm.com>";
    assert!(
        authors.contains(&expected),
        "authors: {authors:#?}\n\ncopyrights: {c:#?}"
    );
}
/// Checks the ", Maintainer" suffix at three stages: it survives line
/// preparation, it is tokenised with a tag other than `Junk`, and it is kept
/// in the copyright and holder detected from the `pata_ali.c` fixture.
#[test]
fn test_pata_ali_fixture_preserves_maintainer_suffix() {
    let content = fs::read_to_string(
        "testdata/copyright-golden/copyrights/misco4/linux-copyrights/drivers/ata/pata_ali.c",
    )
    .expect("read fixture");

    // Stage 1: line preparation keeps the word.
    let prepared = crate::copyright::prepare::prepare_text_line(
        " * Copyright (C) 1998-2000 Michel Aubry, Maintainer",
    );
    assert!(prepared.contains("Maintainer"), "prepared: {prepared}");

    // Stage 2: the word gets a non-junk tag.
    let maint_tokens = get_tokens(&[(1, prepared.clone())]);
    let maintainer_not_junk = maint_tokens
        .iter()
        .any(|t| t.tag != PosTag::Junk && t.value.eq_ignore_ascii_case("Maintainer"));
    assert!(
        maintainer_not_junk,
        "maintainer tokens: {maint_tokens:#?}"
    );

    // Stage 3: full detection on the fixture.
    let (copyrights, holders, _) = detect_copyrights_from_text(&content);
    let cs: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(
        cs.contains(&"Copyright (c) 1998-2000 Michel Aubry, Maintainer"),
        "copyrights: {cs:#?}\n\nholders: {hs:#?}"
    );
    assert!(
        hs.contains(&"Michel Aubry, Maintainer"),
        "copyrights: {cs:#?}\n\nholders: {hs:#?}"
    );
}
/// The `misc-linux.txt` fixture must yield "Tieto Poland" as a holder.
#[test]
fn test_detect_misc_linux_fixture_tieto_holder() {
    let content = fs::read_to_string(
        "testdata/copyright-golden/copyrights/misco4/more-linux/misc-linux.txt",
    )
    .expect("read fixture");
    let (_, holders, _) = detect_copyrights_from_text(&content);
    let hs: Vec<&str> = holders.iter().map(|h| h.holder.as_str()).collect();
    assert!(hs.contains(&"Tieto Poland"), "holders: {hs:#?}");
}
/// The `notice_txt-NOTICE.txt` fixture must yield the bare statement
/// "(c) 2001-2004".
#[test]
fn test_detect_notice_txt_fixture_bare_c_year_range_suffix() {
    let content = fs::read_to_string("testdata/copyright-golden/copyrights/notice_txt-NOTICE.txt")
        .expect("read fixture");
    let (copyrights, _, _) = detect_copyrights_from_text(&content);
    let cr: Vec<&str> = copyrights.iter().map(|c| c.copyright.as_str()).collect();
    assert!(cr.contains(&"(c) 2001-2004"), "copyrights: {cr:#?}");
}
/// A bare "Copyright (C) 1995" followed by the `_ASM_X86_FLOPPY_H` include
/// guard must produce no copyright detections at all.
#[test]
fn test_detect_arch_floppy_h_bare_1995_dropped_for_x86() {
    let content =
        "* Copyright (C) 1995\n */\n#ifndef _ASM_X86_FLOPPY_H\n#define _ASM_X86_FLOPPY_H\n";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(content);
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        copyrights.is_empty(),
        "expected no copyrights, got: {:?}",
        copyrights.iter().map(|c| &c.copyright).collect::<Vec<_>>()
    );
}
/// A bare "Copyright (C) 1995" followed by the `__ASM_ALPHA_FLOPPY_H` include
/// guard must be kept; the comparison ignores ASCII case.
#[test]
fn test_detect_arch_floppy_h_bare_1995_kept_for_alpha() {
    let content =
        "* Copyright (C) 1995\n */\n#ifndef __ASM_ALPHA_FLOPPY_H\n#define __ASM_ALPHA_FLOPPY_H\n";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(content);
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        copyrights
            .iter()
            .any(|c| c.copyright.eq_ignore_ascii_case("Copyright (c) 1995")),
        "copyrights: {:?}",
        copyrights.iter().map(|c| &c.copyright).collect::<Vec<_>>()
    );
}
/// Two changelog-style "DATE TIME user" lines are expected to yield a
/// synthesized "copyright ..." statement for the first line and the user name
/// as the holder.
#[test]
fn test_detect_changelog_timestamp_copyright_and_holder() {
    let content = "2008-01-26 11:46 vruppert\n\n2002-09-08 21:14 vruppert\n";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        cr.iter()
            .any(|s| s == "copyright 2008-01-26 11:46 vruppert"),
        "copyrights: {cr:#?}"
    );
    assert!(hs.iter().any(|s| s == "vruppert"), "holders: {hs:#?}");
}
/// A single changelog timestamp line after an "updated year in copyright"
/// remark must produce neither copyrights nor holders.
#[test]
fn test_detect_changelog_single_timestamp_is_ignored() {
    let content = "updated year in copyright\n\n2008-01-26 11:46 vruppert\n";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        copyrights.is_empty(),
        "expected no copyrights, got: {:?}",
        copyrights.iter().map(|c| &c.copyright).collect::<Vec<_>>()
    );
    assert!(
        holders.is_empty(),
        "expected no holders, got: {:?}",
        holders.iter().map(|h| &h.holder).collect::<Vec<_>>()
    );
}
/// A notice consisting only of a year and an obfuscated e-mail address (no
/// name) must produce no copyright detections.
#[test]
fn test_drop_obfuscated_email_year_only_copyright() {
    let content = "Copyright (C) 2008 <srinivasa.deevi at conexant dot com>\n";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(content);
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        copyrights.is_empty(),
        "expected no copyrights, got: {:?}",
        copyrights.iter().map(|c| &c.copyright).collect::<Vec<_>>()
    );
}
/// A "copyright notice (HOLDER YEAR)" phrase embedded in a sentence must be
/// extracted up to and including the closing parenthesis.
#[test]
fn test_extract_parenthesized_copyright_notice() {
    let content = "an appropriate copyright notice (3dfx Interactive, Inc. 1999), a notice\n";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        cr.iter()
            .any(|s| s == "copyright notice (3dfx Interactive, Inc. 1999)"),
        "copyrights: {cr:#?}"
    );
}
/// The generic phrase "copyright notice(s)" must not produce any detection
/// containing "copyright notice" (compared case-insensitively).
#[test]
fn test_glide_3dfx_copyright_notice_does_not_trigger_for_notice_s_plural() {
    let content = "copyright notice(s)\n";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(content);
    let triggered = copyrights.iter().any(|c| {
        c.copyright
            .to_ascii_lowercase()
            .contains("copyright notice")
    });
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        !triggered,
        "copyrights: {:?}",
        copyrights.iter().map(|c| &c.copyright).collect::<Vec<_>>()
    );
}
/// An `SPDX-FileCopyrightText:` line whose notice has "(c)" but no year must
/// yield the notice text as the copyright and the remaining word as holder.
#[test]
fn test_detect_spdx_filecopyrighttext_c_without_year() {
    let content = "# SPDX-FileCopyrightText: Copyright (c) SOIM\n";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        copyrights
            .iter()
            .any(|c| c.copyright == "Copyright (c) SOIM"),
        "copyrights: {:?}",
        copyrights.iter().map(|c| &c.copyright).collect::<Vec<_>>()
    );
    assert!(
        holders.iter().any(|h| h.holder == "SOIM"),
        "holders: {:?}",
        holders.iter().map(|h| &h.holder).collect::<Vec<_>>()
    );
}
/// Four `<meta name="copyright" ...>` spellings (attribute order, quote style
/// and attribute-name case vary) are fed in together; the content value must
/// be detected as a copyright and "Cedrik LIME" as a holder.
#[test]
fn test_extract_html_meta_name_copyright_content() {
    let content = concat!(
        r#"<meta name="copyright" content="copyright 2005-2006 Cedrik LIME"/>"#,
        "\n",
        r#"<meta content="copyright 2005-2006 Cedrik LIME" name="copyright"/>"#,
        "\n",
        r#"<meta NAME = 'copyright' CONTENT = 'copyright 2005-2006 Cedrik LIME'/>"#,
        "\n",
        r#"<meta content='copyright 2005-2006 Cedrik LIME' name='copyright'/>"#,
    );
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    // Print what was detected so a failure is diagnosable without a debugger.
    assert!(
        copyrights
            .iter()
            .any(|c| c.copyright == "copyright 2005-2006 Cedrik LIME"),
        "copyrights: {:?}",
        copyrights.iter().map(|c| &c.copyright).collect::<Vec<_>>()
    );
    assert!(
        holders.iter().any(|h| h.holder == "Cedrik LIME"),
        "holders: {:?}",
        holders.iter().map(|h| &h.holder).collect::<Vec<_>>()
    );
}
/// The pudn.com HTML footer must be reduced to "(c) 2004-2009 pudn.com" with
/// holder "pudn.com", and no holder may mention `upload_log.asp`.
#[test]
fn test_extract_pudn_footer_canonicalizes_to_domain_only() {
    let content = "© 2004-2009 <a href=\"http://www.pudn.com/\"><font color=\"red\">pudn.com</font></a> ÏæICP±¸07000446";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    assert!(
        copyrights
            .iter()
            .any(|c| c.copyright == "(c) 2004-2009 pudn.com"),
        "copyrights: {copyrights:?}"
    );
    assert!(
        holders.iter().any(|h| h.holder == "pudn.com"),
        "holders: {holders:?}"
    );
    // Same diagnostic as the positive holder check above.
    assert!(
        !holders.iter().any(|h| h.holder.contains("upload_log.asp")),
        "holders: {holders:?}"
    );
}
/// A link to `upload_log.asp` surrounded by mis-encoded text must not produce
/// any copyright that mentions `upload_log.asp`.
#[test]
fn test_extract_pudn_upload_log_link_does_not_create_copyright() {
    let content = r#" �� �� ��: <a href="http://s.pudn.com/upload_log.asp?e=234428" target="_blank">ɭ��</a>"#;
    let (copyrights, _, _) = detect_copyrights_from_text(content);
    let mentions_upload_log = copyrights
        .iter()
        .any(|c| c.copyright.contains("upload_log.asp"));
    assert!(!mentions_upload_log, "copyrights: {copyrights:?}");
}
/// Two fixture files that must be byte-identical are loaded through the
/// golden-test reader and run through detection. Their sorted, de-duplicated
/// outputs must match each other and equal the canonical pudn.com result.
#[test]
fn test_identical_pudn_html_fixtures_produce_identical_canonical_output() {
    /// Sorts and removes duplicates so the comparison ignores order and repeats.
    fn canonical(mut values: Vec<String>) -> Vec<String> {
        values.sort();
        values.dedup();
        values
    }

    let url_path =
        PathBuf::from("testdata/copyright-golden/copyrights/url_in_html-detail_9_html.html");
    let incorrect_path =
        PathBuf::from("testdata/copyright-golden/copyrights/html_incorrect-detail_9_html.html");

    // Precondition: the two fixtures really are the same bytes.
    let url_bytes = fs::read(&url_path).expect("url_in_html fixture must be readable");
    let incorrect_bytes =
        fs::read(&incorrect_path).expect("html_incorrect fixture must be readable");
    assert_eq!(
        url_bytes, incorrect_bytes,
        "fixtures must be byte-identical"
    );

    let url_content = crate::copyright::golden_utils::read_input_content(&url_path)
        .expect("url_in_html fixture content must load");
    let incorrect_content = crate::copyright::golden_utils::read_input_content(&incorrect_path)
        .expect("html_incorrect fixture content must load");

    let (c1, h1, a1) = detect_copyrights_from_text(&url_content);
    let (c2, h2, a2) = detect_copyrights_from_text(&incorrect_content);

    let c1v = canonical(c1.into_iter().map(|d| d.copyright).collect());
    let h1v = canonical(h1.into_iter().map(|d| d.holder).collect());
    let a1v = canonical(a1.into_iter().map(|d| d.author).collect());
    let c2v = canonical(c2.into_iter().map(|d| d.copyright).collect());
    let h2v = canonical(h2.into_iter().map(|d| d.holder).collect());
    let a2v = canonical(a2.into_iter().map(|d| d.author).collect());

    assert_eq!(c1v, c2v, "copyright outputs differ for identical content");
    assert_eq!(h1v, h2v, "holder outputs differ for identical content");
    assert_eq!(a1v, a2v, "author outputs differ for identical content");

    assert_eq!(c1v, vec!["(c) 2004-2009 pudn.com".to_string()]);
    assert_eq!(h1v, vec!["pudn.com".to_string()]);
    assert!(a1v.is_empty());
}
/// Lines prefixed with `%%Copyright:` must form at least one candidate group
/// and yield "Copyright 1990-2009 Adobe Systems Incorporated" with holder
/// "Adobe Systems Incorporated".
#[test]
fn test_detect_postscript_percent_copyright_prefix() {
    let content = "%%Copyright: -----------------------------------------------------------\n\
                   %%Copyright: Copyright 1990-2009 Adobe Systems Incorporated.\n\
                   %%Copyright: All rights reserved.\n";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(content.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);
    assert!(!groups.is_empty(), "groups unexpectedly empty");

    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    let expected_copyright = "Copyright 1990-2009 Adobe Systems Incorporated";
    assert!(
        cr.iter().any(|s| s == expected_copyright),
        "groups: {groups:#?}\ncr: {cr:#?}"
    );

    let expected_holder = "Adobe Systems Incorporated";
    assert!(hs.iter().any(|s| s == expected_holder), "{hs:#?}");
}
/// A "B.A.T.M.A.N. contributors:" header line must not surface as a copyright
/// (compared case-insensitively) nor as the holder
/// "B.A.T.M.A.N. contributors".
#[test]
fn test_drop_batman_adv_contributors_copyright() {
    let content = concat!(
        "/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: */\n",
        "#ifndef _NET_BATMAN_ADV_TYPES_H_\n",
        "#define _NET_BATMAN_ADV_TYPES_H_\n",
    );
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);

    let copyright_leaked = copyrights
        .iter()
        .map(|c| c.copyright.to_ascii_lowercase())
        .any(|lowered| lowered.contains("b.a.t.m.a.n. contributors"));
    assert!(!copyright_leaked);

    let holder_leaked = holders
        .iter()
        .any(|h| h.holder == "B.A.T.M.A.N. contributors");
    assert!(!holder_leaked);
}
/// Three consecutive copyright lines must each be reported as their own
/// copyright and holder.
#[test]
fn test_detect_ed_ed_fixture_does_not_merge_adjacent_copyright_lines() {
    let content = concat!(
        "Program Copyright (C) 1993, 1994 Andrew Moore, Talke Studio.\n",
        "Copyright (C) 2006, 2007 Antonio Diaz Diaz.\n",
        "Modifications for Debian Copyright (C) 1997-2007 James Troup.\n",
    );
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    // Copyright statements are checked first, in source order.
    for expected in [
        "Copyright (c) 1993, 1994 Andrew Moore, Talke Studio",
        "Copyright (c) 2006, 2007 Antonio Diaz Diaz",
        "Copyright (c) 1997-2007 James Troup",
    ] {
        assert!(cr.iter().any(|s| s == expected), "{cr:#?}");
    }

    // Then the matching holders.
    for expected in [
        "Andrew Moore, Talke Studio",
        "Antonio Diaz Diaz",
        "James Troup",
    ] {
        assert!(hs.iter().any(|s| s == expected), "{hs:#?}");
    }
}
/// The icedax label fixture must yield the
/// "(c) 1998-2002 by Heiko Eissfeldt, ..." copyright including the e-mail.
#[test]
fn test_detect_icedax_fixture_includes_libedc_by_line_with_email() {
    let fixture = PathBuf::from("testdata/copyright-golden/copyrights/icedax-icedax.label");
    let content = fs::read_to_string(&fixture).expect("icedax fixture must be readable");

    let (copyrights, _holders, _authors) = detect_copyrights_from_text(&content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();

    let expected = "(c) 1998-2002 by Heiko Eissfeldt, heiko@colossus.escape.de";
    assert!(cr.iter().any(|s| s == expected), "copyrights: {cr:#?}");
}
/// A single "(c) <range> by <name>, <email>" line must be reported verbatim
/// (minus the trailing newline).
#[test]
fn test_detect_c_year_range_by_name_comma_email_single_line() {
    let line = "(c) 1998-2002 by Heiko Eissfeldt, heiko@colossus.escape.de\n";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(line);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();

    let expected = "(c) 1998-2002 by Heiko Eissfeldt, heiko@colossus.escape.de";
    assert!(cr.iter().any(|s| s == expected), "copyrights: {cr:#?}");
}
/// The gnome-session fixture must yield "Copyright (c) 2001 Queen of England"
/// with holder "Queen of England".
#[test]
fn test_detect_gnome_session_fixture_includes_queen_of_england() {
    let fixture =
        PathBuf::from("testdata/copyright-golden/copyrights/gnome_session-gnome_session.copyright");
    let content = fs::read_to_string(&fixture).expect("gnome session fixture must be readable");

    let (copyrights, holders, _authors) = detect_copyrights_from_text(&content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    let expected_copyright = "Copyright (c) 2001 Queen of England";
    assert!(
        cr.iter().any(|s| s == expected_copyright),
        "copyrights: {cr:#?}"
    );

    let expected_holder = "Queen of England";
    assert!(hs.iter().any(|s| s == expected_holder), "holders: {hs:#?}");
}
/// A holder name containing "of" must be kept whole in both the copyright
/// statement and the holder.
#[test]
fn test_detect_copyright_year_name_with_of_single_line() {
    let line = "Copyright (c) 2001 Queen of England\n";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(line);

    let copyright_texts: Vec<&String> = copyrights.iter().map(|c| &c.copyright).collect();
    assert!(
        copyright_texts
            .iter()
            .any(|text| *text == "Copyright (c) 2001 Queen of England"),
        "copyrights: {:#?}",
        copyright_texts
    );

    let holder_names: Vec<&String> = holders.iter().map(|h| &h.holder).collect();
    assert!(
        holder_names.iter().any(|name| *name == "Queen of England"),
        "holders: {:#?}",
        holder_names
    );
}
/// The libsox alsa fixture must keep the trailing "Contributors" word: the
/// full statement and holder are expected, the truncated statement is not.
#[test]
fn test_detect_libsox_alsa_fixture_keeps_sundry_contributors() {
    let fixture = PathBuf::from(
        "testdata/copyright-golden/copyrights/libsox_fmt_alsa-libsox_fmt_alsa.copyright",
    );
    let content = fs::read_to_string(&fixture).expect("libsox alsa fixture must be readable");

    let (copyrights, holders, _authors) = detect_copyrights_from_text(&content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    let full_copyright = "Copyright 1991 Lance Norskog And Sundry Contributors";
    assert!(
        cr.iter().any(|s| s == full_copyright),
        "copyrights: {cr:#?}"
    );

    let full_holder = "Lance Norskog And Sundry Contributors";
    assert!(hs.iter().any(|s| s == full_holder), "holders: {hs:#?}");

    let truncated_copyright = "Copyright 1991 Lance Norskog And Sundry";
    let truncated_present = cr.iter().any(|s| s == truncated_copyright);
    assert!(!truncated_present, "copyrights: {cr:#?}");
}
/// The SWFObject header must yield the full three-author copyright line.
/// Candidate groups, their tokens and a parse summary are gathered only to
/// enrich the failure message.
#[test]
fn test_detect_swfobject_copyright_line() {
    let content = "/* SWFObject v2.1 <http://code.google.com/p/swfobject/>\n\
                   Copyright (c) 2007-2008 Geoff Stearns, Michael Williams, and Bobby van der Sluis\n\
                   This software is released under the MIT License <http://www.opensource.org/licenses/mit-license.php>\n\
                   */\n";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(content.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);

    // Diagnostic: (value, tag) pairs for every candidate group.
    let mut token_dbg: Vec<Vec<(String, PosTag)>> = Vec::new();
    for group in groups.iter() {
        let pairs = crate::copyright::lexer::get_tokens(group)
            .into_iter()
            .map(|t| (t.value, t.tag))
            .collect::<Vec<_>>();
        token_dbg.push(pairs);
    }

    // Diagnostic: parse only the first group (if any).
    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    let tree = if !tokens.is_empty() {
        parse(tokens)
    } else {
        Vec::new()
    };
    let has_top_level_nodes = tree.iter().any(|node| {
        matches!(
            node.label(),
            Some(TreeLabel::Copyright | TreeLabel::Copyright2 | TreeLabel::Author)
        )
    });

    let (copyrights, _holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();

    let expected =
        "Copyright (c) 2007-2008 Geoff Stearns, Michael Williams, and Bobby van der Sluis";
    assert!(
        cr.iter().any(|s| s == expected),
        "groups: {groups:#?}\ntokens: {token_dbg:#?}\nparsed_has_top_level_nodes: {has_top_level_nodes}\ncopyrights: {cr:#?}"
    );
}
/// A holder list ending in ", and <name>" must be kept whole in both the
/// copyright statement and the holder. Tokens and tree labels of the first
/// candidate group are collected only for the failure message.
#[test]
fn test_detect_holder_list_continuation_after_comma_and() {
    let content = "Copyright 1996-2002, 2006 by David Turner, Robert Wilhelm, and Werner Lemberg\n";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(content.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);

    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    // Snapshot the tokens before `parse` consumes them.
    let token_dbg: Vec<(String, PosTag)> =
        tokens.iter().map(|t| (t.value.clone(), t.tag)).collect();
    let tree = if !tokens.is_empty() {
        parse(tokens)
    } else {
        Vec::new()
    };
    let labels_dbg: Vec<Option<TreeLabel>> = tree.iter().map(|node| node.label()).collect();

    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    let expected_copyright =
        "Copyright 1996-2002, 2006 by David Turner, Robert Wilhelm, and Werner Lemberg";
    assert!(
        cr.iter().any(|s| s == expected_copyright),
        "tokens: {token_dbg:#?}\nlabels: {labels_dbg:#?}\ncopyrights: {cr:#?}"
    );

    let expected_holder = "David Turner, Robert Wilhelm, and Werner Lemberg";
    assert!(
        hs.iter().any(|s| s == expected_holder),
        "tokens: {token_dbg:#?}\nlabels: {labels_dbg:#?}\nholders: {hs:#?}"
    );
}
/// A ten-entry comma-separated year list followed by a holder must be
/// reported as one copyright with holder "Free Software Foundation, Inc.".
/// Tokens and tree labels are collected only for the failure message.
#[test]
fn test_detect_long_comma_separated_year_list_with_holder() {
    let content = "Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.\n";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(content.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);

    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    // Snapshot the tokens before `parse` consumes them.
    let token_dbg: Vec<(String, PosTag)> =
        tokens.iter().map(|t| (t.value.clone(), t.tag)).collect();
    let tree = if !tokens.is_empty() {
        parse(tokens)
    } else {
        Vec::new()
    };
    let labels_dbg: Vec<Option<TreeLabel>> = tree.iter().map(|node| node.label()).collect();

    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    let expected_copyright = "Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.";
    assert!(
        cr.iter().any(|s| s == expected_copyright),
        "tokens: {token_dbg:#?}\nlabels: {labels_dbg:#?}\ncopyrights: {cr:#?}"
    );

    let expected_holder = "Free Software Foundation, Inc.";
    assert!(
        hs.iter().any(|s| s == expected_holder),
        "tokens: {token_dbg:#?}\nlabels: {labels_dbg:#?}\nholders: {hs:#?}"
    );
}
/// An all-caps three-word holder must appear in full: some copyright must
/// contain both the year range and the holder, and the holder itself must be
/// reported exactly.
#[test]
fn test_detect_all_caps_holder_not_truncated_tech_sys() {
    let line = "(C) Copyright 1985-1999 ADVANCED TECHNOLOGY SYSTEMS\n";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(line);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    let keeps_years_and_holder =
        |s: &String| s.contains("1985-1999") && s.contains("ADVANCED TECHNOLOGY SYSTEMS");
    assert!(
        cr.iter().any(keeps_years_and_holder),
        "copyrights: {cr:#?}"
    );

    let expected_holder = "ADVANCED TECHNOLOGY SYSTEMS";
    assert!(hs.iter().any(|s| s == expected_holder), "holders: {hs:#?}");
}
/// An all-caps, comma-containing holder inside a C block comment must be kept
/// whole. Tokens and tree labels of the first candidate group are collected
/// only for the failure message.
#[test]
fn test_detect_all_caps_holder_not_truncated_moto_broad() {
    let content = "/****************************************************************************\n\
                   * COPYRIGHT (C) 2005 MOTOROLA, BROADBAND COMMUNICATIONS SECTOR\n\
                   *\n\
                   * ALL RIGHTS RESERVED.\n\
                   *\n\
                   * NO PART OF THIS CODE MAY BE COPIED OR MODIFIED WITHOUT\n\
                   * THE WRITTEN CONSENT OF MOTOROLA, BROADBAND COMMUNICATIONS SECTOR\n\
                   ****************************************************************************/\n";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(content.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);

    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    // Snapshot the tokens before `parse` consumes them.
    let token_dbg: Vec<(String, PosTag)> =
        tokens.iter().map(|t| (t.value.clone(), t.tag)).collect();
    let tree = if !tokens.is_empty() {
        parse(tokens)
    } else {
        Vec::new()
    };
    let labels_dbg: Vec<Option<TreeLabel>> = tree.iter().map(|node| node.label()).collect();

    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    // Every fragment must appear in one and the same copyright string.
    let required_fragments = [
        "COPYRIGHT",
        "2005",
        "MOTOROLA",
        "BROADBAND COMMUNICATIONS SECTOR",
    ];
    assert!(
        cr.iter()
            .any(|s| required_fragments.iter().all(|part| s.contains(part))),
        "tokens: {token_dbg:#?}\nlabels: {labels_dbg:#?}\ncopyrights: {cr:#?}"
    );

    let expected_holder = "MOTOROLA, BROADBAND COMMUNICATIONS SECTOR";
    assert!(
        hs.iter().any(|s| s == expected_holder),
        "tokens: {token_dbg:#?}\nlabels: {labels_dbg:#?}\nholders: {hs:#?}"
    );
}
/// A "copyrighted by <holder>, Copyright (C) <year>," sentence must produce a
/// copyright containing the holder, the location, the word "copyright"
/// (any case) and the year, plus a holder with the company and location.
#[test]
fn test_detect_composite_copy_copyrighted_by_with_trailing_copyright_clause() {
    let content =
        "FaCE is copyrighted by Object Computing, Inc., St. Louis Missouri, Copyright (C) 2002,\n";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(content.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);

    // Diagnostics for the failure message; `tokens` is printed too, so parse a clone.
    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    let tree = if !tokens.is_empty() {
        parse(tokens.clone())
    } else {
        Vec::new()
    };

    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    let is_composite_statement = |s: &String| {
        s.contains("copyrighted by Object Computing")
            && s.contains("St. Louis Missouri")
            && s.to_ascii_lowercase().contains("copyright")
            && s.contains("2002")
    };
    assert!(
        cr.iter().any(is_composite_statement),
        "groups: {groups:#?}\n\ntokens: {tokens:#?}\n\ntree: {tree:#?}\n\ncopyrights: {cr:#?}"
    );

    let is_full_holder =
        |s: &String| s.contains("Object Computing") && s.contains("St. Louis Missouri");
    assert!(hs.iter().any(is_full_holder), "holders: {hs:#?}");
}
/// Two year-only copyright lines followed by a holder line must be merged into
/// one statement; the first year-only line must not survive on its own.
#[test]
fn test_detect_regents_multi_line_merges_year_only_prefix() {
    let content = "Copyright (c) 1988, 1993\nCopyright (c) 1992, 1993\nThe Regents of the University of California. All rights reserved.\n";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(content.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);

    // Diagnostics for the failure message; `tokens` is printed too, so parse a clone.
    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    let tree = if !tokens.is_empty() {
        parse(tokens.clone())
    } else {
        Vec::new()
    };

    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let cr: Vec<String> = copyrights.into_iter().map(|c| c.copyright).collect();
    let hs: Vec<String> = holders.into_iter().map(|h| h.holder).collect();

    let merged = "Copyright (c) 1988, 1993 Copyright (c) 1992, 1993 The Regents of the University of California";
    let merged_present = cr.iter().any(|s| s == merged);
    assert!(
        merged_present,
        "groups: {groups:#?}\n\ntokens: {tokens:#?}\n\ntree: {tree:#?}\n\ncopyrights: {cr:#?}\n\nholders: {hs:#?}"
    );

    let year_only_present = cr.iter().any(|s| s == "Copyright (c) 1988, 1993");
    assert!(!year_only_present, "copyrights: {cr:#?}");

    let expected_holder = "The Regents of the University of California";
    assert!(hs.iter().any(|s| s == expected_holder), "holders: {hs:#?}");
}
/// In the first candidate group of the `index.html` fixture, the word
/// "copyright" must be tagged `PosTag::Copy` and be directly followed by a
/// `PosTag::Copy` "(c)" token.
#[test]
fn test_index_html_tokens_tag_copyright_word_as_copy() {
    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("testdata/copyright-golden/copyrights/index.html");
    let content = fs::read_to_string(&path).expect("read index.html fixture");

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(content.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = collect_candidate_lines(numbered_lines);
    assert!(!groups.is_empty(), "Expected at least one candidate group");

    let tokens = get_tokens(&groups[0]);
    let is_copy_token =
        |t: &Token, word: &str| t.tag == PosTag::Copy && t.value.eq_ignore_ascii_case(word);

    assert!(
        tokens.iter().any(|t| is_copy_token(t, "copyright")),
        "Expected 'Copyright' token tagged as Copy. First group tokens: {:?}",
        tokens.iter().take(30).collect::<Vec<_>>()
    );

    let has_adjacent = tokens
        .windows(2)
        .any(|pair| is_copy_token(&pair[0], "copyright") && is_copy_token(&pair[1], "(c)"));
    assert!(
        has_adjacent,
        "Expected adjacent Copy('Copyright') + Copy('(c)') tokens in first group"
    );
}
/// Runs span-based extraction on the parse tree of the first candidate group
/// of the `index.html` fixture and expects
/// "Copyright (c) 2002-2009 Charlie Poole" among the copyrights.
#[test]
fn test_index_html_first_group_span_extraction_keeps_copyright_word() {
    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let path = root.join("testdata/copyright-golden/copyrights/index.html");
    let content = fs::read_to_string(&path).expect("read index.html fixture");
    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = content
        .lines()
        .enumerate()
        .map(|(i, line)| (i + 1, line.to_string()))
        .collect();
    let groups = collect_candidate_lines(numbered_lines);
    // Fail with a readable message instead of an index-out-of-bounds panic
    // when candidate collection regresses and returns nothing.
    let first_group = groups
        .first()
        .expect("Expected at least one candidate group");
    let tokens = get_tokens(first_group);
    let tree = parse(tokens);
    let mut c = Vec::new();
    let mut h = Vec::new();
    let mut a = Vec::new();
    extract_from_spans(&tree, &mut c, &mut h, &mut a, false);
    assert!(
        c.iter()
            .any(|cr| cr.copyright == "Copyright (c) 2002-2009 Charlie Poole"),
        "Span extraction did not produce expected Copyright (c) line. Got: {:?}",
        c.iter().map(|cr| &cr.copyright).collect::<Vec<_>>()
    );
}
/// Runs tree-node extraction on the parse tree of the first candidate group
/// of the `index.html` fixture and expects
/// "Copyright (c) 2002-2009 Charlie Poole" among the copyrights.
#[test]
fn test_index_html_first_group_tree_node_extraction_matches_span_extraction() {
    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let path = root.join("testdata/copyright-golden/copyrights/index.html");
    let content = fs::read_to_string(&path).expect("read index.html fixture");
    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = content
        .lines()
        .enumerate()
        .map(|(i, line)| (i + 1, line.to_string()))
        .collect();
    let groups = collect_candidate_lines(numbered_lines);
    // Fail with a readable message instead of an index-out-of-bounds panic
    // when candidate collection regresses and returns nothing.
    let first_group = groups
        .first()
        .expect("Expected at least one candidate group");
    let tokens = get_tokens(first_group);
    let tree = parse(tokens);
    let mut c = Vec::new();
    let mut h = Vec::new();
    let mut a = Vec::new();
    extract_from_tree_nodes(&tree, &mut c, &mut h, &mut a, false);
    assert!(
        c.iter()
            .any(|cr| cr.copyright == "Copyright (c) 2002-2009 Charlie Poole"),
        "Tree-node extraction did not produce expected Copyright (c) line. Got: {:?}",
        c.iter().map(|cr| &cr.copyright).collect::<Vec<_>>()
    );
}
/// End-to-end detection on the `index.html` fixture must report
/// "Copyright (c) 2002-2009 Charlie Poole" and must not report the bare
/// "(c) 2002-2009 Charlie Poole" variant.
#[test]
fn test_index_html_end_to_end_has_copyright_word() {
    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("testdata/copyright-golden/copyrights/index.html");
    let content = fs::read_to_string(&path).expect("read index.html fixture");

    let (c, _h, _a) = detect_copyrights_from_text(&content);
    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();

    let full_present = texts
        .iter()
        .any(|text| *text == "Copyright (c) 2002-2009 Charlie Poole");
    assert!(
        full_present,
        "End-to-end detection missing expected Copyright (c) line. Got: {:?}",
        texts
    );

    let bare_present = texts
        .iter()
        .any(|text| *text == "(c) 2002-2009 Charlie Poole");
    assert!(
        !bare_present,
        "Expected bare (c) variant to be dropped. Got: {:?}",
        texts
    );
}
/// On the `index.html` fixture the full Digia holder must be reported and the
/// short "Digia Plc" holder must not.
#[test]
fn test_index_html_does_not_emit_shadowed_digia_plc_holder() {
    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("testdata/copyright-golden/copyrights/index.html");
    let content = fs::read_to_string(&path).expect("read index.html fixture");

    let (_c, h, _a) = detect_copyrights_from_text(&content);
    let names: Vec<&String> = h.iter().map(|hd| &hd.holder).collect();

    let full_holder = "Digia Plc and/or its subsidiary(-ies) and other contributors";
    assert!(
        names.iter().any(|name| *name == full_holder),
        "Expected full Digia holder, got: {:?}",
        names
    );

    let short_present = names.iter().any(|name| *name == "Digia Plc");
    assert!(
        !short_present,
        "Expected shadowed short holder to be dropped, got: {:?}",
        names
    );
}
/// An MPL "Portions created by ... are Copyright (C) <year>" sentence that
/// continues on the next line must keep its prefix and report holder
/// "the Initial Developer".
#[test]
fn test_mpl_portions_created_prefix_preserved() {
    let input = "Portions created by the Initial Developer are Copyright (C) 2002\n the Initial Developer.";
    let (c, h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    let expected_copyright =
        "Portions created by the Initial Developer are Copyright (c) 2002 the Initial Developer";
    assert!(
        texts.iter().any(|text| *text == expected_copyright),
        "Expected MPL portions-created prefix preserved, got: {:?}",
        texts
    );

    let names: Vec<&String> = h.iter().map(|hd| &hd.holder).collect();
    assert!(
        names.iter().any(|name| *name == "the Initial Developer"),
        "Expected holder 'the Initial Developer', got: {:?}",
        names
    );
}
/// An MPL prefix line that ends at the year must form exactly one candidate
/// group containing a Copy token and a year token, and detection must report
/// plain "Copyright (c) 2007".
#[test]
fn test_mpl_prefix_line_without_trailing_holder_keeps_plain_copyright() {
    let input = "// Portions created by the Initial Developer are Copyright (C) 2007";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(input.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = collect_candidate_lines(numbered_lines);
    assert_eq!(groups.len(), 1, "Unexpected groups: {groups:?}");

    let tokens = get_tokens(&groups[0]);
    assert!(!tokens.is_empty(), "No tokens produced");

    let has_copy_token = tokens.iter().any(|t| t.tag == PosTag::Copy);
    assert!(
        has_copy_token,
        "Expected at least one Copy token, got: {tokens:?}"
    );

    let has_year_token = tokens
        .iter()
        .any(|t| matches!(t.tag, PosTag::Yr | PosTag::BareYr | PosTag::YrPlus));
    assert!(
        has_year_token,
        "Expected at least one year token, got: {tokens:?}"
    );

    let (c, _h, _a) = detect_copyrights_from_text(input);
    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == "Copyright (c) 2007"),
        "Expected plain Copyright (c) year, got: {:?}",
        texts
    );
}
/// A bare "(c) <year>" input must be reported unchanged.
#[test]
fn test_bare_c_year_only_detected() {
    let input = "(c) 2008";
    let (c, _h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == "(c) 2008"),
        "Expected bare (c) year, got: {:?}",
        texts
    );
}
/// Given "Copyright © 2021" followed by "Copyright (c) 2017", no
/// "Copyright (c) 2021" may be reported while "Copyright (c) 2017" must be.
#[test]
fn test_drop_symbol_year_only_copyright() {
    let input = "Copyright © 2021\nCopyright (c) 2017\n";
    let (c, _h, _a) = detect_copyrights_from_text(input);
    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();

    let symbol_variant_present = texts.iter().any(|text| *text == "Copyright (c) 2021");
    assert!(
        !symbol_variant_present,
        "Expected © year-only to be dropped, got: {:?}",
        texts
    );

    let plain_variant_present = texts.iter().any(|text| *text == "Copyright (c) 2017");
    assert!(
        plain_variant_present,
        "Expected non-© year-only to be kept, got: {:?}",
        texts
    );
}
/// A two-line "Authors:" block must come out as one author string that keeps
/// both e-mail addresses, from the author-colon heuristic called directly and
/// from end-to-end detection.
#[test]
fn test_author_colon_multiline_keeps_emails() {
    let input = "/*\n * Authors: Jorge Cwik, <jorge@laser.satlink.net>\n *\t\tArnt Gulbrandsen, <agulbra@nvg.unit.no>\n */\n";
    let merged_authors =
        "Jorge Cwik, <jorge@laser.satlink.net> Arnt Gulbrandsen, <agulbra@nvg.unit.no>";

    // Direct call into the author-colon heuristic.
    let raw_lines: Vec<&str> = input.lines().collect();
    let mut prepared_cache = crate::copyright::line_tracking::PreparedLineCache::new(&raw_lines);
    let mut extracted: Vec<AuthorDetection> = Vec::new();
    super::author_heuristics::extract_author_colon_blocks(&mut prepared_cache, &mut extracted);
    let direct_names: Vec<&String> = extracted.iter().map(|ad| &ad.author).collect();
    assert!(
        direct_names.iter().any(|name| *name == merged_authors),
        "Expected direct author-colon extraction to keep emails, got: {:?}",
        direct_names
    );

    // Full pipeline.
    let (_c, _h, a) = detect_copyrights_from_text(input);
    let detected_names: Vec<&String> = a.iter().map(|ad| &ad.author).collect();
    assert!(
        detected_names.iter().any(|name| *name == merged_authors),
        "Expected merged multiline author block, got: {:?}",
        detected_names
    );
}
/// "Copyright (c) <range>." with no holder must be reported without the
/// trailing period.
#[test]
fn test_copyright_year_range_only_detected() {
    let input = "Copyright (c) 1995-1999.";
    let (c, _h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == "Copyright (c) 1995-1999"),
        "Expected Copyright (c) year range, got: {:?}",
        texts
    );
}
/// "Copyright <range>," with no "(c)" and no holder must be reported without
/// the trailing comma.
#[test]
fn test_copyright_year_range_only_without_c_detected() {
    let input = "Copyright 2013-2015,";
    let (c, _h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == "Copyright 2013-2015"),
        "Expected Copyright year range, got: {:?}",
        texts
    );
}
/// A leading "Parts" before "Copyright" must be kept in the reported
/// statement.
#[test]
fn test_parts_copyright_prefix_preserved() {
    let input = "Parts Copyright (C) 1992 Uri Blumenthal, IBM";
    let (c, _h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    let expected = "Parts Copyright (c) 1992 Uri Blumenthal, IBM";
    assert!(
        texts.iter().any(|text| *text == expected),
        "Expected Parts prefix preserved, got: {:?}",
        texts
    );
}
/// When a product name precedes the statement, at least one reported
/// copyright must still contain the word "Copyright".
#[test]
fn test_copyright_prefix_preserved_after_name() {
    let input = "Adobe(R) Flash(R) Player. Copyright (C) 1996 - 2008. Adobe Systems Incorporated. All Rights Reserved.";
    let (c, _h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| text.contains("Copyright")),
        "Should preserve 'Copyright' prefix when preceded by a name, got: {:?}",
        texts
    );
}
/// A statement with a nickname in parentheses and an e-mail must give exactly
/// one copyright (full text) and exactly one holder ("Masayuki Hatta").
#[test]
fn test_detect_copyright_with_email() {
    let statement = "Copyright (c) 2009 Masayuki Hatta (mhatta) <mhatta@debian.org>";
    let (c, h, _a) = detect_copyrights_from_text(statement);

    assert_eq!(c.len(), 1, "Should detect one copyright, got: {c:?}");
    let only_copyright = &c[0];
    assert_eq!(
        only_copyright.copyright,
        "Copyright (c) 2009 Masayuki Hatta (mhatta) <mhatta@debian.org>"
    );

    assert_eq!(h.len(), 1, "Should detect one holder, got: {h:?}");
    let only_holder = &h[0];
    assert_eq!(only_holder.holder, "Masayuki Hatta");
}
/// A short lowercase holder followed by "<email>." must give exactly one
/// copyright (without the trailing period) and exactly one holder ("bgme").
#[test]
fn test_detect_copyright_with_short_holder_and_trailing_punct_email() {
    let input = "Copyright (c) 2024 bgme <i@bgme.me>.";

    // Pair every line with its 1-based line number.
    let numbered_lines: Vec<(usize, String)> = (1..)
        .zip(input.lines())
        .map(|(number, text)| (number, text.to_owned()))
        .collect();
    let groups = collect_candidate_lines(numbered_lines);
    assert!(
        !groups.is_empty(),
        "Expected candidate group, got: {groups:?}"
    );

    let (c, h, _a) = detect_copyrights_from_text(input);
    assert_eq!(
        c.len(),
        1,
        "Should detect one copyright, got: {c:?}; groups: {groups:?}"
    );

    let only_copyright = &c[0];
    assert_eq!(
        only_copyright.copyright, "Copyright (c) 2024 bgme <i@bgme.me>",
        "Copyright text: {:?}",
        only_copyright.copyright
    );

    assert_eq!(h.len(), 1, "Should detect one holder, got: {h:?}");
    assert_eq!(h[0].holder, "bgme");
}
/// "Copyright(c)" written without a space must be reported as
/// "Copyright (c) ..." and the lowercase, underscore-containing holder
/// detected.
#[test]
fn test_detect_copyright_compact_c_parens_with_lowercase_holder_and_email() {
    let input = "Copyright(c) 2014 dead_horse <dead_horse@qq.com>";
    let (c, h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    let expected_copyright = "Copyright (c) 2014 dead_horse <dead_horse@qq.com>";
    assert!(
        texts.iter().any(|text| *text == expected_copyright),
        "Expected copyright detected, got: {:?}",
        texts
    );

    let names: Vec<&String> = h.iter().map(|hd| &hd.holder).collect();
    assert!(
        names.iter().any(|name| *name == "dead_horse"),
        "Expected holder detected, got: {:?}",
        names
    );
}
/// A lowercase "copyright <year> <username> (<email>)" fragment inside a
/// sentence must be extracted, with the username as holder.
#[test]
fn test_detect_lowercase_username_email_in_parens_fragment() {
    let input = "Adapted from bzip2.js, copyright 2011 antimatter15 (antimatter15@gmail.com).";
    let (c, h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    let expected_copyright = "copyright 2011 antimatter15 (antimatter15@gmail.com)";
    assert!(
        texts.iter().any(|text| *text == expected_copyright),
        "Expected extracted copyright fragment, got: {:?}",
        texts
    );

    let names: Vec<&String> = h.iter().map(|hd| &hd.holder).collect();
    assert!(
        names.iter().any(|name| *name == "antimatter15"),
        "Expected extracted holder, got: {:?}",
        names
    );
}
/// A year-range-only statement embedded in an HTML string literal must be
/// reported as "Copyright (c) 2003-2014".
// NOTE(review): the test name mentions a "copy entity", but the literal below
// contains a raw © character - confirm the input was not meant to be an HTML
// entity that some tooling decoded.
#[test]
fn test_extract_copy_entity_year_range_only() {
    let input = "expectedHtml = \"<p>Copyright © 2003-2014</p>\",";
    let (c, _h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == "Copyright (c) 2003-2014"),
        "Expected Copyright (c) year range extracted, got: {:?}",
        texts
    );
}
/// A year-range-only statement embedded in an XML string literal must be
/// reported as the bare "(c) 2003-2014".
// NOTE(review): the test name mentions a "hex a9 entity", but the literal
// below contains a raw © character - confirm the input was not meant to be a
// numeric character reference that some tooling decoded.
#[test]
fn test_extract_hex_a9_entity_year_range_only_as_bare_c() {
    let input = "expectedXml = \"<p>Copyright © 2003-2014</p>\",";
    let (c, _h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == "(c) 2003-2014"),
        "Expected (c) year range extracted, got: {:?}",
        texts
    );
}
/// From a "Portions created by ... are Copyright (C) <range>." sentence, the
/// plain "Copyright (c) 1995-1999" clause must be reported.
#[test]
fn test_extract_are_copyright_c_year_range_clause() {
    let input = "Portions created by Ricoh Silicon Valley, Inc. are Copyright (C) 1995-1999. All Rights Reserved.";
    let (c, _h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == "Copyright (c) 1995-1999"),
        "Expected year-range clause extracted, got: {:?}",
        texts
    );
}
/// Empty input must produce no copyrights, holders or authors.
#[test]
fn test_detect_empty_input() {
    let (copyrights, holders, authors) = detect_copyrights_from_text("");

    assert!(copyrights.is_empty());
    assert!(holders.is_empty());
    assert!(authors.is_empty());
}
/// Text without any copyright wording must produce no copyrights, holders or
/// authors.
#[test]
fn test_detect_no_copyright() {
    let plain_text = "This is just some random code.";
    let (copyrights, holders, authors) = detect_copyrights_from_text(plain_text);

    assert!(copyrights.is_empty());
    assert!(holders.is_empty());
    assert!(authors.is_empty());
}
/// A minimal "Copyright <year> <holder>" line must yield a first detection
/// that contains "Copyright" and the year and starts on line 1, plus at least
/// one holder.
#[test]
fn test_detect_simple_copyright() {
    let (c, h, _a) = detect_copyrights_from_text("Copyright 2024 Acme Inc.");
    assert!(!c.is_empty(), "Should detect copyright");

    let first = &c[0];
    let text = &first.copyright;
    assert!(text.contains("Copyright"), "Copyright text: {}", text);
    assert!(text.contains("2024"), "Should contain year: {}", text);
    assert_eq!(first.start_line, 1);

    assert!(!h.is_empty(), "Should detect holder");
}
/// An `SPDX-FileCopyrightText:` line with © and a URL in angle brackets must
/// be reported as a "Copyright (c) ..." statement with the bare URL, and the
/// holder must stop before the URL.
#[test]
fn test_detect_spdx_filecopyrighttext_contributors_to_project() {
    let input = "SPDX-FileCopyrightText: © 2020 Contributors to the project Clay <https://github.com/liferay/clay/graphs/contributors>";
    let (c, h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    let expected_copyright = "Copyright (c) 2020 Contributors to the project Clay https://github.com/liferay/clay/graphs/contributors";
    assert!(
        texts.iter().any(|text| *text == expected_copyright),
        "Missing SPDX-FileCopyrightText copyright, got: {:?}",
        texts
    );

    let names: Vec<&String> = h.iter().map(|ho| &ho.holder).collect();
    assert!(
        names
            .iter()
            .any(|name| *name == "Contributors to the project Clay"),
        "Missing SPDX-FileCopyrightText holder, got: {:?}",
        names
    );
}
/// "Contributors as noted in the AUTHORS file" must be kept whole: the
/// copyright equals the input and the holder is the full phrase.
#[test]
fn test_detect_contributors_as_noted_in_authors_file() {
    let input = "Copyright (c) 2020 Contributors as noted in the AUTHORS file";
    let (c, h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == input),
        "Missing copyright, got: {:?}",
        texts
    );

    let names: Vec<&String> = h.iter().map(|ho| &ho.holder).collect();
    assert!(
        names
            .iter()
            .any(|name| *name == "Contributors as noted in the AUTHORS file"),
        "Missing holder, got: {:?}",
        names
    );
}
/// "Contributors et.al." must be detected with the final period removed in
/// both the copyright and the holder.
#[test]
fn test_detect_contributors_et_al() {
    let input = "Copyright (c) 2017 Contributors et.al.";
    let (c, h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts
            .iter()
            .any(|text| *text == "Copyright (c) 2017 Contributors et.al"),
        "Missing copyright, got: {:?}",
        texts
    );

    let names: Vec<&String> = h.iter().map(|ho| &ho.holder).collect();
    assert!(
        names.iter().any(|name| *name == "Contributors et.al"),
        "Missing holder, got: {:?}",
        names
    );
}
/// "Joyent, Inc. and the persons identified as document authors." must be
/// reported as the Joyent copyright and holder only, with no authors.
#[test]
fn test_detect_joyent_document_authors_keeps_company_prefix() {
    let input = "Copyright (c) 2011 Joyent, Inc. and the persons identified as document authors.";
    let (c, h, a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts
            .iter()
            .any(|text| *text == "Copyright (c) 2011 Joyent, Inc."),
        "Missing Joyent copyright, got: {:?}",
        texts
    );

    let names: Vec<&String> = h.iter().map(|ho| &ho.holder).collect();
    assert!(
        names.iter().any(|name| *name == "Joyent, Inc."),
        "Missing Joyent holder, got: {:?}",
        names
    );

    assert!(a.is_empty(), "Unexpected authors detected: {a:?}");
}
/// A "Not copyrighted <year> by <name>" line must be reported verbatim, with
/// holder "Not by Mark Adler".
#[test]
fn test_detect_not_copyrighted_statement() {
    let input = "Not copyrighted 1992 by Mark Adler";
    let (c, h, _a) = detect_copyrights_from_text(input);

    let texts: Vec<&String> = c.iter().map(|cr| &cr.copyright).collect();
    assert!(
        texts.iter().any(|text| *text == input),
        "Missing copyright, got: {:?}",
        texts
    );

    let names: Vec<&String> = h.iter().map(|ho| &ho.holder).collect();
    assert!(
        names.iter().any(|name| *name == "Not by Mark Adler"),
        "Missing holder, got: {:?}",
        names
    );
}
/// The adler inflate fixture must yield "Not copyrighted 1992 by Mark Adler"
/// with holder "Not by Mark Adler".
#[test]
fn test_fixture_adler_inflate_not_copyrighted() {
    // `expect` with a message (as the other fixture tests do) so a missing
    // fixture is distinguishable from a detection failure.
    let content =
        fs::read_to_string("testdata/copyright-golden/copyrights/adler_inflate_c-inflate_c.c")
            .expect("adler inflate fixture must be readable");
    let (c, h, _a) = detect_copyrights_from_text(&content);
    assert!(
        c.iter()
            .any(|cr| cr.copyright == "Not copyrighted 1992 by Mark Adler"),
        "Missing expected copyright, got: {:?}",
        c.iter().map(|cr| &cr.copyright).collect::<Vec<_>>()
    );
    assert!(
        h.iter().any(|ho| ho.holder == "Not by Mark Adler"),
        "Missing expected holder, got: {:?}",
        h.iter().map(|ho| &ho.holder).collect::<Vec<_>>()
    );
}
/// Runs detection over the Linux `lib/inflate.c` fixture and expects both the
/// 1990 and 1992 "copyrighted" statements plus the holder "Mark Adler".
#[test]
fn test_fixture_linux_inflate_not_copyrighted_normalized() {
    let fixture = "testdata/copyright-golden/copyrights/misco4/linux-copyrights/lib/inflate.c";
    let content = fs::read_to_string(fixture).unwrap();
    let (copyrights, holders, _authors) = detect_copyrights_from_text(&content);

    let cr_texts: Vec<&str> = copyrights.iter().map(|d| d.copyright.as_str()).collect();

    let has_1990 = cr_texts.iter().any(|t| *t == "copyrighted 1990 Mark Adler");
    assert!(
        has_1990,
        "Missing 1990 expected copyright, got: {:?}",
        cr_texts
    );

    let has_1992 = cr_texts
        .iter()
        .any(|t| *t == "copyrighted 1992 by Mark Adler");
    assert!(
        has_1992,
        "Missing 1992 expected copyright, got: {:?}",
        cr_texts
    );

    let has_holder = holders.iter().any(|d| d.holder == "Mark Adler");
    assert!(
        has_holder,
        "Missing expected holder, got: {:?}",
        holders.iter().map(|d| &d.holder).collect::<Vec<_>>()
    );
}
/// Checks that a `(c)` statement with a year range is detected unchanged and
/// produces at least one holder.
#[test]
fn test_detect_copyright_c_symbol() {
    let input = "Copyright (c) 2020-2024 Foo Bar";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    assert!(!copyrights.is_empty(), "Should detect copyright with (c)");
    let first = &copyrights[0];
    assert_eq!(first.copyright, "Copyright (c) 2020-2024 Foo Bar");
    assert!(!holders.is_empty(), "Should detect holder");
}
/// Checks that the trailing "All rights reserved." is stripped from a `(c)`
/// copyright statement.
#[test]
fn test_detect_copyright_c_symbol_with_all_rights_reserved() {
    let (c, _, _) = detect_copyrights_from_text(
        "Copyright (c) 1999-2002 Zend Technologies Ltd. All rights reserved.",
    );
    // Guard the index below so an empty result fails with a readable message
    // instead of an out-of-bounds panic.
    assert!(!c.is_empty(), "Should detect copyright, got: {:?}", c);
    assert_eq!(
        c[0].copyright,
        "Copyright (c) 1999-2002 Zend Technologies Ltd."
    );
}
/// Checks that a statement using the U+00A9 sign inside a C comment is
/// detected and that the result begins with "Copyright".
#[test]
fn test_detect_copyright_unicode_symbol() {
    let input = "/* Copyright \u{00A9} 2000 ACME, Inc., All Rights Reserved */";
    let (copyrights, _, _) = detect_copyrights_from_text(input);

    assert!(
        !copyrights.is_empty(),
        "Should detect copyright with \u{00A9}"
    );
    let first = &copyrights[0];
    let has_prefix = first.copyright.starts_with("Copyright");
    assert!(
        has_prefix,
        "Should start with Copyright, got: {}",
        first.copyright
    );
}
/// Checks that a minimal `(c)` statement with a single-word holder is
/// returned unchanged.
#[test]
fn test_detect_copyright_c_no_all_rights() {
    let input = "Copyright (c) 2009 Google";
    let (c, _, _) = detect_copyrights_from_text(input);
    assert!(!c.is_empty());
    let first = &c[0];
    assert_eq!(first.copyright, "Copyright (c) 2009 Google");
}
/// Checks that two adjacent `(c)` lines are reported as two separate
/// copyrights, in input order.
#[test]
fn test_detect_copyright_c_multiline() {
    let input = "Copyright (c) 2001 by the TTF2PT1 project\nCopyright (c) 2001 by Sergey Babkin";
    let (copyrights, _, _) = detect_copyrights_from_text(input);

    assert_eq!(
        copyrights.len(),
        2,
        "Should detect two copyrights, got: {:?}",
        copyrights
    );
    let (first, second) = (&copyrights[0], &copyrights[1]);
    assert_eq!(first.copyright, "Copyright (c) 2001 by the TTF2PT1 project");
    assert_eq!(second.copyright, "Copyright (c) 2001 by Sergey Babkin");
}
/// Checks that a statement split over indented continuation lines still
/// yields at least one copyright.
#[test]
fn test_detect_multiline_copyright() {
    let text = "Copyright 2024\n  Acme Corporation\n  All rights reserved.";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(text);
    let detected_any = !copyrights.is_empty();
    assert!(detected_any, "Should detect multiline copyright");
}
/// Checks that "Written by ..." yields exactly one author on line 1 and no
/// copyright or holder.
#[test]
fn test_detect_author() {
    let (copyrights, holders, authors) = detect_copyrights_from_text("Written by John Doe");

    assert!(copyrights.is_empty(), "Should not detect copyright");
    assert!(holders.is_empty(), "Should not detect holder");
    assert_eq!(
        authors.len(),
        1,
        "Should detect one author, got: {:?}",
        authors
    );

    let only = &authors[0];
    assert_eq!(only.author, "John Doe");
    assert_eq!(only.start_line, 1);
    assert_eq!(only.end_line, 1);
}
/// Checks that a bare "Copyright (c)" with nothing after it produces no
/// copyright detection.
#[test]
fn test_detect_junk_filtered() {
    let (copyrights, _holders, _authors) = detect_copyrights_from_text("Copyright (c)");
    let nothing_detected = copyrights.is_empty();
    assert!(
        nothing_detected,
        "Bare 'Copyright (c)' should be filtered as junk"
    );
}
/// Checks that two statements separated by blank lines produce at least two
/// copyrights and at least two holders.
#[test]
fn test_detect_multiple_copyrights() {
    let text = "Copyright 2020 Foo Inc.\n\n\n\nCopyright 2024 Bar Corp.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(text);

    let copyright_count = copyrights.len();
    assert!(
        copyright_count >= 2,
        "Should detect two copyrights, got {}: {:?}",
        copyright_count,
        copyrights
    );

    let holder_count = holders.len();
    assert!(
        holder_count >= 2,
        "Should detect two holders, got {}: {:?}",
        holder_count,
        holders
    );
}
/// Checks that an `SPDX-FileCopyrightText:` line is detected and that the
/// reported text contains the word "Copyright".
#[test]
fn test_detect_spdx_copyright() {
    let input = "SPDX-FileCopyrightText: 2024 Example Corp";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(input);

    assert!(!copyrights.is_empty(), "Should detect SPDX copyright");
    let first = &copyrights[0];
    let normalized = first.copyright.contains("Copyright");
    assert!(
        normalized,
        "Should normalize to Copyright: {}",
        first.copyright
    );
}
/// Checks that the reported start line is 1-based: a statement on the second
/// input line is reported on line 2.
#[test]
fn test_detect_line_numbers() {
    let text = "Some header\nCopyright 2024 Acme Inc.\nSome footer";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(text);

    assert!(!copyrights.is_empty(), "Should detect copyright");
    let first = &copyrights[0];
    assert_eq!(first.start_line, 2, "Copyright should be on line 2");
}
/// Checks text and line numbers for a single statement with a year range and
/// a company holder.
#[test]
fn test_detect_copyright_year_range() {
    let (copyrights, holders, _authors) =
        detect_copyrights_from_text("Copyright 2020-2024 Foo Corp.");

    assert_eq!(
        copyrights.len(),
        1,
        "Should detect one copyright, got: {:?}",
        copyrights
    );
    let copyright = &copyrights[0];
    assert_eq!(copyright.copyright, "Copyright 2020-2024 Foo Corp.");
    assert_eq!(copyright.start_line, 1);
    assert_eq!(copyright.end_line, 1);

    assert_eq!(
        holders.len(),
        1,
        "Should detect one holder, got: {:?}",
        holders
    );
    let holder = &holders[0];
    assert_eq!(holder.holder, "Foo Corp.");
    assert_eq!(holder.start_line, 1);
}
/// Runs detection over the `sample_py-py.py` fixture and expects the Motorola
/// holder only in its dash-separated form, never the dash-less form.
#[test]
fn test_fixture_sample_py_motorola_holder_has_dash_variant_only() {
    let fixture = "testdata/copyright-golden/copyrights/sample_py-py.py";
    let content = fs::read_to_string(fixture).unwrap();
    let (_copyrights, holders, _authors) = detect_copyrights_from_text(&content);
    let hs: Vec<&str> = holders.iter().map(|d| d.holder.as_str()).collect();

    let with_dash = "Motorola, Inc. - Motorola Confidential Proprietary";
    let without_dash = "Motorola, Inc. Motorola Confidential Proprietary";

    let has_dash_variant = hs.iter().any(|h| *h == with_dash);
    assert!(has_dash_variant, "holders: {hs:?}");

    let lacks_plain_variant = hs.iter().all(|h| *h != without_dash);
    assert!(lacks_plain_variant, "holders: {hs:?}");
}
/// For MSO document properties without "Confidential", expects the combined
/// template/last-author text as copyright and holder, and no standalone
/// "Jennifer Hruska" detections.
#[test]
fn test_mso_document_properties_non_confidential_uses_template_lastauthor_variant() {
    let content = "<o:Description>Copyright 2009</o:Description>\n<o:Template>techdoc.dot</o:Template>\n<o:LastAuthor>Jennifer Hruska</o:LastAuthor>";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);

    let expected_copyright = "Copyright 2009 techdoc.dot o:LastAuthor Jennifer Hruska";
    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == expected_copyright);
    assert!(found_copyright, "copyrights: {:?}", copyrights);

    let expected_holder = "techdoc.dot o:LastAuthor Jennifer Hruska";
    let found_holder = holders.iter().any(|d| d.holder == expected_holder);
    assert!(found_holder, "holders: {:?}", holders);

    // The two negative checks carry no custom message, so their expressions
    // are left as written to keep the default panic text unchanged.
    assert!(
        !copyrights
            .iter()
            .any(|c| c.copyright == "Jennifer Hruska Copyright 2009")
    );
    assert!(!holders.iter().any(|h| h.holder == "Jennifer Hruska"));
}
/// For MSO document properties containing "Confidential Information", expects
/// "Copyright 2009 Confidential" / "Confidential" and no template/last-author
/// text in any copyright or holder.
#[test]
fn test_mso_document_properties_confidential_does_not_emit_template_lastauthor_variant() {
    let content = "<o:Description>Copyright 2009 Confidential Information</o:Description>\n<o:Template>techdoc.dot</o:Template>\n<o:LastAuthor>Jennifer Hruska</o:LastAuthor>";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(content);
    let template_variant = "techdoc.dot o:LastAuthor Jennifer Hruska";

    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == "Copyright 2009 Confidential");
    assert!(found_copyright, "copyrights: {:?}", copyrights);

    let found_holder = holders.iter().any(|d| d.holder == "Confidential");
    assert!(found_holder, "holders: {:?}", holders);

    let no_variant_copyright = copyrights
        .iter()
        .all(|d| !d.copyright.contains(template_variant));
    assert!(no_variant_copyright, "copyrights: {:?}", copyrights);

    let no_variant_holder = holders
        .iter()
        .all(|d| !d.holder.contains(template_variant));
    assert!(no_variant_holder, "holders: {:?}", holders);
}
/// Checks that a trailing "Authors" word stays part of the holder (with the
/// final period dropped) and is not reported as an author.
#[test]
fn test_detect_copyright_holder_suffix_authors() {
    let input = "Copyright 2015 The Error Prone Authors.";
    let (copyrights, holders, authors) = detect_copyrights_from_text(input);

    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == "Copyright 2015 The Error Prone Authors");
    assert!(
        found_copyright,
        "Should keep 'Authors' as part of holder in copyright: {:?}",
        copyrights
    );

    let found_holder = holders
        .iter()
        .any(|d| d.holder == "The Error Prone Authors");
    assert!(
        found_holder,
        "Should keep 'Authors' as part of holder: {:?}",
        holders
    );

    assert!(
        authors.is_empty(),
        "Should not treat trailing 'Authors' token as an author: {:?}",
        authors
    );
}
/// Checks that `(c)` followed by code-like fragments (casts, ternaries,
/// bit operations) produces no detections of any kind.
#[test]
fn test_detect_filters_code_like_c_marker_lines() {
    let text = "(c) (const unsigned char*)ptr\n(c) c ? foo : bar\n(c) c & 0x3f\n(c) flags |= 0x80";
    let (copyrights, holders, authors) = detect_copyrights_from_text(text);

    let no_copyrights = copyrights.is_empty();
    let no_holders = holders.is_empty();
    let no_authors = authors.is_empty();

    assert!(no_copyrights, "copyrights: {copyrights:?}");
    assert!(no_holders, "holders: {holders:?}");
    assert!(no_authors, "authors: {authors:?}");
}
/// Runs detection over the `complex-html.txt` fixture and expects the Karl
/// Garrison copyright to keep its parenthesized "AT"-obfuscated e-mail.
#[test]
fn test_complex_html_preserves_parenthesized_obfuscated_email_continuation() {
    let fixture = "testdata/copyright-golden/copyrights/misco4/linux9/complex-html.txt";
    let content = fs::read_to_string(fixture).unwrap();
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(&content);

    let expected = "Copyright (c) 2001 Karl Garrison (karl AT indy.rr.com)";
    let found = copyrights.iter().any(|d| d.copyright == expected);
    assert!(found, "copyrights: {:?}", copyrights);
}
/// Checks that a trailing "University" token is kept in both the copyright
/// and the holder, and that no author is reported.
#[test]
fn test_detect_copyright_holder_suffix_university() {
    let input = "Copyright (c) 2001, Rice University";
    let (copyrights, holders, authors) = detect_copyrights_from_text(input);

    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == "Copyright (c) 2001, Rice University");
    assert!(
        found_copyright,
        "Should keep trailing University token in copyright: {:?}",
        copyrights
    );

    let found_holder = holders.iter().any(|d| d.holder == "Rice University");
    assert!(
        found_holder,
        "Should keep trailing University token in holder: {:?}",
        holders
    );

    assert!(
        authors.is_empty(),
        "Unexpected authors detected: {:?}",
        authors
    );
}
/// Checks that "as represented by the / Secretary of the Navy" spanning two
/// lines is kept as part of the copyright and holder.
#[test]
fn test_detect_copyright_holder_suffix_as_represented() {
    let text = "Copyright: (c) 2000 United States Government as represented by the\nSecretary of the Navy. All rights reserved.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(text);

    let expected_copyright =
        "Copyright (c) 2000 United States Government as represented by the Secretary of the Navy";
    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == expected_copyright);
    assert!(
        found_copyright,
        "Should keep 'as represented by' continuation in copyright: {:?}",
        copyrights
    );

    let expected_holder =
        "United States Government as represented by the Secretary of the Navy";
    let found_holder = holders.iter().any(|d| d.holder == expected_holder);
    assert!(
        found_holder,
        "Should keep 'as represented by' continuation in holder: {:?}",
        holders
    );
}
/// Checks that an "as represented by the / Director, National Security
/// Agency" continuation is not absorbed: only "United States Government"
/// is kept.
#[test]
fn test_detect_copyright_does_not_absorb_unexpected_as_represented() {
    let text = "Copyright 1993 United States Government as represented by the\nDirector, National Security Agency.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(text);

    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == "Copyright 1993 United States Government");
    assert!(
        found_copyright,
        "Should keep only government without continuation: {:?}",
        copyrights
    );

    let found_holder = holders
        .iter()
        .any(|d| d.holder == "United States Government");
    assert!(
        found_holder,
        "Should keep only government holder without continuation: {:?}",
        holders
    );
}
/// Checks that a trailing "committers" word stays part of the holder and
/// that no author is reported.
#[test]
fn test_detect_copyright_holder_suffix_committers() {
    let input = "Copyright (c) 2006, 2007, 2008 XStream committers";
    let (copyrights, holders, authors) = detect_copyrights_from_text(input);

    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == "Copyright (c) 2006, 2007, 2008 XStream committers");
    assert!(
        found_copyright,
        "Should keep 'committers' as part of holder in copyright: {:?}",
        copyrights
    );

    let found_holder = holders.iter().any(|d| d.holder == "XStream committers");
    assert!(
        found_holder,
        "Should keep 'committers' as part of holder: {:?}",
        holders
    );

    assert!(
        authors.is_empty(),
        "Unexpected authors detected: {:?}",
        authors
    );
}
/// Checks that a lone "Contributors" after the year is treated as the holder
/// and not as an author.
#[test]
fn test_detect_copyright_holder_suffix_contributors_only() {
    let input = "Copyright (c) 2015, Contributors";
    let (copyrights, holders, authors) = detect_copyrights_from_text(input);

    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == "Copyright (c) 2015, Contributors");
    assert!(
        found_copyright,
        "Should keep Contributors in copyright: {:?}",
        copyrights
    );

    let found_holder = holders.iter().any(|d| d.holder == "Contributors");
    assert!(
        found_holder,
        "Should detect Contributors as holder: {:?}",
        holders
    );

    assert!(
        authors.is_empty(),
        "Unexpected authors detected: {:?}",
        authors
    );
}
/// Exercises the "authors & contributors" suffix at two levels: first the
/// parse tree (absorption starts right after the copyright node and the
/// trailing tokens include both words), then the end-to-end detection result.
#[test]
fn test_detect_copyright_holder_suffix_authors_and_contributors() {
    let text = "Copyright 2018-2019 @paritytech/substrate-light-ui authors & contributors";

    // Parse-tree level checks.
    let prepared = super::super::prepare::prepare_text_line(text);
    let tree = parse(get_tokens(&[(1, prepared)]));
    let (copyright_idx, copyright_node) = tree
        .iter()
        .enumerate()
        .find(|(_, node)| {
            matches!(
                node.label(),
                Some(TreeLabel::Copyright | TreeLabel::Copyright2)
            )
        })
        .expect("Should parse a COPYRIGHT node");
    let start = copyright_idx + 1;

    let starts_absorbing = should_start_absorbing(copyright_node, &tree, start);
    assert!(
        starts_absorbing,
        "Should start absorbing trailing suffix nodes; tree={:?}",
        tree
    );

    let (trailing, _skip) = collect_trailing_orphan_tokens(copyright_node, &tree, start);
    let has_authors = trailing
        .iter()
        .any(|tok| tok.value.eq_ignore_ascii_case("authors"));
    assert!(
        has_authors,
        "Trailing tokens should include 'authors', got: {:?}",
        trailing
    );
    let has_contributors = trailing
        .iter()
        .any(|tok| tok.value.eq_ignore_ascii_case("contributors"));
    assert!(
        has_contributors,
        "Trailing tokens should include 'contributors', got: {:?}",
        trailing
    );

    // End-to-end detection checks.
    let (copyrights, holders, authors) = detect_copyrights_from_text(text);

    let found_copyright = copyrights.iter().any(|d| d.copyright == text);
    assert!(
        found_copyright,
        "Should keep authors/contributors suffix in copyright: {:?}",
        copyrights
    );

    let found_holder = holders
        .iter()
        .any(|d| d.holder == "paritytech/substrate-light-ui authors & contributors");
    assert!(
        found_holder,
        "Should keep authors/contributors suffix in holder: {:?}",
        holders
    );

    assert!(
        authors.is_empty(),
        "Unexpected authors detected: {:?}",
        authors
    );
}
/// Exercises the "the respective contributors" suffix at two levels: first
/// the parse tree (absorption starts right after the copyright node and the
/// trailing tokens include "contributors"), then the end-to-end result.
#[test]
fn test_detect_copyright_holder_suffix_the_respective_contributors() {
    let text = "Copyright (c) 2014, 2015, the respective contributors";

    // Parse-tree level checks.
    let prepared = super::super::prepare::prepare_text_line(text);
    let tree = parse(get_tokens(&[(1, prepared)]));
    let (copyright_idx, copyright_node) = tree
        .iter()
        .enumerate()
        .find(|(_, node)| {
            matches!(
                node.label(),
                Some(TreeLabel::Copyright | TreeLabel::Copyright2)
            )
        })
        .expect("Should parse a COPYRIGHT node");
    let start = copyright_idx + 1;

    let starts_absorbing = should_start_absorbing(copyright_node, &tree, start);
    assert!(
        starts_absorbing,
        "Should start absorbing respective-contributors suffix; tree={:?}",
        tree
    );

    let (trailing, _skip) = collect_trailing_orphan_tokens(copyright_node, &tree, start);
    let has_contributors = trailing
        .iter()
        .any(|tok| tok.value.eq_ignore_ascii_case("contributors"));
    assert!(
        has_contributors,
        "Trailing tokens should include 'contributors', got: {:?}",
        trailing
    );

    // End-to-end detection checks.
    let (copyrights, holders, authors) = detect_copyrights_from_text(text);

    let found_copyright = copyrights.iter().any(|d| d.copyright == text);
    assert!(
        found_copyright,
        "Should keep the full respective-contributors suffix in copyright: {:?}",
        copyrights
    );

    let found_holder = holders
        .iter()
        .any(|d| d.holder == "the respective contributors");
    assert!(
        found_holder,
        "Should detect 'the respective contributors' as holder: {:?}",
        holders
    );

    assert!(
        authors.is_empty(),
        "Unexpected authors detected: {:?}",
        authors
    );
}
/// Checks that accented characters in a holder name survive detection in
/// both the copyright text and the holder.
#[test]
fn test_detect_copyright_unicode_holder() {
    let input = "Copyright 2024 François Müller";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    assert!(
        !copyrights.is_empty(),
        "Should detect copyright, got: {:?}",
        copyrights
    );
    let copyright_text = &copyrights[0].copyright;
    let keeps_full_name = copyright_text.contains("François Müller");
    assert!(
        keeps_full_name,
        "Copyright should preserve Unicode names: {}",
        copyright_text
    );

    assert!(
        !holders.is_empty(),
        "Should detect Unicode holder: {:?}",
        holders
    );
    let holder_text = &holders[0].holder;
    let keeps_some_part = holder_text.contains("Müller") || holder_text.contains("François");
    assert!(
        keeps_some_part,
        "Holder should preserve original Unicode name: {}",
        holder_text
    );
}
/// Checks that "All rights Reserved by <holder>" with a Japanese holder name
/// yields "Copyright (c) by <holder>" and the holder itself.
#[test]
fn test_detect_all_rights_reserved_by_unicode_holder() {
    let text = "Copyright (C) All rights Reserved by 株式会社 朝日住宅社";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(text);

    let found_copyright = copyrights
        .iter()
        .any(|d| d.copyright == "Copyright (c) by 株式会社 朝日住宅社");
    assert!(
        found_copyright,
        "Should detect reserved-by copyright with Unicode holder: {:?}",
        copyrights
    );

    let found_holder = holders.iter().any(|d| d.holder == "株式会社 朝日住宅社");
    assert!(
        found_holder,
        "Should detect Unicode holder from reserved-by line: {:?}",
        holders
    );
}
/// Checks that a copyright on line 1 and a "Written by" line on line 5 of the
/// same text yield one copyright, one holder and one author.
#[test]
fn test_detect_copyright_and_author_same_text() {
    let text = "Copyright 2024 Acme Inc.\n\n\n\nWritten by Jane Smith";
    let (copyrights, holders, authors) = detect_copyrights_from_text(text);

    assert_eq!(
        copyrights.len(),
        1,
        "Should detect one copyright, got: {:?}",
        copyrights
    );
    let copyright = &copyrights[0];
    assert_eq!(copyright.copyright, "Copyright 2024 Acme Inc.");
    assert_eq!(copyright.start_line, 1);

    assert_eq!(
        holders.len(),
        1,
        "Should detect one holder, got: {:?}",
        holders
    );
    assert_eq!(holders[0].holder, "Acme Inc.");

    assert_eq!(
        authors.len(),
        1,
        "Should detect one author, got: {:?}",
        authors
    );
    let author = &authors[0];
    assert_eq!(author.author, "Jane Smith");
    assert_eq!(author.start_line, 5);
}
/// Checks that "Written by <name>" yields exactly one author on line 1.
#[test]
fn test_detect_author_written_by() {
    let (_copyrights, _holders, authors) = detect_copyrights_from_text("Written by Jane Smith");

    assert_eq!(
        authors.len(),
        1,
        "Should detect one author, got: {:?}",
        authors
    );
    let only = &authors[0];
    assert_eq!(only.author, "Jane Smith");
    assert_eq!(only.start_line, 1);
    assert_eq!(only.end_line, 1);
}
/// Checks that "Maintained by <name>" yields exactly one author on line 1.
#[test]
fn test_detect_author_maintained_by() {
    let (_copyrights, _holders, authors) = detect_copyrights_from_text("Maintained by Bob Jones");

    assert_eq!(
        authors.len(),
        1,
        "Should detect one author, got: {:?}",
        authors
    );
    let only = &authors[0];
    assert_eq!(only.author, "Bob Jones");
    assert_eq!(only.start_line, 1);
    assert_eq!(only.end_line, 1);
}
/// Checks that a line starting with the "Authors" keyword yields one author
/// whose text contains the name.
#[test]
fn test_detect_author_authors_keyword() {
    let (_copyrights, _holders, authors) = detect_copyrights_from_text("Authors John Smith");

    assert_eq!(
        authors.len(),
        1,
        "Should detect author from 'Authors', got: {:?}",
        authors
    );
    let author_text = &authors[0].author;
    let has_name = author_text.contains("John Smith");
    assert!(has_name, "Author: {}", author_text);
}
/// Checks that a line starting with the "Contributors" keyword yields one
/// author whose text contains the name.
#[test]
fn test_detect_author_contributors_keyword() {
    let (_copyrights, _holders, authors) = detect_copyrights_from_text("Contributors Jane Doe");

    assert_eq!(
        authors.len(),
        1,
        "Should detect author from 'Contributors', got: {:?}",
        authors
    );
    let author_text = &authors[0].author;
    let has_name = author_text.contains("Jane Doe");
    assert!(has_name, "Author: {}", author_text);
}
/// Checks that an `SPDX-FileContributor:` line yields one author whose text
/// contains the contributor name.
#[test]
fn test_detect_author_spdx_contributor() {
    let input = "SPDX-FileContributor: Alice Johnson";
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(input);

    assert_eq!(
        authors.len(),
        1,
        "Should detect author from SPDX-FileContributor, got: {:?}",
        authors
    );
    let author_text = &authors[0].author;
    let has_name = author_text.contains("Alice Johnson");
    assert!(has_name, "Author: {}", author_text);
}
/// Checks text and start line for a single `(c)` statement whose holder is a
/// company name ending in "LLC".
#[test]
fn test_detect_copyright_with_company() {
    let (copyrights, holders, _authors) =
        detect_copyrights_from_text("Copyright (c) 2024 Google LLC");

    assert_eq!(
        copyrights.len(),
        1,
        "Should detect one copyright, got: {:?}",
        copyrights
    );
    let copyright = &copyrights[0];
    assert_eq!(copyright.copyright, "Copyright (c) 2024 Google LLC");
    assert_eq!(copyright.start_line, 1);

    assert_eq!(
        holders.len(),
        1,
        "Should detect one holder, got: {:?}",
        holders
    );
    let holder = &holders[0];
    assert_eq!(holder.holder, "Google LLC");
    assert_eq!(holder.start_line, 1);
}
/// Checks that "All rights reserved." is removed from the copyright text and
/// that the holder is the company name alone.
#[test]
fn test_detect_copyright_all_rights_reserved() {
    let input = "Copyright 2024 Apple Inc. All rights reserved.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    assert_eq!(
        copyrights.len(),
        1,
        "Should detect one copyright, got: {:?}",
        copyrights
    );
    let copyright = &copyrights[0];
    assert_eq!(
        copyright.copyright, "Copyright 2024 Apple Inc.",
        "All rights reserved should be stripped from copyright text"
    );
    assert_eq!(copyright.start_line, 1);

    assert_eq!(
        holders.len(),
        1,
        "Should detect one holder, got: {:?}",
        holders
    );
    let holder = &holders[0];
    assert_eq!(holder.holder, "Apple Inc.");
    assert_eq!(holder.start_line, 1);
}
/// Feeds `strip_all_rights_reserved` six hand-built tokens
/// ("Copyright 2024 Acme All Rights Reserved") and expects only the first
/// three to remain.
#[test]
fn test_strip_all_rights_reserved_basic() {
    // Every token in this test sits on line 1.
    let token = |value: &str, tag: PosTag| Token {
        value: value.to_string(),
        tag,
        start_line: 1,
    };
    let tokens = [
        token("Copyright", PosTag::Copy),
        token("2024", PosTag::Yr),
        token("Acme", PosTag::Nnp),
        token("All", PosTag::Nn),
        token("Rights", PosTag::Right),
        token("Reserved", PosTag::Reserved),
    ];

    let refs: Vec<&Token> = tokens.iter().collect();
    let kept = strip_all_rights_reserved(refs);

    assert_eq!(kept.len(), 3, "Should strip All Rights Reserved");
    assert_eq!(kept[0].value, "Copyright");
    assert_eq!(kept[1].value, "2024");
    assert_eq!(kept[2].value, "Acme");
}
/// Checks that `collect_filtered_leaves` drops leaves whose POS tag is in the
/// ignored-tag list: with `Copy` and `Yr` ignored, only "Acme" remains.
#[test]
fn test_collect_filtered_leaves_filters_pos_tags() {
    // Every leaf in this test sits on line 1.
    let leaf = |value: &str, tag: PosTag| {
        ParseNode::Leaf(Token {
            value: value.to_string(),
            tag,
            start_line: 1,
        })
    };
    let node = ParseNode::Tree {
        label: TreeLabel::Copyright,
        children: vec![
            leaf("Copyright", PosTag::Copy),
            leaf("2024", PosTag::Yr),
            leaf("Acme", PosTag::Nnp),
        ],
    };

    let ignored_tags = [PosTag::Copy, PosTag::Yr];
    let leaves = collect_filtered_leaves(&node, &[], &ignored_tags);

    assert_eq!(leaves.len(), 1);
    assert_eq!(leaves[0].value, "Acme");
}
/// Checks that `collect_filtered_leaves` skips whole subtrees whose label is
/// in the ignored-label list: with `YrRange` ignored, the year leaf nested
/// under it is dropped and "Copyright" / "Acme" remain.
#[test]
fn test_collect_filtered_leaves_filters_tree_labels() {
    // Every leaf in this test sits on line 1.
    let leaf = |value: &str, tag: PosTag| {
        ParseNode::Leaf(Token {
            value: value.to_string(),
            tag,
            start_line: 1,
        })
    };
    let year_range = ParseNode::Tree {
        label: TreeLabel::YrRange,
        children: vec![leaf("2024", PosTag::Yr)],
    };
    let node = ParseNode::Tree {
        label: TreeLabel::Copyright,
        children: vec![
            leaf("Copyright", PosTag::Copy),
            year_range,
            leaf("Acme", PosTag::Nnp),
        ],
    };

    let ignored_labels = [TreeLabel::YrRange];
    let leaves = collect_filtered_leaves(&node, &ignored_labels, &[]);

    assert_eq!(leaves.len(), 2);
    assert_eq!(leaves[0].value, "Copyright");
    assert_eq!(leaves[1].value, "Acme");
}
/// Checks that a URL following the holder is kept in the copyright text with
/// its trailing slash removed, and that the holder excludes the URL.
#[test]
fn test_detect_copyright_url_trailing_slash() {
    let input = "Copyright (c) 2007 Free Software Foundation, Inc. http://fsf.org/";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    assert_eq!(
        copyrights.len(),
        1,
        "Should detect one copyright, got: {:?}",
        copyrights
    );
    let copyright = &copyrights[0];
    assert_eq!(
        copyright.copyright, "Copyright (c) 2007 Free Software Foundation, Inc. http://fsf.org",
        "Should strip trailing URL slash"
    );

    assert_eq!(
        holders.len(),
        1,
        "Should detect one holder, got: {:?}",
        holders
    );
    assert_eq!(holders[0].holder, "Free Software Foundation, Inc.");
}
/// Checks that a URL wrapped in angle brackets loses both the brackets and
/// its trailing slash, and that the U+00A9 sign is reported as `(c)`.
#[test]
fn test_detect_copyright_url_angle_brackets_trailing_slash() {
    let input = "Copyright \u{00A9} 2007 Free Software Foundation, Inc. <http://fsf.org/>";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(input);

    assert_eq!(
        copyrights.len(),
        1,
        "Should detect one copyright, got: {:?}",
        copyrights
    );
    let copyright = &copyrights[0];
    assert_eq!(
        copyright.copyright, "Copyright (c) 2007 Free Software Foundation, Inc. http://fsf.org",
        "Should strip angle brackets and trailing URL slash"
    );
}
/// Runs detection over the Affero GPL v3 fixture and expects the FSF
/// copyright with the URL's trailing slash removed.
#[test]
fn test_detect_copyright_url_slash_full_file() {
    let content =
        std::fs::read_to_string("testdata/copyright-golden/copyrights/afferogplv3-AfferoGPLv")
            .unwrap();
    let (c, _h, _a) = detect_copyrights_from_text(&content);
    assert!(!c.is_empty(), "Should detect copyright");
    // Report what was detected on failure so the mismatch can be diagnosed
    // without re-running the detector by hand.
    assert!(
        c.iter()
            .any(|cr| cr.copyright
                == "Copyright (c) 2007 Free Software Foundation, Inc. http://fsf.org"),
        "Should strip trailing URL slash, got: {:?}",
        c.iter().map(|cr| &cr.copyright).collect::<Vec<_>>()
    );
}
/// Checks that preparing and refining the raw relay.h comment line yields the
/// expected copyright with `(C)` lowercased to `(c)`.
#[test]
fn test_refine_relay_tom_zanussi_line() {
    let raw = " * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp";
    let prepared = crate::copyright::prepare::prepare_text_line(raw);

    let expected =
        Some("Copyright (c) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp".to_string());
    let refined = refine_copyright(&prepared);

    assert_eq!(refined, expected);
}
/// Runs detection over the `relay.h` fixture and expects both a holder
/// mentioning Tom Zanussi and the full Tom Zanussi copyright line.
#[test]
fn test_add_missing_copyrights_for_relay_holder_line() {
    let fixture = "testdata/copyright-golden/ics/kernel-headers-original-linux/relay.h";
    let content = fs::read_to_string(fixture).unwrap();
    let (copyrights, holders, _authors) = detect_copyrights_from_text(&content);

    let has_tom_holder = holders.iter().any(|d| d.holder.contains("Tom Zanussi"));
    assert!(has_tom_holder, "expected Tom holder");

    let expected_copyright =
        "Copyright (c) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp";
    let has_tom_copyright = copyrights
        .iter()
        .any(|d| d.copyright == expected_copyright);
    assert!(
        has_tom_copyright,
        "expected Tom copyright added, got: {:?}",
        copyrights
    );
}
/// Reads the `strverscmp.c` fixture as raw bytes, decodes it, and expects an
/// author containing "Jean-François Bignolles" with the cedilla preserved.
#[test]
fn test_contributed_by_with_latin1_diacritics() {
    let bytes = fs::read("testdata/copyright-golden/authors/strverscmp.c").unwrap();
    let text = crate::utils::file::decode_bytes_to_string(&bytes);
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(&text);

    let expected_name = "Jean-Fran\u{00e7}ois Bignolles";
    let found = authors.iter().any(|d| d.author.contains(expected_name));
    assert!(
        found,
        "Should detect author with preserved diacritics, got: {:?}",
        authors
    );
}
/// Reads the `strverscmp2.c` fixture as raw bytes, decodes it, and expects an
/// author containing "Jean-François Bignolles" with the cedilla preserved.
#[test]
fn test_contributed_by_with_utf8_diacritics() {
    let bytes = fs::read("testdata/copyright-golden/authors/strverscmp2.c").unwrap();
    let text = crate::utils::file::decode_bytes_to_string(&bytes);
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(&text);

    let expected_name = "Jean-Fran\u{00e7}ois Bignolles";
    let found = authors.iter().any(|d| d.author.contains(expected_name));
    assert!(
        found,
        "Should detect author with preserved diacritics, got: {:?}",
        authors
    );
}
/// Checks that a "<date> by <name> <email>" line after a copyright yields an
/// author containing "Wes Peters".
#[test]
fn test_date_by_author() {
    // The literal's continuation lines start at column 0 on purpose: any
    // leading whitespace would become part of the input text.
    let content = "\
Copyright (c) 1998 Softweyr LLC. All rights reserved.
strtok_r, from Berkeley strtok
Oct 13, 1998 by Wes Peters <wes@softweyr.com>";
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(content);

    let found = authors.iter().any(|d| d.author.contains("Wes Peters"));
    assert!(
        found,
        "Should detect Wes Peters as author, got: {:?}",
        authors
    );
}
/// Checks that "Copyright 2002 OProfile authors" is detected with
/// "OProfile authors" as holder.
///
/// The prepared line, tokens, parse tree and refined text for the first
/// comment line are computed only so they can be printed when an assertion
/// fails.
#[test]
fn test_oprofile_authors_copyright() {
    // The literal's continuation lines start at column 0 on purpose: any
    // leading whitespace would become part of the input text.
    let content = " * @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @Modifications Daniel Hansel
* Modified by Aravind Menon for Xen
* These modifications are:
* Copyright (C) 2005 Hewlett-Packard Co.";
    let (c, h, _a) = detect_copyrights_from_text(content);

    let prepared_line =
        crate::copyright::prepare::prepare_text_line(" * @remark Copyright 2002 OProfile authors");
    let tokens = crate::copyright::lexer::get_tokens(&[(1, prepared_line.clone())]);
    // Render the tokens before parsing so `parse` can take ownership of the
    // vector; this removes the clone the test previously needed.
    let token_debug: Vec<String> = tokens
        .iter()
        .map(|t| format!("{}:{:?}", t.value, t.tag))
        .collect();
    let parsed = crate::copyright::parser::parse(tokens);
    let refined = crate::copyright::refiner::refine_copyright(&prepared_line);
    let parsed_debug: Vec<String> = parsed
        .iter()
        .map(|n| {
            let leaves: Vec<String> = crate::copyright::detector::collect_all_leaves(n)
                .iter()
                .map(|t| format!("{}:{:?}", t.value, t.tag))
                .collect();
            format!("label={:?} tag={:?} leaves={leaves:?}", n.label(), n.tag())
        })
        .collect();

    assert!(
        c.iter()
            .any(|cr| cr.copyright == "Copyright 2002 OProfile authors"),
        "Should detect 'Copyright 2002 OProfile authors'. prepared={prepared_line:?} refined={refined:?} tokens={token_debug:?} parsed={parsed_debug:?} got: {c:?}",
    );
    assert!(
        h.iter().any(|h| h.holder == "OProfile authors"),
        "Should detect 'OProfile authors' holder. prepared={prepared_line:?} tokens={token_debug:?} got: {h:?}",
    );
}
/// Unit test for `drop_shadowed_c_sign_variants`.
///
/// Of the two "Linux Foundation" entries on the same line, only the `(c)`
/// form is expected to remain. The two "Gailly and Mark Adler" entries sit on
/// different lines (10 and 2) and are both expected to remain.
#[test]
fn test_drop_shadowed_c_sign_variants_unit() {
    let mut c = vec![
        CopyrightDetection {
            copyright: String::from("Copyright 2007, 2010 Linux Foundation"),
            start_line: 1,
            end_line: 1,
        },
        CopyrightDetection {
            copyright: String::from("Copyright (c) 2007, 2010 Linux Foundation"),
            start_line: 1,
            end_line: 1,
        },
        CopyrightDetection {
            copyright: String::from("Copyright 1995-2010 Jean-loup Gailly and Mark Adler"),
            start_line: 10,
            end_line: 10,
        },
        CopyrightDetection {
            copyright: String::from("Copyright (c) 1995-2010 Jean-loup Gailly and Mark Adler"),
            start_line: 2,
            end_line: 2,
        },
    ];

    drop_shadowed_c_sign_variants(&mut c);

    // Compare as sorted lists so the check does not depend on output order.
    let mut expected = vec![
        "Copyright (c) 1995-2010 Jean-loup Gailly and Mark Adler",
        "Copyright (c) 2007, 2010 Linux Foundation",
        "Copyright 1995-2010 Jean-loup Gailly and Mark Adler",
    ];
    expected.sort();
    let mut got: Vec<&str> = c.iter().map(|d| d.copyright.as_str()).collect();
    got.sort();

    assert_eq!(got, expected, "After dropping variants, got: {c:?}");
}
/// Checks that the Linux Foundation line is reported with its holder and that
/// the years-only form "Copyright (c) 2007, 2010" is not also reported.
#[test]
fn test_linux_foundation_line_prefers_holder_variant_over_bare_years() {
    let content = "* Copyright (c) 2007, 2010 Linux Foundation";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(content);

    let has_full = copyrights
        .iter()
        .any(|d| d.copyright == "Copyright (c) 2007, 2010 Linux Foundation");
    assert!(has_full, "copyrights: {:?}", copyrights);

    let lacks_bare_years = copyrights
        .iter()
        .all(|d| d.copyright != "Copyright (c) 2007, 2010");
    assert!(lacks_bare_years, "copyrights: {:?}", copyrights);
}
/// Checks that an "Originally by <name> <email>, <year>" comment line yields
/// an author containing "Gordon Matzigkeit".
#[test]
fn test_originally_by_author() {
    // The literal's continuation lines start at column 0 on purpose: any
    // leading whitespace would become part of the input text.
    let content = "\
# Copyright 1996-2006 Free Software Foundation, Inc.
# Taken from GNU libtool, 2001
# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996";
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(content);

    let found = authors
        .iter()
        .any(|d| d.author.contains("Gordon Matzigkeit"));
    assert!(
        found,
        "Should detect Gordon Matzigkeit as author, got: {:?}",
        authors
    );
}
/// Runs detection over the `var_route.c` author fixture and expects three
/// authors to be found by substring.
#[test]
fn test_by_name_email_author_full_file() {
    let fixture = "testdata/copyright-golden/authors/author_var_route_c-var_route_c.c";
    let content = fs::read_to_string(fixture).unwrap();
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(&content);

    let mentions = |needle: &str| authors.iter().any(|d| d.author.contains(needle));

    assert!(
        mentions("Jennifer Bray of Origin"),
        "Should detect Jennifer Bray, got: {:?}",
        authors
    );
    assert!(
        mentions("Erik Schoenfelder"),
        "Should detect Erik Schoenfelder, got: {:?}",
        authors
    );
    assert!(
        mentions("Simon Leinen"),
        "Should detect Simon Leinen, got: {:?}",
        authors
    );
}
/// Runs detection over the `author_uc-LICENSE` fixture and expects both the
/// short "UC Berkeley" and long "University of California, Berkeley"
/// contributor authors.
#[test]
fn test_author_uc_contributors() {
    let fixture = "testdata/copyright-golden/authors/author_uc-LICENSE";
    let content = fs::read_to_string(fixture).unwrap();
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(&content);

    // The short form must match exactly.
    let has_short = authors
        .iter()
        .any(|d| d.author == "UC Berkeley and its contributors");
    assert!(
        has_short,
        "Should detect 'UC Berkeley and its contributors', got: {:?}",
        authors
    );

    // The long form only has to appear as a substring.
    let long_form = "University of California, Berkeley and its contributors";
    let has_long = authors.iter().any(|d| d.author.contains(long_form));
    assert!(
        has_long,
        "Should detect 'University of California, Berkeley and its contributors', got: {:?}",
        authors
    );
}
/// Checks that two tab-indented copyright lines on adjacent lines are both
/// detected, and that the second one (TGV) also yields a holder.
#[test]
fn test_multiline_two_copyrights_adjacent_lines() {
    let input = "\tCopyright 1988, 1989 by Carnegie Mellon University\n\tCopyright 1989\tTGV, Incorporated\n";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let has_cmu = copyrights
        .iter()
        .any(|d| d.copyright.contains("Carnegie Mellon"));
    assert!(has_cmu, "Should detect CMU copyright");

    let has_tgv_copyright = copyrights.iter().any(|d| d.copyright.contains("TGV"));
    assert!(
        has_tgv_copyright,
        "Should detect TGV copyright, got: {:?}",
        copyrights
    );

    let has_tgv_holder = holders.iter().any(|d| d.holder.contains("TGV"));
    assert!(
        has_tgv_holder,
        "Should detect TGV holder, got: {:?}",
        holders
    );
}
/// Checks that a copyright line directly after a "Created: ... by <email>"
/// line is still detected, together with its holder.
#[test]
fn test_multiline_copyright_after_created_line() {
    let input = "// Created: Sun Feb  9 10:06:01 2003 by faith@dict.org\n// Copyright 2003, 2004 Rickard E. Faith (faith@dict.org)\n";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let has_copyright = copyrights.iter().any(|d| d.copyright.contains("Rickard"));
    assert!(
        has_copyright,
        "Should detect Faith copyright, got: {:?}",
        copyrights
    );

    let has_holder = holders.iter().any(|d| d.holder.contains("Faith"));
    assert!(
        has_holder,
        "Should detect Faith holder, got: {:?}",
        holders
    );
}
/// Runs detection over the `co-maintainer.txt` fixture and expects every
/// listed maintainer to appear as an exact author string.
#[test]
fn test_co_maintainer_fixture_extracts_authors() {
    let content =
        std::fs::read_to_string("testdata/copyright-golden/copyrights/misco4/co-maintainer.txt")
            .unwrap();
    let (_c, _h, a) = detect_copyrights_from_text(&content);
    let authors: Vec<&str> = a.iter().map(|a| a.author.as_str()).collect();
    for expected in [
        "Norbert Tretkowski <nobse@debian.org>",
        "Jeff Bailey <jbailey@raspberryginger.com>",
        "Rob Weir <rweir@ertius.org>",
        "Andres Salomon <dilinger@debian.org>",
        "Lars Wirzenius <liw@iki.fi>",
        "Adeodato Simó <dato@net.com.org.es>",
        "Wouter van Heyst <larstiq@larstiq.dyndns.org>",
        "Jelmer Vernooij <jelmer@samba.org>",
        "the pkg-bazaar team",
    ] {
        // Name the missing entry: the assertion runs in a loop, so the author
        // list alone does not say which expectation failed.
        assert!(
            authors.contains(&expected),
            "missing author {expected:?}; authors: {authors:#?}"
        );
    }
}
/// Runs detection over the `debianized-by.txt` fixture and expects Christian
/// Marillat (with e-mail) as an exact author string.
#[test]
fn test_debianized_by_fixture_extracts_author() {
    let fixture = "testdata/copyright-golden/copyrights/misco4/debianized-by.txt";
    let content = fs::read_to_string(fixture).unwrap();
    let (_copyrights, _holders, detections) = detect_copyrights_from_text(&content);
    let authors: Vec<&str> = detections.iter().map(|d| d.author.as_str()).collect();

    let expected = "Christian Marillat <marillat@debian.org>";
    let found = authors.iter().any(|name| *name == expected);
    assert!(found, "authors: {authors:#?}");
}
/// Runs detection over the `final-agreement.txt` fixture and expects
/// "the Project" as an exact author string.
#[test]
fn test_final_agreement_fixture_extracts_created_by_author() {
    let fixture = "testdata/copyright-golden/copyrights/misco4/final-agreement.txt";
    let content = fs::read_to_string(fixture).unwrap();
    let (_copyrights, _holders, detections) = detect_copyrights_from_text(&content);
    let authors: Vec<&str> = detections.iter().map(|d| d.author.as_str()).collect();

    let found = authors.iter().any(|name| *name == "the Project");
    assert!(found, "authors: {authors:#?}");
}
/// Runs detection over the `sata_mv.c` fixture and expects the "written by"
/// block to be merged into a single author string.
#[test]
fn test_sata_mv_fixture_merges_written_by_author_block() {
    let fixture =
        "testdata/copyright-golden/copyrights/misco4/linux-copyrights/drivers/ata/sata_mv.c";
    let content = fs::read_to_string(fixture).unwrap();
    let (_copyrights, _holders, detections) = detect_copyrights_from_text(&content);
    let authors: Vec<&str> = detections.iter().map(|d| d.author.as_str()).collect();

    let expected =
        "Brett Russ. Extensive overhaul and enhancement by Mark Lord <mlord@pobox.com>";
    let found = authors.iter().any(|name| *name == expected);
    assert!(found, "authors: {authors:#?}");
}
/// Runs detection over the `hid-appleir.c` fixture and expects the multi-line
/// credits block to be merged into a single author string.
#[test]
fn test_hid_appleir_fixture_merges_written_by_author_block() {
    let fixture =
        "testdata/copyright-golden/copyrights/misco4/linux-copyrights/drivers/hid/hid-appleir.c";
    let content = fs::read_to_string(fixture).unwrap();
    let (_copyrights, _holders, detections) = detect_copyrights_from_text(&content);
    let authors: Vec<&str> = detections.iter().map(|d| d.author.as_str()).collect();

    let expected = "James McKenzie Ported to recent 2.6 kernel versions by Greg Kroah-Hartman <gregkh@suse.de> Updated to support newer remotes by Bastien Nocera <hadess@hadess.net> Ported to HID subsystem by Benjamin Tissoires <benjamin.tissoires@gmail.com>";
    let found = authors.iter().any(|name| *name == expected);
    assert!(found, "authors: {authors:#?}");
}
/// Runs detection over the `dvb_frontend.h` fixture and expects the
/// multi-line credits block to be merged into a single author string.
#[test]
fn test_dvb_frontend_fixture_merges_written_by_author_block() {
    let fixture =
        "testdata/copyright-golden/copyrights/misco4/linux-copyrights/include/media/dvb_frontend.h";
    let content = fs::read_to_string(fixture).unwrap();
    let (_copyrights, _holders, detections) = detect_copyrights_from_text(&content);
    let authors: Vec<&str> = detections.iter().map(|d| d.author.as_str()).collect();

    let expected = "Ralph Metzler Overhauled by Holger Waechtler Kernel I2C stuff by Michael Hunold <hunold@convergence.de>";
    let found = authors.iter().any(|name| *name == expected);
    assert!(found, "authors: {authors:#?}");
}
/// Checks that an "Author:" line directly after a year-only copyright line is
/// folded into the copyright and holder, and produces no separate author.
#[test]
fn test_auth_nl_copyright_not_author() {
    let input = "* Copyright (C) 2016-2018\n* Author: Matt Ranostay <matt.ranostay@konsulko.com>";
    let (copyrights, holders, authors) = detect_copyrights_from_text(input);

    let in_copyright = copyrights
        .iter()
        .any(|d| d.copyright.contains("Matt Ranostay"));
    assert!(
        in_copyright,
        "Should detect copyright with Matt Ranostay, got: {:?}",
        copyrights
    );

    let in_holder = holders.iter().any(|d| d.holder.contains("Matt Ranostay"));
    assert!(
        in_holder,
        "Should detect Matt Ranostay as holder, got: {:?}",
        holders
    );

    assert!(
        authors.is_empty(),
        "Should NOT detect authors (Author: is part of copyright), got: {:?}",
        authors
    );
}
/// Checks that a NOTICE-style block of nine copyright lines with mixed
/// comment prefixes yields at least nine detections.
#[test]
fn test_notice_file_multiple_copyrights() {
    // Each `\n\` ends a line; whitespace at the start of the following source
    // line is skipped, so the indentation here is not part of the input.
    let text = " Copyright (C) 1997, 2002, 2005 Free Software Foundation, Inc.\n\
        * Copyright (C) 2005 Jens Axboe <axboe@suse.de>\n\
        * Copyright (C) 2006 Alan D. Brunelle <Alan.Brunelle@hp.com>\n\
        * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>\n\
        * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)\n\
        * Copyright (C) 2009 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)\n\
        * Copyright IBM Corp. 2008\n\
        # Copyright (c) 2005 SUSE LINUX Products GmbH, Nuernberg, Germany.\n\
        # Copyright (c) 2005 Silicon Graphics, Inc.";
    let (copyrights, _holders, _authors) = detect_copyrights_from_text(text);
    let cr_texts: Vec<&str> = copyrights.iter().map(|d| d.copyright.as_str()).collect();

    let detected = copyrights.len();
    assert!(
        detected >= 9,
        "Should detect at least 9 copyrights, got {}: {:?}",
        detected,
        cr_texts
    );
}
/// A "copyrighted by ..." sentence ending in `Copyright (c) 1993-2008, all
/// rights reserved.` must be reported as one statement that stops after the
/// year range.
#[test]
fn test_doc_doc_no_overabsorb() {
    let input = "are copyrighted by Douglas C. Schmidt and his research group at Washington University, University of California, Irvine, and Vanderbilt University, Copyright (c) 1993-2008, all rights reserved.";
    let expected = "copyrighted by Douglas C. Schmidt and his research group at Washington University, University of California, Irvine, and Vanderbilt University, Copyright (c) 1993-2008";

    let (detections, _holders, _authors) = detect_copyrights_from_text(input);
    let has_expected = detections.iter().any(|det| det.copyright == expected);
    assert!(
        has_expected,
        "Should merge trailing Copyright (c) clause, got: {detections:?}"
    );
}
/// A "copyrighted by" sentence whose names are wrapped in `<A HREF=...>` tags
/// and split across lines must come back as one statement (URLs inlined before
/// each name) whose span covers more than one line.
#[test]
fn test_multiline_copyrighted_by_href_links_merges_trailing_copyright_clause() {
    let input = "copyrighted by <A\nHREF=\"http://www.dre.vanderbilt.edu/~schmidt/\">Douglas C. Schmidt</A>\nand his <a\nHREF=\"http://www.cs.wustl.edu/~schmidt/ACE-members.html\">research\ngroup</a> at <A HREF=\"http://www.wustl.edu/\">Washington\nUniversity</A>, <A HREF=\"http://www.uci.edu\">University of California,\nIrvine</A>, and <A HREF=\"http://www.vanderbilt.edu\">Vanderbilt\nUniversity</A>, Copyright (c) 1993-2009, all rights reserved.";
    let expected = "copyrighted by http://www.dre.vanderbilt.edu/~schmidt/ Douglas C. Schmidt and his http://www.cs.wustl.edu/~schmidt/ACE-members.html research group at http://www.wustl.edu/ Washington University, http://www.uci.edu University of California, Irvine, and http://www.vanderbilt.edu Vanderbilt University, Copyright (c) 1993-2009";

    let (detections, _holders, _authors) = detect_copyrights_from_text(input);

    // Locate the merged statement once; a miss reports every detection.
    let merged = match detections.iter().find(|det| det.copyright == expected) {
        Some(hit) => hit,
        None => panic!(
            "Expected merged copyrighted-by href copyright, got: {:?}",
            detections
        ),
    };
    assert!(
        merged.end_line > merged.start_line,
        "Expected merged span to extend across lines, got: {merged:?}"
    );
}
/// An `<a href=...>` element spread over three lines around the word
/// "copyright" must yield a copyright and a holder that both span lines 1-3.
#[test]
fn test_html_anchor_copyright_url_multiline_span_preserved() {
    let input = "<a href=\"https://example.com/path\">\ncopyright\n</a>";
    let (c, h, _a) = detect_copyrights_from_text(input);

    // Panic with the full detection list when the lookup misses; a bare
    // `unwrap()` would hide what was actually detected.
    let cd = c
        .iter()
        .find(|cr| cr.copyright == "copyright https://example.com/path")
        .unwrap_or_else(|| panic!("anchor copyright not detected, copyrights: {:?}", c));
    assert_eq!((cd.start_line, cd.end_line), (1, 3), "copyrights: {c:?}");

    let hd = h
        .iter()
        .find(|hr| hr.holder == "https://example.com/path")
        .unwrap_or_else(|| panic!("anchor URL holder not detected, holders: {:?}", h));
    assert_eq!((hd.start_line, hd.end_line), (1, 3), "holders: {h:?}");
}
/// For two angle-bracket URLs each broken by a newline, the normalized text
/// must contain the URL and the text after the break joined by a single space.
#[test]
fn test_normalize_split_angle_bracket_urls_keeps_tail() {
    let input = "Copyright Krzysztof <https://github.com\nHavret>, Stack Builders <https://github.com\nstackbuilders>, end";
    let normalized = super::normalize_split_angle_bracket_urls(input);
    let text: &str = normalized.as_ref();

    let first_joined = text.contains("https://github.com Havret");
    let second_joined = text.contains("https://github.com stackbuilders");
    assert!(first_joined && second_joined, "normalized: {text:?}");
}
/// A holder name starting with "Academy" must be kept in full in both the
/// copyright statement and the holder.
#[test]
fn test_academy_copyright() {
    let input = "Copyright (c) 2006 Academy of Motion Picture Arts and Sciences";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let expected_copyright = "Copyright (c) 2006 Academy of Motion Picture Arts and Sciences";
    let copyright_found = copyrights
        .iter()
        .any(|det| det.copyright == expected_copyright);
    assert!(
        copyright_found,
        "Should detect Academy copyright, got: {copyrights:?}"
    );

    let expected_holder = "Academy of Motion Picture Arts and Sciences";
    let holder_found = holders.iter().any(|det| det.holder == expected_holder);
    assert!(
        holder_found,
        "Should detect Academy holder, got: {holders:?}"
    );
}
/// C preprocessor defines: the quoted `COPYRIGHT` string must yield a copyright
/// and holder, and the `MODULEAUTHOR` define must yield an author.
#[test]
fn test_define_copyright() {
    let input = "#define COPYRIGHT \"Copyright (c) 1999-2008 LSI Corporation\"\n#define MODULEAUTHOR \"LSI Corporation\"";
    let (copyrights, holders, authors) = detect_copyrights_from_text(input);

    let copyright_found = copyrights
        .iter()
        .any(|det| det.copyright == "Copyright (c) 1999-2008 LSI Corporation");
    assert!(
        copyright_found,
        "Should detect 'Copyright (c) 1999-2008 LSI Corporation', got: {copyrights:?}"
    );

    let holder_found = holders.iter().any(|det| det.holder == "LSI Corporation");
    assert!(holder_found, "Should detect holder, got: {holders:?}");

    let author_found = authors.iter().any(|det| det.author == "LSI Corporation");
    assert!(
        author_found,
        "Should detect author from MODULEAUTHOR, got: {authors:?}"
    );
}
/// A `Parts (C) <year> <name>, <email>` line must yield exactly one copyright
/// (with the sign rendered as `(c)`) and exactly one holder without the email.
/// Tokens and parse-tree labels are computed only to enrich failure messages.
#[test]
fn test_parts_copyright_prefix() {
    let input = " * Parts (C) 1999 David Airlie, airlied@linux.ie";

    // Re-run candidate collection, tokenizing and parsing by hand for diagnostics.
    let numbered_lines: Vec<(usize, String)> = input
        .lines()
        .enumerate()
        .map(|(idx, text)| (idx + 1, text.to_string()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);
    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    let token_dump: Vec<(String, PosTag)> = tokens
        .iter()
        .map(|tok| (tok.value.clone(), tok.tag))
        .collect();
    let tree = if tokens.is_empty() {
        Vec::new()
    } else {
        parse(tokens)
    };
    let label_dump: Vec<Option<TreeLabel>> = tree.iter().map(|node| node.label()).collect();

    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    assert_eq!(
        copyrights.len(),
        1,
        "tokens: {token_dump:#?}\nlabels: {label_dump:#?}\nexpected one copyright, got: {copyrights:#?}"
    );
    assert_eq!(
        copyrights[0].copyright, "Parts (c) 1999 David Airlie, airlied@linux.ie",
        "tokens: {token_dump:#?}\nlabels: {label_dump:#?}\ncopyrights: {copyrights:#?}"
    );
    assert_eq!(
        holders.len(),
        1,
        "tokens: {token_dump:#?}\nlabels: {label_dump:#?}\nexpected one holder, got: {holders:#?}"
    );
    assert_eq!(holders[0].holder, "David Airlie");
}
/// Table-driven check: when the year(s) follow the holder name, the copyright
/// statement keeps the trailing year(s) and the holder excludes them.
#[test]
fn test_trailing_year_included_in_copyright() {
    // (input, expected copyright, expected holder)
    let cases = [
        (
            "Copyright (c) IBM Corporation 2008",
            "Copyright (c) IBM Corporation 2008",
            "IBM Corporation",
        ),
        (
            "Copyright (c) Zeus Technology Limited 1996",
            "Copyright (c) Zeus Technology Limited 1996",
            "Zeus Technology Limited",
        ),
        (
            "Copyright IBM, Corp. 2007",
            "Copyright IBM, Corp. 2007",
            "IBM, Corp.",
        ),
        (
            "Copyright IBM Corp. 2004, 2010",
            "Copyright IBM Corp. 2004, 2010",
            "IBM Corp.",
        ),
    ];

    for &(input, want_copyright, want_holder) in cases.iter() {
        let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

        let copyright_found = copyrights.iter().any(|det| det.copyright == want_copyright);
        assert!(
            copyright_found,
            "For '{}': expected CR '{}', got {:?}",
            input,
            want_copyright,
            copyrights.iter().map(|det| &det.copyright).collect::<Vec<_>>()
        );

        let holder_found = holders.iter().any(|det| det.holder == want_holder);
        assert!(
            holder_found,
            "For '{}': expected holder '{}', got {:?}",
            input,
            want_holder,
            holders.iter().map(|det| &det.holder).collect::<Vec<_>>()
        );
    }
}
/// An all-caps holder following a spaced year range (`2006 - 2009`) must appear
/// in both the copyright statement and the holder.
#[test]
fn test_holder_after_year_range_absorbed() {
    let input = "COPYRIGHT (c) 2006 - 2009 DIONYSOS";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let in_copyright = copyrights
        .iter()
        .any(|det| det.copyright.contains("DIONYSOS"));
    assert!(
        in_copyright,
        "Should include 'DIONYSOS' in copyright, got: {:?}",
        copyrights.iter().map(|det| &det.copyright).collect::<Vec<_>>()
    );

    let in_holder = holders.iter().any(|det| det.holder.contains("DIONYSOS"));
    assert!(
        in_holder,
        "Should include 'DIONYSOS' in holder, got: {:?}",
        holders.iter().map(|det| &det.holder).collect::<Vec<_>>()
    );
}
/// A three-word company name after a year range must be kept whole in both the
/// copyright statement and the holder.
#[test]
fn test_multi_word_holder_after_year_range() {
    let input = "Copyright (C) 1999-2000 VA Linux Systems";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let in_copyright = copyrights
        .iter()
        .any(|det| det.copyright.contains("VA Linux Systems"));
    assert!(
        in_copyright,
        "Should include full company name, got: {:?}",
        copyrights.iter().map(|det| &det.copyright).collect::<Vec<_>>()
    );

    let in_holder = holders
        .iter()
        .any(|det| det.holder.contains("VA Linux Systems"));
    assert!(
        in_holder,
        "Should include full company name in holder, got: {:?}",
        holders.iter().map(|det| &det.holder).collect::<Vec<_>>()
    );
}
/// A holder introduced with `by` after a year list must appear in both the
/// copyright statement and the holder.
#[test]
fn test_by_keyword_holder_captured() {
    let input = "Copyright (c) 1991, 2000, 2001 by Lucent Technologies.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let in_copyright = copyrights
        .iter()
        .any(|det| det.copyright.contains("Lucent Technologies"));
    assert!(
        in_copyright,
        "Should include holder after 'by', got: {:?}",
        copyrights.iter().map(|det| &det.copyright).collect::<Vec<_>>()
    );

    let in_holder = holders
        .iter()
        .any(|det| det.holder.contains("Lucent Technologies"));
    assert!(
        in_holder,
        "Should include holder after 'by', got: {:?}",
        holders.iter().map(|det| &det.holder).collect::<Vec<_>>()
    );
}
/// A company name containing a number (`Group 42, Inc.`) must be kept in both
/// the copyright statement and the holder.
#[test]
fn test_holder_company_with_digits_absorbed() {
    let input = "Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let in_copyright = copyrights
        .iter()
        .any(|det| det.copyright.contains("Group 42, Inc."));
    assert!(
        in_copyright,
        "Should include full company name with digits, got: {:?}",
        copyrights.iter().map(|det| &det.copyright).collect::<Vec<_>>()
    );

    let in_holder = holders
        .iter()
        .any(|det| det.holder.contains("Group 42, Inc."));
    assert!(
        in_holder,
        "Should include full company name with digits in holder, got: {:?}",
        holders.iter().map(|det| &det.holder).collect::<Vec<_>>()
    );
}
/// A ` - <email>` tail after the name must stay in the copyright statement
/// while the holder is the bare name.
#[test]
fn test_copyright_dash_email_tail_absorbed() {
    let input = "Copyright (c) 1999, Bob Withers - bwit@pobox.com";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let email_kept = copyrights
        .iter()
        .any(|det| det.copyright.contains("bwit@pobox.com"));
    assert!(
        email_kept,
        "Should include dash-email tail in copyright, got: {:?}",
        copyrights.iter().map(|det| &det.copyright).collect::<Vec<_>>()
    );

    let holder_is_name = holders.iter().any(|det| det.holder == "Bob Withers");
    assert!(
        holder_is_name,
        "Expected holder 'Bob Withers', got: {:?}",
        holders.iter().map(|det| &det.holder).collect::<Vec<_>>()
    );
}
/// A parenthesised group (`(W3C)`) between the year range and the holder list
/// must not stop the copyright from including `MIT, ERCIM, Keio University`.
/// Tokens and parse-tree nodes are computed only to enrich the failure message.
#[test]
fn test_w3c_paren_group_debug() {
    let input = "(c) 1998-2008 (W3C) MIT, ERCIM, Keio University";

    // Re-run candidate collection, tokenizing and parsing by hand for diagnostics.
    let numbered_lines: Vec<(usize, String)> = input
        .lines()
        .enumerate()
        .map(|(idx, text)| (idx + 1, text.to_string()))
        .collect();
    let groups = crate::copyright::candidates::collect_candidate_lines(numbered_lines);
    let tokens: Vec<Token> = match groups.first() {
        Some(group) => get_tokens(group),
        None => Vec::new(),
    };
    let token_dump: Vec<(String, PosTag)> = tokens
        .iter()
        .map(|tok| (tok.value.clone(), tok.tag))
        .collect();
    let tree = if tokens.is_empty() {
        Vec::new()
    } else {
        parse(tokens)
    };
    let label_dump: Vec<(String, Option<TreeLabel>)> = tree
        .iter()
        .map(|node| (format!("{node:?}"), node.label()))
        .collect();

    let (copyrights, _holders, _authors) = detect_copyrights_from_text(input);
    let has_holder_list = copyrights
        .iter()
        .any(|det| det.copyright.contains("MIT, ERCIM, Keio University"));
    assert!(
        has_holder_list,
        "tokens: {token_dump:#?}\nlabels: {label_dump:#?}\nexpected W3C copyright with MIT/ERCIM/Keio, got: {copyrights:#?}"
    );
}
/// A stray ` . ` between `Copyright` and the year must be dropped, giving one
/// copyright and one holder.
#[test]
fn test_detect_copyright_with_dots_single_line() {
    let input = "Copyright . 2008 Foo Name, Inc.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    assert_eq!(
        copyrights.len(),
        1,
        "Should detect one copyright, got: {copyrights:?}"
    );
    assert_eq!(
        copyrights[0].copyright, "Copyright 2008 Foo Name, Inc.",
        "Should detect full copyright with company name"
    );

    assert_eq!(holders.len(), 1, "Should detect one holder, got: {holders:?}");
    assert_eq!(holders[0].holder, "Foo Name, Inc.");
}
/// A lower-case company name after `Copyright . <year>` must still produce a
/// copyright containing the year and the name, plus a matching holder.
///
/// NOTE(review): despite the test name, the input contains no line break —
/// confirm whether a multi-line input was intended.
#[test]
fn test_detect_copyright_with_dots_multiline() {
    let input = "Copyright . 2008 company name, inc.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    assert!(
        !copyrights.is_empty(),
        "Should detect at least one copyright, got: {copyrights:?}"
    );

    let has_year = copyrights.iter().any(|det| det.copyright.contains("2008"));
    assert!(
        has_year,
        "Should detect copyright with year 2008, got: {copyrights:?}"
    );

    // Name checks are case-insensitive.
    let copyright_has_name = copyrights
        .iter()
        .any(|det| det.copyright.to_lowercase().contains("company name"));
    assert!(
        copyright_has_name,
        "Should detect full company name, got: {copyrights:?}"
    );

    let holder_has_name = holders
        .iter()
        .any(|det| det.holder.to_lowercase().contains("company name"));
    assert!(
        holder_has_name,
        "Should detect holder with company name, got: {holders:?}"
    );
}
/// A year-less `Copyright (C) <Company> Inc.` line must yield a copyright and
/// a holder naming the company.
#[test]
fn test_opensharedmap_inc_holder_detected() {
    let input = "Copyright (C) OpenSharedMap Inc.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let in_copyright = copyrights
        .iter()
        .any(|det| det.copyright.contains("OpenSharedMap Inc"));
    assert!(
        in_copyright,
        "Expected OpenSharedMap copyright detection, got: {copyrights:?}"
    );

    let in_holder = holders
        .iter()
        .any(|det| det.holder.contains("OpenSharedMap Inc"));
    assert!(
        in_holder,
        "Expected OpenSharedMap holder detection, got: {holders:?}"
    );
}
/// Warranty-disclaimer text of the form `Copyright Owner Inc. AS IS AND ANY ...`
/// must not produce any holder.
#[test]
fn test_disclaimer_tail_with_inc_as_does_not_create_holder() {
    let input = "Copyright Owner Inc. AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES";
    let (_copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let no_holders = holders.is_empty();
    assert!(
        no_holders,
        "Unexpected disclaimer-derived holders: {holders:?}"
    );
}
/// Prose such as "is created by a MeterProvider in this system" must not
/// produce the authors `MeterProvider in` or `TracerProvider in`.
#[test]
fn test_camelcase_provider_not_author_false_positive() {
    let input = "A meter implementation is created by a MeterProvider in this system.\nA trace implementation is created by a TracerProvider in this system.";
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(input);

    let author_values: Vec<&str> = authors.iter().map(|det| det.author.as_str()).collect();
    let has_false_positive = author_values
        .iter()
        .any(|name| matches!(*name, "MeterProvider in" | "TracerProvider in"));
    assert!(
        !has_false_positive,
        "Unexpected provider false-positive authors: {author_values:?}"
    );
}
/// A placeholder year range (`2010-202x`) followed by a holder with a
/// lower-case project name must be detected, with the holder matching exactly.
#[test]
fn test_platformdirs_lowercase_holder_detected() {
    let input = "Copyright (c) 2010-202x The platformdirs developers";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let copyright_found = copyrights
        .iter()
        .any(|det| det.copyright.contains("2010-202x The platformdirs developers"));
    assert!(
        copyright_found,
        "Expected platformdirs copyright, got: {copyrights:?}"
    );

    let holder_found = holders
        .iter()
        .any(|det| det.holder == "The platformdirs developers");
    assert!(
        holder_found,
        "Expected platformdirs holder, got: {holders:?}"
    );
}
/// A line opening with a square-bracket `[C]` sign must yield a copyright and
/// a holder mentioning the Regents of the University of Michigan.
#[test]
fn test_square_c_sign_detected() {
    let input = "[C] The Regents of the University of Michigan and Merit Network, Inc. 1992, 1993, 1994, 1995 All Rights Reserved";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let needle = "Regents of the University of Michigan";

    let in_copyright = copyrights.iter().any(|det| det.copyright.contains(needle));
    assert!(
        in_copyright,
        "Expected Regents copyright detection, got: {copyrights:?}"
    );

    let in_holder = holders.iter().any(|det| det.holder.contains(needle));
    assert!(
        in_holder,
        "Expected Regents holder detection, got: {holders:?}"
    );
}
/// A JavaScript template literal with an interpolated end year must yield the
/// full statement (in the normalized form below) and its holder, and must not
/// yield a statement truncated at the `$`.
#[test]
fn test_template_literal_copyright_holder_detected() {
    let input = "copyright: `Copyright 2010–${new Date().getUTCFullYear()} Mike Bostock`";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let full_statement = "Copyright 2010-${new Date .getUTCFullYear } Mike Bostock";
    let has_full = copyrights.iter().any(|det| det.copyright == full_statement);
    assert!(has_full, "copyrights: {copyrights:?}");

    let has_holder = holders.iter().any(|det| det.holder == "Mike Bostock");
    assert!(has_holder, "holders: {holders:?}");

    let has_truncated = copyrights
        .iter()
        .any(|det| det.copyright == "Copyright 2010-$");
    assert!(!has_truncated, "copyrights: {copyrights:?}");
}
/// Markdown prose containing "created by [*transition*.transition](...)" must
/// not produce an author built from the link text and URL.
#[test]
fn test_markdown_transition_line_not_author() {
    let input = "The meaning of [*transition*.delay](https://github.com/d3/d3-transition/blob/master/README.md#transition_delay) has changed for chained transitions created by [*transition*.transition](https://github.com/d3/d3-transition/blob/master/README.md#transition_transition).";
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(input);

    let bogus_fragment =
        "transition .transition https://github.com/d3/d3-transition/blob/master/README.md";
    let has_bogus_author = authors.iter().any(|det| det.author.contains(bogus_fragment));
    assert!(!has_bogus_author, "authors: {authors:?}");
}
/// `Foo, Inc` and `Foo, Inc.` on separate copyright lines must give two
/// holders, both reported as `Foo, Inc.`.
#[test]
fn test_normalize_company_suffix_period_holder_variants() {
    let input = "Copyright (c) 2020 Foo, Inc\nCopyright (c) 2021 Foo, Inc.";
    let (_copyrights, holders, _authors) = detect_copyrights_from_text(input);

    assert_eq!(holders.len(), 2, "holders: {holders:?}");

    let all_normalized = holders.iter().all(|det| det.holder == "Foo, Inc.");
    assert!(all_normalized, "holders: {holders:?}");
}
/// Two `Name <email>` entries on consecutive lines under one `(c)` statement
/// must be reported as two separate holders, not one concatenated holder.
#[test]
fn test_split_multiline_holder_list_with_emails() {
    let input = "(c) 1999 Terrehon Bowden <terrehon@pacbell.net>\n Bodo Bauer <bb@ricochet.net>\n";
    let (_copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let has_holder = |name: &str| holders.iter().any(|det| det.holder == name);

    assert!(has_holder("Terrehon Bowden"), "holders: {holders:?}");
    assert!(has_holder("Bodo Bauer"), "holders: {holders:?}");
    assert!(
        !has_holder("Terrehon Bowden Bodo Bauer"),
        "holders: {holders:?}"
    );
}
/// When `All rights reserved.` sits on the line after the copyright, the
/// statement text stays unchanged but some detection must span lines 1-2.
#[test]
fn test_extend_copyright_with_following_all_rights_reserved_line() {
    let input = "Copyright 2010-2015 Mike Bostock\nAll rights reserved.";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let text_matches = copyrights
        .iter()
        .any(|det| det.copyright == "Copyright 2010-2015 Mike Bostock");
    assert!(text_matches, "copyrights: {copyrights:?}");

    let spans_both_lines = copyrights
        .iter()
        .any(|det| (det.start_line, det.end_line) == (1, 2));
    assert!(spans_both_lines, "copyrights: {copyrights:?}");

    let holder_found = holders.iter().any(|det| det.holder == "Mike Bostock");
    assert!(holder_found, "holders: {holders:?}");
}
/// A trailing `See url("https://...")` clause must be excluded from both the
/// copyright statement and the holder, leaving only the clean variants.
#[test]
fn test_drop_url_embedded_suffix_copyright_and_holder_variants() {
    let input =
        "/* Copyright (c) 2020 Example Corp. See url(\"https://dummy-url-for-test.com\") */";
    let (copyrights, holders, _authors) = detect_copyrights_from_text(input);

    let clean_copyright = copyrights
        .iter()
        .any(|det| det.copyright == "Copyright (c) 2020 Example Corp.");
    assert!(clean_copyright, "copyrights: {copyrights:?}");

    let polluted_copyright = copyrights
        .iter()
        .any(|det| det.copyright.contains("See url") || det.copyright.contains("https://"));
    assert!(!polluted_copyright, "copyrights: {copyrights:?}");

    let clean_holder = holders.iter().any(|det| det.holder == "Example Corp.");
    assert!(clean_holder, "holders: {holders:?}");

    let polluted_holder = holders
        .iter()
        .any(|det| det.holder.contains("See url") || det.holder.contains("http"));
    assert!(!polluted_holder, "holders: {holders:?}");
}
/// A bare list of `Name <email>` lines, with no copyright or author keyword,
/// must yield one author per line, email included.
#[test]
fn test_extract_authors_from_dense_name_email_list() {
    let input = "John Doe <john@example.com>\nJane Smith <jane@example.com>\n";
    let (_copyrights, _holders, authors) = detect_copyrights_from_text(input);

    let has_author = |entry: &str| authors.iter().any(|det| det.author == entry);

    assert!(
        has_author("John Doe <john@example.com>"),
        "authors: {authors:?}"
    );
    assert!(
        has_author("Jane Smith <jane@example.com>"),
        "authors: {authors:?}"
    );
}