use std::sync::LazyLock;
use regex::Regex;
/// Substrings whose presence flags a line as possibly carrying a copyright or
/// authorship statement.
///
/// Every entry is lowercase: [`has_copyright_hint`] lowercases the line before
/// searching for these. Each marker is listed exactly once; a repeated entry
/// cannot change the outcome of a substring search and only adds scanning work.
pub const HINT_MARKERS: &[&str] = &[
    // The copyright sign itself, written as an escape so the entry survives
    // any re-encoding of this source file.
    "\u{00a9}",
    "|copy|",
    // Numeric spellings of U+00A9: decimal, hex, Unicode-escape and octal.
    "169",
    "xa9",
    "u00a9",
    "00a9",
    "\\251",
    // Word fragments (e.g. "copyright", "copyleft", "copr.", "reserved").
    "opyr",
    "opyl",
    "copr",
    "reserv",
    // Authorship and contribution wording.
    "auth",
    "contrib",
    "commit",
    "filecontributor",
    "devel",
    // `<s>` style markup tags.
    "<s>",
    "</s>",
    "<s/>",
    // "by " keeps its trailing space; "@" catches e-mail addresses and
    // `@author`-style tags.
    "by ",
    "@",
];
/// Lazily compiled matcher for a four-digit year in the range 1960-2099.
///
/// The year must be preceded by at least one separator (`(`, `.`, `,`, `-`,
/// `)` or whitespace) and followed by either another run of those separators
/// or the end of the input. A year at the very start of the input therefore
/// does not match on its own.
static YEAR_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"[\(\.,\-\)\s]+(19[6-9][0-9]|20[0-9]{2})([\(\.,\-\)\s]+|$)";
    // The pattern is a fixed literal, so a compile failure is a programming error.
    Regex::new(pattern).unwrap()
});
/// Lazily compiled, case-insensitive matcher for `copyright (c) ????`, i.e. a
/// copyright statement whose year is written as four literal question marks.
/// Whitespace between the three parts is optional.
static UNKNOWN_YEAR_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?i)copyright\s*\(c\)\s*\?\?\?\?";
    // The pattern is a fixed literal, so a compile failure is a programming error.
    Regex::new(pattern).unwrap()
});
/// Returns `true` when `line` contains something that reads as a copyright
/// year: either a separator-delimited year accepted by `YEAR_REGEX`, or the
/// `copyright (c) ????` placeholder accepted by
/// `UNKNOWN_YEAR_PLACEHOLDER_REGEX`.
pub fn has_year(line: &str) -> bool {
    if YEAR_REGEX.is_match(line) {
        return true;
    }
    UNKNOWN_YEAR_PLACEHOLDER_REGEX.is_match(line)
}
/// Returns `true` when `line` carries any copyright/authorship hint.
///
/// The line is lowercased and searched for each entry of [`HINT_MARKERS`];
/// if none is found, the original (case-preserved) line is handed to
/// `has_c_marker_hint`, which looks for a standalone `(c)` marker.
pub fn has_copyright_hint(line: &str) -> bool {
    let folded = line.to_lowercase();
    for marker in HINT_MARKERS {
        if folded.contains(*marker) {
            return true;
        }
    }
    // The `(c)` check needs the original casing to inspect what follows the marker.
    has_c_marker_hint(line)
}
/// Returns `true` when `line` contains a standalone `(c)` / `(C)` marker that
/// is followed by what looks like the start of a year or a name.
///
/// For each occurrence of the marker (matched ASCII case-insensitively):
/// * it is ignored when the byte immediately before it is an ASCII letter or
///   digit, as in `func(c)`;
/// * otherwise ASCII whitespace after it is skipped, and the occurrence
///   qualifies when the next byte is an ASCII digit or ASCII uppercase letter.
///
/// Every occurrence is examined, so a non-qualifying `(c)` early in the line
/// does not hide a qualifying one later on. A marker with nothing but
/// whitespace after it does not qualify.
fn has_c_marker_hint(line: &str) -> bool {
    let bytes = line.as_bytes();
    bytes
        .windows(3)
        .enumerate()
        // The marker is pure ASCII, so a byte-level match can never start
        // inside a multi-byte UTF-8 sequence.
        .filter(|(_, window)| window.eq_ignore_ascii_case(b"(c)"))
        .any(|(idx, _)| {
            let glued_to_word = idx > 0 && bytes[idx - 1].is_ascii_alphanumeric();
            if glued_to_word {
                return false;
            }
            // `idx + 3 <= bytes.len()` because the window itself is three bytes long.
            bytes[idx + 3..]
                .iter()
                .find(|b| !b.is_ascii_whitespace())
                .is_some_and(|b| b.is_ascii_digit() || b.is_ascii_uppercase())
        })
}
/// Returns `true` when `line` is worth a closer look: it either carries a
/// copyright hint (see [`has_copyright_hint`]) or a year (see [`has_year`]).
/// The hint check runs first and short-circuits the year check.
pub fn is_candidate(line: &str) -> bool {
    if has_copyright_hint(line) {
        return true;
    }
    has_year(line)
}
/// Returns `true` when `s`, ignoring trailing ASCII punctuation and
/// whitespace, ends in a four-character number in the range 1960-2099.
///
/// Only the final four bytes are inspected; whatever precedes them is not
/// examined (so `"12024"` is accepted as well). Inputs whose final four bytes
/// do not form a valid string slice, which happens when the cut would land
/// inside a multi-byte character, yield `false` instead of panicking.
pub fn has_trailing_year(s: &str) -> bool {
    let trimmed = s.trim_end_matches(|c: char| c.is_ascii_punctuation() || c.is_whitespace());
    let Some(start) = trimmed.len().checked_sub(4) else {
        // Fewer than four bytes left: cannot hold a year.
        return false;
    };
    trimmed
        // Checked slicing: `None` when `start` is not on a char boundary.
        .get(start..)
        .and_then(|tail| tail.parse::<u32>().ok())
        .is_some_and(|year| (1960..=2099).contains(&year))
}
// Unit tests for the hint, year, candidate and trailing-year predicates
// defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// --- has_copyright_hint: one test per marker family ---
#[test]
fn test_hint_copyright_symbol() {
assert!(has_copyright_hint("© 2024 Acme Inc"));
}
// No marker matches the uppercased "COPYRIGHT (C) 2024" here except "opyr";
// the input is accepted because of the word "Copyright" as much as the "(C)".
#[test]
fn test_hint_c_in_parens() {
assert!(has_copyright_hint("Copyright (C) 2024"));
}
#[test]
fn test_hint_rst_copy() {
assert!(has_copyright_hint("Some |copy| notice"));
}
// NOTE(review): the input below is the literal © character, identical in
// kind to test_hint_copyright_symbol. If a decimal HTML entity was intended,
// confirm the encoding of this file.
#[test]
fn test_hint_html_entity_decimal() {
assert!(has_copyright_hint("© 2024 Foo"));
}
// NOTE(review): same remark as above; the input is the literal © character,
// not a hexadecimal HTML entity.
#[test]
fn test_hint_html_entity_hex() {
assert!(has_copyright_hint("© 2024 Foo"));
}
#[test]
fn test_hint_opyr() {
assert!(has_copyright_hint("Copyright 2024"));
}
#[test]
fn test_hint_opyl() {
assert!(has_copyright_hint("Copyleft notice"));
}
#[test]
fn test_hint_copr() {
assert!(has_copyright_hint("Copr. 2024 Foo"));
}
// NOTE(review): HINT_MARKERS has no "right" entry; this input is accepted
// through the "reserv" marker.
#[test]
fn test_hint_right() {
assert!(has_copyright_hint("All rights reserved"));
}
#[test]
fn test_hint_reserv() {
assert!(has_copyright_hint("All Rights Reserved."));
}
// This input contains both the "auth" and the "@" markers.
#[test]
fn test_hint_auth() {
assert!(has_copyright_hint("@author John Doe"));
}
#[test]
fn test_hint_filecontributor() {
assert!(has_copyright_hint("SPDX-FileContributor: Jane"));
}
#[test]
fn test_hint_devel() {
assert!(has_copyright_hint("Developed by Acme"));
}
#[test]
fn test_hint_debian_markup() {
assert!(has_copyright_hint("<s>John Doe</s>"));
}
#[test]
fn test_hint_by_with_space() {
assert!(has_copyright_hint("Written by John"));
}
#[test]
fn test_hint_at_sign() {
assert!(has_copyright_hint("user@example.com"));
}
// --- has_copyright_hint: negative and case-folding checks ---
#[test]
fn test_hint_negative_no_markers() {
assert!(!has_copyright_hint("This is a plain line of code"));
}
#[test]
fn test_hint_negative_empty() {
assert!(!has_copyright_hint(""));
}
// Markers are lowercase; the line is lowercased before matching.
#[test]
fn test_hint_case_insensitive() {
assert!(has_copyright_hint("COPYRIGHT 2024"));
assert!(has_copyright_hint("AUTHOR: John"));
assert!(has_copyright_hint("DEVELOPED BY Acme"));
}
// --- has_year: accepted range is 1960..=2099 ---
#[test]
fn test_year_1959_no_match() {
assert!(!has_year(" 1959 "));
}
#[test]
fn test_year_1960_match() {
assert!(has_year(" 1960 "));
}
#[test]
fn test_year_2024_match() {
assert!(has_year(" 2024 "));
}
#[test]
fn test_year_2039_match() {
assert!(has_year(" 2039 "));
}
#[test]
fn test_year_2040_match() {
assert!(has_year(" 2040 "));
}
#[test]
fn test_year_2099_match() {
assert!(has_year(" 2099 "));
}
#[test]
fn test_year_2100_no_match() {
assert!(!has_year(" 2100 "));
}
#[test]
fn test_year_in_copyright_line() {
assert!(has_year("Copyright (c) 2024 Acme Inc."));
}
// The leading "2020" has no separator in front of it; the match comes from
// "-2024" followed by the end of the input.
#[test]
fn test_year_with_dash_separator() {
assert!(has_year("2020-2024"));
}
// Letters are not separators, so a year embedded in a word is rejected.
#[test]
fn test_year_no_surrounding_punct() {
assert!(!has_year("abc2024def"));
}
#[test]
fn test_year_at_end_of_line() {
assert!(has_year("Copyright 2024"));
}
// "????" stands in for the year and is accepted via the placeholder regex.
#[test]
fn test_unknown_year_placeholder_in_copyright_matches() {
assert!(has_year(
"Copyright (C) ???? Simon Mourier <simonm@microsoft.com>"
));
}
// --- is_candidate: hint OR year ---
#[test]
fn test_candidate_with_hint() {
assert!(is_candidate("Copyright 2024 Acme"));
}
// No hint marker occurs in this input; only the year makes it a candidate.
#[test]
fn test_candidate_with_year_only() {
assert!(is_candidate("Some notice 2024 "));
}
#[test]
fn test_candidate_negative() {
assert!(!is_candidate("Just a plain line of code"));
}
// --- has_trailing_year: trailing punctuation/whitespace is ignored ---
#[test]
fn test_trailing_year_present() {
assert!(has_trailing_year("some text 2024"));
assert!(has_trailing_year("some text 2024."));
assert!(has_trailing_year("some text 2024, "));
}
#[test]
fn test_trailing_year_absent() {
assert!(!has_trailing_year("some text"));
assert!(!has_trailing_year("some text abc"));
}
#[test]
fn test_trailing_year_boundary_low() {
assert!(has_trailing_year("text 1960"));
assert!(!has_trailing_year("text 1959"));
}
#[test]
fn test_trailing_year_boundary_high() {
assert!(has_trailing_year("text 2099"));
assert!(!has_trailing_year("text 2100"));
}
// Inputs shorter than four bytes can never hold a year.
#[test]
fn test_trailing_year_short_string() {
assert!(!has_trailing_year("20"));
assert!(!has_trailing_year(""));
}
}