use super::*;
fn truncate_at_baslerstr(s: &str) -> Option<String> {
let lower = s.to_ascii_lowercase();
let needle = ", baslerstr";
let idx = lower.find(needle)?;
let prefix = s[..idx].trim_end_matches(&[',', ' '][..]).trim();
if prefix.is_empty() {
None
} else {
Some(prefix.to_string())
}
}
pub fn truncate_lonely_svox_baslerstr_address(
copyrights: &mut [CopyrightDetection],
holders: &mut [HolderDetection],
) {
if copyrights.len() != 1 || holders.len() != 1 {
return;
}
let c0 = ©rights[0].copyright;
let h0 = &holders[0].holder;
if !c0.contains("SVOX")
|| !h0.contains("SVOX")
|| !c0.to_ascii_lowercase().contains("baslerstr")
|| !h0.to_ascii_lowercase().contains("baslerstr")
{
return;
}
if let Some(tc) = truncate_at_baslerstr(c0) {
copyrights[0].copyright = tc;
}
if let Some(th) = truncate_at_baslerstr(h0) {
holders[0].holder = th;
}
}
pub fn add_short_svox_baslerstr_variants(
copyrights: &[CopyrightDetection],
holders: &[HolderDetection],
seen: &SeenTextSets,
) -> (Vec<CopyrightDetection>, Vec<HolderDetection>) {
if copyrights.is_empty() || holders.is_empty() {
return (Vec::new(), Vec::new());
}
if copyrights.len() == 1 && holders.len() == 1 {
return (Vec::new(), Vec::new());
}
let full_copyrights: Vec<&CopyrightDetection> = copyrights
.iter()
.filter(|c| {
c.copyright.contains("SVOX") && c.copyright.to_ascii_lowercase().contains("baslerstr")
})
.collect();
if full_copyrights.is_empty() {
return (Vec::new(), Vec::new());
}
let new_c = full_copyrights
.into_iter()
.filter_map(|c| {
let short = truncate_at_baslerstr(&c.copyright)?;
(!seen.copyrights.contains(&short)).then_some(CopyrightDetection {
copyright: short,
start_line: c.start_line,
end_line: c.end_line,
})
})
.collect();
let new_h = holders
.iter()
.filter(|h| {
h.holder.contains("SVOX") && h.holder.to_ascii_lowercase().contains("baslerstr")
})
.filter_map(|h| {
let short = truncate_at_baslerstr(&h.holder)?;
(!seen.holders.contains(&short)).then_some(HolderDetection {
holder: short,
start_line: h.start_line,
end_line: h.end_line,
})
})
.collect();
(new_c, new_h)
}
pub fn drop_shadowed_linux_foundation_holder_copyrights_same_line(
copyrights: &mut Vec<CopyrightDetection>,
) {
if copyrights.len() < 2 {
return;
}
static WITH_C_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)^copyright\s*\(c\)\s*(?P<years>\d{4}(?:\s*,\s*\d{4})*)$").unwrap()
});
static WITH_HOLDER_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)^copyright\s+(?P<years>\d{4}(?:\s*,\s*\d{4})*)\s+linux\s+foundation$")
.unwrap()
});
let years_by_line: HashSet<(usize, String)> = copyrights
.iter()
.filter_map(|c| {
let cap = WITH_C_RE.captures(c.copyright.trim())?;
let years = cap.name("years").map(|m| m.as_str()).unwrap_or("").trim();
if years.is_empty() {
return None;
}
Some((c.start_line.get(), years.to_string()))
})
.collect();
copyrights.retain(|c| {
let Some(cap) = WITH_HOLDER_RE.captures(c.copyright.trim()) else {
return true;
};
let years = cap.name("years").map(|m| m.as_str()).unwrap_or("").trim();
if years.is_empty() {
return true;
}
!years_by_line.contains(&(c.start_line.get(), years.to_string()))
});
}
pub fn restore_linux_foundation_copyrights_from_raw_lines(
raw_lines: &[&str],
copyrights: &mut Vec<CopyrightDetection>,
) {
if raw_lines.is_empty() {
return;
}
static RAW_LINUX_FOUNDATION_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?i)copyright\s*\(c\)\s*(?P<years>\d{4}(?:\s*,\s*\d{4})*)\s+linux\s+foundation",
)
.unwrap()
});
let mut to_add: Vec<CopyrightDetection> = Vec::new();
for (idx, raw) in raw_lines.iter().enumerate() {
let ln = idx + 1;
let Some(cap) = RAW_LINUX_FOUNDATION_RE.captures(raw) else {
continue;
};
let years = cap.name("years").map(|m| m.as_str()).unwrap_or("").trim();
if years.is_empty() {
continue;
}
let full = super::super::token_utils::normalize_whitespace(&format!(
"Copyright (c) {years} Linux Foundation"
));
if copyrights
.iter()
.any(|c| c.start_line.get() == ln && c.end_line.get() == ln && c.copyright == full)
{
continue;
}
to_add.push(CopyrightDetection {
copyright: full.clone(),
start_line: LineNumber::new(ln).unwrap(),
end_line: LineNumber::new(ln).unwrap(),
});
let bare =
super::super::token_utils::normalize_whitespace(&format!("Copyright (c) {years}"));
copyrights.retain(|c| {
!(c.start_line.get() == ln && c.end_line.get() == ln && c.copyright == bare)
});
}
copyrights.extend(to_add);
}
pub fn add_karlsruhe_university_short_variants(
copyrights: &[CopyrightDetection],
holders: &[HolderDetection],
) -> (Vec<CopyrightDetection>, Vec<HolderDetection>) {
if copyrights.is_empty() && holders.is_empty() {
return (Vec::new(), Vec::new());
}
static KARLSRUHE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?i)\bUniversity\s+of\s+Karlsruhe\b").unwrap());
static KARLSRUHE_TERMINAL_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)\bUniversity\s+of\s+Karlsruhe\b\s*[)\]\.\,;:]?\s*$").unwrap()
});
fn shorten_karlsruhe(
text: &str,
karlsruhe_re: &Regex,
karlsruhe_terminal_re: &Regex,
) -> Option<String> {
if !karlsruhe_re.is_match(text) || !karlsruhe_terminal_re.is_match(text) {
return None;
}
let short = karlsruhe_re.replace_all(text, "University").to_string();
let short = super::super::token_utils::normalize_whitespace(&short);
(short != text).then_some(short)
}
let new_c = copyrights
.iter()
.filter_map(|c| {
let short =
shorten_karlsruhe(c.copyright.as_str(), &KARLSRUHE_RE, &KARLSRUHE_TERMINAL_RE)?;
Some(CopyrightDetection {
copyright: short,
start_line: c.start_line,
end_line: c.end_line,
})
})
.collect();
let new_h = holders
.iter()
.filter_map(|h| {
let short =
shorten_karlsruhe(h.holder.as_str(), &KARLSRUHE_RE, &KARLSRUHE_TERMINAL_RE)?;
Some(HolderDetection {
holder: short,
start_line: h.start_line,
end_line: h.end_line,
})
})
.collect();
(new_c, new_h)
}
pub fn add_intel_and_sun_non_portions_variants(
prepared_cache: &PreparedLines<'_>,
copyrights: &[CopyrightDetection],
) -> Vec<CopyrightDetection> {
if prepared_cache.is_empty() || copyrights.is_empty() {
return Vec::new();
}
static PORTIONS_SUN_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?i)^Portions\s+Copyright\s+(?P<year>\d{4})\s+Sun\s+Microsystems\b(?P<tail>.*)$",
)
.unwrap()
});
static PORTIONS_INTEL_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)^Portions\s+Copyright\s+(?P<year>\d{4})\s+Intel\b").unwrap()
});
static INTEL_EMAILS_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)Portions\s+Copyright\s+2002\s+Intel\s*\((?P<emails>[^)]*@[\s\S]*?)\)")
.unwrap()
});
copyrights
.iter()
.flat_map(|c| {
let trimmed = c.copyright.trim();
let sun_variant = PORTIONS_SUN_RE.captures(trimmed).and_then(|cap| {
let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
let tail = cap.name("tail").map(|m| m.as_str()).unwrap_or("");
if year.is_empty() {
return None;
}
let candidate = super::super::token_utils::normalize_whitespace(&format!(
"Copyright {year} Sun Microsystems{tail}"
));
let refined = refine_copyright(&candidate)?;
Some(CopyrightDetection {
copyright: refined,
start_line: c.start_line,
end_line: c.end_line,
})
});
let intel_variant = if PORTIONS_INTEL_RE.is_match(trimmed)
&& (c.end_line.get() > c.start_line.get() || trimmed.contains('('))
{
let joined = (c.start_line.get()..=c.end_line.get())
.filter_map(|ln| prepared_cache.get(ln))
.collect::<Vec<_>>()
.join(" ");
let joined = super::super::token_utils::normalize_whitespace(&joined);
INTEL_EMAILS_RE.captures(joined.as_str()).and_then(|cap| {
let emails = cap.name("emails").map(|m| m.as_str()).unwrap_or("").trim();
if emails.is_empty() {
return None;
}
let candidate = super::super::token_utils::normalize_whitespace(&format!(
"Copyright 2002 Intel ({emails})"
));
let refined = refine_copyright(&candidate)?;
Some(CopyrightDetection {
copyright: refined,
start_line: c.start_line,
end_line: c.end_line,
})
})
} else {
None
};
[sun_variant, intel_variant].into_iter().flatten()
})
.collect()
}
pub fn add_but_suffix_short_variants(copyrights: &[CopyrightDetection]) -> Vec<CopyrightDetection> {
if copyrights.is_empty() {
return Vec::new();
}
static BUT_SUFFIX_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?i)^(?P<prefix>.+?),\s*but\s*$").unwrap());
copyrights
.iter()
.filter_map(|c| {
let trimmed = c.copyright.trim();
let cap = BUT_SUFFIX_RE.captures(trimmed)?;
let prefix = cap.name("prefix").map(|m| m.as_str()).unwrap_or("").trim();
if prefix.is_empty() {
return None;
}
let refined = refine_copyright(prefix)?;
Some(CopyrightDetection {
copyright: refined,
start_line: c.start_line,
end_line: c.end_line,
})
})
.collect()
}
pub fn add_pipe_read_parenthetical_variants(
prepared_cache: &PreparedLines<'_>,
copyrights: &[CopyrightDetection],
) -> Vec<CopyrightDetection> {
if prepared_cache.len() < 2 || copyrights.is_empty() {
return Vec::new();
}
static PIPE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?i)^\(\s*pipe\s+read\s+code\s+from\s+[^)]+\)\s*$").unwrap());
prepared_cache
.adjacent_pairs()
.filter_map(|(first, second)| {
if first.prepared.is_empty() || second.prepared.is_empty() {
return None;
}
if !first.prepared.to_ascii_lowercase().contains("copyright") {
return None;
}
if !PIPE_RE.is_match(second.prepared) {
return None;
}
let combined = format!("{} {}", first.prepared, second.prepared);
let refined = refine_copyright(&combined)?;
Some(CopyrightDetection {
copyright: refined,
start_line: first.line_number,
end_line: second.line_number,
})
})
.collect()
}
pub fn add_from_url_parenthetical_copyright_variants(
prepared_cache: &PreparedLines<'_>,
_copyrights: &[CopyrightDetection],
) -> Vec<CopyrightDetection> {
if prepared_cache.is_empty() {
return Vec::new();
}
static FROM_URL_COPY_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?i)\bfrom\s+https?://\S+\s*\(\s*copyright\b").unwrap());
prepared_cache
.iter_non_empty()
.filter_map(|line| {
if !FROM_URL_COPY_RE.is_match(line.prepared) {
return None;
}
let lower = line.prepared.to_ascii_lowercase();
let candidate = if lower.starts_with("adapted from ") {
format!(
"from {}",
line.prepared["adapted from ".len()..].trim_start()
)
} else {
line.prepared.to_string()
};
let refined = refine_copyright(&candidate)?;
Some(CopyrightDetection {
copyright: refined,
start_line: line.line_number,
end_line: line.line_number,
})
})
.collect()
}
pub fn add_missing_holder_from_single_copyright(
copyrights: &[CopyrightDetection],
holders: &[HolderDetection],
) -> Option<HolderDetection> {
if !holders.is_empty() || copyrights.len() != 1 {
return None;
}
let c = ©rights[0];
let h = derive_holder_from_simple_copyright_string(&c.copyright)?;
let h = refine_holder_in_copyright_context(&h)?;
let trimmed = h.trim();
if trimmed.to_ascii_lowercase().starts_with("copyright ") {
return None;
}
static YEAR_ONLY_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^\d{4}(?:\s*[-–]\s*\d{4})?$").unwrap());
if YEAR_ONLY_RE.is_match(trimmed) {
return None;
}
Some(HolderDetection {
holder: h,
start_line: c.start_line,
end_line: c.end_line,
})
}
pub fn merge_freebird_c_inc_urls(
prepared_cache: &PreparedLines<'_>,
copyrights: &mut Vec<CopyrightDetection>,
holders: &mut Vec<HolderDetection>,
) {
if !prepared_cache.contains_ci("(c)") || !prepared_cache.contains_ci("inc") {
return;
}
if !prepared_cache.contains_ci("coventive") && !prepared_cache.contains_ci("legend") {
return;
}
for prepared_line in prepared_cache.iter_non_empty() {
let line_lower = prepared_line.prepared.to_ascii_lowercase();
if !line_lower.contains("(c)") || !line_lower.contains("inc") {
continue;
}
let url = prepared_cache
.next_non_empty_line(prepared_line.line_number)
.and_then(|next| {
let next_lower = next.prepared.to_ascii_lowercase();
if !next_lower.contains("http") {
return None;
}
if next_lower.contains("web.archive.org/web") {
return Some("http://web.archive.org/web".to_string());
}
next_lower
.contains("coventive.com")
.then(|| next.prepared.to_string())
});
let Some(url) = url else {
continue;
};
let cr_raw = format!("(c), Inc. {url}");
if let Some(cr) = refine_copyright(&cr_raw) {
copyrights.push(CopyrightDetection {
copyright: cr,
start_line: prepared_line.line_number,
end_line: prepared_line.line_number,
});
}
let holder_raw = "Inc.";
if let Some(h) = refine_holder(holder_raw) {
holders.push(HolderDetection {
holder: h,
start_line: prepared_line.line_number,
end_line: prepared_line.line_number,
});
}
}
}
pub fn merge_debugging390_best_viewed_suffix(
prepared_cache: &PreparedLines<'_>,
copyrights: &mut Vec<CopyrightDetection>,
holders: &mut Vec<HolderDetection>,
) {
if !prepared_cache.contains_ci("Best viewed") {
return;
}
static IBM_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)^copyright\s*\(c\)\s*2000-2001\s+(?P<who>IBM\b.+)$").unwrap()
});
for (first, second) in prepared_cache.adjacent_pairs() {
let Some(cap) = IBM_RE.captures(first.prepared) else {
continue;
};
let who = cap.name("who").map(|m| m.as_str()).unwrap_or("").trim();
if who.is_empty() || !second.prepared.trim_start().starts_with("Best") {
continue;
}
let merged_raw = format!("Copyright (c) 2000-2001 {who} Best");
let Some(merged) = refine_copyright(&merged_raw) else {
continue;
};
copyrights.retain(|c| {
!(c.start_line == first.line_number
&& c.copyright.contains(who)
&& c.copyright.contains("2000-2001")
&& !c.copyright.ends_with("Best"))
});
if !copyrights.iter().any(|c| c.copyright == merged) {
copyrights.push(CopyrightDetection {
copyright: merged,
start_line: first.line_number,
end_line: second.line_number,
});
}
let holder_raw = format!("{who} Best");
holders.retain(|h| !(h.start_line == first.line_number && h.holder == who));
if let Some(h) = refine_holder_in_copyright_context(&holder_raw) {
holders.push(HolderDetection {
holder: h,
start_line: first.line_number,
end_line: second.line_number,
});
}
}
}
pub fn merge_fsf_gdb_notice_lines(
prepared_cache: &PreparedLines<'_>,
copyrights: &mut Vec<CopyrightDetection>,
holders: &mut Vec<HolderDetection>,
) {
if !prepared_cache.contains_ci("GDB is free software") {
return;
}
for (first, second) in prepared_cache.adjacent_pairs() {
if !first
.prepared
.starts_with("Copyright 1998 Free Software Foundation")
{
continue;
}
if !second.prepared.starts_with("GDB is free software") {
continue;
}
let tail = if let Some(idx) = second.prepared.find("GNU General Public License,") {
&second.prepared[..(idx + "GNU General Public License,".len())]
} else {
second.prepared
};
let merged_raw = format!("{} {tail}", first.prepared);
let merged = super::super::token_utils::normalize_whitespace(&merged_raw);
if !merged.ends_with(',') {
continue;
}
if !copyrights.iter().any(|c| c.copyright == merged) {
copyrights.push(CopyrightDetection {
copyright: merged,
start_line: first.line_number,
end_line: second.line_number,
});
}
let holder = "Free Software Foundation, Inc. GDB free software, covered by the GNU General Public License";
if !holders.iter().any(|x| x.holder == holder) {
holders.push(HolderDetection {
holder: holder.to_string(),
start_line: first.line_number,
end_line: second.line_number,
});
}
}
}
pub fn merge_axis_ethereal_suffix(
prepared_cache: &PreparedLines<'_>,
copyrights: &mut Vec<CopyrightDetection>,
holders: &mut Vec<HolderDetection>,
) {
if !prepared_cache.contains_ci("Axis Communications") {
return;
}
for (first, second) in prepared_cache.adjacent_pairs() {
if first.prepared != "Copyright 2000, Axis Communications AB" {
continue;
}
if !second.prepared.starts_with("Ethereal") {
continue;
}
let merged_raw = "Copyright 2000, Axis Communications AB Ethereal";
let Some(merged) = refine_copyright(merged_raw) else {
continue;
};
copyrights
.retain(|c| !(c.start_line == first.line_number && c.copyright == first.prepared));
if !copyrights.iter().any(|c| c.copyright == merged) {
copyrights.push(CopyrightDetection {
copyright: merged,
start_line: first.line_number,
end_line: second.line_number,
});
}
holders.retain(|h| {
!(h.start_line == first.line_number && h.holder == "Axis Communications AB")
});
if let Some(h) = refine_holder_in_copyright_context("Axis Communications AB Ethereal")
&& !holders.iter().any(|x| x.holder == h)
{
holders.push(HolderDetection {
holder: h,
start_line: first.line_number,
end_line: second.line_number,
});
}
}
}
pub fn merge_kirkwood_converted_to(
prepared_cache: &PreparedLines<'_>,
copyrights: &mut Vec<CopyrightDetection>,
holders: &mut Vec<HolderDetection>,
) {
if !prepared_cache.contains_ci("Kirkwood") || !prepared_cache.contains_ci("converted") {
return;
}
static EMBEDDED_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)\(c\)\s+(?P<year>19\d{2}|20\d{2})\s+(?P<who>M\.?\s*Kirkwood)\b").unwrap()
});
for (first, second) in prepared_cache.adjacent_pairs() {
let Some(cap) = EMBEDDED_RE.captures(first.prepared) else {
continue;
};
let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
let who = cap.name("who").map(|m| m.as_str()).unwrap_or("").trim();
if year.is_empty() || who.is_empty() {
continue;
}
let p2 = second.prepared.trim_start_matches('*').trim_start();
if !p2.to_ascii_lowercase().starts_with("converted to") {
continue;
}
let cr_raw = format!("(c) {year} {who} Converted to");
if let Some(cr) = refine_copyright(&cr_raw) {
copyrights.push(CopyrightDetection {
copyright: cr,
start_line: first.line_number,
end_line: second.line_number,
});
}
let holder_raw = format!("{who} Converted");
if let Some(h) = refine_holder_in_copyright_context(&holder_raw) {
holders.push(HolderDetection {
holder: h,
start_line: first.line_number,
end_line: second.line_number,
});
}
}
}
pub fn fix_shm_inline_copyrights(
prepared_cache: &PreparedLines<'_>,
copyrights: &mut Vec<CopyrightDetection>,
holders: &mut Vec<HolderDetection>,
) {
if !prepared_cache.contains_ci("/proc/sysvipc/shm support")
|| !prepared_cache.contains_ci("(c) 1999")
|| !prepared_cache.contains_ci("dragos@iname.com")
{
return;
}
static INLINE_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)\(c\)\s*(?P<year>\d{4})\s+(?P<name>[^<]+?)\s*<(?P<email>[^>\s]+@[^>\s]+)>")
.unwrap()
});
for prepared_line in prepared_cache.iter_non_empty() {
if !prepared_line.prepared.contains("/proc/sysvipc/shm") {
continue;
}
let Some(cap) = INLINE_RE.captures(prepared_line.prepared) else {
continue;
};
let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
let name = cap.name("name").map(|m| m.as_str()).unwrap_or("").trim();
let email = cap.name("email").map(|m| m.as_str()).unwrap_or("").trim();
if year.is_empty() || name.is_empty() || email.is_empty() {
continue;
}
let cr_raw = format!("(c) {year} {name} <{email}>");
let Some(cr) = refine_copyright(&cr_raw) else {
continue;
};
copyrights.push(CopyrightDetection {
copyright: cr,
start_line: prepared_line.line_number,
end_line: prepared_line.line_number,
});
if let Some(holder) = refine_holder(name) {
holders.push(HolderDetection {
holder,
start_line: prepared_line.line_number,
end_line: prepared_line.line_number,
});
}
break;
}
}
pub fn fix_n_tty_linus_torvalds_written_by_clause(
content: &str,
copyrights: &mut Vec<CopyrightDetection>,
holders: &mut Vec<HolderDetection>,
) {
if !content.contains("n_tty.c") {
return;
}
if !content.contains("Linus Torvalds") {
return;
}
if !content.contains("Copyright 1991, 1992, 1993") {
return;
}
let lines: Vec<&str> = content.lines().collect();
for i in 0..lines.len().saturating_sub(1) {
if !lines[i].contains("Linus Torvalds") {
continue;
}
if !lines[i + 1].contains("Copyright 1991") {
continue;
}
let ln = i + 1;
let cr = "Linus Torvalds, Copyright 1991, 1992, 1993".to_string();
copyrights.push(CopyrightDetection {
copyright: cr,
start_line: LineNumber::new(ln).unwrap(),
end_line: LineNumber::new(ln + 1).expect("invalid line number"),
});
let holder = "Linus Torvalds".to_string();
holders.push(HolderDetection {
holder,
start_line: LineNumber::new(ln).unwrap(),
end_line: LineNumber::new(ln + 1).expect("invalid line number"),
});
break;
}
}
pub fn fix_sundry_contributors_truncation(
prepared_cache: &PreparedLines<'_>,
copyrights: &mut Vec<CopyrightDetection>,
holders: &mut Vec<HolderDetection>,
) {
static COPYRIGHT_SUNDRY_CONTRIBUTORS_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?i)^\s*Copyright\s+(?P<year>19\d{2}|20\d{2})\s+(?P<name>.+?)\s+And\s+(?P<tail>Sundry\s+Contributors)\s*$",
)
.unwrap()
});
let mut matched: Option<(LineNumber, String, String, String)> = None;
for prepared_line in prepared_cache.iter_non_empty() {
if let Some(cap) = COPYRIGHT_SUNDRY_CONTRIBUTORS_RE.captures(prepared_line.prepared) {
let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
let name = cap.name("name").map(|m| m.as_str()).unwrap_or("").trim();
let tail = cap.name("tail").map(|m| m.as_str()).unwrap_or("").trim();
matched = Some((
prepared_line.line_number,
year.to_string(),
name.to_string(),
tail.to_string(),
));
break;
}
}
let Some((line_number, year, name, tail)) = matched else {
return;
};
if year.is_empty() || name.is_empty() || tail.is_empty() {
return;
}
let full_cr_raw = format!("Copyright {year} {name} And {tail}");
let full_holder_raw = format!("{name} And {tail}");
let Some(full_cr) = refine_copyright(&full_cr_raw) else {
return;
};
let Some(full_holder) = refine_holder(&full_holder_raw) else {
return;
};
let truncated_cr_raw = format!("Copyright {year} {name} And Sundry");
let truncated_holder_raw = format!("{name} And Sundry");
let truncated_cr = refine_copyright(&truncated_cr_raw);
let truncated_holder = refine_holder(&truncated_holder_raw);
if let Some(truncated_cr) = truncated_cr {
for det in copyrights.iter_mut() {
if det.copyright == truncated_cr {
det.copyright = full_cr.clone();
}
}
}
if let Some(truncated_holder) = truncated_holder {
for det in holders.iter_mut() {
if det.holder == truncated_holder {
det.holder = full_holder.clone();
}
}
}
if !copyrights.iter().any(|c| c.copyright == full_cr) {
copyrights.push(CopyrightDetection {
copyright: full_cr,
start_line: line_number,
end_line: line_number,
});
}
if !holders.iter().any(|h| h.holder == full_holder) {
holders.push(HolderDetection {
holder: full_holder,
start_line: line_number,
end_line: line_number,
});
}
}
pub fn add_missing_holders_for_debian_modifications(
content: &str,
copyrights: &[CopyrightDetection],
) -> Vec<HolderDetection> {
let has_debian_mods_line = content.lines().any(|l| {
let lower = l.trim().to_ascii_lowercase();
lower.starts_with("modifications for debian copyright")
});
if !has_debian_mods_line {
return Vec::new();
}
copyrights
.iter()
.filter_map(|cr| {
let holder = derive_holder_from_simple_copyright_string(&cr.copyright)?;
Some(HolderDetection {
holder,
start_line: cr.start_line,
end_line: cr.end_line,
})
})
.collect()
}
pub fn add_confidential_short_variants_late(
copyrights: &[CopyrightDetection],
_holders: &[HolderDetection],
) -> (Vec<CopyrightDetection>, Vec<HolderDetection>) {
if copyrights.is_empty() {
return (Vec::new(), Vec::new());
}
static CONF_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?i)^copyright\s+(?P<year>\d{4})\s+confidential(?:[\s,;:.\-]+information|[\s,;:.\-]+proprietary|[\s,;:.\-]+and[\s,;:.\-]+proprietary)\b",
)
.unwrap()
});
let new_copyrights = copyrights
.iter()
.filter_map(|c| {
let cap = CONF_RE.captures(c.copyright.as_str())?;
let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
let short_c = refine_copyright(&format!("Copyright {year} Confidential"))?;
Some(CopyrightDetection {
copyright: short_c,
start_line: c.start_line,
end_line: c.end_line,
})
})
.collect();
(new_copyrights, Vec::new())
}