provenant-cli 0.1.1

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0

use super::*;

fn truncate_at_baslerstr(s: &str) -> Option<String> {
    let lower = s.to_ascii_lowercase();
    let needle = ", baslerstr";
    let idx = lower.find(needle)?;
    let prefix = s[..idx].trim_end_matches(&[',', ' '][..]).trim();
    if prefix.is_empty() {
        None
    } else {
        Some(prefix.to_string())
    }
}

pub fn truncate_lonely_svox_baslerstr_address(
    copyrights: &mut [CopyrightDetection],
    holders: &mut [HolderDetection],
) {
    if copyrights.len() != 1 || holders.len() != 1 {
        return;
    }

    let c0 = &copyrights[0].copyright;
    let h0 = &holders[0].holder;
    if !c0.contains("SVOX")
        || !h0.contains("SVOX")
        || !c0.to_ascii_lowercase().contains("baslerstr")
        || !h0.to_ascii_lowercase().contains("baslerstr")
    {
        return;
    }

    if let Some(tc) = truncate_at_baslerstr(c0) {
        copyrights[0].copyright = tc;
    }
    if let Some(th) = truncate_at_baslerstr(h0) {
        holders[0].holder = th;
    }
}

pub fn add_short_svox_baslerstr_variants(
    copyrights: &[CopyrightDetection],
    holders: &[HolderDetection],
    seen: &SeenTextSets,
) -> (Vec<CopyrightDetection>, Vec<HolderDetection>) {
    if copyrights.is_empty() || holders.is_empty() {
        return (Vec::new(), Vec::new());
    }
    if copyrights.len() == 1 && holders.len() == 1 {
        return (Vec::new(), Vec::new());
    }

    let full_copyrights: Vec<&CopyrightDetection> = copyrights
        .iter()
        .filter(|c| {
            c.copyright.contains("SVOX") && c.copyright.to_ascii_lowercase().contains("baslerstr")
        })
        .collect();
    if full_copyrights.is_empty() {
        return (Vec::new(), Vec::new());
    }

    let new_c = full_copyrights
        .into_iter()
        .filter_map(|c| {
            let short = truncate_at_baslerstr(&c.copyright)?;
            (!seen.copyrights.contains(&short)).then_some(CopyrightDetection {
                copyright: short,
                start_line: c.start_line,
                end_line: c.end_line,
            })
        })
        .collect();

    let new_h = holders
        .iter()
        .filter(|h| {
            h.holder.contains("SVOX") && h.holder.to_ascii_lowercase().contains("baslerstr")
        })
        .filter_map(|h| {
            let short = truncate_at_baslerstr(&h.holder)?;
            (!seen.holders.contains(&short)).then_some(HolderDetection {
                holder: short,
                start_line: h.start_line,
                end_line: h.end_line,
            })
        })
        .collect();
    (new_c, new_h)
}

pub fn drop_shadowed_linux_foundation_holder_copyrights_same_line(
    copyrights: &mut Vec<CopyrightDetection>,
) {
    if copyrights.len() < 2 {
        return;
    }

    static WITH_C_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?i)^copyright\s*\(c\)\s*(?P<years>\d{4}(?:\s*,\s*\d{4})*)$").unwrap()
    });
    static WITH_HOLDER_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?i)^copyright\s+(?P<years>\d{4}(?:\s*,\s*\d{4})*)\s+linux\s+foundation$")
            .unwrap()
    });

    let years_by_line: HashSet<(usize, String)> = copyrights
        .iter()
        .filter_map(|c| {
            let cap = WITH_C_RE.captures(c.copyright.trim())?;
            let years = cap.name("years").map(|m| m.as_str()).unwrap_or("").trim();
            if years.is_empty() {
                return None;
            }
            Some((c.start_line.get(), years.to_string()))
        })
        .collect();

    copyrights.retain(|c| {
        let Some(cap) = WITH_HOLDER_RE.captures(c.copyright.trim()) else {
            return true;
        };
        let years = cap.name("years").map(|m| m.as_str()).unwrap_or("").trim();
        if years.is_empty() {
            return true;
        }
        !years_by_line.contains(&(c.start_line.get(), years.to_string()))
    });
}

pub fn restore_linux_foundation_copyrights_from_raw_lines(
    raw_lines: &[&str],
    copyrights: &mut Vec<CopyrightDetection>,
) {
    if raw_lines.is_empty() {
        return;
    }

    static RAW_LINUX_FOUNDATION_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(
            r"(?i)copyright\s*\(c\)\s*(?P<years>\d{4}(?:\s*,\s*\d{4})*)\s+linux\s+foundation",
        )
        .unwrap()
    });

    let mut to_add: Vec<CopyrightDetection> = Vec::new();
    for (idx, raw) in raw_lines.iter().enumerate() {
        let ln = idx + 1;
        let Some(cap) = RAW_LINUX_FOUNDATION_RE.captures(raw) else {
            continue;
        };
        let years = cap.name("years").map(|m| m.as_str()).unwrap_or("").trim();
        if years.is_empty() {
            continue;
        }

        let full = super::super::token_utils::normalize_whitespace(&format!(
            "Copyright (c) {years} Linux Foundation"
        ));
        if copyrights
            .iter()
            .any(|c| c.start_line.get() == ln && c.end_line.get() == ln && c.copyright == full)
        {
            continue;
        }

        to_add.push(CopyrightDetection {
            copyright: full.clone(),
            start_line: LineNumber::new(ln).unwrap(),
            end_line: LineNumber::new(ln).unwrap(),
        });

        let bare =
            super::super::token_utils::normalize_whitespace(&format!("Copyright (c) {years}"));
        copyrights.retain(|c| {
            !(c.start_line.get() == ln && c.end_line.get() == ln && c.copyright == bare)
        });
    }

    copyrights.extend(to_add);
}

pub fn add_karlsruhe_university_short_variants(
    copyrights: &[CopyrightDetection],
    holders: &[HolderDetection],
) -> (Vec<CopyrightDetection>, Vec<HolderDetection>) {
    if copyrights.is_empty() && holders.is_empty() {
        return (Vec::new(), Vec::new());
    }

    static KARLSRUHE_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"(?i)\bUniversity\s+of\s+Karlsruhe\b").unwrap());
    static KARLSRUHE_TERMINAL_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?i)\bUniversity\s+of\s+Karlsruhe\b\s*[)\]\.\,;:]?\s*$").unwrap()
    });

    fn shorten_karlsruhe(
        text: &str,
        karlsruhe_re: &Regex,
        karlsruhe_terminal_re: &Regex,
    ) -> Option<String> {
        if !karlsruhe_re.is_match(text) || !karlsruhe_terminal_re.is_match(text) {
            return None;
        }
        let short = karlsruhe_re.replace_all(text, "University").to_string();
        let short = super::super::token_utils::normalize_whitespace(&short);
        (short != text).then_some(short)
    }

    let new_c = copyrights
        .iter()
        .filter_map(|c| {
            let short =
                shorten_karlsruhe(c.copyright.as_str(), &KARLSRUHE_RE, &KARLSRUHE_TERMINAL_RE)?;
            Some(CopyrightDetection {
                copyright: short,
                start_line: c.start_line,
                end_line: c.end_line,
            })
        })
        .collect();

    let new_h = holders
        .iter()
        .filter_map(|h| {
            let short =
                shorten_karlsruhe(h.holder.as_str(), &KARLSRUHE_RE, &KARLSRUHE_TERMINAL_RE)?;
            Some(HolderDetection {
                holder: short,
                start_line: h.start_line,
                end_line: h.end_line,
            })
        })
        .collect();
    (new_c, new_h)
}

pub fn add_intel_and_sun_non_portions_variants(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &[CopyrightDetection],
) -> Vec<CopyrightDetection> {
    if prepared_cache.is_empty() || copyrights.is_empty() {
        return Vec::new();
    }

    static PORTIONS_SUN_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(
            r"(?i)^Portions\s+Copyright\s+(?P<year>\d{4})\s+Sun\s+Microsystems\b(?P<tail>.*)$",
        )
        .unwrap()
    });
    static PORTIONS_INTEL_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?i)^Portions\s+Copyright\s+(?P<year>\d{4})\s+Intel\b").unwrap()
    });
    static INTEL_EMAILS_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?i)Portions\s+Copyright\s+2002\s+Intel\s*\((?P<emails>[^)]*@[\s\S]*?)\)")
            .unwrap()
    });

    copyrights
        .iter()
        .flat_map(|c| {
            let trimmed = c.copyright.trim();

            let sun_variant = PORTIONS_SUN_RE.captures(trimmed).and_then(|cap| {
                let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
                let tail = cap.name("tail").map(|m| m.as_str()).unwrap_or("");
                if year.is_empty() {
                    return None;
                }
                let candidate = super::super::token_utils::normalize_whitespace(&format!(
                    "Copyright {year} Sun Microsystems{tail}"
                ));
                let refined = refine_copyright(&candidate)?;
                Some(CopyrightDetection {
                    copyright: refined,
                    start_line: c.start_line,
                    end_line: c.end_line,
                })
            });

            let intel_variant = if PORTIONS_INTEL_RE.is_match(trimmed)
                && (c.end_line.get() > c.start_line.get() || trimmed.contains('('))
            {
                let joined = (c.start_line.get()..=c.end_line.get())
                    .filter_map(|ln| prepared_cache.get(ln))
                    .collect::<Vec<_>>()
                    .join(" ");
                let joined = super::super::token_utils::normalize_whitespace(&joined);
                INTEL_EMAILS_RE.captures(joined.as_str()).and_then(|cap| {
                    let emails = cap.name("emails").map(|m| m.as_str()).unwrap_or("").trim();
                    if emails.is_empty() {
                        return None;
                    }
                    let candidate = super::super::token_utils::normalize_whitespace(&format!(
                        "Copyright 2002 Intel ({emails})"
                    ));
                    let refined = refine_copyright(&candidate)?;
                    Some(CopyrightDetection {
                        copyright: refined,
                        start_line: c.start_line,
                        end_line: c.end_line,
                    })
                })
            } else {
                None
            };

            [sun_variant, intel_variant].into_iter().flatten()
        })
        .collect()
}

pub fn add_but_suffix_short_variants(copyrights: &[CopyrightDetection]) -> Vec<CopyrightDetection> {
    if copyrights.is_empty() {
        return Vec::new();
    }

    static BUT_SUFFIX_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"(?i)^(?P<prefix>.+?),\s*but\s*$").unwrap());

    copyrights
        .iter()
        .filter_map(|c| {
            let trimmed = c.copyright.trim();
            let cap = BUT_SUFFIX_RE.captures(trimmed)?;
            let prefix = cap.name("prefix").map(|m| m.as_str()).unwrap_or("").trim();
            if prefix.is_empty() {
                return None;
            }
            let refined = refine_copyright(prefix)?;
            Some(CopyrightDetection {
                copyright: refined,
                start_line: c.start_line,
                end_line: c.end_line,
            })
        })
        .collect()
}

pub fn add_pipe_read_parenthetical_variants(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &[CopyrightDetection],
) -> Vec<CopyrightDetection> {
    if prepared_cache.len() < 2 || copyrights.is_empty() {
        return Vec::new();
    }

    static PIPE_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"(?i)^\(\s*pipe\s+read\s+code\s+from\s+[^)]+\)\s*$").unwrap());

    prepared_cache
        .adjacent_pairs()
        .filter_map(|(first, second)| {
            if first.prepared.is_empty() || second.prepared.is_empty() {
                return None;
            }
            if !first.prepared.to_ascii_lowercase().contains("copyright") {
                return None;
            }
            if !PIPE_RE.is_match(second.prepared) {
                return None;
            }
            let combined = format!("{} {}", first.prepared, second.prepared);
            let refined = refine_copyright(&combined)?;
            Some(CopyrightDetection {
                copyright: refined,
                start_line: first.line_number,
                end_line: second.line_number,
            })
        })
        .collect()
}

pub fn add_from_url_parenthetical_copyright_variants(
    prepared_cache: &PreparedLines<'_>,
    _copyrights: &[CopyrightDetection],
) -> Vec<CopyrightDetection> {
    if prepared_cache.is_empty() {
        return Vec::new();
    }

    static FROM_URL_COPY_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"(?i)\bfrom\s+https?://\S+\s*\(\s*copyright\b").unwrap());

    prepared_cache
        .iter_non_empty()
        .filter_map(|line| {
            if !FROM_URL_COPY_RE.is_match(line.prepared) {
                return None;
            }
            let lower = line.prepared.to_ascii_lowercase();
            let candidate = if lower.starts_with("adapted from ") {
                format!(
                    "from {}",
                    line.prepared["adapted from ".len()..].trim_start()
                )
            } else {
                line.prepared.to_string()
            };
            let refined = refine_copyright(&candidate)?;
            Some(CopyrightDetection {
                copyright: refined,
                start_line: line.line_number,
                end_line: line.line_number,
            })
        })
        .collect()
}

pub fn add_missing_holder_from_single_copyright(
    copyrights: &[CopyrightDetection],
    holders: &[HolderDetection],
) -> Option<HolderDetection> {
    if !holders.is_empty() || copyrights.len() != 1 {
        return None;
    }
    let c = &copyrights[0];
    let h = derive_holder_from_simple_copyright_string(&c.copyright)?;
    let h = refine_holder_in_copyright_context(&h)?;

    let trimmed = h.trim();
    if trimmed.to_ascii_lowercase().starts_with("copyright ") {
        return None;
    }
    static YEAR_ONLY_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"^\d{4}(?:\s*[-–]\s*\d{4})?$").unwrap());
    if YEAR_ONLY_RE.is_match(trimmed) {
        return None;
    }
    Some(HolderDetection {
        holder: h,
        start_line: c.start_line,
        end_line: c.end_line,
    })
}

pub fn merge_freebird_c_inc_urls(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &mut Vec<CopyrightDetection>,
    holders: &mut Vec<HolderDetection>,
) {
    if !prepared_cache.contains_ci("(c)") || !prepared_cache.contains_ci("inc") {
        return;
    }
    if !prepared_cache.contains_ci("coventive") && !prepared_cache.contains_ci("legend") {
        return;
    }

    for prepared_line in prepared_cache.iter_non_empty() {
        let line_lower = prepared_line.prepared.to_ascii_lowercase();
        if !line_lower.contains("(c)") || !line_lower.contains("inc") {
            continue;
        }

        let url = prepared_cache
            .next_non_empty_line(prepared_line.line_number)
            .and_then(|next| {
                let next_lower = next.prepared.to_ascii_lowercase();
                if !next_lower.contains("http") {
                    return None;
                }
                if next_lower.contains("web.archive.org/web") {
                    return Some("http://web.archive.org/web".to_string());
                }
                next_lower
                    .contains("coventive.com")
                    .then(|| next.prepared.to_string())
            });

        let Some(url) = url else {
            continue;
        };

        let cr_raw = format!("(c), Inc. {url}");
        if let Some(cr) = refine_copyright(&cr_raw) {
            copyrights.push(CopyrightDetection {
                copyright: cr,
                start_line: prepared_line.line_number,
                end_line: prepared_line.line_number,
            });
        }
        let holder_raw = "Inc.";
        if let Some(h) = refine_holder(holder_raw) {
            holders.push(HolderDetection {
                holder: h,
                start_line: prepared_line.line_number,
                end_line: prepared_line.line_number,
            });
        }
    }
}

pub fn merge_debugging390_best_viewed_suffix(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &mut Vec<CopyrightDetection>,
    holders: &mut Vec<HolderDetection>,
) {
    if !prepared_cache.contains_ci("Best viewed") {
        return;
    }

    static IBM_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?i)^copyright\s*\(c\)\s*2000-2001\s+(?P<who>IBM\b.+)$").unwrap()
    });

    for (first, second) in prepared_cache.adjacent_pairs() {
        let Some(cap) = IBM_RE.captures(first.prepared) else {
            continue;
        };
        let who = cap.name("who").map(|m| m.as_str()).unwrap_or("").trim();
        if who.is_empty() || !second.prepared.trim_start().starts_with("Best") {
            continue;
        }

        let merged_raw = format!("Copyright (c) 2000-2001 {who} Best");
        let Some(merged) = refine_copyright(&merged_raw) else {
            continue;
        };

        copyrights.retain(|c| {
            !(c.start_line == first.line_number
                && c.copyright.contains(who)
                && c.copyright.contains("2000-2001")
                && !c.copyright.ends_with("Best"))
        });
        if !copyrights.iter().any(|c| c.copyright == merged) {
            copyrights.push(CopyrightDetection {
                copyright: merged,
                start_line: first.line_number,
                end_line: second.line_number,
            });
        }

        let holder_raw = format!("{who} Best");
        holders.retain(|h| !(h.start_line == first.line_number && h.holder == who));
        if let Some(h) = refine_holder_in_copyright_context(&holder_raw) {
            holders.push(HolderDetection {
                holder: h,
                start_line: first.line_number,
                end_line: second.line_number,
            });
        }
    }
}

pub fn merge_fsf_gdb_notice_lines(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &mut Vec<CopyrightDetection>,
    holders: &mut Vec<HolderDetection>,
) {
    if !prepared_cache.contains_ci("GDB is free software") {
        return;
    }

    for (first, second) in prepared_cache.adjacent_pairs() {
        if !first
            .prepared
            .starts_with("Copyright 1998 Free Software Foundation")
        {
            continue;
        }
        if !second.prepared.starts_with("GDB is free software") {
            continue;
        }

        let tail = if let Some(idx) = second.prepared.find("GNU General Public License,") {
            &second.prepared[..(idx + "GNU General Public License,".len())]
        } else {
            second.prepared
        };

        let merged_raw = format!("{} {tail}", first.prepared);
        let merged = super::super::token_utils::normalize_whitespace(&merged_raw);
        if !merged.ends_with(',') {
            continue;
        }
        if !copyrights.iter().any(|c| c.copyright == merged) {
            copyrights.push(CopyrightDetection {
                copyright: merged,
                start_line: first.line_number,
                end_line: second.line_number,
            });
        }

        let holder = "Free Software Foundation, Inc. GDB free software, covered by the GNU General Public License";
        if !holders.iter().any(|x| x.holder == holder) {
            holders.push(HolderDetection {
                holder: holder.to_string(),
                start_line: first.line_number,
                end_line: second.line_number,
            });
        }
    }
}

pub fn merge_axis_ethereal_suffix(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &mut Vec<CopyrightDetection>,
    holders: &mut Vec<HolderDetection>,
) {
    if !prepared_cache.contains_ci("Axis Communications") {
        return;
    }

    for (first, second) in prepared_cache.adjacent_pairs() {
        if first.prepared != "Copyright 2000, Axis Communications AB" {
            continue;
        }
        if !second.prepared.starts_with("Ethereal") {
            continue;
        }
        let merged_raw = "Copyright 2000, Axis Communications AB Ethereal";
        let Some(merged) = refine_copyright(merged_raw) else {
            continue;
        };

        copyrights
            .retain(|c| !(c.start_line == first.line_number && c.copyright == first.prepared));
        if !copyrights.iter().any(|c| c.copyright == merged) {
            copyrights.push(CopyrightDetection {
                copyright: merged,
                start_line: first.line_number,
                end_line: second.line_number,
            });
        }

        holders.retain(|h| {
            !(h.start_line == first.line_number && h.holder == "Axis Communications AB")
        });
        if let Some(h) = refine_holder_in_copyright_context("Axis Communications AB Ethereal")
            && !holders.iter().any(|x| x.holder == h)
        {
            holders.push(HolderDetection {
                holder: h,
                start_line: first.line_number,
                end_line: second.line_number,
            });
        }
    }
}

pub fn merge_kirkwood_converted_to(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &mut Vec<CopyrightDetection>,
    holders: &mut Vec<HolderDetection>,
) {
    if !prepared_cache.contains_ci("Kirkwood") || !prepared_cache.contains_ci("converted") {
        return;
    }

    static EMBEDDED_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?i)\(c\)\s+(?P<year>19\d{2}|20\d{2})\s+(?P<who>M\.?\s*Kirkwood)\b").unwrap()
    });

    for (first, second) in prepared_cache.adjacent_pairs() {
        let Some(cap) = EMBEDDED_RE.captures(first.prepared) else {
            continue;
        };
        let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
        let who = cap.name("who").map(|m| m.as_str()).unwrap_or("").trim();
        if year.is_empty() || who.is_empty() {
            continue;
        }
        let p2 = second.prepared.trim_start_matches('*').trim_start();
        if !p2.to_ascii_lowercase().starts_with("converted to") {
            continue;
        }

        let cr_raw = format!("(c) {year} {who} Converted to");
        if let Some(cr) = refine_copyright(&cr_raw) {
            copyrights.push(CopyrightDetection {
                copyright: cr,
                start_line: first.line_number,
                end_line: second.line_number,
            });
        }
        let holder_raw = format!("{who} Converted");
        if let Some(h) = refine_holder_in_copyright_context(&holder_raw) {
            holders.push(HolderDetection {
                holder: h,
                start_line: first.line_number,
                end_line: second.line_number,
            });
        }
    }
}

pub fn fix_shm_inline_copyrights(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &mut Vec<CopyrightDetection>,
    holders: &mut Vec<HolderDetection>,
) {
    if !prepared_cache.contains_ci("/proc/sysvipc/shm support")
        || !prepared_cache.contains_ci("(c) 1999")
        || !prepared_cache.contains_ci("dragos@iname.com")
    {
        return;
    }

    static INLINE_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?i)\(c\)\s*(?P<year>\d{4})\s+(?P<name>[^<]+?)\s*<(?P<email>[^>\s]+@[^>\s]+)>")
            .unwrap()
    });

    for prepared_line in prepared_cache.iter_non_empty() {
        if !prepared_line.prepared.contains("/proc/sysvipc/shm") {
            continue;
        }
        let Some(cap) = INLINE_RE.captures(prepared_line.prepared) else {
            continue;
        };
        let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
        let name = cap.name("name").map(|m| m.as_str()).unwrap_or("").trim();
        let email = cap.name("email").map(|m| m.as_str()).unwrap_or("").trim();
        if year.is_empty() || name.is_empty() || email.is_empty() {
            continue;
        }

        let cr_raw = format!("(c) {year} {name} <{email}>");
        let Some(cr) = refine_copyright(&cr_raw) else {
            continue;
        };
        copyrights.push(CopyrightDetection {
            copyright: cr,
            start_line: prepared_line.line_number,
            end_line: prepared_line.line_number,
        });

        if let Some(holder) = refine_holder(name) {
            holders.push(HolderDetection {
                holder,
                start_line: prepared_line.line_number,
                end_line: prepared_line.line_number,
            });
        }
        break;
    }
}

pub fn fix_n_tty_linus_torvalds_written_by_clause(
    content: &str,
    copyrights: &mut Vec<CopyrightDetection>,
    holders: &mut Vec<HolderDetection>,
) {
    if !content.contains("n_tty.c") {
        return;
    }
    if !content.contains("Linus Torvalds") {
        return;
    }
    if !content.contains("Copyright 1991, 1992, 1993") {
        return;
    }

    let lines: Vec<&str> = content.lines().collect();
    for i in 0..lines.len().saturating_sub(1) {
        if !lines[i].contains("Linus Torvalds") {
            continue;
        }
        if !lines[i + 1].contains("Copyright 1991") {
            continue;
        }
        let ln = i + 1;
        let cr = "Linus Torvalds, Copyright 1991, 1992, 1993".to_string();
        copyrights.push(CopyrightDetection {
            copyright: cr,
            start_line: LineNumber::new(ln).unwrap(),
            end_line: LineNumber::new(ln + 1).expect("invalid line number"),
        });
        let holder = "Linus Torvalds".to_string();
        holders.push(HolderDetection {
            holder,
            start_line: LineNumber::new(ln).unwrap(),
            end_line: LineNumber::new(ln + 1).expect("invalid line number"),
        });
        break;
    }
}

pub fn fix_sundry_contributors_truncation(
    prepared_cache: &PreparedLines<'_>,
    copyrights: &mut Vec<CopyrightDetection>,
    holders: &mut Vec<HolderDetection>,
) {
    static COPYRIGHT_SUNDRY_CONTRIBUTORS_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(
            r"(?i)^\s*Copyright\s+(?P<year>19\d{2}|20\d{2})\s+(?P<name>.+?)\s+And\s+(?P<tail>Sundry\s+Contributors)\s*$",
        )
        .unwrap()
    });

    let mut matched: Option<(LineNumber, String, String, String)> = None;
    for prepared_line in prepared_cache.iter_non_empty() {
        if let Some(cap) = COPYRIGHT_SUNDRY_CONTRIBUTORS_RE.captures(prepared_line.prepared) {
            let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
            let name = cap.name("name").map(|m| m.as_str()).unwrap_or("").trim();
            let tail = cap.name("tail").map(|m| m.as_str()).unwrap_or("").trim();
            matched = Some((
                prepared_line.line_number,
                year.to_string(),
                name.to_string(),
                tail.to_string(),
            ));
            break;
        }
    }

    let Some((line_number, year, name, tail)) = matched else {
        return;
    };

    if year.is_empty() || name.is_empty() || tail.is_empty() {
        return;
    }

    let full_cr_raw = format!("Copyright {year} {name} And {tail}");
    let full_holder_raw = format!("{name} And {tail}");
    let Some(full_cr) = refine_copyright(&full_cr_raw) else {
        return;
    };
    let Some(full_holder) = refine_holder(&full_holder_raw) else {
        return;
    };

    let truncated_cr_raw = format!("Copyright {year} {name} And Sundry");
    let truncated_holder_raw = format!("{name} And Sundry");
    let truncated_cr = refine_copyright(&truncated_cr_raw);
    let truncated_holder = refine_holder(&truncated_holder_raw);

    if let Some(truncated_cr) = truncated_cr {
        for det in copyrights.iter_mut() {
            if det.copyright == truncated_cr {
                det.copyright = full_cr.clone();
            }
        }
    }
    if let Some(truncated_holder) = truncated_holder {
        for det in holders.iter_mut() {
            if det.holder == truncated_holder {
                det.holder = full_holder.clone();
            }
        }
    }

    if !copyrights.iter().any(|c| c.copyright == full_cr) {
        copyrights.push(CopyrightDetection {
            copyright: full_cr,
            start_line: line_number,
            end_line: line_number,
        });
    }
    if !holders.iter().any(|h| h.holder == full_holder) {
        holders.push(HolderDetection {
            holder: full_holder,
            start_line: line_number,
            end_line: line_number,
        });
    }
}

pub fn add_missing_holders_for_debian_modifications(
    content: &str,
    copyrights: &[CopyrightDetection],
) -> Vec<HolderDetection> {
    let has_debian_mods_line = content.lines().any(|l| {
        let lower = l.trim().to_ascii_lowercase();
        lower.starts_with("modifications for debian copyright")
    });
    if !has_debian_mods_line {
        return Vec::new();
    }

    copyrights
        .iter()
        .filter_map(|cr| {
            let holder = derive_holder_from_simple_copyright_string(&cr.copyright)?;
            Some(HolderDetection {
                holder,
                start_line: cr.start_line,
                end_line: cr.end_line,
            })
        })
        .collect()
}

pub fn add_confidential_short_variants_late(
    copyrights: &[CopyrightDetection],
    _holders: &[HolderDetection],
) -> (Vec<CopyrightDetection>, Vec<HolderDetection>) {
    if copyrights.is_empty() {
        return (Vec::new(), Vec::new());
    }

    static CONF_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(
            r"(?i)^copyright\s+(?P<year>\d{4})\s+confidential(?:[\s,;:.\-]+information|[\s,;:.\-]+proprietary|[\s,;:.\-]+and[\s,;:.\-]+proprietary)\b",
        )
        .unwrap()
    });

    let new_copyrights = copyrights
        .iter()
        .filter_map(|c| {
            let cap = CONF_RE.captures(c.copyright.as_str())?;
            let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim();
            let short_c = refine_copyright(&format!("Copyright {year} Confidential"))?;
            Some(CopyrightDetection {
                copyright: short_c,
                start_line: c.start_line,
                end_line: c.end_line,
            })
        })
        .collect();

    (new_copyrights, Vec::new())
}