Skip to main content

resq_cli/commands/
copyright.rs

1/*
2 * Copyright 2026 ResQ
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! Copyright header command.
18//!
19//! Checks and updates copyright headers in source files to ensure
20//! proper licensing and attribution.
21
22use anyhow::{Context, Result};
23use chrono::Datelike;
24use glob::glob;
25use regex::Regex;
26use std::collections::HashSet;
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::process::Command;
30use walkdir::WalkDir;
31
32// ── CLI Args ────────────────────────────────────────────────────────────────
33
/// CLI arguments for the copyright header management command.
#[derive(clap::Args, Debug)]
// Each bool below (`force`, `dry_run`, `check`, `verbose`) is an independent
// on/off switch, so the lint's advice to fold them into an enum does not apply.
#[allow(clippy::struct_excessive_bools)]
pub struct CopyrightArgs {
    /// License type (apache-2.0, mit, gpl-3.0, bsd-3-clause)
    #[arg(short, long, default_value = "apache-2.0")]
    pub license: String,

    /// Copyright holder name
    #[arg(short, long, default_value = "ResQ Systems, Inc.")]
    pub author: String,

    /// Copyright year (defaults to current year)
    #[arg(short, long)]
    pub year: Option<String>,

    /// Overwrite existing headers
    #[arg(long)]
    pub force: bool,

    /// Preview changes without writing files
    #[arg(long)]
    pub dry_run: bool,

    /// Check for missing headers (CI mode, exits non-zero if any missing)
    #[arg(long)]
    pub check: bool,

    /// Print detailed processing info
    #[arg(short, long)]
    pub verbose: bool,

    /// Glob patterns to match files (e.g. "src/**/*.rs")
    #[arg(long)]
    pub glob: Vec<String>,

    /// File extensions to include (e.g. --ext rs,js,py)
    #[arg(long, value_delimiter = ',')]
    pub ext: Vec<String>,

    /// Patterns to exclude from processing
    #[arg(short, long)]
    pub exclude: Vec<String>,
}
78
79// ── License Templates ───────────────────────────────────────────────────────
80
81const VALID_LICENSES: &[&str] = &["apache-2.0", "mit", "gpl-3.0", "bsd-3-clause"];
82
83fn get_license_template(license: &str, author: &str, year: &str) -> Result<String> {
84    let text = match license {
85        "apache-2.0" => format!(
86            "Copyright {year} {author}\n\n\
87             Licensed under the Apache License, Version 2.0 (the \"License\");\n\
88             you may not use this file except in compliance with the License.\n\
89             You may obtain a copy of the License at\n\n\
90             \x20   http://www.apache.org/licenses/LICENSE-2.0\n\n\
91             Unless required by applicable law or agreed to in writing, software\n\
92             distributed under the License is distributed on an \"AS IS\" BASIS,\n\
93             WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n\
94             See the License for the specific language governing permissions and\n\
95             limitations under the License."
96        ),
97        "mit" => format!(
98            "Copyright (c) {year} {author}\n\n\
99             Permission is hereby granted, free of charge, to any person obtaining a copy\n\
100             of this software and associated documentation files (the \"Software\"), to deal\n\
101             in the Software without restriction, including without limitation the rights\n\
102             to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n\
103             copies of the Software, and to permit persons to whom the Software is\n\
104             furnished to do so, subject to the following conditions:\n\n\
105             The above copyright notice and this permission notice shall be included in all\n\
106             copies or substantial portions of the Software.\n\n\
107             THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n\
108             IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n\
109             FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n\
110             AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n\
111             LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n\
112             OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n\
113             SOFTWARE."
114        ),
115        "gpl-3.0" => format!(
116            "Copyright (C) {year} {author}\n\n\
117             This program is free software: you can redistribute it and/or modify\n\
118             it under the terms of the GNU General Public License as published by\n\
119             the Free Software Foundation, either version 3 of the License, or\n\
120             (at your option) any later version.\n\n\
121             This program is distributed in the hope that it will be useful,\n\
122             but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
123             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
124             GNU General Public License for more details.\n\n\
125             You should have received a copy of the GNU General Public License\n\
126             along with this program. If not, see <https://www.gnu.org/licenses/>."
127        ),
128        "bsd-3-clause" => format!(
129            "Copyright (c) {year}, {author}\n\
130             All rights reserved.\n\n\
131             Redistribution and use in source and binary forms, with or without\n\
132             modification, are permitted provided that the following conditions are met:\n\n\
133             1. Redistributions of source code must retain the above copyright notice, this\n\
134             \x20  list of conditions and the following disclaimer.\n\
135             2. Redistributions in binary form must reproduce the above copyright notice,\n\
136             \x20  this list of conditions and the following disclaimer in the documentation\n\
137             \x20  and/or other materials provided with the distribution.\n\
138             3. Neither the name of the copyright holder nor the names of its\n\
139             \x20  contributors may be used to endorse or promote products derived from\n\
140             \x20  this software without specific prior written permission.\n\n\
141             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n\
142             AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n\
143             IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n\
144             DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\n\
145             FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n\
146             DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n\
147             SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n\
148             CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n\
149             OR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
150             OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
151        ),
152        _ => anyhow::bail!("Unsupported license: '{license}'. Valid options: {VALID_LICENSES:?}"),
153    };
154    Ok(text)
155}
156
157// ── Comment Styles ──────────────────────────────────────────────────────────
158
/// How a header comment is delimited.
#[derive(Debug, Clone, Copy, PartialEq)]
enum CommentKind {
    /// Wrapped in an opening and a closing delimiter.
    Block,
    /// Every line carries its own comment prefix; no delimiters.
    Line,
}

/// Tokens needed to render a license header as a comment.
#[derive(Debug)]
struct CommentStyle {
    /// Delimited block or per-line prefix.
    kind: CommentKind,
    /// Opening delimiter; `None` for line styles.
    open: Option<&'static str>,
    /// Prefix placed in front of every header line.
    line: &'static str,
    /// Closing delimiter; `None` for line styles.
    close: Option<&'static str>,
}

impl CommentStyle {
    /// Build a block style from its opening delimiter, per-line prefix and
    /// closing delimiter.
    const fn block(open: &'static str, line: &'static str, close: &'static str) -> Self {
        let open = Some(open);
        let close = Some(close);
        Self {
            kind: CommentKind::Block,
            open,
            line,
            close,
        }
    }

    /// Build a line style that uses `prefix` on every line and has no
    /// delimiters.
    const fn line(prefix: &'static str) -> Self {
        Self {
            open: None,
            close: None,
            line: prefix,
            kind: CommentKind::Line,
        }
    }
}
192
/// `/** … */` block with a ` *` prefix on each line.
const C_STYLE_BLOCK: CommentStyle = CommentStyle::block("/**", " *", " */");
/// Rust uses `/* */` instead of `/** */` to avoid creating a doc comment that
/// conflicts with `//!` inner doc comments in `lib.rs` crate roots.
const RUST_BLOCK: CommentStyle = CommentStyle::block("/*", " *", " */");
/// `<!-- … -->` block; body lines get a single-space prefix.
const XML_BLOCK: CommentStyle = CommentStyle::block("<!--", " ", "-->");
/// `////`-delimited block with an empty line prefix.
const ASCIIDOC_BLOCK: CommentStyle = CommentStyle::block("////", "", "////");
/// Line comments introduced by `#`.
const HASH_LINE: CommentStyle = CommentStyle::line("#");
/// Line comments introduced by `--`.
const DASH_LINE: CommentStyle = CommentStyle::line("--");
/// Line comments introduced by `;;`.
const ELISP_LINE: CommentStyle = CommentStyle::line(";;");
202
203/// Map file extension / filename to comment style.
204/// Returns `None` for unsupported or binary file types.
205fn get_comment_style(path: &Path, content: &str) -> Option<&'static CommentStyle> {
206    // Agent/Claude instruction files are not source files — omit copyright headers.
207    static SKIP_FILENAMES: &[&str] = &["AGENTS.md", "CLAUDE.md"];
208    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
209        if SKIP_FILENAMES.contains(&name) {
210            return None;
211        }
212    }
213
214    // Shebang lines always get hash-style comments
215    if content.starts_with("#!/") {
216        return Some(&HASH_LINE);
217    }
218
219    let ext = path
220        .extension()
221        .and_then(|e| e.to_str())
222        .unwrap_or("")
223        .to_ascii_lowercase();
224    let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
225
226    match ext.as_str() {
227        // C-family block comments
228        "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" | "css" | "scss" | "less" | "styl" | "c"
229        | "cc" | "cpp" | "h" | "hpp" | "cs" | "java" | "kt" | "kts" | "swift" | "m" | "mm"
230        | "go" | "php" | "dart" | "scala" | "groovy" | "gradle" | "proto" | "zig" | "v" | "sv" => {
231            Some(&C_STYLE_BLOCK)
232        }
233
234        // Rust — non-doc block comment to avoid conflicting with //! inner docs
235        "rs" => Some(&RUST_BLOCK),
236
237        // Markup / XML
238        "html" | "htm" | "xml" | "xhtml" | "svg" | "md" | "rst" | "xsl" | "xslt" | "vue"
239        | "svelte" => Some(&XML_BLOCK),
240
241        // AsciiDoc
242        "adoc" | "asciidoc" => Some(&ASCIIDOC_BLOCK),
243
244        // Hash-line comments
245        "sh" | "bash" | "zsh" | "fish" | "py" | "pyi" | "rb" | "pl" | "pm" | "yml" | "yaml"
246        | "toml" | "ini" | "cfg" | "conf" | "env" | "mk" | "make" | "r" | "jl" | "tf" | "hcl"
247        | "nix" | "cmake" => Some(&HASH_LINE),
248
249        // Double-dash comments
250        "sql" | "lua" | "hs" | "elm" => Some(&DASH_LINE),
251
252        // Elisp / Clojure
253        "el" | "clj" | "cljs" | "cljc" | "edn" => Some(&ELISP_LINE),
254
255        _ => {
256            // Fallback: match well-known filenames
257            static HASH_FILENAMES: &[&str] = &[
258                "Makefile",
259                "Dockerfile",
260                "Containerfile",
261                "Vagrantfile",
262                ".env",
263                ".gitignore",
264                ".dockerignore",
265                ".editorconfig",
266                "Gemfile",
267                "Rakefile",
268                "Justfile",
269                "CMakeLists.txt",
270            ];
271            if HASH_FILENAMES
272                .iter()
273                .any(|&name| name.eq_ignore_ascii_case(filename))
274            {
275                Some(&HASH_LINE)
276            } else {
277                None
278            }
279        }
280    }
281}
282
283// ── Header Construction ─────────────────────────────────────────────────────
284
285fn build_header(style: &CommentStyle, license_text: &str) -> String {
286    let lines: Vec<&str> = license_text.split('\n').collect();
287    let mut header = String::with_capacity(license_text.len() + lines.len() * 4 + 32);
288
289    match style.kind {
290        CommentKind::Block => {
291            if let Some(open) = style.open {
292                header.push_str(open);
293                header.push('\n');
294            }
295            for line in &lines {
296                if line.is_empty() {
297                    // Avoid trailing whitespace on blank comment lines
298                    header.push_str(style.line.trim_end());
299                } else {
300                    header.push_str(style.line);
301                    header.push(' ');
302                    header.push_str(line);
303                }
304                header.push('\n');
305            }
306            if let Some(close) = style.close {
307                header.push_str(close);
308                header.push('\n');
309            }
310            header.push('\n');
311        }
312        CommentKind::Line => {
313            for line in &lines {
314                if line.is_empty() {
315                    header.push_str(style.line.trim_end());
316                } else {
317                    header.push_str(style.line);
318                    header.push(' ');
319                    header.push_str(line);
320                }
321                header.push('\n');
322            }
323            header.push('\n');
324        }
325    }
326    header
327}
328
329// ── Header Detection ────────────────────────────────────────────────────────
330
/// Detects a copyright header. Matches, case-insensitively, either
/// `copyright` followed by an optional `(c)` and a four-digit year, or the
/// literal `SPDX-License-Identifier:` tag. Compiled once, on first use.
// The pattern is a fixed literal, so `expect` can only fire on a programming error.
#[allow(clippy::expect_used)]
static HEADER_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"(?i)copyright\s*(\(c\)\s*)?\d{4}|SPDX-License-Identifier:")
        .expect("Static regex pattern is valid")
});

/// Matches a line whose first non-whitespace token is a line-comment marker:
/// `#`, `--`, `//` or `;;`. Compiled once, on first use.
// The pattern is a fixed literal, so `expect` can only fire on a programming error.
#[allow(clippy::expect_used)]
static COMMENT_START_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"^\s*(#|--|//|;;)").expect("Static regex pattern is valid")
});
342
343/// Check whether the first N lines of `content` contain a copyright header.
344fn has_header(content: &str) -> bool {
345    let head: String = content.lines().take(20).collect::<Vec<_>>().join("\n");
346    HEADER_RE.is_match(&head)
347}
348
349// ── License Detection ───────────────────────────────────────────────────────
350
/// Fingerprint for Apache headers: the phrase `Apache License` or an
/// `apache.org/licenses` URL (case-insensitive).
// The pattern is a fixed literal, so `expect` can only fire on a programming error.
#[allow(clippy::expect_used)]
static APACHE_FP: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"(?i)Apache\s+License|apache\.org/licenses").expect("Static regex pattern is valid")
});
/// Fingerprint for MIT headers: the grant sentence
/// `Permission is hereby granted` or the words `MIT License`
/// (case-insensitive).
#[allow(clippy::expect_used)]
static MIT_FP: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"(?i)Permission is hereby granted|\bMIT\s+License\b")
        .expect("Static regex pattern is valid")
});
/// Fingerprint for GPL headers: `GNU General Public License` or a
/// `gnu.org/licenses` URL (case-insensitive).
// NOTE(review): neither alternative pins the version, so other GPL versions
// would presumably also be reported as `gpl-3.0` — confirm this is acceptable.
#[allow(clippy::expect_used)]
static GPL3_FP: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"(?i)GNU\s+General\s+Public\s+License|gnu\.org/licenses")
        .expect("Static regex pattern is valid")
});
366#[allow(clippy::expect_used)]
367static BSD3_FP: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
368    Regex::new(r"(?i)Redistribution and use.*permitted|BSD.*3.*Clause")
369        .expect("Static regex pattern is valid")
370});
371
/// SPDX tag regex — captures the identifier value.
///
/// Group 1 is the run of word characters, `-` and `.` that follows
/// `SPDX-License-Identifier:` (matched case-insensitively).
// The pattern is a fixed literal, so `expect` can only fire on a programming error.
#[allow(clippy::expect_used)]
static SPDX_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"(?i)SPDX-License-Identifier:\s*([\w\-.]+)").expect("Static regex pattern is valid")
});

/// `Copyright [(c)] YEAR[-YEAR] <AUTHOR>` — captures the author tail up to
/// end-of-line. The caller strips trailing "All rights reserved."
/// boilerplate; legal-name periods (`Inc.`, `LLC.`) are preserved.
///
/// The `m` flag makes `$` match at the end of each line, and group 1 never
/// contains `\r` or `\n`.
// NOTE(review): the `\s*` before group 1 can also consume a newline, so a
// copyright line with no author would presumably capture text from the next
// line — confirm whether that matters.
#[allow(clippy::expect_used)]
static AUTHOR_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"(?im)Copyright\s*(?:\([cC]\))?\s*\d{4}(?:\s*[-,]\s*\d{4})?\s*,?\s*([^\r\n]+)$")
        .expect("Static regex pattern is valid")
});
386
387/// Detect the author name in the existing header.
388/// Returns the trimmed author string, or `None` if no recognisable
389/// `Copyright YEAR <author>` line is found in the first 20 lines.
390fn detect_header_author(content: &str) -> Option<String> {
391    let head: String = content.lines().take(20).collect::<Vec<_>>().join("\n");
392    let caps = AUTHOR_RE.captures(&head)?;
393    let raw = caps.get(1)?.as_str().trim();
394
395    // Strip "All rights reserved" boilerplate (case-insensitive). Preserve
396    // the trailing period of legal suffixes like "Inc." / "Corp." — only
397    // strip a separator comma or whitespace between the name and the
398    // boilerplate (e.g. BSD's "Acme, All rights reserved").
399    let lower = raw.to_ascii_lowercase();
400    let cleaned = if let Some(idx) = lower.rfind("all rights reserved") {
401        raw[..idx].trim_end().trim_end_matches(',').trim_end()
402    } else {
403        raw.trim_end_matches(|c: char| c == ',' || c.is_whitespace())
404    };
405
406    if cleaned.is_empty() {
407        None
408    } else {
409        Some(cleaned.to_string())
410    }
411}
412
413/// Detect which license the existing header uses.
414/// Returns a SPDX-style identifier or `None` if unrecognised.
415fn detect_header_license(content: &str) -> Option<&'static str> {
416    let head: String = content.lines().take(30).collect::<Vec<_>>().join("\n");
417
418    // Prefer an explicit SPDX tag if present.
419    if let Some(caps) = SPDX_RE.captures(&head) {
420        let id = caps.get(1).map_or("", |m| m.as_str());
421        return match id.to_ascii_lowercase().as_str() {
422            "apache-2.0" => Some("apache-2.0"),
423            "mit" => Some("mit"),
424            "gpl-3.0" | "gpl-3.0-only" | "gpl-3.0-or-later" => Some("gpl-3.0"),
425            "bsd-3-clause" => Some("bsd-3-clause"),
426            _ => None,
427        };
428    }
429
430    // Fingerprint-based detection.
431    if APACHE_FP.is_match(&head) {
432        return Some("apache-2.0");
433    }
434    if MIT_FP.is_match(&head) {
435        return Some("mit");
436    }
437    if GPL3_FP.is_match(&head) {
438        return Some("gpl-3.0");
439    }
440    if BSD3_FP.is_match(&head) {
441        return Some("bsd-3-clause");
442    }
443    None
444}
445
446// ── Body License-Name Replacement ───────────────────────────────────────────
447
/// Prose name for a lower-case license identifier; `"Unknown"` for anything
/// not in the table.
fn license_display_name(spdx: &str) -> &'static str {
    const NAMES: [(&str, &str); 4] = [
        ("apache-2.0", "Apache License, Version 2.0"),
        ("mit", "MIT License"),
        ("gpl-3.0", "GNU General Public License v3.0"),
        ("bsd-3-clause", "BSD 3-Clause License"),
    ];
    NAMES
        .iter()
        .find(|(id, _)| *id == spdx)
        .map_or("Unknown", |&(_, name)| name)
}
458
/// Trailing path segment of the `img.shields.io/badge/…` URL for a license;
/// a grey "Unknown" badge for anything not in the table.
fn license_badge_fragment(spdx: &str) -> &'static str {
    const FRAGMENTS: [(&str, &str); 4] = [
        ("apache-2.0", "License-Apache%202.0-blue.svg"),
        ("mit", "License-MIT-blue.svg"),
        ("gpl-3.0", "License-GPL%20v3-blue.svg"),
        ("bsd-3-clause", "License-BSD%203--Clause-blue.svg"),
    ];
    for (id, fragment) in FRAGMENTS {
        if id == spdx {
            return fragment;
        }
    }
    "License-Unknown-lightgrey.svg"
}
469
/// Alt-text label used for a license badge in Markdown; plain `"License"`
/// for anything not in the table.
fn license_badge_label(spdx: &str) -> &'static str {
    const LABELS: [(&str, &str); 4] = [
        ("apache-2.0", "License: Apache 2.0"),
        ("mit", "License: MIT"),
        ("gpl-3.0", "License: GPL v3"),
        ("bsd-3-clause", "License: BSD 3-Clause"),
    ];
    LABELS
        .iter()
        .find(|(id, _)| *id == spdx)
        .map_or("License", |&(_, label)| label)
}
480
/// Map a lower-case license identifier to its canonically-cased SPDX form.
/// Identifiers not in the table are returned unchanged.
fn license_spdx_canonical(spdx: &str) -> &str {
    const CANONICAL: [(&str, &str); 4] = [
        ("apache-2.0", "Apache-2.0"),
        ("mit", "MIT"),
        ("gpl-3.0", "GPL-3.0-only"),
        ("bsd-3-clause", "BSD-3-Clause"),
    ];
    CANONICAL
        .iter()
        .find(|(id, _)| *id == spdx)
        .map_or(spdx, |&(_, canonical)| canonical)
}
491
492/// Replace targeted license-name patterns in the body of a file.
493///
494/// This only touches well-known patterns (SPDX tags, shield.io badges,
495/// "licensed under" prose) — generic mentions of license names (e.g. in
496/// dependency lists) are intentionally left alone.
497fn replace_license_mentions(content: &str, from: &str, to: &str) -> Result<String> {
498    let mut out = content.to_string();
499
500    // 1. SPDX-License-Identifier tags.
501    let spdx_from = license_spdx_canonical(from);
502    let spdx_to = license_spdx_canonical(to);
503    let spdx_pat = Regex::new(&format!(
504        r"(?i)(SPDX-License-Identifier:\s*){}",
505        regex::escape(spdx_from)
506    ))
507    .context("Invalid Regex pattern for SPDX-License-Identifier")?;
508    out = spdx_pat
509        .replace_all(&out, format!("${{1}}{spdx_to}"))
510        .to_string();
511
512    // 2. Shields.io badge URLs.
513    let badge_from = license_badge_fragment(from);
514    let badge_to = license_badge_fragment(to);
515    out = out.replace(badge_from, badge_to);
516
517    // 3. Shields.io badge alt-text / Markdown label.
518    let label_from = license_badge_label(from);
519    let label_to = license_badge_label(to);
520    out = out.replace(label_from, label_to);
521
522    // 4. "licensed under the <License Name>" prose (case-insensitive).
523    let name_from = license_display_name(from);
524    let name_to = license_display_name(to);
525    if name_from != "Unknown" && name_to != "Unknown" {
526        // Plain text.
527        let prose_pat = Regex::new(&format!(r"(?i){}", regex::escape(name_from)))
528            .context("Invalid Regex pattern for license prose")?;
529        out = prose_pat.replace_all(&out, name_to).to_string();
530        // Bold Markdown variant: **MIT License** → **Apache License, Version 2.0**
531        let bold_from = format!("**{name_from}**");
532        let bold_to = format!("**{name_to}**");
533        out = out.replace(&bold_from, &bold_to);
534    }
535
536    Ok(out)
537}
538
539fn strip_existing_header(content: &str) -> String {
540    let (shebang, rest) = split_shebang(content);
541
542    // Try block-comment stripping first
543    if let Some(stripped) = try_strip_block_header(rest) {
544        return join_shebang(shebang, &stripped);
545    }
546
547    // Try line-comment stripping
548    let lines: Vec<&str> = rest.lines().collect();
549    let end_idx = find_line_header_end_index(&lines);
550    if let Some(idx) = end_idx {
551        let stripped = lines[(idx + 1)..]
552            .join("\n")
553            .trim_start_matches(['\r', '\n'])
554            .to_string();
555        return join_shebang(shebang, &stripped);
556    }
557
558    content.to_string()
559}
560
/// Separate a leading `#!` line from the remainder of `content`.
///
/// Returns `(None, content)` when `content` does not start with `#!`.
/// Otherwise the first element is the first line without its `\n`, and the
/// second is what follows with leading `\r`/`\n` characters removed (empty
/// when the file is a single line).
fn split_shebang(content: &str) -> (Option<&str>, &str) {
    if !content.starts_with("#!") {
        return (None, content);
    }
    match content.split_once('\n') {
        Some((first_line, tail)) => (
            Some(first_line),
            tail.trim_start_matches(['\r', '\n']),
        ),
        None => (Some(content), ""),
    }
}
573
/// Put `shebang` (if any) back in front of `body`, separated by one `\n`.
fn join_shebang(shebang: Option<&str>, body: &str) -> String {
    match shebang {
        Some(line) => format!("{line}\n{body}"),
        None => body.to_owned(),
    }
}
577
578fn try_strip_block_header(content: &str) -> Option<String> {
579    // C-style block comments: /* ... */
580    #[allow(clippy::expect_used)]
581    static BLOCK_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
582        Regex::new(r"^\s*/\*[\s\S]*?\*/\s*").expect("Static regex pattern is valid")
583    });
584    // XML-style comments: <!-- ... -->
585    #[allow(clippy::expect_used)]
586    static XML_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
587        Regex::new(r"^\s*<!--[\s\S]*?-->\s*").expect("Static regex pattern is valid")
588    });
589
590    for re in [&*BLOCK_RE, &*XML_RE] {
591        if let Some(mat) = re.find(content) {
592            if HEADER_RE.is_match(mat.as_str()) {
593                return Some(
594                    content[mat.end()..]
595                        .trim_start_matches(['\r', '\n'])
596                        .to_string(),
597                );
598            }
599        }
600    }
601    None
602}
603
604fn find_line_header_end_index(lines: &[&str]) -> Option<usize> {
605    let mut header_end: Option<usize> = None;
606    let mut in_header = false;
607    let max_lines = 30.min(lines.len());
608
609    for (i, line) in lines.iter().enumerate().take(max_lines) {
610        if COMMENT_START_RE.is_match(line) {
611            if HEADER_RE.is_match(line) {
612                in_header = true;
613            }
614            if in_header {
615                header_end = Some(i);
616            }
617        } else if line.trim().is_empty() && in_header {
618            header_end = Some(i);
619        } else {
620            break;
621        }
622    }
623
624    if in_header {
625        header_end
626    } else {
627        None
628    }
629}
630
631// ── Binary Detection ────────────────────────────────────────────────────────
632
/// Heuristically decide whether `content` is binary data.
///
/// Any NUL character anywhere marks the content as binary. Otherwise the
/// first 1024 bytes (cut back to a character boundary) are sampled, and the
/// content is binary when more than 10% of the sampled *characters* are
/// control characters other than tab, LF, VT, FF and CR (code points below 9,
/// 14–31, and 127–159). Empty content is not binary.
fn is_binary(content: &str) -> bool {
    if content.contains('\0') {
        return true;
    }

    // Never slice in the middle of a multi-byte character.
    let mut limit = content.len().min(1024);
    while !content.is_char_boundary(limit) {
        limit -= 1;
    }
    let sample = &content[..limit];

    // Count both totals in characters so numerator and denominator share a
    // unit; dividing by the byte length would dilute the ratio for
    // multi-byte text.
    let (total, non_printable) =
        sample
            .chars()
            .fold((0usize, 0usize), |(total, flagged), c| {
                let code = u32::from(c);
                let is_control =
                    code < 9 || (14..32).contains(&code) || (127..160).contains(&code);
                (total + 1, flagged + usize::from(is_control))
            });
    if total == 0 {
        return false;
    }

    // Both counts are at most 1024, far below f64's exact-integer range.
    #[allow(clippy::cast_precision_loss)]
    let ratio = non_printable as f64 / total as f64;
    ratio > 0.1
}
656
657// ── File Discovery ──────────────────────────────────────────────────────────
658
659// Directory excludes are now sourced from `.gitignore` via crate::gitignore.
660
661fn collect_files_from_globs(patterns: &[String], verbose: bool) -> Result<Vec<PathBuf>> {
662    let mut files = Vec::new();
663    if verbose {
664        eprintln!("Searching with glob patterns...");
665    }
666    for pattern in patterns {
667        for entry in glob(pattern).context("Failed to read glob pattern")? {
668            match entry {
669                Ok(path) if path.is_file() => files.push(path),
670                _ => {}
671            }
672        }
673    }
674    Ok(files)
675}
676
/// List tracked files, plus untracked-but-not-ignored files, via
/// `git ls-files` run in the current working directory.
///
/// Returns `None` when git cannot be started or the tracked-files listing
/// fails. A failure of the untracked listing is tolerated and leaves just the
/// tracked files. Paths are returned exactly as git prints them.
fn collect_files_from_git(verbose: bool) -> Option<Vec<PathBuf>> {
    /// Run `git ls-files -z <extra…>` and split its NUL-separated output.
    ///
    /// `-z` is used because, without it, git wraps path names containing
    /// unusual characters in quotes and escapes them, which would not name an
    /// existing file.
    fn list(extra: &[&str]) -> Option<Vec<PathBuf>> {
        let output = Command::new("git")
            .args(["ls-files", "-z"])
            .args(extra)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        Some(
            String::from_utf8_lossy(&output.stdout)
                .split('\0')
                .filter(|entry| !entry.is_empty())
                .map(PathBuf::from)
                .collect(),
        )
    }

    if verbose {
        eprintln!("Attempting git ls-files...");
    }

    let mut files = list(&[])?;

    // Also pick up untracked (but not ignored) files.
    if let Some(untracked) = list(&["-o", "--exclude-standard"]) {
        files.extend(untracked);
    }

    Some(files)
}
709
710fn collect_files_from_walk(root: &Path) -> Vec<PathBuf> {
711    WalkDir::new(root)
712        .into_iter()
713        .filter_map(std::result::Result::ok)
714        .filter(|e| e.file_type().is_file())
715        .map(walkdir::DirEntry::into_path)
716        .collect()
717}
718
719fn discover_files(args: &CopyrightArgs) -> Result<Vec<PathBuf>> {
720    let root = crate::utils::find_project_root();
721
722    let raw = if !args.glob.is_empty() {
723        // Adjust globs to be relative to root or handle them as is
724        collect_files_from_globs(&args.glob, args.verbose)?
725    } else if let Some(git_files) = collect_files_from_git(args.verbose) {
726        git_files.into_iter().map(|p| root.join(p)).collect()
727    } else {
728        if args.verbose {
729            eprintln!(
730                "git not available, falling back to directory walk from {}.",
731                root.display()
732            );
733        }
734        collect_files_from_walk(&root)
735    };
736
737    // Build exclude set: user excludes + gitignore-derived dirs
738    let gitignore_excludes = crate::gitignore::parse_gitignore(&root);
739    let exclude_patterns: Vec<String> = args
740        .exclude
741        .iter()
742        .cloned()
743        .chain(gitignore_excludes)
744        .collect();
745
746    // Normalize extensions for filtering
747    let ext_filter: HashSet<String> = args
748        .ext
749        .iter()
750        .map(|e| e.trim_start_matches('.').to_ascii_lowercase())
751        .collect();
752
753    let files: Vec<PathBuf> = raw
754        .into_iter()
755        .filter(|p| {
756            let s = p.to_string_lossy();
757            !exclude_patterns.iter().any(|ex| s.contains(ex.as_str()))
758        })
759        .filter(|p| {
760            if ext_filter.is_empty() {
761                return true;
762            }
763            p.extension()
764                .and_then(|e| e.to_str())
765                .is_some_and(|e| ext_filter.contains(&e.to_ascii_lowercase()))
766        })
767        .collect();
768
769    // Deduplicate (globs or git can return dupes)
770    let mut seen = HashSet::with_capacity(files.len());
771    Ok(files
772        .into_iter()
773        .filter(|p| seen.insert(p.clone()))
774        .collect())
775}
776
777// ── Processing ──────────────────────────────────────────────────────────────
778
/// Per-run counters accumulated while processing files. All start at zero.
#[derive(Default)]
struct Stats {
    // Presumably files whose header was written or rewritten — TODO confirm
    // against the code that increments it.
    updated: usize,
    // Files left untouched: unreadable, empty, binary, unsupported type, or
    // already carrying a correct header.
    skipped: usize,
    // `--check` mode: files with no header at all.
    missing: usize,
    // `--check` mode: files whose header names a different license or author.
    mismatched: usize,
    // Presumably files that failed during processing — TODO confirm against
    // the code that increments it.
    errors: usize,
}
787
788/// Process a single file to update its copyright header.
789///
790/// # Errors
791/// Returns an error if reading from or writing to the file fails, or if license replacement fails.
792fn process_file(
793    path: &Path,
794    license_body: &str,
795    args: &CopyrightArgs,
796    stats: &mut Stats,
797) -> Result<()> {
798    let content = match fs::read_to_string(path) {
799        Ok(c) => c,
800        Err(e) => {
801            if args.verbose {
802                eprintln!("Skipping {}: {e}", path.display());
803            }
804            stats.skipped += 1;
805            return Ok(());
806        }
807    };
808
809    if content.trim().is_empty() || is_binary(&content) {
810        stats.skipped += 1;
811        return Ok(());
812    }
813
814    let Some(style) = get_comment_style(path, &content) else {
815        if args.verbose {
816            eprintln!("Skipping (unsupported type): {}", path.display());
817        }
818        stats.skipped += 1;
819        return Ok(());
820    };
821
822    let already_has_header = has_header(&content);
823    let detected_license = if already_has_header {
824        detect_header_license(&content)
825    } else {
826        None
827    };
828    let detected_author = if already_has_header {
829        detect_header_author(&content)
830    } else {
831        None
832    };
833    let is_license_mismatch =
834        already_has_header && detected_license.is_some_and(|d| d != args.license);
835    let is_author_mismatch =
836        already_has_header && detected_author.as_deref().is_some_and(|a| a != args.author);
837    let is_mismatch = is_license_mismatch || is_author_mismatch;
838
839    // --check mode: report missing *and* mismatched headers.
840    if args.check {
841        if !already_has_header {
842            println!("Missing header: {}", path.display());
843            stats.missing += 1;
844        } else if is_license_mismatch {
845            println!(
846                "Mismatched license ({} → {}): {}",
847                detected_license.unwrap_or("unknown"),
848                args.license,
849                path.display()
850            );
851            stats.mismatched += 1;
852        } else if is_author_mismatch {
853            println!(
854                "Mismatched author ({} → {}): {}",
855                detected_author.as_deref().unwrap_or("unknown"),
856                args.author,
857                path.display()
858            );
859            stats.mismatched += 1;
860        }
861        return Ok(());
862    }
863
864    // Decide whether we need to rewrite this file.
865    let needs_rewrite = !already_has_header       // no header yet
866        || args.force                              // explicit force
867        || is_mismatch; // wrong license OR wrong author
868
869    if !needs_rewrite {
870        if args.verbose {
871            eprintln!("Skipping (correct header): {}", path.display());
872        }
873        stats.skipped += 1;
874        return Ok(());
875    }
876
877    // Strip old header when replacing.
878    let base = if already_has_header {
879        strip_existing_header(&content)
880    } else {
881        content.clone()
882    };
883
884    // Replace stale license-name mentions in the body when migrating.
885    let base = if let Some(old_license) = detected_license {
886        if old_license == args.license {
887            base
888        } else {
889            replace_license_mentions(&base, old_license, &args.license)?
890        }
891    } else {
892        base
893    };
894
895    let header = build_header(style, license_body);
896    let new_content = prepend_header(&base, &header);
897
898    if args.dry_run {
899        if is_mismatch {
900            println!(
901                "Would migrate ({} → {}): {}",
902                detected_license.unwrap_or("unknown"),
903                args.license,
904                path.display()
905            );
906        } else {
907            println!("Would update: {}", path.display());
908        }
909        stats.updated += 1;
910        return Ok(());
911    }
912
913    match fs::write(path, &new_content) {
914        Ok(()) => {
915            if is_mismatch {
916                if args.verbose {
917                    eprintln!(
918                        "Migrated ({} → {}): {}",
919                        detected_license.unwrap_or("unknown"),
920                        args.license,
921                        path.display()
922                    );
923                }
924                stats.mismatched += 1;
925            } else if args.verbose {
926                eprintln!("Updated: {}", path.display());
927            }
928            stats.updated += 1;
929        }
930        Err(e) => {
931            eprintln!("Error writing {}: {e}", path.display());
932            stats.errors += 1;
933        }
934    }
935    Ok(())
936}
937
938/// Prepend header, preserving any shebang line at position 0.
939fn prepend_header(content: &str, header: &str) -> String {
940    if content.starts_with("#!") {
941        let (shebang, rest) = split_shebang(content);
942        match shebang {
943            Some(s) => format!("{s}\n\n{header}{rest}"),
944            None => format!("{header}{content}"),
945        }
946    } else {
947        format!("{header}{content}")
948    }
949}
950
951// ── Entry Point ─────────────────────────────────────────────────────────────
952
953/// Run the copyright header management command.
954/// Runs the copyright tool with the provided arguments.
955///
956/// # Errors
957/// Returns an error if the license year is invalid, the license file cannot be read,
958/// or if processing any of the files fails.
959pub fn run(args: &CopyrightArgs) -> Result<()> {
960    // Validate license upfront
961    if !VALID_LICENSES.contains(&args.license.as_str()) {
962        anyhow::bail!(
963            "Invalid license type: '{}'. Valid options: {:?}",
964            args.license,
965            VALID_LICENSES
966        );
967    }
968
969    let year = args
970        .year
971        .clone()
972        .unwrap_or_else(|| chrono::Utc::now().year().to_string());
973    let license_body = get_license_template(&args.license, &args.author, &year)?;
974
975    let files = discover_files(args)?;
976    if args.verbose {
977        eprintln!("Found {} files to process.", files.len());
978    }
979
980    let mut stats = Stats::default();
981
982    for path in &files {
983        process_file(path, &license_body, args, &mut stats)?;
984    }
985
986    // Report results
987    if args.check {
988        let problems = stats.missing + stats.mismatched;
989        if problems > 0 {
990            anyhow::bail!(
991                "{} file(s) have issues ({} missing, {} mismatched).",
992                problems,
993                stats.missing,
994                stats.mismatched
995            );
996        }
997        if args.verbose {
998            println!("All files have correct copyright headers.");
999        }
1000    } else if stats.updated > 0 || stats.errors > 0 {
1001        println!(
1002            "Done. Updated: {}, Migrated: {}, Skipped: {}, Errors: {}",
1003            stats.updated, stats.mismatched, stats.skipped, stats.errors
1004        );
1005    }
1006
1007    Ok(())
1008}
1009
1010// ── Tests ───────────────────────────────────────────────────────────────────
1011
/// Unit tests for header detection, header construction, license/author
/// detection, and license-mention replacement.
#[cfg(test)]
// `unwrap` is acceptable in tests: a panic is exactly the failure signal we want.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    #[test]
    fn test_has_header_positive() {
        assert!(has_header("// Copyright (c) 2024 Acme\nfn main() {}"));
        assert!(has_header("# copyright 2023 Foo\nimport os"));
        assert!(has_header("/* SPDX-License-Identifier: MIT */\n"));
    }

    #[test]
    fn test_has_header_negative() {
        assert!(!has_header("fn main() { println!(\"hello\"); }"));
        assert!(!has_header("#!/usr/bin/env python\nimport sys"));
    }

    #[test]
    fn test_is_binary() {
        assert!(is_binary("\0ELF binary content"));
        assert!(!is_binary("fn main() { println!(\"hello\"); }"));
    }

    #[test]
    fn test_is_binary_char_boundary() {
        // '─' is 3 bytes: [226, 148, 128]
        // We want to place it so it straddles the 1024 byte boundary.
        let mut content = "a".repeat(1023);
        content.push('─'); // bytes 1023, 1024, 1025
        content.push_str(" rest of content");

        // This should not panic
        assert!(!is_binary(&content));
    }

    #[test]
    fn test_build_header_block() {
        let header = build_header(&C_STYLE_BLOCK, "Copyright 2024 Test");
        assert!(header.starts_with("/**\n"));
        assert!(header.contains(" * Copyright 2024 Test"));
        assert!(header.contains(" */\n"));
    }

    #[test]
    fn test_build_header_line() {
        let header = build_header(&HASH_LINE, "Copyright 2024 Test");
        assert!(header.starts_with("# Copyright 2024 Test\n"));
        assert!(!header.contains("/**"));
    }

    #[test]
    fn test_shebang_preserved() {
        let content = "#!/usr/bin/env python\nimport os\n";
        let header = "# Copyright 2024 Test\n\n";
        let result = prepend_header(content, header);
        assert!(result.starts_with("#!/usr/bin/env python\n"));
        assert!(result.contains("# Copyright 2024 Test"));
        assert!(result.contains("import os"));
    }

    #[test]
    fn test_prepend_header_without_shebang() {
        // Without a leading "#!" the header is placed directly before the content.
        let result = prepend_header("fn main() {}\n", "// Copyright 2024 Test\n\n");
        assert_eq!(result, "// Copyright 2024 Test\n\nfn main() {}\n");
    }

    #[test]
    fn test_strip_existing_block_header() {
        let content = "/** Copyright (c) 2023 Old */\nfn main() {}";
        let stripped = strip_existing_header(content);
        assert_eq!(stripped.trim(), "fn main() {}");
    }

    #[test]
    fn test_get_comment_style() {
        let rs_path = Path::new("main.rs");
        let py_path = Path::new("script.py");
        let html_path = Path::new("index.html");

        assert_eq!(
            get_comment_style(rs_path, "fn main()")
                .expect("Should return comment style for .rs")
                .kind,
            CommentKind::Block
        );
        assert_eq!(
            get_comment_style(py_path, "import os")
                .expect("Should return comment style for .py")
                .kind,
            CommentKind::Line
        );
        assert_eq!(
            get_comment_style(html_path, "<html>")
                .expect("Should return comment style for .html")
                .kind,
            CommentKind::Block
        );
    }

    #[test]
    fn test_license_templates() {
        for license in VALID_LICENSES {
            let result = get_license_template(license, "Test", "2024");
            assert!(result.is_ok(), "Failed for license: {license}");
            assert!(
                result.expect("Should generate template").contains("2024"),
                "Template should contain year"
            );
        }
        assert!(get_license_template("invalid", "Test", "2024").is_err());
    }

    // ── License Detection Tests ─────────────────────────────────────────

    #[test]
    fn test_detect_apache_header() {
        let content = "/*\n * Copyright 2024 Acme\n *\n * Licensed under the Apache License, Version 2.0\n */\nfn main() {}";
        assert_eq!(detect_header_license(content), Some("apache-2.0"));
    }

    #[test]
    fn test_detect_mit_header() {
        let content = "/*\n * Copyright (c) 2024 Acme\n *\n * Permission is hereby granted, free of charge\n */\nfn main() {}";
        assert_eq!(detect_header_license(content), Some("mit"));
    }

    #[test]
    fn test_detect_gpl_header() {
        let content = "# Copyright (C) 2024 Acme\n# GNU General Public License v3\nimport os";
        assert_eq!(detect_header_license(content), Some("gpl-3.0"));
    }

    #[test]
    fn test_detect_bsd_header() {
        let content = "/*\n * Copyright (c) 2024 Acme\n * Redistribution and use in source and binary forms, with or without modification, are permitted\n */\nint main() {}";
        assert_eq!(detect_header_license(content), Some("bsd-3-clause"));
    }

    #[test]
    fn test_detect_spdx_tag() {
        assert_eq!(
            detect_header_license("// SPDX-License-Identifier: MIT\nfn main() {}"),
            Some("mit")
        );
        assert_eq!(
            detect_header_license("// SPDX-License-Identifier: Apache-2.0\nfn main() {}"),
            Some("apache-2.0")
        );
    }

    #[test]
    fn test_detect_no_license() {
        assert_eq!(
            detect_header_license("// Copyright (c) 2024 Acme\nfn main() {}"),
            None
        );
    }

    // ── Author Detection Tests ──────────────────────────────────────────

    #[test]
    fn test_detect_author_basic() {
        assert_eq!(
            detect_header_author("# Copyright 2026 ResQ Software\nfoo"),
            Some("ResQ Software".to_string())
        );
    }

    #[test]
    fn test_detect_author_with_c_marker_preserves_legal_period() {
        // "Corp." ending must stay — it's part of the legal name.
        assert_eq!(
            detect_header_author("// Copyright (c) 2024 Acme Corp.\nfn main() {}"),
            Some("Acme Corp.".to_string())
        );
    }

    #[test]
    fn test_detect_author_with_capital_c_marker() {
        assert_eq!(
            detect_header_author("# Copyright (C) 2024 Acme\nx = 1"),
            Some("Acme".to_string())
        );
    }

    #[test]
    fn test_detect_author_year_range_preserves_inc_period() {
        // "ResQ Systems, Inc." with trailing legal period must round-trip.
        assert_eq!(
            detect_header_author("// Copyright 2024-2026 ResQ Systems, Inc.\nfn main() {}"),
            Some("ResQ Systems, Inc.".to_string())
        );
    }

    #[test]
    fn test_detect_author_strips_all_rights_reserved() {
        assert_eq!(
            detect_header_author(
                "/*\n * Copyright (c) 2024 Acme Corp. All rights reserved.\n */\nint main() {}"
            ),
            Some("Acme Corp.".to_string())
        );
    }

    #[test]
    fn test_detect_author_bsd_comma() {
        assert_eq!(
            detect_header_author(
                "/*\n * Copyright (c) 2024, Acme\n * All rights reserved.\n */\nint main() {}"
            ),
            Some("Acme".to_string())
        );
    }

    #[test]
    fn test_detect_author_block_comment() {
        let content = "/*\n * Copyright 2026 ResQ Software\n *\n * Licensed under Apache-2.0\n */\nfn main() {}";
        assert_eq!(
            detect_header_author(content),
            Some("ResQ Software".to_string())
        );
    }

    #[test]
    fn test_detect_author_none_when_no_copyright_line() {
        assert_eq!(
            detect_header_author("// SPDX-License-Identifier: Apache-2.0\nfn main() {}"),
            None
        );
    }

    #[test]
    fn test_detect_author_none_on_empty() {
        assert_eq!(detect_header_author(""), None);
    }

    // ── Body License-Name Replacement Tests ─────────────────────────────

    #[test]
    fn test_replace_spdx_identifier() {
        let input = "// SPDX-License-Identifier: MIT\nfn main() {}";
        let result = replace_license_mentions(input, "mit", "apache-2.0").unwrap();
        assert!(result.contains("SPDX-License-Identifier: Apache-2.0"));
        assert!(!result.contains("SPDX-License-Identifier: MIT"));
    }

    #[test]
    fn test_replace_badge_url() {
        let input = "[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)";
        let result = replace_license_mentions(input, "mit", "apache-2.0").unwrap();
        assert!(result.contains("License-Apache%202.0-blue.svg"));
        assert!(result.contains("License: Apache 2.0"));
        assert!(!result.contains("License-MIT-blue.svg"));
    }

    #[test]
    fn test_replace_prose_license_name() {
        let input = "This project is licensed under the **MIT License** - see LICENSE.";
        let result = replace_license_mentions(input, "mit", "apache-2.0").unwrap();
        assert!(result.contains("**Apache License, Version 2.0**"));
        assert!(!result.contains("MIT License"));
    }

    #[test]
    fn test_replace_no_false_positives() {
        // Should not change dependency-level mentions that don't match known patterns.
        let input = "dependencies:\n  some-lib: MIT\n  other-lib: BSD";
        let result = replace_license_mentions(input, "mit", "apache-2.0").unwrap();
        // "MIT" alone without "License" suffix should not be replaced
        // (the prose pattern matches "MIT License", not bare "MIT")
        assert!(result.contains("some-lib: MIT"));
    }
}