Skip to main content

resq_cli/commands/
copyright.rs

1/*
2 * Copyright 2026 ResQ
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! Copyright header command.
18//!
19//! Checks and updates copyright headers in source files to ensure
20//! proper licensing and attribution.
21
22use anyhow::{Context, Result};
23use chrono::Datelike;
24use glob::glob;
25use regex::Regex;
26use std::collections::HashSet;
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::process::Command;
30use walkdir::WalkDir;
31
32// ── CLI Args ────────────────────────────────────────────────────────────────
33
34/// CLI arguments for the copyright header management command.
35#[derive(clap::Args, Debug)]
36#[allow(clippy::struct_excessive_bools)]
37pub struct CopyrightArgs {
38    /// License type (apache-2.0, mit, gpl-3.0, bsd-3-clause)
39    #[arg(short, long, default_value = "apache-2.0")]
40    pub license: String,
41
42    /// Copyright holder name
43    #[arg(short, long, default_value = "ResQ Systems, Inc.")]
44    pub author: String,
45
46    /// Copyright year (defaults to current year)
47    #[arg(short, long)]
48    pub year: Option<String>,
49
50    /// Overwrite existing headers
51    #[arg(long)]
52    pub force: bool,
53
54    /// Preview changes without writing files
55    #[arg(long)]
56    pub dry_run: bool,
57
58    /// Check for missing headers (CI mode, exits non-zero if any missing)
59    #[arg(long)]
60    pub check: bool,
61
62    /// Print detailed processing info
63    #[arg(short, long)]
64    pub verbose: bool,
65
66    /// Glob patterns to match files (e.g. "src/**/*.rs")
67    #[arg(long)]
68    pub glob: Vec<String>,
69
70    /// File extensions to include (e.g. --ext rs,js,py)
71    #[arg(long, value_delimiter = ',')]
72    pub ext: Vec<String>,
73
74    /// Patterns to exclude from processing
75    #[arg(short, long)]
76    pub exclude: Vec<String>,
77}
78
79// ── License Templates ───────────────────────────────────────────────────────
80
81const VALID_LICENSES: &[&str] = &["apache-2.0", "mit", "gpl-3.0", "bsd-3-clause"];
82
83fn get_license_template(license: &str, author: &str, year: &str) -> Result<String> {
84    let text = match license {
85        "apache-2.0" => format!(
86            "Copyright {year} {author}\n\n\
87             Licensed under the Apache License, Version 2.0 (the \"License\");\n\
88             you may not use this file except in compliance with the License.\n\
89             You may obtain a copy of the License at\n\n\
90             \x20   http://www.apache.org/licenses/LICENSE-2.0\n\n\
91             Unless required by applicable law or agreed to in writing, software\n\
92             distributed under the License is distributed on an \"AS IS\" BASIS,\n\
93             WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n\
94             See the License for the specific language governing permissions and\n\
95             limitations under the License."
96        ),
97        "mit" => format!(
98            "Copyright (c) {year} {author}\n\n\
99             Permission is hereby granted, free of charge, to any person obtaining a copy\n\
100             of this software and associated documentation files (the \"Software\"), to deal\n\
101             in the Software without restriction, including without limitation the rights\n\
102             to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n\
103             copies of the Software, and to permit persons to whom the Software is\n\
104             furnished to do so, subject to the following conditions:\n\n\
105             The above copyright notice and this permission notice shall be included in all\n\
106             copies or substantial portions of the Software.\n\n\
107             THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n\
108             IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n\
109             FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n\
110             AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n\
111             LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n\
112             OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n\
113             SOFTWARE."
114        ),
115        "gpl-3.0" => format!(
116            "Copyright (C) {year} {author}\n\n\
117             This program is free software: you can redistribute it and/or modify\n\
118             it under the terms of the GNU General Public License as published by\n\
119             the Free Software Foundation, either version 3 of the License, or\n\
120             (at your option) any later version.\n\n\
121             This program is distributed in the hope that it will be useful,\n\
122             but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
123             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
124             GNU General Public License for more details.\n\n\
125             You should have received a copy of the GNU General Public License\n\
126             along with this program. If not, see <https://www.gnu.org/licenses/>."
127        ),
128        "bsd-3-clause" => format!(
129            "Copyright (c) {year}, {author}\n\
130             All rights reserved.\n\n\
131             Redistribution and use in source and binary forms, with or without\n\
132             modification, are permitted provided that the following conditions are met:\n\n\
133             1. Redistributions of source code must retain the above copyright notice, this\n\
134             \x20  list of conditions and the following disclaimer.\n\
135             2. Redistributions in binary form must reproduce the above copyright notice,\n\
136             \x20  this list of conditions and the following disclaimer in the documentation\n\
137             \x20  and/or other materials provided with the distribution.\n\
138             3. Neither the name of the copyright holder nor the names of its\n\
139             \x20  contributors may be used to endorse or promote products derived from\n\
140             \x20  this software without specific prior written permission.\n\n\
141             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n\
142             AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n\
143             IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n\
144             DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\n\
145             FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n\
146             DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n\
147             SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n\
148             CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n\
149             OR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
150             OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
151        ),
152        _ => anyhow::bail!("Unsupported license: '{license}'. Valid options: {VALID_LICENSES:?}"),
153    };
154    Ok(text)
155}
156
157// ── Comment Styles ──────────────────────────────────────────────────────────
158
/// How a header is laid out: inside one delimited block comment, or as a run
/// of single-line comments.
#[derive(Clone, Copy, Debug, PartialEq)]
enum CommentKind {
    /// Text is wrapped by an opening and a closing delimiter line.
    Block,
    /// Every line carries its own comment prefix; no delimiters.
    Line,
}
164
165#[derive(Debug)]
166struct CommentStyle {
167    kind: CommentKind,
168    open: Option<&'static str>,
169    line: &'static str,
170    close: Option<&'static str>,
171}
172
173impl CommentStyle {
174    const fn block(open: &'static str, line: &'static str, close: &'static str) -> Self {
175        Self {
176            kind: CommentKind::Block,
177            open: Some(open),
178            line,
179            close: Some(close),
180        }
181    }
182
183    const fn line(prefix: &'static str) -> Self {
184        Self {
185            kind: CommentKind::Line,
186            open: None,
187            line: prefix,
188            close: None,
189        }
190    }
191}
192
193const C_STYLE_BLOCK: CommentStyle = CommentStyle::block("/**", " *", " */");
194/// Rust uses `/* */` instead of `/** */` to avoid creating a doc comment that
195/// conflicts with `//!` inner doc comments in `lib.rs` crate roots.
196const RUST_BLOCK: CommentStyle = CommentStyle::block("/*", " *", " */");
197const XML_BLOCK: CommentStyle = CommentStyle::block("<!--", " ", "-->");
198const ASCIIDOC_BLOCK: CommentStyle = CommentStyle::block("////", "", "////");
199const HASH_LINE: CommentStyle = CommentStyle::line("#");
200const DASH_LINE: CommentStyle = CommentStyle::line("--");
201const ELISP_LINE: CommentStyle = CommentStyle::line(";;");
202
203/// Map file extension / filename to comment style.
204/// Returns `None` for unsupported or binary file types.
205fn get_comment_style(path: &Path, content: &str) -> Option<&'static CommentStyle> {
206    // Agent/Claude instruction files are not source files — omit copyright headers.
207    static SKIP_FILENAMES: &[&str] = &["AGENTS.md", "CLAUDE.md"];
208    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
209        if SKIP_FILENAMES.contains(&name) {
210            return None;
211        }
212    }
213
214    // Shebang lines always get hash-style comments
215    if content.starts_with("#!/") {
216        return Some(&HASH_LINE);
217    }
218
219    let ext = path
220        .extension()
221        .and_then(|e| e.to_str())
222        .unwrap_or("")
223        .to_ascii_lowercase();
224    let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
225
226    match ext.as_str() {
227        // C-family block comments
228        "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" | "css" | "scss" | "less" | "styl" | "c"
229        | "cc" | "cpp" | "h" | "hpp" | "cs" | "java" | "kt" | "kts" | "swift" | "m" | "mm"
230        | "go" | "php" | "dart" | "scala" | "groovy" | "gradle" | "proto" | "zig" | "v" | "sv" => {
231            Some(&C_STYLE_BLOCK)
232        }
233
234        // Rust — non-doc block comment to avoid conflicting with //! inner docs
235        "rs" => Some(&RUST_BLOCK),
236
237        // Markup / XML
238        "html" | "htm" | "xml" | "xhtml" | "svg" | "md" | "rst" | "xsl" | "xslt" | "vue"
239        | "svelte" => Some(&XML_BLOCK),
240
241        // AsciiDoc
242        "adoc" | "asciidoc" => Some(&ASCIIDOC_BLOCK),
243
244        // Hash-line comments
245        "sh" | "bash" | "zsh" | "fish" | "py" | "pyi" | "rb" | "pl" | "pm" | "yml" | "yaml"
246        | "toml" | "ini" | "cfg" | "conf" | "env" | "mk" | "make" | "r" | "jl" | "tf" | "hcl"
247        | "nix" | "cmake" => Some(&HASH_LINE),
248
249        // Double-dash comments
250        "sql" | "lua" | "hs" | "elm" => Some(&DASH_LINE),
251
252        // Elisp / Clojure
253        "el" | "clj" | "cljs" | "cljc" | "edn" => Some(&ELISP_LINE),
254
255        _ => {
256            // Fallback: match well-known filenames
257            static HASH_FILENAMES: &[&str] = &[
258                "Makefile",
259                "Dockerfile",
260                "Containerfile",
261                "Vagrantfile",
262                ".env",
263                ".gitignore",
264                ".dockerignore",
265                ".editorconfig",
266                "Gemfile",
267                "Rakefile",
268                "Justfile",
269                "CMakeLists.txt",
270            ];
271            if HASH_FILENAMES
272                .iter()
273                .any(|&name| name.eq_ignore_ascii_case(filename))
274            {
275                Some(&HASH_LINE)
276            } else {
277                None
278            }
279        }
280    }
281}
282
283// ── Header Construction ─────────────────────────────────────────────────────
284
285fn build_header(style: &CommentStyle, license_text: &str) -> String {
286    let lines: Vec<&str> = license_text.split('\n').collect();
287    let mut header = String::with_capacity(license_text.len() + lines.len() * 4 + 32);
288
289    match style.kind {
290        CommentKind::Block => {
291            if let Some(open) = style.open {
292                header.push_str(open);
293                header.push('\n');
294            }
295            for line in &lines {
296                if line.is_empty() {
297                    // Avoid trailing whitespace on blank comment lines
298                    header.push_str(style.line.trim_end());
299                } else {
300                    header.push_str(style.line);
301                    header.push(' ');
302                    header.push_str(line);
303                }
304                header.push('\n');
305            }
306            if let Some(close) = style.close {
307                header.push_str(close);
308                header.push('\n');
309            }
310            header.push('\n');
311        }
312        CommentKind::Line => {
313            for line in &lines {
314                if line.is_empty() {
315                    header.push_str(style.line.trim_end());
316                } else {
317                    header.push_str(style.line);
318                    header.push(' ');
319                    header.push_str(line);
320                }
321                header.push('\n');
322            }
323            header.push('\n');
324        }
325    }
326    header
327}
328
329// ── Header Detection ────────────────────────────────────────────────────────
330
331/// Compiled regexes, initialized once.
332#[allow(clippy::expect_used)]
333static HEADER_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
334    Regex::new(r"(?i)copyright\s*(\(c\)\s*)?\d{4}|SPDX-License-Identifier:")
335        .expect("Static regex pattern is valid")
336});
337
338#[allow(clippy::expect_used)]
339static COMMENT_START_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
340    Regex::new(r"^\s*(#|--|//|;;)").expect("Static regex pattern is valid")
341});
342
343/// Check whether the first N lines of `content` contain a copyright header.
344fn has_header(content: &str) -> bool {
345    let head: String = content.lines().take(20).collect::<Vec<_>>().join("\n");
346    HEADER_RE.is_match(&head)
347}
348
349// ── License Detection ───────────────────────────────────────────────────────
350
351/// Fingerprints that uniquely identify each supported license inside a header.
352#[allow(clippy::expect_used)]
353static APACHE_FP: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
354    Regex::new(r"(?i)Apache\s+License|apache\.org/licenses").expect("Static regex pattern is valid")
355});
356#[allow(clippy::expect_used)]
357static MIT_FP: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
358    Regex::new(r"(?i)Permission is hereby granted|\bMIT\s+License\b")
359        .expect("Static regex pattern is valid")
360});
361#[allow(clippy::expect_used)]
362static GPL3_FP: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
363    Regex::new(r"(?i)GNU\s+General\s+Public\s+License|gnu\.org/licenses")
364        .expect("Static regex pattern is valid")
365});
366#[allow(clippy::expect_used)]
367static BSD3_FP: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
368    Regex::new(r"(?i)Redistribution and use.*permitted|BSD.*3.*Clause")
369        .expect("Static regex pattern is valid")
370});
371
372/// SPDX tag regex — captures the identifier value.
373#[allow(clippy::expect_used)]
374static SPDX_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
375    Regex::new(r"(?i)SPDX-License-Identifier:\s*([\w\-.]+)").expect("Static regex pattern is valid")
376});
377
378/// Detect which license the existing header uses.
379/// Returns a SPDX-style identifier or `None` if unrecognised.
380fn detect_header_license(content: &str) -> Option<&'static str> {
381    let head: String = content.lines().take(30).collect::<Vec<_>>().join("\n");
382
383    // Prefer an explicit SPDX tag if present.
384    if let Some(caps) = SPDX_RE.captures(&head) {
385        let id = caps.get(1).map_or("", |m| m.as_str());
386        return match id.to_ascii_lowercase().as_str() {
387            "apache-2.0" => Some("apache-2.0"),
388            "mit" => Some("mit"),
389            "gpl-3.0" | "gpl-3.0-only" | "gpl-3.0-or-later" => Some("gpl-3.0"),
390            "bsd-3-clause" => Some("bsd-3-clause"),
391            _ => None,
392        };
393    }
394
395    // Fingerprint-based detection.
396    if APACHE_FP.is_match(&head) {
397        return Some("apache-2.0");
398    }
399    if MIT_FP.is_match(&head) {
400        return Some("mit");
401    }
402    if GPL3_FP.is_match(&head) {
403        return Some("gpl-3.0");
404    }
405    if BSD3_FP.is_match(&head) {
406        return Some("bsd-3-clause");
407    }
408    None
409}
410
411// ── Body License-Name Replacement ───────────────────────────────────────────
412
/// Full prose name of a supported license, or `"Unknown"` for any other id.
fn license_display_name(spdx: &str) -> &'static str {
    const NAMES: [(&str, &str); 4] = [
        ("apache-2.0", "Apache License, Version 2.0"),
        ("mit", "MIT License"),
        ("gpl-3.0", "GNU General Public License v3.0"),
        ("bsd-3-clause", "BSD 3-Clause License"),
    ];
    NAMES
        .iter()
        .find(|(id, _)| *id == spdx)
        .map_or("Unknown", |&(_, name)| name)
}
423
/// File-name part of the Shields.io badge URL (`img.shields.io/badge/…`) for a
/// license; unsupported ids get the grey "Unknown" badge.
fn license_badge_fragment(spdx: &str) -> &'static str {
    const FRAGMENTS: [(&str, &str); 4] = [
        ("apache-2.0", "License-Apache%202.0-blue.svg"),
        ("mit", "License-MIT-blue.svg"),
        ("gpl-3.0", "License-GPL%20v3-blue.svg"),
        ("bsd-3-clause", "License-BSD%203--Clause-blue.svg"),
    ];
    FRAGMENTS
        .iter()
        .find(|(id, _)| *id == spdx)
        .map_or("License-Unknown-lightgrey.svg", |&(_, fragment)| fragment)
}
434
/// Alt-text / Markdown label that accompanies the Shields.io badge of a
/// license; unsupported ids get the bare label `"License"`.
fn license_badge_label(spdx: &str) -> &'static str {
    const LABELS: [(&str, &str); 4] = [
        ("apache-2.0", "License: Apache 2.0"),
        ("mit", "License: MIT"),
        ("gpl-3.0", "License: GPL v3"),
        ("bsd-3-clause", "License: BSD 3-Clause"),
    ];
    LABELS
        .iter()
        .find(|(id, _)| *id == spdx)
        .map_or("License", |&(_, label)| label)
}
445
/// Canonically-cased SPDX identifier for a supported license id.
///
/// Ids that are not in the table are handed back unchanged.
fn license_spdx_canonical(spdx: &str) -> &str {
    const CANONICAL: [(&str, &str); 4] = [
        ("apache-2.0", "Apache-2.0"),
        ("mit", "MIT"),
        ("gpl-3.0", "GPL-3.0-only"),
        ("bsd-3-clause", "BSD-3-Clause"),
    ];
    CANONICAL
        .iter()
        .find(|(id, _)| *id == spdx)
        .map_or(spdx, |&(_, canonical)| canonical)
}
456
457/// Replace targeted license-name patterns in the body of a file.
458///
459/// This only touches well-known patterns (SPDX tags, shield.io badges,
460/// "licensed under" prose) — generic mentions of license names (e.g. in
461/// dependency lists) are intentionally left alone.
462fn replace_license_mentions(content: &str, from: &str, to: &str) -> Result<String> {
463    let mut out = content.to_string();
464
465    // 1. SPDX-License-Identifier tags.
466    let spdx_from = license_spdx_canonical(from);
467    let spdx_to = license_spdx_canonical(to);
468    let spdx_pat = Regex::new(&format!(
469        r"(?i)(SPDX-License-Identifier:\s*){}",
470        regex::escape(spdx_from)
471    ))
472    .context("Invalid Regex pattern for SPDX-License-Identifier")?;
473    out = spdx_pat
474        .replace_all(&out, format!("${{1}}{spdx_to}"))
475        .to_string();
476
477    // 2. Shields.io badge URLs.
478    let badge_from = license_badge_fragment(from);
479    let badge_to = license_badge_fragment(to);
480    out = out.replace(badge_from, badge_to);
481
482    // 3. Shields.io badge alt-text / Markdown label.
483    let label_from = license_badge_label(from);
484    let label_to = license_badge_label(to);
485    out = out.replace(label_from, label_to);
486
487    // 4. "licensed under the <License Name>" prose (case-insensitive).
488    let name_from = license_display_name(from);
489    let name_to = license_display_name(to);
490    if name_from != "Unknown" && name_to != "Unknown" {
491        // Plain text.
492        let prose_pat = Regex::new(&format!(r"(?i){}", regex::escape(name_from)))
493            .context("Invalid Regex pattern for license prose")?;
494        out = prose_pat.replace_all(&out, name_to).to_string();
495        // Bold Markdown variant: **MIT License** → **Apache License, Version 2.0**
496        let bold_from = format!("**{name_from}**");
497        let bold_to = format!("**{name_to}**");
498        out = out.replace(&bold_from, &bold_to);
499    }
500
501    Ok(out)
502}
503
504fn strip_existing_header(content: &str) -> String {
505    let (shebang, rest) = split_shebang(content);
506
507    // Try block-comment stripping first
508    if let Some(stripped) = try_strip_block_header(rest) {
509        return join_shebang(shebang, &stripped);
510    }
511
512    // Try line-comment stripping
513    let lines: Vec<&str> = rest.lines().collect();
514    let end_idx = find_line_header_end_index(&lines);
515    if let Some(idx) = end_idx {
516        let stripped = lines[(idx + 1)..]
517            .join("\n")
518            .trim_start_matches(['\r', '\n'])
519            .to_string();
520        return join_shebang(shebang, &stripped);
521    }
522
523    content.to_string()
524}
525
/// Split optional shebang from rest of file content.
///
/// Returns `(Some(first_line), remainder)` when `content` starts with a
/// shebang; the remainder has leading blank lines removed. Otherwise returns
/// `(None, content)`.
///
/// A first line of the form `#![…]` is a Rust inner attribute, not a shebang
/// (the Rust compiler applies the same rule), so such content is returned
/// untouched and a header can be placed above it.
fn split_shebang(content: &str) -> (Option<&str>, &str) {
    let is_shebang = content
        .strip_prefix("#!")
        .is_some_and(|after| !after.trim_start().starts_with('['));
    if !is_shebang {
        return (None, content);
    }

    match content.split_once('\n') {
        Some((shebang, rest)) => (Some(shebang), rest.trim_start_matches(['\r', '\n'])),
        // The whole file is just the shebang line.
        None => (Some(content), ""),
    }
}
538
/// Put a previously split-off shebang line back in front of `body`.
///
/// With a shebang the result is the shebang, a newline, then `body`; without
/// one it is just a copy of `body`.
fn join_shebang(shebang: Option<&str>, body: &str) -> String {
    match shebang {
        Some(first_line) => format!("{first_line}\n{body}"),
        None => body.to_string(),
    }
}
542
543fn try_strip_block_header(content: &str) -> Option<String> {
544    // C-style block comments: /* ... */
545    #[allow(clippy::expect_used)]
546    static BLOCK_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
547        Regex::new(r"^\s*/\*[\s\S]*?\*/\s*").expect("Static regex pattern is valid")
548    });
549    // XML-style comments: <!-- ... -->
550    #[allow(clippy::expect_used)]
551    static XML_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
552        Regex::new(r"^\s*<!--[\s\S]*?-->\s*").expect("Static regex pattern is valid")
553    });
554
555    for re in [&*BLOCK_RE, &*XML_RE] {
556        if let Some(mat) = re.find(content) {
557            if HEADER_RE.is_match(mat.as_str()) {
558                return Some(
559                    content[mat.end()..]
560                        .trim_start_matches(['\r', '\n'])
561                        .to_string(),
562                );
563            }
564        }
565    }
566    None
567}
568
569fn find_line_header_end_index(lines: &[&str]) -> Option<usize> {
570    let mut header_end: Option<usize> = None;
571    let mut in_header = false;
572    let max_lines = 30.min(lines.len());
573
574    for (i, line) in lines.iter().enumerate().take(max_lines) {
575        if COMMENT_START_RE.is_match(line) {
576            if HEADER_RE.is_match(line) {
577                in_header = true;
578            }
579            if in_header {
580                header_end = Some(i);
581            }
582        } else if line.trim().is_empty() && in_header {
583            header_end = Some(i);
584        } else {
585            break;
586        }
587    }
588
589    if in_header {
590        header_end
591    } else {
592        None
593    }
594}
595
596// ── Binary Detection ────────────────────────────────────────────────────────
597
/// Heuristically decide whether `content` is binary data rather than text.
///
/// Content containing a NUL character anywhere is binary. Otherwise the first
/// 1024 bytes (cut back to a character boundary) are sampled, and the content
/// is binary when more than 10% of the sampled *characters* are control
/// characters other than tab, LF, VT, FF and CR (U+0000–U+0008,
/// U+000E–U+001F, U+007F–U+009F).
///
/// The ratio is taken over characters on both sides; dividing the character
/// count by the byte length would understate it for multi-byte text.
fn is_binary(content: &str) -> bool {
    if content.contains('\0') {
        return true;
    }

    // Never slice through the middle of a multi-byte character.
    let mut limit = content.len().min(1024);
    while !content.is_char_boundary(limit) {
        limit -= 1;
    }

    let mut total = 0usize;
    let mut non_printable = 0usize;
    for c in content[..limit].chars() {
        total += 1;
        if matches!(u32::from(c), 0..=8 | 14..=31 | 127..=159) {
            non_printable += 1;
        }
    }
    if total == 0 {
        return false;
    }

    #[allow(clippy::cast_precision_loss)]
    let ratio = non_printable as f64 / total as f64;
    ratio > 0.1
}
621
622// ── File Discovery ──────────────────────────────────────────────────────────
623
624// Directory excludes are now sourced from `.gitignore` via crate::gitignore.
625
626fn collect_files_from_globs(patterns: &[String], verbose: bool) -> Result<Vec<PathBuf>> {
627    let mut files = Vec::new();
628    if verbose {
629        eprintln!("Searching with glob patterns...");
630    }
631    for pattern in patterns {
632        for entry in glob(pattern).context("Failed to read glob pattern")? {
633            match entry {
634                Ok(path) if path.is_file() => files.push(path),
635                _ => {}
636            }
637        }
638    }
639    Ok(files)
640}
641
/// List the files git knows about in the current working directory: tracked
/// files plus untracked files that are not ignored.
///
/// Returns `None` when `git ls-files` cannot be run or exits unsuccessfully
/// (e.g. git is not installed or this is not a repository). A failure of the
/// second, untracked-files query is tolerated and simply adds nothing.
///
/// Paths are read in git's NUL-separated `-z` format. In the default
/// line-based format git wraps paths containing unusual or non-ASCII
/// characters in quotes with escape sequences, which would yield paths that
/// do not exist on disk.
fn collect_files_from_git(verbose: bool) -> Option<Vec<PathBuf>> {
    /// Run `git ls-files -z <extra…>` and parse its NUL-separated output.
    fn list(extra: &[&str]) -> Option<Vec<PathBuf>> {
        let output = Command::new("git")
            .arg("ls-files")
            .arg("-z")
            .args(extra)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        Some(
            String::from_utf8_lossy(&output.stdout)
                .split('\0')
                .filter(|entry| !entry.is_empty())
                .map(PathBuf::from)
                .collect(),
        )
    }

    if verbose {
        eprintln!("Attempting git ls-files...");
    }

    let mut files = list(&[])?;

    // Also pick up untracked (but not ignored) files
    if let Some(untracked) = list(&["-o", "--exclude-standard"]) {
        files.extend(untracked);
    }

    Some(files)
}
674
675fn collect_files_from_walk(root: &Path) -> Vec<PathBuf> {
676    WalkDir::new(root)
677        .into_iter()
678        .filter_map(std::result::Result::ok)
679        .filter(|e| e.file_type().is_file())
680        .map(walkdir::DirEntry::into_path)
681        .collect()
682}
683
684fn discover_files(args: &CopyrightArgs) -> Result<Vec<PathBuf>> {
685    let root = crate::utils::find_project_root();
686
687    let raw = if !args.glob.is_empty() {
688        // Adjust globs to be relative to root or handle them as is
689        collect_files_from_globs(&args.glob, args.verbose)?
690    } else if let Some(git_files) = collect_files_from_git(args.verbose) {
691        git_files.into_iter().map(|p| root.join(p)).collect()
692    } else {
693        if args.verbose {
694            eprintln!(
695                "git not available, falling back to directory walk from {}.",
696                root.display()
697            );
698        }
699        collect_files_from_walk(&root)
700    };
701
702    // Build exclude set: user excludes + gitignore-derived dirs
703    let gitignore_excludes = crate::gitignore::parse_gitignore(&root);
704    let exclude_patterns: Vec<String> = args
705        .exclude
706        .iter()
707        .cloned()
708        .chain(gitignore_excludes)
709        .collect();
710
711    // Normalize extensions for filtering
712    let ext_filter: HashSet<String> = args
713        .ext
714        .iter()
715        .map(|e| e.trim_start_matches('.').to_ascii_lowercase())
716        .collect();
717
718    let files: Vec<PathBuf> = raw
719        .into_iter()
720        .filter(|p| {
721            let s = p.to_string_lossy();
722            !exclude_patterns.iter().any(|ex| s.contains(ex.as_str()))
723        })
724        .filter(|p| {
725            if ext_filter.is_empty() {
726                return true;
727            }
728            p.extension()
729                .and_then(|e| e.to_str())
730                .is_some_and(|e| ext_filter.contains(&e.to_ascii_lowercase()))
731        })
732        .collect();
733
734    // Deduplicate (globs or git can return dupes)
735    let mut seen = HashSet::with_capacity(files.len());
736    Ok(files
737        .into_iter()
738        .filter(|p| seen.insert(p.clone()))
739        .collect())
740}
741
742// ── Processing ──────────────────────────────────────────────────────────────
743
/// Counters accumulated by `process_file` over one run.
#[derive(Default)]
struct Stats {
    /// Files written, or that would be written in `--dry-run` mode.
    updated: usize,
    /// Files left alone: unreadable, empty, binary, unsupported type, or
    /// already carrying an acceptable header.
    skipped: usize,
    /// Files found without a header in `--check` mode.
    missing: usize,
    /// Files whose header names a different license: reported in `--check`
    /// mode, or migrated by a successful write.
    mismatched: usize,
    /// Files that could not be written.
    errors: usize,
}
752
753/// Process a single file to update its copyright header.
754///
755/// # Errors
756/// Returns an error if reading from or writing to the file fails, or if license replacement fails.
757fn process_file(
758    path: &Path,
759    license_body: &str,
760    args: &CopyrightArgs,
761    stats: &mut Stats,
762) -> Result<()> {
763    let content = match fs::read_to_string(path) {
764        Ok(c) => c,
765        Err(e) => {
766            if args.verbose {
767                eprintln!("Skipping {}: {e}", path.display());
768            }
769            stats.skipped += 1;
770            return Ok(());
771        }
772    };
773
774    if content.trim().is_empty() || is_binary(&content) {
775        stats.skipped += 1;
776        return Ok(());
777    }
778
779    let Some(style) = get_comment_style(path, &content) else {
780        if args.verbose {
781            eprintln!("Skipping (unsupported type): {}", path.display());
782        }
783        stats.skipped += 1;
784        return Ok(());
785    };
786
787    let already_has_header = has_header(&content);
788    let detected_license = if already_has_header {
789        detect_header_license(&content)
790    } else {
791        None
792    };
793    let is_mismatch = already_has_header && detected_license.is_some_and(|d| d != args.license);
794
795    // --check mode: report missing *and* mismatched headers.
796    if args.check {
797        if !already_has_header {
798            println!("Missing header: {}", path.display());
799            stats.missing += 1;
800        } else if is_mismatch {
801            println!(
802                "Mismatched license ({} → {}): {}",
803                detected_license.unwrap_or("unknown"),
804                args.license,
805                path.display()
806            );
807            stats.mismatched += 1;
808        }
809        return Ok(());
810    }
811
812    // Decide whether we need to rewrite this file.
813    let needs_rewrite = !already_has_header       // no header yet
814        || args.force                              // explicit force
815        || is_mismatch; // wrong license
816
817    if !needs_rewrite {
818        if args.verbose {
819            eprintln!("Skipping (correct header): {}", path.display());
820        }
821        stats.skipped += 1;
822        return Ok(());
823    }
824
825    // Strip old header when replacing.
826    let base = if already_has_header {
827        strip_existing_header(&content)
828    } else {
829        content.clone()
830    };
831
832    // Replace stale license-name mentions in the body when migrating.
833    let base = if let Some(old_license) = detected_license {
834        if old_license == args.license {
835            base
836        } else {
837            replace_license_mentions(&base, old_license, &args.license)?
838        }
839    } else {
840        base
841    };
842
843    let header = build_header(style, license_body);
844    let new_content = prepend_header(&base, &header);
845
846    if args.dry_run {
847        if is_mismatch {
848            println!(
849                "Would migrate ({} → {}): {}",
850                detected_license.unwrap_or("unknown"),
851                args.license,
852                path.display()
853            );
854        } else {
855            println!("Would update: {}", path.display());
856        }
857        stats.updated += 1;
858        return Ok(());
859    }
860
861    match fs::write(path, &new_content) {
862        Ok(()) => {
863            if is_mismatch {
864                if args.verbose {
865                    eprintln!(
866                        "Migrated ({} → {}): {}",
867                        detected_license.unwrap_or("unknown"),
868                        args.license,
869                        path.display()
870                    );
871                }
872                stats.mismatched += 1;
873            } else if args.verbose {
874                eprintln!("Updated: {}", path.display());
875            }
876            stats.updated += 1;
877        }
878        Err(e) => {
879            eprintln!("Error writing {}: {e}", path.display());
880            stats.errors += 1;
881        }
882    }
883    Ok(())
884}
885
886/// Prepend header, preserving any shebang line at position 0.
887fn prepend_header(content: &str, header: &str) -> String {
888    if content.starts_with("#!") {
889        let (shebang, rest) = split_shebang(content);
890        match shebang {
891            Some(s) => format!("{s}\n\n{header}{rest}"),
892            None => format!("{header}{content}"),
893        }
894    } else {
895        format!("{header}{content}")
896    }
897}
898
899// ── Entry Point ─────────────────────────────────────────────────────────────
900
901/// Run the copyright header management command.
902/// Runs the copyright tool with the provided arguments.
903///
904/// # Errors
905/// Returns an error if the license year is invalid, the license file cannot be read,
906/// or if processing any of the files fails.
907pub fn run(args: &CopyrightArgs) -> Result<()> {
908    // Validate license upfront
909    if !VALID_LICENSES.contains(&args.license.as_str()) {
910        anyhow::bail!(
911            "Invalid license type: '{}'. Valid options: {:?}",
912            args.license,
913            VALID_LICENSES
914        );
915    }
916
917    let year = args
918        .year
919        .clone()
920        .unwrap_or_else(|| chrono::Utc::now().year().to_string());
921    let license_body = get_license_template(&args.license, &args.author, &year)?;
922
923    let files = discover_files(args)?;
924    if args.verbose {
925        eprintln!("Found {} files to process.", files.len());
926    }
927
928    let mut stats = Stats::default();
929
930    for path in &files {
931        process_file(path, &license_body, args, &mut stats)?;
932    }
933
934    // Report results
935    if args.check {
936        let problems = stats.missing + stats.mismatched;
937        if problems > 0 {
938            anyhow::bail!(
939                "{} file(s) have issues ({} missing, {} mismatched).",
940                problems,
941                stats.missing,
942                stats.mismatched
943            );
944        }
945        if args.verbose {
946            println!("All files have correct copyright headers.");
947        }
948    } else if stats.updated > 0 || stats.errors > 0 {
949        println!(
950            "Done. Updated: {}, Migrated: {}, Skipped: {}, Errors: {}",
951            stats.updated, stats.mismatched, stats.skipped, stats.errors
952        );
953    }
954
955    Ok(())
956}
957
958// ── Tests ───────────────────────────────────────────────────────────────────
959
#[cfg(test)]
// Tests unwrap freely: a panic is the desired failure mode here.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Copyright lines (any case) and SPDX tags count as an existing header.
    #[test]
    fn test_has_header_positive() {
        assert!(has_header("// Copyright (c) 2024 Acme\nfn main() {}"));
        assert!(has_header("# copyright 2023 Foo\nimport os"));
        assert!(has_header("/* SPDX-License-Identifier: MIT */\n"));
    }

    /// Plain code and a bare shebang are not mistaken for a header.
    #[test]
    fn test_has_header_negative() {
        assert!(!has_header("fn main() { println!(\"hello\"); }"));
        assert!(!has_header("#!/usr/bin/env python\nimport sys"));
    }

    /// Content containing a NUL byte is binary; ordinary source text is not.
    #[test]
    fn test_is_binary() {
        assert!(is_binary("\0ELF binary content"));
        assert!(!is_binary("fn main() { println!(\"hello\"); }"));
    }

    /// A multi-byte character straddling byte 1024 must not cause a panic.
    #[test]
    fn test_is_binary_char_boundary() {
        // '─' is 3 bytes: [226, 148, 128]
        // We want to place it so it straddles the 1024 byte boundary.
        let mut content = "a".repeat(1023);
        content.push('─'); // bytes 1023, 1024, 1025
        content.push_str(" rest of content");

        // This should not panic
        assert!(!is_binary(&content));
    }

    /// Block-style headers are wrapped in `/** ... */` with ` * ` line prefixes.
    #[test]
    fn test_build_header_block() {
        let header = build_header(&C_STYLE_BLOCK, "Copyright 2024 Test");
        assert!(header.starts_with("/**\n"));
        assert!(header.contains(" * Copyright 2024 Test"));
        assert!(header.contains(" */\n"));
    }

    /// Line-style headers prefix each line and emit no block delimiters.
    #[test]
    fn test_build_header_line() {
        let header = build_header(&HASH_LINE, "Copyright 2024 Test");
        assert!(header.starts_with("# Copyright 2024 Test\n"));
        assert!(!header.contains("/**"));
    }

    /// The shebang stays on the first line and the header precedes the code.
    #[test]
    fn test_shebang_preserved() {
        let content = "#!/usr/bin/env python\nimport os\n";
        let header = "# Copyright 2024 Test\n\n";
        let result = prepend_header(content, header);
        assert!(result.starts_with("#!/usr/bin/env python\n"));

        let header_at = result
            .find("# Copyright 2024 Test")
            .expect("Header should be present");
        let code_at = result
            .find("import os")
            .expect("Original code should be present");
        assert!(
            header_at < code_at,
            "Header should be inserted before the original code"
        );
    }

    /// Stripping an existing block header leaves only the code behind.
    #[test]
    fn test_strip_existing_block_header() {
        let content = "/** Copyright (c) 2023 Old */\nfn main() {}";
        let stripped = strip_existing_header(content);
        assert_eq!(stripped.trim(), "fn main() {}");
    }

    /// File extensions map to the expected comment kind.
    #[test]
    fn test_get_comment_style() {
        let rs_path = Path::new("main.rs");
        let py_path = Path::new("script.py");
        let html_path = Path::new("index.html");

        assert_eq!(
            get_comment_style(rs_path, "fn main()")
                .expect("Should return comment style for .rs")
                .kind,
            CommentKind::Block
        );
        assert_eq!(
            get_comment_style(py_path, "import os")
                .expect("Should return comment style for .py")
                .kind,
            CommentKind::Line
        );
        assert_eq!(
            get_comment_style(html_path, "<html>")
                .expect("Should return comment style for .html")
                .kind,
            CommentKind::Block
        );
    }

    /// Every valid license yields a template containing the year; unknown ones fail.
    #[test]
    fn test_license_templates() {
        for license in VALID_LICENSES {
            let Ok(template) = get_license_template(license, "Test", "2024") else {
                panic!("Failed for license: {license}");
            };
            assert!(template.contains("2024"), "Template should contain year");
        }
        assert!(get_license_template("invalid", "Test", "2024").is_err());
    }

    // ── License Detection Tests ─────────────────────────────────────────

    /// Apache wording in the header is detected as `apache-2.0`.
    #[test]
    fn test_detect_apache_header() {
        let content = "/*\n * Copyright 2024 Acme\n *\n * Licensed under the Apache License, Version 2.0\n */\nfn main() {}";
        assert_eq!(detect_header_license(content), Some("apache-2.0"));
    }

    /// The MIT permission grant sentence is detected as `mit`.
    #[test]
    fn test_detect_mit_header() {
        let content = "/*\n * Copyright (c) 2024 Acme\n *\n * Permission is hereby granted, free of charge\n */\nfn main() {}";
        assert_eq!(detect_header_license(content), Some("mit"));
    }

    /// A GNU GPL v3 mention is detected as `gpl-3.0`.
    #[test]
    fn test_detect_gpl_header() {
        let content = "# Copyright (C) 2024 Acme\n# GNU General Public License v3\nimport os";
        assert_eq!(detect_header_license(content), Some("gpl-3.0"));
    }

    /// The BSD redistribution clause is detected as `bsd-3-clause`.
    #[test]
    fn test_detect_bsd_header() {
        let content = "/*\n * Copyright (c) 2024 Acme\n * Redistribution and use in source and binary forms, with or without modification, are permitted\n */\nint main() {}";
        assert_eq!(detect_header_license(content), Some("bsd-3-clause"));
    }

    /// SPDX identifiers map to the corresponding license key.
    #[test]
    fn test_detect_spdx_tag() {
        assert_eq!(
            detect_header_license("// SPDX-License-Identifier: MIT\nfn main() {}"),
            Some("mit")
        );
        assert_eq!(
            detect_header_license("// SPDX-License-Identifier: Apache-2.0\nfn main() {}"),
            Some("apache-2.0")
        );
    }

    /// A bare copyright line carries no detectable license.
    #[test]
    fn test_detect_no_license() {
        assert_eq!(
            detect_header_license("// Copyright (c) 2024 Acme\nfn main() {}"),
            None
        );
    }

    // ── Body License-Name Replacement Tests ─────────────────────────────

    /// SPDX identifiers in the body are rewritten to the new license.
    #[test]
    fn test_replace_spdx_identifier() {
        let input = "// SPDX-License-Identifier: MIT\nfn main() {}";
        let result = replace_license_mentions(input, "mit", "apache-2.0").unwrap();
        assert!(result.contains("SPDX-License-Identifier: Apache-2.0"));
        assert!(!result.contains("SPDX-License-Identifier: MIT"));
    }

    /// Badge URLs and their alt text are rewritten to the new license.
    #[test]
    fn test_replace_badge_url() {
        let input = "[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)";
        let result = replace_license_mentions(input, "mit", "apache-2.0").unwrap();
        assert!(result.contains("License-Apache%202.0-blue.svg"));
        assert!(result.contains("License: Apache 2.0"));
        assert!(!result.contains("License-MIT-blue.svg"));
    }

    /// Prose mentions of the old license name are rewritten to the new one.
    #[test]
    fn test_replace_prose_license_name() {
        let input = "This project is licensed under the **MIT License** - see LICENSE.";
        let result = replace_license_mentions(input, "mit", "apache-2.0").unwrap();
        assert!(result.contains("**Apache License, Version 2.0**"));
        assert!(!result.contains("MIT License"));
    }

    /// Bare license names outside the known patterns are left untouched.
    #[test]
    fn test_replace_no_false_positives() {
        // Should not change dependency-level mentions that don't match known patterns.
        let input = "dependencies:\n  some-lib: MIT\n  other-lib: BSD";
        let result = replace_license_mentions(input, "mit", "apache-2.0").unwrap();
        // "MIT" alone without "License" suffix should not be replaced
        // (the prose pattern matches "MIT License", not bare "MIT")
        assert!(result.contains("some-lib: MIT"));
    }
}