Skip to main content

normalize_native_rules/
long_file.rs

//! `long-file` native rule — flags source files at or above a line count threshold.
//!
//! Excludes well-known lock files and paths matching the `allow` list configured
//! under `[rules.rule."long-file"] allow = [...]` in `.normalize/config.toml`.
//!
//! # Configuration
//!
//! ```toml
//! [rules.rule."long-file"]
//! threshold = 300         # default: 500
//! allow = ["grammars/**"] # paths to skip (glob patterns)
//! ```
13
14use normalize_output::diagnostics::{DiagnosticsReport, Issue, Severity};
15use std::path::Path;
16
17use crate::cache::{FileRule, run_file_rule};
18use normalize_rules_config::WalkConfig;
19
20/// Serializable per-file finding for the long-file rule.
21#[derive(serde::Serialize, serde::Deserialize)]
22pub struct LongFileFinding {
23    rel_path: String,
24    line_count: usize,
25}
26
/// Reports whether `name` is one of the well-known dependency lock files.
///
/// Lock files are machine-generated and routinely huge, so they must never be
/// flagged as long. The comparison is an exact, case-sensitive match against a
/// bare file name (no directory components).
fn is_lockfile(name: &str) -> bool {
    // Every file name treated as a lock file.
    const LOCKFILE_NAMES: &[&str] = &[
        "uv.lock",
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "bun.lockb",
        "bun.lock",
        "poetry.lock",
        "Pipfile.lock",
        "Gemfile.lock",
        "composer.lock",
        "go.sum",
        "flake.lock",
        "packages.lock.json",
        "paket.lock",
        "pubspec.lock",
        "mix.lock",
        "rebar.lock",
        "Podfile.lock",
        "shrinkwrap.yaml",
        "deno.lock",
        "gradle.lockfile",
    ];

    LOCKFILE_NAMES.iter().any(|known| *known == name)
}
55
56/// Rule that flags source files exceeding a line count threshold.
57pub struct LongFileRule {
58    pub threshold: usize,
59    allow_patterns: Vec<glob::Pattern>,
60}
61
62impl LongFileRule {
63    /// Create a new `LongFileRule` with the given threshold and allow-list patterns.
64    ///
65    /// Callers should pass the `allow` list from `[rules.rule."long-file"]` in config:
66    /// ```ignore
67    /// let allow = config.rules.rules.get("long-file").map(|r| r.allow.clone()).unwrap_or_default();
68    /// let rule = LongFileRule::new(threshold, &allow);
69    /// ```
70    pub fn new(threshold: usize, allow: &[String]) -> Self {
71        let allow_patterns = allow
72            .iter()
73            .filter_map(|p| glob::Pattern::new(p).ok())
74            .collect();
75        Self {
76            threshold,
77            allow_patterns,
78        }
79    }
80}
81
82impl FileRule for LongFileRule {
83    type Finding = LongFileFinding;
84
85    fn engine_name(&self) -> &str {
86        "long-file"
87    }
88
89    fn config_hash(&self) -> String {
90        self.threshold.to_string()
91    }
92
93    fn check_file(&self, path: &Path, root: &Path) -> Vec<Self::Finding> {
94        let rel_path = path
95            .strip_prefix(root)
96            .unwrap_or(path)
97            .to_string_lossy()
98            .to_string();
99
100        // Skip lock files.
101        let file_name = path
102            .file_name()
103            .map(|n| n.to_string_lossy().to_string())
104            .unwrap_or_default();
105        if is_lockfile(&file_name) {
106            return Vec::new();
107        }
108
109        // Skip allowlisted paths.
110        if self.allow_patterns.iter().any(|p| p.matches(&rel_path)) {
111            return Vec::new();
112        }
113
114        let lines = match std::fs::read_to_string(path) {
115            Ok(content) => content.lines().count(),
116            Err(_) => return Vec::new(),
117        };
118
119        if lines >= self.threshold {
120            vec![LongFileFinding {
121                rel_path,
122                line_count: lines,
123            }]
124        } else {
125            Vec::new()
126        }
127    }
128
129    fn to_diagnostics(
130        &self,
131        findings: Vec<(std::path::PathBuf, Vec<Self::Finding>)>,
132        _root: &Path,
133        files_checked: usize,
134    ) -> DiagnosticsReport {
135        let threshold = self.threshold;
136
137        let mut issues: Vec<Issue> = findings
138            .into_iter()
139            .flat_map(|(_path, file_findings)| file_findings)
140            .map(|f| Issue {
141                file: f.rel_path,
142                line: None,
143                column: None,
144                end_line: None,
145                end_column: None,
146                rule_id: "long-file".into(),
147                message: format!("{} lines (threshold: {threshold})", f.line_count),
148                severity: Severity::Warning,
149                source: "long-file".into(),
150                related: vec![],
151                suggestion: Some("consider splitting into smaller, focused modules".into()),
152            })
153            .collect();
154
155        // Sort by line count descending.
156        issues.sort_by(|a, b| {
157            let a_lines: usize = a
158                .message
159                .split(' ')
160                .next()
161                .and_then(|s| s.parse().ok())
162                .unwrap_or(0);
163            let b_lines: usize = b
164                .message
165                .split(' ')
166                .next()
167                .and_then(|s| s.parse().ok())
168                .unwrap_or(0);
169            b_lines.cmp(&a_lines)
170        });
171
172        DiagnosticsReport {
173            issues,
174            files_checked,
175            sources_run: vec!["long-file".into()],
176            tool_errors: vec![],
177            daemon_cached: false,
178        }
179    }
180}
181
182/// Build a `DiagnosticsReport` for the `long-file` rule.
183///
184/// Walks all source files under `root`, counts lines, and emits an issue for
185/// each file exceeding the threshold. Lock files and `allow`-listed paths are
186/// skipped. `allow` should come from `[rules.rule."long-file"] allow` in config.
187pub fn build_long_file_report(
188    root: &Path,
189    threshold: usize,
190    files: Option<&[std::path::PathBuf]>,
191    walk_config: &WalkConfig,
192    allow: &[String],
193) -> DiagnosticsReport {
194    let rule = LongFileRule::new(threshold, allow);
195    run_file_rule(&rule, root, files, walk_config)
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Write a file called `name` in `dir` containing exactly `n`
    /// newline-terminated lines and return its path.
    fn make_file_with_lines(dir: &std::path::Path, name: &str, n: usize) -> std::path::PathBuf {
        let path = dir.join(name);
        let body: String = (0..n).map(|i| format!("line {i}\n")).collect();
        std::fs::write(&path, body).unwrap();
        path
    }

    /// Run the rule with `threshold` (and no allow-list) against a fresh
    /// temporary file of `n` lines, returning whatever it reports.
    fn findings_for(name: &str, n: usize, threshold: usize) -> Vec<LongFileFinding> {
        let dir = tempfile::tempdir().unwrap();
        let path = make_file_with_lines(dir.path(), name, n);
        LongFileRule::new(threshold, &[]).check_file(&path, dir.path())
    }

    #[test]
    fn test_default_threshold_not_triggered() {
        // 499 lines — below default threshold of 500
        let findings = findings_for("short.rs", 499, 500);
        assert!(
            findings.is_empty(),
            "499 lines should not trigger default threshold of 500"
        );
    }

    #[test]
    fn test_default_threshold_triggered() {
        // 500 lines — at default threshold
        let findings = findings_for("long.rs", 500, 500);
        assert_eq!(
            findings.len(),
            1,
            "500 lines should trigger default threshold of 500"
        );
    }

    #[test]
    fn test_custom_threshold_lower() {
        // 100 lines — below default but above custom threshold of 50
        let findings = findings_for("medium.rs", 100, 50);
        assert_eq!(
            findings.len(),
            1,
            "100 lines should trigger custom threshold of 50"
        );
    }

    #[test]
    fn test_custom_threshold_higher() {
        // 500 lines — at default but below custom threshold of 1000
        let findings = findings_for("medium.rs", 500, 1000);
        assert!(
            findings.is_empty(),
            "500 lines should not trigger custom threshold of 1000"
        );
    }
}