repolens 2.0.0

A CLI tool to audit and prepare repositories for open source or enterprise standards
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
//! File-related rules
//!
//! This module provides rules for checking repository files, including:
//! - Large files that should use Git LFS
//! - .gitignore configuration and recommended entries
//! - Temporary files that shouldn't be committed

use crate::config::Config;
use crate::error::RepoLensError;
use crate::rules::engine::RuleCategory;
use crate::rules::results::{Finding, Severity};
use crate::scanner::Scanner;
use crate::utils::{detect_languages, get_gitignore_entries_with_descriptions};

/// Rule category for repository file hygiene.
///
/// This is a stateless unit type; all behaviour lives in its
/// [`RuleCategory`] implementation, which reports under the category name
/// `"files"` and runs the `files/large`, `files/gitignore` and `files/temp`
/// rules when they are enabled in the configuration.
pub struct FilesRules;

#[async_trait::async_trait]
impl RuleCategory for FilesRules {
    /// Category identifier under which these rules are reported.
    fn name(&self) -> &'static str {
        "files"
    }

    /// Execute every enabled file rule and gather the results.
    ///
    /// Rules are evaluated in a fixed order: large files, `.gitignore`
    /// contents, then temporary files. A rule is skipped entirely when the
    /// configuration reports it as disabled.
    ///
    /// # Arguments
    ///
    /// * `scanner` - Provides access to the files of the repository
    /// * `config` - Decides which of the `files/*` rules are enabled
    ///
    /// # Returns
    ///
    /// All findings produced by the enabled rules, in evaluation order
    ///
    /// # Errors
    ///
    /// Returns the first error produced by any of the individual checks.
    async fn run(&self, scanner: &Scanner, config: &Config) -> Result<Vec<Finding>, RepoLensError> {
        let mut collected = Vec::new();

        // Oversized files that are candidates for Git LFS.
        if config.is_rule_enabled("files/large") {
            let large = check_large_files(scanner).await?;
            collected.extend(large);
        }

        // Presence and completeness of .gitignore.
        if config.is_rule_enabled("files/gitignore") {
            let gitignore = check_gitignore(scanner).await?;
            collected.extend(gitignore);
        }

        // Editor/backup/log leftovers that were committed.
        if config.is_rule_enabled("files/temp") {
            let temp = check_temp_files(scanner).await?;
            collected.extend(temp);
        }

        Ok(collected)
    }
}

/// Report every file the scanner considers larger than the size threshold.
///
/// The threshold is 10 * 1024 * 1024 bytes. Each offending file produces one
/// `FILE001` warning whose message includes the file size in megabytes
/// (one decimal place) and which recommends Git LFS.
///
/// # Arguments
///
/// * `scanner` - The scanner to access repository files
///
/// # Returns
///
/// One finding per large file, in the order the scanner yields them
async fn check_large_files(scanner: &Scanner) -> Result<Vec<Finding>, RepoLensError> {
    // Files above this many bytes are reported.
    const LARGE_FILE_THRESHOLD: u64 = 10 * 1024 * 1024;

    let findings: Vec<Finding> = scanner
        .files_larger_than(LARGE_FILE_THRESHOLD)
        .into_iter()
        .map(|file| {
            // Bytes -> megabytes, for display only.
            let size_mb = file.size as f64 / 1024.0 / 1024.0;
            let message = format!("Large file detected ({:.1} MB)", size_mb);

            Finding::new("FILE001", "files", Severity::Warning, message)
                .with_location(&file.path)
                .with_description(
                    "Large files can slow down repository operations and increase clone times.",
                )
                .with_remediation(
                    "Consider using Git LFS (Large File Storage) for binary or large files.",
                )
        })
        .collect();

    Ok(findings)
}

/// Check .gitignore file existence and recommended entries
///
/// If `.gitignore` is missing, a single `FILE002` warning is returned and no
/// further checks run. Otherwise the languages present in the repository are
/// detected and every recommended entry for those languages that is not
/// already listed produces a `FILE003` informational finding.
///
/// An entry counts as already listed when some line of `.gitignore`, after
/// trimming surrounding whitespace and trailing `/`, equals the recommended
/// pattern (also without trailing `/`), optionally preceded by a single
/// root-anchoring `/`. For example `target`, `target/`, `/target` and
/// `/target/` all satisfy the recommendation `target/`.
///
/// # Arguments
///
/// * `scanner` - The scanner to access repository files
///
/// # Returns
///
/// A vector of findings for .gitignore issues
async fn check_gitignore(scanner: &Scanner) -> Result<Vec<Finding>, RepoLensError> {
    let mut findings = Vec::new();

    // Without a .gitignore there is nothing to compare entries against.
    if !scanner.file_exists(".gitignore") {
        findings.push(
            Finding::new(
                "FILE002",
                "files",
                Severity::Warning,
                ".gitignore file is missing",
            )
            .with_description(
                "A .gitignore file helps prevent accidentally committing unwanted files.",
            )
            .with_remediation(
                "Create a .gitignore file with appropriate patterns for your project type.",
            ),
        );
        return Ok(findings);
    }

    // Best effort: an unreadable file is logged and then treated as empty,
    // so the check still completes instead of failing the whole run.
    let gitignore_content = scanner.read_file(".gitignore").unwrap_or_else(|e| {
        tracing::warn!("Failed to read .gitignore: {}", e);
        String::new()
    });

    // Normalise every line once up front (surrounding whitespace and trailing
    // slashes removed) rather than redoing it for each recommended entry.
    let existing_entries: Vec<&str> = gitignore_content
        .lines()
        .map(|line| line.trim().trim_end_matches('/'))
        .collect();

    // Recommendations depend on which languages the repository contains.
    let languages = detect_languages(scanner);
    let recommended_entries = get_gitignore_entries_with_descriptions(&languages);

    for (pattern, description) in recommended_entries {
        let pattern_clean = pattern.trim_end_matches('/');

        // Accept the entry as written or anchored to the repository root
        // with one leading '/'.
        let exists = existing_entries.iter().any(|entry| {
            *entry == pattern_clean || entry.strip_prefix('/') == Some(pattern_clean)
        });

        if !exists {
            findings.push(
                Finding::new(
                    "FILE003",
                    "files",
                    Severity::Info,
                    format!(".gitignore missing recommended entry: {}", pattern),
                )
                .with_description(format!(
                    "Adding '{}' to .gitignore helps prevent committing {}.",
                    pattern,
                    description.to_lowercase()
                )),
            );
        }
    }

    Ok(findings)
}

/// Report committed files that look like temporary or backup artefacts.
///
/// The scanner is queried once per pattern, in this order: `*.log`, `*.tmp`,
/// `*.temp`, `*~`, `*.swp`, `*.swo`, `*.bak`. Every file returned yields one
/// `FILE004` warning pointing at that file.
///
/// # Arguments
///
/// * `scanner` - The scanner to access repository files
///
/// # Returns
///
/// One finding per matched file, grouped by pattern in the order above
async fn check_temp_files(scanner: &Scanner) -> Result<Vec<Finding>, RepoLensError> {
    let temp_patterns = ["*.log", "*.tmp", "*.temp", "*~", "*.swp", "*.swo", "*.bak"];

    let findings: Vec<Finding> = temp_patterns
        .iter()
        .copied()
        .flat_map(|pattern| scanner.files_matching_pattern(pattern))
        .map(|file| {
            Finding::new(
                "FILE004",
                "files",
                Severity::Warning,
                "Temporary file found in repository",
            )
            .with_location(&file.path)
            .with_description("Temporary files should not be committed to version control.")
            .with_remediation("Remove the file and add the pattern to .gitignore.")
        })
        .collect();

    Ok(findings)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::Scanner;
    use std::fs;
    use tempfile::TempDir;

    /// Build a throwaway repository containing `files` (name, contents) and
    /// return a scanner rooted at it.
    ///
    /// Files are written in the given order before the scanner is created.
    /// The `TempDir` is returned alongside the scanner so the directory is
    /// not removed while the test is still using it.
    fn scanner_with_files<C: AsRef<[u8]>>(files: &[(&str, C)]) -> (TempDir, Scanner) {
        let temp_dir = TempDir::new().unwrap();
        for (name, contents) in files {
            fs::write(temp_dir.path().join(name), contents).unwrap();
        }
        let scanner = Scanner::new(temp_dir.path().to_path_buf());
        (temp_dir, scanner)
    }

    /// True when at least one finding carries the given rule id.
    fn has_rule(findings: &[Finding], rule_id: &str) -> bool {
        findings.iter().any(|f| f.rule_id == rule_id)
    }

    /// True when a `FILE003` finding mentions `needle` in its message.
    fn suggests_entry(findings: &[Finding], needle: &str) -> bool {
        findings
            .iter()
            .any(|f| f.rule_id == "FILE003" && f.message.contains(needle))
    }

    #[tokio::test]
    async fn test_check_large_files_detects_large_file() {
        // 11 MiB of zeroes, just above the 10 MiB threshold.
        let (_temp_dir, scanner) =
            scanner_with_files(&[("large.bin", vec![0u8; 11 * 1024 * 1024])]);

        let findings = check_large_files(&scanner).await.unwrap();

        assert!(!findings.is_empty());
        assert!(has_rule(&findings, "FILE001"));
    }

    #[tokio::test]
    async fn test_check_gitignore_missing() {
        // Completely empty repository: no .gitignore at all.
        let (_temp_dir, scanner) = scanner_with_files::<&str>(&[]);

        let findings = check_gitignore(&scanner).await.unwrap();

        assert!(!findings.is_empty());
        assert!(has_rule(&findings, "FILE002"));
    }

    #[tokio::test]
    async fn test_check_gitignore_missing_recommended_entries() {
        let (_temp_dir, scanner) = scanner_with_files(&[(".gitignore", "node_modules/")]);

        let findings = check_gitignore(&scanner).await.unwrap();

        assert!(has_rule(&findings, "FILE003"));
    }

    #[tokio::test]
    async fn test_check_temp_files_detects_tmp() {
        let (_temp_dir, scanner) = scanner_with_files(&[("temp.tmp", "temporary content")]);

        let findings = check_temp_files(&scanner).await.unwrap();

        assert!(!findings.is_empty());
        assert!(has_rule(&findings, "FILE004"));
    }

    #[tokio::test]
    async fn test_check_gitignore_rust_project_no_node_modules() {
        // A Cargo.toml marks the repository as a Rust project.
        let (_temp_dir, scanner) = scanner_with_files(&[
            (
                "Cargo.toml",
                "[package]\nname = \"test\"\nversion = \"0.1.0\"",
            ),
            (".gitignore", ".env\n*.key\n"),
        ]);

        let findings = check_gitignore(&scanner).await.unwrap();

        // Rust build output must be recommended...
        assert!(
            suggests_entry(&findings, "target/"),
            "Should suggest target/ for Rust projects"
        );

        // ...but JavaScript-only entries must not be.
        assert!(
            !suggests_entry(&findings, "node_modules"),
            "Should NOT suggest node_modules for Rust projects"
        );
    }

    #[tokio::test]
    async fn test_check_gitignore_javascript_project_suggests_node_modules() {
        // A package.json marks the repository as a JavaScript project.
        let (_temp_dir, scanner) = scanner_with_files(&[
            (
                "package.json",
                "{\"name\": \"test\", \"version\": \"1.0.0\"}",
            ),
            (".gitignore", ".env\n*.key\n"),
        ]);

        let findings = check_gitignore(&scanner).await.unwrap();

        // JavaScript dependencies directory must be recommended...
        assert!(
            suggests_entry(&findings, "node_modules"),
            "Should suggest node_modules/ for JavaScript projects"
        );

        // ...but Rust-only entries must not be.
        assert!(
            !suggests_entry(&findings, "target/"),
            "Should NOT suggest target/ for JavaScript projects"
        );
    }

    #[tokio::test]
    async fn test_check_gitignore_universal_entries_always_suggested() {
        // An empty .gitignore and no language markers.
        let (_temp_dir, scanner) = scanner_with_files(&[(".gitignore", "")]);

        let findings = check_gitignore(&scanner).await.unwrap();

        // Language-independent entries are still expected.
        assert!(
            suggests_entry(&findings, ".env"),
            "Should suggest .env (universal entry)"
        );

        assert!(
            suggests_entry(&findings, "*.key"),
            "Should suggest *.key (universal entry)"
        );
    }

    #[tokio::test]
    async fn test_check_gitignore_python_project_suggests_python_entries() {
        // A requirements.txt marks the repository as a Python project.
        let (_temp_dir, scanner) = scanner_with_files(&[
            ("requirements.txt", "requests==2.28.0\n"),
            (".gitignore", ".env\n"),
        ]);

        let findings = check_gitignore(&scanner).await.unwrap();

        // Python bytecode cache must be recommended...
        assert!(
            suggests_entry(&findings, "__pycache__"),
            "Should suggest __pycache__/ for Python projects"
        );

        // ...but JavaScript-only entries must not be.
        assert!(
            !suggests_entry(&findings, "node_modules"),
            "Should NOT suggest node_modules for Python projects"
        );
    }
}