Skip to main content

codelens_core/analyzer/
file.rs

1//! Single file analyzer.
2
3use std::fs;
4use std::path::Path;
5use std::sync::Arc;
6
7use crate::config::Config;
8use crate::error::Result;
9use crate::language::LanguageRegistry;
10
11use super::complexity::ComplexityAnalyzer;
12use super::counter;
13use super::stats::{FileStats, LineStats};
14
/// Upper bound on the number of leading bytes examined during binary
/// detection (10 KiB).
const BINARY_CHECK_LEN: usize = 10 << 10;
17
18/// Analyzes individual source files.
19pub struct FileAnalyzer {
20    registry: Arc<LanguageRegistry>,
21    complexity_analyzer: ComplexityAnalyzer,
22    min_lines: Option<usize>,
23    max_lines: Option<usize>,
24}
25
26impl FileAnalyzer {
27    /// Create a new file analyzer.
28    pub fn new(registry: Arc<LanguageRegistry>, config: &Config) -> Self {
29        Self {
30            registry,
31            complexity_analyzer: ComplexityAnalyzer::new(),
32            min_lines: config.filter.min_lines,
33            max_lines: config.filter.max_lines,
34        }
35    }
36
37    /// Analyze a single file.
38    ///
39    /// Returns `None` if the file's language is not recognized or the file is binary.
40    pub fn analyze(&self, path: &Path) -> Result<Option<FileStats>> {
41        let content = match fs::read(path) {
42            Ok(bytes) => bytes,
43            Err(e) => {
44                return Err(crate::error::Error::FileRead {
45                    path: path.to_path_buf(),
46                    source: e,
47                })
48            }
49        };
50
51        self.analyze_from_bytes(path, &content)
52    }
53
54    /// Analyze a file from pre-read bytes (for buffer reuse).
55    ///
56    /// Returns `None` if the file's language is not recognized or the file is binary.
57    pub fn analyze_from_bytes(&self, path: &Path, content: &[u8]) -> Result<Option<FileStats>> {
58        // Detect language
59        let language = match self.registry.detect(path) {
60            Some(lang) => lang,
61            None => return Ok(None),
62        };
63
64        // Detect binary files: check first 10KB for null bytes
65        let check_len = content.len().min(BINARY_CHECK_LEN);
66        if content[..check_len].contains(&0) {
67            return Ok(None);
68        }
69
70        // Count lines using byte-level state machine
71        let (trie, mask) = language.tokens();
72        let lines: LineStats = counter::count_stats(content, trie, *mask);
73
74        // Apply line filters
75        if let Some(min) = self.min_lines {
76            if lines.total < min {
77                return Ok(None);
78            }
79        }
80        if let Some(max) = self.max_lines {
81            if lines.total > max {
82                return Ok(None);
83            }
84        }
85
86        // File size from content length
87        let size = content.len() as u64;
88
89        // Analyze complexity (needs string representation)
90        let text = String::from_utf8_lossy(content);
91        let complexity = self.complexity_analyzer.analyze(&text, &language);
92
93        Ok(Some(FileStats {
94            path: path.to_path_buf(),
95            language: language.name.clone(),
96            lines,
97            size,
98            complexity,
99        }))
100    }
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Registry holding a single Rust definition with comment syntax only
    /// (no function pattern, no complexity keywords).
    fn make_rust_registry() -> Arc<LanguageRegistry> {
        let definition = r#"
                [rust]
                name = "Rust"
                extensions = [".rs"]
                line_comments = ["//"]
                block_comments = [["/*", "*/"]]
                nested_comments = true
            "#;
        let mut languages = LanguageRegistry::empty();
        languages.load_toml(definition).unwrap();
        Arc::new(languages)
    }

    /// Analyzer over the comment-only Rust registry, using `config`.
    fn analyzer_with(config: &Config) -> FileAnalyzer {
        FileAnalyzer::new(make_rust_registry(), config)
    }

    /// Analyzer over the comment-only Rust registry with default configuration.
    fn default_analyzer() -> FileAnalyzer {
        analyzer_with(&Config::default())
    }

    #[test]
    fn test_analyze_from_bytes_basic_rust() {
        let source = b"fn main() {\n    println!(\"hello\");\n}\n";
        let stats = default_analyzer()
            .analyze_from_bytes(Path::new("test.rs"), source)
            .unwrap()
            .unwrap();

        assert_eq!(stats.lines.total, 3);
        assert_eq!(stats.lines.code, 3);
        assert_eq!(stats.lines.blank, 0);
        assert_eq!(stats.lines.comment, 0);
        assert_eq!(stats.language, "Rust");
        assert_eq!(stats.size, source.len() as u64);
    }

    #[test]
    fn test_analyze_from_bytes_with_comments() {
        let source = b"// This is a comment\nfn main() {\n    println!(\"hello\");\n}\n";
        let stats = default_analyzer()
            .analyze_from_bytes(Path::new("test.rs"), source)
            .unwrap()
            .unwrap();

        assert_eq!(stats.lines.total, 4);
        assert_eq!(stats.lines.code, 3);
        assert_eq!(stats.lines.comment, 1);
    }

    #[test]
    fn test_analyze_from_bytes_multiline_block_comment() {
        let source = b"/*\n * Multi-line\n * comment\n */\nfn main() {}\n";
        let stats = default_analyzer()
            .analyze_from_bytes(Path::new("test.rs"), source)
            .unwrap()
            .unwrap();

        assert_eq!(stats.lines.total, 5);
        assert_eq!(stats.lines.code, 1);
        assert_eq!(stats.lines.comment, 4);
    }

    #[test]
    fn test_analyze_from_bytes_returns_none_for_unknown_language() {
        // ".xyz" is not registered, so detection fails.
        let outcome = default_analyzer()
            .analyze_from_bytes(Path::new("test.xyz"), b"some content")
            .unwrap();

        assert!(outcome.is_none());
    }

    #[test]
    fn test_analyze_from_bytes_detects_binary() {
        // Valid Rust followed by a trailing NUL byte, which marks the file as binary.
        let source: &[u8] = b"fn main() {}\n\0";
        let outcome = default_analyzer()
            .analyze_from_bytes(Path::new("test.rs"), source)
            .unwrap();

        assert!(outcome.is_none());
    }

    #[test]
    fn test_analyze_from_bytes_complexity() {
        // This registry additionally defines a function pattern and complexity keywords.
        let definition = r#"
                [rust]
                name = "Rust"
                extensions = [".rs"]
                line_comments = ["//"]
                block_comments = [["/*", "*/"]]
                nested_comments = true
                function_pattern = '(?m)^\s*(pub\s+)?(async\s+)?fn\s+\w+'
                complexity_keywords = ["if", "for"]
            "#;
        let mut languages = LanguageRegistry::empty();
        languages.load_toml(definition).unwrap();
        let analyzer = FileAnalyzer::new(Arc::new(languages), &Config::default());

        let source = b"fn main() {\n    if true {\n        for i in 0..10 {}\n    }\n}\n";
        let stats = analyzer
            .analyze_from_bytes(Path::new("test.rs"), source)
            .unwrap()
            .unwrap();

        assert_eq!(stats.complexity.functions, 1);
        assert!(stats.complexity.cyclomatic >= 3); // 1 fn + 1 if + 1 for
    }

    #[test]
    fn test_analyze_from_bytes_line_filter_min() {
        let mut config = Config::default();
        config.filter.min_lines = Some(10);

        let outcome = analyzer_with(&config)
            .analyze_from_bytes(Path::new("test.rs"), b"fn main() {}\n")
            .unwrap();

        assert!(
            outcome.is_none(),
            "File with 1 line should be filtered by min_lines=10"
        );
    }

    #[test]
    fn test_analyze_from_bytes_line_filter_max() {
        let mut config = Config::default();
        config.filter.max_lines = Some(1);

        let source = b"fn main() {\n    println!(\"hello\");\n}\n";
        let outcome = analyzer_with(&config)
            .analyze_from_bytes(Path::new("test.rs"), source)
            .unwrap();

        assert!(
            outcome.is_none(),
            "File with 3 lines should be filtered by max_lines=1"
        );
    }

    #[test]
    fn test_analyze_reads_file_from_disk() {
        let mut file = NamedTempFile::with_suffix(".rs").unwrap();
        file.write_all(b"fn main() {}\n").unwrap();

        let stats = default_analyzer().analyze(file.path()).unwrap().unwrap();

        assert_eq!(stats.lines.total, 1);
        assert_eq!(stats.lines.code, 1);
        assert_eq!(stats.size, 13); // "fn main() {}\n" is 13 bytes
    }

    #[test]
    fn test_analyze_delegates_to_analyze_from_bytes() {
        let analyzer = default_analyzer();
        let source = b"// comment\nfn main() {}\n";

        let mut file = NamedTempFile::with_suffix(".rs").unwrap();
        file.write_all(source).unwrap();

        // The disk path and the in-memory path must agree on every reported field.
        let via_disk = analyzer.analyze(file.path()).unwrap().unwrap();
        let via_bytes = analyzer
            .analyze_from_bytes(file.path(), source)
            .unwrap()
            .unwrap();

        assert_eq!(via_disk.lines, via_bytes.lines);
        assert_eq!(via_disk.size, via_bytes.size);
        assert_eq!(via_disk.language, via_bytes.language);
    }
}