codelens_core/analyzer/
file.rs1use std::fs;
4use std::path::Path;
5use std::sync::Arc;
6
7use crate::config::Config;
8use crate::error::Result;
9use crate::language::LanguageRegistry;
10
11use super::complexity::ComplexityAnalyzer;
12use super::counter;
13use super::stats::{FileStats, LineStats};
14
/// Maximum number of leading bytes (10 KiB) scanned for a NUL byte when
/// deciding whether a file should be treated as binary and skipped.
const BINARY_CHECK_LEN: usize = 10 * 1024;
17
18pub struct FileAnalyzer {
20 registry: Arc<LanguageRegistry>,
21 complexity_analyzer: ComplexityAnalyzer,
22 min_lines: Option<usize>,
23 max_lines: Option<usize>,
24}
25
26impl FileAnalyzer {
27 pub fn new(registry: Arc<LanguageRegistry>, config: &Config) -> Self {
29 Self {
30 registry,
31 complexity_analyzer: ComplexityAnalyzer::new(),
32 min_lines: config.filter.min_lines,
33 max_lines: config.filter.max_lines,
34 }
35 }
36
37 pub fn analyze(&self, path: &Path) -> Result<Option<FileStats>> {
41 let content = match fs::read(path) {
42 Ok(bytes) => bytes,
43 Err(e) => {
44 return Err(crate::error::Error::FileRead {
45 path: path.to_path_buf(),
46 source: e,
47 })
48 }
49 };
50
51 self.analyze_from_bytes(path, &content)
52 }
53
54 pub fn analyze_from_bytes(&self, path: &Path, content: &[u8]) -> Result<Option<FileStats>> {
58 let language = match self.registry.detect(path) {
60 Some(lang) => lang,
61 None => return Ok(None),
62 };
63
64 let check_len = content.len().min(BINARY_CHECK_LEN);
66 if content[..check_len].contains(&0) {
67 return Ok(None);
68 }
69
70 let (trie, mask) = language.tokens();
72 let lines: LineStats = counter::count_stats(content, trie, *mask);
73
74 if let Some(min) = self.min_lines {
76 if lines.total < min {
77 return Ok(None);
78 }
79 }
80 if let Some(max) = self.max_lines {
81 if lines.total > max {
82 return Ok(None);
83 }
84 }
85
86 let size = content.len() as u64;
88
89 let text = String::from_utf8_lossy(content);
91 let complexity = self.complexity_analyzer.analyze(&text, &language);
92
93 Ok(Some(FileStats {
94 path: path.to_path_buf(),
95 language: language.name.clone(),
96 lines,
97 size,
98 complexity,
99 }))
100 }
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Minimal Rust language definition shared by most tests.
    const RUST_TOML: &str = r#"
        [rust]
        name = "Rust"
        extensions = [".rs"]
        line_comments = ["//"]
        block_comments = [["/*", "*/"]]
        nested_comments = true
    "#;

    /// Rust language definition that additionally enables complexity analysis.
    const RUST_COMPLEXITY_TOML: &str = r#"
        [rust]
        name = "Rust"
        extensions = [".rs"]
        line_comments = ["//"]
        block_comments = [["/*", "*/"]]
        nested_comments = true
        function_pattern = '(?m)^\s*(pub\s+)?(async\s+)?fn\s+\w+'
        complexity_keywords = ["if", "for"]
    "#;

    /// Builds a shared registry containing only the languages in `toml`.
    fn registry_from_toml(toml: &'static str) -> Arc<LanguageRegistry> {
        let mut registry = LanguageRegistry::empty();
        registry.load_toml(toml).unwrap();
        Arc::new(registry)
    }

    /// Registry that knows only the basic Rust definition.
    fn make_rust_registry() -> Arc<LanguageRegistry> {
        registry_from_toml(RUST_TOML)
    }

    /// Analyzer over the basic Rust registry using the given configuration.
    fn rust_analyzer_with(config: &Config) -> FileAnalyzer {
        FileAnalyzer::new(make_rust_registry(), config)
    }

    /// Analyzer over the basic Rust registry with default configuration.
    fn rust_analyzer() -> FileAnalyzer {
        rust_analyzer_with(&Config::default())
    }

    /// Runs `analyze_from_bytes` for a file named `file_name`, unwrapping the
    /// outer `Result` so tests can inspect the `Option` directly.
    fn run(analyzer: &FileAnalyzer, file_name: &str, bytes: &[u8]) -> Option<FileStats> {
        analyzer
            .analyze_from_bytes(Path::new(file_name), bytes)
            .unwrap()
    }

    #[test]
    fn test_analyze_from_bytes_basic_rust() {
        let source = b"fn main() {\n println!(\"hello\");\n}\n";

        let stats = run(&rust_analyzer(), "test.rs", source).unwrap();

        assert_eq!(stats.lines.total, 3);
        assert_eq!(stats.lines.code, 3);
        assert_eq!(stats.lines.blank, 0);
        assert_eq!(stats.lines.comment, 0);
        assert_eq!(stats.language, "Rust");
        assert_eq!(stats.size, source.len() as u64);
    }

    #[test]
    fn test_analyze_from_bytes_with_comments() {
        let source = b"// This is a comment\nfn main() {\n println!(\"hello\");\n}\n";

        let stats = run(&rust_analyzer(), "test.rs", source).unwrap();

        assert_eq!(stats.lines.total, 4);
        assert_eq!(stats.lines.code, 3);
        assert_eq!(stats.lines.comment, 1);
    }

    #[test]
    fn test_analyze_from_bytes_multiline_block_comment() {
        let source = b"/*\n * Multi-line\n * comment\n */\nfn main() {}\n";

        let stats = run(&rust_analyzer(), "test.rs", source).unwrap();

        assert_eq!(stats.lines.total, 5);
        assert_eq!(stats.lines.code, 1);
        assert_eq!(stats.lines.comment, 4);
    }

    #[test]
    fn test_analyze_from_bytes_returns_none_for_unknown_language() {
        // The registry only knows `.rs`, so `.xyz` has no detectable language.
        let outcome = run(&rust_analyzer(), "test.xyz", b"some content");

        assert!(outcome.is_none());
    }

    #[test]
    fn test_analyze_from_bytes_detects_binary() {
        // A NUL byte inside the checked prefix marks the content as binary.
        let mut bytes = b"fn main() {}\n".to_vec();
        bytes.push(0);

        let outcome = run(&rust_analyzer(), "test.rs", &bytes);

        assert!(outcome.is_none());
    }

    #[test]
    fn test_analyze_from_bytes_complexity() {
        let analyzer = FileAnalyzer::new(
            registry_from_toml(RUST_COMPLEXITY_TOML),
            &Config::default(),
        );
        let source = b"fn main() {\n if true {\n for i in 0..10 {}\n }\n}\n";

        let stats = run(&analyzer, "test.rs", source).unwrap();

        assert_eq!(stats.complexity.functions, 1);
        assert!(stats.complexity.cyclomatic >= 3);
    }

    #[test]
    fn test_analyze_from_bytes_line_filter_min() {
        let mut config = Config::default();
        config.filter.min_lines = Some(10);

        let outcome = run(&rust_analyzer_with(&config), "test.rs", b"fn main() {}\n");

        assert!(
            outcome.is_none(),
            "File with 1 line should be filtered by min_lines=10"
        );
    }

    #[test]
    fn test_analyze_from_bytes_line_filter_max() {
        let mut config = Config::default();
        config.filter.max_lines = Some(1);
        let source = b"fn main() {\n println!(\"hello\");\n}\n";

        let outcome = run(&rust_analyzer_with(&config), "test.rs", source);

        assert!(
            outcome.is_none(),
            "File with 3 lines should be filtered by max_lines=1"
        );
    }

    #[test]
    fn test_analyze_reads_file_from_disk() {
        let analyzer = rust_analyzer();

        let mut tmp = NamedTempFile::with_suffix(".rs").unwrap();
        writeln!(tmp, "fn main() {{}}").unwrap();

        let stats = analyzer.analyze(tmp.path()).unwrap().unwrap();

        assert_eq!(stats.lines.total, 1);
        assert_eq!(stats.lines.code, 1);
        // 12 bytes of `fn main() {}` plus the newline appended by `writeln!`.
        assert_eq!(stats.size, 13);
    }

    #[test]
    fn test_analyze_delegates_to_analyze_from_bytes() {
        let analyzer = rust_analyzer();

        let source = b"// comment\nfn main() {}\n";
        let mut tmp = NamedTempFile::with_suffix(".rs").unwrap();
        tmp.write_all(source).unwrap();

        let from_disk = analyzer.analyze(tmp.path()).unwrap().unwrap();
        let from_bytes = run(&analyzer, tmp.path().to_str().unwrap(), source).unwrap();

        assert_eq!(from_disk.lines, from_bytes.lines);
        assert_eq!(from_disk.size, from_bytes.size);
        assert_eq!(from_disk.language, from_bytes.language);
    }
}