1use std::fs;
4use std::path::Path;
5use std::sync::Arc;
6
7use crate::config::Config;
8use crate::error::Result;
9use crate::language::{Language, LanguageRegistry};
10
11use super::complexity::ComplexityAnalyzer;
12use super::stats::{FileStats, LineStats};
13
/// Describes the terminator of a string literal that was opened on one line
/// and is still open on the lines that follow.
#[derive(Clone, Debug)]
struct StringDelimiter {
    /// Exact text that closes the string (for example `"`, `'''` or `"#`).
    end_pattern: String,
    /// `true` when the string is a raw literal: no escape processing is
    /// applied while searching for `end_pattern`.
    is_raw: bool,
    /// `true` when the lines covered by the string are counted as comment
    /// lines rather than code lines.
    is_docstring: bool,
}
24
25pub struct FileAnalyzer {
27 registry: Arc<LanguageRegistry>,
28 complexity_analyzer: ComplexityAnalyzer,
29 min_lines: Option<usize>,
30 max_lines: Option<usize>,
31}
32
33impl FileAnalyzer {
34 pub fn new(registry: Arc<LanguageRegistry>, config: &Config) -> Self {
36 Self {
37 registry,
38 complexity_analyzer: ComplexityAnalyzer::new(),
39 min_lines: config.filter.min_lines,
40 max_lines: config.filter.max_lines,
41 }
42 }
43
44 pub fn analyze(&self, path: &Path) -> Result<Option<FileStats>> {
48 let language = match self.registry.detect(path) {
50 Some(lang) => lang,
51 None => return Ok(None),
52 };
53
54 let content = match fs::read_to_string(path) {
56 Ok(c) => c,
57 Err(_) => {
58 match fs::read(path) {
60 Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
61 Err(e) => {
62 return Err(crate::error::Error::FileRead {
63 path: path.to_path_buf(),
64 source: e,
65 })
66 }
67 }
68 }
69 };
70
71 let lines = self.count_lines(&content, &language);
73
74 if let Some(min) = self.min_lines {
76 if lines.total < min {
77 return Ok(None);
78 }
79 }
80 if let Some(max) = self.max_lines {
81 if lines.total > max {
82 return Ok(None);
83 }
84 }
85
86 let size = fs::metadata(path).map(|m| m.len()).unwrap_or(0);
88
89 let complexity = self.complexity_analyzer.analyze(&content, &language);
91
92 Ok(Some(FileStats {
93 path: path.to_path_buf(),
94 language: language.name.clone(),
95 lines,
96 size,
97 complexity,
98 }))
99 }
100
101 fn count_lines(&self, content: &str, lang: &Language) -> LineStats {
103 let mut stats = LineStats::default();
104 let mut in_block_comment = false;
105 let mut block_comment_end = "";
106 let mut in_multiline_string = false;
107 let mut string_delimiter: Option<StringDelimiter> = None;
108
109 for line in content.lines() {
110 stats.total += 1;
111 let trimmed = line.trim();
112
113 if trimmed.is_empty() {
115 stats.blank += 1;
116 continue;
117 }
118
119 if in_multiline_string {
121 if let Some(ref delim) = string_delimiter {
122 if delim.is_docstring {
124 stats.comment += 1;
125 } else {
126 stats.code += 1;
127 }
128 if self.line_ends_string(line, delim) {
129 in_multiline_string = false;
130 string_delimiter = None;
131 }
132 }
133 continue;
134 }
135
136 if in_block_comment {
138 stats.comment += 1;
139 if let Some(pos) = trimmed.find(block_comment_end) {
140 let after = trimmed[pos + block_comment_end.len()..].trim();
142 if !after.is_empty() && !self.starts_with_comment(after, lang) {
143 stats.comment -= 1;
146 stats.code += 1;
147 }
148 in_block_comment = false;
149 }
150 continue;
151 }
152
153 if let Some(delim) = self.starts_multiline_string(line, lang) {
156 if delim.is_docstring {
158 stats.comment += 1;
159 } else {
160 stats.code += 1;
161 }
162 in_multiline_string = true;
163 string_delimiter = Some(delim);
164 continue;
165 }
166
167 if lang.name == "Python" {
169 if let Some(is_docstring) = self.is_single_line_docstring(trimmed) {
170 if is_docstring {
171 stats.comment += 1;
172 } else {
173 stats.code += 1;
174 }
175 continue;
176 }
177 }
178
179 let mut found_block_start = false;
181 for (start, end) in &lang.block_comments {
182 if let Some(start_pos) = trimmed.find(start.as_str()) {
183 let before = &trimmed[..start_pos];
185 if self.is_in_string(before, lang) {
186 continue;
187 }
188
189 found_block_start = true;
190 let after_start = &trimmed[start_pos + start.len()..];
191
192 if let Some(end_pos) = after_start.find(end.as_str()) {
193 let after_end = after_start[end_pos + end.len()..].trim();
195 if before.trim().is_empty() && after_end.is_empty() {
196 stats.comment += 1;
197 } else {
198 stats.code += 1;
200 }
201 } else {
202 in_block_comment = true;
204 block_comment_end = end;
205 if before.trim().is_empty() {
206 stats.comment += 1;
207 } else {
208 stats.code += 1;
210 }
211 }
212 break;
213 }
214 }
215
216 if found_block_start {
217 continue;
218 }
219
220 let is_line_comment = lang
222 .line_comments
223 .iter()
224 .any(|prefix| trimmed.starts_with(prefix.as_str()));
225
226 if is_line_comment {
227 stats.comment += 1;
228 } else {
229 stats.code += 1;
230 }
231 }
232
233 stats
234 }
235
236 fn starts_multiline_string(&self, line: &str, lang: &Language) -> Option<StringDelimiter> {
239 if lang.name == "Rust" {
241 if let Some(delim) = self.detect_rust_raw_string_start(line) {
242 return Some(delim);
243 }
244 }
245
246 if lang.name == "Python" {
248 for pattern in &["\"\"\"", "'''"] {
249 if let Some(pos) = line.find(pattern) {
250 let before = &line[..pos];
251 if !self.is_in_string(before, lang) {
252 let after = &line[pos + 3..];
253 if after.find(pattern).is_none() {
255 let is_docstring = !before.contains('=');
257 return Some(StringDelimiter {
258 end_pattern: pattern.to_string(),
259 is_raw: false,
260 is_docstring,
261 });
262 }
263 }
264 }
265 }
266 }
267
268 let mut in_string = false;
270 let mut string_char = '"';
271 let mut escape_next = false;
272
273 let chars: Vec<char> = line.chars().collect();
274 let mut i = 0;
275 while i < chars.len() {
276 let c = chars[i];
277
278 if escape_next {
279 escape_next = false;
280 i += 1;
281 continue;
282 }
283
284 if c == '\\' && in_string {
285 escape_next = true;
286 i += 1;
287 continue;
288 }
289
290 if (c == '"' || c == '\'') && !in_string {
291 let byte_pos: usize = chars[..i].iter().map(|ch| ch.len_utf8()).sum();
293 let before = &line[..byte_pos];
294 if !self.is_in_string(before, lang) {
295 in_string = true;
296 string_char = c;
297 }
298 } else if c == string_char && in_string {
299 in_string = false;
300 }
301
302 i += 1;
303 }
304
305 if in_string {
306 return Some(StringDelimiter {
307 end_pattern: string_char.to_string(),
308 is_raw: false,
309 is_docstring: false,
310 });
311 }
312
313 None
314 }
315
316 fn detect_rust_raw_string_start(&self, line: &str) -> Option<StringDelimiter> {
318 let bytes = line.as_bytes();
319 let len = bytes.len();
320 let mut i = 0;
321
322 while i < len {
323 if bytes[i] == b'r' && i + 1 < len {
325 let start = i;
326 i += 1;
327
328 let mut hash_count = 0;
330 while i < len && bytes[i] == b'#' {
331 hash_count += 1;
332 i += 1;
333 }
334
335 if i < len && bytes[i] == b'"' {
337 if start == 0 || !bytes[start - 1].is_ascii_alphanumeric() {
339 let end_pattern = format!("\"{}", "#".repeat(hash_count));
341
342 let after_quote = &line[i + 1..];
344 if after_quote.find(&end_pattern).is_none() {
345 return Some(StringDelimiter {
346 end_pattern,
347 is_raw: true,
348 is_docstring: false,
349 });
350 }
351 }
352 }
353 }
354 i += 1;
355 }
356
357 None
358 }
359
360 fn line_ends_string(&self, line: &str, delim: &StringDelimiter) -> bool {
362 if delim.is_raw {
363 line.contains(&delim.end_pattern)
365 } else {
366 let mut chars = line.chars().peekable();
368 let target: Vec<char> = delim.end_pattern.chars().collect();
369
370 while let Some(c) = chars.next() {
371 if c == '\\' {
372 chars.next();
374 continue;
375 }
376
377 if !target.is_empty() && c == target[0] {
378 let mut matched = true;
380 for expected in target.iter().skip(1) {
381 if chars.next() != Some(*expected) {
382 matched = false;
383 break;
384 }
385 }
386 if matched {
387 return true;
388 }
389 }
390 }
391 false
392 }
393 }
394
395 fn is_single_line_docstring(&self, trimmed: &str) -> Option<bool> {
399 for pattern in &["\"\"\"", "'''"] {
400 if let Some(start_pos) = trimmed.find(pattern) {
401 let after_start = &trimmed[start_pos + 3..];
402 if let Some(end_pos) = after_start.find(pattern) {
404 let after_end = after_start[end_pos + 3..].trim();
406 if after_end.is_empty() || after_end.starts_with('#') {
407 let before = &trimmed[..start_pos];
409 return Some(!before.contains('='));
411 }
412 }
413 }
414 }
415 None
416 }
417
418 fn is_in_string(&self, text: &str, _lang: &Language) -> bool {
420 let mut in_string = false;
422 let mut chars = text.chars().peekable();
423
424 while let Some(c) = chars.next() {
425 match c {
426 '"' | '\'' => {
427 in_string = !in_string;
428 }
429 '\\' => {
430 chars.next();
432 }
433 _ => {}
434 }
435 }
436
437 in_string
438 }
439
440 fn starts_with_comment(&self, text: &str, lang: &Language) -> bool {
442 lang.line_comments
443 .iter()
444 .any(|prefix| text.starts_with(prefix.as_str()))
445 || lang
446 .block_comments
447 .iter()
448 .any(|(start, _)| text.starts_with(start.as_str()))
449 }
450}
451
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the minimal Rust language definition shared by the tests below.
    fn make_rust_lang() -> Language {
        Language {
            name: "Rust".to_string(),
            extensions: vec![".rs".to_string()],
            filenames: vec![],
            line_comments: vec!["//".to_string()],
            block_comments: vec![("/*".to_string(), "*/".to_string())],
            string_delimiters: vec![],
            function_pattern: None,
            complexity_keywords: vec![],
            nested_comments: true,
        }
    }

    /// Counts the lines of `content` as Rust source with a default-configured
    /// analyzer backed by an empty registry.
    fn count_rust(content: &str) -> LineStats {
        let lang = make_rust_lang();
        let analyzer =
            FileAnalyzer::new(Arc::new(LanguageRegistry::empty()), &Config::default());
        analyzer.count_lines(content, &lang)
    }

    #[test]
    fn test_count_lines_basic() {
        let stats = count_rust("fn main() {\n println!(\"hello\");\n}\n");

        assert_eq!(stats.total, 3);
        assert_eq!(stats.code, 3);
        assert_eq!(stats.blank, 0);
        assert_eq!(stats.comment, 0);
    }

    #[test]
    fn test_count_lines_with_comments() {
        let stats = count_rust(
            "// This is a comment\nfn main() {\n /* block comment */\n println!(\"hello\");\n}\n",
        );

        assert_eq!(stats.total, 5);
        assert_eq!(stats.code, 3);
        assert_eq!(stats.comment, 2);
        assert_eq!(stats.blank, 0);
    }

    #[test]
    fn test_count_lines_multiline_comment() {
        let stats = count_rust("/*\n * Multi-line\n * comment\n */\nfn main() {}\n");

        assert_eq!(stats.total, 5);
        assert_eq!(stats.code, 1);
        assert_eq!(stats.comment, 4);
        assert_eq!(stats.blank, 0);
    }

    #[test]
    fn test_count_lines_multiline_string() {
        // The `//` on the second line sits inside the string literal.
        let stats = count_rust("let s = \"hello\n// not a comment\nworld\";\n");

        assert_eq!(stats.total, 3);
        assert_eq!(stats.code, 3, "All lines should be code (inside string)");
        assert_eq!(stats.comment, 0, "No comments - // is inside string");
        assert_eq!(stats.blank, 0);
    }

    #[test]
    fn test_count_lines_raw_string() {
        // Both comment-looking lines sit inside the raw string literal.
        let stats =
            count_rust("let s = r#\"hello\n// not a comment\n/* also not */\nworld\"#;\n");

        assert_eq!(stats.total, 4);
        assert_eq!(stats.code, 4, "All lines should be code (inside raw string)");
        assert_eq!(stats.comment, 0, "No comments - everything is inside raw string");
        assert_eq!(stats.blank, 0);
    }
}