codelens_core/analyzer/
file.rs

1//! Single file analyzer.
2
3use std::fs;
4use std::path::Path;
5use std::sync::Arc;
6
7use crate::config::Config;
8use crate::error::Result;
9use crate::language::{Language, LanguageRegistry};
10
11use super::complexity::ComplexityAnalyzer;
12use super::stats::{FileStats, LineStats};
13
14/// Analyzes individual source files.
15pub struct FileAnalyzer {
16    registry: Arc<LanguageRegistry>,
17    complexity_analyzer: ComplexityAnalyzer,
18    min_lines: Option<usize>,
19    max_lines: Option<usize>,
20}
21
22impl FileAnalyzer {
23    /// Create a new file analyzer.
24    pub fn new(registry: Arc<LanguageRegistry>, config: &Config) -> Self {
25        Self {
26            registry,
27            complexity_analyzer: ComplexityAnalyzer::new(),
28            min_lines: config.filter.min_lines,
29            max_lines: config.filter.max_lines,
30        }
31    }
32
33    /// Analyze a single file.
34    ///
35    /// Returns `None` if the file's language is not recognized.
36    pub fn analyze(&self, path: &Path) -> Result<Option<FileStats>> {
37        // Detect language
38        let language = match self.registry.detect(path) {
39            Some(lang) => lang,
40            None => return Ok(None),
41        };
42
43        // Read file content
44        let content = match fs::read_to_string(path) {
45            Ok(c) => c,
46            Err(_) => {
47                // Try reading as lossy UTF-8
48                match fs::read(path) {
49                    Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
50                    Err(e) => {
51                        return Err(crate::error::Error::FileRead {
52                            path: path.to_path_buf(),
53                            source: e,
54                        })
55                    }
56                }
57            }
58        };
59
60        // Count lines
61        let lines = self.count_lines(&content, &language);
62
63        // Apply line filters
64        if let Some(min) = self.min_lines {
65            if lines.total < min {
66                return Ok(None);
67            }
68        }
69        if let Some(max) = self.max_lines {
70            if lines.total > max {
71                return Ok(None);
72            }
73        }
74
75        // Get file size
76        let size = fs::metadata(path).map(|m| m.len()).unwrap_or(0);
77
78        // Analyze complexity
79        let complexity = self.complexity_analyzer.analyze(&content, &language);
80
81        Ok(Some(FileStats {
82            path: path.to_path_buf(),
83            language: language.name.clone(),
84            lines,
85            size,
86            complexity,
87        }))
88    }
89
90    /// Count lines in file content.
91    fn count_lines(&self, content: &str, lang: &Language) -> LineStats {
92        let mut stats = LineStats::default();
93        let mut in_block_comment = false;
94        let mut block_comment_end = "";
95
96        for line in content.lines() {
97            stats.total += 1;
98            let trimmed = line.trim();
99
100            // Empty line
101            if trimmed.is_empty() {
102                stats.blank += 1;
103                continue;
104            }
105
106            // Inside block comment
107            if in_block_comment {
108                stats.comment += 1;
109                if let Some(pos) = trimmed.find(block_comment_end) {
110                    // Check if there's code after the comment end
111                    let after = trimmed[pos + block_comment_end.len()..].trim();
112                    if !after.is_empty() && !self.starts_with_comment(after, lang) {
113                        // Line has code after comment - count as code too
114                        // But we already counted as comment, so adjust
115                        stats.comment -= 1;
116                        stats.code += 1;
117                    }
118                    in_block_comment = false;
119                }
120                continue;
121            }
122
123            // Check for block comment start
124            let mut found_block_start = false;
125            for (start, end) in &lang.block_comments {
126                if let Some(start_pos) = trimmed.find(start.as_str()) {
127                    // Check if it's inside a string (simplified check)
128                    let before = &trimmed[..start_pos];
129                    if self.is_in_string(before, lang) {
130                        continue;
131                    }
132
133                    found_block_start = true;
134                    let after_start = &trimmed[start_pos + start.len()..];
135
136                    if let Some(end_pos) = after_start.find(end.as_str()) {
137                        // Single-line block comment
138                        let after_end = after_start[end_pos + end.len()..].trim();
139                        if before.trim().is_empty() && after_end.is_empty() {
140                            stats.comment += 1;
141                        } else {
142                            // Mixed line - count as code
143                            stats.code += 1;
144                        }
145                    } else {
146                        // Multi-line block comment starts
147                        in_block_comment = true;
148                        block_comment_end = end;
149                        if before.trim().is_empty() {
150                            stats.comment += 1;
151                        } else {
152                            // Code before comment start
153                            stats.code += 1;
154                        }
155                    }
156                    break;
157                }
158            }
159
160            if found_block_start {
161                continue;
162            }
163
164            // Check for line comment
165            let is_line_comment = lang
166                .line_comments
167                .iter()
168                .any(|prefix| trimmed.starts_with(prefix.as_str()));
169
170            if is_line_comment {
171                stats.comment += 1;
172            } else {
173                stats.code += 1;
174            }
175        }
176
177        stats
178    }
179
180    /// Check if a string position is likely inside a string literal.
181    fn is_in_string(&self, text: &str, _lang: &Language) -> bool {
182        // Simplified check: count unescaped quotes
183        let mut in_string = false;
184        let mut chars = text.chars().peekable();
185
186        while let Some(c) = chars.next() {
187            match c {
188                '"' | '\'' => {
189                    in_string = !in_string;
190                }
191                '\\' => {
192                    // Skip escaped character
193                    chars.next();
194                }
195                _ => {}
196            }
197        }
198
199        in_string
200    }
201
202    /// Check if text starts with a comment.
203    fn starts_with_comment(&self, text: &str, lang: &Language) -> bool {
204        lang.line_comments
205            .iter()
206            .any(|prefix| text.starts_with(prefix.as_str()))
207            || lang
208                .block_comments
209                .iter()
210                .any(|(start, _)| text.starts_with(start.as_str()))
211    }
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Language definition carrying Rust's comment tokens (`//`, `/* */`).
    fn make_rust_lang() -> Language {
        Language {
            name: "Rust".to_string(),
            extensions: vec![".rs".to_string()],
            filenames: vec![],
            line_comments: vec!["//".to_string()],
            block_comments: vec![("/*".to_string(), "*/".to_string())],
            string_delimiters: vec![],
            function_pattern: None,
            complexity_keywords: vec![],
            nested_comments: true,
        }
    }

    /// Run `count_lines` on `content` with the Rust test language, using an
    /// analyzer built from an empty registry and the default configuration.
    fn count(content: &str) -> LineStats {
        let lang = make_rust_lang();
        let analyzer = FileAnalyzer::new(
            Arc::new(LanguageRegistry::empty()),
            &Config::default(),
        );
        analyzer.count_lines(content, &lang)
    }

    #[test]
    fn test_count_lines_basic() {
        let stats = count("fn main() {\n    println!(\"hello\");\n}\n");

        // (total, code, blank, comment)
        assert_eq!(
            (stats.total, stats.code, stats.blank, stats.comment),
            (3, 3, 0, 0)
        );
    }

    #[test]
    fn test_count_lines_with_comments() {
        let stats = count(concat!(
            "// This is a comment\n",
            "fn main() {\n",
            "    /* block comment */\n",
            "    println!(\"hello\");\n",
            "}\n",
        ));

        // (total, code, comment, blank)
        assert_eq!(
            (stats.total, stats.code, stats.comment, stats.blank),
            (5, 3, 2, 0)
        );
    }

    #[test]
    fn test_count_lines_multiline_comment() {
        let stats = count(concat!(
            "/*\n",
            " * Multi-line\n",
            " * comment\n",
            " */\n",
            "fn main() {}\n",
        ));

        // (total, code, comment, blank)
        assert_eq!(
            (stats.total, stats.code, stats.comment, stats.blank),
            (5, 1, 4, 0)
        );
    }
}