codelens_core/analyzer/
file.rs1use std::fs;
4use std::path::Path;
5use std::sync::Arc;
6
7use crate::config::Config;
8use crate::error::Result;
9use crate::language::{Language, LanguageRegistry};
10
11use super::complexity::ComplexityAnalyzer;
12use super::stats::{FileStats, LineStats};
13
14pub struct FileAnalyzer {
16 registry: Arc<LanguageRegistry>,
17 complexity_analyzer: ComplexityAnalyzer,
18 min_lines: Option<usize>,
19 max_lines: Option<usize>,
20}
21
22impl FileAnalyzer {
23 pub fn new(registry: Arc<LanguageRegistry>, config: &Config) -> Self {
25 Self {
26 registry,
27 complexity_analyzer: ComplexityAnalyzer::new(),
28 min_lines: config.filter.min_lines,
29 max_lines: config.filter.max_lines,
30 }
31 }
32
33 pub fn analyze(&self, path: &Path) -> Result<Option<FileStats>> {
37 let language = match self.registry.detect(path) {
39 Some(lang) => lang,
40 None => return Ok(None),
41 };
42
43 let content = match fs::read_to_string(path) {
45 Ok(c) => c,
46 Err(_) => {
47 match fs::read(path) {
49 Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
50 Err(e) => {
51 return Err(crate::error::Error::FileRead {
52 path: path.to_path_buf(),
53 source: e,
54 })
55 }
56 }
57 }
58 };
59
60 let lines = self.count_lines(&content, &language);
62
63 if let Some(min) = self.min_lines {
65 if lines.total < min {
66 return Ok(None);
67 }
68 }
69 if let Some(max) = self.max_lines {
70 if lines.total > max {
71 return Ok(None);
72 }
73 }
74
75 let size = fs::metadata(path).map(|m| m.len()).unwrap_or(0);
77
78 let complexity = self.complexity_analyzer.analyze(&content, &language);
80
81 Ok(Some(FileStats {
82 path: path.to_path_buf(),
83 language: language.name.clone(),
84 lines,
85 size,
86 complexity,
87 }))
88 }
89
90 fn count_lines(&self, content: &str, lang: &Language) -> LineStats {
92 let mut stats = LineStats::default();
93 let mut in_block_comment = false;
94 let mut block_comment_end = "";
95
96 for line in content.lines() {
97 stats.total += 1;
98 let trimmed = line.trim();
99
100 if trimmed.is_empty() {
102 stats.blank += 1;
103 continue;
104 }
105
106 if in_block_comment {
108 stats.comment += 1;
109 if let Some(pos) = trimmed.find(block_comment_end) {
110 let after = trimmed[pos + block_comment_end.len()..].trim();
112 if !after.is_empty() && !self.starts_with_comment(after, lang) {
113 stats.comment -= 1;
116 stats.code += 1;
117 }
118 in_block_comment = false;
119 }
120 continue;
121 }
122
123 let mut found_block_start = false;
125 for (start, end) in &lang.block_comments {
126 if let Some(start_pos) = trimmed.find(start.as_str()) {
127 let before = &trimmed[..start_pos];
129 if self.is_in_string(before, lang) {
130 continue;
131 }
132
133 found_block_start = true;
134 let after_start = &trimmed[start_pos + start.len()..];
135
136 if let Some(end_pos) = after_start.find(end.as_str()) {
137 let after_end = after_start[end_pos + end.len()..].trim();
139 if before.trim().is_empty() && after_end.is_empty() {
140 stats.comment += 1;
141 } else {
142 stats.code += 1;
144 }
145 } else {
146 in_block_comment = true;
148 block_comment_end = end;
149 if before.trim().is_empty() {
150 stats.comment += 1;
151 } else {
152 stats.code += 1;
154 }
155 }
156 break;
157 }
158 }
159
160 if found_block_start {
161 continue;
162 }
163
164 let is_line_comment = lang
166 .line_comments
167 .iter()
168 .any(|prefix| trimmed.starts_with(prefix.as_str()));
169
170 if is_line_comment {
171 stats.comment += 1;
172 } else {
173 stats.code += 1;
174 }
175 }
176
177 stats
178 }
179
180 fn is_in_string(&self, text: &str, _lang: &Language) -> bool {
182 let mut in_string = false;
184 let mut chars = text.chars().peekable();
185
186 while let Some(c) = chars.next() {
187 match c {
188 '"' | '\'' => {
189 in_string = !in_string;
190 }
191 '\\' => {
192 chars.next();
194 }
195 _ => {}
196 }
197 }
198
199 in_string
200 }
201
202 fn starts_with_comment(&self, text: &str, lang: &Language) -> bool {
204 lang.line_comments
205 .iter()
206 .any(|prefix| text.starts_with(prefix.as_str()))
207 || lang
208 .block_comments
209 .iter()
210 .any(|(start, _)| text.starts_with(start.as_str()))
211 }
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal Rust-like language description: `//` line comments
    /// and `/* ... */` block comments, with every other field left empty.
    fn make_rust_lang() -> Language {
        Language {
            name: "Rust".to_string(),
            extensions: vec![".rs".to_string()],
            filenames: vec![],
            line_comments: vec!["//".to_string()],
            block_comments: vec![("/*".to_string(), "*/".to_string())],
            string_delimiters: vec![],
            function_pattern: None,
            complexity_keywords: vec![],
            nested_comments: true,
        }
    }

    /// Counts the lines of `content` with an analyzer built from an empty
    /// registry and the default configuration.
    fn count_rust_lines(content: &str) -> LineStats {
        let analyzer = FileAnalyzer::new(
            Arc::new(LanguageRegistry::empty()),
            &Config::default(),
        );
        analyzer.count_lines(content, &make_rust_lang())
    }

    #[test]
    fn test_count_lines_basic() {
        let stats = count_rust_lines("fn main() {\n println!(\"hello\");\n}\n");

        assert_eq!(stats.total, 3);
        assert_eq!(stats.code, 3);
        assert_eq!(stats.blank, 0);
        assert_eq!(stats.comment, 0);
    }

    #[test]
    fn test_count_lines_with_comments() {
        let stats = count_rust_lines(
            "// This is a comment\nfn main() {\n /* block comment */\n println!(\"hello\");\n}\n",
        );

        assert_eq!(stats.total, 5);
        assert_eq!(stats.code, 3);
        assert_eq!(stats.comment, 2);
        assert_eq!(stats.blank, 0);
    }

    #[test]
    fn test_count_lines_multiline_comment() {
        let stats = count_rust_lines("/*\n * Multi-line\n * comment\n */\nfn main() {}\n");

        assert_eq!(stats.total, 5);
        assert_eq!(stats.code, 1);
        assert_eq!(stats.comment, 4);
        assert_eq!(stats.blank, 0);
    }
}