1mod patterns;
7mod regex_fallback;
8
9#[cfg(feature = "tree-sitter")]
10mod tree_sitter;
11
12pub use patterns::{CompiledPattern, PatternRegistry};
13pub use regex_fallback::RegexExtractor;
14
15use crate::config::{Pattern, PatternCategory, Severity};
16use crate::Result;
17use std::collections::HashMap;
18use std::path::Path;
19
20#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
22pub struct Comment {
23 pub line: usize,
25 pub column: usize,
27 pub content: String,
29}
30
31#[derive(Debug, Clone, serde::Serialize)]
33pub struct Finding {
34 pub file: String,
36 pub line: usize,
38 pub column: usize,
40 pub severity: Severity,
42 pub category: PatternCategory,
44 pub message: String,
46 pub match_text: String,
48 pub pattern_regex: String,
50}
51
52#[derive(Debug, Clone, serde::Serialize)]
54pub struct FileScanResult {
55 pub path: String,
57 pub findings: Vec<Finding>,
59 pub score: u32,
61}
62
63#[derive(Debug, Clone, serde::Serialize)]
65pub struct ScanSummary {
66 pub files_scanned: usize,
68 pub files_with_findings: usize,
70 pub total_findings: usize,
72 pub total_score: u32,
74 pub by_severity: HashMap<Severity, usize>,
76 pub by_category: HashMap<PatternCategory, usize>,
78}
79
80impl ScanSummary {
81 pub fn new(results: &[FileScanResult]) -> Self {
83 let mut summary = Self {
84 files_scanned: results.len(),
85 files_with_findings: 0,
86 total_findings: 0,
87 total_score: 0,
88 by_severity: HashMap::new(),
89 by_category: HashMap::new(),
90 };
91
92 for result in results {
93 if !result.findings.is_empty() {
94 summary.files_with_findings += 1;
95 }
96 summary.total_findings += result.findings.len();
97 summary.total_score += result.score;
98
99 for finding in &result.findings {
100 *summary
101 .by_severity
102 .entry(finding.severity.clone())
103 .or_insert(0) += 1;
104 *summary
105 .by_category
106 .entry(finding.category.clone())
107 .or_insert(0) += 1;
108 }
109 }
110
111 summary
112 }
113}
114
/// Programming language of a source file, inferred from its file extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    /// Python (`py`).
    Python,
    /// JavaScript (`js`, `mjs`, `cjs`).
    JavaScript,
    /// TypeScript (`ts`).
    TypeScript,
    /// JavaScript with JSX (`jsx`).
    Jsx,
    /// TypeScript with JSX (`tsx`).
    Tsx,
    /// Rust (`rs`).
    Rust,
    /// Go (`go`).
    Go,
    /// Java (`java`).
    Java,
    /// Kotlin (`kt`, `kts`).
    Kotlin,
    /// C and C++ sources and headers (`c`, `cpp`, `cc`, `cxx`, `h`, `hpp`).
    CCpp,
    /// C# (`cs`).
    CSharp,
    /// Ruby (`rb`).
    Ruby,
    /// PHP (`php`).
    Php,
    /// Swift (`swift`).
    Swift,
    /// Haskell (`hs`).
    Haskell,
    /// Lua (`lua`).
    Lua,
    /// Perl (`pl`, `pm`).
    Perl,
    /// R (`r`, `R`).
    R,
    /// Scala (`scala`).
    Scala,
    /// Shell scripts (`sh`, `bash`, `zsh`, `fish`).
    Shell,
    /// No extension, a non-UTF-8 extension, or an extension not listed above.
    Unknown,
}

impl Language {
    /// Detects the language from the extension of `path`.
    ///
    /// Matching is case-sensitive (only R accepts both `r` and `R`). Paths
    /// without an extension, or whose extension is not valid UTF-8, yield
    /// [`Language::Unknown`].
    pub fn from_path(path: &Path) -> Self {
        path.extension()
            .and_then(|e| e.to_str())
            .map_or(Language::Unknown, |ext| match ext {
                "py" => Language::Python,
                "js" | "mjs" | "cjs" => Language::JavaScript,
                "ts" => Language::TypeScript,
                "jsx" => Language::Jsx,
                "tsx" => Language::Tsx,
                "rs" => Language::Rust,
                "go" => Language::Go,
                "java" => Language::Java,
                "kt" | "kts" => Language::Kotlin,
                "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" => Language::CCpp,
                "cs" => Language::CSharp,
                "rb" => Language::Ruby,
                "php" => Language::Php,
                "swift" => Language::Swift,
                "hs" => Language::Haskell,
                "lua" => Language::Lua,
                "pl" | "pm" => Language::Perl,
                "r" | "R" => Language::R,
                "scala" => Language::Scala,
                "sh" | "bash" | "zsh" | "fish" => Language::Shell,
                _ => Language::Unknown,
            })
    }

    /// Reports whether tree-sitter support is compiled in for this language.
    ///
    /// Each arm is gated on the corresponding per-language Cargo feature, so a
    /// language whose feature is disabled falls through to `false`.
    ///
    /// This definition is gated on the `tree-sitter` feature so that it never
    /// coexists with the fallback definition below; without the gate the two
    /// methods collide when the feature is disabled.
    #[cfg(feature = "tree-sitter")]
    pub fn has_tree_sitter(self) -> bool {
        match self {
            #[cfg(feature = "python")]
            Language::Python => true,
            #[cfg(feature = "javascript")]
            Language::JavaScript | Language::Jsx => true,
            #[cfg(feature = "typescript")]
            Language::TypeScript | Language::Tsx => true,
            #[cfg(feature = "rust")]
            Language::Rust => true,
            #[cfg(feature = "go")]
            Language::Go => true,
            #[cfg(feature = "java")]
            Language::Java => true,
            #[cfg(feature = "cpp")]
            Language::CCpp => true,
            #[cfg(feature = "c-sharp")]
            Language::CSharp => true,
            #[cfg(feature = "php")]
            Language::Php => true,
            #[cfg(feature = "ruby")]
            Language::Ruby => true,
            #[cfg(feature = "haskell")]
            Language::Haskell => true,
            #[cfg(feature = "lua")]
            Language::Lua => true,
            #[cfg(feature = "scala")]
            Language::Scala => true,
            _ => false,
        }
    }

    /// Fallback used when the `tree-sitter` feature is disabled: no language
    /// has tree-sitter support.
    #[cfg(not(feature = "tree-sitter"))]
    pub fn has_tree_sitter(self) -> bool {
        false
    }
}
232
233pub trait CommentExtractor {
235 fn extract(&self, source: &str) -> Vec<Comment>;
237}
238
239pub struct Scanner {
241 registry: PatternRegistry,
242}
243
244impl Scanner {
245 pub fn new(patterns: Vec<Pattern>) -> Result<Self> {
247 let registry = PatternRegistry::new(patterns)?;
248 Ok(Self { registry })
249 }
250
251 pub fn scan_file(&self, path: &str, content: &str) -> FileScanResult {
253 let lang = Language::from_path(Path::new(path));
254 let mut comment_findings = self.findings_from_comments(path, lang, content);
255
256 #[cfg(feature = "tree-sitter")]
258 if lang.has_tree_sitter() {
259 if let Some(mut extractor) = self::tree_sitter::get_extractor(lang) {
260 let patterns: Vec<&Pattern> =
262 self.registry.patterns.iter().map(|p| &p.pattern).collect();
263 let pattern_refs: Vec<Pattern> = patterns.iter().map(|p| (**p).clone()).collect();
265 let ast_findings = extractor.extract_ast_findings(content, &pattern_refs);
266
267 for mut finding in ast_findings {
269 finding.file = path.to_string();
270 comment_findings.score += finding.severity.score();
271 comment_findings.findings.push(finding);
272 }
273 }
274 }
275
276 comment_findings
277 }
278
279 fn extract_comments(&self, lang: Language, source: &str) -> Vec<Comment> {
281 #[cfg(feature = "tree-sitter")]
282 if lang.has_tree_sitter() {
283 if let Some(mut extractor) = self::tree_sitter::get_extractor(lang) {
284 return extractor.extract(source);
285 }
286 }
287
288 RegexExtractor::new().extract(source)
290 }
291
292 fn findings_from_comments(&self, path: &str, lang: Language, source: &str) -> FileScanResult {
294 let mut findings = Vec::new();
295 let mut total_score = 0u32;
296
297 let comments = self.extract_comments(lang, source);
298
299 for comment in &comments {
300 for pattern in &self.registry.patterns {
301 if pattern.pattern.ast_query.is_some() {
303 continue;
304 }
305
306 if let Some(regex) = &pattern.compiled {
307 if let Some(mat) = regex.find(&comment.content) {
308 let severity = pattern.pattern.severity.clone();
309 total_score += severity.score();
310
311 findings.push(Finding {
312 file: path.to_string(),
313 line: comment.line,
314 column: comment.column + mat.start(),
315 severity,
316 category: pattern.pattern.category.clone(),
317 message: pattern.pattern.message.clone(),
318 match_text: mat.as_str().to_string(),
319 pattern_regex: pattern.pattern.regex.to_string(),
320 });
321 }
322 }
323 }
324 }
325
326 FileScanResult {
327 path: path.to_string(),
328 findings,
329 score: total_score,
330 }
331 }
332}
333
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::RegexPattern;

    /// Builds a regex-only pattern with no language restriction.
    fn pattern(
        regex: &str,
        severity: Severity,
        message: &str,
        category: PatternCategory,
    ) -> Pattern {
        Pattern {
            regex: RegexPattern::new(regex.to_string()).unwrap(),
            severity,
            message: message.to_string(),
            category,
            ast_query: None,
            languages: vec![],
        }
    }

    /// The two patterns shared by the scanner tests.
    fn test_patterns() -> Vec<Pattern> {
        vec![
            pattern(
                "(?i)TODO:",
                Severity::Medium,
                "Placeholder comment found",
                PatternCategory::Placeholder,
            ),
            pattern(
                "(?i)for now",
                Severity::Low,
                "Deferral phrase detected",
                PatternCategory::Deferral,
            ),
        ]
    }

    /// Asserts that each file name maps to the expected language.
    fn assert_languages(cases: &[(&str, Language)]) {
        for (file_name, expected) in cases {
            assert_eq!(
                Language::from_path(Path::new(file_name)),
                *expected,
                "unexpected language for {}",
                file_name
            );
        }
    }

    /// A scan result with no findings and a zero score.
    fn clean_result(path: &str) -> FileScanResult {
        FileScanResult {
            path: path.to_string(),
            findings: vec![],
            score: 0,
        }
    }

    /// A medium-severity placeholder finding on line 1, column 1 of `test.py`.
    fn todo_finding() -> Finding {
        Finding {
            file: "test.py".to_string(),
            line: 1,
            column: 1,
            severity: Severity::Medium,
            category: PatternCategory::Placeholder,
            message: "TODO".to_string(),
            match_text: "TODO".to_string(),
            pattern_regex: "(?i)todo".to_string(),
        }
    }

    #[test]
    fn test_scan_file_findings() {
        let scanner = Scanner::new(test_patterns()).unwrap();
        let code = r#"
# TODO: implement this later
# This is fine
# for now we'll do it this way
"#;
        let result = scanner.scan_file("test.py", code);
        assert_eq!(result.findings.len(), 2);
        assert_eq!(result.findings[0].category, PatternCategory::Placeholder);
        assert_eq!(result.findings[1].category, PatternCategory::Deferral);
    }

    #[test]
    fn test_score_calculation() {
        let scanner = Scanner::new(test_patterns()).unwrap();
        let result = scanner.scan_file("test.py", "# TODO: fix this # for now we do this");
        assert_eq!(result.score, 6);
    }

    #[test]
    fn test_language_detection() {
        assert_languages(&[
            ("test.py", Language::Python),
            ("test.rs", Language::Rust),
            ("test.js", Language::JavaScript),
            ("test.tsx", Language::Tsx),
            ("test.xyz", Language::Unknown),
        ]);
    }

    #[test]
    fn test_language_detection_all_types() {
        assert_languages(&[
            ("test.ts", Language::TypeScript),
            ("test.jsx", Language::Jsx),
            ("test.go", Language::Go),
            ("test.java", Language::Java),
            ("test.kt", Language::Kotlin),
            ("test.kts", Language::Kotlin),
            ("test.c", Language::CCpp),
            ("test.cpp", Language::CCpp),
            ("test.cs", Language::CSharp),
            ("test.rb", Language::Ruby),
            ("test.php", Language::Php),
            ("test.swift", Language::Swift),
            ("test.hs", Language::Haskell),
            ("test.lua", Language::Lua),
            ("test.pl", Language::Perl),
            ("test.pm", Language::Perl),
            ("test.scala", Language::Scala),
            ("test.sh", Language::Shell),
            ("test.bash", Language::Shell),
            ("test.zsh", Language::Shell),
            ("test.fish", Language::Shell),
        ]);
    }

    #[test]
    fn test_language_from_path_no_extension() {
        assert_languages(&[
            ("Makefile", Language::Unknown),
            (".gitignore", Language::Unknown),
            ("test", Language::Unknown),
        ]);
    }

    #[test]
    fn test_comment_struct() {
        let comment = Comment {
            line: 10,
            column: 5,
            content: "TODO: implement this".to_string(),
        };
        assert_eq!(comment.line, 10);
        assert_eq!(comment.column, 5);
        assert_eq!(comment.content, "TODO: implement this");
    }

    #[test]
    fn test_finding_struct() {
        let finding = Finding {
            file: "test.py".to_string(),
            line: 10,
            column: 5,
            severity: Severity::Medium,
            category: PatternCategory::Placeholder,
            message: "TODO comment found".to_string(),
            match_text: "TODO".to_string(),
            pattern_regex: "(?i)todo".to_string(),
        };
        assert_eq!(finding.file, "test.py");
        assert_eq!(finding.line, 10);
        assert_eq!(finding.severity, Severity::Medium);
        assert_eq!(finding.category, PatternCategory::Placeholder);
    }

    #[test]
    fn test_file_scan_result_struct() {
        let result = clean_result("test.py");
        assert_eq!(result.path, "test.py");
        assert!(result.findings.is_empty());
        assert_eq!(result.score, 0);
    }

    #[test]
    fn test_scan_summary_new_empty() {
        let summary = ScanSummary::new(&[]);
        assert_eq!(summary.files_scanned, 0);
        assert_eq!(summary.files_with_findings, 0);
        assert_eq!(summary.total_findings, 0);
        assert_eq!(summary.total_score, 0);
    }

    #[test]
    fn test_scan_summary_new_with_results() {
        let results = vec![FileScanResult {
            path: "test.py".to_string(),
            findings: vec![todo_finding()],
            score: 5,
        }];
        let summary = ScanSummary::new(&results);
        assert_eq!(summary.files_scanned, 1);
        assert_eq!(summary.files_with_findings, 1);
        assert_eq!(summary.total_findings, 1);
        assert_eq!(summary.total_score, 5);
        assert_eq!(summary.by_severity.get(&Severity::Medium), Some(&1));
        assert_eq!(
            summary.by_category.get(&PatternCategory::Placeholder),
            Some(&1)
        );
    }

    #[test]
    fn test_scan_summary_new_empty_results() {
        let results = vec![clean_result("clean.py"), clean_result("sloppy.py")];
        let summary = ScanSummary::new(&results);
        assert_eq!(summary.files_scanned, 2);
        assert_eq!(summary.files_with_findings, 0);
        assert_eq!(summary.total_findings, 0);
        assert_eq!(summary.total_score, 0);
    }
}