1use anyhow::Result;
2use chrono::{DateTime, Utc};
3use git2::{BlameOptions, Repository};
4use ignore::WalkBuilder;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use std::collections::{HashMap, HashSet};
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10
/// A single technical-debt marker found on one line of a scanned file.
///
/// Items are produced by `CodeDebtScanner::scan_content`; the optional fields start
/// as `None` and are filled in by later passes of `CodeDebtScanner::scan`.
/// `None` fields are omitted from serialized output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeDebtItem {
    /// Path of the file in which the marker was found.
    pub file_path: PathBuf,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// 1-based offset of the start of the match within the untrimmed line,
    /// as reported by the regex match (a byte offset, not a character count).
    pub column: usize,
    /// The matching line with leading and trailing whitespace removed.
    pub line_content: String,
    /// Name of the `Pattern` that matched (e.g. `"TODO"`).
    pub pattern_type: String,
    /// Severity copied from the matching `Pattern`.
    pub severity: Severity,

    /// Name from the blame signature for this line, when git blame ran and found one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub author: Option<String>,
    /// Whole days between the blamed commit's time and the moment of the scan.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub age_days: Option<i64>,
    /// Id of the commit that git blame attributes this line to.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub commit_hash: Option<String>,
    /// Time of the blamed commit, in UTC.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_at: Option<DateTime<Utc>>,
    /// Extension of `file_path`, when it has one that is valid UTF-8.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_extension: Option<String>,
    /// Number of scanned items sharing this item's pattern type and trimmed line
    /// content, counting this item itself; stays `0` unless duplicate detection ran.
    pub duplicate_count: usize,
}
33
/// How urgent a debt marker is.
///
/// Variants are declared from most to least severe, so the derived `Ord` sorts
/// `Critical` first: `Critical < High < Medium < Low`. Sorting and
/// `CodeDebtScanner::filter_by_severity` rely on this declaration order.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Most severe level; compares lowest.
    Critical,
    /// Second most severe level.
    High,
    /// Third most severe level.
    Medium,
    /// Least severe level; compares highest.
    Low,
}
41
/// A named regular expression that identifies one kind of debt marker.
#[derive(Debug, Clone)]
pub struct Pattern {
    /// Label stored in `CodeDebtItem::pattern_type` for every match of this pattern.
    pub name: String,
    /// Expression matched against each line of a file individually.
    pub regex: Regex,
    /// Severity assigned to every match of this pattern.
    pub severity: Severity,
}
48
/// Scans a directory tree for technical-debt markers.
///
/// Configure it with the `with_*` builder methods, then call `scan`.
pub struct CodeDebtScanner {
    /// Patterns tried against every line of every scanned file.
    patterns: Vec<Pattern>,
    /// File extensions (without the leading dot) that are eligible for scanning.
    file_extensions: Vec<String>,
    /// Directory names that should be excluded from scanning (set via `with_ignore_dirs`).
    ignore_dirs: Vec<String>,
    /// Whether `scan` should try to annotate results with git blame data.
    enable_git_blame: bool,
    /// Whether `scan` should fill in `CodeDebtItem::duplicate_count`.
    detect_duplicates: bool,
    /// Repository handle used for blame; set by `with_git_blame` when discovery succeeds.
    git_repo: Option<Repository>,
}
57
58impl Default for CodeDebtScanner {
59 fn default() -> Self {
60 Self::new()
61 }
62}
63
64impl CodeDebtScanner {
65 pub fn new() -> Self {
66 let patterns = vec![
67 Pattern {
68 name: "HACK".to_string(),
69 regex: Regex::new(r"(?i)\b(HACK|XXX)\b").unwrap(),
70 severity: Severity::Critical,
71 },
72 Pattern {
73 name: "FIXME".to_string(),
74 regex: Regex::new(r"(?i)\bFIXME\b").unwrap(),
75 severity: Severity::High,
76 },
77 Pattern {
78 name: "TODO".to_string(),
79 regex: Regex::new(r"(?i)\bTODO\b").unwrap(),
80 severity: Severity::Medium,
81 },
82 Pattern {
83 name: "NOTE_FIX".to_string(),
84 regex: Regex::new(r"(?i)\bNOTE.*fix\b").unwrap(),
85 severity: Severity::Medium,
86 },
87 Pattern {
88 name: "TEMPORARY".to_string(),
89 regex: Regex::new(r"(?i)\b(temporary|temp|placeholder)\b").unwrap(),
90 severity: Severity::High,
91 },
92 Pattern {
93 name: "MOCK_STUB".to_string(),
94 regex: Regex::new(r"(?i)\b(mock|stub)\b").unwrap(),
95 severity: Severity::Low,
96 },
97 Pattern {
98 name: "PRODUCTION_DEBT".to_string(),
99 regex: Regex::new(r"(?i)(temporary|placeholder|mock).*production").unwrap(),
100 severity: Severity::Critical,
101 },
102 ];
103
104 let file_extensions = vec![
105 "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "cpp", "cc", "cxx", "h",
106 "hpp", "rb", "php", "cs", "swift", "kt", "scala", "clj", "ml", "hs", "elm", "dart",
107 "lua", "pl", "r", "jl", "nim", "zig", "v", "cr",
108 ]
109 .into_iter()
110 .map(String::from)
111 .collect();
112
113 let ignore_dirs = vec![
114 "node_modules",
115 ".git",
116 "target",
117 "dist",
118 "build",
119 ".next",
120 "vendor",
121 "__pycache__",
122 ".pytest_cache",
123 "coverage",
124 ".nyc_output",
125 "bower_components",
126 ]
127 .into_iter()
128 .map(String::from)
129 .collect();
130
131 Self {
132 patterns,
133 file_extensions,
134 ignore_dirs,
135 enable_git_blame: false,
136 detect_duplicates: false,
137 git_repo: None,
138 }
139 }
140
141 pub fn with_patterns(mut self, patterns: Vec<Pattern>) -> Self {
142 self.patterns = patterns;
143 self
144 }
145
146 pub fn with_file_extensions(mut self, extensions: Vec<String>) -> Self {
147 self.file_extensions = extensions;
148 self
149 }
150
151 pub fn with_ignore_dirs(mut self, dirs: Vec<String>) -> Self {
152 self.ignore_dirs = dirs;
153 self
154 }
155
156 pub fn with_git_blame(mut self, enable: bool) -> Self {
157 self.enable_git_blame = enable;
158 if enable {
159 if let Ok(repo) = Repository::discover(".") {
161 self.git_repo = Some(repo);
162 }
163 }
164 self
165 }
166
167 pub fn with_duplicate_detection(mut self, enable: bool) -> Self {
168 self.detect_duplicates = enable;
169 self
170 }
171
172 pub fn scan<P: AsRef<Path>>(&self, root_path: P) -> Result<Vec<CodeDebtItem>> {
173 let patterns = Arc::new(&self.patterns);
174 let extensions: HashSet<String> = self.file_extensions.iter().cloned().collect();
175
176 let walker = WalkBuilder::new(&root_path)
177 .hidden(false)
178 .ignore(true)
179 .git_ignore(true)
180 .build_parallel();
181
182 let (tx, rx) = std::sync::mpsc::channel();
183
184 walker.run(|| {
185 let tx = tx.clone();
186 let patterns = Arc::clone(&patterns);
187 let extensions = extensions.clone();
188
189 Box::new(move |entry| {
190 if let Ok(entry) = entry {
191 let path = entry.path();
192
193 if path.is_file() {
194 if let Some(ext) = path.extension() {
195 if let Some(ext_str) = ext.to_str() {
196 if extensions.contains(ext_str) {
197 if let Ok(content) = std::fs::read_to_string(path) {
198 let items = Self::scan_content(path, &content, &patterns);
199 for item in items {
200 let _ = tx.send(item);
201 }
202 }
203 }
204 }
205 }
206 }
207 }
208 ignore::WalkState::Continue
209 })
210 });
211
212 drop(tx);
213 let mut results: Vec<CodeDebtItem> = rx.iter().collect();
214
215 if self.enable_git_blame {
217 self.add_git_information(&mut results);
218 }
219
220 if self.detect_duplicates {
222 self.detect_duplicate_patterns(&mut results);
223 }
224
225 self.add_file_extensions(&mut results);
227
228 results.sort_by(|a, b| {
229 a.severity
230 .cmp(&b.severity)
231 .then_with(|| a.file_path.cmp(&b.file_path))
232 .then_with(|| a.line_number.cmp(&b.line_number))
233 });
234
235 Ok(results)
236 }
237
238 fn scan_content(file_path: &Path, content: &str, patterns: &[Pattern]) -> Vec<CodeDebtItem> {
239 content
240 .lines()
241 .enumerate()
242 .flat_map(|(line_idx, line)| {
243 patterns
244 .iter()
245 .filter_map(|pattern| {
246 pattern.regex.find(line).map(|m| CodeDebtItem {
247 file_path: file_path.to_path_buf(),
248 line_number: line_idx + 1,
249 column: m.start() + 1,
250 line_content: line.trim().to_string(),
251 pattern_type: pattern.name.clone(),
252 severity: pattern.severity.clone(),
253 author: None,
254 age_days: None,
255 commit_hash: None,
256 created_at: None,
257 file_extension: None,
258 duplicate_count: 0,
259 })
260 })
261 .collect::<Vec<_>>()
262 })
263 .collect()
264 }
265
266 pub fn get_summary(&self, items: &[CodeDebtItem]) -> HashMap<String, usize> {
267 let mut summary = HashMap::new();
268 for item in items {
269 *summary.entry(item.pattern_type.clone()).or_insert(0) += 1;
270 }
271 summary
272 }
273
274 pub fn filter_by_severity(
275 &self,
276 items: &[CodeDebtItem],
277 min_severity: Severity,
278 ) -> Vec<CodeDebtItem> {
279 items
280 .iter()
281 .filter(|item| item.severity <= min_severity)
282 .cloned()
283 .collect()
284 }
285
286 fn add_git_information(&self, items: &mut [CodeDebtItem]) {
287 if let Some(repo) = &self.git_repo {
288 for item in items.iter_mut() {
289 if let Ok(relative_path) = item
290 .file_path
291 .strip_prefix(repo.workdir().unwrap_or_else(|| std::path::Path::new(".")))
292 {
293 if let Ok(blame) =
294 repo.blame_file(relative_path, Some(&mut BlameOptions::new()))
295 {
296 if let Some(hunk) = blame.get_line(item.line_number) {
297 let sig = hunk.final_signature();
298 let oid = hunk.final_commit_id();
299
300 item.author = sig.name().map(|s| s.to_string());
301 item.commit_hash = Some(oid.to_string());
302
303 if let Ok(commit) = repo.find_commit(oid) {
304 let timestamp = commit.time().seconds();
305 let datetime =
306 DateTime::from_timestamp(timestamp, 0).unwrap_or_else(Utc::now);
307 item.created_at = Some(datetime);
308 let now = Utc::now();
309 let duration = now.signed_duration_since(datetime);
310 item.age_days = Some(duration.num_days());
311 }
312 }
313 }
314 }
315 }
316 }
317 }
318
319 fn detect_duplicate_patterns(&self, items: &mut [CodeDebtItem]) {
320 let mut pattern_counts: HashMap<String, usize> = HashMap::new();
321
322 for item in items.iter() {
324 let key = format!("{}:{}", item.pattern_type, item.line_content.trim());
325 *pattern_counts.entry(key).or_insert(0) += 1;
326 }
327
328 for item in items.iter_mut() {
330 let key = format!("{}:{}", item.pattern_type, item.line_content.trim());
331 item.duplicate_count = pattern_counts.get(&key).copied().unwrap_or(0);
332 }
333 }
334
335 fn add_file_extensions(&self, items: &mut [CodeDebtItem]) {
336 for item in items.iter_mut() {
337 if let Some(ext) = item.file_path.extension() {
338 item.file_extension = ext.to_str().map(|s| s.to_string());
339 }
340 }
341 }
342
343 pub fn get_file_type_summary(&self, items: &[CodeDebtItem]) -> HashMap<String, usize> {
344 let mut summary = HashMap::new();
345 for item in items {
346 let file_type = item.file_extension.as_deref().unwrap_or("unknown");
347 *summary.entry(file_type.to_string()).or_insert(0) += 1;
348 }
349 summary
350 }
351
352 pub fn get_age_distribution(&self, items: &[CodeDebtItem]) -> HashMap<String, usize> {
353 let mut distribution = HashMap::new();
354 for item in items {
355 if let Some(age) = item.age_days {
356 let bucket = match age {
357 0..=7 => "This week",
358 8..=30 => "This month",
359 31..=90 => "Last 3 months",
360 91..=365 => "This year",
361 _ => "Over a year",
362 };
363 *distribution.entry(bucket.to_string()).or_insert(0) += 1;
364 } else {
365 *distribution.entry("Unknown age".to_string()).or_insert(0) += 1;
366 }
367 }
368 distribution
369 }
370
371 pub fn filter_by_age(&self, items: &[CodeDebtItem], max_age_days: i64) -> Vec<CodeDebtItem> {
372 items
373 .iter()
374 .filter(|item| item.age_days.is_none_or(|age| age <= max_age_days))
375 .cloned()
376 .collect()
377 }
378
379 pub fn find_duplicates(&self, items: &[CodeDebtItem], min_count: usize) -> Vec<CodeDebtItem> {
380 items
381 .iter()
382 .filter(|item| item.duplicate_count >= min_count)
383 .cloned()
384 .collect()
385 }
386}
387
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `dir/name` and returns the path of the new file.
    fn create_test_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let target = dir.join(name);
        fs::write(&target, content).unwrap();
        target
    }

    /// Builds an item in `test.rs` at column 1 with no git metadata attached.
    fn debt_item(
        line_number: usize,
        line_content: &str,
        pattern_type: &str,
        severity: Severity,
    ) -> CodeDebtItem {
        CodeDebtItem {
            file_path: PathBuf::from("test.rs"),
            line_number,
            column: 1,
            line_content: line_content.to_string(),
            pattern_type: pattern_type.to_string(),
            severity,
            author: None,
            age_days: None,
            commit_hash: None,
            created_at: None,
            file_extension: None,
            duplicate_count: 0,
        }
    }

    /// Runs the default pattern set over `content` as if it were the file `name`.
    fn scan_with_defaults(name: &str, content: &str) -> Vec<CodeDebtItem> {
        let scanner = CodeDebtScanner::new();
        CodeDebtScanner::scan_content(Path::new(name), content, &scanner.patterns)
    }

    /// Counts the items that were produced by the pattern called `pattern_type`.
    fn count_of(items: &[CodeDebtItem], pattern_type: &str) -> usize {
        items
            .iter()
            .filter(|item| item.pattern_type == pattern_type)
            .count()
    }

    #[test]
    fn test_scanner_creation() {
        let scanner = CodeDebtScanner::new();
        assert!(!scanner.patterns.is_empty());
        assert!(!scanner.file_extensions.is_empty());
        assert!(!scanner.ignore_dirs.is_empty());
    }

    #[test]
    fn test_default_patterns() {
        let scanner = CodeDebtScanner::new();

        for expected in ["TODO", "FIXME", "HACK", "TEMPORARY", "PRODUCTION_DEBT"].iter() {
            assert!(scanner.patterns.iter().any(|p| p.name == *expected));
        }
    }

    #[test]
    fn test_scan_content() {
        let test_content = r#"
fn main() {
 // TODO: implement this function
 println!("Hello, world!");
 // FIXME: this is broken
 let x = 5;
 // HACK: workaround
 let y = x * 2;
}
"#;

        let items = scan_with_defaults("test.rs", test_content);
        assert_eq!(items.len(), 3);

        let expectations = [
            ("TODO", Severity::Medium, 3),
            ("FIXME", Severity::High, 5),
            ("HACK", Severity::Critical, 7),
        ];
        for (pattern_type, severity, line_number) in expectations.iter() {
            let found = items
                .iter()
                .find(|item| item.pattern_type == *pattern_type)
                .unwrap();
            assert_eq!(found.severity, *severity);
            assert_eq!(found.line_number, *line_number);
        }
    }

    #[test]
    fn test_production_debt_pattern() {
        let test_content = r#"
const API_KEY = "placeholder for production";
let temp_production_fix = true;
"#;

        let items = scan_with_defaults("test.js", test_content);

        let production_debt = items
            .iter()
            .find(|item| item.pattern_type == "PRODUCTION_DEBT");
        assert!(production_debt.is_some());
        assert_eq!(production_debt.unwrap().severity, Severity::Critical);
    }

    #[test]
    fn test_custom_patterns() {
        let urgent = Pattern {
            name: "URGENT".to_string(),
            regex: Regex::new(r"(?i)\bURGENT\b").unwrap(),
            severity: Severity::Critical,
        };

        let scanner = CodeDebtScanner::new().with_patterns(vec![urgent]);
        assert_eq!(scanner.patterns.len(), 1);
        assert_eq!(scanner.patterns[0].name, "URGENT");
    }

    #[test]
    fn test_file_extensions_filter() {
        let wanted = vec!["rs".to_string(), "py".to_string()];
        let scanner = CodeDebtScanner::new().with_file_extensions(wanted);

        assert_eq!(scanner.file_extensions.len(), 2);
        assert!(scanner.file_extensions.contains(&"rs".to_string()));
        assert!(scanner.file_extensions.contains(&"py".to_string()));
    }

    #[test]
    fn test_ignore_dirs_filter() {
        let scanner = CodeDebtScanner::new().with_ignore_dirs(vec!["my_custom_dir".to_string()]);

        assert!(scanner.ignore_dirs.contains(&"my_custom_dir".to_string()));
    }

    #[test]
    fn test_get_summary() {
        let items = vec![
            debt_item(1, "// TODO: test", "TODO", Severity::Medium),
            debt_item(2, "// TODO: another test", "TODO", Severity::Medium),
            debt_item(3, "// FIXME: broken", "FIXME", Severity::High),
        ];

        let summary = CodeDebtScanner::new().get_summary(&items);

        assert_eq!(summary.get("TODO"), Some(&2));
        assert_eq!(summary.get("FIXME"), Some(&1));
    }

    #[test]
    fn test_filter_by_severity() {
        let items = vec![
            debt_item(1, "// TODO: test", "TODO", Severity::Medium),
            debt_item(2, "// HACK: critical", "HACK", Severity::Critical),
            debt_item(3, "// mock data", "MOCK_STUB", Severity::Low),
        ];

        let scanner = CodeDebtScanner::new();

        // Nothing is exactly High here, so only the Critical item passes.
        let high_items = scanner.filter_by_severity(&items, Severity::High);
        assert_eq!(high_items.len(), 1);
        assert_eq!(high_items[0].pattern_type, "HACK");

        // Critical and Medium pass; Low is filtered out.
        let medium_items = scanner.filter_by_severity(&items, Severity::Medium);
        assert_eq!(medium_items.len(), 2);
    }

    #[test]
    fn test_scan_real_directory() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        create_test_file(
            root,
            "test.rs",
            "// TODO: implement\nfn main() {\n // FIXME: broken\n println!(\"test\");\n}",
        );
        create_test_file(
            root,
            "test.py",
            "# TODO: add error handling\ndef test():\n # HACK: quick fix\n pass",
        );
        // The `.txt` extension is not in the default list, so this file must not be scanned.
        create_test_file(root, "test.txt", "TODO: this should be ignored");

        let items = CodeDebtScanner::new().scan(root).unwrap();

        assert_eq!(items.len(), 4);

        for item in &items {
            assert!(item.file_path.exists());
            assert!(item.line_number > 0);
            assert!(item.column > 0);
            assert!(!item.line_content.is_empty());
        }
    }

    #[test]
    fn test_severity_ordering() {
        assert!(Severity::Critical < Severity::High);
        assert!(Severity::High < Severity::Medium);
        assert!(Severity::Medium < Severity::Low);
    }

    #[test]
    fn test_case_insensitive_patterns() {
        let test_content = r#"
// todo: lowercase
// TODO: uppercase
// ToDo: mixed case
// FIXME: test
// fixme: lowercase
"#;

        let items = scan_with_defaults("test.rs", test_content);

        assert_eq!(count_of(&items, "TODO"), 3);
        assert_eq!(count_of(&items, "FIXME"), 2);
    }
}