codedebt/
lib.rs

1use anyhow::Result;
2use chrono::{DateTime, Utc};
3use git2::{BlameOptions, Repository};
4use ignore::WalkBuilder;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use std::collections::{HashMap, HashSet};
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10
/// A single technical-debt marker found in a source file.
///
/// The positional fields are filled in when a line matches a [`Pattern`]; the
/// optional fields start out as `None` and are populated by later scan stages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeDebtItem {
    /// Path of the file containing the match.
    pub file_path: PathBuf,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// 1-based byte offset of the start of the match within the untrimmed line.
    pub column: usize,
    /// The matching line with leading and trailing whitespace removed.
    pub line_content: String,
    /// Name of the pattern that matched (for example `"TODO"`).
    pub pattern_type: String,
    /// Severity inherited from the pattern that matched.
    pub severity: Severity,

    // Optional enrichment. Each `Option` below is omitted from serialized
    // output while it is `None`.
    /// Author name taken from the git blame hunk for this line, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub author: Option<String>,
    /// Whole days between the blamed commit's timestamp and the time of the scan.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub age_days: Option<i64>,
    /// Id of the commit the line is blamed on, rendered as a string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub commit_hash: Option<String>,
    /// Timestamp of the blamed commit.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_at: Option<DateTime<Utc>>,
    /// Extension of `file_path`, when it has one that is valid UTF-8.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_extension: Option<String>,
    /// Number of items (this one included) sharing the same pattern type and
    /// trimmed line content; stays `0` when duplicate detection has not run.
    pub duplicate_count: usize,
}
33
/// How urgent a debt marker is.
///
/// Variants are declared from most to least severe. The derived `Ord` follows
/// declaration order, so `Critical < High < Medium < Low`; sorting ascending
/// therefore puts the most severe items first. Do not reorder the variants.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Most severe level; compares lowest.
    Critical,
    /// Second most severe level.
    High,
    /// Third most severe level.
    Medium,
    /// Least severe level; compares highest.
    Low,
}
41
/// A named regular expression that identifies one kind of debt marker.
#[derive(Debug, Clone)]
pub struct Pattern {
    /// Label copied into `CodeDebtItem::pattern_type` for every match.
    pub name: String,
    /// Expression tested against each line of a scanned file.
    pub regex: Regex,
    /// Severity assigned to items produced by this pattern.
    pub severity: Severity,
}
48
/// Configurable scanner that searches source trees for debt markers.
///
/// Build one with [`CodeDebtScanner::new`] and adjust it with the `with_*`
/// builder methods before scanning.
pub struct CodeDebtScanner {
    /// Patterns tested against every line of every scanned file.
    patterns: Vec<Pattern>,
    /// File extensions (without the leading dot) that are eligible for scanning.
    file_extensions: Vec<String>,
    /// Directory names the caller wants excluded from scans.
    ignore_dirs: Vec<String>,
    /// Whether scan results should be enriched with git blame data.
    enable_git_blame: bool,
    /// Whether scan results should get duplicate counts.
    detect_duplicates: bool,
    /// Repository used for blame lookups; `None` until git blame is enabled
    /// and a repository is successfully discovered.
    git_repo: Option<Repository>,
}
57
58impl Default for CodeDebtScanner {
59    fn default() -> Self {
60        Self::new()
61    }
62}
63
64impl CodeDebtScanner {
65    pub fn new() -> Self {
66        let patterns = vec![
67            Pattern {
68                name: "HACK".to_string(),
69                regex: Regex::new(r"(?i)\b(HACK|XXX)\b").unwrap(),
70                severity: Severity::Critical,
71            },
72            Pattern {
73                name: "FIXME".to_string(),
74                regex: Regex::new(r"(?i)\bFIXME\b").unwrap(),
75                severity: Severity::High,
76            },
77            Pattern {
78                name: "TODO".to_string(),
79                regex: Regex::new(r"(?i)\bTODO\b").unwrap(),
80                severity: Severity::Medium,
81            },
82            Pattern {
83                name: "NOTE_FIX".to_string(),
84                regex: Regex::new(r"(?i)\bNOTE.*fix\b").unwrap(),
85                severity: Severity::Medium,
86            },
87            Pattern {
88                name: "TEMPORARY".to_string(),
89                regex: Regex::new(r"(?i)\b(temporary|temp|placeholder)\b").unwrap(),
90                severity: Severity::High,
91            },
92            Pattern {
93                name: "MOCK_STUB".to_string(),
94                regex: Regex::new(r"(?i)\b(mock|stub)\b").unwrap(),
95                severity: Severity::Low,
96            },
97            Pattern {
98                name: "PRODUCTION_DEBT".to_string(),
99                regex: Regex::new(r"(?i)(temporary|placeholder|mock).*production").unwrap(),
100                severity: Severity::Critical,
101            },
102        ];
103
104        let file_extensions = vec![
105            "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "cpp", "cc", "cxx", "h",
106            "hpp", "rb", "php", "cs", "swift", "kt", "scala", "clj", "ml", "hs", "elm", "dart",
107            "lua", "pl", "r", "jl", "nim", "zig", "v", "cr",
108        ]
109        .into_iter()
110        .map(String::from)
111        .collect();
112
113        let ignore_dirs = vec![
114            "node_modules",
115            ".git",
116            "target",
117            "dist",
118            "build",
119            ".next",
120            "vendor",
121            "__pycache__",
122            ".pytest_cache",
123            "coverage",
124            ".nyc_output",
125            "bower_components",
126        ]
127        .into_iter()
128        .map(String::from)
129        .collect();
130
131        Self {
132            patterns,
133            file_extensions,
134            ignore_dirs,
135            enable_git_blame: false,
136            detect_duplicates: false,
137            git_repo: None,
138        }
139    }
140
141    pub fn with_patterns(mut self, patterns: Vec<Pattern>) -> Self {
142        self.patterns = patterns;
143        self
144    }
145
146    pub fn with_file_extensions(mut self, extensions: Vec<String>) -> Self {
147        self.file_extensions = extensions;
148        self
149    }
150
151    pub fn with_ignore_dirs(mut self, dirs: Vec<String>) -> Self {
152        self.ignore_dirs = dirs;
153        self
154    }
155
156    pub fn with_git_blame(mut self, enable: bool) -> Self {
157        self.enable_git_blame = enable;
158        if enable {
159            // Try to open git repository
160            if let Ok(repo) = Repository::discover(".") {
161                self.git_repo = Some(repo);
162            }
163        }
164        self
165    }
166
167    pub fn with_duplicate_detection(mut self, enable: bool) -> Self {
168        self.detect_duplicates = enable;
169        self
170    }
171
172    pub fn scan<P: AsRef<Path>>(&self, root_path: P) -> Result<Vec<CodeDebtItem>> {
173        let patterns = Arc::new(&self.patterns);
174        let extensions: HashSet<String> = self.file_extensions.iter().cloned().collect();
175
176        let walker = WalkBuilder::new(&root_path)
177            .hidden(false)
178            .ignore(true)
179            .git_ignore(true)
180            .build_parallel();
181
182        let (tx, rx) = std::sync::mpsc::channel();
183
184        walker.run(|| {
185            let tx = tx.clone();
186            let patterns = Arc::clone(&patterns);
187            let extensions = extensions.clone();
188
189            Box::new(move |entry| {
190                if let Ok(entry) = entry {
191                    let path = entry.path();
192
193                    if path.is_file() {
194                        if let Some(ext) = path.extension() {
195                            if let Some(ext_str) = ext.to_str() {
196                                if extensions.contains(ext_str) {
197                                    if let Ok(content) = std::fs::read_to_string(path) {
198                                        let items = Self::scan_content(path, &content, &patterns);
199                                        for item in items {
200                                            let _ = tx.send(item);
201                                        }
202                                    }
203                                }
204                            }
205                        }
206                    }
207                }
208                ignore::WalkState::Continue
209            })
210        });
211
212        drop(tx);
213        let mut results: Vec<CodeDebtItem> = rx.iter().collect();
214
215        // Add git blame information if enabled
216        if self.enable_git_blame {
217            self.add_git_information(&mut results);
218        }
219
220        // Detect duplicates if enabled
221        if self.detect_duplicates {
222            self.detect_duplicate_patterns(&mut results);
223        }
224
225        // Add file extension information
226        self.add_file_extensions(&mut results);
227
228        results.sort_by(|a, b| {
229            a.severity
230                .cmp(&b.severity)
231                .then_with(|| a.file_path.cmp(&b.file_path))
232                .then_with(|| a.line_number.cmp(&b.line_number))
233        });
234
235        Ok(results)
236    }
237
238    fn scan_content(file_path: &Path, content: &str, patterns: &[Pattern]) -> Vec<CodeDebtItem> {
239        content
240            .lines()
241            .enumerate()
242            .flat_map(|(line_idx, line)| {
243                patterns
244                    .iter()
245                    .filter_map(|pattern| {
246                        pattern.regex.find(line).map(|m| CodeDebtItem {
247                            file_path: file_path.to_path_buf(),
248                            line_number: line_idx + 1,
249                            column: m.start() + 1,
250                            line_content: line.trim().to_string(),
251                            pattern_type: pattern.name.clone(),
252                            severity: pattern.severity.clone(),
253                            author: None,
254                            age_days: None,
255                            commit_hash: None,
256                            created_at: None,
257                            file_extension: None,
258                            duplicate_count: 0,
259                        })
260                    })
261                    .collect::<Vec<_>>()
262            })
263            .collect()
264    }
265
266    pub fn get_summary(&self, items: &[CodeDebtItem]) -> HashMap<String, usize> {
267        let mut summary = HashMap::new();
268        for item in items {
269            *summary.entry(item.pattern_type.clone()).or_insert(0) += 1;
270        }
271        summary
272    }
273
274    pub fn filter_by_severity(
275        &self,
276        items: &[CodeDebtItem],
277        min_severity: Severity,
278    ) -> Vec<CodeDebtItem> {
279        items
280            .iter()
281            .filter(|item| item.severity <= min_severity)
282            .cloned()
283            .collect()
284    }
285
286    fn add_git_information(&self, items: &mut [CodeDebtItem]) {
287        if let Some(repo) = &self.git_repo {
288            for item in items.iter_mut() {
289                if let Ok(relative_path) = item
290                    .file_path
291                    .strip_prefix(repo.workdir().unwrap_or_else(|| std::path::Path::new(".")))
292                {
293                    if let Ok(blame) =
294                        repo.blame_file(relative_path, Some(&mut BlameOptions::new()))
295                    {
296                        if let Some(hunk) = blame.get_line(item.line_number) {
297                            let sig = hunk.final_signature();
298                            let oid = hunk.final_commit_id();
299
300                            item.author = sig.name().map(|s| s.to_string());
301                            item.commit_hash = Some(oid.to_string());
302
303                            if let Ok(commit) = repo.find_commit(oid) {
304                                let timestamp = commit.time().seconds();
305                                let datetime =
306                                    DateTime::from_timestamp(timestamp, 0).unwrap_or_else(Utc::now);
307                                item.created_at = Some(datetime);
308                                let now = Utc::now();
309                                let duration = now.signed_duration_since(datetime);
310                                item.age_days = Some(duration.num_days());
311                            }
312                        }
313                    }
314                }
315            }
316        }
317    }
318
319    fn detect_duplicate_patterns(&self, items: &mut [CodeDebtItem]) {
320        let mut pattern_counts: HashMap<String, usize> = HashMap::new();
321
322        // Count occurrences of similar patterns
323        for item in items.iter() {
324            let key = format!("{}:{}", item.pattern_type, item.line_content.trim());
325            *pattern_counts.entry(key).or_insert(0) += 1;
326        }
327
328        // Update duplicate counts
329        for item in items.iter_mut() {
330            let key = format!("{}:{}", item.pattern_type, item.line_content.trim());
331            item.duplicate_count = pattern_counts.get(&key).copied().unwrap_or(0);
332        }
333    }
334
335    fn add_file_extensions(&self, items: &mut [CodeDebtItem]) {
336        for item in items.iter_mut() {
337            if let Some(ext) = item.file_path.extension() {
338                item.file_extension = ext.to_str().map(|s| s.to_string());
339            }
340        }
341    }
342
343    pub fn get_file_type_summary(&self, items: &[CodeDebtItem]) -> HashMap<String, usize> {
344        let mut summary = HashMap::new();
345        for item in items {
346            let file_type = item.file_extension.as_deref().unwrap_or("unknown");
347            *summary.entry(file_type.to_string()).or_insert(0) += 1;
348        }
349        summary
350    }
351
352    pub fn get_age_distribution(&self, items: &[CodeDebtItem]) -> HashMap<String, usize> {
353        let mut distribution = HashMap::new();
354        for item in items {
355            if let Some(age) = item.age_days {
356                let bucket = match age {
357                    0..=7 => "This week",
358                    8..=30 => "This month",
359                    31..=90 => "Last 3 months",
360                    91..=365 => "This year",
361                    _ => "Over a year",
362                };
363                *distribution.entry(bucket.to_string()).or_insert(0) += 1;
364            } else {
365                *distribution.entry("Unknown age".to_string()).or_insert(0) += 1;
366            }
367        }
368        distribution
369    }
370
371    pub fn filter_by_age(&self, items: &[CodeDebtItem], max_age_days: i64) -> Vec<CodeDebtItem> {
372        items
373            .iter()
374            .filter(|item| item.age_days.is_none_or(|age| age <= max_age_days))
375            .cloned()
376            .collect()
377    }
378
379    pub fn find_duplicates(&self, items: &[CodeDebtItem], min_count: usize) -> Vec<CodeDebtItem> {
380        items
381            .iter()
382            .filter(|item| item.duplicate_count >= min_count)
383            .cloned()
384            .collect()
385    }
386}
387
388#[cfg(test)]
389mod tests {
390    use super::*;
391    use std::fs;
392    use tempfile::TempDir;
393
394    fn create_test_file(dir: &Path, name: &str, content: &str) -> PathBuf {
395        let file_path = dir.join(name);
396        fs::write(&file_path, content).unwrap();
397        file_path
398    }
399
400    #[test]
401    fn test_scanner_creation() {
402        let scanner = CodeDebtScanner::new();
403        assert!(!scanner.patterns.is_empty());
404        assert!(!scanner.file_extensions.is_empty());
405        assert!(!scanner.ignore_dirs.is_empty());
406    }
407
408    #[test]
409    fn test_default_patterns() {
410        let scanner = CodeDebtScanner::new();
411        let pattern_names: Vec<String> = scanner.patterns.iter().map(|p| p.name.clone()).collect();
412
413        assert!(pattern_names.contains(&"TODO".to_string()));
414        assert!(pattern_names.contains(&"FIXME".to_string()));
415        assert!(pattern_names.contains(&"HACK".to_string()));
416        assert!(pattern_names.contains(&"TEMPORARY".to_string()));
417        assert!(pattern_names.contains(&"PRODUCTION_DEBT".to_string()));
418    }
419
420    #[test]
421    fn test_scan_content() {
422        let test_content = r#"
423fn main() {
424    // TODO: implement this function
425    println!("Hello, world!");
426    // FIXME: this is broken
427    let x = 5;
428    // HACK: workaround
429    let y = x * 2;
430}
431"#;
432
433        let scanner = CodeDebtScanner::new();
434        let file_path = Path::new("test.rs");
435        let items = CodeDebtScanner::scan_content(file_path, test_content, &scanner.patterns);
436
437        assert_eq!(items.len(), 3);
438
439        // Check TODO
440        let todo_item = items
441            .iter()
442            .find(|item| item.pattern_type == "TODO")
443            .unwrap();
444        assert_eq!(todo_item.severity, Severity::Medium);
445        assert_eq!(todo_item.line_number, 3);
446
447        // Check FIXME
448        let fixme_item = items
449            .iter()
450            .find(|item| item.pattern_type == "FIXME")
451            .unwrap();
452        assert_eq!(fixme_item.severity, Severity::High);
453        assert_eq!(fixme_item.line_number, 5);
454
455        // Check HACK
456        let hack_item = items
457            .iter()
458            .find(|item| item.pattern_type == "HACK")
459            .unwrap();
460        assert_eq!(hack_item.severity, Severity::Critical);
461        assert_eq!(hack_item.line_number, 7);
462    }
463
464    #[test]
465    fn test_production_debt_pattern() {
466        let test_content = r#"
467const API_KEY = "placeholder for production";
468let temp_production_fix = true;
469"#;
470
471        let scanner = CodeDebtScanner::new();
472        let file_path = Path::new("test.js");
473        let items = CodeDebtScanner::scan_content(file_path, test_content, &scanner.patterns);
474
475        let production_debt = items
476            .iter()
477            .find(|item| item.pattern_type == "PRODUCTION_DEBT");
478        assert!(production_debt.is_some());
479        assert_eq!(production_debt.unwrap().severity, Severity::Critical);
480    }
481
482    #[test]
483    fn test_custom_patterns() {
484        let custom_patterns = vec![Pattern {
485            name: "URGENT".to_string(),
486            regex: Regex::new(r"(?i)\bURGENT\b").unwrap(),
487            severity: Severity::Critical,
488        }];
489
490        let scanner = CodeDebtScanner::new().with_patterns(custom_patterns);
491        assert_eq!(scanner.patterns.len(), 1);
492        assert_eq!(scanner.patterns[0].name, "URGENT");
493    }
494
495    #[test]
496    fn test_file_extensions_filter() {
497        let scanner =
498            CodeDebtScanner::new().with_file_extensions(vec!["rs".to_string(), "py".to_string()]);
499
500        assert_eq!(scanner.file_extensions.len(), 2);
501        assert!(scanner.file_extensions.contains(&"rs".to_string()));
502        assert!(scanner.file_extensions.contains(&"py".to_string()));
503    }
504
505    #[test]
506    fn test_ignore_dirs_filter() {
507        let custom_ignore = vec!["my_custom_dir".to_string()];
508        let scanner = CodeDebtScanner::new().with_ignore_dirs(custom_ignore);
509
510        assert!(scanner.ignore_dirs.contains(&"my_custom_dir".to_string()));
511    }
512
513    #[test]
514    fn test_get_summary() {
515        let items = vec![
516            CodeDebtItem {
517                file_path: PathBuf::from("test.rs"),
518                line_number: 1,
519                column: 1,
520                line_content: "// TODO: test".to_string(),
521                pattern_type: "TODO".to_string(),
522                severity: Severity::Medium,
523                author: None,
524                age_days: None,
525                commit_hash: None,
526                created_at: None,
527                file_extension: None,
528                duplicate_count: 0,
529            },
530            CodeDebtItem {
531                file_path: PathBuf::from("test.rs"),
532                line_number: 2,
533                column: 1,
534                line_content: "// TODO: another test".to_string(),
535                pattern_type: "TODO".to_string(),
536                severity: Severity::Medium,
537                author: None,
538                age_days: None,
539                commit_hash: None,
540                created_at: None,
541                file_extension: None,
542                duplicate_count: 0,
543            },
544            CodeDebtItem {
545                file_path: PathBuf::from("test.rs"),
546                line_number: 3,
547                column: 1,
548                line_content: "// FIXME: broken".to_string(),
549                pattern_type: "FIXME".to_string(),
550                severity: Severity::High,
551                author: None,
552                age_days: None,
553                commit_hash: None,
554                created_at: None,
555                file_extension: None,
556                duplicate_count: 0,
557            },
558        ];
559
560        let scanner = CodeDebtScanner::new();
561        let summary = scanner.get_summary(&items);
562
563        assert_eq!(summary.get("TODO"), Some(&2));
564        assert_eq!(summary.get("FIXME"), Some(&1));
565    }
566
567    #[test]
568    fn test_filter_by_severity() {
569        let items = vec![
570            CodeDebtItem {
571                file_path: PathBuf::from("test.rs"),
572                line_number: 1,
573                column: 1,
574                line_content: "// TODO: test".to_string(),
575                pattern_type: "TODO".to_string(),
576                severity: Severity::Medium,
577                author: None,
578                age_days: None,
579                commit_hash: None,
580                created_at: None,
581                file_extension: None,
582                duplicate_count: 0,
583            },
584            CodeDebtItem {
585                file_path: PathBuf::from("test.rs"),
586                line_number: 2,
587                column: 1,
588                line_content: "// HACK: critical".to_string(),
589                pattern_type: "HACK".to_string(),
590                severity: Severity::Critical,
591                author: None,
592                age_days: None,
593                commit_hash: None,
594                created_at: None,
595                file_extension: None,
596                duplicate_count: 0,
597            },
598            CodeDebtItem {
599                file_path: PathBuf::from("test.rs"),
600                line_number: 3,
601                column: 1,
602                line_content: "// mock data".to_string(),
603                pattern_type: "MOCK_STUB".to_string(),
604                severity: Severity::Low,
605                author: None,
606                age_days: None,
607                commit_hash: None,
608                created_at: None,
609                file_extension: None,
610                duplicate_count: 0,
611            },
612        ];
613
614        let scanner = CodeDebtScanner::new();
615
616        // Filter for high and above
617        let high_items = scanner.filter_by_severity(&items, Severity::High);
618        assert_eq!(high_items.len(), 1); // Only the HACK item
619        assert_eq!(high_items[0].pattern_type, "HACK");
620
621        // Filter for medium and above
622        let medium_items = scanner.filter_by_severity(&items, Severity::Medium);
623        assert_eq!(medium_items.len(), 2); // HACK and TODO
624    }
625
626    #[test]
627    fn test_scan_real_directory() {
628        let temp_dir = TempDir::new().unwrap();
629
630        // Create test files
631        create_test_file(
632            temp_dir.path(),
633            "test.rs",
634            "// TODO: implement\nfn main() {\n    // FIXME: broken\n    println!(\"test\");\n}",
635        );
636
637        create_test_file(
638            temp_dir.path(),
639            "test.py",
640            "# TODO: add error handling\ndef test():\n    # HACK: quick fix\n    pass",
641        );
642
643        // Create a file with unsupported extension (should be ignored)
644        create_test_file(temp_dir.path(), "test.txt", "TODO: this should be ignored");
645
646        let scanner = CodeDebtScanner::new();
647        let items = scanner.scan(temp_dir.path()).unwrap();
648
649        // Should find 4 items (2 from .rs file, 2 from .py file, 0 from .txt file)
650        assert_eq!(items.len(), 4);
651
652        // Check that all items have valid file paths
653        for item in &items {
654            assert!(item.file_path.exists());
655            assert!(item.line_number > 0);
656            assert!(item.column > 0);
657            assert!(!item.line_content.is_empty());
658        }
659    }
660
661    #[test]
662    fn test_severity_ordering() {
663        assert!(Severity::Critical < Severity::High);
664        assert!(Severity::High < Severity::Medium);
665        assert!(Severity::Medium < Severity::Low);
666    }
667
668    #[test]
669    fn test_case_insensitive_patterns() {
670        let test_content = r#"
671// todo: lowercase
672// TODO: uppercase
673// ToDo: mixed case
674// FIXME: test
675// fixme: lowercase
676"#;
677
678        let scanner = CodeDebtScanner::new();
679        let file_path = Path::new("test.rs");
680        let items = CodeDebtScanner::scan_content(file_path, test_content, &scanner.patterns);
681
682        let todo_items: Vec<_> = items
683            .iter()
684            .filter(|item| item.pattern_type == "TODO")
685            .collect();
686        let fixme_items: Vec<_> = items
687            .iter()
688            .filter(|item| item.pattern_type == "FIXME")
689            .collect();
690
691        assert_eq!(todo_items.len(), 3); // All variations of TODO
692        assert_eq!(fixme_items.len(), 2); // All variations of FIXME
693    }
694}