Skip to main content

garbage_code_hunter/last_words/
scanner.rs

//! Scan source files for "last words" — marker comments such as TODO, FIXME, HACK, TEMP, WONTFIX, and DEPRECATED.
2
3use regex::Regex;
4use std::path::{Path, PathBuf};
5use walkdir::WalkDir;
6
7/// A discovered "last word" comment in the codebase.
8#[derive(Debug, Clone)]
9pub struct LastWord {
10    pub file: PathBuf,
11    pub line: usize,
12    pub kind: LastWordKind,
13    pub text: String,
14    pub age_days: Option<u64>,
15}
16
/// Category of a last-word comment, one per marker keyword the scanner knows.
///
/// Keywords are matched case-insensitively and on word boundaries.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LastWordKind {
    /// A `TODO` marker.
    Todo,
    /// A `FIXME` marker.
    Fixme,
    /// A `HACK` marker.
    Hack,
    /// A `TEMP` or `TEMPORARY` marker.
    Temp,
    /// The phrase "quick fix" (also written "quickfix").
    QuickFix,
    /// A `WONTFIX` marker (also written "WONT FIX").
    Wontfix,
    /// The word "workaround".
    Workaround,
    /// A `DEPRECATED` (or `DEPRECATE`) marker.
    Deprecated,
    /// A `SAFETY` marker.
    Safety,
}
30
31impl LastWordKind {
32    pub fn label(&self) -> &'static str {
33        match self {
34            Self::Todo => "TODO",
35            Self::Fixme => "FIXME",
36            Self::Hack => "HACK",
37            Self::Temp => "TEMP",
38            Self::QuickFix => "quick fix",
39            Self::Wontfix => "WONTFIX",
40            Self::Workaround => "workaround",
41            Self::Deprecated => "DEPRECATED",
42            Self::Safety => "SAFETY",
43        }
44    }
45
46    pub fn tombstone_quote(&self) -> &'static str {
47        match self {
48            Self::Todo => "I'll do it later",
49            Self::Fixme => "This is fine... probably",
50            Self::Hack => "Don't touch this",
51            Self::Temp => "Temporary workaround",
52            Self::QuickFix => "Quick fix for now",
53            Self::Wontfix => "Won't fix, not my problem",
54            Self::Workaround => "It works, don't ask how",
55            Self::Deprecated => "Dead code walking",
56            Self::Safety => "Unsafe but necessary",
57        }
58    }
59}
60
61/// Scan a directory for last-word comments.
62pub fn scan(path: &Path) -> Vec<LastWord> {
63    let patterns = build_patterns();
64    let mut results = Vec::new();
65
66    let entries: Vec<_> = if path.is_file() {
67        vec![path.to_path_buf()]
68    } else {
69        WalkDir::new(path)
70            .into_iter()
71            .filter_map(|e| e.ok())
72            .filter(|e| is_source_file(e.path()))
73            .map(|e| e.path().to_path_buf())
74            .collect()
75    };
76
77    for file_path in entries {
78        let content = match std::fs::read_to_string(&file_path) {
79            Ok(c) => c,
80            Err(_) => continue,
81        };
82
83        for (line_num, line) in content.lines().enumerate() {
84            let trimmed = line.trim();
85            // Skip lines that are just code, not comments
86            if !is_comment_line(trimmed) {
87                continue;
88            }
89
90            for (kind, re) in &patterns {
91                if re.is_match(trimmed) {
92                    results.push(LastWord {
93                        file: file_path.clone(),
94                        line: line_num + 1,
95                        kind: kind.clone(),
96                        text: trimmed.to_string(),
97                        age_days: None,
98                    });
99                }
100            }
101        }
102    }
103
104    results
105}
106
/// Try to determine how old a line is by asking `git blame`.
///
/// Runs `git blame -L <line>,<line> --porcelain -- <file>` and reads the
/// first `committer-time` entry from its output.
///
/// Returns the age in whole days, or `None` when:
/// - `git` cannot be started or exits unsuccessfully,
/// - the output has no `committer-time` line or its value is not a number,
/// - the system clock is before the Unix epoch, or
/// - the commit timestamp lies in the future relative to the system clock.
pub fn try_get_age(file: &Path, line: usize) -> Option<u64> {
    let output = std::process::Command::new("git")
        .arg("blame")
        .arg("-L")
        .arg(format!("{},{}", line, line))
        .arg("--porcelain")
        // `--` ends option parsing, so a path starting with `-` is never
        // mistaken for a git flag.
        .arg("--")
        // Hand the path over as an OS string: a lossy UTF-8 conversion would
        // silently change paths that are not valid UTF-8.
        .arg(file)
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let committed_at: u64 = stdout
        .lines()
        .find_map(|l| l.strip_prefix("committer-time "))?
        .trim()
        .parse()
        .ok()?;

    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .ok()?
        .as_secs();

    // `checked_sub` yields `None` for timestamps in the future instead of wrapping.
    now.checked_sub(committed_at).map(|secs| secs / 86_400)
}
139
140fn build_patterns() -> Vec<(LastWordKind, Regex)> {
141    vec![
142        (
143            LastWordKind::Fixme,
144            Regex::new(r"(?i)\bFIXME\b").expect("FIXME regex is a valid hardcoded literal"),
145        ),
146        (
147            LastWordKind::Todo,
148            Regex::new(r"(?i)\bTODO\b").expect("TODO regex is a valid hardcoded literal"),
149        ),
150        (
151            LastWordKind::Hack,
152            Regex::new(r"(?i)\bHACK\b").expect("HACK regex is a valid hardcoded literal"),
153        ),
154        (
155            LastWordKind::Temp,
156            Regex::new(r"(?i)\bTEMP(ORARY)?\b").expect("TEMP regex is a valid hardcoded literal"),
157        ),
158        (
159            LastWordKind::QuickFix,
160            Regex::new(r"(?i)\bquick\s*fix\b")
161                .expect("quick-fix regex is a valid hardcoded literal"),
162        ),
163        (
164            LastWordKind::Wontfix,
165            Regex::new(r"(?i)\bWONT\s*FIX\b").expect("wontfix regex is a valid hardcoded literal"),
166        ),
167        (
168            LastWordKind::Workaround,
169            Regex::new(r"(?i)\bworkaround\b")
170                .expect("workaround regex is a valid hardcoded literal"),
171        ),
172        (
173            LastWordKind::Deprecated,
174            Regex::new(r"(?i)\bDEPRECATED?\b")
175                .expect("deprecated regex is a valid hardcoded literal"),
176        ),
177        (
178            LastWordKind::Safety,
179            Regex::new(r"(?i)\bSAFETY\b").expect("SAFETY regex is a valid hardcoded literal"),
180        ),
181    ]
182}
183
/// Report whether an already-trimmed line looks like a comment.
///
/// Recognised comment openers are `//`, `/*`, `*` (block-comment
/// continuation), `#` and `<!--`. This is a prefix heuristic only: code with
/// a trailing comment is not treated as a comment line.
///
/// Rust attributes (`#[...]` and `#![...]`) begin with `#` but are code, so
/// they are rejected; otherwise `#[deprecated]` would be reported as a
/// DEPRECATED comment.
fn is_comment_line(line: &str) -> bool {
    const COMMENT_PREFIXES: [&str; 5] = ["//", "/*", "*", "#", "<!--"];

    if line.starts_with("#[") || line.starts_with("#![") {
        return false;
    }

    COMMENT_PREFIXES
        .iter()
        .any(|prefix| line.starts_with(prefix))
}
191
/// Report whether `path` has one of the file extensions the scanner reads.
///
/// The comparison is case-sensitive. Paths without an extension, or whose
/// extension is not valid UTF-8, are rejected.
fn is_source_file(path: &Path) -> bool {
    const SCANNED_EXTENSIONS: &[&str] = &[
        "rs", "py", "js", "ts", "go", "java", "c", "cpp", "h", "hpp", "rb", "php", "swift", "kt",
        "scala", "sh", "bash", "zsh", "toml", "yaml", "yml", "json", "md", "html", "css", "sql",
    ];

    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => SCANNED_EXTENSIONS.contains(&ext),
        None => false,
    }
}
224
#[cfg(test)]
mod tests {
    use super::*;

    // ── helpers ────────────────────────────────────────────────────

    /// Return the compiled regex that `build_patterns` registers for `kind`.
    fn regex_for(kind: LastWordKind) -> Regex {
        build_patterns()
            .into_iter()
            .find(|(k, _)| *k == kind)
            .map(|(_, re)| re)
            .unwrap_or_else(|| panic!("no pattern registered for {:?}", kind))
    }

    /// Scratch directory for the scan tests.
    ///
    /// The directory name includes the process id so concurrent test runs do
    /// not collide, any leftover from an earlier run is removed on creation,
    /// and the directory is deleted on drop — including when a test panics.
    struct ScratchDir(PathBuf);

    impl ScratchDir {
        /// Create an empty scratch directory whose name contains `name`.
        fn new(name: &str) -> Self {
            let dir = std::env::temp_dir().join(format!(
                "gch_last_words_test_{}_{}",
                name,
                std::process::id()
            ));
            let _ = std::fs::remove_dir_all(&dir);
            std::fs::create_dir_all(&dir).expect("scratch directory should be creatable");
            Self(dir)
        }

        /// Path of the scratch directory.
        fn path(&self) -> &Path {
            &self.0
        }

        /// Write `content` to `file_name` inside the directory and return its path.
        fn write(&self, file_name: &str, content: &str) -> PathBuf {
            let file = self.0.join(file_name);
            std::fs::write(&file, content).expect("scratch file should be writable");
            file
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    // ── kind_label ─────────────────────────────────────────────────

    /// Objective: Verify all variants map to the expected label strings.
    /// Invariants: Every LastWordKind has a unique, non-empty label.
    #[test]
    fn test_kind_label_all_variants() {
        assert_eq!(LastWordKind::Todo.label(), "TODO");
        assert_eq!(LastWordKind::Fixme.label(), "FIXME");
        assert_eq!(LastWordKind::Hack.label(), "HACK");
        assert_eq!(LastWordKind::Temp.label(), "TEMP");
        assert_eq!(LastWordKind::QuickFix.label(), "quick fix");
        assert_eq!(LastWordKind::Wontfix.label(), "WONTFIX");
        assert_eq!(LastWordKind::Workaround.label(), "workaround");
        assert_eq!(LastWordKind::Deprecated.label(), "DEPRECATED");
        assert_eq!(LastWordKind::Safety.label(), "SAFETY");
    }

    // ── tombstone_quote ────────────────────────────────────────────

    /// Objective: Verify all variants return a non-empty, distinct tombstone quote.
    /// Invariants: Every quote is a unique, non-empty string (no panics).
    #[test]
    fn test_tombstone_quote_all_variants() {
        let mut seen = std::collections::HashSet::new();
        for kind in &[
            LastWordKind::Todo,
            LastWordKind::Fixme,
            LastWordKind::Hack,
            LastWordKind::Temp,
            LastWordKind::QuickFix,
            LastWordKind::Wontfix,
            LastWordKind::Workaround,
            LastWordKind::Deprecated,
            LastWordKind::Safety,
        ] {
            let quote = kind.tombstone_quote();
            assert!(
                !quote.is_empty(),
                "{:?}.tombstone_quote() should not be empty",
                kind
            );
            assert!(
                seen.insert(quote),
                "{:?}.tombstone_quote() repeats another variant's quote",
                kind
            );
        }
    }

    // ── is_comment_line ────────────────────────────────────────────

    /// Objective: Verify all comment line prefixes are recognized.
    /// Invariants: Lines starting with //, /*, *, #, <!-- are comments.
    #[test]
    fn test_is_comment_line_all_styles() {
        assert!(is_comment_line("// TODO: fix"), "// line");
        assert!(is_comment_line("/* FIXME */"), "/* line");
        assert!(is_comment_line("* multiline continuation"), "* line");
        assert!(is_comment_line("# HACK"), "# line");
        assert!(is_comment_line("<!-- HTML comment -->"), "<!-- line");
    }

    /// Objective: Verify known non-comment lines are rejected.
    #[test]
    fn test_is_comment_line_non_comment() {
        assert!(!is_comment_line("let x = 1;"), "code line");
        assert!(!is_comment_line(""), "empty line");
        assert!(!is_comment_line("   "), "whitespace only");
        assert!(
            !is_comment_line("x // inline comment is not a comment line"),
            "code with trailing //"
        );
    }

    // ── is_source_file ─────────────────────────────────────────────

    /// Objective: Verify is_source_file returns true for all supported extensions.
    /// Invariants: Any file with a supported extension is recognized as a source file.
    #[test]
    fn test_is_source_file_supported() {
        for ext in &[
            "rs", "py", "js", "ts", "go", "java", "c", "cpp", "h", "hpp", "rb", "php", "swift",
            "kt", "scala", "sh", "bash", "zsh", "toml", "yaml", "yml", "json", "md", "html", "css",
            "sql",
        ] {
            let path = PathBuf::from(format!("file.{}", ext));
            assert!(
                is_source_file(&path),
                "file.{ext} should be recognized as source"
            );
        }
    }

    /// Objective: Verify is_source_file returns false for unsupported extensions.
    #[test]
    fn test_is_source_file_unsupported() {
        assert!(
            !is_source_file(Path::new("file.txt")),
            ".txt should not be source"
        );
        assert!(
            !is_source_file(Path::new("file.pdf")),
            ".pdf should not be source"
        );
        assert!(
            !is_source_file(Path::new("Makefile")),
            "no ext should not be source"
        );
        assert!(
            !is_source_file(Path::new("file.")),
            "trailing dot should not be source"
        );
    }

    // ── build_patterns ─────────────────────────────────────────────

    /// Objective: Verify the TODO pattern matches its keyword and rejects longer words.
    /// Invariants: Patterns are case-insensitive; they match keywords anywhere in a line.
    #[test]
    fn test_build_patterns_todo() {
        let todo_re = regex_for(LastWordKind::Todo);
        assert!(
            todo_re.is_match("// TODO: fix me"),
            "TODO pattern should match '// TODO: fix me'"
        );
        assert!(
            todo_re.is_match("// todo: lower case"),
            "TODO i flag: 'todo'"
        );
        assert!(
            !todo_re.is_match("// todolist"),
            "TODO should not match 'todolist' (\\b)"
        );
    }

    /// Objective: Verify FIXME pattern works correctly.
    #[test]
    fn test_build_patterns_fixme() {
        let fixme_re = regex_for(LastWordKind::Fixme);
        assert!(fixme_re.is_match("// FIXME: broken"), "FIXME should match");
        assert!(fixme_re.is_match("# fixme"), "fixme lowercase");
        assert!(
            !fixme_re.is_match("// fixable"),
            "fixme should not match 'fixable'"
        );
    }

    /// Objective: Verify HACK pattern matches with word boundary.
    #[test]
    fn test_build_patterns_hack() {
        let hack_re = regex_for(LastWordKind::Hack);
        assert!(
            hack_re.is_match("// HACK: ugly but works"),
            "HACK should match"
        );
        assert!(
            !hack_re.is_match("// hackneyed"),
            "HACK should not match 'hackneyed' (\\b)"
        );
    }

    /// Objective: Verify TEMP pattern matches both "TEMP" and "TEMPORARY".
    #[test]
    fn test_build_patterns_temp() {
        let temp_re = regex_for(LastWordKind::Temp);
        assert!(temp_re.is_match("// TEMP: quick fix"), "TEMP should match");
        assert!(
            temp_re.is_match("// TEMPORARY workaround"),
            "TEMPORARY should match"
        );
        assert!(
            !temp_re.is_match("// temperature"),
            "TEMPORARY? with \\b should not match 'temperature'"
        );
    }

    /// Objective: Verify quick-fix pattern matches with optional space between words.
    #[test]
    fn test_build_patterns_quickfix() {
        let qfix_re = regex_for(LastWordKind::QuickFix);
        assert!(qfix_re.is_match("// quick fix"), "quick fix should match");
        assert!(
            qfix_re.is_match("// quickfix"),
            "quickfix should match (\\s* allows zero)"
        );
        assert!(
            qfix_re.is_match("// Quick Fix"),
            "Quick Fix should match (i flag)"
        );
    }

    /// Objective: Verify WONTFIX pattern matches standard form.
    #[test]
    fn test_build_patterns_wontfix() {
        let wontfix_re = regex_for(LastWordKind::Wontfix);
        assert!(wontfix_re.is_match("// WONT FIX"), "WONT FIX should match");
        assert!(
            wontfix_re.is_match("// wontfix"),
            "wontfix collapsed form matches (\\s* zero)"
        );
        assert!(
            wontfix_re.is_match("// WONT   FIX"),
            "multiple spaces between WONT and FIX"
        );
        assert!(
            !wontfix_re.is_match("// won't fix"),
            "should not match apostrophe form"
        );
    }

    /// Objective: Verify WORKAROUND pattern matches.
    #[test]
    fn test_build_patterns_workaround() {
        let work_re = regex_for(LastWordKind::Workaround);
        assert!(
            work_re.is_match("// workaround: temp fix"),
            "workaround should match"
        );
        assert!(work_re.is_match("/* WORKAROUND */"), "WORKAROUND uppercase");
    }

    /// Objective: Verify DEPRECATED pattern matches both "DEPRECATED" and "DEPRECATE".
    #[test]
    fn test_build_patterns_deprecated() {
        let dep_re = regex_for(LastWordKind::Deprecated);
        assert!(
            dep_re.is_match("// DEPRECATED: old code"),
            "DEPRECATED should match"
        );
        assert!(
            dep_re.is_match("// Deprecate this in v2"),
            "DEPRECATE? should match 'Deprecate'"
        );
        assert!(
            !dep_re.is_match("// depreciation"),
            "DEPRECATE? with \\b pos should not match 'depreciation'"
        );
    }

    /// Objective: Verify SAFETY pattern matches.
    #[test]
    fn test_build_patterns_safety() {
        let safety_re = regex_for(LastWordKind::Safety);
        assert!(
            safety_re.is_match("// SAFETY: unsafe block"),
            "SAFETY should match"
        );
        assert!(safety_re.is_match("# safety: required"), "safety lowercase");
    }

    // ── scan ───────────────────────────────────────────────────────

    /// Objective: Verify scan finds TODO and FIXME in a Rust file.
    /// Invariants: Each matching comment creates one LastWord result.
    #[test]
    fn test_scan_finds_todos_and_fixmes() {
        let scratch = ScratchDir::new("scan");
        scratch.write(
            "test.rs",
            "// TODO: fix this\nfn main() {}\n// FIXME: broken\n",
        );

        let results = scan(scratch.path());
        assert!(
            results.iter().any(|r| r.kind == LastWordKind::Todo),
            "should find TODO"
        );
        assert!(
            results.iter().any(|r| r.kind == LastWordKind::Fixme),
            "should find FIXME"
        );
        assert_eq!(results.len(), 2, "exactly 2 last-words in the file");
    }

    /// Objective: Verify scan finds a `#`-style HACK comment in a Python file
    /// and reports its 1-based line number.
    #[test]
    fn test_scan_finds_hack_in_python() {
        let scratch = ScratchDir::new("hack");
        scratch.write("script.py", "# HACK: this is terrible\nx = 1\n");

        let results = scan(scratch.path());
        // The directory holds only the .py file, so every hit comes from it.
        let hacks: Vec<_> = results
            .iter()
            .filter(|r| r.kind == LastWordKind::Hack)
            .collect();
        assert_eq!(hacks.len(), 1, "should find 1 HACK in Python file");
        assert_eq!(hacks[0].line, 1, "HACK should be on line 1");
    }

    /// Objective: Verify scan skips non-comment lines (code only).
    #[test]
    fn test_scan_skips_code_lines() {
        let scratch = ScratchDir::new("skip");
        scratch.write(
            "test.rs",
            "fn main() {\n    let todo = \"not a comment\";\n}\n",
        );

        let results = scan(scratch.path());
        assert!(
            results.is_empty(),
            "code with no comments should produce 0 results, got {}",
            results.len()
        );
    }

    /// Objective: Verify scan handles empty files without panicking.
    #[test]
    fn test_scan_empty_file() {
        let scratch = ScratchDir::new("empty");
        scratch.write("empty.rs", "");

        let results = scan(scratch.path());
        assert!(results.is_empty(), "empty file should produce 0 results");
    }

    /// Objective: Verify scan ignores non-source files.
    #[test]
    fn test_scan_ignores_unsupported_extensions() {
        let scratch = ScratchDir::new("unsup");
        scratch.write("readme.txt", "// TODO: ignored");

        let results = scan(scratch.path());
        assert!(
            results.is_empty(),
            ".txt file should be ignored, got {} results",
            results.len()
        );
    }

    /// Objective: Verify scan works on a single file (not a directory).
    #[test]
    fn test_scan_single_file() {
        let scratch = ScratchDir::new("single");
        let file = scratch.write("single.rs", "// TODO: single file\n// HACK: also here\n");

        let results = scan(&file);
        assert_eq!(results.len(), 2, "single file scan should find 2 results");
        assert!(results.iter().any(|r| r.kind == LastWordKind::Todo));
        assert!(results.iter().any(|r| r.kind == LastWordKind::Hack));
    }
}