Skip to main content

reformat_core/
refs.rs

1//! Reference scanning and fixing for broken file references
2//!
3//! This module provides functionality to scan codebases for references to
4//! moved/renamed files and generate fixes for those references.
5
6use crate::changes::ChangeRecord;
7use aho_corasick::AhoCorasick;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::fs;
11use std::path::{Path, PathBuf};
12use walkdir::WalkDir;
13
14/// A proposed fix for a broken reference
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct ReferenceFix {
17    /// File containing the reference
18    pub file: String,
19    /// Line number (1-indexed)
20    pub line: usize,
21    /// Column number (1-indexed)
22    pub column: usize,
23    /// The line content with context
24    pub context: String,
25    /// The old reference that needs to be fixed
26    pub old_reference: String,
27    /// The new reference to replace it with
28    pub new_reference: String,
29}
30
31/// Collection of fixes to be applied
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct FixRecord {
34    /// Source of the changes that caused these fixes
35    pub generated_from: String,
36    /// ISO 8601 timestamp of when the scan was performed
37    pub timestamp: String,
38    /// Directories that were scanned
39    pub scan_directories: Vec<String>,
40    /// List of proposed fixes
41    pub fixes: Vec<ReferenceFix>,
42}
43
44impl FixRecord {
45    /// Creates a new fix record
46    pub fn new(generated_from: &str, scan_directories: &[PathBuf]) -> Self {
47        let timestamp = chrono::Utc::now().to_rfc3339();
48        FixRecord {
49            generated_from: generated_from.to_string(),
50            timestamp,
51            scan_directories: scan_directories
52                .iter()
53                .map(|p| p.to_string_lossy().to_string())
54                .collect(),
55            fixes: Vec::new(),
56        }
57    }
58
59    /// Returns true if there are no fixes
60    pub fn is_empty(&self) -> bool {
61        self.fixes.is_empty()
62    }
63
64    /// Returns the number of fixes
65    pub fn len(&self) -> usize {
66        self.fixes.len()
67    }
68
69    /// Writes the fix record to a JSON file
70    pub fn write_to_file(&self, path: &Path) -> crate::Result<()> {
71        let json = serde_json::to_string_pretty(self)?;
72        fs::write(path, json)?;
73        Ok(())
74    }
75
76    /// Reads a fix record from a JSON file
77    pub fn read_from_file(path: &Path) -> crate::Result<Self> {
78        let json = fs::read_to_string(path)?;
79        let record: FixRecord = serde_json::from_str(&json)?;
80        Ok(record)
81    }
82}
83
/// Settings that control which files a reference scan visits.
#[derive(Debug, Clone)]
pub struct ScanOptions {
    /// Extensions (with leading dot) of files to scan; an empty list disables the
    /// extension filter.
    pub extensions: Vec<String>,
    /// Entry names that are skipped during the directory walk.
    pub exclude_patterns: Vec<String>,
    /// When `false`, the walk is limited to a depth of one.
    pub recursive: bool,
    /// When `true`, progress information is printed to stderr while scanning.
    pub verbose: bool,
}

impl Default for ScanOptions {
    /// Returns the stock configuration: common source, template, config and text
    /// extensions, the usual dependency/build directories excluded, recursive
    /// walking enabled and verbose output disabled.
    fn default() -> Self {
        const DEFAULT_EXTENSIONS: [&str; 24] = [
            ".go", ".py", ".js", ".ts", ".jsx", ".tsx", ".rs", ".java", ".c", ".cpp", ".h",
            ".hpp", ".html", ".tmpl", ".yaml", ".yml", ".json", ".toml", ".xml", ".md", ".txt",
            ".cfg", ".conf", ".ini",
        ];
        const DEFAULT_EXCLUDES: [&str; 8] = [
            ".git",
            "node_modules",
            "target",
            "vendor",
            "__pycache__",
            ".venv",
            "dist",
            "build",
        ];

        let to_owned_list =
            |items: &[&str]| -> Vec<String> { items.iter().map(|item| item.to_string()).collect() };

        Self {
            extensions: to_owned_list(&DEFAULT_EXTENSIONS),
            exclude_patterns: to_owned_list(&DEFAULT_EXCLUDES),
            recursive: true,
            verbose: false,
        }
    }
}
141
142/// Reference scanner for finding broken references after file moves
143pub struct ReferenceScanner {
144    options: ScanOptions,
145    /// Map of old filename -> new path
146    file_moves: HashMap<String, String>,
147    /// Aho-Corasick automaton for O(n) multi-pattern matching
148    automaton: AhoCorasick,
149    /// Ordered list of patterns (index matches automaton pattern indices)
150    patterns: Vec<String>,
151}
152
153impl ReferenceScanner {
154    /// Creates a new reference scanner from a change record
155    pub fn from_change_record(record: &ChangeRecord, options: ScanOptions) -> Self {
156        let mut file_moves = HashMap::new();
157
158        for (from, to) in record.file_moves() {
159            // Extract just the filename from the 'from' path
160            let from_filename = Path::new(from)
161                .file_name()
162                .and_then(|n| n.to_str())
163                .unwrap_or(from);
164
165            file_moves.insert(from_filename.to_string(), to.to_string());
166
167            // Also add the full path as a key
168            if from != from_filename {
169                file_moves.insert(from.to_string(), to.to_string());
170            }
171        }
172
173        Self::new(file_moves, options)
174    }
175
176    /// Creates a scanner from a mapping of old -> new paths
177    pub fn new(file_moves: HashMap<String, String>, options: ScanOptions) -> Self {
178        // Build the Aho-Corasick automaton for O(n) multi-pattern matching
179        let patterns: Vec<String> = file_moves.keys().cloned().collect();
180        let automaton =
181            AhoCorasick::new(&patterns).expect("Failed to build Aho-Corasick automaton");
182
183        ReferenceScanner {
184            options,
185            file_moves,
186            automaton,
187            patterns,
188        }
189    }
190
191    /// Checks if a directory entry should be excluded from scanning
192    /// Used with filter_entry to prune entire subtrees before descending
193    fn should_include_entry(
194        entry: &walkdir::DirEntry,
195        exclude_patterns: &[String],
196        verbose: bool,
197    ) -> bool {
198        let name = match entry.file_name().to_str() {
199            Some(n) => n,
200            None => return false, // Skip entries with invalid UTF-8 names
201        };
202
203        // Skip hidden files/directories
204        if name.starts_with('.') {
205            if verbose && entry.file_type().is_dir() {
206                eprintln!("  [skip] {} (hidden)", entry.path().display());
207            }
208            return false;
209        }
210
211        // Skip excluded patterns
212        if exclude_patterns.iter().any(|p| p == name) {
213            if verbose && entry.file_type().is_dir() {
214                eprintln!(
215                    "  [skip] {} (excluded pattern: {})",
216                    entry.path().display(),
217                    name
218                );
219            }
220            return false;
221        }
222
223        true
224    }
225
226    /// Checks if a file should be scanned based on extension
227    fn should_scan_file(&self, path: &Path) -> bool {
228        if self.options.extensions.is_empty() {
229            return true;
230        }
231
232        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
233            let ext_with_dot = format!(".{}", ext);
234            self.options.extensions.iter().any(|e| e == &ext_with_dot)
235        } else {
236            false
237        }
238    }
239
240    /// Scans a file for references to moved files using Aho-Corasick for O(n) matching
241    fn scan_file(&self, path: &Path) -> crate::Result<Vec<ReferenceFix>> {
242        let content = fs::read_to_string(path)?;
243
244        if self.patterns.is_empty() {
245            return Ok(Vec::new());
246        }
247
248        // Build line index for efficient line/column lookup
249        let line_starts: Vec<usize> = std::iter::once(0)
250            .chain(content.match_indices('\n').map(|(i, _)| i + 1))
251            .collect();
252
253        let mut fixes = Vec::new();
254        let file_path_str = path.to_string_lossy().to_string();
255
256        // Single pass through the file using Aho-Corasick
257        for mat in self.automaton.find_iter(&content) {
258            let pattern_idx = mat.pattern().as_usize();
259            let old_ref = &self.patterns[pattern_idx];
260            let new_ref = match self.file_moves.get(old_ref) {
261                Some(r) => r,
262                None => continue,
263            };
264
265            // Binary search to find line number
266            let byte_pos = mat.start();
267            let line_idx = line_starts.partition_point(|&start| start <= byte_pos) - 1;
268            let line_start = line_starts[line_idx];
269            let column = byte_pos - line_start;
270
271            // Extract line content for context
272            let line_end = line_starts
273                .get(line_idx + 1)
274                .map(|&s| s.saturating_sub(1))
275                .unwrap_or(content.len());
276            let line_content = &content[line_start..line_end];
277
278            fixes.push(ReferenceFix {
279                file: file_path_str.clone(),
280                line: line_idx + 1,
281                column: column + 1,
282                context: line_content.trim().to_string(),
283                old_reference: old_ref.clone(),
284                new_reference: new_ref.clone(),
285            });
286        }
287
288        Ok(fixes)
289    }
290
291    /// Scans directories for broken references
292    pub fn scan(&self, directories: &[PathBuf]) -> crate::Result<FixRecord> {
293        let mut fix_record = FixRecord::new("changes.json", directories);
294        let verbose = self.options.verbose;
295        let mut files_scanned = 0;
296
297        for dir in directories {
298            if !dir.exists() {
299                if verbose {
300                    eprintln!("[scan] Directory does not exist: {}", dir.display());
301                }
302                continue;
303            }
304
305            if verbose {
306                eprintln!("[scan] Starting scan of: {}", dir.display());
307            }
308
309            let walker = if self.options.recursive {
310                WalkDir::new(dir)
311            } else {
312                WalkDir::new(dir).max_depth(1)
313            };
314
315            // Use filter_entry to prune excluded directories BEFORE descending into them.
316            // This prevents walking into node_modules, .git, target, etc. entirely,
317            // rather than entering them and then skipping files one by one.
318            let exclude_patterns = &self.options.exclude_patterns;
319            let walker = walker
320                .into_iter()
321                .filter_entry(|e| Self::should_include_entry(e, exclude_patterns, verbose));
322
323            for entry in walker.filter_map(|e| e.ok()) {
324                let path = entry.path();
325
326                // Print when entering a new directory
327                if verbose && entry.file_type().is_dir() {
328                    eprintln!("[scan] Entering directory: {}", path.display());
329                    continue;
330                }
331
332                if !path.is_file() {
333                    continue;
334                }
335
336                if !self.should_scan_file(path) {
337                    if verbose {
338                        eprintln!("  [skip] {} (extension not in scan list)", path.display());
339                    }
340                    continue;
341                }
342
343                if verbose {
344                    eprintln!("  [file] {}", path.display());
345                }
346                files_scanned += 1;
347
348                match self.scan_file(path) {
349                    Ok(fixes) => {
350                        if verbose && !fixes.is_empty() {
351                            eprintln!("    -> Found {} reference(s)", fixes.len());
352                        }
353                        fix_record.fixes.extend(fixes);
354                    }
355                    Err(e) => {
356                        if verbose {
357                            eprintln!("    -> Error: {}", e);
358                        }
359                        log::debug!("Skipping {}: {}", path.display(), e);
360                    }
361                }
362            }
363        }
364
365        if verbose {
366            eprintln!(
367                "[scan] Complete. Scanned {} files, found {} references.",
368                files_scanned,
369                fix_record.fixes.len()
370            );
371        }
372
373        // Deduplicate fixes (same file/line might have multiple matches)
374        fix_record
375            .fixes
376            .sort_by(|a, b| (&a.file, a.line, a.column).cmp(&(&b.file, b.line, b.column)));
377        fix_record.fixes.dedup_by(|a, b| {
378            a.file == b.file && a.line == b.line && a.old_reference == b.old_reference
379        });
380
381        Ok(fix_record)
382    }
383}
384
385/// Applies fixes from a fix record
386pub struct ReferenceFixer;
387
388impl ReferenceFixer {
389    /// Applies all fixes from a fix record
390    pub fn apply_fixes(fix_record: &FixRecord) -> crate::Result<ApplyResult> {
391        let mut result = ApplyResult::default();
392
393        // Group fixes by file
394        let mut fixes_by_file: HashMap<&str, Vec<&ReferenceFix>> = HashMap::new();
395        for fix in &fix_record.fixes {
396            fixes_by_file.entry(&fix.file).or_default().push(fix);
397        }
398
399        for (file_path, fixes) in fixes_by_file {
400            match Self::apply_fixes_to_file(Path::new(file_path), &fixes) {
401                Ok(count) => {
402                    result.files_modified += 1;
403                    result.references_fixed += count;
404                }
405                Err(e) => {
406                    result.errors.push(format!("{}: {}", file_path, e));
407                }
408            }
409        }
410
411        Ok(result)
412    }
413
414    /// Applies fixes to a single file
415    fn apply_fixes_to_file(path: &Path, fixes: &[&ReferenceFix]) -> crate::Result<usize> {
416        let content = fs::read_to_string(path)?;
417        let mut new_content = content.clone();
418        let mut fixed_count = 0;
419
420        // Apply fixes (we need to be careful about overlapping replacements)
421        for fix in fixes {
422            let old = &fix.old_reference;
423            let new = &fix.new_reference;
424
425            if new_content.contains(old) {
426                new_content = new_content.replace(old, new);
427                fixed_count += 1;
428            }
429        }
430
431        if new_content != content {
432            fs::write(path, new_content)?;
433        }
434
435        Ok(fixed_count)
436    }
437
438    /// Performs a dry run, returning what would be changed
439    pub fn dry_run(fix_record: &FixRecord) -> Vec<String> {
440        fix_record
441            .fixes
442            .iter()
443            .map(|fix| {
444                format!(
445                    "{}:{}: '{}' -> '{}'",
446                    fix.file, fix.line, fix.old_reference, fix.new_reference
447                )
448            })
449            .collect()
450    }
451}
452
/// Summary returned after a fix record has been applied.
#[derive(Debug, Default)]
pub struct ApplyResult {
    /// How many files were counted as modified.
    pub files_modified: usize,
    /// How many references were counted as fixed, summed over all files.
    pub references_fixed: usize,
    /// One message per file that could not be processed.
    pub errors: Vec<String>,
}
463
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Makes directory names unique across tests running in the same process.
    static TEST_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Scratch directory that is deleted when the guard goes out of scope.
    ///
    /// Cleanup happens in `Drop`, so the directory is also removed when a test
    /// panics on a failed assertion instead of being left behind in the temp dir.
    struct TestDir {
        root: PathBuf,
    }

    impl TestDir {
        /// Path of the scratch directory.
        fn path(&self) -> &Path {
            &self.root
        }

        /// Path of `name` inside the scratch directory.
        fn join(&self, name: &str) -> PathBuf {
            self.root.join(name)
        }
    }

    impl Drop for TestDir {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test outcome.
            let _ = fs::remove_dir_all(&self.root);
        }
    }

    /// Creates a fresh, empty scratch directory unique to this process and call.
    fn create_test_dir(name: &str) -> TestDir {
        let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst);
        let root = std::env::temp_dir().join(format!(
            "reformat_refs_{}_{}_{}",
            name,
            std::process::id(),
            counter
        ));
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(&root).unwrap();
        TestDir { root }
    }

    #[test]
    fn test_find_reference_quoted() {
        let test_dir = create_test_dir("quoted");

        let mut moves = HashMap::new();
        moves.insert("old.tmpl".to_string(), "new/old.tmpl".to_string());

        let scanner = ReferenceScanner::new(moves, ScanOptions::default());

        // Test with double quotes
        let file1 = test_dir.join("test1.go");
        fs::write(&file1, r#"include "old.tmpl""#).unwrap();
        let fixes = scanner.scan_file(&file1).unwrap();
        assert_eq!(fixes.len(), 1);

        // Test with single quotes
        let file2 = test_dir.join("test2.go");
        fs::write(&file2, r#"include 'old.tmpl'"#).unwrap();
        let fixes = scanner.scan_file(&file2).unwrap();
        assert_eq!(fixes.len(), 1);

        // Test with colon prefix
        let file3 = test_dir.join("test3.yaml");
        fs::write(&file3, "template: old.tmpl").unwrap();
        let fixes = scanner.scan_file(&file3).unwrap();
        assert_eq!(fixes.len(), 1);
    }

    #[test]
    fn test_scan_file() {
        let test_dir = create_test_dir("scan");

        // Create a file with references
        let test_file = test_dir.join("handler.go");
        fs::write(
            &test_file,
            r#"
package main

func render() {
    t := template.ParseFiles("wbs_create.tmpl")
    t2 := template.ParseFiles("wbs_delete.tmpl")
}
"#,
        )
        .unwrap();

        let mut moves = HashMap::new();
        moves.insert("wbs_create.tmpl".to_string(), "wbs/create.tmpl".to_string());
        moves.insert("wbs_delete.tmpl".to_string(), "wbs/delete.tmpl".to_string());

        let scanner = ReferenceScanner::new(moves, ScanOptions::default());
        let fixes = scanner.scan_file(&test_file).unwrap();

        assert_eq!(fixes.len(), 2);
        assert_eq!(fixes[0].old_reference, "wbs_create.tmpl");
        assert_eq!(fixes[0].new_reference, "wbs/create.tmpl");
    }

    #[test]
    fn test_scan_directories() {
        let test_dir = create_test_dir("scandir");

        // Create files with references
        fs::write(
            test_dir.join("main.go"),
            r#"
include "old_file.tmpl"
"#,
        )
        .unwrap();

        fs::write(
            test_dir.join("config.yaml"),
            r#"
template: old_file.tmpl
"#,
        )
        .unwrap();

        let mut moves = HashMap::new();
        moves.insert(
            "old_file.tmpl".to_string(),
            "templates/file.tmpl".to_string(),
        );

        let scanner = ReferenceScanner::new(moves, ScanOptions::default());
        let fix_record = scanner.scan(&[test_dir.path().to_path_buf()]).unwrap();

        assert_eq!(fix_record.len(), 2);
    }

    #[test]
    fn test_apply_fixes() {
        let test_dir = create_test_dir("apply");

        let test_file = test_dir.join("test.go");
        fs::write(&test_file, r#"include "old.tmpl""#).unwrap();

        let fix_record = FixRecord {
            generated_from: "test".to_string(),
            timestamp: "2026-01-15T00:00:00Z".to_string(),
            scan_directories: vec![test_dir.path().to_string_lossy().to_string()],
            fixes: vec![ReferenceFix {
                file: test_file.to_string_lossy().to_string(),
                line: 1,
                column: 10,
                context: r#"include "old.tmpl""#.to_string(),
                old_reference: "old.tmpl".to_string(),
                new_reference: "new/old.tmpl".to_string(),
            }],
        };

        let result = ReferenceFixer::apply_fixes(&fix_record).unwrap();
        assert_eq!(result.files_modified, 1);
        assert_eq!(result.references_fixed, 1);

        let content = fs::read_to_string(&test_file).unwrap();
        assert!(content.contains("new/old.tmpl"));
        assert!(!content.contains(r#""old.tmpl""#));
    }

    #[test]
    fn test_fix_record_serialization() {
        let fix_record = FixRecord {
            generated_from: "changes.json".to_string(),
            timestamp: "2026-01-15T00:00:00Z".to_string(),
            scan_directories: vec!["/tmp/src".to_string()],
            fixes: vec![ReferenceFix {
                file: "/tmp/src/main.go".to_string(),
                line: 10,
                column: 15,
                context: r#"include "old.tmpl""#.to_string(),
                old_reference: "old.tmpl".to_string(),
                new_reference: "new/old.tmpl".to_string(),
            }],
        };

        let json = serde_json::to_string_pretty(&fix_record).unwrap();
        assert!(json.contains("\"generated_from\": \"changes.json\""));
        assert!(json.contains("\"old_reference\": \"old.tmpl\""));

        let parsed: FixRecord = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.fixes.len(), 1);
    }

    #[test]
    fn test_exclude_patterns() {
        let test_dir = create_test_dir("exclude");

        // Create a directory structure with excluded directories
        let node_modules = test_dir.join("node_modules");
        let git_dir = test_dir.join(".git");
        let src_dir = test_dir.join("src");
        fs::create_dir_all(&node_modules).unwrap();
        fs::create_dir_all(&git_dir).unwrap();
        fs::create_dir_all(&src_dir).unwrap();

        // Create files in each directory that reference "old.tmpl"
        fs::write(node_modules.join("index.js"), "require('old.tmpl')").unwrap();
        fs::write(git_dir.join("config"), "path = old.tmpl").unwrap();
        fs::write(src_dir.join("main.rs"), r#"include!("old.tmpl")"#).unwrap();

        let mut moves = HashMap::new();
        moves.insert("old.tmpl".to_string(), "new/old.tmpl".to_string());

        let scanner = ReferenceScanner::new(moves, ScanOptions::default());
        let fix_record = scanner.scan(&[test_dir.path().to_path_buf()]).unwrap();

        // Only src/main.rs should be scanned - node_modules and .git should be excluded
        assert_eq!(fix_record.len(), 1);
        assert!(fix_record.fixes[0].file.contains("src"));
    }
}