Skip to main content

ralph/commands/task/
refactor.rs

1//! Refactor task generation for large files exceeding LOC thresholds.
2//!
3//! Responsibilities:
4//! - Scan directories for Rust files exceeding LOC thresholds.
5//! - Count lines of code (excluding comments and empty lines).
6//! - Group related files based on batch mode strategy.
7//! - Generate refactoring tasks using the task builder.
8//! - Build request text and scope strings for task creation.
9//!
10//! Not handled here:
11//! - Task building (delegates to build.rs via build_task).
12//! - Task updating (see update.rs).
13//! - CLI argument parsing or command routing.
14//! - Non-Rust file scanning.
15//!
16//! Invariants/assumptions:
17//! - LOC counting excludes comments and empty lines for accurate measurement.
18//! - Hidden directories, target/, and .ralph/cache/ are skipped during scanning.
19//! - File grouping uses test file naming conventions for relationship detection.
20//! - Batch mode determines grouping strategy (Auto, Never, Aggressive).
21
22use super::{BatchMode, TaskBuildOptions, TaskBuildRefactorOptions};
23use crate::config;
24use anyhow::Result;
25use std::path::{Path, PathBuf};
26
27/// Build refactoring tasks for large files exceeding the LOC threshold.
28///
29/// Scans the specified directory for Rust files, identifies those exceeding
30/// the threshold, groups them based on batch mode, and creates tasks using
31/// the task builder.
32pub fn build_refactor_tasks(
33    resolved: &config::Resolved,
34    opts: TaskBuildRefactorOptions,
35) -> Result<()> {
36    // Determine scan path (default to repo root for generic usage)
37    let scan_path = opts
38        .path
39        .clone()
40        .unwrap_or_else(|| resolved.repo_root.clone());
41
42    // Scan for large .rs files
43    let large_files = scan_for_large_files(&scan_path, opts.threshold)?;
44
45    if large_files.is_empty() {
46        println!(
47            "No files found exceeding {} LOC threshold in {}.",
48            opts.threshold,
49            scan_path.display()
50        );
51        return Ok(());
52    }
53
54    println!(
55        "Found {} file(s) exceeding {} LOC:",
56        large_files.len(),
57        opts.threshold
58    );
59    for (path, loc) in &large_files {
60        println!("  {} ({} LOC)", path.display(), loc);
61    }
62
63    // Group files based on batch mode
64    let groups = group_files(&large_files, opts.batch);
65
66    println!("\nWill create {} task(s):", groups.len());
67    for (i, group) in groups.iter().enumerate() {
68        match &group[..] {
69            [(path, loc)] => {
70                println!("  {}. {} ({} LOC)", i + 1, path.display(), loc);
71            }
72            multiple => {
73                let total_loc: usize = multiple.iter().map(|(_, loc)| loc).sum();
74                println!(
75                    "  {}. {} files in {} ({} total LOC)",
76                    i + 1,
77                    multiple.len(),
78                    multiple[0].0.parent().unwrap_or(&multiple[0].0).display(),
79                    total_loc
80                );
81            }
82        }
83    }
84
85    if opts.dry_run {
86        println!("\nDry run - no tasks created.");
87        return Ok(());
88    }
89
90    // Create tasks for each group
91    let mut created_count = 0;
92    for group in groups {
93        let request = build_refactor_request(&group);
94        let scope = build_scope(&group);
95
96        let mut hint_tags = "refactor,large-file".to_string();
97        if !opts.extra_tags.is_empty() {
98            hint_tags.push(',');
99            hint_tags.push_str(&opts.extra_tags);
100        }
101
102        super::build_task(
103            resolved,
104            TaskBuildOptions {
105                request,
106                hint_tags,
107                hint_scope: scope,
108                runner_override: opts.runner_override.clone(),
109                model_override: opts.model_override.clone(),
110                reasoning_effort_override: opts.reasoning_effort_override,
111                runner_cli_overrides: opts.runner_cli_overrides.clone(),
112                force: opts.force,
113                repoprompt_tool_injection: opts.repoprompt_tool_injection,
114                template_hint: Some("refactor".to_string()),
115                template_target: None,
116                strict_templates: false,
117                estimated_minutes: None,
118            },
119        )?;
120        created_count += 1;
121    }
122
123    println!("\nCreated {} refactoring task(s).", created_count);
124    Ok(())
125}
126
127/// Scan directory for .rs files exceeding threshold.
128/// Returns Vec of (path, loc_count) sorted by loc descending.
129fn scan_for_large_files(root: &Path, threshold: usize) -> Result<Vec<(PathBuf, usize)>> {
130    let mut results = Vec::new();
131    scan_directory_recursive(root, root, threshold, &mut results)?;
132
133    // Sort by LOC descending (largest first)
134    results.sort_by(|a, b| b.1.cmp(&a.1));
135    Ok(results)
136}
137
138/// Recursively scan directory for Rust files.
139#[allow(clippy::only_used_in_recursion)]
140fn scan_directory_recursive(
141    root: &Path,
142    current: &Path,
143    threshold: usize,
144    results: &mut Vec<(PathBuf, usize)>,
145) -> Result<()> {
146    let entries = std::fs::read_dir(current)?;
147
148    for entry in entries {
149        let entry = entry?;
150        let path = entry.path();
151        let name = entry.file_name();
152        let name_str = name.to_string_lossy();
153
154        // Skip hidden dirs, target/, and .ralph/cache/
155        if path.is_dir() {
156            if name_str.starts_with('.') || name_str == "target" {
157                continue;
158            }
159            // Skip .ralph/cache/ to avoid scanning generated/temp files
160            if path
161                .components()
162                .any(|c| c.as_os_str() == ".ralph" || c.as_os_str() == "cache")
163            {
164                continue;
165            }
166            scan_directory_recursive(root, &path, threshold, results)?;
167        } else if path.extension().is_some_and(|ext| ext == "rs") {
168            let loc = count_lines_of_code(&path)?;
169            if loc > threshold {
170                results.push((path.to_path_buf(), loc));
171            }
172        }
173    }
174
175    Ok(())
176}
177
178/// Count non-empty, non-comment lines in a Rust file.
179fn count_lines_of_code(path: &Path) -> Result<usize> {
180    let content = std::fs::read_to_string(path)?;
181    let mut count = 0;
182    let mut in_block_comment = false;
183
184    for line in content.lines() {
185        let trimmed = line.trim();
186
187        if trimmed.is_empty() {
188            continue;
189        }
190
191        if in_block_comment {
192            if trimmed.contains("*/") {
193                in_block_comment = false;
194            }
195            continue;
196        }
197
198        if trimmed.starts_with("//") {
199            continue;
200        }
201
202        if trimmed.starts_with("/*") {
203            if !trimmed.contains("*/") {
204                in_block_comment = true;
205            }
206            continue;
207        }
208
209        count += 1;
210    }
211
212    Ok(count)
213}
214
215/// Group files based on batch mode strategy.
216fn group_files(files: &[(PathBuf, usize)], mode: BatchMode) -> Vec<Vec<(PathBuf, usize)>> {
217    match mode {
218        BatchMode::Never => files.iter().map(|f| vec![f.clone()]).collect(),
219        BatchMode::Aggressive => {
220            // Group by parent directory
221            let mut groups: std::collections::HashMap<PathBuf, Vec<(PathBuf, usize)>> =
222                std::collections::HashMap::new();
223            for (path, loc) in files {
224                let parent = path.parent().map(|p| p.to_path_buf()).unwrap_or_default();
225                groups.entry(parent).or_default().push((path.clone(), *loc));
226            }
227            groups.into_values().collect()
228        }
229        BatchMode::Auto => {
230            // Group files with similar names in same directory
231            // (e.g., test_*.rs, *_tests.rs)
232            let mut groups: Vec<Vec<(PathBuf, usize)>> = Vec::new();
233            let mut used: std::collections::HashSet<usize> = std::collections::HashSet::new();
234
235            for (i, (path, loc)) in files.iter().enumerate() {
236                if used.contains(&i) {
237                    continue;
238                }
239
240                let parent = path.parent();
241                let stem = path.file_stem().and_then(|s| s.to_str());
242
243                let mut group = vec![(path.clone(), *loc)];
244                used.insert(i);
245
246                // Look for related files
247                for (j, (other_path, other_loc)) in files.iter().enumerate().skip(i + 1) {
248                    if used.contains(&j) {
249                        continue;
250                    }
251
252                    if other_path.parent() != parent {
253                        continue;
254                    }
255
256                    let other_stem = other_path.file_stem().and_then(|s| s.to_str());
257
258                    // Check for test file relationships
259                    if let (Some(s), Some(os)) = (stem, other_stem)
260                        && is_related_file(s, os)
261                    {
262                        group.push((other_path.clone(), *other_loc));
263                        used.insert(j);
264                    }
265                }
266
267                groups.push(group);
268            }
269
270            groups
271        }
272    }
273}
274
/// Check whether two file stems form a source/test pair.
///
/// One stem must be the other with a test marker attached: the prefix `test_`
/// (`test_foo` / `foo`) or one of the suffixes `_test` / `_tests`
/// (`foo_test` / `foo`, `foo_tests` / `foo`). The check is symmetric.
///
/// Markers are only recognised in their conventional position, so `_testfoo`
/// or `footest_` are not considered related to `foo`. A stem that consists of
/// nothing but a marker is not related to the empty stem.
fn is_related_file(a: &str, b: &str) -> bool {
    const TEST_PREFIXES: [&str; 1] = ["test_"];
    const TEST_SUFFIXES: [&str; 2] = ["_test", "_tests"];

    /// True when `candidate` is `base` plus exactly one test marker.
    fn is_test_variant_of(candidate: &str, base: &str) -> bool {
        if base.is_empty() {
            return false;
        }
        TEST_PREFIXES
            .iter()
            .any(|&prefix| candidate.strip_prefix(prefix) == Some(base))
            || TEST_SUFFIXES
                .iter()
                .any(|&suffix| candidate.strip_suffix(suffix) == Some(base))
    }

    is_test_variant_of(a, b) || is_test_variant_of(b, a)
}
296
/// Compose the natural-language request handed to the task builder.
///
/// A group with exactly one file yields a request naming that file and its
/// LOC. Any other group yields a request stating the number of files, their
/// combined LOC, and a `, `-separated list of the paths.
fn build_refactor_request(group: &[(PathBuf, usize)]) -> String {
    if let [(only_file, only_loc)] = group {
        return format!(
            "Refactor {} ({} LOC) to improve maintainability by splitting it into smaller, cohesive modules per AGENTS.md guidelines.",
            only_file.display(),
            only_loc
        );
    }

    // Gather the combined size and the printable paths in a single pass.
    let mut combined_loc: usize = 0;
    let mut listing: Vec<String> = Vec::with_capacity(group.len());
    for (file, loc) in group {
        combined_loc += *loc;
        listing.push(file.display().to_string());
    }

    format!(
        "Refactor {} related files ({} total LOC) to improve maintainability by splitting them into smaller, cohesive modules per AGENTS.md guidelines. Files: {}",
        group.len(),
        combined_loc,
        listing.join(", ")
    )
}
319
/// Render the scope hint for a group: its file paths joined by `,` with no
/// surrounding whitespace, in group order. An empty group yields an empty string.
fn build_scope(group: &[(PathBuf, usize)]) -> String {
    let mut scope = String::new();
    for (position, (file, _)) in group.iter().enumerate() {
        // Separator goes between entries only, never before the first one.
        if position > 0 {
            scope.push(',');
        }
        scope.push_str(&file.display().to_string());
    }
    scope
}
328
// Unit tests for the LOC counter, the test-pair detection, and the
// request/scope string builders. Scanning and grouping are not covered here.
#[cfg(test)]
mod tests {
    use super::{build_refactor_request, build_scope, count_lines_of_code, is_related_file};
    use std::io::Write;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// A `//` comment line and a blank line are ignored; the three code lines count.
    #[test]
    fn count_lines_of_code_skips_comments_and_empty() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("test.rs");
        let mut f = std::fs::File::create(&file).unwrap();
        writeln!(f, "// comment").unwrap();
        writeln!(f).unwrap();
        writeln!(f, "fn main() {{").unwrap();
        writeln!(f, "    println!(\"hello\");").unwrap();
        writeln!(f, "}}").unwrap();

        let loc = count_lines_of_code(&file).unwrap();
        assert_eq!(loc, 3); // fn main, println, closing brace
    }

    /// Pins the counter's block-comment handling: a multi-line `/* ... */` is
    /// ignored, and so is any line that starts with `/*`, even with code after it.
    #[test]
    fn count_lines_of_code_handles_block_comments() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("test.rs");
        let mut f = std::fs::File::create(&file).unwrap();
        writeln!(f, "/* block comment start").unwrap();
        writeln!(f, "   continues here */").unwrap();
        writeln!(f, "fn main() {{").unwrap();
        writeln!(f, "    /* inline */ println!(\"hello\");").unwrap();
        writeln!(f, "}}").unwrap();

        let loc = count_lines_of_code(&file).unwrap();
        // Counted: `fn main() {` and the closing brace. The `/* inline */ println!`
        // line begins with `/*`, so the counter skips it entirely.
        assert_eq!(loc, 2);
    }

    /// Source/test stem pairs are detected in both argument orders; unrelated stems are not.
    #[test]
    fn is_related_file_detects_test_pairs() {
        assert!(is_related_file("foo", "foo_test"));
        assert!(is_related_file("foo_test", "foo"));
        assert!(is_related_file("test_foo", "foo"));
        assert!(is_related_file("foo", "test_foo"));
        assert!(is_related_file("foo_tests", "foo"));
        assert!(is_related_file("foo", "foo_tests"));
        assert!(!is_related_file("foo", "bar"));
        assert!(!is_related_file("foo_test", "bar"));
    }

    /// A one-file group produces a request naming the file, its LOC, and AGENTS.md.
    #[test]
    fn build_refactor_request_single_file() {
        let group = vec![(PathBuf::from("src/main.rs"), 1200)];
        let request = build_refactor_request(&group);
        assert!(request.contains("src/main.rs"));
        assert!(request.contains("1200 LOC"));
        assert!(request.contains("AGENTS.md"));
    }

    /// A multi-file group reports the file count, the summed LOC, and every path.
    #[test]
    fn build_refactor_request_multiple_files() {
        let group = vec![
            (PathBuf::from("src/foo.rs"), 800),
            (PathBuf::from("src/foo_test.rs"), 500),
        ];
        let request = build_refactor_request(&group);
        assert!(request.contains("2 related files"));
        assert!(request.contains("1300 total LOC"));
        assert!(request.contains("src/foo.rs"));
        assert!(request.contains("src/foo_test.rs"));
    }

    /// The scope of a one-file group is just that file's path.
    #[test]
    fn build_scope_single_file() {
        let group = vec![(PathBuf::from("src/main.rs"), 1200)];
        let scope = build_scope(&group);
        assert_eq!(scope, "src/main.rs");
    }

    /// Multiple paths are joined with a bare comma, in group order.
    #[test]
    fn build_scope_multiple_files() {
        let group = vec![
            (PathBuf::from("src/foo.rs"), 800),
            (PathBuf::from("src/bar.rs"), 500),
        ];
        let scope = build_scope(&group);
        assert_eq!(scope, "src/foo.rs,src/bar.rs");
    }
}