ralph_workflow/pipeline/idle_timeout/file_activity.rs
1//! File activity tracking for timeout detection.
2//!
3//! This module provides infrastructure to detect when an agent is actively
4//! writing files, even when there's minimal stdout/stderr output. This prevents
5//! false timeout kills when agents are making progress through file updates.
6
7use crate::workspace::Workspace;
8use std::path::Path;
9use std::sync::OnceLock;
10use std::time::{Duration, SystemTime};
11
/// Compute how long ago `mtime` was, relative to `now`.
///
/// When `mtime` lies after `now` (clock skew, network filesystems) the age is
/// clamped to zero, so the file is treated as freshly modified rather than
/// extremely old.
fn file_age(now: SystemTime, mtime: SystemTime) -> Duration {
    match now.duration_since(mtime) {
        Ok(elapsed) => elapsed,
        // `mtime` is later than `now`: report "just modified".
        Err(_) => Duration::ZERO,
    }
}
17
/// Maximum depth for recursive workspace scan.
///
/// Depth 0 = workspace root files only (no recursion).
/// Depth 1 = workspace root files plus files in its immediate subdirectories
/// (previous behaviour).
/// Depth 8 = covers standard Rust workspace layouts (crate/src/module/submod/…).
const MAX_SCAN_DEPTH: usize = 8;

/// One-shot latch for the "unreadable directory" warning.
///
/// It is set the first time the recursive scan skips a subdirectory it cannot
/// read, so that warning is printed at most once per process instead of on
/// every scan.
static WORKSPACE_SCAN_UNREADABLE_DIR_WARNED: OnceLock<()> = OnceLock::new();
26
27/// Recursively scan a directory for recently modified, non-noise files.
28///
29/// Returns `Ok(true)` as soon as a file younger than `timeout` is found.
30/// Excluded directories and extensions are skipped at every level.
31/// `remaining_depth` bounds worst-case traversal to prevent hangs on deep trees.
32///
33/// `#[inline(never)]` prevents this function from being merged into its caller's
34/// stack frame, keeping each frame independently bounded.
35#[inline(never)]
36fn scan_dir_recursive(
37 workspace: &dyn Workspace,
38 dir: &Path,
39 now: SystemTime,
40 timeout: Duration,
41 remaining_depth: usize,
42 is_root: bool,
43) -> std::io::Result<bool> {
44 let entries = match workspace.read_dir(dir) {
45 Ok(entries) => entries,
46 Err(e) => {
47 if is_root {
48 return Err(e);
49 }
50
51 // Subdirectories may be unreadable due to permissions or transient
52 // filesystem issues. Skipping them is fine, but we must not silently
53 // treat that as "no activity".
54 if WORKSPACE_SCAN_UNREADABLE_DIR_WARNED.set(()).is_ok() {
55 eprintln!(
56 "Warning: workspace scan skipped unreadable directory '{}' ({e}); file-activity detection may be incomplete",
57 dir.display()
58 );
59 }
60
61 return Ok(false);
62 }
63 };
64 for entry in entries {
65 let path = entry.path();
66 if entry.is_file() {
67 if FileActivityTracker::is_excluded_workspace_file(path) {
68 continue;
69 }
70 if let Some(mtime) = entry.modified() {
71 let age = file_age(now, mtime);
72 if age <= timeout {
73 return Ok(true);
74 }
75 }
76 } else if entry.is_dir() {
77 if FileActivityTracker::is_excluded_workspace_dir(path) {
78 continue;
79 }
80 if remaining_depth > 0
81 && scan_dir_recursive(
82 workspace,
83 entry.path(),
84 now,
85 timeout,
86 remaining_depth - 1,
87 false,
88 )?
89 {
90 return Ok(true);
91 }
92 }
93 }
94 Ok(false)
95}
96
/// Tracks file modification activity for timeout detection.
///
/// The tracker detects ongoing agent work that may not produce stdout/stderr
/// output by inspecting file modification times. It looks at whitelisted
/// AI-generated files in `.agent/`, XML artifacts in `.agent/tmp/`, and
/// recently modified files in the rest of the workspace, while ignoring log
/// churn, editor temporaries and build/dependency directories.
///
/// The type holds no state; every check reads the workspace afresh.
pub struct FileActivityTracker {
    // Private placeholder field: keeps the struct from being built with a
    // literal outside this module, so construction goes through `new()`.
    _private: (),
}
105
106impl FileActivityTracker {
/// Create a new file activity tracker.
///
/// The tracker carries no state, so this is a plain `const` constructor.
#[must_use]
pub const fn new() -> Self {
    Self { _private: () }
}
112
113 /// Check if any AI-generated files have been modified within `timeout`.
114 ///
115 /// This method scans two areas for evidence of recent agent work:
116 ///
117 /// 1. **`.agent/` whitelist** – files representing meaningful AI progress
118 /// (PLAN.md, ISSUES.md, NOTES.md, STATUS.md, commit-message.txt,
119 /// `.agent/tmp/*.xml`).
120 /// 2. **Workspace recursive scan (max depth 8)** – any file outside excluded
121 /// noise directories (`.git/`, `target/`, `tmp/`, `node_modules/`,
122 /// `.agent/`) and excluded extensions (`*.log`, `*.swp`, `*.tmp`,
123 /// `*.bak`, `*~`). This detects coding work (source edits, test writes,
124 /// `Cargo.toml` changes) that produces no stdout/stderr output, including
125 /// files nested deeply inside workspace crates (e.g. `crate/src/mod/file.rs`).
126 ///
127 /// Returns `Ok(true)` if recent activity is detected, `Ok(false)` if no
128 /// recent activity, or `Err` if a required directory read fails.
129 ///
130 /// # Arguments
131 ///
132 /// * `workspace` - The workspace to read files from
133 /// * `timeout` - The recency window (typically 300 seconds)
134 ///
135 /// # Errors
136 ///
137 /// Returns error if the `.agent/` directory exists but cannot be read.
138 pub fn check_for_recent_activity(
139 &self,
140 workspace: &dyn Workspace,
141 timeout: Duration,
142 ) -> std::io::Result<bool> {
143 let now = SystemTime::now();
144 let agent_dir = Path::new(".agent");
145
146 // Check .agent/ whitelist if the directory exists.
147 if workspace.exists(agent_dir) {
148 let entries = workspace.read_dir(agent_dir)?;
149
150 for entry in &entries {
151 if !entry.is_file() {
152 continue;
153 }
154 let path = entry.path();
155 if !Self::is_ai_generated_file(path) {
156 continue;
157 }
158 let Some(mtime) = entry.modified() else {
159 continue;
160 };
161 let age = file_age(now, mtime);
162 if age <= timeout {
163 return Ok(true);
164 }
165 }
166 }
167
168 // Also check .agent/tmp/ for XML artifacts.
169 let tmp_dir = Path::new(".agent/tmp");
170 if workspace.exists(tmp_dir) {
171 if let Ok(tmp_entries) = workspace.read_dir(tmp_dir) {
172 for entry in tmp_entries {
173 if !entry.is_file() {
174 continue;
175 }
176 let path = entry.path();
177 if path.extension().is_none_or(|ext| ext != "xml") {
178 continue;
179 }
180 let Some(mtime) = entry.modified() else {
181 continue;
182 };
183 let age = file_age(now, mtime);
184 if age <= timeout {
185 return Ok(true);
186 }
187 }
188 }
189 }
190
191 // Recursively scan workspace for recently modified source files.
192 // Excludes noise directories (.git, target, tmp, node_modules, .agent)
193 // and noise extensions (*.log, *.swp, *.tmp, *.bak, *~).
194 // Short-circuits on first match for performance.
195 // The .agent/ directory is excluded here; it is handled above.
196 if scan_dir_recursive(workspace, Path::new(""), now, timeout, MAX_SCAN_DEPTH, true)? {
197 return Ok(true);
198 }
199
200 Ok(false)
201 }
202
/// Check if a path names one of the whitelisted AI-generated files.
///
/// Only the final path component is inspected, and it must match exactly
/// (case-sensitive). Accepted names:
/// - PLAN.md
/// - ISSUES.md
/// - NOTES.md
/// - STATUS.md
/// - commit-message.txt
///
/// Every other name is rejected. In particular this rejects:
/// - *.log (log files)
/// - checkpoint.json (internal state)
/// - `start_commit` (initialization artifact)
/// - `review_baseline.txt` (baseline tracking)
/// - Temporary/editor files (.swp, .tmp, ~, .bak)
/// - paths with no final component or a non-UTF-8 file name
fn is_ai_generated_file(path: &Path) -> bool {
    // The whitelist alone decides the outcome: none of its names carries a
    // noise extension, a `~` suffix or an internal-state name, so a separate
    // exclusion pass could never change the result.
    path.file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| {
            matches!(
                name,
                "PLAN.md" | "ISSUES.md" | "NOTES.md" | "STATUS.md" | "commit-message.txt"
            )
        })
}
246
/// Check if a directory should be skipped by the workspace activity scan.
///
/// The decision is based solely on the directory's own name (the final path
/// component, matched exactly), so it applies at every level of the recursive
/// scan, not just at the workspace root. Skipped names:
/// - `.git` – version-control metadata
/// - `target` – Cargo build artifacts
/// - `tmp` – temporary files
/// - `node_modules` – npm dependencies
/// - `.agent` – covered by the dedicated `.agent/` whitelist check instead
///
/// Paths without a final component, or with a non-UTF-8 name, are not excluded.
fn is_excluded_workspace_dir(path: &Path) -> bool {
    const EXCLUDED_DIR_NAMES: [&str; 5] = [".git", "target", "tmp", "node_modules", ".agent"];

    path.file_name()
        .and_then(|component| component.to_str())
        .is_some_and(|dir_name| EXCLUDED_DIR_NAMES.contains(&dir_name))
}
261
/// Check if a workspace file is noise that must not count as activity.
///
/// A file is excluded when its extension is one of the following
/// (ASCII case-insensitive), or when its name ends with `~`:
/// - `*.log` – log output, append-only
/// - `*.swp` – Vim swap files
/// - `*.tmp` – generic temporaries
/// - `*.bak` – backup copies
/// - `*~` – editor backup suffix
///
/// The `~` suffix is only recognised on file names that are valid UTF-8.
fn is_excluded_workspace_file(path: &Path) -> bool {
    const NOISE_EXTENSIONS: [&str; 4] = ["log", "swp", "tmp", "bak"];

    let noisy_extension = path.extension().is_some_and(|ext| {
        NOISE_EXTENSIONS
            .iter()
            .any(|&noise| ext.eq_ignore_ascii_case(noise))
    });
    if noisy_extension {
        return true;
    }

    path.file_name()
        .and_then(|component| component.to_str())
        .is_some_and(|name| name.ends_with('~'))
}
280}
281
/// `Default` is provided for convenience and is equivalent to calling `new()`.
impl Default for FileActivityTracker {
    /// Build a tracker by delegating to `new()`.
    fn default() -> Self {
        Self::new()
    }
}