Skip to main content

ralph_workflow/pipeline/idle_timeout/
file_activity.rs

1//! File activity tracking for timeout detection.
2//!
3//! This module provides infrastructure to detect when an agent is actively
4//! writing files, even when there's minimal stdout/stderr output. This prevents
5//! false timeout kills when agents are making progress through file updates.
6
7use crate::workspace::Workspace;
8use std::path::Path;
9use std::sync::OnceLock;
10use std::time::{Duration, SystemTime};
11
/// Compute how long ago `mtime` lies in the past, measured from `now`.
///
/// If the filesystem reports an `mtime` later than `now` (clock skew, network
/// filesystems), the age is clamped to `Duration::ZERO` so the file counts as
/// fresh activity rather than as extremely old.
fn file_age(now: SystemTime, mtime: SystemTime) -> Duration {
    match now.duration_since(mtime) {
        Ok(elapsed) => elapsed,
        // `duration_since` only fails when `mtime` is later than `now`.
        Err(_) => Duration::ZERO,
    }
}
17
/// Maximum depth for recursive workspace scan.
///
/// This value is passed as the initial `remaining_depth` of the workspace
/// scan; the scan decrements it by one for every directory level it descends
/// and stops recursing once it reaches zero.
///
/// Depth 0 = workspace root files only (no recursion).
/// Depth 1 = additionally files directly inside root-level subdirectories
///           (previous behaviour).
/// Depth 8 = covers standard Rust workspace layouts (crate/src/module/submod/…).
const MAX_SCAN_DEPTH: usize = 8;
24
/// One-shot latch for the "unreadable directory" warning.
///
/// `scan_dir_recursive` prints its warning only when `set(())` on this cell
/// succeeds, which happens the first time only, so the warning is emitted at
/// most once for the lifetime of the process.
static WORKSPACE_SCAN_UNREADABLE_DIR_WARNED: OnceLock<()> = OnceLock::new();
26
27/// Recursively scan a directory for recently modified, non-noise files.
28///
29/// Returns `Ok(true)` as soon as a file younger than `timeout` is found.
30/// Excluded directories and extensions are skipped at every level.
31/// `remaining_depth` bounds worst-case traversal to prevent hangs on deep trees.
32///
33/// `#[inline(never)]` prevents this function from being merged into its caller's
34/// stack frame, keeping each frame independently bounded.
35#[inline(never)]
36fn scan_dir_recursive(
37    workspace: &dyn Workspace,
38    dir: &Path,
39    now: SystemTime,
40    timeout: Duration,
41    remaining_depth: usize,
42    is_root: bool,
43) -> std::io::Result<bool> {
44    let entries = match workspace.read_dir(dir) {
45        Ok(entries) => entries,
46        Err(e) => {
47            if is_root {
48                return Err(e);
49            }
50
51            // Subdirectories may be unreadable due to permissions or transient
52            // filesystem issues. Skipping them is fine, but we must not silently
53            // treat that as "no activity".
54            if WORKSPACE_SCAN_UNREADABLE_DIR_WARNED.set(()).is_ok() {
55                eprintln!(
56                    "Warning: workspace scan skipped unreadable directory '{}' ({e}); file-activity detection may be incomplete",
57                    dir.display()
58                );
59            }
60
61            return Ok(false);
62        }
63    };
64    for entry in entries {
65        let path = entry.path();
66        if entry.is_file() {
67            if FileActivityTracker::is_excluded_workspace_file(path) {
68                continue;
69            }
70            if let Some(mtime) = entry.modified() {
71                let age = file_age(now, mtime);
72                if age <= timeout {
73                    return Ok(true);
74                }
75            }
76        } else if entry.is_dir() {
77            if FileActivityTracker::is_excluded_workspace_dir(path) {
78                continue;
79            }
80            if remaining_depth > 0
81                && scan_dir_recursive(
82                    workspace,
83                    entry.path(),
84                    now,
85                    timeout,
86                    remaining_depth - 1,
87                    false,
88                )?
89            {
90                return Ok(true);
91            }
92        }
93    }
94    Ok(false)
95}
96
/// Tracks file modification activity for timeout detection.
///
/// The tracker detects ongoing work that may not produce stdout/stderr output
/// by inspecting file modification times. It looks at whitelisted AI-generated
/// files in the `.agent/` directory, XML files in `.agent/tmp/`, and, through
/// a depth-limited recursive scan, other workspace files, while ignoring log
/// churn, editor artifacts and noise directories.
///
/// The type carries no state of its own; every check reads the workspace
/// afresh.
pub struct FileActivityTracker {
    // Private zero-sized field: keeps the struct from being built with
    // struct-literal syntax outside this module, so callers use `new()` or
    // `Default`.
    _private: (),
}
105
106impl FileActivityTracker {
107    /// Create a new file activity tracker.
108    #[must_use]
109    pub const fn new() -> Self {
110        Self { _private: () }
111    }
112
113    /// Check if any AI-generated files have been modified within `timeout`.
114    ///
115    /// This method scans two areas for evidence of recent agent work:
116    ///
117    /// 1. **`.agent/` whitelist** – files representing meaningful AI progress
118    ///    (PLAN.md, ISSUES.md, NOTES.md, STATUS.md, commit-message.txt,
119    ///    `.agent/tmp/*.xml`).
120    /// 2. **Workspace recursive scan (max depth 8)** – any file outside excluded
121    ///    noise directories (`.git/`, `target/`, `tmp/`, `node_modules/`,
122    ///    `.agent/`) and excluded extensions (`*.log`, `*.swp`, `*.tmp`,
123    ///    `*.bak`, `*~`). This detects coding work (source edits, test writes,
124    ///    `Cargo.toml` changes) that produces no stdout/stderr output, including
125    ///    files nested deeply inside workspace crates (e.g. `crate/src/mod/file.rs`).
126    ///
127    /// Returns `Ok(true)` if recent activity is detected, `Ok(false)` if no
128    /// recent activity, or `Err` if a required directory read fails.
129    ///
130    /// # Arguments
131    ///
132    /// * `workspace` - The workspace to read files from
133    /// * `timeout` - The recency window (typically 300 seconds)
134    ///
135    /// # Errors
136    ///
137    /// Returns error if the `.agent/` directory exists but cannot be read.
138    pub fn check_for_recent_activity(
139        &self,
140        workspace: &dyn Workspace,
141        timeout: Duration,
142    ) -> std::io::Result<bool> {
143        let now = SystemTime::now();
144        let agent_dir = Path::new(".agent");
145
146        // Check .agent/ whitelist if the directory exists.
147        if workspace.exists(agent_dir) {
148            let entries = workspace.read_dir(agent_dir)?;
149
150            for entry in &entries {
151                if !entry.is_file() {
152                    continue;
153                }
154                let path = entry.path();
155                if !Self::is_ai_generated_file(path) {
156                    continue;
157                }
158                let Some(mtime) = entry.modified() else {
159                    continue;
160                };
161                let age = file_age(now, mtime);
162                if age <= timeout {
163                    return Ok(true);
164                }
165            }
166        }
167
168        // Also check .agent/tmp/ for XML artifacts.
169        let tmp_dir = Path::new(".agent/tmp");
170        if workspace.exists(tmp_dir) {
171            if let Ok(tmp_entries) = workspace.read_dir(tmp_dir) {
172                for entry in tmp_entries {
173                    if !entry.is_file() {
174                        continue;
175                    }
176                    let path = entry.path();
177                    if path.extension().is_none_or(|ext| ext != "xml") {
178                        continue;
179                    }
180                    let Some(mtime) = entry.modified() else {
181                        continue;
182                    };
183                    let age = file_age(now, mtime);
184                    if age <= timeout {
185                        return Ok(true);
186                    }
187                }
188            }
189        }
190
191        // Recursively scan workspace for recently modified source files.
192        // Excludes noise directories (.git, target, tmp, node_modules, .agent)
193        // and noise extensions (*.log, *.swp, *.tmp, *.bak, *~).
194        // Short-circuits on first match for performance.
195        // The .agent/ directory is excluded here; it is handled above.
196        if scan_dir_recursive(workspace, Path::new(""), now, timeout, MAX_SCAN_DEPTH, true)? {
197            return Ok(true);
198        }
199
200        Ok(false)
201    }
202
203    /// Check if a path represents an AI-generated file that should be tracked.
204    ///
205    /// Includes:
206    /// - PLAN.md
207    /// - ISSUES.md
208    /// - NOTES.md
209    /// - STATUS.md
210    /// - commit-message.txt
211    ///
212    /// Excludes:
213    /// - *.log (log files)
214    /// - checkpoint.json (internal state)
215    /// - `start_commit` (initialization artifact)
216    /// - `review_baseline.txt` (baseline tracking)
217    /// - Temporary/editor files (.swp, .tmp, ~, .bak)
218    fn is_ai_generated_file(path: &Path) -> bool {
219        let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
220            return false;
221        };
222
223        // Exclude patterns
224        let has_excluded_ext = path.extension().is_some_and(|ext| {
225            ext.eq_ignore_ascii_case("log")
226                || ext.eq_ignore_ascii_case("swp")
227                || ext.eq_ignore_ascii_case("tmp")
228                || ext.eq_ignore_ascii_case("bak")
229        });
230
231        if has_excluded_ext
232            || file_name == "checkpoint.json"
233            || file_name == "start_commit"
234            || file_name == "review_baseline.txt"
235            || file_name.ends_with('~')
236        {
237            return false;
238        }
239
240        // Include patterns - AI-generated artifacts
241        matches!(
242            file_name,
243            "PLAN.md" | "ISSUES.md" | "NOTES.md" | "STATUS.md" | "commit-message.txt"
244        )
245    }
246
247    /// Check if a workspace-root directory should be excluded from the activity scan.
248    ///
249    /// Excludes directories that contain noise or are handled elsewhere:
250    /// - `.git/` – version-control metadata
251    /// - `target/` – Cargo build artifacts
252    /// - `tmp/` – temporary files
253    /// - `node_modules/` – npm dependencies
254    /// - `.agent/` – already handled by the dedicated whitelist scan above
255    fn is_excluded_workspace_dir(path: &Path) -> bool {
256        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
257            return false;
258        };
259        matches!(name, ".git" | "target" | "tmp" | "node_modules" | ".agent")
260    }
261
262    /// Check if a workspace file should be excluded from the activity scan.
263    ///
264    /// Excludes file types that represent noise rather than productive work:
265    /// - `*.log` – log output, append-only
266    /// - `*.swp` – Vim swap files
267    /// - `*.tmp` – generic temporaries
268    /// - `*.bak` – backup copies
269    /// - `*~` – editor backup suffix
270    fn is_excluded_workspace_file(path: &Path) -> bool {
271        let has_excluded_ext = path.extension().is_some_and(|ext| {
272            ext.eq_ignore_ascii_case("log")
273                || ext.eq_ignore_ascii_case("swp")
274                || ext.eq_ignore_ascii_case("tmp")
275                || ext.eq_ignore_ascii_case("bak")
276        });
277        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
278        has_excluded_ext || file_name.ends_with('~')
279    }
280}
281
282impl Default for FileActivityTracker {
283    fn default() -> Self {
284        Self::new()
285    }
286}