Skip to main content

openclaw_scan/scanner/
history.rs

1//! Data exposure scanner.
2//!
3//! Checks for excessive data retention in history files, debug logs, and
4//! backups that increase the blast radius of a breach.
5
6use std::path::Path;
7
8use anyhow::Result;
9use chrono::{DateTime, Utc};
10
11use crate::finding::{Category, Finding, Severity};
12use crate::scanner::{ScanContext, Scanner};
13
/// Number of bytes in one mebibyte; the size thresholds below are multiples of it.
const MIB: u64 = 1024 * 1024;

/// Size limit for the history file (10 MB). Anything larger is reported as a HIGH finding.
const HISTORY_SIZE_WARN: u64 = 10 * MIB;
/// Combined size limit for the debug directory (50 MB). Anything larger is
/// reported as a MEDIUM finding.
const DEBUG_SIZE_WARN: u64 = 50 * MIB;
/// Age limit for backups, in days. Backup files last modified longer ago than
/// this are reported as old.
const BACKUP_MAX_AGE_DAYS: i64 = 30;
/// Count limit for backups. Holding more files than this is reported as a risk
/// in its own right.
const BACKUP_MAX_COUNT: usize = 5;
22
23pub struct HistoryScanner;
24
25impl Scanner for HistoryScanner {
26    fn name(&self) -> &'static str {
27        "history"
28    }
29
30    fn scan(&self, ctx: &ScanContext) -> Result<Vec<Finding>> {
31        let mut findings = Vec::new();
32
33        check_history_size(&ctx.root, &mut findings);
34        check_debug_directory(&ctx.root, &mut findings);
35        check_backup_files(&ctx.root, &mut findings);
36        check_shell_snapshots(&ctx.root, &mut findings);
37
38        Ok(findings)
39    }
40}
41
42fn check_history_size(root: &Path, findings: &mut Vec<Finding>) {
43    let path = root.join("history.jsonl");
44    if !path.exists() {
45        return;
46    }
47    let size = std::fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
48    if size > HISTORY_SIZE_WARN {
49        findings.push(
50            Finding::new(
51                Severity::High,
52                Category::DataExposure,
53                "Conversation history file is very large",
54                format!(
55                    "'{}' is {:.1} MB. Large history files increase the risk of \
56                 credential exposure — the more history stored, the more likely \
57                 it contains sensitive information pasted during past sessions.",
58                    path.display(),
59                    size as f64 / 1024.0 / 1024.0
60                ),
61                &path,
62                "Review and periodically clear conversation history. Consider configuring \
63             a maximum history retention period.",
64            )
65            .with_evidence(format!("{:.1} MB", size as f64 / 1024.0 / 1024.0)),
66        );
67    }
68}
69
70fn check_debug_directory(root: &Path, findings: &mut Vec<Finding>) {
71    let debug_dir = root.join("debug");
72    if !debug_dir.is_dir() {
73        return;
74    }
75
76    let total_size: u64 = walkdir::WalkDir::new(&debug_dir)
77        .into_iter()
78        .filter_map(|e| e.ok())
79        .filter(|e| e.file_type().is_file())
80        .filter_map(|e| e.metadata().ok())
81        .map(|m| m.len())
82        .sum();
83
84    if total_size > DEBUG_SIZE_WARN {
85        findings.push(
86            Finding::new(
87                Severity::Medium,
88                Category::DataExposure,
89                "Debug log directory is very large",
90                format!(
91                    "The debug directory '{}' contains {:.1} MB of logs. Debug logs \
92                 often contain full conversation content, tool inputs and outputs, \
93                 and can include credentials that were pasted into conversations.",
94                    debug_dir.display(),
95                    total_size as f64 / 1024.0 / 1024.0
96                ),
97                &debug_dir,
98                "Clear old debug logs periodically. Check whether debug logging is \
99             enabled and disable it if not needed.",
100            )
101            .with_evidence(format!("{:.1} MB", total_size as f64 / 1024.0 / 1024.0)),
102        );
103    }
104}
105
106fn check_backup_files(root: &Path, findings: &mut Vec<Finding>) {
107    let backups_dir = root.join("backups");
108    if !backups_dir.is_dir() {
109        return;
110    }
111
112    let mut old_backups: Vec<(std::path::PathBuf, i64)> = Vec::new();
113    let mut total_count = 0usize;
114
115    for entry in walkdir::WalkDir::new(&backups_dir)
116        .max_depth(2)
117        .into_iter()
118        .filter_map(|e| e.ok())
119        .filter(|e| e.file_type().is_file())
120    {
121        total_count += 1;
122        if let Ok(meta) = entry.metadata() {
123            if let Ok(modified) = meta.modified() {
124                let modified_dt: DateTime<Utc> = modified.into();
125                let age_days = (Utc::now() - modified_dt).num_days();
126                if age_days > BACKUP_MAX_AGE_DAYS {
127                    old_backups.push((entry.path().to_path_buf(), age_days));
128                }
129            }
130        }
131    }
132
133    if !old_backups.is_empty() {
134        findings.push(
135            Finding::new(
136                Severity::Medium,
137                Category::DataExposure,
138                format!(
139                    "{} backup file(s) older than {} days",
140                    old_backups.len(),
141                    BACKUP_MAX_AGE_DAYS
142                ),
143                format!(
144                    "Found {} backup files in '{}' that are over {} days old (oldest: {} days). \
145                 Old backups accumulate sensitive data and are rarely needed for recovery.",
146                    old_backups.len(),
147                    backups_dir.display(),
148                    BACKUP_MAX_AGE_DAYS,
149                    old_backups
150                        .iter()
151                        .map(|(_, d)| d)
152                        .max()
153                        .copied()
154                        .unwrap_or(0)
155                ),
156                &backups_dir,
157                format!(
158                    "Delete backups older than {} days. Consider configuring automatic \
159                 backup rotation.",
160                    BACKUP_MAX_AGE_DAYS
161                ),
162            )
163            .with_evidence(format!("{} old files", old_backups.len())),
164        );
165    }
166
167    if total_count > BACKUP_MAX_COUNT {
168        findings.push(Finding::new(
169            Severity::Low,
170            Category::DataExposure,
171            format!("Excessive number of backup files ({})", total_count),
172            format!(
173                "The backups directory '{}' contains {} files. Keeping many backups \
174                 increases the attack surface if the directory is ever accessed by \
175                 an unauthorised party.",
176                backups_dir.display(),
177                total_count
178            ),
179            &backups_dir,
180            format!(
181                "Limit the number of retained backups to {}. Delete older backups.",
182                BACKUP_MAX_COUNT
183            ),
184        ));
185    }
186}
187
188fn check_shell_snapshots(root: &Path, findings: &mut Vec<Finding>) {
189    let snapshots_dir = root.join("shell-snapshots");
190    if !snapshots_dir.is_dir() {
191        return;
192    }
193
194    let count = walkdir::WalkDir::new(&snapshots_dir)
195        .into_iter()
196        .filter_map(|e| e.ok())
197        .filter(|e| e.file_type().is_file())
198        .count();
199
200    if count > 0 {
201        findings.push(Finding::new(
202            Severity::Info,
203            Category::DataExposure,
204            format!("{} shell snapshot(s) stored", count),
205            format!(
206                "The directory '{}' contains {} shell snapshot file(s). Shell snapshots \
207                 capture terminal output and may contain command output with sensitive data.",
208                snapshots_dir.display(),
209                count
210            ),
211            &snapshots_dir,
212            "Review shell snapshots and delete any that contain sensitive output. \
213             Disable shell snapshot capture if not required.",
214        ));
215    }
216}
217
218// ── Tests ─────────────────────────────────────────────────────────────────────
219
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Writes a file called `name` into `dir`, filled with `size_bytes` bytes of `x`.
    fn make_file(dir: &TempDir, name: &str, size_bytes: usize) {
        let contents = "x".repeat(size_bytes);
        std::fs::write(dir.path().join(name), contents).unwrap();
    }

    /// Runs `check_history_size` on `dir` and returns whatever it reported.
    fn history_findings(dir: &TempDir) -> Vec<Finding> {
        let mut reported = Vec::new();
        check_history_size(dir.path(), &mut reported);
        reported
    }

    /// Runs `check_backup_files` on `dir` and returns whatever it reported.
    fn backup_findings(dir: &TempDir) -> Vec<Finding> {
        let mut reported = Vec::new();
        check_backup_files(dir.path(), &mut reported);
        reported
    }

    #[test]
    fn no_findings_on_empty_dir() {
        let dir = tempfile::tempdir().unwrap();
        let ctx = crate::scanner::ScanContext {
            root: dir.path().to_path_buf(),
            framework: crate::paths::FrameworkHint::Unknown,
        };
        let findings = HistoryScanner.scan(&ctx).unwrap();
        assert!(findings.is_empty());
    }

    #[test]
    fn detects_large_history() {
        let dir = tempfile::tempdir().unwrap();
        // One byte over the threshold is enough to trigger the finding.
        make_file(&dir, "history.jsonl", (HISTORY_SIZE_WARN + 1) as usize);
        let findings = history_findings(&dir);
        assert!(findings.iter().any(|f| f.severity == Severity::High));
    }

    #[test]
    fn no_finding_for_small_history() {
        let dir = tempfile::tempdir().unwrap();
        make_file(&dir, "history.jsonl", 1024);
        assert!(history_findings(&dir).is_empty());
    }

    #[test]
    fn detects_excessive_backups() {
        let dir = tempfile::tempdir().unwrap();
        let backups = dir.path().join("backups");
        std::fs::create_dir(&backups).unwrap();
        // The inclusive range yields BACKUP_MAX_COUNT + 1 files, one past the limit.
        (0..=BACKUP_MAX_COUNT)
            .map(|i| backups.join(format!("backup_{}.json", i)))
            .for_each(|file| std::fs::write(file, b"{}").unwrap());
        let findings = backup_findings(&dir);
        assert!(findings.iter().any(|f| f.title.contains("Excessive")));
    }
}
273}