1use std::path::Path;
7
8use anyhow::Result;
9use chrono::{DateTime, Utc};
10
11use crate::finding::{Category, Finding, Severity};
12use crate::scanner::{ScanContext, Scanner};
13
/// History file size, in bytes, above which a finding is raised (10 MiB; `1 << 20` is one MiB).
const HISTORY_SIZE_WARN: u64 = 10 * (1 << 20);
/// Combined size of the debug directory, in bytes, above which a finding is raised (50 MiB).
const DEBUG_SIZE_WARN: u64 = 50 * (1 << 20);
/// Backup files whose age in whole days exceeds this value are reported as old.
const BACKUP_MAX_AGE_DAYS: i64 = 30;
/// Number of backup files that may be present before the count itself is reported.
const BACKUP_MAX_COUNT: usize = 5;
22
23pub struct HistoryScanner;
24
25impl Scanner for HistoryScanner {
26 fn name(&self) -> &'static str {
27 "history"
28 }
29
30 fn scan(&self, ctx: &ScanContext) -> Result<Vec<Finding>> {
31 let mut findings = Vec::new();
32
33 check_history_size(&ctx.root, &mut findings);
34 check_debug_directory(&ctx.root, &mut findings);
35 check_backup_files(&ctx.root, &mut findings);
36 check_shell_snapshots(&ctx.root, &mut findings);
37
38 Ok(findings)
39 }
40}
41
42fn check_history_size(root: &Path, findings: &mut Vec<Finding>) {
43 let path = root.join("history.jsonl");
44 if !path.exists() {
45 return;
46 }
47 let size = std::fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
48 if size > HISTORY_SIZE_WARN {
49 findings.push(
50 Finding::new(
51 Severity::High,
52 Category::DataExposure,
53 "Conversation history file is very large",
54 format!(
55 "'{}' is {:.1} MB. Large history files increase the risk of \
56 credential exposure — the more history stored, the more likely \
57 it contains sensitive information pasted during past sessions.",
58 path.display(),
59 size as f64 / 1024.0 / 1024.0
60 ),
61 &path,
62 "Review and periodically clear conversation history. Consider configuring \
63 a maximum history retention period.",
64 )
65 .with_evidence(format!("{:.1} MB", size as f64 / 1024.0 / 1024.0)),
66 );
67 }
68}
69
70fn check_debug_directory(root: &Path, findings: &mut Vec<Finding>) {
71 let debug_dir = root.join("debug");
72 if !debug_dir.is_dir() {
73 return;
74 }
75
76 let total_size: u64 = walkdir::WalkDir::new(&debug_dir)
77 .into_iter()
78 .filter_map(|e| e.ok())
79 .filter(|e| e.file_type().is_file())
80 .filter_map(|e| e.metadata().ok())
81 .map(|m| m.len())
82 .sum();
83
84 if total_size > DEBUG_SIZE_WARN {
85 findings.push(
86 Finding::new(
87 Severity::Medium,
88 Category::DataExposure,
89 "Debug log directory is very large",
90 format!(
91 "The debug directory '{}' contains {:.1} MB of logs. Debug logs \
92 often contain full conversation content, tool inputs and outputs, \
93 and can include credentials that were pasted into conversations.",
94 debug_dir.display(),
95 total_size as f64 / 1024.0 / 1024.0
96 ),
97 &debug_dir,
98 "Clear old debug logs periodically. Check whether debug logging is \
99 enabled and disable it if not needed.",
100 )
101 .with_evidence(format!("{:.1} MB", total_size as f64 / 1024.0 / 1024.0)),
102 );
103 }
104}
105
106fn check_backup_files(root: &Path, findings: &mut Vec<Finding>) {
107 let backups_dir = root.join("backups");
108 if !backups_dir.is_dir() {
109 return;
110 }
111
112 let mut old_backups: Vec<(std::path::PathBuf, i64)> = Vec::new();
113 let mut total_count = 0usize;
114
115 for entry in walkdir::WalkDir::new(&backups_dir)
116 .max_depth(2)
117 .into_iter()
118 .filter_map(|e| e.ok())
119 .filter(|e| e.file_type().is_file())
120 {
121 total_count += 1;
122 if let Ok(meta) = entry.metadata() {
123 if let Ok(modified) = meta.modified() {
124 let modified_dt: DateTime<Utc> = modified.into();
125 let age_days = (Utc::now() - modified_dt).num_days();
126 if age_days > BACKUP_MAX_AGE_DAYS {
127 old_backups.push((entry.path().to_path_buf(), age_days));
128 }
129 }
130 }
131 }
132
133 if !old_backups.is_empty() {
134 findings.push(
135 Finding::new(
136 Severity::Medium,
137 Category::DataExposure,
138 format!(
139 "{} backup file(s) older than {} days",
140 old_backups.len(),
141 BACKUP_MAX_AGE_DAYS
142 ),
143 format!(
144 "Found {} backup files in '{}' that are over {} days old (oldest: {} days). \
145 Old backups accumulate sensitive data and are rarely needed for recovery.",
146 old_backups.len(),
147 backups_dir.display(),
148 BACKUP_MAX_AGE_DAYS,
149 old_backups
150 .iter()
151 .map(|(_, d)| d)
152 .max()
153 .copied()
154 .unwrap_or(0)
155 ),
156 &backups_dir,
157 format!(
158 "Delete backups older than {} days. Consider configuring automatic \
159 backup rotation.",
160 BACKUP_MAX_AGE_DAYS
161 ),
162 )
163 .with_evidence(format!("{} old files", old_backups.len())),
164 );
165 }
166
167 if total_count > BACKUP_MAX_COUNT {
168 findings.push(Finding::new(
169 Severity::Low,
170 Category::DataExposure,
171 format!("Excessive number of backup files ({})", total_count),
172 format!(
173 "The backups directory '{}' contains {} files. Keeping many backups \
174 increases the attack surface if the directory is ever accessed by \
175 an unauthorised party.",
176 backups_dir.display(),
177 total_count
178 ),
179 &backups_dir,
180 format!(
181 "Limit the number of retained backups to {}. Delete older backups.",
182 BACKUP_MAX_COUNT
183 ),
184 ));
185 }
186}
187
188fn check_shell_snapshots(root: &Path, findings: &mut Vec<Finding>) {
189 let snapshots_dir = root.join("shell-snapshots");
190 if !snapshots_dir.is_dir() {
191 return;
192 }
193
194 let count = walkdir::WalkDir::new(&snapshots_dir)
195 .into_iter()
196 .filter_map(|e| e.ok())
197 .filter(|e| e.file_type().is_file())
198 .count();
199
200 if count > 0 {
201 findings.push(Finding::new(
202 Severity::Info,
203 Category::DataExposure,
204 format!("{} shell snapshot(s) stored", count),
205 format!(
206 "The directory '{}' contains {} shell snapshot file(s). Shell snapshots \
207 capture terminal output and may contain command output with sensitive data.",
208 snapshots_dir.display(),
209 count
210 ),
211 &snapshots_dir,
212 "Review shell snapshots and delete any that contain sensitive output. \
213 Disable shell snapshot capture if not required.",
214 ));
215 }
216}
217
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates `name` inside `dir` and fills it with `size_bytes` bytes of `x`.
    fn make_file(dir: &TempDir, name: &str, size_bytes: usize) {
        let contents = "x".repeat(size_bytes);
        std::fs::write(dir.path().join(name), contents).unwrap();
    }

    /// Runs `check_history_size` on a fresh temp dir whose `history.jsonl`
    /// is `size_bytes` long and returns whatever it reported.
    fn history_findings(size_bytes: usize) -> Vec<Finding> {
        let dir = tempfile::tempdir().unwrap();
        make_file(&dir, "history.jsonl", size_bytes);

        let mut findings = Vec::new();
        check_history_size(dir.path(), &mut findings);
        findings
    }

    /// A root with none of the scanned files or directories yields no findings.
    #[test]
    fn no_findings_on_empty_dir() {
        let dir = tempfile::tempdir().unwrap();
        let ctx = crate::scanner::ScanContext {
            root: dir.path().to_path_buf(),
            framework: crate::paths::FrameworkHint::Unknown,
        };

        let findings = HistoryScanner.scan(&ctx).unwrap();

        assert!(findings.is_empty());
    }

    /// One byte over the threshold triggers the high-severity finding.
    #[test]
    fn detects_large_history() {
        let findings = history_findings((HISTORY_SIZE_WARN + 1) as usize);

        assert!(findings.iter().any(|f| f.severity == Severity::High));
    }

    /// A history file well under the threshold is not reported.
    #[test]
    fn no_finding_for_small_history() {
        let findings = history_findings(1024);

        assert!(findings.is_empty());
    }

    /// `BACKUP_MAX_COUNT + 1` backup files trigger the "Excessive" finding.
    #[test]
    fn detects_excessive_backups() {
        let dir = tempfile::tempdir().unwrap();
        let backups = dir.path().join("backups");
        std::fs::create_dir(&backups).unwrap();

        // The inclusive range yields one file more than the allowed maximum.
        (0..=BACKUP_MAX_COUNT).for_each(|i| {
            let file = backups.join(format!("backup_{}.json", i));
            std::fs::write(file, b"{}").unwrap();
        });

        let mut findings = Vec::new();
        check_backup_files(dir.path(), &mut findings);

        assert!(findings.iter().any(|f| f.title.contains("Excessive")));
    }
}