use std::path::Path;
use anyhow::Result;
use chrono::{DateTime, Utc};
use crate::finding::{Category, Finding, Severity};
use crate::scanner::{ScanContext, Scanner};
/// Size in bytes (10 * 1024 * 1024) that `history.jsonl` must exceed before it is reported.
const HISTORY_SIZE_WARN: u64 = 10 * 1024 * 1024;
/// Combined size in bytes (50 * 1024 * 1024) that the files under `debug/` must exceed
/// before the directory is reported.
const DEBUG_SIZE_WARN: u64 = 50 * 1024 * 1024;
/// Age in whole days that a backup file must exceed to be counted as old.
const BACKUP_MAX_AGE_DAYS: i64 = 30;
/// Number of backup files that must be exceeded before the backup count is reported.
const BACKUP_MAX_COUNT: usize = 5;
/// Stateless scanner, named `"history"`, that inspects the history file and the
/// `debug`, `backups` and `shell-snapshots` directories under the scan root.
pub struct HistoryScanner;
impl Scanner for HistoryScanner {
fn name(&self) -> &'static str {
"history"
}
fn scan(&self, ctx: &ScanContext) -> Result<Vec<Finding>> {
let mut findings = Vec::new();
check_history_size(&ctx.root, &mut findings);
check_debug_directory(&ctx.root, &mut findings);
check_backup_files(&ctx.root, &mut findings);
check_shell_snapshots(&ctx.root, &mut findings);
Ok(findings)
}
}
/// Reports `history.jsonl` under `root` when it is larger than
/// `HISTORY_SIZE_WARN` bytes.
///
/// A single `metadata` call both detects the file and reads its length, so the
/// file is not stat-ed twice and cannot vanish between an existence check and
/// the size read. If the file is missing or cannot be inspected, nothing is
/// reported.
fn check_history_size(root: &Path, findings: &mut Vec<Finding>) {
    let path = root.join("history.jsonl");
    let size = match std::fs::metadata(&path) {
        Ok(meta) => meta.len(),
        // Absent or unreadable: nothing to measure, so nothing to report.
        Err(_) => return,
    };
    if size <= HISTORY_SIZE_WARN {
        return;
    }

    // Computed once and shared by the description and the evidence string.
    let size_mb = size as f64 / 1024.0 / 1024.0;
    findings.push(
        Finding::new(
            Severity::High,
            Category::DataExposure,
            "Conversation history file is very large",
            format!(
                "'{}' is {:.1} MB. Large history files increase the risk of \
                 credential exposure — the more history stored, the more likely \
                 it contains sensitive information pasted during past sessions.",
                path.display(),
                size_mb
            ),
            &path,
            "Review and periodically clear conversation history. Consider configuring \
             a maximum history retention period.",
        )
        .with_evidence(format!("{:.1} MB", size_mb)),
    );
}
/// Reports the `debug` directory under `root` when the combined size of the
/// regular files beneath it is larger than `DEBUG_SIZE_WARN` bytes.
///
/// Entries that cannot be walked, and files whose metadata cannot be read, are
/// skipped and contribute nothing to the total.
fn check_debug_directory(root: &Path, findings: &mut Vec<Finding>) {
    let debug_dir = root.join("debug");
    if !debug_dir.is_dir() {
        return;
    }

    let mut log_bytes: u64 = 0;
    for walked in walkdir::WalkDir::new(&debug_dir) {
        let entry = match walked {
            Ok(entry) => entry,
            Err(_) => continue,
        };
        if !entry.file_type().is_file() {
            continue;
        }
        if let Ok(meta) = entry.metadata() {
            log_bytes += meta.len();
        }
    }

    if log_bytes <= DEBUG_SIZE_WARN {
        return;
    }

    let log_mb = log_bytes as f64 / 1024.0 / 1024.0;
    let description = format!(
        "The debug directory '{}' contains {:.1} MB of logs. Debug logs \
         often contain full conversation content, tool inputs and outputs, \
         and can include credentials that were pasted into conversations.",
        debug_dir.display(),
        log_mb
    );
    let finding = Finding::new(
        Severity::Medium,
        Category::DataExposure,
        "Debug log directory is very large",
        description,
        &debug_dir,
        "Clear old debug logs periodically. Check whether debug logging is \
         enabled and disable it if not needed.",
    )
    .with_evidence(format!("{:.1} MB", log_mb));
    findings.push(finding);
}
/// Inspects the `backups` directory under `root` and reports two independent
/// conditions:
///
/// * one or more files last modified more than `BACKUP_MAX_AGE_DAYS` days ago;
/// * more than `BACKUP_MAX_COUNT` files in total.
///
/// Only regular files found by a walk limited to depth 2 are considered. Files
/// whose metadata or modification time cannot be read still count towards the
/// total but are never treated as old.
fn check_backup_files(root: &Path, findings: &mut Vec<Finding>) {
    let backups_dir = root.join("backups");
    if !backups_dir.is_dir() {
        return;
    }

    // Read the clock once so every file is aged against the same instant.
    let now = Utc::now();
    let mut total_count = 0usize;
    // Only the number of old files and the greatest age are ever reported, so
    // the individual paths are not retained.
    let mut old_count = 0usize;
    let mut oldest_age_days = 0i64;

    for entry in walkdir::WalkDir::new(&backups_dir)
        .max_depth(2)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
    {
        total_count += 1;

        let modified = match entry.metadata().ok().and_then(|meta| meta.modified().ok()) {
            Some(modified) => modified,
            None => continue,
        };
        let modified_dt: DateTime<Utc> = modified.into();
        let age_days = (now - modified_dt).num_days();
        if age_days > BACKUP_MAX_AGE_DAYS {
            old_count += 1;
            oldest_age_days = oldest_age_days.max(age_days);
        }
    }

    if old_count > 0 {
        findings.push(
            Finding::new(
                Severity::Medium,
                Category::DataExposure,
                format!(
                    "{} backup file(s) older than {} days",
                    old_count, BACKUP_MAX_AGE_DAYS
                ),
                format!(
                    "Found {} backup files in '{}' that are over {} days old (oldest: {} days). \
                     Old backups accumulate sensitive data and are rarely needed for recovery.",
                    old_count,
                    backups_dir.display(),
                    BACKUP_MAX_AGE_DAYS,
                    oldest_age_days
                ),
                &backups_dir,
                format!(
                    "Delete backups older than {} days. Consider configuring automatic \
                     backup rotation.",
                    BACKUP_MAX_AGE_DAYS
                ),
            )
            .with_evidence(format!("{} old files", old_count)),
        );
    }

    if total_count > BACKUP_MAX_COUNT {
        findings.push(Finding::new(
            Severity::Low,
            Category::DataExposure,
            format!("Excessive number of backup files ({})", total_count),
            format!(
                "The backups directory '{}' contains {} files. Keeping many backups \
                 increases the attack surface if the directory is ever accessed by \
                 an unauthorised party.",
                backups_dir.display(),
                total_count
            ),
            &backups_dir,
            format!(
                "Limit the number of retained backups to {}. Delete older backups.",
                BACKUP_MAX_COUNT
            ),
        ));
    }
}
/// Emits an informational finding when the `shell-snapshots` directory under
/// `root` contains at least one regular file.
///
/// Entries that cannot be walked are ignored.
fn check_shell_snapshots(root: &Path, findings: &mut Vec<Finding>) {
    let snapshots_dir = root.join("shell-snapshots");
    if !snapshots_dir.is_dir() {
        return;
    }

    let snapshot_total = walkdir::WalkDir::new(&snapshots_dir)
        .into_iter()
        .filter(|walked| matches!(walked, Ok(entry) if entry.file_type().is_file()))
        .count();
    if snapshot_total == 0 {
        return;
    }

    let title = format!("{} shell snapshot(s) stored", snapshot_total);
    let description = format!(
        "The directory '{}' contains {} shell snapshot file(s). Shell snapshots \
         capture terminal output and may contain command output with sensitive data.",
        snapshots_dir.display(),
        snapshot_total
    );
    findings.push(Finding::new(
        Severity::Info,
        Category::DataExposure,
        title,
        description,
        &snapshots_dir,
        "Review shell snapshots and delete any that contain sensitive output. \
         Disable shell snapshot capture if not required.",
    ));
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates `name` inside `dir`, filled with `size_bytes` copies of `b'x'`.
    fn make_file(dir: &TempDir, name: &str, size_bytes: usize) {
        let contents = vec![b'x'; size_bytes];
        std::fs::write(dir.path().join(name), contents).unwrap();
    }

    /// A root with none of the inspected files or directories yields nothing.
    #[test]
    fn no_findings_on_empty_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = ScanContext {
            root: dir_root(&tmp),
            framework: crate::paths::FrameworkHint::Unknown,
        };
        let reported = HistoryScanner.scan(&ctx).unwrap();
        assert!(reported.is_empty());
    }

    /// Owned copy of the temporary directory's path.
    fn dir_root(dir: &TempDir) -> std::path::PathBuf {
        dir.path().to_path_buf()
    }

    /// One byte over the threshold is enough to raise a high-severity finding.
    #[test]
    fn detects_large_history() {
        let tmp = tempfile::tempdir().unwrap();
        make_file(&tmp, "history.jsonl", (HISTORY_SIZE_WARN + 1) as usize);

        let mut reported = Vec::new();
        check_history_size(tmp.path(), &mut reported);

        let has_high = reported.iter().any(|f| f.severity == Severity::High);
        assert!(has_high);
    }

    /// A small history file is not reported.
    #[test]
    fn no_finding_for_small_history() {
        let tmp = tempfile::tempdir().unwrap();
        make_file(&tmp, "history.jsonl", 1024);

        let mut reported = Vec::new();
        check_history_size(tmp.path(), &mut reported);

        assert!(reported.is_empty());
    }

    /// One file more than the limit triggers the "Excessive" finding.
    #[test]
    fn detects_excessive_backups() {
        let tmp = tempfile::tempdir().unwrap();
        let backups_dir = tmp.path().join("backups");
        std::fs::create_dir(&backups_dir).unwrap();
        (0..=BACKUP_MAX_COUNT).for_each(|idx| {
            let file = backups_dir.join(format!("backup_{}.json", idx));
            std::fs::write(file, b"{}").unwrap();
        });

        let mut reported = Vec::new();
        check_backup_files(tmp.path(), &mut reported);

        let has_excessive = reported.iter().any(|f| f.title.contains("Excessive"));
        assert!(has_excessive);
    }
}