// openclaw-scan 0.1.1
//
// Security scanner for agentic AI framework installations (OpenClaw, Claude Code, and compatible)
// Documentation
//! Data exposure scanner.
//!
//! Checks for excessive data retention in history files, debug logs, and
//! backups that increase the blast radius of a breach.

use std::path::Path;

use anyhow::Result;
use chrono::{DateTime, Utc};

use crate::finding::{Category, Finding, Severity};
use crate::scanner::{ScanContext, Scanner};

/// History size threshold in bytes (10 * 1024 * 1024, i.e. 10 MB).
///
/// `check_history_size` reports a HIGH finding when `history.jsonl` is
/// strictly larger than this.
const HISTORY_SIZE_WARN: u64 = 10 * 1024 * 1024;
/// Debug directory size threshold in bytes (50 * 1024 * 1024, i.e. 50 MB).
///
/// `check_debug_directory` reports a MEDIUM finding when the combined size of
/// all files under `debug/` is strictly larger than this.
const DEBUG_SIZE_WARN: u64 = 50 * 1024 * 1024;
/// Maximum acceptable backup age in whole days.
///
/// `check_backup_files` counts a backup as "old" when its modification time is
/// more than this many days in the past.
const BACKUP_MAX_AGE_DAYS: i64 = 30;
/// Maximum acceptable number of backup files.
///
/// `check_backup_files` reports a LOW finding when more files than this are
/// present, regardless of their age.
const BACKUP_MAX_COUNT: usize = 5;

pub struct HistoryScanner;

impl Scanner for HistoryScanner {
    fn name(&self) -> &'static str {
        "history"
    }

    fn scan(&self, ctx: &ScanContext) -> Result<Vec<Finding>> {
        let mut findings = Vec::new();

        check_history_size(&ctx.root, &mut findings);
        check_debug_directory(&ctx.root, &mut findings);
        check_backup_files(&ctx.root, &mut findings);
        check_shell_snapshots(&ctx.root, &mut findings);

        Ok(findings)
    }
}

/// Flags an oversized conversation history file.
///
/// Looks at `<root>/history.jsonl` and appends a HIGH finding to `findings`
/// when the file is strictly larger than `HISTORY_SIZE_WARN` bytes. A missing
/// or unreadable file produces no finding.
fn check_history_size(root: &Path, findings: &mut Vec<Finding>) {
    let history_file = root.join("history.jsonl");

    // A failed metadata lookup covers both "does not exist" and "cannot be
    // read"; neither case is reported.
    let bytes = match std::fs::metadata(&history_file) {
        Ok(meta) => meta.len(),
        Err(_) => return,
    };
    if bytes <= HISTORY_SIZE_WARN {
        return;
    }

    let megabytes = bytes as f64 / 1024.0 / 1024.0;
    let description = format!(
        "'{}' is {:.1} MB. Large history files increase the risk of \
         credential exposure — the more history stored, the more likely \
         it contains sensitive information pasted during past sessions.",
        history_file.display(),
        megabytes
    );
    let evidence = format!("{:.1} MB", megabytes);

    let finding = Finding::new(
        Severity::High,
        Category::DataExposure,
        "Conversation history file is very large",
        description,
        &history_file,
        "Review and periodically clear conversation history. Consider configuring \
         a maximum history retention period.",
    )
    .with_evidence(evidence);
    findings.push(finding);
}

/// Flags an oversized debug log directory.
///
/// Recursively sums the size of every regular file under `<root>/debug` and
/// appends a MEDIUM finding to `findings` when the total is strictly larger
/// than `DEBUG_SIZE_WARN` bytes. Does nothing when the directory is absent.
fn check_debug_directory(root: &Path, findings: &mut Vec<Finding>) {
    let debug_dir = root.join("debug");
    if !debug_dir.is_dir() {
        return;
    }

    // Entries that cannot be read, and files whose metadata is unavailable,
    // are skipped rather than aborting the scan.
    let mut log_bytes: u64 = 0;
    for walked in walkdir::WalkDir::new(&debug_dir) {
        let entry = match walked {
            Ok(entry) => entry,
            Err(_) => continue,
        };
        if !entry.file_type().is_file() {
            continue;
        }
        if let Ok(meta) = entry.metadata() {
            log_bytes += meta.len();
        }
    }

    if log_bytes <= DEBUG_SIZE_WARN {
        return;
    }

    let megabytes = log_bytes as f64 / 1024.0 / 1024.0;
    let description = format!(
        "The debug directory '{}' contains {:.1} MB of logs. Debug logs \
         often contain full conversation content, tool inputs and outputs, \
         and can include credentials that were pasted into conversations.",
        debug_dir.display(),
        megabytes
    );
    let evidence = format!("{:.1} MB", megabytes);

    let finding = Finding::new(
        Severity::Medium,
        Category::DataExposure,
        "Debug log directory is very large",
        description,
        &debug_dir,
        "Clear old debug logs periodically. Check whether debug logging is \
         enabled and disable it if not needed.",
    )
    .with_evidence(evidence);
    findings.push(finding);
}

/// Flags stale and excessive backup files.
///
/// Walks `<root>/backups` (at most two levels deep) and appends to `findings`:
///
/// * a MEDIUM finding when at least one file was last modified more than
///   `BACKUP_MAX_AGE_DAYS` days ago, and
/// * a LOW finding when more than `BACKUP_MAX_COUNT` files are present.
///
/// Does nothing when the directory is absent. Files whose metadata or
/// modification time cannot be read still count towards the total but are
/// never treated as old.
fn check_backup_files(root: &Path, findings: &mut Vec<Finding>) {
    let backups_dir = root.join("backups");
    if !backups_dir.is_dir() {
        return;
    }

    // Sample the clock once so every file is aged against the same instant.
    let now = Utc::now();

    // Only the number of old backups and the greatest age are reported, so
    // there is no need to retain the individual paths.
    let mut total_count = 0usize;
    let mut old_count = 0usize;
    let mut oldest_age_days = 0i64;

    for entry in walkdir::WalkDir::new(&backups_dir)
        .max_depth(2)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
    {
        total_count += 1;

        let modified = match entry.metadata().ok().and_then(|meta| meta.modified().ok()) {
            Some(modified) => modified,
            None => continue,
        };
        let modified_dt: DateTime<Utc> = modified.into();
        let age_days = (now - modified_dt).num_days();
        if age_days > BACKUP_MAX_AGE_DAYS {
            old_count += 1;
            oldest_age_days = oldest_age_days.max(age_days);
        }
    }

    if old_count > 0 {
        findings.push(
            Finding::new(
                Severity::Medium,
                Category::DataExposure,
                format!(
                    "{} backup file(s) older than {} days",
                    old_count, BACKUP_MAX_AGE_DAYS
                ),
                format!(
                    "Found {} backup files in '{}' that are over {} days old (oldest: {} days). \
                     Old backups accumulate sensitive data and are rarely needed for recovery.",
                    old_count,
                    backups_dir.display(),
                    BACKUP_MAX_AGE_DAYS,
                    oldest_age_days
                ),
                &backups_dir,
                format!(
                    "Delete backups older than {} days. Consider configuring automatic \
                     backup rotation.",
                    BACKUP_MAX_AGE_DAYS
                ),
            )
            .with_evidence(format!("{} old files", old_count)),
        );
    }

    if total_count > BACKUP_MAX_COUNT {
        findings.push(Finding::new(
            Severity::Low,
            Category::DataExposure,
            format!("Excessive number of backup files ({})", total_count),
            format!(
                "The backups directory '{}' contains {} files. Keeping many backups \
                 increases the attack surface if the directory is ever accessed by \
                 an unauthorised party.",
                backups_dir.display(),
                total_count
            ),
            &backups_dir,
            format!(
                "Limit the number of retained backups to {}. Delete older backups.",
                BACKUP_MAX_COUNT
            ),
        ));
    }
}

/// Reports the presence of stored shell snapshots.
///
/// Recursively counts the regular files under `<root>/shell-snapshots` and
/// appends an INFO finding to `findings` when there is at least one. Does
/// nothing when the directory is absent.
fn check_shell_snapshots(root: &Path, findings: &mut Vec<Finding>) {
    let snapshots_dir = root.join("shell-snapshots");
    if !snapshots_dir.is_dir() {
        return;
    }

    // `flatten` drops entries the walker failed to read.
    let snapshot_files = walkdir::WalkDir::new(&snapshots_dir)
        .into_iter()
        .flatten()
        .filter(|entry| entry.file_type().is_file())
        .count();
    if snapshot_files == 0 {
        return;
    }

    let title = format!("{} shell snapshot(s) stored", snapshot_files);
    let description = format!(
        "The directory '{}' contains {} shell snapshot file(s). Shell snapshots \
         capture terminal output and may contain command output with sensitive data.",
        snapshots_dir.display(),
        snapshot_files
    );

    findings.push(Finding::new(
        Severity::Info,
        Category::DataExposure,
        title,
        description,
        &snapshots_dir,
        "Review shell snapshots and delete any that contain sensitive output. \
         Disable shell snapshot capture if not required.",
    ));
}

// ── Tests ─────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates `name` inside `dir`, filled with `size_bytes` bytes of `x`.
    fn make_file(dir: &TempDir, name: &str, size_bytes: usize) {
        std::fs::write(dir.path().join(name), vec![b'x'; size_bytes]).unwrap();
    }

    /// A completely empty root must not produce any finding.
    #[test]
    fn no_findings_on_empty_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let ctx = crate::scanner::ScanContext {
            root: dir_root(&tmp),
            framework: crate::paths::FrameworkHint::Unknown,
        };
        let reported = HistoryScanner.scan(&ctx).unwrap();
        assert!(reported.is_empty());
    }

    /// Owned copy of the temporary directory's path.
    fn dir_root(dir: &TempDir) -> std::path::PathBuf {
        dir.path().to_path_buf()
    }

    /// A history file one byte over the threshold yields a HIGH finding.
    #[test]
    fn detects_large_history() {
        let tmp = tempfile::tempdir().unwrap();
        make_file(&tmp, "history.jsonl", (HISTORY_SIZE_WARN + 1) as usize);

        let mut reported = Vec::new();
        check_history_size(tmp.path(), &mut reported);

        let has_high = reported.iter().any(|f| f.severity == Severity::High);
        assert!(has_high);
    }

    /// A small history file is not reported.
    #[test]
    fn no_finding_for_small_history() {
        let tmp = tempfile::tempdir().unwrap();
        make_file(&tmp, "history.jsonl", 1024);

        let mut reported = Vec::new();
        check_history_size(tmp.path(), &mut reported);

        assert!(reported.is_empty());
    }

    /// One backup more than the limit triggers the "Excessive" finding.
    #[test]
    fn detects_excessive_backups() {
        let tmp = tempfile::tempdir().unwrap();
        let backups_dir = tmp.path().join("backups");
        std::fs::create_dir(&backups_dir).unwrap();
        (0..=BACKUP_MAX_COUNT).for_each(|i| {
            let file = backups_dir.join(format!("backup_{}.json", i));
            std::fs::write(file, b"{}").unwrap();
        });

        let mut reported = Vec::new();
        check_backup_files(tmp.path(), &mut reported);

        let has_excessive = reported.iter().any(|f| f.title.contains("Excessive"));
        assert!(has_excessive);
    }
}