Skip to main content

clawbox_proxy/
audit.rs

1//! Audit trail — structured append-only logging for proxy requests.
2
3use chrono::Utc;
4use serde::{Deserialize, Serialize};
5use std::fs::{self, OpenOptions};
6use std::io::Write;
7use std::path::PathBuf;
8use std::sync::Mutex;
9use std::time::Instant;
10
/// Ceiling on the number of audit writes accepted within a one-second window.
const AUDIT_MAX_WRITES_PER_SEC: usize = 100;
/// Initial capacity reserved for the buffer of recent write timestamps.
const AUDIT_RATE_WINDOW_SIZE: usize = 100;

/// Size in bytes at which the active log file is rotated (10 MB).
const MAX_LOG_SIZE: u64 = 10 << 20;
/// How many rotated log files are retained next to the active one.
const MAX_ROTATED_FILES: u32 = 5;
20
21/// Errors from audit logging operations.
22#[derive(Debug, thiserror::Error)]
23#[non_exhaustive]
24pub enum AuditError {
25    /// I/O error writing the audit log.
26    #[error("audit I/O error: {0}")]
27    Io(#[from] std::io::Error),
28    /// Serialization error encoding an audit entry.
29    #[error("audit serialization error: {0}")]
30    Serialization(#[from] serde_json::Error),
31}
32
33/// A single audit log entry for a proxied request.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35#[non_exhaustive]
36pub struct AuditEntry {
37    /// ISO 8601 timestamp.
38    pub timestamp: String,
39    /// The request URL (query parameter values are redacted when logged).
40    pub url: String,
41    /// HTTP method.
42    pub method: String,
43    /// Response status code.
44    pub status: u16,
45    /// Request duration in milliseconds.
46    pub duration_ms: u64,
47    /// Whether the request was blocked by the allowlist.
48    pub blocked: bool,
49    /// Whether a credential leak was detected.
50    pub leak_detected: bool,
51    /// Domain for which a credential was injected, if any.
52    pub credential_injected: Option<String>,
53}
54
55impl AuditEntry {
56    /// Create a new entry with the current timestamp.
57    /// Create a new audit entry for a proxy request.
58    pub fn new(url: String, method: String) -> Self {
59        Self {
60            timestamp: Utc::now().to_rfc3339(),
61            url,
62            method,
63            status: 0,
64            duration_ms: 0,
65            blocked: false,
66            leak_detected: false,
67            credential_injected: None,
68        }
69    }
70}
71
72/// Redact query parameter values in a URL, replacing them with `[REDACTED]`.
73fn redact_query_params(url: &str) -> String {
74    match url::Url::parse(url) {
75        Ok(parsed) => {
76            let pairs: Vec<(String, String)> = parsed
77                .query_pairs()
78                .map(|(k, _v)| (k.into_owned(), "[REDACTED]".to_string()))
79                .collect();
80            if pairs.is_empty() {
81                return url.to_string();
82            }
83            // Build query string manually to avoid percent-encoding the brackets
84            let query = pairs
85                .iter()
86                .map(|(k, v)| format!("{}={}", k, v))
87                .collect::<Vec<_>>()
88                .join("&");
89            let mut result = parsed.clone();
90            result.set_query(Some(&query));
91            result.to_string()
92        }
93        Err(_) => url.to_string(),
94    }
95}
96
/// Structured audit sink that appends JSON lines to a file and never rewrites them.
///
/// Rotation is automatic: once the active file has grown to 10 MB, existing
/// logs are shifted (`{name}.1` through `{name}.5`) and a fresh file is started.
#[non_exhaustive]
pub struct AuditLog {
    /// Location of the active log file.
    path: PathBuf,
    /// Instants of recently accepted writes, used for rate limiting.
    recent_writes: Mutex<Vec<Instant>>,
    /// Entries dropped by rate limiting since the count was last reported and reset.
    dropped_count: Mutex<u64>,
}
110
111impl AuditLog {
112    /// Create a new audit log at the given path.
113    /// Create a new audit log writing to the given file path.
114    pub fn new(path: impl Into<PathBuf>) -> Self {
115        Self {
116            path: path.into(),
117            recent_writes: Mutex::new(Vec::with_capacity(AUDIT_RATE_WINDOW_SIZE)),
118            dropped_count: Mutex::new(0),
119        }
120    }
121
122    /// Check if we are within the rate limit. Returns true if the write should proceed.
123    fn check_rate_limit(&self) -> bool {
124        let now = Instant::now();
125        let mut recent = self.recent_writes.lock().unwrap_or_else(|e| e.into_inner());
126        // Remove entries older than 1 second
127        recent.retain(|t| now.duration_since(*t).as_secs_f64() < 1.0);
128        if recent.len() >= AUDIT_MAX_WRITES_PER_SEC {
129            return false;
130        }
131        recent.push(now);
132        true
133    }
134
135    /// Rotate log files: `path` → `path.1`, `path.1` → `path.2`, etc.
136    /// Keeps at most `MAX_ROTATED_FILES` rotated files.
137    fn rotate(&self) -> Result<(), AuditError> {
138        // Remove the oldest rotated file if it exists
139        let oldest = format!("{}.{}", self.path.display(), MAX_ROTATED_FILES);
140        let _ = fs::remove_file(&oldest);
141
142        // Shift existing rotated files up by one
143        for i in (1..MAX_ROTATED_FILES).rev() {
144            let from = format!("{}.{}", self.path.display(), i);
145            let to = format!("{}.{}", self.path.display(), i + 1);
146            if std::path::Path::new(&from).exists() {
147                fs::rename(&from, &to)?;
148            }
149        }
150
151        // Rotate current file to .1
152        let first = format!("{}.1", self.path.display());
153        if self.path.exists() {
154            fs::rename(&self.path, &first)?;
155        }
156
157        Ok(())
158    }
159
160    /// Record an audit entry by appending a JSON line.
161    /// Query parameter values in the URL are redacted before logging.
162    /// If the log file exceeds 10 MB, it is rotated first.
163    pub fn record(&self, entry: &AuditEntry) -> Result<(), AuditError> {
164        if !self.check_rate_limit() {
165            let mut dropped = self.dropped_count.lock().unwrap_or_else(|e| e.into_inner());
166            *dropped += 1;
167            if *dropped == 1 || (*dropped).is_multiple_of(1000) {
168                tracing::warn!(
169                    dropped = *dropped,
170                    "Audit log rate limit exceeded, entries dropped"
171                );
172            }
173            return Ok(());
174        }
175        // Flush any pending dropped count
176        {
177            let mut dropped = self.dropped_count.lock().unwrap_or_else(|e| e.into_inner());
178            if *dropped > 0 {
179                tracing::info!(
180                    dropped = *dropped,
181                    "Audit log rate limit recovered, total entries dropped"
182                );
183                *dropped = 0;
184            }
185        }
186
187        if let Some(parent) = self.path.parent() {
188            fs::create_dir_all(parent)?;
189        }
190
191        // Check if rotation is needed
192        if let Ok(meta) = fs::metadata(&self.path)
193            && meta.len() >= MAX_LOG_SIZE
194        {
195            self.rotate()?;
196        }
197
198        // Redact query params in the URL before logging
199        let mut redacted_entry = entry.clone();
200        redacted_entry.url = redact_query_params(&entry.url);
201
202        let mut file = OpenOptions::new()
203            .create(true)
204            .append(true)
205            .open(&self.path)?;
206        let line = serde_json::to_string(&redacted_entry)?;
207        writeln!(file, "{}", line)?;
208        Ok(())
209    }
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;

    /// Temp-file fixture for a log path that cleans up after itself.
    ///
    /// The path embeds the process id, which can repeat between runs, so any
    /// stale file (and its rotated siblings) is removed on creation as well as
    /// on drop. Otherwise leftovers from an earlier run could skew line counts.
    struct TempLog {
        path: PathBuf,
    }

    impl TempLog {
        fn new(name: &str) -> Self {
            let path = std::env::temp_dir()
                .join(format!("clawbox_test_{}_{}", name, std::process::id()));
            let fixture = Self { path };
            fixture.remove_all();
            fixture
        }

        /// Path of the `index`-th rotated file (`{path}.{index}`).
        fn rotated(&self, index: u32) -> PathBuf {
            PathBuf::from(format!("{}.{}", self.path.display(), index))
        }

        fn remove_all(&self) {
            let _ = fs::remove_file(&self.path);
            for index in 1..=MAX_ROTATED_FILES {
                let _ = fs::remove_file(self.rotated(index));
            }
        }
    }

    impl Drop for TempLog {
        fn drop(&mut self) {
            self.remove_all();
        }
    }

    #[test]
    fn test_audit_log_roundtrip() {
        let tmp = TempLog::new("audit.jsonl");
        let log = AuditLog::new(&tmp.path);

        let mut entry = AuditEntry::new("https://api.github.com/repos".into(), "GET".into());
        entry.status = 200;
        entry.duration_ms = 42;
        entry.credential_injected = Some("GITHUB_TOKEN".into());

        log.record(&entry).unwrap();
        log.record(&entry).unwrap();

        let content = fs::read_to_string(&tmp.path).unwrap();
        let lines: Vec<&str> = content.trim().lines().collect();
        assert_eq!(lines.len(), 2);

        let parsed: AuditEntry = serde_json::from_str(lines[0]).unwrap();
        assert_eq!(parsed.status, 200);
        assert_eq!(parsed.credential_injected.as_deref(), Some("GITHUB_TOKEN"));
    }

    #[test]
    fn test_query_params_redacted() {
        let tmp = TempLog::new("audit_redact.jsonl");
        let log = AuditLog::new(&tmp.path);

        let entry = AuditEntry::new(
            "https://api.example.com/data?token=secret123&user=admin".into(),
            "GET".into(),
        );
        log.record(&entry).unwrap();

        let content = fs::read_to_string(&tmp.path).unwrap();
        assert!(!content.contains("secret123"));
        assert!(!content.contains("admin"));
        assert!(content.contains("[REDACTED]"));
        assert!(content.contains("token"));
        assert!(content.contains("user"));
    }

    #[test]
    fn test_url_without_query_unchanged() {
        let url = "https://api.github.com/repos";
        assert_eq!(redact_query_params(url), url);
    }

    #[test]
    fn test_log_rotation() {
        let tmp = TempLog::new("audit_rotate.jsonl");
        let log = AuditLog::new(&tmp.path);

        // Rotating when no log exists must succeed and must not create files.
        log.rotate().unwrap();

        assert!(!tmp.path.exists());
        assert!(!tmp.rotated(1).exists());
    }

    #[test]
    fn test_log_rotation_shifts_generations() {
        let tmp = TempLog::new("audit_rotate_shift.jsonl");
        let log = AuditLog::new(&tmp.path);

        fs::write(&tmp.path, "current").unwrap();
        fs::write(tmp.rotated(1), "older").unwrap();

        log.rotate().unwrap();

        assert!(!tmp.path.exists(), "active file should have been moved");
        assert_eq!(fs::read_to_string(tmp.rotated(1)).unwrap(), "current");
        assert_eq!(fs::read_to_string(tmp.rotated(2)).unwrap(), "older");
    }

    #[test]
    fn test_audit_error_display() {
        let err = AuditError::Io(std::io::Error::new(
            std::io::ErrorKind::PermissionDenied,
            "nope",
        ));
        assert!(err.to_string().contains("audit I/O error"));
    }

    #[test]
    fn test_log_rotation_with_large_file() {
        let tmp = TempLog::new("audit_rot_large.jsonl");
        let log = AuditLog::new(&tmp.path);

        // Grow the file to exactly MAX_LOG_SIZE without writing 10 MB of data;
        // the rotation check only looks at the reported length.
        fs::File::create(&tmp.path)
            .unwrap()
            .set_len(MAX_LOG_SIZE)
            .unwrap();

        // Now record should trigger rotation.
        let entry = AuditEntry::new("https://example.com".into(), "GET".into());
        log.record(&entry).unwrap();

        // Original file should be small now (just the new entry).
        let meta = fs::metadata(&tmp.path).unwrap();
        assert!(
            meta.len() < 1024,
            "After rotation, main file should be small, got {}",
            meta.len()
        );

        // Rotated file should exist and hold the old contents.
        let rotated = tmp.rotated(1);
        assert!(rotated.exists(), "Rotated file .1 should exist");
        assert_eq!(fs::metadata(&rotated).unwrap().len(), MAX_LOG_SIZE);
    }

    #[test]
    fn test_audit_entry_new_has_timestamp() {
        let entry = AuditEntry::new("https://example.com".into(), "POST".into());
        assert!(!entry.timestamp.is_empty());
        assert_eq!(entry.method, "POST");
        assert_eq!(entry.status, 0);
        assert!(!entry.blocked);
        assert!(!entry.leak_detected);
    }
}