Skip to main content

tryaudex_core/
audit.rs

1use std::path::PathBuf;
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5
6use crate::error::Result;
7use crate::session::{CloudProvider, Session};
8
/// One record of the audit log.
///
/// `AuditLog::log_with_provider` serializes a value of this type to a single
/// JSON line; the `read*` methods on `AuditLog` deserialize lines back into it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEntry {
    /// UTC time at which the entry was constructed by the logger.
    pub timestamp: DateTime<Utc>,
    /// Identifier of the session this entry belongs to.
    pub session_id: String,
    /// Cloud provider for this entry (defaults to "aws" for backwards compat).
    ///
    /// When the field is absent from a stored line, deserialization fills it
    /// in via `default_provider`.
    #[serde(default = "default_provider")]
    pub provider: String,
    /// The event payload; serialized under the `event` key.
    pub event: AuditEvent,
}
18
/// Serde fallback for `AuditEntry::provider`: entries stored without a
/// `provider` field are read back as `"aws"`.
fn default_provider() -> String {
    String::from("aws")
}
22
/// Payload of an audit entry.
///
/// Serialized as an internally tagged enum: the variant is stored in a
/// `type` field whose value is the snake_case variant name (for example
/// `session_created`), alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum AuditEvent {
    /// A session was created. Emitted by `AuditLog::log_session_created`.
    SessionCreated {
        /// "Role ARN" for AWS, "Service Account" for GCP, "Subscription" for Azure
        role_arn: String,
        /// Session TTL in seconds, copied from the session.
        ttl_seconds: u64,
        /// Optional budget copied from the session.
        /// NOTE(review): currency/unit is not visible in this file — confirm.
        budget: Option<f64>,
        /// Policy actions rendered in the provider's own permission syntax
        /// (IAM action, GCP permission, or Azure permission).
        allowed_actions: Vec<String>,
        /// Command recorded on the session (presumably the wrapped command's
        /// argv — verify against `Session`).
        command: Vec<String>,
        /// AI agent/model identity (from AUDEX_AGENT_ID env var).
        ///
        /// Omitted from the JSON when `None`; treated as `None` when absent.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        agent_id: Option<String>,
    },
    /// Credentials were issued for the session.
    CredentialsIssued {
        /// Credential identifier: AWS access key ID, GCP token prefix, or Azure token prefix.
        /// Field name kept as `access_key_id` for backwards compatibility with existing logs.
        access_key_id: String,
        /// UTC expiry time of the issued credentials.
        expires_at: DateTime<Utc>,
    },
    /// A session ended. Emitted by `AuditLog::log_session_ended`.
    SessionEnded {
        /// String form of the session's status at the time of logging.
        status: String,
        /// Whole seconds between the session's `created_at` and the moment
        /// the event was logged (signed).
        duration_seconds: i64,
        /// Exit code supplied by the caller, if any.
        exit_code: Option<i32>,
    },
    /// Spend warning for the session.
    /// NOTE(review): the threshold that triggers this is decided elsewhere;
    /// units of both fields are not visible here — confirm.
    BudgetWarning {
        current_spend: f64,
        limit: f64,
    },
    /// Spend exceeded the configured limit.
    /// NOTE(review): units of both fields are not visible here — confirm.
    BudgetExceeded {
        final_spend: f64,
        limit: f64,
    },
    /// A cloud resource created during this session (Phase 1 of the
    /// resource-lifecycle flow). Emitted once per resource after the wrapped
    /// subprocess exits successfully, parsed from the command args.
    ResourceCreated {
        service: String,
        resource_type: String,
        identifier: String,
    },
    /// The --allow policy is advisory-only (no enforcement) for this session.
    /// Logged when GCP CAB downscoping fails or Azure tokens cannot be scoped.
    PolicyAdvisoryOnly {
        /// Human-readable explanation of why enforcement is unavailable.
        reason: String,
    },
}
70
/// Append-only audit logger that writes to a local JSONL file.
///
/// Every logged entry becomes one line: the JSON-serialized `AuditEntry`,
/// signed by `crate::integrity::sign_line` against the previous line's
/// chain hash.
pub struct AuditLog {
    /// Location of the JSONL audit file.
    path: PathBuf,
}
75
76impl AuditLog {
77    pub fn new() -> Result<Self> {
78        let dir = dirs::data_local_dir()
79            .unwrap_or_else(|| PathBuf::from("."))
80            .join("audex")
81            .join("audit");
82        std::fs::create_dir_all(&dir)?;
83        let path = dir.join("audit.jsonl");
84        Ok(Self { path })
85    }
86
87    /// Construct an AuditLog at a caller-provided path. Used by tests so they
88    /// don't pollute the real audit log at `~/.local/share/audex/audit/audit.jsonl`.
89    pub fn with_path(path: PathBuf) -> Result<Self> {
90        if let Some(parent) = path.parent() {
91            std::fs::create_dir_all(parent)?;
92        }
93        Ok(Self { path })
94    }
95
96    pub fn log(&self, session_id: &str, event: AuditEvent) -> Result<()> {
97        self.log_with_provider(session_id, "aws", event)
98    }
99
100    pub fn log_with_provider(
101        &self,
102        session_id: &str,
103        provider: &str,
104        event: AuditEvent,
105    ) -> Result<()> {
106        let entry = AuditEntry {
107            timestamp: Utc::now(),
108            session_id: session_id.to_string(),
109            provider: provider.to_string(),
110            event,
111        };
112        let json_line = serde_json::to_string(&entry)?;
113
114        // Redact any credential material before persisting
115        let json_line = crate::leakdetect::redact_secrets(&json_line);
116
117        // Take an exclusive OS-level lock on the audit log for the entire
118        // read-hash-then-append sequence. Without this, concurrent writers
119        // (threads OR separate tryaudex processes) would each read the same
120        // `previous_hash`, compute HMACs against it, and append — breaking
121        // the chain for every line after the first one to hit disk.
122        use fs4::fs_std::FileExt;
123        use std::io::Write;
124        let mut opts = std::fs::OpenOptions::new();
125        opts.create(true).append(true).read(true);
126        #[cfg(unix)]
127        {
128            use std::os::unix::fs::OpenOptionsExt;
129            opts.mode(0o600);
130        }
131        let mut file = opts.open(&self.path)?;
132        file.lock_exclusive()?;
133
134        // Warn once if using the hardcoded default HMAC key.
135        crate::integrity::warn_if_default_key();
136
137        // Sign with chain HMAC for tamper detection — now inside the lock.
138        let previous_hash = crate::integrity::last_chain_hash(&self.path);
139        let signed_line = crate::integrity::sign_line(&json_line, &previous_hash);
140
141        let mut output = signed_line;
142        output.push('\n');
143
144        let write_result = file.write_all(output.as_bytes());
145        // Flush to disk before releasing lock so concurrent writers
146        // cannot corrupt the HMAC chain.
147        if write_result.is_ok() {
148            let _ = file.sync_all();
149        }
150        // Always release the lock, even if the write failed.
151        let _ = FileExt::unlock(&file);
152        write_result?;
153
154        Ok(())
155    }
156
157    pub fn log_session_created(&self, session: &Session) -> Result<()> {
158        let allowed_actions: Vec<String> = match session.provider {
159            CloudProvider::Gcp => session
160                .policy
161                .actions
162                .iter()
163                .map(|a| a.to_gcp_permission())
164                .collect(),
165            CloudProvider::Azure => session
166                .policy
167                .actions
168                .iter()
169                .map(|a| a.to_azure_permission())
170                .collect(),
171            CloudProvider::Aws => session
172                .policy
173                .actions
174                .iter()
175                .map(|a| a.to_iam_action())
176                .collect(),
177        };
178
179        self.log_with_provider(
180            &session.id,
181            &session.provider.to_string(),
182            AuditEvent::SessionCreated {
183                role_arn: session.role_arn.clone(),
184                ttl_seconds: session.ttl_seconds,
185                budget: session.budget,
186                allowed_actions,
187                command: session.command.clone(),
188                agent_id: session.agent_id.clone(),
189            },
190        )
191    }
192
193    pub fn log_session_ended(&self, session: &Session, exit_code: Option<i32>) -> Result<()> {
194        let duration = (Utc::now() - session.created_at).num_seconds();
195        self.log_with_provider(
196            &session.id,
197            &session.provider.to_string(),
198            AuditEvent::SessionEnded {
199                status: session.status.to_string(),
200                duration_seconds: duration,
201                exit_code,
202            },
203        )
204    }
205
206    /// Read all audit entries, optionally filtered by session ID.
207    pub fn read(&self, session_id: Option<&str>) -> Result<Vec<AuditEntry>> {
208        self.read_filtered(session_id, None)
209    }
210
211    /// Read audit entries with optional session ID and provider filters.
212    pub fn read_filtered(
213        &self,
214        session_id: Option<&str>,
215        provider: Option<&str>,
216    ) -> Result<Vec<AuditEntry>> {
217        if !self.path.exists() {
218            return Ok(Vec::new());
219        }
220        let content = std::fs::read_to_string(&self.path)?;
221        let entries: Vec<AuditEntry> = content
222            .lines()
223            .filter(|l| !l.trim().is_empty())
224            .filter_map(|l| {
225                // Strip HMAC signature if present before parsing JSON
226                let (json_content, _) = crate::integrity::parse_line(l);
227                serde_json::from_str(json_content).ok()
228            })
229            .filter(|e: &AuditEntry| session_id.is_none_or(|id| e.session_id == id))
230            .filter(|e: &AuditEntry| provider.is_none_or(|p| e.provider == p))
231            .collect();
232        Ok(entries)
233    }
234
235    /// Read at most `max_entries` recent audit entries without loading the
236    /// entire log into memory.  Reads the last `max_bytes` of the file
237    /// (default 1 MB) and parses only those lines.
238    ///
239    /// Takes a shared file lock for the duration of the read so concurrent
240    /// writers in `log_with_provider` cannot tear a line mid-flush
241    /// (R6-H20), verifies the HMAC chain before returning any data and
242    /// refuses to serve entries from a tampered log (R6-H21), and uses a
243    /// byte-oriented tail reader so seeks landing inside a multi-byte
244    /// UTF-8 character or a pathological log with no newlines no longer
245    /// silently return zero entries (R6-H19).
246    pub fn read_recent(&self, max_entries: usize) -> Result<Vec<AuditEntry>> {
247        use fs4::fs_std::FileExt;
248        use std::io::{Read, Seek, SeekFrom};
249
250        if !self.path.exists() {
251            return Ok(Vec::new());
252        }
253
254        const MAX_TAIL_BYTES: u64 = 1_048_576; // 1 MB
255        let mut file = std::fs::File::open(&self.path)?;
256
257        // R6-H20: shared OS lock prevents reading a half-flushed line that
258        // a concurrent `log_with_provider` writer has not yet sync_all'd.
259        // The writer holds an exclusive lock across write+flush, so our
260        // shared lock will block until the writer finishes.
261        file.lock_shared()?;
262
263        // R6-H21: verify the tamper-evidence chain before returning any
264        // entries.  Previously MCP and the server's /v1/audit endpoint
265        // called this path and cheerfully served forged lines with no
266        // warning — an attacker who could write to the audit file could
267        // plant arbitrary entries visible via the admin view.  `parse_line`
268        // strips the HMAC silently, so a bad actor only had to replace
269        // the tail with unsigned JSON and wait for an admin to look.
270        //
271        // We run the full-chain verification inside the lock so a writer
272        // cannot race our check.  Audit logs are bounded by rotation so
273        // O(n) verification on an admin-facing endpoint is acceptable.
274        let verification = crate::integrity::verify_audit_log(&self.path)?;
275        if !verification.tampered_lines.is_empty() {
276            let _ = FileExt::unlock(&file);
277            return Err(crate::error::AvError::InvalidPolicy(format!(
278                "Audit log integrity check failed — {} tampered line(s) detected at {:?}. \
279                 Refusing to return recent entries. Run `audex audit verify` to investigate.",
280                verification.tampered_lines.len(),
281                verification.tampered_lines
282            )));
283        }
284
285        let file_len = file.metadata()?.len();
286
287        // R6-H19: read as bytes, locate the first full line after the
288        // tail cut-point, then decode lossily.  The old code used
289        // `read_to_string` directly after byte-wise skipping to a newline,
290        // which errored out when the 1 MB seek point landed inside a
291        // multi-byte UTF-8 character (JSON escapes/unicode in log bodies)
292        // and returned zero entries when the tail contained a single
293        // huge line with no newline at all.
294        let tail_start = file_len.saturating_sub(MAX_TAIL_BYTES);
295        file.seek(SeekFrom::Start(tail_start))?;
296        let mut bytes = Vec::with_capacity((file_len - tail_start) as usize);
297        file.read_to_end(&mut bytes)?;
298        let _ = FileExt::unlock(&file);
299
300        // When we cut the file mid-line, drop the leading partial line;
301        // when there is no newline in the slice, keep the whole thing —
302        // `from_utf8_lossy` + per-line JSON parsing will naturally drop
303        // any garbled prefix rather than silently returning nothing.
304        let slice: &[u8] = if tail_start > 0 {
305            match bytes.iter().position(|b| *b == b'\n') {
306                Some(pos) => &bytes[pos + 1..],
307                None => &bytes[..],
308            }
309        } else {
310            &bytes[..]
311        };
312
313        let tail = String::from_utf8_lossy(slice);
314        let entries: Vec<AuditEntry> = tail
315            .lines()
316            .rev()
317            .filter(|l| !l.trim().is_empty())
318            .take(max_entries)
319            .filter_map(|l| {
320                let (json_content, _) = crate::integrity::parse_line(l);
321                serde_json::from_str(json_content).ok()
322            })
323            .collect();
324
325        Ok(entries)
326    }
327
328    /// Verify the integrity of the audit log using chain HMACs.
329    pub fn verify(&self) -> Result<crate::integrity::VerificationResult> {
330        crate::integrity::verify_audit_log(&self.path)
331    }
332
333    pub fn path(&self) -> &PathBuf {
334        &self.path
335    }
336}