Skip to main content

apcore_cli/security/
audit.rs

1// apcore-cli — Audit logger.
2// Protocol spec: SEC-01 (AuditLogger)
3
4use std::io::{BufWriter, Write};
5use std::path::PathBuf;
6
7use chrono::Utc;
8use serde_json::{json, Value};
9use sha2::{Digest, Sha256};
10use thiserror::Error;
11
12// ---------------------------------------------------------------------------
13// Internal helpers
14// ---------------------------------------------------------------------------
15
16/// Serialize `v` to a compact, deterministically sorted JSON string.
17///
18/// Spec: each language serializes to sorted-key JSON before hashing so that
19/// equivalent input dicts produce the same hash regardless of insertion order.
20fn sorted_json(v: &Value) -> String {
21    match v {
22        Value::Object(map) => {
23            let sorted: std::collections::BTreeMap<_, _> = map.iter().collect();
24            let pairs: Vec<String> = sorted
25                .iter()
26                .map(|(k, val)| format!("{}:{}", serde_json::json!(k), sorted_json(val)))
27                .collect();
28            format!("{{{}}}", pairs.join(","))
29        }
30        other => other.to_string(),
31    }
32}
33
34// ---------------------------------------------------------------------------
35// AuditLogger
36// ---------------------------------------------------------------------------
37
/// Append-only audit logger that records each module execution to a JSONL file.
///
/// `path` is the destination file. [`AuditLogger::new`] substitutes the
/// default location when no explicit path is supplied, so `path` ends up
/// `None` only when that default cannot be determined as well. In that state
/// `log_execution` returns immediately without writing anything.
#[derive(Debug, Clone)]
pub struct AuditLogger {
    /// Destination JSONL file; `None` means logging is disabled.
    path: Option<PathBuf>,
}
45
46impl AuditLogger {
47    /// Return the default path: `~/.apcore-cli/audit.jsonl`.
48    pub fn default_path() -> Option<PathBuf> {
49        dirs::home_dir().map(|h| h.join(".apcore-cli").join("audit.jsonl"))
50    }
51
52    /// Create a new `AuditLogger`.
53    ///
54    /// # Arguments
55    /// * `path` — path to the JSONL audit log file; `None` uses the default
56    ///   path `~/.apcore-cli/audit.jsonl`.
57    pub fn new(path: Option<PathBuf>) -> Self {
58        let resolved = path.or_else(Self::default_path);
59        if let Some(ref p) = resolved {
60            if let Some(parent) = p.parent() {
61                // Best-effort; failure is silent.
62                let _ = std::fs::create_dir_all(parent);
63                // Restrict the parent dir to owner-only on Unix so audit-log
64                // entries are not enumerable by other local UIDs on shared
65                // systems.
66                #[cfg(unix)]
67                {
68                    use std::os::unix::fs::PermissionsExt;
69                    let _ =
70                        std::fs::set_permissions(parent, std::fs::Permissions::from_mode(0o700));
71                }
72            }
73        }
74        Self { path: resolved }
75    }
76
77    /// Resolve the username for an audit log entry.
78    ///
79    /// Spec (SEC-01): canonical resolution chain is
80    ///   `getlogin → getpwuid(geteuid) → USER → LOGNAME → USERNAME → "unknown"`.
81    ///
82    /// `getlogin()` returns the controlling-terminal owner; `getpwuid` looks up
83    /// the effective UID in the password database. Either of those wins over
84    /// env vars, which can be spoofed or stale in container/su scenarios.
85    fn get_user() -> String {
86        // 1. getlogin() — fastest path; queries the controlling terminal.
87        #[cfg(unix)]
88        {
89            // SAFETY: getlogin() returns either NULL or a pointer to a
90            // statically allocated, NUL-terminated string owned by libc. We
91            // copy it into an owned String before returning so we never hold
92            // the libc-owned pointer past this block.
93            unsafe {
94                let raw = libc::getlogin();
95                if !raw.is_null() {
96                    let cstr = std::ffi::CStr::from_ptr(raw);
97                    let name = cstr.to_string_lossy().into_owned();
98                    if !name.is_empty() {
99                        return name;
100                    }
101                }
102            }
103            // 2. pwd lookup by effective UID.
104            let euid = nix::unistd::geteuid();
105            if let Ok(Some(user)) = nix::unistd::User::from_uid(euid) {
106                if !user.name.is_empty() {
107                    return user.name;
108                }
109            }
110        }
111        // 3. Fall back to env vars: USER → LOGNAME → USERNAME → "unknown".
112        Self::resolve_user_from_env(&|k| std::env::var(k).ok())
113    }
114
115    /// Pure env-fallback helper, separated for unit testing the priority chain.
116    ///
117    /// Walks USER → LOGNAME → USERNAME → "unknown". Returns the first non-empty
118    /// value the lookup function yields.
119    fn resolve_user_from_env<F>(env_lookup: &F) -> String
120    where
121        F: Fn(&str) -> Option<String>,
122    {
123        for key in ["USER", "LOGNAME", "USERNAME"] {
124            if let Some(v) = env_lookup(key) {
125                if !v.is_empty() {
126                    return v;
127                }
128            }
129        }
130        "unknown".to_string()
131    }
132
133    /// Hash `input_data` with a fresh 16-byte random salt.
134    ///
135    /// Digest = SHA-256(salt(16) || sorted_json(`input_data`)).
136    /// Returns hex-encoded SHA-256 hash (64 chars).
137    ///
138    /// Spec (SEC-03): hash = sha256(salt + json.dumps(input, sort_keys=True)).
139    /// A fresh per-call salt prevents cross-invocation input correlation.
140    fn hash_input(input_data: &Value) -> String {
141        use aes_gcm::aead::rand_core::RngCore;
142        use aes_gcm::aead::OsRng;
143
144        let mut salt = [0u8; 16];
145        OsRng.fill_bytes(&mut salt);
146
147        let payload = sorted_json(input_data);
148        let mut hasher = Sha256::new();
149        hasher.update(salt);
150        hasher.update(payload.as_bytes());
151        format!("{:x}", hasher.finalize())
152    }
153
154    /// Log a single module execution event.
155    ///
156    /// Appends one JSON line to the audit log. IO failures emit a
157    /// `tracing::warn!` and are otherwise ignored — this method never panics
158    /// or propagates an error.
159    ///
160    /// # Fields written
161    /// * `timestamp`   — ISO 8601 UTC timestamp
162    /// * `user`        — username from `USER`/`LOGNAME`
163    /// * `module_id`   — the executed module's identifier
164    /// * `input_salt`  — 16-byte hex salt fed into the hash (persists so a
165    ///   verifier can reproduce the digest from a known input)
166    /// * `input_hash`  — salted SHA-256 of the JSON-serialised input
167    /// * `status`      — `"success"` or `"error"`
168    /// * `exit_code`   — process exit code
169    /// * `duration_ms` — wall-clock execution time in milliseconds
170    pub fn log_execution(
171        &self,
172        module_id: &str,
173        input_data: &Value,
174        status: &str,
175        exit_code: i32,
176        duration_ms: u64,
177    ) {
178        let Some(ref path) = self.path else {
179            return; // logging disabled
180        };
181
182        let timestamp = Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
183        let input_hash = Self::hash_input(input_data);
184        let entry = json!({
185            "timestamp":   timestamp,
186            "user":        Self::get_user(),
187            "module_id":   module_id,
188            "input_hash":  input_hash,
189            "status":      status,
190            "exit_code":   exit_code,
191            "duration_ms": duration_ms,
192        });
193
194        let result = (|| -> std::io::Result<()> {
195            let file = std::fs::OpenOptions::new()
196                .create(true)
197                .append(true)
198                .open(path)?;
199            // Restrict to owner read/write on Unix so audit entries are not
200            // readable by other local UIDs on shared systems. set_permissions
201            // is idempotent across appends; a no-op on subsequent writes.
202            #[cfg(unix)]
203            {
204                use std::os::unix::fs::PermissionsExt;
205                let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600));
206            }
207            let mut writer = BufWriter::new(file);
208            serde_json::to_writer(&mut writer, &entry).map_err(std::io::Error::other)?;
209            writeln!(writer)?;
210            writer.flush()?;
211            Ok(())
212        })();
213
214        if let Err(e) = result {
215            tracing::warn!("Could not write audit log: {e}");
216        }
217    }
218}
219
220/// Errors produced by the audit logger (reserved for future use).
221#[derive(Debug, Error)]
222pub enum AuditLogError {
223    #[error("failed to write audit log: {0}")]
224    Io(#[from] std::io::Error),
225
226    #[error("failed to serialise audit record: {0}")]
227    Serialise(#[from] serde_json::Error),
228}
229
230// ---------------------------------------------------------------------------
231// Unit tests
232// ---------------------------------------------------------------------------
233
234#[cfg(test)]
235mod tests {
236    use super::*;
237    use serde_json::json;
238
239    #[test]
240    fn test_audit_logger_disabled_no_op() {
241        // AuditLogger with path=None must not write any files.
242        let logger = AuditLogger { path: None };
243        // Should not panic even with no path.
244        logger.log_execution("mod.test", &json!({}), "success", 0, 1);
245    }
246
247    #[test]
248    fn test_audit_logger_writes_jsonl_record() {
249        let dir = tempfile::tempdir().unwrap();
250        let path = dir.path().join("audit.jsonl");
251        let logger = AuditLogger::new(Some(path.clone()));
252        logger.log_execution("math.add", &json!({"a": 1}), "success", 0, 42);
253        let content = std::fs::read_to_string(&path).unwrap();
254        let entry: serde_json::Value = serde_json::from_str(content.trim()).unwrap();
255        assert_eq!(entry["module_id"], "math.add");
256        assert_eq!(entry["status"], "success");
257        assert_eq!(entry["exit_code"], 0);
258        assert_eq!(entry["duration_ms"], 42);
259    }
260
261    #[test]
262    fn test_audit_logger_appends_multiple_records() {
263        let dir = tempfile::tempdir().unwrap();
264        let path = dir.path().join("audit.jsonl");
265        let logger = AuditLogger::new(Some(path.clone()));
266        logger.log_execution("a.b", &json!({}), "success", 0, 1);
267        logger.log_execution("c.d", &json!({}), "error", 1, 2);
268        let content = std::fs::read_to_string(&path).unwrap();
269        let lines: Vec<&str> = content.lines().collect();
270        assert_eq!(lines.len(), 2);
271    }
272
273    #[test]
274    fn test_audit_logger_record_contains_required_fields() {
275        let dir = tempfile::tempdir().unwrap();
276        let path = dir.path().join("audit.jsonl");
277        let logger = AuditLogger::new(Some(path.clone()));
278        logger.log_execution("x.y", &json!({"k": "v"}), "success", 0, 10);
279        let raw = std::fs::read_to_string(&path).unwrap();
280        let entry: serde_json::Value = serde_json::from_str(raw.trim()).unwrap();
281        assert!(entry["timestamp"].as_str().unwrap().ends_with('Z'));
282        assert!(entry["user"].is_string());
283        assert_eq!(entry["module_id"], "x.y");
284        assert!(entry["input_hash"].as_str().unwrap().len() == 64); // hex SHA-256
285                                                                    // input_salt is NOT persisted per spec (A-D-007 fix)
286        assert!(entry.get("input_salt").is_none());
287        assert_eq!(entry["status"], "success");
288        assert!(entry["exit_code"].is_number());
289        assert!(entry["duration_ms"].is_number());
290    }
291
292    #[test]
293    fn test_audit_logger_different_inputs_produce_different_hashes() {
294        // Even without a persisted salt, different inputs must produce different
295        // hashes in practice (different sorted JSON payload).
296        let dir = tempfile::tempdir().unwrap();
297        let path = dir.path().join("audit.jsonl");
298        let logger = AuditLogger::new(Some(path.clone()));
299        logger.log_execution("x.y", &json!({"a": 1}), "success", 0, 0);
300        logger.log_execution("x.y", &json!({"a": 2}), "success", 0, 0);
301        let lines: Vec<String> = std::fs::read_to_string(&path)
302            .unwrap()
303            .lines()
304            .map(String::from)
305            .collect();
306        let h0 = serde_json::from_str::<serde_json::Value>(&lines[0]).unwrap()["input_hash"]
307            .as_str()
308            .unwrap()
309            .to_string();
310        let h1 = serde_json::from_str::<serde_json::Value>(&lines[1]).unwrap()["input_hash"]
311            .as_str()
312            .unwrap()
313            .to_string();
314        assert_ne!(h0, h1, "different inputs must produce different hashes");
315    }
316
317    #[test]
318    fn test_audit_logger_same_input_different_hash_per_call() {
319        // Each invocation uses a fresh random salt, so two calls with the same
320        // input must produce different hash values.
321        let dir = tempfile::tempdir().unwrap();
322        let path = dir.path().join("audit.jsonl");
323        let logger = AuditLogger::new(Some(path.clone()));
324        logger.log_execution("u.v", &json!({}), "success", 0, 0);
325        logger.log_execution("u.v", &json!({}), "success", 0, 0);
326        let lines: Vec<String> = std::fs::read_to_string(&path)
327            .unwrap()
328            .lines()
329            .map(String::from)
330            .collect();
331        let h0 = serde_json::from_str::<serde_json::Value>(&lines[0]).unwrap()["input_hash"]
332            .as_str()
333            .unwrap()
334            .to_string();
335        let h1 = serde_json::from_str::<serde_json::Value>(&lines[1]).unwrap()["input_hash"]
336            .as_str()
337            .unwrap()
338            .to_string();
339        assert_ne!(
340            h0, h1,
341            "same input across calls must produce different hashes (random salt)"
342        );
343    }
344
345    #[cfg(unix)]
346    #[test]
347    fn test_audit_logger_file_mode_is_owner_only() {
348        use std::os::unix::fs::PermissionsExt;
349        let dir = tempfile::tempdir().unwrap();
350        let path = dir.path().join("audit.jsonl");
351        let logger = AuditLogger::new(Some(path.clone()));
352        logger.log_execution("perm.test", &json!({}), "success", 0, 0);
353        let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777;
354        assert_eq!(mode, 0o600, "audit log must be 0600; got {:o}", mode);
355    }
356
357    #[cfg(unix)]
358    #[test]
359    fn test_audit_logger_parent_dir_mode_is_owner_only() {
360        use std::os::unix::fs::PermissionsExt;
361        let dir = tempfile::tempdir().unwrap();
362        let nested = dir.path().join("nested-audit-dir");
363        let path = nested.join("audit.jsonl");
364        let _logger = AuditLogger::new(Some(path));
365        let mode = std::fs::metadata(&nested).unwrap().permissions().mode() & 0o777;
366        assert_eq!(mode, 0o700, "parent dir must be 0700; got {:o}", mode);
367    }
368
369    /// D10-007: env-fallback helper must walk USER -> LOGNAME -> USERNAME -> "unknown".
370    #[test]
371    fn test_resolve_user_from_env_priority_chain() {
372        // USER takes precedence over LOGNAME/USERNAME.
373        let env = |k: &str| -> Option<String> {
374            match k {
375                "USER" => Some("user_val".to_string()),
376                "LOGNAME" => Some("logname_val".to_string()),
377                "USERNAME" => Some("username_val".to_string()),
378                _ => None,
379            }
380        };
381        assert_eq!(AuditLogger::resolve_user_from_env(&env), "user_val");
382
383        // LOGNAME wins when USER is unset.
384        let env = |k: &str| -> Option<String> {
385            match k {
386                "LOGNAME" => Some("logname_val".to_string()),
387                "USERNAME" => Some("username_val".to_string()),
388                _ => None,
389            }
390        };
391        assert_eq!(AuditLogger::resolve_user_from_env(&env), "logname_val");
392
393        // USERNAME wins when USER and LOGNAME are unset (Windows-style).
394        let env = |k: &str| -> Option<String> {
395            match k {
396                "USERNAME" => Some("username_val".to_string()),
397                _ => None,
398            }
399        };
400        assert_eq!(AuditLogger::resolve_user_from_env(&env), "username_val");
401
402        // All unset → "unknown".
403        let env = |_: &str| -> Option<String> { None };
404        assert_eq!(AuditLogger::resolve_user_from_env(&env), "unknown");
405    }
406
407    /// D10-007: get_user prefers system identity (getlogin/getpwuid) over env vars.
408    /// On Unix dev hosts, getpwuid(geteuid()) always succeeds, so even if USER and
409    /// LOGNAME are set to sentinel values, get_user must NOT return those sentinels.
410    #[cfg(unix)]
411    #[test]
412    fn test_get_user_prefers_system_identity_over_env() {
413        // SAFETY: This test sets process-wide env vars. Other tests in the same
414        // binary that read USER/LOGNAME could observe these values; we restore
415        // them at the end. The critical assertion (system identity beats env) is
416        // independent of pre-existing values.
417        let prev_user = std::env::var("USER").ok();
418        let prev_logname = std::env::var("LOGNAME").ok();
419        std::env::set_var("USER", "sentinel_user_d10_007");
420        std::env::set_var("LOGNAME", "sentinel_logname_d10_007");
421
422        let resolved = AuditLogger::get_user();
423
424        // Restore env first so a panic in the assertions below leaves a clean state.
425        match prev_user {
426            Some(v) => std::env::set_var("USER", v),
427            None => std::env::remove_var("USER"),
428        }
429        match prev_logname {
430            Some(v) => std::env::set_var("LOGNAME", v),
431            None => std::env::remove_var("LOGNAME"),
432        }
433
434        assert_ne!(
435            resolved, "sentinel_user_d10_007",
436            "get_user must consult getlogin/getpwuid before USER env var"
437        );
438        assert_ne!(
439            resolved, "sentinel_logname_d10_007",
440            "get_user must consult getlogin/getpwuid before LOGNAME env var"
441        );
442        assert!(!resolved.is_empty(), "get_user must never return empty");
443    }
444}