apcore-cli 0.10.0

Command-line interface for apcore modules
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
// apcore-cli — Audit logger.
// Protocol spec: SEC-01 (AuditLogger)

use std::io::{BufWriter, Write};
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

use chrono::Utc;
use serde_json::{json, Value};
use sha2::{Digest, Sha256};
use thiserror::Error;

// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------

/// Render `v` as compact JSON text with every object's keys in sorted order.
///
/// This is the canonical form that gets hashed: two values that differ only
/// in the order of their object keys produce the same string, at any nesting
/// depth (audit D11-002, 2026-05-08).
///
/// * Objects: keys are sorted lexicographically and each value is
///   canonicalised recursively.
/// * Arrays: each element is canonicalised recursively; element order is
///   kept exactly as given, because it is part of the value.
/// * Everything else: rendered with `Value`'s own `to_string()`.
fn sorted_json(v: &Value) -> String {
    match v {
        Value::Array(items) => {
            let rendered = items.iter().map(sorted_json).collect::<Vec<String>>();
            format!("[{}]", rendered.join(","))
        }
        Value::Object(fields) => {
            let mut members: Vec<(&String, &Value)> = fields.iter().collect();
            members.sort_by(|left, right| left.0.cmp(right.0));
            let body = members
                .into_iter()
                // `json!(name)` yields the key as a quoted, escaped JSON string.
                .map(|(name, inner)| format!("{}:{}", serde_json::json!(name), sorted_json(inner)))
                .collect::<Vec<String>>()
                .join(",");
            format!("{{{}}}", body)
        }
        scalar => scalar.to_string(),
    }
}

// ---------------------------------------------------------------------------
// AuditLogger
// ---------------------------------------------------------------------------

/// Append-only audit logger that records each module execution to a JSONL file.
///
/// Logging is a no-op (disabled) whenever the stored `path` is `None`. Note
/// that `AuditLogger::new(None)` does *not* produce a disabled logger by
/// itself: it substitutes `AuditLogger::default_path()`, so the stored path is
/// `None` only when that default cannot be resolved either.
///
/// `write_failure_warned` is shared across clones so a single logger instance
/// (and any of its clones) emits the "Could not write audit log" warning at
/// most once. Cross-SDK parity with TypeScript `_writeFailureWarned` and
/// Python `_write_failure_warned` (audit D11-010).
#[derive(Debug, Clone)]
pub struct AuditLogger {
    /// Destination JSONL file; `None` turns `log_execution` into a no-op.
    path: Option<PathBuf>,
    /// Flipped to `true` once the first write failure has been reported;
    /// held in an `Arc` so clones observe the same flag.
    write_failure_warned: Arc<AtomicBool>,
}

impl AuditLogger {
    /// Default audit log location: `<home>/.apcore-cli/audit.jsonl`.
    ///
    /// Yields `None` when `dirs::home_dir()` reports no home directory.
    pub fn default_path() -> Option<PathBuf> {
        let home = dirs::home_dir()?;
        Some(home.join(".apcore-cli").join("audit.jsonl"))
    }

    /// Build an `AuditLogger` and prepare the directory that will hold its log.
    ///
    /// # Arguments
    /// * `path` — path to the JSONL audit log file. `None` falls back to
    ///   [`AuditLogger::default_path`] (`~/.apcore-cli/audit.jsonl`); if that
    ///   is unavailable too, the logger is created with no path.
    ///
    /// # Side effects
    /// When a path is resolved and it has a parent component, the parent
    /// directory is created (recursively) and, on Unix, chmod'ed to `0700` so
    /// audit-log entries are not enumerable by other local UIDs on shared
    /// systems. The chmod is attempted even if the directory already existed.
    /// Both steps are best-effort: their errors are discarded.
    pub fn new(path: Option<PathBuf>) -> Self {
        let resolved = match path {
            Some(explicit) => Some(explicit),
            None => Self::default_path(),
        };

        if let Some(parent) = resolved.as_deref().and_then(|file| file.parent()) {
            // Failure here is deliberately silent; a later write will surface it.
            let _ = std::fs::create_dir_all(parent);

            #[cfg(unix)]
            {
                use std::os::unix::fs::PermissionsExt;
                let owner_only = std::fs::Permissions::from_mode(0o700);
                let _ = std::fs::set_permissions(parent, owner_only);
            }
        }

        Self {
            write_failure_warned: Arc::new(AtomicBool::new(false)),
            path: resolved,
        }
    }

    /// Work out which username to record in an audit log entry.
    ///
    /// Spec (SEC-01): canonical resolution chain is
    ///   `getlogin → getpwuid(geteuid) → USER → LOGNAME → USERNAME → "unknown"`.
    ///
    /// The two system lookups are compiled in on Unix only and take priority
    /// over environment variables, which can be spoofed or stale in
    /// container/su scenarios. An empty name from any step is treated as
    /// "not found" and the next step is tried.
    fn get_user() -> String {
        #[cfg(unix)]
        {
            // Step 1: login name reported by getlogin().
            //
            // SAFETY: getlogin() returns either NULL or a pointer to a
            // statically allocated, NUL-terminated string owned by libc. The
            // bytes are copied into an owned String inside this block, so the
            // libc-owned pointer is never held past it.
            let login_name = unsafe {
                let ptr = libc::getlogin();
                if ptr.is_null() {
                    None
                } else {
                    Some(std::ffi::CStr::from_ptr(ptr).to_string_lossy().into_owned())
                }
            };
            if let Some(name) = login_name.filter(|candidate| !candidate.is_empty()) {
                return name;
            }

            // Step 2: password-database entry for the effective UID.
            match nix::unistd::User::from_uid(nix::unistd::geteuid()) {
                Ok(Some(entry)) if !entry.name.is_empty() => return entry.name,
                _ => {}
            }
        }

        // Step 3: environment variables, then the literal "unknown".
        Self::resolve_user_from_env(&|key| std::env::var(key).ok())
    }

    /// Environment-variable stage of username resolution, kept free of real
    /// environment access so the priority chain can be unit tested.
    ///
    /// Queries `env_lookup` for `USER`, then `LOGNAME`, then `USERNAME`, and
    /// returns the first value that is present and non-empty. Later keys are
    /// not queried once a value has been accepted. If none qualifies the
    /// result is the literal `"unknown"`.
    fn resolve_user_from_env<F>(env_lookup: &F) -> String
    where
        F: Fn(&str) -> Option<String>,
    {
        const CANDIDATES: [&str; 3] = ["USER", "LOGNAME", "USERNAME"];

        CANDIDATES
            .iter()
            .find_map(|&name| env_lookup(name).filter(|value| !value.is_empty()))
            .unwrap_or_else(|| String::from("unknown"))
    }

    /// Produce a salted SHA-256 digest of `input_data`.
    ///
    /// Digest = SHA-256(salt(16) || sorted_json(`input_data`)), returned as a
    /// lowercase hex string (64 chars). The 16-byte salt is drawn from `OsRng`
    /// on every call and is not returned to the caller.
    ///
    /// Spec (SEC-03): hash = sha256(salt + json.dumps(input, sort_keys=True)).
    /// A fresh per-call salt prevents cross-invocation input correlation.
    fn hash_input(input_data: &Value) -> String {
        use aes_gcm::aead::rand_core::RngCore;
        use aes_gcm::aead::OsRng;

        let mut salt = [0u8; 16];
        OsRng.fill_bytes(&mut salt);

        // Assemble salt || canonical JSON, then hash the whole buffer at once.
        let canonical = sorted_json(input_data);
        let mut salted = Vec::with_capacity(salt.len() + canonical.len());
        salted.extend_from_slice(&salt);
        salted.extend_from_slice(canonical.as_bytes());

        let digest = Sha256::digest(&salted);
        format!("{:x}", digest)
    }

    /// Log a single module execution event.
    ///
    /// Appends one JSON line to the audit log. When the logger has no path
    /// configured the call returns immediately. IO and serialisation failures
    /// are reported through `tracing` and otherwise ignored — this method
    /// never propagates an error. The first failure seen by this logger (or
    /// any of its clones) is emitted at `warn` level; later ones at `trace`.
    ///
    /// On Unix the log file is created with mode `0600`, and an existing file
    /// is tightened to `0600` through the open handle (best-effort), so audit
    /// entries are not readable by other local UIDs on shared systems.
    ///
    /// # Arguments
    /// * `module_id`   — identifier of the executed module
    /// * `input_data`  — the module input; only a salted hash of it is stored
    /// * `status`      — outcome label, e.g. `"success"` or `"error"`
    /// * `exit_code`   — process exit code
    /// * `duration_ms` — wall-clock execution time in milliseconds
    ///
    /// # Fields written
    /// * `timestamp`   — ISO 8601 UTC timestamp with millisecond precision
    /// * `user`        — username as resolved by `get_user`
    ///   (`getlogin → getpwuid → USER → LOGNAME → USERNAME → "unknown"`)
    /// * `module_id`   — the executed module's identifier
    /// * `input_hash`  — salted SHA-256 of the JSON-serialised input; the
    ///   salt itself is **not** written to the log
    /// * `status`      — the `status` argument, verbatim
    /// * `exit_code`   — process exit code
    /// * `duration_ms` — wall-clock execution time in milliseconds
    pub fn log_execution(
        &self,
        module_id: &str,
        input_data: &Value,
        status: &str,
        exit_code: i32,
        duration_ms: u64,
    ) {
        let Some(ref path) = self.path else {
            return; // logging disabled
        };

        let timestamp = Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
        let input_hash = Self::hash_input(input_data);
        let entry = json!({
            "timestamp":   timestamp,
            "user":        Self::get_user(),
            "module_id":   module_id,
            "input_hash":  input_hash,
            "status":      status,
            "exit_code":   exit_code,
            "duration_ms": duration_ms,
        });

        let result = (|| -> std::io::Result<()> {
            let mut options = std::fs::OpenOptions::new();
            options.create(true).append(true);
            // Create the file owner-only from the start, so there is no
            // window in which a freshly created log carries the
            // umask-default (typically world-readable) mode.
            #[cfg(unix)]
            {
                use std::os::unix::fs::OpenOptionsExt;
                options.mode(0o600);
            }
            let file = options.open(path)?;
            // A file that already existed keeps its old mode, so tighten it
            // as well. Going through the open handle guarantees the chmod
            // hits the file we are about to write, not whatever the path
            // names by then. Best-effort; idempotent across appends.
            #[cfg(unix)]
            {
                use std::os::unix::fs::PermissionsExt;
                let _ = file.set_permissions(std::fs::Permissions::from_mode(0o600));
            }
            let mut writer = BufWriter::new(file);
            serde_json::to_writer(&mut writer, &entry).map_err(std::io::Error::other)?;
            writeln!(writer)?;
            // Flush explicitly: BufWriter's Drop would swallow a write error.
            writer.flush()?;
            Ok(())
        })();

        if let Err(e) = result {
            // Dedup write-failure warnings: emit at most once per logger
            // instance (and any of its clones, since the flag is held in an
            // Arc). Subsequent failures fall through to trace level so log
            // output stays diagnosable without flooding stderr. Cross-SDK
            // parity with TypeScript and Python (audit D11-010).
            if !self.write_failure_warned.swap(true, Ordering::Relaxed) {
                tracing::warn!("Could not write audit log: {e}");
            } else {
                tracing::trace!("Could not write audit log (suppressed): {e}");
            }
        }
    }
}

/// Errors produced by the audit logger (reserved for future use).
///
/// Nothing in the logger shown in this file returns this type:
/// `AuditLogger::log_execution` reports failures through `tracing` instead of
/// propagating them. Both variants derive `From` for their source error, so
/// `?` can convert into this type.
#[derive(Debug, Error)]
pub enum AuditLogError {
    /// An underlying filesystem / IO operation failed.
    #[error("failed to write audit log: {0}")]
    Io(#[from] std::io::Error),

    /// The audit record could not be serialised to JSON.
    #[error("failed to serialise audit record: {0}")]
    Serialise(#[from] serde_json::Error),
}

// ---------------------------------------------------------------------------
// Unit tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_audit_logger_disabled_no_op() {
        // AuditLogger with path=None must not write any files. The struct is
        // built directly because `AuditLogger::new(None)` would substitute the
        // default path instead of leaving the logger disabled.
        let logger = AuditLogger {
            path: None,
            write_failure_warned: Arc::new(AtomicBool::new(false)),
        };
        // Should not panic even with no path.
        logger.log_execution("mod.test", &json!({}), "success", 0, 1);
    }

    #[test]
    fn test_audit_logger_writes_jsonl_record() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");
        let logger = AuditLogger::new(Some(path.clone()));
        logger.log_execution("math.add", &json!({"a": 1}), "success", 0, 42);
        let content = std::fs::read_to_string(&path).unwrap();
        let entry: serde_json::Value = serde_json::from_str(content.trim()).unwrap();
        assert_eq!(entry["module_id"], "math.add");
        assert_eq!(entry["status"], "success");
        assert_eq!(entry["exit_code"], 0);
        assert_eq!(entry["duration_ms"], 42);
    }

    #[test]
    fn test_audit_logger_appends_multiple_records() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");
        let logger = AuditLogger::new(Some(path.clone()));
        logger.log_execution("a.b", &json!({}), "success", 0, 1);
        logger.log_execution("c.d", &json!({}), "error", 1, 2);
        let content = std::fs::read_to_string(&path).unwrap();
        let lines: Vec<&str> = content.lines().collect();
        assert_eq!(lines.len(), 2);
    }

    #[test]
    fn test_audit_logger_record_contains_required_fields() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");
        let logger = AuditLogger::new(Some(path.clone()));
        logger.log_execution("x.y", &json!({"k": "v"}), "success", 0, 10);
        let raw = std::fs::read_to_string(&path).unwrap();
        let entry: serde_json::Value = serde_json::from_str(raw.trim()).unwrap();
        assert!(entry["timestamp"].as_str().unwrap().ends_with('Z'));
        assert!(entry["user"].is_string());
        assert_eq!(entry["module_id"], "x.y");
        assert!(entry["input_hash"].as_str().unwrap().len() == 64); // hex SHA-256
        // input_salt is NOT persisted per spec (A-D-007 fix)
        assert!(entry.get("input_salt").is_none());
        assert_eq!(entry["status"], "success");
        assert!(entry["exit_code"].is_number());
        assert!(entry["duration_ms"].is_number());
    }

    #[test]
    fn test_audit_logger_different_inputs_produce_different_hashes() {
        // Even without a persisted salt, different inputs must produce different
        // hashes in practice (different sorted JSON payload).
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");
        let logger = AuditLogger::new(Some(path.clone()));
        logger.log_execution("x.y", &json!({"a": 1}), "success", 0, 0);
        logger.log_execution("x.y", &json!({"a": 2}), "success", 0, 0);
        let lines: Vec<String> = std::fs::read_to_string(&path)
            .unwrap()
            .lines()
            .map(String::from)
            .collect();
        let h0 = serde_json::from_str::<serde_json::Value>(&lines[0]).unwrap()["input_hash"]
            .as_str()
            .unwrap()
            .to_string();
        let h1 = serde_json::from_str::<serde_json::Value>(&lines[1]).unwrap()["input_hash"]
            .as_str()
            .unwrap()
            .to_string();
        assert_ne!(h0, h1, "different inputs must produce different hashes");
    }

    #[test]
    fn test_audit_logger_same_input_different_hash_per_call() {
        // Each invocation uses a fresh random salt, so two calls with the same
        // input must produce different hash values.
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");
        let logger = AuditLogger::new(Some(path.clone()));
        logger.log_execution("u.v", &json!({}), "success", 0, 0);
        logger.log_execution("u.v", &json!({}), "success", 0, 0);
        let lines: Vec<String> = std::fs::read_to_string(&path)
            .unwrap()
            .lines()
            .map(String::from)
            .collect();
        let h0 = serde_json::from_str::<serde_json::Value>(&lines[0]).unwrap()["input_hash"]
            .as_str()
            .unwrap()
            .to_string();
        let h1 = serde_json::from_str::<serde_json::Value>(&lines[1]).unwrap()["input_hash"]
            .as_str()
            .unwrap()
            .to_string();
        assert_ne!(
            h0, h1,
            "same input across calls must produce different hashes (random salt)"
        );
    }

    /// The log file must end up owner read/write only after a write.
    #[cfg(unix)]
    #[test]
    fn test_audit_logger_file_mode_is_owner_only() {
        use std::os::unix::fs::PermissionsExt;
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("audit.jsonl");
        let logger = AuditLogger::new(Some(path.clone()));
        logger.log_execution("perm.test", &json!({}), "success", 0, 0);
        let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777;
        assert_eq!(mode, 0o600, "audit log must be 0600; got {:o}", mode);
    }

    /// Constructing the logger alone (no write) must create the parent
    /// directory and restrict it to the owner.
    #[cfg(unix)]
    #[test]
    fn test_audit_logger_parent_dir_mode_is_owner_only() {
        use std::os::unix::fs::PermissionsExt;
        let dir = tempfile::tempdir().unwrap();
        let nested = dir.path().join("nested-audit-dir");
        let path = nested.join("audit.jsonl");
        let _logger = AuditLogger::new(Some(path));
        let mode = std::fs::metadata(&nested).unwrap().permissions().mode() & 0o777;
        assert_eq!(mode, 0o700, "parent dir must be 0700; got {:o}", mode);
    }

    /// D11-002 (2026-05-08): `sorted_json` must canonicalise objects nested
    /// inside object values and inside arrays. Previously the function only
    /// sorted top-level keys, so two semantically equal payloads with
    /// different nested key orderings serialised to different strings and
    /// therefore hashed differently.
    #[test]
    fn test_sorted_json_recurses_into_nested_objects() {
        let a = json!({ "outer": { "y": 1, "x": 2 } });
        let b = json!({ "outer": { "x": 2, "y": 1 } });
        assert_eq!(
            super::sorted_json(&a),
            super::sorted_json(&b),
            "nested objects with reordered keys must canonicalise identically"
        );
    }

    #[test]
    fn test_sorted_json_recurses_into_arrays_of_objects() {
        let a = json!({ "items": [ { "y": 1, "x": 2 }, { "b": 4, "a": 3 } ] });
        let b = json!({ "items": [ { "x": 2, "y": 1 }, { "a": 3, "b": 4 } ] });
        assert_eq!(
            super::sorted_json(&a),
            super::sorted_json(&b),
            "objects nested inside arrays must canonicalise identically"
        );
    }

    #[test]
    fn test_sorted_json_preserves_array_element_order() {
        // Element order is data, not formatting — must NOT be sorted.
        let a = json!([3, 1, 2]);
        let b = json!([1, 2, 3]);
        assert_ne!(
            super::sorted_json(&a),
            super::sorted_json(&b),
            "array element order must be preserved (it is part of the value)"
        );
    }

    /// D10-007: env-fallback helper must walk USER -> LOGNAME -> USERNAME -> "unknown".
    #[test]
    fn test_resolve_user_from_env_priority_chain() {
        // USER takes precedence over LOGNAME/USERNAME.
        let env = |k: &str| -> Option<String> {
            match k {
                "USER" => Some("user_val".to_string()),
                "LOGNAME" => Some("logname_val".to_string()),
                "USERNAME" => Some("username_val".to_string()),
                _ => None,
            }
        };
        assert_eq!(AuditLogger::resolve_user_from_env(&env), "user_val");

        // LOGNAME wins when USER is unset.
        let env = |k: &str| -> Option<String> {
            match k {
                "LOGNAME" => Some("logname_val".to_string()),
                "USERNAME" => Some("username_val".to_string()),
                _ => None,
            }
        };
        assert_eq!(AuditLogger::resolve_user_from_env(&env), "logname_val");

        // USERNAME wins when USER and LOGNAME are unset (Windows-style).
        let env = |k: &str| -> Option<String> {
            match k {
                "USERNAME" => Some("username_val".to_string()),
                _ => None,
            }
        };
        assert_eq!(AuditLogger::resolve_user_from_env(&env), "username_val");

        // All unset → "unknown".
        let env = |_: &str| -> Option<String> { None };
        assert_eq!(AuditLogger::resolve_user_from_env(&env), "unknown");
    }

    /// D10-007: get_user prefers system identity (getlogin/getpwuid) over env vars.
    /// On Unix dev hosts, getpwuid(geteuid()) always succeeds, so even if USER and
    /// LOGNAME are set to sentinel values, get_user must NOT return those sentinels.
    #[cfg(unix)]
    #[test]
    fn test_get_user_prefers_system_identity_over_env() {
        // NOTE: This test sets process-wide env vars. Other tests in the same
        // binary that read USER/LOGNAME could observe these values; we restore
        // them at the end. The critical assertion (system identity beats env) is
        // independent of pre-existing values.
        let prev_user = std::env::var("USER").ok();
        let prev_logname = std::env::var("LOGNAME").ok();
        std::env::set_var("USER", "sentinel_user_d10_007");
        std::env::set_var("LOGNAME", "sentinel_logname_d10_007");

        let resolved = AuditLogger::get_user();

        // Restore env first so a panic in the assertions below leaves a clean state.
        match prev_user {
            Some(v) => std::env::set_var("USER", v),
            None => std::env::remove_var("USER"),
        }
        match prev_logname {
            Some(v) => std::env::set_var("LOGNAME", v),
            None => std::env::remove_var("LOGNAME"),
        }

        assert_ne!(
            resolved, "sentinel_user_d10_007",
            "get_user must consult getlogin/getpwuid before USER env var"
        );
        assert_ne!(
            resolved, "sentinel_logname_d10_007",
            "get_user must consult getlogin/getpwuid before LOGNAME env var"
        );
        assert!(!resolved.is_empty(), "get_user must never return empty");
    }
}