#![allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
use std::fs;
use assert_cmd::Command;
use camino::Utf8PathBuf;
use predicates::prelude::*;
use tempfile::TempDir;
use doiget_core::provenance::{Capability, LogEvent, LogResult, ProvenanceLog, RowInput};
/// Returns the location of `dir` as an owned [`Utf8PathBuf`].
///
/// # Panics
///
/// Panics if the temp directory path cannot be represented as UTF-8.
fn utf8_path(dir: &TempDir) -> Utf8PathBuf {
    let std_path = dir.path().to_path_buf();
    Utf8PathBuf::from_path_buf(std_path).expect("temp dir path must be UTF-8")
}
/// Creates a fresh temp directory holding an `access.jsonl` provenance log
/// with `n` appended rows.
///
/// Every row is a `Fetch` / `Ok` / `Oa` entry with all optional fields left
/// as `None`. The log handle is released before returning.
///
/// Returns the [`TempDir`] guard together with the log path; callers keep the
/// guard bound for as long as they use the path. See also `seed_log_at`,
/// which seeds a caller-chosen path.
///
/// # Panics
///
/// Panics if the temp directory cannot be created, the log cannot be opened,
/// or an append fails.
fn seed_log(n: usize) -> (TempDir, Utf8PathBuf) {
    let dir = TempDir::new().expect("tempdir");
    let path = utf8_path(&dir).join("access.jsonl");

    {
        // Fixed identifier handed to `ProvenanceLog::open` for every seeded log.
        let fixed_id = String::from("01JCKZ7Q0000000000000000AB");
        let log = ProvenanceLog::open(path.clone(), fixed_id).expect("open provenance log");

        (0..n).for_each(|_| {
            let row = RowInput {
                event: LogEvent::Fetch,
                result: LogResult::Ok,
                capability: Capability::Oa,
                ref_: None,
                source: None,
                error_code: None,
                size_bytes: None,
                license: None,
                store_path: None,
                canonical_digest: None,
            };
            log.append(row).expect("append seed row");
        });
        // `log` goes out of scope here, i.e. before the path is handed back.
    }

    (dir, path)
}
/// Builds a [`Command`] for the `doiget` binary with a controlled environment.
///
/// The command gets `DOIGET_LOG_PATH` pointing at `log_path`, both `HOME` and
/// `USERPROFILE` pointing at `dir_root`, and `DOIGET_MODE` set to `human`.
/// NOTE(review): the home overrides presumably keep the binary away from the
/// real user profile — confirm against the binary's config lookup.
///
/// # Panics
///
/// Panics if the `doiget` binary cannot be located.
fn doiget(log_path: &Utf8PathBuf, dir_root: &Utf8PathBuf) -> Command {
    let root = dir_root.as_str();
    let environment = [
        ("DOIGET_LOG_PATH", log_path.as_str()),
        ("HOME", root),
        ("USERPROFILE", root),
        ("DOIGET_MODE", "human"),
    ];

    let mut cmd = Command::cargo_bin("doiget").expect("locate doiget binary");
    for (key, value) in environment {
        cmd.env(key, value);
    }
    cmd
}
/// `audit-log --verify` on an untouched three-row log must exit successfully
/// and report three rows, three ok, and zero issues.
#[test]
fn audit_log_verify_clean_chain_succeeds() {
    let (dir_guard, log_path) = seed_log(3);
    let dir_root = utf8_path(&dir_guard);

    let mut cmd = doiget(&log_path, &dir_root);
    cmd.args(["audit-log", "--verify"]);
    let outcome = cmd.assert().success();

    let stdout = String::from_utf8(outcome.get_output().stdout.clone())
        .expect("doiget audit-log stdout was not UTF-8");

    // (substring that must appear, explanation shown when it is missing)
    let expectations = [
        ("audit-log verify: 3 rows", "expected header with row count"),
        ("ok: 3", "expected ok count of 3"),
        ("issues: 0", "expected zero issues on a clean log"),
    ];
    for (needle, complaint) in expectations {
        assert!(stdout.contains(needle), "{complaint}, got:\n{stdout}");
    }
}
/// Verifying a log file that was never created must still succeed and report
/// zero rows and zero issues.
#[test]
fn audit_log_verify_missing_log_succeeds() {
    let dir = TempDir::new().expect("tempdir");
    let dir_root = utf8_path(&dir);
    let log_path = dir_root.join("never-created.jsonl");
    assert!(!log_path.exists(), "precondition: log must not exist");

    let empty_header = predicate::str::contains("audit-log verify: 0 rows");
    let no_issues = predicate::str::contains("issues: 0");

    let mut cmd = doiget(&log_path, &dir_root);
    cmd.args(["audit-log", "--verify"]);
    cmd.assert()
        .success()
        .stdout(empty_header)
        .stdout(no_issues);
}
/// Running `audit-log` without `--verify` must fail and explain on stderr
/// that the flag is required.
#[test]
fn audit_log_without_verify_flag_errors() {
    let dir = TempDir::new().expect("tempdir");
    let dir_root = utf8_path(&dir);
    let log_path = dir_root.join("access.jsonl");

    let missing_flag_hint = predicate::str::contains("--verify is required");

    let mut cmd = doiget(&log_path, &dir_root);
    cmd.arg("audit-log");
    cmd.assert().failure().stderr(missing_flag_hint);
}
/// Overwriting the `this_hash` value of the second row must make
/// `audit-log --verify` fail and report a `this-hash` issue on line 2.
#[test]
fn audit_log_verify_detects_tampered_this_hash() {
    const FIELD_PREFIX: &str = "\"this_hash\":\"";
    const BOGUS_HASH: &str = "0000000000000000000000000000000000000000000000000000000000000000";

    let (dir_guard, log_path) = seed_log(2);
    let dir_root = utf8_path(&dir_guard);

    let original = fs::read_to_string(&log_path).expect("read log");
    let mut rows: Vec<String> = original.lines().map(String::from).collect();
    assert_eq!(rows.len(), 2, "seed_log should produce exactly 2 rows");

    // Swap the quoted hash value of row 2 in place, leaving the rest of the
    // row untouched.
    let second = &mut rows[1];
    let value_start = second
        .find(FIELD_PREFIX)
        .expect("this_hash field present in row 2")
        + FIELD_PREFIX.len();
    let value_len = second[value_start..]
        .find('"')
        .expect("closing quote for this_hash present");
    second.replace_range(value_start..value_start + value_len, BOGUS_HASH);

    // Write the rows back newline-terminated.
    let mut tampered = rows.join("\n");
    tampered.push('\n');
    fs::write(&log_path, tampered).expect("write tampered log");

    let mut cmd = doiget(&log_path, &dir_root);
    cmd.args(["audit-log", "--verify"]);
    let outcome = cmd.assert().failure();

    let stdout = String::from_utf8(outcome.get_output().stdout.clone())
        .expect("doiget audit-log stdout was not UTF-8");

    // (substring that must appear, explanation shown when it is missing)
    let expectations = [
        (
            "audit-log verify: 2 rows",
            "expected header with row count, got stdout:",
        ),
        (
            "this-hash",
            "expected 'this-hash' issue marker in stdout, got:",
        ),
        (
            "line 2",
            "expected issue to be reported on line 2, got stdout:",
        ),
    ];
    for (needle, complaint) in expectations {
        assert!(stdout.contains(needle), "{complaint}\n{stdout}");
    }
}
/// Opens a provenance log at `path` and appends `n` rows to it.
///
/// Every row is a `Fetch` / `Ok` / `Oa` entry with all optional fields left
/// as `None`. The log handle is released when the function returns.
///
/// # Panics
///
/// Panics if the log cannot be opened or an append fails.
fn seed_log_at(path: &Utf8PathBuf, n: usize) {
    let log = ProvenanceLog::open(path.to_owned(), String::from("01JCKZ7Q0000000000000000AB"))
        .expect("open provenance log");

    // Rows are built lazily, one per append.
    let rows = std::iter::repeat_with(|| RowInput {
        event: LogEvent::Fetch,
        result: LogResult::Ok,
        capability: Capability::Oa,
        ref_: None,
        source: None,
        error_code: None,
        size_bytes: None,
        license: None,
        store_path: None,
        canonical_digest: None,
    })
    .take(n);

    for row in rows {
        log.append(row).expect("append seed row");
    }
}
/// Rewrites the log at `path` so that the `this_hash` value on line
/// `line_1based` (counting from 1) becomes 64 ASCII zeros.
///
/// All other content is preserved; the file is written back with lines joined
/// by `\n` and a single trailing newline.
///
/// # Panics
///
/// Panics if the file cannot be read or written, if `line_1based` is `0` or
/// past the last line, or if the selected line has no quoted `this_hash`
/// value.
fn tamper_this_hash(path: &Utf8PathBuf, line_1based: usize) {
    const FIELD_PREFIX: &str = "\"this_hash\":\"";
    const ZERO_DIGEST: &str = "0000000000000000000000000000000000000000000000000000000000000000";

    let contents = fs::read_to_string(path).expect("read log");
    let mut rows: Vec<String> = contents.lines().map(String::from).collect();

    let row = &mut rows[line_1based - 1];
    let value_start = row.find(FIELD_PREFIX).expect("this_hash field") + FIELD_PREFIX.len();
    let value_end = value_start + row[value_start..].find('"').expect("closing quote");
    // Replace only the text between the quotes of the hash value.
    row.replace_range(value_start..value_end, ZERO_DIGEST);

    let mut rewritten = rows.join("\n");
    rewritten.push('\n');
    fs::write(path, rewritten).expect("write tampered log");
}
/// With a clean rotated `.gz` segment next to a tampered current log,
/// `audit-log --verify` must fail, aggregate the row count over both
/// segments, and report each segment on its own.
///
/// Setup:
/// * two rows are seeded into a scratch file, gzip-compressed into
///   `access.jsonl.2026-01-01-000000.gz`, and the scratch file is removed;
/// * two rows are seeded into the current `access.jsonl`, then the
///   `this_hash` of its second row is overwritten.
#[test]
fn audit_log_verify_multi_segment_reports_each_independently() {
    use std::io::Write;

    use flate2::write::GzEncoder;
    use flate2::Compression;

    let dir = TempDir::new().expect("tempdir");
    let dir_root = utf8_path(&dir);
    let current = dir_root.join("access.jsonl");

    // Produce the rotated segment from a throwaway log.
    let scratch = dir_root.join("scratch.jsonl");
    seed_log_at(&scratch, 2);
    let plain = fs::read(scratch.as_std_path()).expect("read scratch");
    let gz_path = dir_root.join("access.jsonl.2026-01-01-000000.gz");
    {
        let f = fs::File::create(gz_path.as_std_path()).expect("create gz");
        let mut enc = GzEncoder::new(f, Compression::default());
        enc.write_all(&plain).expect("gzip write");
        enc.finish().expect("gzip finish");
    }
    fs::remove_file(scratch.as_std_path()).expect("rm scratch");

    // Current segment: two rows, the second one tampered.
    seed_log_at(&current, 2);
    tamper_this_hash(&current, 2);

    let assert = doiget(&current, &dir_root)
        .args(["audit-log", "--verify"])
        .assert()
        .failure();
    let stdout = String::from_utf8(assert.get_output().stdout.clone())
        .expect("doiget audit-log stdout was not UTF-8");

    assert!(
        stdout.contains("audit-log verify: 4 rows"),
        "aggregate row count over all segments, got:\n{stdout}"
    );
    assert!(
        stdout.contains("issues: 1"),
        "exactly the tampered row counts as an issue, got:\n{stdout}"
    );
    assert!(
        stdout.contains("segment access.jsonl.2026-01-01-000000.gz: 2 rows, 2 ok, 0 issues"),
        "clean rotated .gz segment summary, got:\n{stdout}"
    );
    assert!(
        stdout.contains("segment access.jsonl: 2 rows, 1 ok, 1 issues"),
        "tampered current segment summary, got:\n{stdout}"
    );
    assert!(
        stdout.contains("[access.jsonl] line 2: this-hash"),
        "multi-segment issue line must carry the segment prefix, got:\n{stdout}"
    );
}