#![allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
use camino::{Utf8Path, Utf8PathBuf};
use doiget_cli::commands::fetch;
use doiget_cli::commands::output::OutputMode;
use doiget_core::provenance::{LogEvent, LogResult, LogRow};
use doiget_core::store::Metadata;
use serial_test::serial;
use tempfile::TempDir;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
mod common;
use common::env_guard::EnvGuard;
fn read_log_rows(path: &Utf8PathBuf) -> Vec<LogRow> {
let raw = std::fs::read_to_string(path.as_std_path()).expect("read log");
raw.lines()
.filter(|l| !l.is_empty())
.map(|l| serde_json::from_str::<LogRow>(l).expect("valid LogRow"))
.collect()
}
/// End-to-end check of `fetch::run_with_options` for the reference `arxiv:2401.12345`.
///
/// A wiremock server serves a fixed PDF body at `/pdf/2401.12345.pdf`, and the store
/// root, log path, and arXiv base URL are redirected through `DOIGET_*` environment
/// variables into a temporary directory. After the fetch the test verifies:
///
/// 1. the stored PDF is byte-identical to the mocked body,
/// 2. the metadata TOML sidecar parses and carries the expected fields,
/// 3. the log holds exactly four rows (start / fetch / store / end), and
/// 4. every row's `prev_hash` equals the previous row's `this_hash`, starting from
///    `"GENESIS"`.
///
/// Marked `#[serial]`, presumably because it mutates process-wide environment variables.
#[tokio::test]
#[serial]
async fn arxiv_2401_12345_end_to_end() {
    // --- Arrange: mock upstream serving a fixed PDF payload -------------------------
    let server = MockServer::start().await;
    let fixture_pdf = b"%PDF-1.7\n%fixture-bytes\n".to_vec();
    let fixture_len = fixture_pdf.len() as u64;
    let pdf_route = Mock::given(method("GET"))
        .and(path("/pdf/2401.12345.pdf"))
        .respond_with(ResponseTemplate::new(200).set_body_bytes(fixture_pdf.clone()));
    pdf_route.mount(&server).await;

    // --- Arrange: scratch directory holding both the paper store and the log --------
    let scratch = TempDir::new().expect("tempdir");
    let scratch_root: Utf8PathBuf = Utf8Path::from_path(scratch.path())
        .expect("temp dir is utf-8")
        .to_path_buf();
    let store_root = scratch_root.join("papers");
    let log_path = scratch_root.join("log.jsonl");

    // --- Arrange: environment overrides ---------------------------------------------
    // NOTE(review): `EnvGuard` is defined in `common::env_guard`; it presumably
    // snapshots the listed variables and restores them on drop -- confirm there.
    let guarded_vars = [
        "DOIGET_STORE_ROOT",
        "DOIGET_LOG_PATH",
        "DOIGET_ARXIV_BASE",
        "DOIGET_CROSSREF_BASE",
        "DOIGET_UNPAYWALL_BASE",
        "DOIGET_CONTACT_EMAIL",
        "DOIGET_UNPAYWALL_EMAIL",
    ];
    let env = EnvGuard::new(&guarded_vars);
    let arxiv_base = server.uri();
    env.set("DOIGET_STORE_ROOT", store_root.as_str());
    env.set("DOIGET_LOG_PATH", log_path.as_str());
    env.set("DOIGET_ARXIV_BASE", &arxiv_base);

    // --- Act ------------------------------------------------------------------------
    // The meaning of the `false` flag is defined by `fetch::run_with_options`.
    let reference = "arxiv:2401.12345".to_string();
    fetch::run_with_options(reference, false, OutputMode::Human)
        .await
        .expect("fetch::run_with_options succeeds");

    // --- Assert: stored PDF ---------------------------------------------------------
    let pdf_path = store_root.join("arxiv_2401.12345.pdf");
    assert!(
        pdf_path.exists(),
        "expected PDF at {pdf_path}; tree: {:?}",
        std::fs::read_dir(scratch_root.as_std_path())
            .map(|d| d.flatten().map(|e| e.path()).collect::<Vec<_>>())
    );
    let stored_pdf = std::fs::read(pdf_path.as_std_path()).expect("read pdf");
    assert_eq!(stored_pdf, fixture_pdf, "stored PDF must match wiremock body");

    // --- Assert: metadata sidecar ---------------------------------------------------
    let sidecar_path = store_root.join(".metadata").join("arxiv_2401.12345.toml");
    let sidecar_text =
        std::fs::read_to_string(sidecar_path.as_std_path()).expect("read metadata toml");
    let metadata: Metadata = toml::from_str(&sidecar_text).expect("metadata round-trips");
    assert_eq!(metadata.schema_version, "1.0");

    let doiget_table = metadata.doiget.expect("[doiget] table present");
    assert_eq!(doiget_table.source, "arxiv");
    assert_eq!(doiget_table.size_bytes, fixture_len);
    assert_eq!(doiget_table.license, "arxiv-default");

    let recorded_arxiv_id = metadata.arxiv_id.map(|a| a.as_str().to_string());
    assert_eq!(recorded_arxiv_id, Some("2401.12345".to_string()));

    // --- Assert: log rows -----------------------------------------------------------
    let rows = read_log_rows(&log_path);
    assert_eq!(
        rows.len(),
        4,
        "expected 4 rows (start/fetch/store/end), got {}: {:?}",
        rows.len(),
        rows.iter().map(|r| (r.event, r.result)).collect::<Vec<_>>()
    );
    // The length check above makes these four index accesses safe.
    let (session_start, fetched, stored, session_end) =
        (&rows[0], &rows[1], &rows[2], &rows[3]);

    assert_eq!(session_start.event, LogEvent::SessionStart);
    assert_eq!(session_start.result, LogResult::Ok);
    assert_eq!(session_start.ref_.as_deref(), Some("2401.12345"));

    assert_eq!(fetched.event, LogEvent::Fetch);
    assert_eq!(fetched.result, LogResult::Ok);
    assert_eq!(fetched.source.as_deref(), Some("arxiv"));
    assert_eq!(fetched.size_bytes, Some(fixture_len));

    assert_eq!(stored.event, LogEvent::StoreWrite);
    assert_eq!(stored.result, LogResult::Ok);
    assert_eq!(stored.source.as_deref(), Some("arxiv"));

    assert_eq!(session_end.event, LogEvent::SessionEnd);
    assert_eq!(session_end.result, LogResult::Ok);

    // --- Assert: hash chain ---------------------------------------------------------
    // The first row is anchored to the literal "GENESIS"; each later row must point
    // at the hash of the row immediately before it.
    assert_eq!(session_start.prev_hash, "GENESIS");
    for (offset, pair) in rows.windows(2).enumerate() {
        // `i` is the index of the later row in the pair, as reported in the message.
        let i = offset + 1;
        assert_eq!(
            pair[1].prev_hash,
            pair[0].this_hash,
            "hash chain break at row {i}"
        );
    }

    // Release the environment guard before the temporary directory is removed.
    drop(env);
    drop(scratch);
}