#![allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
use assert_cmd::Command;
use camino::Utf8PathBuf;
use predicates::str::contains;
use serial_test::serial;
use tempfile::TempDir;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
use doiget_cli::commands::output::OutputMode;
use doiget_cli::commands::text::run;
mod common;
use common::env_guard::EnvGuard;
/// Names of the environment variables that every in-process test hands to
/// `EnvGuard::new` before overriding the ones it needs through the guard.
///
/// NOTE(review): presumably the guard snapshots these keys and restores them
/// when dropped — confirm against `common::env_guard`.
const ENV_KEYS: &[&str] = &[
    // Endpoint and storage locations used by the command under test.
    "DOIGET_AR5IV_BASE",
    "DOIGET_CACHE_ROOT",
    "DOIGET_STORE_ROOT",
    "DOIGET_LOG_PATH",
    // Output mode selector.
    "DOIGET_MODE",
    // Home-directory variables (Unix and Windows spellings).
    "HOME",
    "USERPROFILE",
];
/// Minimal XHTML document served by the mock server in the success-path tests.
///
/// It carries a `<title>`, one lead paragraph and a single `<section>` with an
/// `<h2>` heading and a body paragraph; the subprocess test asserts on the
/// title text and on the section body text.
const SAMPLE_AR5IV: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Extracted Paper</title></head>
<body>
<p>Lead matter.</p>
<section><h2>1 Introduction</h2><p>Intro body.</p></section>
</body>
</html>"#;
/// Returns the temporary directory's path as an owned `Utf8PathBuf`.
///
/// # Panics
///
/// Panics if the directory path is not valid UTF-8.
fn utf8(dir: &TempDir) -> Utf8PathBuf {
    let std_path = dir.path().to_path_buf();
    let converted = Utf8PathBuf::from_path_buf(std_path);
    converted.expect("temp dir path must be UTF-8")
}
/// End-to-end check of the in-process `text` command for an arXiv id against
/// a mocked ar5iv endpoint.
///
/// Verifies, in order, that:
/// 1. the first run succeeds;
/// 2. the provenance log contains a `fetch` row whose source is `ar5iv`;
/// 3. exactly one `.json` entry exists under `<cache root>/text`;
/// 4. a second run for the same id succeeds, and the mock server has still
///    recorded only a single request for the document path.
#[tokio::test]
#[serial]
async fn text_extracts_logs_and_caches() {
    const DOC_PATH: &str = "/html/2401.12345";

    let server = MockServer::start().await;
    Mock::given(method("GET"))
        .and(path(DOC_PATH))
        .respond_with(ResponseTemplate::new(200).set_body_string(SAMPLE_AR5IV))
        // The mock stops matching after one hit, so a refetch would not
        // receive the sample document again.
        .up_to_n_times(1)
        .mount(&server)
        .await;

    let dir = TempDir::new().expect("tempdir");
    let root = utf8(&dir);
    let cache_root = root.join("cache");
    let log_path = root.join("access.jsonl");

    // Held until the end of the test so the overrides cover both runs.
    let guard = EnvGuard::new(ENV_KEYS);
    guard.set("DOIGET_AR5IV_BASE", &server.uri());
    guard.set("DOIGET_CACHE_ROOT", cache_root.as_str());
    guard.set("DOIGET_STORE_ROOT", root.join("papers").as_str());
    guard.set("DOIGET_LOG_PATH", log_path.as_str());
    guard.set("DOIGET_MODE", "quiet");
    guard.set("HOME", root.as_str());
    guard.set("USERPROFILE", root.as_str());

    // First run: expected to go to the mocked endpoint.
    let res = run(
        "arxiv:2401.12345".to_string(),
        None,
        false,
        OutputMode::Quiet,
        true,
    )
    .await;
    assert!(res.is_ok(), "text run failed: {res:?}");

    let log = std::fs::read_to_string(log_path.as_std_path()).expect("read provenance log");
    assert!(
        log.contains("\"event\":\"fetch\"") && log.contains("\"source\":\"ar5iv\""),
        "missing ar5iv fetch row in:\n{log}"
    );

    // Count the `.json` files in the text cache directory; unreadable
    // directory entries are skipped, as before.
    let text_dir = cache_root.join("text");
    let json_entries = std::fs::read_dir(text_dir.as_std_path())
        .expect("text cache dir exists")
        .filter_map(Result::ok)
        .filter(|e| e.path().extension().and_then(|x| x.to_str()) == Some("json"))
        .count();
    assert_eq!(json_entries, 1, "exactly one cached text entry expected");

    // Second run for the same id.
    let res2 = run(
        "arxiv:2401.12345".to_string(),
        None,
        false,
        OutputMode::Quiet,
        true,
    )
    .await;
    assert!(res2.is_ok(), "second (cached) text run failed: {res2:?}");

    // Check the cache hit directly rather than inferring it from `res2`:
    // the document path must have been requested exactly once across both runs.
    let hits = server
        .received_requests()
        .await
        .expect("request recording is enabled by default")
        .iter()
        .filter(|req| req.url.path() == DOC_PATH)
        .count();
    assert_eq!(
        hits, 1,
        "second run must be served from the cache, not refetched"
    );
}
/// Running `text` on a plain DOI must fail, and the error must carry a
/// `CliExit` with a non-zero code.
///
/// No mock server is started and `DOIGET_AR5IV_BASE` is not set by this test.
#[tokio::test]
#[serial]
async fn text_for_doi_reports_no_oa_available() {
    use doiget_cli::commands::fetch::CliExit;

    let scratch = TempDir::new().expect("tempdir");
    let base = utf8(&scratch);
    let cache = base.join("cache");
    let store = base.join("papers");
    let log = base.join("access.jsonl");

    // Keep the guard bound for the whole test so the overrides stay in place.
    let env = EnvGuard::new(ENV_KEYS);
    for (key, value) in [
        ("DOIGET_CACHE_ROOT", cache.as_str()),
        ("DOIGET_STORE_ROOT", store.as_str()),
        ("DOIGET_LOG_PATH", log.as_str()),
        ("DOIGET_MODE", "quiet"),
        ("HOME", base.as_str()),
        ("USERPROFILE", base.as_str()),
    ] {
        env.set(key, value);
    }

    let identifier = "10.1234/example".to_string();
    let outcome = run(identifier, None, false, OutputMode::Quiet, true).await;
    let failure = outcome.expect_err("a DOI must error (no full-text source)");

    let cli_exit = failure
        .downcast_ref::<CliExit>()
        .expect("DOI path must yield a CliExit");
    assert_ne!(cli_exit.0, 0, "exit code must be non-zero for a DOI");
}
/// When the mocked endpoint answers 200 with an HTML page whose `<body>` is
/// empty, `run` must return an error carrying a non-zero `CliExit` instead of
/// succeeding with no text.
#[tokio::test]
#[serial]
async fn text_unconverted_render_exits_non_zero_never_silent() {
    use doiget_cli::commands::fetch::CliExit;

    let ar5iv = MockServer::start().await;
    let empty_page =
        ResponseTemplate::new(200).set_body_string("<html><head></head><body></body></html>");
    Mock::given(method("GET"))
        .and(path("/html/2012.03644"))
        .respond_with(empty_page)
        .mount(&ar5iv)
        .await;

    let scratch = TempDir::new().expect("tempdir");
    let base = utf8(&scratch);
    let ar5iv_base = ar5iv.uri();
    let cache = base.join("cache");
    let store = base.join("papers");
    let log = base.join("access.jsonl");

    // Keep the guard bound for the whole test so the overrides stay in place.
    let env = EnvGuard::new(ENV_KEYS);
    for (key, value) in [
        ("DOIGET_AR5IV_BASE", ar5iv_base.as_str()),
        ("DOIGET_CACHE_ROOT", cache.as_str()),
        ("DOIGET_STORE_ROOT", store.as_str()),
        ("DOIGET_LOG_PATH", log.as_str()),
        ("DOIGET_MODE", "quiet"),
        ("HOME", base.as_str()),
        ("USERPROFILE", base.as_str()),
    ] {
        env.set(key, value);
    }

    // NOTE(review): the third positional argument is `true` here, unlike the
    // other in-process tests — see `commands::text::run` for its meaning.
    let identifier = "arxiv:2012.03644".to_string();
    let outcome = run(identifier, None, true, OutputMode::Quiet, true).await;
    let failure =
        outcome.expect_err("an unconverted render must error, never silently succeed");

    let cli_exit = failure
        .downcast_ref::<CliExit>()
        .expect("unavailable text must yield a CliExit");
    assert_ne!(
        cli_exit.0, 0,
        "exit code must be non-zero when no text is produced"
    );
}
/// Builds a `doiget` subprocess command whose on-disk state lives under `root`
/// and whose ar5iv base URL is `server_uri`.
///
/// The cache root, store root, log path, `HOME` and `USERPROFILE` are all
/// pointed at `root` or paths beneath it. `DOIGET_MODE` is deliberately not
/// set; it is removed from the child's environment so the binary picks its own
/// output mode.
///
/// # Panics
///
/// Panics if the `doiget` binary cannot be located.
fn doiget_subprocess(root: &Utf8PathBuf, server_uri: &str) -> Command {
    let mut cmd = Command::cargo_bin("doiget").expect("locate doiget binary");
    let p = root.as_str();
    cmd.env("DOIGET_AR5IV_BASE", server_uri)
        .env("DOIGET_CACHE_ROOT", root.join("cache").as_str())
        .env("DOIGET_STORE_ROOT", root.join("papers").as_str())
        .env("DOIGET_LOG_PATH", root.join("access.jsonl").as_str())
        .env("HOME", p)
        .env("USERPROFILE", p)
        // The child inherits the parent environment, so a `DOIGET_MODE`
        // exported by the developer's shell or CI would otherwise leak in.
        // NOTE(review): assumes the binary reads `DOIGET_MODE`, as the
        // in-process tests suggest — confirm.
        .env_remove("DOIGET_MODE");
    cmd
}
/// Runs `doiget text arxiv:2401.12345` as a subprocess, without `--quiet`,
/// and expects success with the sample document's title and section body on
/// stdout.
#[tokio::test]
#[serial]
async fn text_piped_non_tty_still_emits_prose() {
    let ar5iv = MockServer::start().await;
    let sample_page = ResponseTemplate::new(200).set_body_string(SAMPLE_AR5IV);
    Mock::given(method("GET"))
        .and(path("/html/2401.12345"))
        .respond_with(sample_page)
        .mount(&ar5iv)
        .await;

    let scratch = TempDir::new().expect("tempdir");
    let base = utf8(&scratch);

    let mut cmd = doiget_subprocess(&base, &ar5iv.uri());
    cmd.arg("text").arg("arxiv:2401.12345");

    cmd.assert()
        .success()
        .stdout(contains("Extracted Paper"))
        .stdout(contains("Intro body."));
}
/// Runs `doiget text arxiv:2401.12345 --quiet` as a subprocess and expects
/// success with completely empty stdout.
#[tokio::test]
#[serial]
async fn text_explicit_quiet_still_suppresses_prose() {
    let ar5iv = MockServer::start().await;
    let sample_page = ResponseTemplate::new(200).set_body_string(SAMPLE_AR5IV);
    Mock::given(method("GET"))
        .and(path("/html/2401.12345"))
        .respond_with(sample_page)
        .mount(&ar5iv)
        .await;

    let scratch = TempDir::new().expect("tempdir");
    let base = utf8(&scratch);

    let mut cmd = doiget_subprocess(&base, &ar5iv.uri());
    cmd.arg("text").arg("arxiv:2401.12345").arg("--quiet");

    cmd.assert()
        .success()
        .stdout(predicates::str::is_empty());
}
/// Runs `doiget text arxiv:2012.03644` as a subprocess against a mock that
/// returns an HTML page with an empty `<body>`, and expects a failing exit
/// status plus a stderr note pointing the user at `doiget fetch` for the same
/// identifier.
#[tokio::test]
#[serial]
async fn text_unavailable_prints_actionable_fetch_note() {
    let ar5iv = MockServer::start().await;
    let empty_page =
        ResponseTemplate::new(200).set_body_string("<html><head></head><body></body></html>");
    Mock::given(method("GET"))
        .and(path("/html/2012.03644"))
        .respond_with(empty_page)
        .mount(&ar5iv)
        .await;

    let scratch = TempDir::new().expect("tempdir");
    let base = utf8(&scratch);

    let mut cmd = doiget_subprocess(&base, &ar5iv.uri());
    cmd.arg("text").arg("arxiv:2012.03644");

    let expected_note = contains("fetch the PDF instead: `doiget fetch arxiv:2012.03644`");
    cmd.assert().failure().stderr(expected_note);
}