use std::io::Write;
use anyhow::{Context, Result};
use doiget_core::paper_text::{paper_text, PaperText, AR5IV_DEFAULT_BASE};
use doiget_core::{ArxivId, ErrorCode, Ref};
use super::fetch::{build_resolve_context, cli_exit_code, CliExit};
use super::output::OutputMode;
/// Emits one diagnostic line on stderr.
///
/// Accepts pre-built [`std::fmt::Arguments`] so call sites can hand over
/// `format_args!(...)` directly.
// `eprintln!` is exactly what the `print_stderr` lint flags; this helper is the
// one place in this module where it is used, so the lint is allowed here.
#[allow(clippy::print_stderr)]
fn print_err(message: std::fmt::Arguments<'_>) {
    eprintln!("{message}");
}
pub async fn run(
ref_: String,
max_chars: Option<usize>,
no_cache: bool,
mode: OutputMode,
quiet_was_explicit: bool,
) -> Result<()> {
let parsed = Ref::parse(&ref_).with_context(|| format!("invalid ref {ref_:?}"))?;
let id: ArxivId = match parsed {
Ref::Arxiv(a) => a,
Ref::Doi(_) => {
let code = ErrorCode::NoOaAvailable;
print_err(format_args!(
"error[{}]: no full-text source for a DOI — if an arXiv preprint exists, \
pass its id (e.g. `doiget text arxiv:2401.12345`)",
code.as_wire()
));
return Err(anyhow::Error::new(CliExit(cli_exit_code(code))));
}
};
let base = resolve_ar5iv_base()?;
let mut ctx = build_resolve_context().context("building fetch context")?;
if no_cache {
ctx.cache_root = None;
}
let text = match paper_text(&base, &id, max_chars, &ctx).await {
Ok(t) => t,
Err(e) => {
let code = ErrorCode::from(&e);
print_err(format_args!("error[{}]: {e}", code.as_wire()));
if code == ErrorCode::TextUnavailable {
print_err(format_args!(
" = note: the arXiv id is valid — fetch the PDF instead: `doiget fetch arxiv:{}`",
id.as_str()
));
}
return Err(anyhow::Error::new(CliExit(cli_exit_code(code))));
}
};
if mode == OutputMode::Quiet && quiet_was_explicit {
return Ok(());
}
let stdout = std::io::stdout();
let mut out = stdout.lock();
if mode == OutputMode::Json {
let s = serde_json::to_string_pretty(&text).context("serializing paper text JSON")?;
writeln!(out, "{s}").context("writing paper text JSON to stdout")?;
return Ok(());
}
render_human(&mut out, &text)?;
Ok(())
}
/// Determines the ar5iv base URL.
///
/// Uses the `DOIGET_AR5IV_BASE` environment variable when it can be read;
/// any failure to read it (unset or otherwise) falls back to
/// `AR5IV_DEFAULT_BASE`.
///
/// # Errors
/// Returns an error when the chosen string does not parse as a URL.
fn resolve_ar5iv_base() -> Result<url::Url> {
    let candidate = match std::env::var("DOIGET_AR5IV_BASE") {
        Ok(value) => value,
        Err(_) => AR5IV_DEFAULT_BASE.to_string(),
    };
    url::Url::parse(&candidate)
        .with_context(|| format!("DOIGET_AR5IV_BASE is not a URL: {candidate}"))
}
/// Writes `text` to `out` in a Markdown-like layout.
///
/// The title (if any) becomes a `# ` line. Each section contributes an
/// optional `## ` heading preceded by a blank line, followed by its body when
/// the body is non-empty. If `text.truncated` is set, a note with
/// `text.char_count` is sent to stderr via `print_err`, not to `out`.
///
/// # Errors
/// Returns an error, with context naming the part being written, when a write
/// to `out` fails.
fn render_human(out: &mut impl Write, text: &PaperText) -> Result<()> {
    if let Some(title) = text.title.as_ref() {
        writeln!(out, "# {title}").context("writing title to stdout")?;
    }

    for section in &text.sections {
        if let Some(heading) = section.heading.as_ref() {
            writeln!(out, "\n## {heading}").context("writing section heading to stdout")?;
        }
        // Sections with an empty body contribute their heading only.
        if section.text.is_empty() {
            continue;
        }
        writeln!(out, "{}", section.text).context("writing section body to stdout")?;
    }

    if !text.truncated {
        return Ok(());
    }
    print_err(format_args!(
        "note: output truncated to {} chars (raise or drop --max-chars for the full text)",
        text.char_count
    ));
    Ok(())
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use doiget_core::paper_text::{TextSection, TextSource};

    /// Fixture: a titled paper with one heading-less lead section and one
    /// headed section.
    fn sample() -> PaperText {
        let lead = TextSection {
            heading: None,
            text: "Lead paragraph.".into(),
        };
        let introduction = TextSection {
            heading: Some("1 Introduction".into()),
            text: "Body text.".into(),
        };
        PaperText {
            arxiv_id: "2401.12345".into(),
            source: TextSource::Ar5iv,
            title: Some("A Title".into()),
            sections: vec![lead, introduction],
            char_count: 25,
            truncated: false,
            retrieved_from: "https://ar5iv.labs.arxiv.org/html/2401.12345".into(),
        }
    }

    /// The JSON output is the serialized `PaperText` itself; pin its field names.
    #[test]
    fn json_envelope_is_the_paper_text_shape() {
        let json = serde_json::to_value(sample()).expect("serialize");
        for (key, expected) in [
            ("arxiv_id", "2401.12345"),
            ("source", "ar5iv"),
            ("title", "A Title"),
        ] {
            assert_eq!(json[key], expected, "field {key}");
        }
        assert_eq!(json["sections"][1]["heading"], "1 Introduction");
        assert_eq!(json["truncated"], false);
    }

    /// The human rendering contains the title, the heading and both bodies.
    #[test]
    fn human_render_lays_out_title_and_sections() {
        let mut sink: Vec<u8> = Vec::new();
        render_human(&mut sink, &sample()).expect("render");
        let s = String::from_utf8(sink).expect("utf8");
        for needle in [
            "# A Title",
            "## 1 Introduction",
            "Lead paragraph.",
            "Body text.",
        ] {
            assert!(s.contains(needle), "got: {s}");
        }
    }

    /// Without an override in the environment, the production base is used.
    #[test]
    fn resolve_ar5iv_base_defaults_to_production() {
        // An override present in the test environment makes this check moot.
        if std::env::var("DOIGET_AR5IV_BASE").is_ok() {
            return;
        }
        let base = resolve_ar5iv_base().expect("base");
        assert_eq!(base.as_str(), "https://ar5iv.labs.arxiv.org/");
    }
}