use std::io::Write;
use anyhow::{Context, Result};
use doiget_core::discovery::{paper_search, PaperSearchQuery, PaperSearchResults, SearchSort};
use doiget_core::store::{EntryInfo, FsStore, Store};
use doiget_core::ErrorCode;
use super::fetch::{cli_exit_code, CliExit, FetchHarness};
use super::output::OutputMode;
use super::resolve_store_root;
/// Value `run_local` passes as the second argument of the store's `search`
/// call for every local query.
const LOCAL_DEFAULT_LIMIT: usize = 50;
/// Format string applied to an entry's `fetched_at` timestamp when printing
/// the local tab-separated listing.
// NOTE(review): the trailing `Z` is a literal character, so this presumably
// expects a UTC timestamp — confirm against the type of `EntryInfo::fetched_at`.
const FETCHED_AT_FMT: &str = "%Y-%m-%dT%H:%M:%SZ";
/// Base URL used for external searches when the `DOIGET_OPENALEX_BASE`
/// environment variable cannot be read.
const OPENALEX_DEFAULT_BASE: &str = "https://api.openalex.org";
// Sort order carried in `ExternalArgs::sort` for external searches.
//
// Converted into the core `SearchSort` by the `From<SortArg>` impl in this
// file. Plain `//` comments are used on purpose: clap's derive macros can
// surface `///` doc comments as command-line help text, which would change
// what the program prints.
#[derive(Clone, Debug, Default, PartialEq, Eq, clap::ValueEnum)]
pub enum SortArg {
// The default (and currently only) ordering; maps to `SearchSort::Relevance`.
#[default]
Relevance,
}
/// Lowers the CLI-facing sort selector into the core library's [`SearchSort`].
impl From<SortArg> for SearchSort {
    fn from(s: SortArg) -> Self {
        // Deliberately exhaustive (no `_` arm): adding a `SortArg` variant
        // must fail to compile until it is mapped here.
        match s {
            SortArg::Relevance => Self::Relevance,
        }
    }
}
/// Options for the external (non-local) search path of `run`.
///
/// `run_external` copies every field unchanged into the field of the same
/// name on `PaperSearchQuery` (`sort` is converted with `Into`). No checks are
/// performed on this struct itself; validation happens afterwards through
/// `PaperSearchQuery::validate`.
#[derive(Debug, Clone)]
pub struct ExternalArgs {
/// Requested number of results. The tests in this file expect `0` and `201`
/// to be rejected during validation.
pub limit: usize,
/// Optional first publication year. The tests in this file expect a value
/// greater than `to_year` to be rejected during validation.
pub from_year: Option<i32>,
/// Optional last publication year (see `from_year`).
pub to_year: Option<i32>,
/// Presumably restricts hits to open-access works — semantics are defined
/// by `PaperSearchQuery`; confirm there.
pub oa_only: bool,
/// Presumably a minimum citation count — confirm against `PaperSearchQuery`.
pub min_citations: Option<u64>,
/// Presumably a minimum FWCI score — confirm against `PaperSearchQuery`.
pub min_fwci: Option<f64>,
/// Presumably a minimum citation percentile — valid range is not visible
/// here; confirm against `PaperSearchQuery::validate`.
pub min_percentile: Option<u8>,
/// Optional author filter, forwarded as-is.
pub author: Option<String>,
/// Optional venue filter, forwarded as-is.
pub venue: Option<String>,
/// Optional publisher filter, forwarded as-is.
pub publisher: Option<String>,
/// Result ordering; converted to the core `SearchSort` when the query is built.
pub sort: SortArg,
}
/// Writes the pre-formatted message `args`, followed by a newline, to stderr.
///
/// Callers pass `format_args!(...)`; `run_external` uses this to report a
/// failed search before returning its exit-code error.
// This function carries the only `clippy::print_stderr` allowance visible in
// this file, so stderr output is funnelled through it.
#[allow(clippy::print_stderr)]
fn print_err(args: std::fmt::Arguments<'_>) {
eprintln!("{args}");
}
/// Runs a search for `query` against either the local store or the external
/// service.
///
/// * `local` — when `true`, search the local store via `run_local` and ignore
///   `ext`; otherwise perform an external search via `run_external`.
/// * `ext` — filters and paging for the external path.
/// * `mode` / `quiet_was_explicit` — forwarded to the chosen path, which uses
///   them to decide what (if anything) is written to stdout.
///
/// # Errors
///
/// Fails with `"search query is empty"` when `query` is empty or consists only
/// of whitespace, and otherwise propagates whatever the selected path returns.
pub async fn run(
    query: String,
    local: bool,
    ext: ExternalArgs,
    mode: OutputMode,
    quiet_was_explicit: bool,
) -> Result<()> {
    // Only the emptiness check looks at the trimmed text; the query itself is
    // handed on exactly as received.
    anyhow::ensure!(!query.trim().is_empty(), "search query is empty");

    if !local {
        return run_external(&query, ext, mode, quiet_was_explicit).await;
    }
    run_local(&query, mode, quiet_was_explicit)
}
/// Searches the local store for `query` and prints the matches to stdout.
///
/// The store rooted at `resolve_store_root()` is asked for at most
/// `LOCAL_DEFAULT_LIMIT` entries. Output then depends on `mode`:
///
/// * `Quiet` with `quiet_was_explicit` — nothing is printed (the search still
///   runs, so failures are still reported through the return value).
/// * `Json` — the `local_envelope` document, pretty-printed.
/// * anything else — a tab-separated table with the header
///   `safekey  year  title  fetched_at`; missing years and timestamps are
///   shown as `-`.
///
/// In the tab-separated table, control characters inside a title (tabs, line
/// breaks, escape bytes, ...) are replaced with spaces so that every entry
/// stays on one line with exactly four columns. JSON output is not altered;
/// JSON string escaping already covers those characters.
///
/// # Errors
///
/// Returns an error if the store root cannot be resolved, the store cannot be
/// opened, the search fails, or writing to stdout fails.
fn run_local(query: &str, mode: OutputMode, quiet_was_explicit: bool) -> Result<()> {
    /// Renders `value` as a single TSV cell, turning every control character
    /// into a space so it cannot break the row or column structure.
    fn tsv_cell(value: &impl std::fmt::Display) -> String {
        value
            .to_string()
            .chars()
            .map(|c| if c.is_control() { ' ' } else { c })
            .collect()
    }

    let store_root = resolve_store_root()?;
    let store = FsStore::new(store_root)?;
    let entries = store
        .search(query, LOCAL_DEFAULT_LIMIT)
        .with_context(|| format!("search failed for query {query:?}"))?;

    // Checked only after the search so an explicit quiet run still surfaces
    // search errors.
    if mode == OutputMode::Quiet && quiet_was_explicit {
        return Ok(());
    }

    // Lock stdout once for the whole listing instead of once per row.
    let stdout = std::io::stdout();
    let mut out = stdout.lock();

    if mode == OutputMode::Json {
        write_json(&mut out, &local_envelope(query, &entries))?;
        return Ok(());
    }

    writeln!(out, "safekey\tyear\ttitle\tfetched_at")
        .context("failed to write search header to stdout")?;
    for e in entries {
        let year = dash_or(e.year);
        let fetched = e
            .fetched_at
            .map(|t| t.format(FETCHED_AT_FMT).to_string())
            .unwrap_or_else(|| "-".into());
        writeln!(
            out,
            "{}\t{}\t{}\t{}",
            e.safekey.as_str(),
            year,
            // Free-text field: sanitise so it cannot inject extra columns or rows.
            tsv_cell(&e.title),
            fetched
        )
        .context("failed to write search row to stdout")?;
    }
    Ok(())
}
/// Runs `query` against the OpenAlex-backed `paper_search` and prints the hits
/// to stdout.
///
/// Steps, in order:
///
/// 1. Build a `PaperSearchQuery` from `query` and `ext` and validate it. This
///    happens before any environment lookup or harness construction, so bad
///    arguments fail without side effects.
/// 2. Resolve the API base URL, read `DOIGET_CONTACT_EMAIL` (empty string when
///    it cannot be read) and build the `FetchHarness` from the environment.
/// 3. Log the session start, perform the search, and log the session end with
///    the success flag.
/// 4. If the search failed, print `error[<wire code>]: <error>` to stderr and
///    return a `CliExit` carrying the exit code derived from the error code.
/// 5. Otherwise emit output according to `mode`: nothing for an explicit
///    `Quiet`, the `external_envelope` document for `Json`, or a tab-separated
///    table with the header `cited_by  year  oa  doi  title`, where absent
///    values are shown as `-`.
///
/// The text cells of the table (`oa`, `doi`, `title`) come from a remote
/// service, so control characters in them (tabs, line breaks, escape bytes,
/// ...) are replaced with spaces. That keeps each hit on one line with exactly
/// five columns and keeps raw escape sequences away from the terminal. JSON
/// output is not altered.
///
/// # Errors
///
/// Returns an error when validation fails, the base URL is not a valid URL,
/// the harness cannot be built, the session start cannot be logged, the search
/// itself fails (as `CliExit`), or writing to stdout fails.
async fn run_external(
    query: &str,
    ext: ExternalArgs,
    mode: OutputMode,
    quiet_was_explicit: bool,
) -> Result<()> {
    /// Renders `value` as a single TSV cell, turning every control character
    /// into a space so it cannot break the row or column structure.
    fn tsv_cell(value: &impl std::fmt::Display) -> String {
        value
            .to_string()
            .chars()
            .map(|c| if c.is_control() { ' ' } else { c })
            .collect()
    }

    let q = PaperSearchQuery {
        query: query.to_string(),
        limit: ext.limit,
        from_year: ext.from_year,
        to_year: ext.to_year,
        oa_only: ext.oa_only,
        min_citations: ext.min_citations,
        min_fwci: ext.min_fwci,
        min_percentile: ext.min_percentile,
        author: ext.author,
        venue: ext.venue,
        publisher: ext.publisher,
        sort: ext.sort.into(),
    };
    q.validate().map_err(|m| anyhow::anyhow!("{m}"))?;

    let base = resolve_openalex_base()?;
    let contact_email = std::env::var("DOIGET_CONTACT_EMAIL").unwrap_or_default();
    let harness = FetchHarness::from_env().context("building fetch harness")?;
    harness
        .log_session_start(Some(query))
        .context("logging session start")?;
    let ctx = harness.fetch_context();
    let outcome = paper_search(&base, &contact_email, &q, &ctx).await;
    // Close the session log before inspecting the outcome so both the success
    // and the failure path are recorded.
    harness.log_session_end(outcome.is_ok(), Some(query));

    let results = match outcome {
        Ok(r) => r,
        Err(e) => {
            let code = ErrorCode::from(&e);
            // The message is printed here; the returned error only carries
            // the exit code.
            print_err(format_args!("error[{}]: {e}", code.as_wire()));
            return Err(anyhow::Error::new(CliExit(cli_exit_code(code))));
        }
    };

    if mode == OutputMode::Quiet && quiet_was_explicit {
        return Ok(());
    }

    // Lock stdout once for the whole listing instead of once per row.
    let stdout = std::io::stdout();
    let mut out = stdout.lock();

    if mode == OutputMode::Json {
        write_json(&mut out, &external_envelope(query, &results))?;
        return Ok(());
    }

    writeln!(out, "cited_by\tyear\toa\tdoi\ttitle")
        .context("failed to write search header to stdout")?;
    for hit in &results.results {
        let year = dash_or(hit.year);
        let oa = hit.oa_status.as_deref().unwrap_or("-");
        let doi = hit.doi.as_deref().unwrap_or("-");
        writeln!(
            out,
            "{}\t{}\t{}\t{}\t{}",
            hit.cited_by_count,
            year,
            // Remote-supplied text: sanitise so it cannot inject extra
            // columns, rows or terminal control sequences.
            tsv_cell(&oa),
            tsv_cell(&doi),
            tsv_cell(&hit.title)
        )
        .context("failed to write search row to stdout")?;
    }
    Ok(())
}
/// Determines the base URL used for external searches.
///
/// Reads `DOIGET_OPENALEX_BASE`; when the variable cannot be read (unset or
/// not valid Unicode) the built-in `OPENALEX_DEFAULT_BASE` is used instead.
///
/// # Errors
///
/// Returns an error naming the offending value when the chosen string does not
/// parse as a URL.
fn resolve_openalex_base() -> Result<url::Url> {
    let raw = match std::env::var("DOIGET_OPENALEX_BASE") {
        Ok(from_env) => from_env,
        Err(_) => OPENALEX_DEFAULT_BASE.to_string(),
    };
    url::Url::parse(&raw).with_context(|| format!("DOIGET_OPENALEX_BASE is not a URL: {raw}"))
}
/// Builds the JSON document printed for a local search.
///
/// Keys: `scope` (always `"local"`), `query` (echoed back), `count` (number of
/// entries) and `results` (the serialized entries). Unlike the external
/// envelope there is no `total_results` key.
fn local_envelope(query: &str, entries: &[EntryInfo]) -> serde_json::Value {
    let count = entries.len();
    serde_json::json!({
        "scope": "local",
        "query": query,
        "count": count,
        "results": entries,
    })
}
/// Builds the JSON document printed for an external search.
///
/// Keys: `scope` (always `"external"`), `query` (echoed back), `total_results`
/// (copied from `results.total_results`), `count` (number of hits actually
/// returned) and `results` (the serialized hits).
fn external_envelope(query: &str, results: &PaperSearchResults) -> serde_json::Value {
    let hits = &results.results;
    let total = &results.total_results;
    serde_json::json!({
        "scope": "external",
        "query": query,
        "total_results": total,
        "count": hits.len(),
        "results": hits,
    })
}
/// Pretty-prints `value` as JSON to `out`, followed by a newline.
///
/// The document is rendered to a string first and written in one call.
///
/// # Errors
///
/// Returns an error if serialization fails or the write to `out` fails.
fn write_json(out: &mut impl Write, value: &serde_json::Value) -> Result<()> {
    let rendered =
        serde_json::to_string_pretty(value).context("failed to serialize search JSON")?;
    writeln!(out, "{rendered}").context("failed to write search JSON to stdout")?;
    Ok(())
}
/// Formats an optional value for a table cell: the value's `Display` output
/// when present, or `"-"` when absent.
fn dash_or<T: std::fmt::Display>(v: Option<T>) -> String {
    match v {
        Some(present) => present.to_string(),
        None => String::from("-"),
    }
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use doiget_core::discovery::{DiscoverySource, PaperHit};

    /// A hit with every optional field populated except `arxiv`.
    fn hit() -> PaperHit {
        PaperHit {
            doi: Some("10.1234/x".to_owned()),
            openalex_id: "W1".to_owned(),
            arxiv: None,
            title: "T".to_owned(),
            authors: vec!["A".to_owned()],
            year: Some(2024),
            venue: Some("V".to_owned()),
            abstract_: Some("abs".to_owned()),
            cited_by_count: 3,
            oa_status: Some("gold".to_owned()),
            source: DiscoverySource::OpenAlex,
        }
    }

    /// External arguments with all filters off apart from the given limit and
    /// year bounds.
    fn ext(limit: usize, from_year: Option<i32>, to_year: Option<i32>) -> ExternalArgs {
        ExternalArgs {
            limit,
            from_year,
            to_year,
            oa_only: false,
            min_citations: None,
            min_fwci: None,
            min_percentile: None,
            author: None,
            venue: None,
            publisher: None,
            sort: SortArg::Relevance,
        }
    }

    /// Runs an explicit-quiet external search for `"q"` with `args` and returns
    /// the error it is required to produce; panics with `expectation` if the
    /// run succeeds.
    async fn rejected(args: ExternalArgs, expectation: &str) -> anyhow::Error {
        run("q".into(), false, args, OutputMode::Quiet, true)
            .await
            .expect_err(expectation)
    }

    #[test]
    fn external_envelope_has_scope_total_and_results() {
        let results = PaperSearchResults {
            results: vec![hit()],
            total_results: Some(4012),
        };

        let v = external_envelope("spin glass", &results);

        assert_eq!(v["scope"], "external");
        assert_eq!(v["query"], "spin glass");
        assert_eq!(v["total_results"], 4012);
        assert_eq!(v["count"], 1);
        let first = &v["results"][0];
        assert_eq!(first["openalex_id"], "W1");
        assert_eq!(first["abstract"], "abs");
    }

    #[test]
    fn sort_arg_lowers_to_core() {
        let lowered: SearchSort = SortArg::Relevance.into();
        assert_eq!(lowered, SearchSort::Relevance);
    }

    #[test]
    fn local_envelope_has_local_scope_and_count() {
        let v = local_envelope("quantum", &[]);

        assert_eq!(v["scope"], "local");
        assert_eq!(v["query"], "quantum");
        assert_eq!(v["count"], 0);
        assert!(v["results"].as_array().expect("results array").is_empty());
        // The local envelope has no notion of a server-side total.
        assert!(v.get("total_results").is_none());
    }

    #[tokio::test]
    async fn external_rejects_limit_below_1() {
        let err = rejected(ext(0, None, None), "limit 0 must be rejected").await;
        assert!(err.to_string().contains("limit"), "got: {err}");
    }

    #[tokio::test]
    async fn external_rejects_limit_above_200() {
        let err = rejected(ext(201, None, None), "limit 201 must be rejected").await;
        assert!(err.to_string().contains("limit"), "got: {err}");
    }

    #[tokio::test]
    async fn external_rejects_inverted_year_range() {
        let err = rejected(
            ext(25, Some(2025), Some(2010)),
            "from_year > to_year must be rejected",
        )
        .await;
        assert!(err.to_string().contains("is after"), "got: {err}");
    }
}