1use std::io::Write;
24
25use anyhow::{Context, Result};
26
27use doiget_core::discovery::{paper_search, PaperSearchQuery, PaperSearchResults, SearchSort};
28use doiget_core::store::{EntryInfo, FsStore, Store};
29use doiget_core::ErrorCode;
30
31use super::fetch::{cli_exit_code, CliExit, FetchHarness};
32use super::output::OutputMode;
33use super::resolve_store_root;
34
/// Maximum number of entries requested from the local store per search;
/// `run_local` passes it as the limit argument of the store's `search` call.
const LOCAL_DEFAULT_LIMIT: usize = 50;
40
/// `strftime`-style pattern `run_local` uses to render an entry's `fetched_at`
/// timestamp in the text table. The trailing `Z` is a literal character.
/// NOTE(review): that assumes `fetched_at` is already in UTC — confirm.
const FETCHED_AT_FMT: &str = "%Y-%m-%dT%H:%M:%SZ";
45
/// OpenAlex API base URL used by `resolve_openalex_base` when the
/// `DOIGET_OPENALEX_BASE` environment variable cannot be read.
const OPENALEX_DEFAULT_BASE: &str = "https://api.openalex.org";
49
/// Sort order accepted on the command line for external searches.
///
/// Each variant converts into the same-named core [`SearchSort`] variant via
/// `From`; the exact ordering semantics are defined by the core type.
#[derive(Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
pub enum SortArg {
    /// Order results by relevance.
    Relevance,
    /// Order results by citation count.
    Cited,
    /// Order results by recency.
    Recent,
}
61
62impl From<SortArg> for SearchSort {
63 fn from(s: SortArg) -> Self {
64 match s {
65 SortArg::Relevance => SearchSort::Relevance,
66 SortArg::Cited => SearchSort::Cited,
67 SortArg::Recent => SearchSort::Recent,
68 }
69 }
70}
71
/// Options that apply only to an external (non-local) search.
///
/// `run_external` copies each field into the same-named field of
/// [`PaperSearchQuery`] and validates the resulting query before issuing any
/// request, so range checks live in the core query type rather than here.
#[derive(Debug, Clone)]
pub struct ExternalArgs {
    /// Requested number of results. The tests in this file expect `0` and
    /// `201` to be rejected.
    pub limit: usize,
    /// Lower year bound, if any. The tests in this file expect a value greater
    /// than `to_year` to be rejected.
    pub from_year: Option<i32>,
    /// Upper year bound, if any.
    pub to_year: Option<i32>,
    /// Open-access filter flag.
    /// NOTE(review): presumably restricts results to open-access works —
    /// confirm against `PaperSearchQuery`.
    pub oa_only: bool,
    /// Citation-count threshold, if any (presumably a minimum, per the name —
    /// confirm against `PaperSearchQuery`).
    pub min_citations: Option<u64>,
    /// Author filter, if any; forwarded unchanged.
    pub author: Option<String>,
    /// Venue filter, if any; forwarded unchanged.
    pub venue: Option<String>,
    /// Publisher filter, if any; forwarded unchanged.
    pub publisher: Option<String>,
    /// Requested sort order; converted to the core `SearchSort` via `Into`.
    pub sort: SortArg,
}
97
/// Writes one pre-formatted diagnostic line to standard error.
///
/// This is the only place in this file that prints to stderr, so the
/// `clippy::print_stderr` lint is allowed here rather than at each call site.
#[allow(clippy::print_stderr)]
fn print_err(message: std::fmt::Arguments<'_>) {
    eprintln!("{message}");
}
104
105pub async fn run(query: String, local: bool, ext: ExternalArgs, mode: OutputMode) -> Result<()> {
118 if query.trim().is_empty() {
119 anyhow::bail!("search query is empty");
120 }
121 if local {
122 run_local(&query, mode)
123 } else {
124 run_external(&query, ext, mode).await
125 }
126}
127
128fn run_local(query: &str, mode: OutputMode) -> Result<()> {
130 let store_root = resolve_store_root()?;
131 let store = FsStore::new(store_root)?;
132 let entries = store
133 .search(query, LOCAL_DEFAULT_LIMIT)
134 .with_context(|| format!("search failed for query {query:?}"))?;
135
136 if mode == OutputMode::Quiet {
137 return Ok(());
138 }
139
140 let stdout = std::io::stdout();
141 let mut out = stdout.lock();
142 if mode == OutputMode::Json {
143 write_json(&mut out, &local_envelope(query, &entries))?;
144 return Ok(());
145 }
146 writeln!(out, "safekey\tyear\ttitle\tfetched_at")
147 .context("failed to write search header to stdout")?;
148 for e in entries {
149 let year = dash_or(e.year);
150 let fetched = e
151 .fetched_at
152 .map(|t| t.format(FETCHED_AT_FMT).to_string())
153 .unwrap_or_else(|| "-".into());
154 writeln!(
155 out,
156 "{}\t{}\t{}\t{}",
157 e.safekey.as_str(),
158 year,
159 e.title,
160 fetched
161 )
162 .context("failed to write search row to stdout")?;
163 }
164 Ok(())
165}
166
167async fn run_external(query: &str, ext: ExternalArgs, mode: OutputMode) -> Result<()> {
169 let q = PaperSearchQuery {
170 query: query.to_string(),
171 limit: ext.limit,
172 from_year: ext.from_year,
173 to_year: ext.to_year,
174 oa_only: ext.oa_only,
175 min_citations: ext.min_citations,
176 author: ext.author,
177 venue: ext.venue,
178 publisher: ext.publisher,
179 sort: ext.sort.into(),
180 };
181 q.validate().map_err(|m| anyhow::anyhow!("{m}"))?;
184
185 let base = resolve_openalex_base()?;
186 let contact_email = std::env::var("DOIGET_CONTACT_EMAIL").unwrap_or_default();
190
191 let harness = FetchHarness::from_env().context("building fetch harness")?;
192 harness
193 .log_session_start(Some(query))
194 .context("logging session start")?;
195 let ctx = harness.fetch_context();
196
197 let outcome = paper_search(&base, &contact_email, &q, &ctx).await;
198 harness.log_session_end(outcome.is_ok(), Some(query));
199
200 let results = match outcome {
201 Ok(r) => r,
202 Err(e) => {
203 let code = ErrorCode::from(&e);
204 print_err(format_args!("error[{}]: {e}", code.as_wire()));
205 return Err(anyhow::Error::new(CliExit(cli_exit_code(code))));
206 }
207 };
208
209 if mode == OutputMode::Quiet {
210 return Ok(());
211 }
212
213 let stdout = std::io::stdout();
214 let mut out = stdout.lock();
215 if mode == OutputMode::Json {
216 write_json(&mut out, &external_envelope(query, &results))?;
217 return Ok(());
218 }
219
220 writeln!(out, "cited_by\tyear\toa\tdoi\ttitle")
223 .context("failed to write search header to stdout")?;
224 for hit in &results.results {
225 let year = dash_or(hit.year);
226 let oa = hit.oa_status.as_deref().unwrap_or("-");
227 let doi = hit.doi.as_deref().unwrap_or("-");
228 writeln!(
229 out,
230 "{}\t{}\t{}\t{}\t{}",
231 hit.cited_by_count, year, oa, doi, hit.title
232 )
233 .context("failed to write search row to stdout")?;
234 }
235 Ok(())
236}
237
238fn resolve_openalex_base() -> Result<url::Url> {
241 let raw =
242 std::env::var("DOIGET_OPENALEX_BASE").unwrap_or_else(|_| OPENALEX_DEFAULT_BASE.to_string());
243 url::Url::parse(&raw).with_context(|| format!("DOIGET_OPENALEX_BASE is not a URL: {raw}"))
244}
245
246fn local_envelope(query: &str, entries: &[EntryInfo]) -> serde_json::Value {
250 serde_json::json!({
251 "scope": "local",
252 "query": query,
253 "count": entries.len(),
254 "results": entries,
255 })
256}
257
258fn external_envelope(query: &str, results: &PaperSearchResults) -> serde_json::Value {
262 serde_json::json!({
263 "scope": "external",
264 "query": query,
265 "total_results": results.total_results,
266 "count": results.results.len(),
267 "results": results.results,
268 })
269}
270
271fn write_json(out: &mut impl Write, value: &serde_json::Value) -> Result<()> {
274 let s = serde_json::to_string_pretty(value).context("failed to serialize search JSON")?;
275 writeln!(out, "{s}").context("failed to write search JSON to stdout")
276}
277
/// Renders an optional value for table output: the value's `Display` form when
/// present, or a single `-` when absent.
fn dash_or<T: std::fmt::Display>(v: Option<T>) -> String {
    match v {
        Some(value) => value.to_string(),
        None => "-".to_owned(),
    }
}
282
#[cfg(test)]
// Test code is allowed to panic: a failed `expect`/`unwrap` is the test failing.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use doiget_core::discovery::{DiscoverySource, PaperHit};

    /// A fully populated sample hit for the envelope tests.
    fn hit() -> PaperHit {
        PaperHit {
            doi: Some(String::from("10.1234/x")),
            openalex_id: String::from("W1"),
            arxiv: None,
            title: String::from("T"),
            authors: vec![String::from("A")],
            year: Some(2024),
            venue: Some(String::from("V")),
            abstract_: Some(String::from("abs")),
            cited_by_count: 3,
            oa_status: Some(String::from("gold")),
            source: DiscoverySource::OpenAlex,
        }
    }

    /// External arguments with the given limit and year range and every other
    /// option left at a neutral value.
    fn ext(limit: usize, from_year: Option<i32>, to_year: Option<i32>) -> ExternalArgs {
        ExternalArgs {
            limit,
            from_year,
            to_year,
            oa_only: false,
            min_citations: None,
            author: None,
            venue: None,
            publisher: None,
            sort: SortArg::Relevance,
        }
    }

    /// Runs an external search in quiet mode and returns the error it must
    /// produce; panics with `why` if the search unexpectedly succeeds.
    ///
    /// The callers pass arguments that are expected to fail query validation,
    /// which `run_external` performs before reading the environment or
    /// issuing a request.
    async fn rejected(args: ExternalArgs, why: &str) -> anyhow::Error {
        run(String::from("q"), false, args, OutputMode::Quiet)
            .await
            .expect_err(why)
    }

    /// The external envelope carries scope, query, totals, and serialized hits.
    #[test]
    fn external_envelope_has_scope_total_and_results() {
        let found = PaperSearchResults {
            results: vec![hit()],
            total_results: Some(4012),
        };
        let envelope = external_envelope("spin glass", &found);
        assert_eq!(envelope["scope"], "external");
        assert_eq!(envelope["query"], "spin glass");
        assert_eq!(envelope["total_results"], 4012);
        assert_eq!(envelope["count"], 1);
        let first = &envelope["results"][0];
        assert_eq!(first["openalex_id"], "W1");
        assert_eq!(first["abstract"], "abs");
    }

    /// Every CLI sort variant lowers to the same-named core variant.
    #[test]
    fn sort_arg_lowers_to_core() {
        assert_eq!(SearchSort::from(SortArg::Relevance), SearchSort::Relevance);
        assert_eq!(SearchSort::from(SortArg::Cited), SearchSort::Cited);
        assert_eq!(SearchSort::from(SortArg::Recent), SearchSort::Recent);
    }

    /// The local envelope reports its scope and count and has no
    /// `total_results` key.
    #[test]
    fn local_envelope_has_local_scope_and_count() {
        let envelope = local_envelope("quantum", &[]);
        assert_eq!(envelope["scope"], "local");
        assert_eq!(envelope["query"], "quantum");
        assert_eq!(envelope["count"], 0);
        let listed = envelope["results"].as_array().expect("results array");
        assert!(listed.is_empty());
        assert!(envelope.get("total_results").is_none());
    }

    /// A limit of zero is refused.
    #[tokio::test]
    async fn external_rejects_limit_below_1() {
        let err = rejected(ext(0, None, None), "limit 0 must be rejected").await;
        assert!(err.to_string().contains("limit"), "got: {err}");
    }

    /// A limit of 201 is refused.
    #[tokio::test]
    async fn external_rejects_limit_above_200() {
        let err = rejected(ext(201, None, None), "limit 201 must be rejected").await;
        assert!(err.to_string().contains("limit"), "got: {err}");
    }

    /// A year range whose start lies after its end is refused.
    #[tokio::test]
    async fn external_rejects_inverted_year_range() {
        let err = rejected(
            ext(25, Some(2025), Some(2010)),
            "from_year > to_year must be rejected",
        )
        .await;
        assert!(err.to_string().contains("is after"), "got: {err}");
    }
}