1use std::io::Write;
24
25use anyhow::{Context, Result};
26
27use doiget_core::discovery::{paper_search, PaperSearchQuery, PaperSearchResults, SearchSort};
28use doiget_core::store::{EntryInfo, FsStore, Store};
29use doiget_core::ErrorCode;
30
31use super::fetch::{cli_exit_code, CliExit, FetchHarness};
32use super::output::OutputMode;
33use super::resolve_store_root;
34
/// Maximum number of entries requested from the local store by `run_local`.
const LOCAL_DEFAULT_LIMIT: usize = 50;

/// Format string used to render an entry's `fetched_at` timestamp in the
/// local TSV output.
// NOTE(review): the trailing `Z` is a literal, so this presumably expects the
// timestamp to already be in UTC — confirm against the store's timestamp type.
const FETCHED_AT_FMT: &str = "%Y-%m-%dT%H:%M:%SZ";

/// OpenAlex API base URL used when `DOIGET_OPENALEX_BASE` cannot be read from
/// the environment.
const OPENALEX_DEFAULT_BASE: &str = "https://api.openalex.org";
49
/// Sort order selectable on the command line for external searches.
///
/// Carried in [`ExternalArgs::sort`] and converted to the core `SearchSort`
/// through the `From<SortArg>` impl in this file.
#[derive(Clone, Debug, Default, PartialEq, Eq, clap::ValueEnum)]
pub enum SortArg {
    // The default ordering; maps to `SearchSort::Relevance`.
    #[default]
    Relevance,
}
63
64impl From<SortArg> for SearchSort {
65 fn from(s: SortArg) -> Self {
66 match s {
67 SortArg::Relevance => SearchSort::Relevance,
68 }
69 }
70}
71
/// Options that apply only to an external (non-local) search.
///
/// `run_external` copies every field into the `PaperSearchQuery` field of the
/// same name (`sort` goes through `Into`). No checking happens here; the
/// query's `validate()` is what rejects bad values.
#[derive(Debug, Clone)]
pub struct ExternalArgs {
    /// Requested number of results. The tests in this file expect `0` and
    /// `201` to be rejected by validation.
    pub limit: usize,
    /// Lower year bound, if any. The tests in this file expect a value later
    /// than `to_year` to be rejected.
    // NOTE(review): presumably an inclusive publication year — confirm.
    pub from_year: Option<i32>,
    /// Upper year bound, if any.
    // NOTE(review): presumably an inclusive publication year — confirm.
    pub to_year: Option<i32>,
    /// Forwarded as `PaperSearchQuery::oa_only`.
    // NOTE(review): presumably restricts results to open-access works — confirm.
    pub oa_only: bool,
    /// Forwarded as `PaperSearchQuery::min_citations`.
    pub min_citations: Option<u64>,
    /// Forwarded as `PaperSearchQuery::min_fwci`.
    pub min_fwci: Option<f64>,
    /// Forwarded as `PaperSearchQuery::min_percentile`.
    pub min_percentile: Option<u8>,
    /// Forwarded as `PaperSearchQuery::author`.
    pub author: Option<String>,
    /// Forwarded as `PaperSearchQuery::venue`.
    pub venue: Option<String>,
    /// Forwarded as `PaperSearchQuery::publisher`.
    pub publisher: Option<String>,
    /// Requested sort order, converted to the core `SearchSort` when the
    /// query is built.
    pub sort: SortArg,
}
101
/// Writes the formatted arguments to stderr followed by a newline.
///
/// Wrapping `eprintln!` here keeps the `clippy::print_stderr` allowance
/// scoped to this one function instead of the callers.
#[allow(clippy::print_stderr)]
fn print_err(args: std::fmt::Arguments<'_>) {
    eprintln!("{args}");
}
108
109pub async fn run(
122 query: String,
123 local: bool,
124 ext: ExternalArgs,
125 mode: OutputMode,
126 quiet_was_explicit: bool,
127) -> Result<()> {
128 if query.trim().is_empty() {
129 anyhow::bail!("search query is empty");
130 }
131 if local {
132 run_local(&query, mode, quiet_was_explicit)
133 } else {
134 run_external(&query, ext, mode, quiet_was_explicit).await
135 }
136}
137
138fn run_local(query: &str, mode: OutputMode, quiet_was_explicit: bool) -> Result<()> {
140 let store_root = resolve_store_root()?;
141 let store = FsStore::new(store_root)?;
142 let entries = store
143 .search(query, LOCAL_DEFAULT_LIMIT)
144 .with_context(|| format!("search failed for query {query:?}"))?;
145
146 if mode == OutputMode::Quiet && quiet_was_explicit {
149 return Ok(());
150 }
151
152 let stdout = std::io::stdout();
153 let mut out = stdout.lock();
154 if mode == OutputMode::Json {
155 write_json(&mut out, &local_envelope(query, &entries))?;
156 return Ok(());
157 }
158 writeln!(out, "safekey\tyear\ttitle\tfetched_at")
159 .context("failed to write search header to stdout")?;
160 for e in entries {
161 let year = dash_or(e.year);
162 let fetched = e
163 .fetched_at
164 .map(|t| t.format(FETCHED_AT_FMT).to_string())
165 .unwrap_or_else(|| "-".into());
166 writeln!(
167 out,
168 "{}\t{}\t{}\t{}",
169 e.safekey.as_str(),
170 year,
171 e.title,
172 fetched
173 )
174 .context("failed to write search row to stdout")?;
175 }
176 Ok(())
177}
178
179async fn run_external(
181 query: &str,
182 ext: ExternalArgs,
183 mode: OutputMode,
184 quiet_was_explicit: bool,
185) -> Result<()> {
186 let q = PaperSearchQuery {
187 query: query.to_string(),
188 limit: ext.limit,
189 from_year: ext.from_year,
190 to_year: ext.to_year,
191 oa_only: ext.oa_only,
192 min_citations: ext.min_citations,
193 min_fwci: ext.min_fwci,
194 min_percentile: ext.min_percentile,
195 author: ext.author,
196 venue: ext.venue,
197 publisher: ext.publisher,
198 sort: ext.sort.into(),
199 };
200 q.validate().map_err(|m| anyhow::anyhow!("{m}"))?;
203
204 let base = resolve_openalex_base()?;
205 let contact_email = std::env::var("DOIGET_CONTACT_EMAIL").unwrap_or_default();
209
210 let harness = FetchHarness::from_env().context("building fetch harness")?;
211 harness
212 .log_session_start(Some(query))
213 .context("logging session start")?;
214 let ctx = harness.fetch_context();
215
216 let outcome = paper_search(&base, &contact_email, &q, &ctx).await;
217 harness.log_session_end(outcome.is_ok(), Some(query));
218
219 let results = match outcome {
220 Ok(r) => r,
221 Err(e) => {
222 let code = ErrorCode::from(&e);
223 print_err(format_args!("error[{}]: {e}", code.as_wire()));
224 return Err(anyhow::Error::new(CliExit(cli_exit_code(code))));
225 }
226 };
227
228 if mode == OutputMode::Quiet && quiet_was_explicit {
231 return Ok(());
232 }
233
234 let stdout = std::io::stdout();
235 let mut out = stdout.lock();
236 if mode == OutputMode::Json {
237 write_json(&mut out, &external_envelope(query, &results))?;
238 return Ok(());
239 }
240
241 writeln!(out, "cited_by\tyear\toa\tdoi\ttitle")
244 .context("failed to write search header to stdout")?;
245 for hit in &results.results {
246 let year = dash_or(hit.year);
247 let oa = hit.oa_status.as_deref().unwrap_or("-");
248 let doi = hit.doi.as_deref().unwrap_or("-");
249 writeln!(
250 out,
251 "{}\t{}\t{}\t{}\t{}",
252 hit.cited_by_count, year, oa, doi, hit.title
253 )
254 .context("failed to write search row to stdout")?;
255 }
256 Ok(())
257}
258
259fn resolve_openalex_base() -> Result<url::Url> {
262 let raw =
263 std::env::var("DOIGET_OPENALEX_BASE").unwrap_or_else(|_| OPENALEX_DEFAULT_BASE.to_string());
264 url::Url::parse(&raw).with_context(|| format!("DOIGET_OPENALEX_BASE is not a URL: {raw}"))
265}
266
267fn local_envelope(query: &str, entries: &[EntryInfo]) -> serde_json::Value {
271 serde_json::json!({
272 "scope": "local",
273 "query": query,
274 "count": entries.len(),
275 "results": entries,
276 })
277}
278
279fn external_envelope(query: &str, results: &PaperSearchResults) -> serde_json::Value {
283 serde_json::json!({
284 "scope": "external",
285 "query": query,
286 "total_results": results.total_results,
287 "count": results.results.len(),
288 "results": results.results,
289 })
290}
291
292fn write_json(out: &mut impl Write, value: &serde_json::Value) -> Result<()> {
295 let s = serde_json::to_string_pretty(value).context("failed to serialize search JSON")?;
296 writeln!(out, "{s}").context("failed to write search JSON to stdout")
297}
298
/// Renders an optional value for a table cell: the value's `Display` output
/// when present, or `-` when absent.
fn dash_or<T: std::fmt::Display>(v: Option<T>) -> String {
    match v {
        Some(value) => value.to_string(),
        None => String::from("-"),
    }
}
303
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use doiget_core::discovery::{DiscoverySource, PaperHit};

    /// Fixture: one fully populated OpenAlex hit.
    fn hit() -> PaperHit {
        PaperHit {
            doi: Some("10.1234/x".to_string()),
            openalex_id: "W1".to_string(),
            arxiv: None,
            title: "T".to_string(),
            authors: vec!["A".to_string()],
            year: Some(2024),
            venue: Some("V".to_string()),
            abstract_: Some("abs".to_string()),
            cited_by_count: 3,
            oa_status: Some("gold".to_string()),
            source: DiscoverySource::OpenAlex,
        }
    }

    /// Fixture: external arguments with every filter off except the ones
    /// under test.
    fn ext(limit: usize, from_year: Option<i32>, to_year: Option<i32>) -> ExternalArgs {
        ExternalArgs {
            limit,
            from_year,
            to_year,
            oa_only: false,
            min_citations: None,
            min_fwci: None,
            min_percentile: None,
            author: None,
            venue: None,
            publisher: None,
            sort: SortArg::Relevance,
        }
    }

    /// Runs an external, explicitly quiet search for `"q"` and returns the
    /// error it is required to fail with; panics with `why` if it succeeds.
    async fn rejection(args: ExternalArgs, why: &str) -> anyhow::Error {
        run("q".into(), false, args, OutputMode::Quiet, true)
            .await
            .expect_err(why)
    }

    #[test]
    fn external_envelope_has_scope_total_and_results() {
        let found = PaperSearchResults {
            results: vec![hit()],
            total_results: Some(4012),
        };
        let envelope = external_envelope("spin glass", &found);
        assert_eq!(envelope["scope"], "external");
        assert_eq!(envelope["query"], "spin glass");
        assert_eq!(envelope["total_results"], 4012);
        assert_eq!(envelope["count"], 1);
        assert_eq!(envelope["results"][0]["openalex_id"], "W1");
        assert_eq!(envelope["results"][0]["abstract"], "abs");
    }

    #[test]
    fn sort_arg_lowers_to_core() {
        assert_eq!(SearchSort::from(SortArg::Relevance), SearchSort::Relevance);
    }

    #[test]
    fn local_envelope_has_local_scope_and_count() {
        let envelope = local_envelope("quantum", &[]);
        assert_eq!(envelope["scope"], "local");
        assert_eq!(envelope["query"], "quantum");
        assert_eq!(envelope["count"], 0);
        assert!(envelope["results"]
            .as_array()
            .expect("results array")
            .is_empty());
        // The local envelope has no total-count field.
        assert!(envelope.get("total_results").is_none());
    }

    #[tokio::test]
    async fn external_rejects_limit_below_1() {
        let err = rejection(ext(0, None, None), "limit 0 must be rejected").await;
        assert!(err.to_string().contains("limit"), "got: {err}");
    }

    #[tokio::test]
    async fn external_rejects_limit_above_200() {
        let err = rejection(ext(201, None, None), "limit 201 must be rejected").await;
        assert!(err.to_string().contains("limit"), "got: {err}");
    }

    #[tokio::test]
    async fn external_rejects_inverted_year_range() {
        let err = rejection(
            ext(25, Some(2025), Some(2010)),
            "from_year > to_year must be rejected",
        )
        .await;
        assert!(err.to_string().contains("is after"), "got: {err}");
    }
}