Skip to main content

papers_core/
api.rs

1use papers_openalex::{
2    Author, AutocompleteResponse, Domain, Field, FindWorksParams, FindWorksResponse, Funder,
3    GetParams, Institution, OpenAlexClient, OpenAlexError, Publisher, Source, Subfield,
4    Topic, Work,
5};
6use papers_zotero::ZoteroClient;
7use serde::Serialize;
8
9use crate::filter::{
10    AuthorListParams, DomainListParams, FieldListParams, FilterError, FunderListParams,
11    InstitutionListParams, PublisherListParams, SourceListParams, SubfieldListParams,
12    TopicListParams, WorkListParams, is_openalex_id, resolve_entity_id,
13    resolve_filters, WORK_ALIASES,
14};
15use crate::summary::{
16    AuthorSummary, DomainSummary, FieldSummary, FunderSummary, InstitutionSummary,
17    PublisherSummary, SlimListResponse, SourceSummary, SubfieldSummary, TopicSummary, WorkSummary,
18    summary_list_result,
19};
20
21// ── List ─────────────────────────────────────────────────────────────────
22
23pub async fn work_list(
24    client: &OpenAlexClient,
25    params: &WorkListParams,
26) -> Result<SlimListResponse<WorkSummary>, FilterError> {
27    let (alias_values, mut list_params) = params.into_aliases_and_list_params();
28    list_params.filter = resolve_filters(client, WORK_ALIASES, &alias_values, list_params.filter.as_deref()).await?;
29    Ok(summary_list_result(client.list_works(&list_params).await, WorkSummary::from)?)
30}
31
32macro_rules! entity_list_fn {
33    ($fn_name:ident, $params_type:ident, $summary_type:ident, $client_method:ident) => {
34        pub async fn $fn_name(
35            client: &OpenAlexClient,
36            params: &$params_type,
37        ) -> Result<SlimListResponse<$summary_type>, FilterError> {
38            let (alias_values, mut list_params) = params.into_aliases_and_list_params();
39            list_params.filter = resolve_filters(
40                client,
41                $params_type::alias_specs(),
42                &alias_values,
43                list_params.filter.as_deref(),
44            ).await?;
45            Ok(summary_list_result(client.$client_method(&list_params).await, $summary_type::from)?)
46        }
47    };
48}
49
50entity_list_fn!(author_list, AuthorListParams, AuthorSummary, list_authors);
51entity_list_fn!(source_list, SourceListParams, SourceSummary, list_sources);
52entity_list_fn!(institution_list, InstitutionListParams, InstitutionSummary, list_institutions);
53entity_list_fn!(topic_list, TopicListParams, TopicSummary, list_topics);
54entity_list_fn!(publisher_list, PublisherListParams, PublisherSummary, list_publishers);
55entity_list_fn!(funder_list, FunderListParams, FunderSummary, list_funders);
56entity_list_fn!(domain_list, DomainListParams, DomainSummary, list_domains);
57entity_list_fn!(field_list, FieldListParams, FieldSummary, list_fields);
58entity_list_fn!(subfield_list, SubfieldListParams, SubfieldSummary, list_subfields);
59
60// ── Get (smart ID resolution) ────────────────────────────────────────────
61
62/// Returns `true` if `input` looks like a known identifier for the given entity type
63/// (as opposed to a search query that needs resolution).
64fn looks_like_identifier(input: &str, entity_type: &str) -> bool {
65    // OpenAlex IDs (short or full URL)
66    if is_openalex_id(input, entity_type) {
67        return true;
68    }
69
70    // DOIs (works only, but safe to pass through for any entity — API will 404 gracefully)
71    if input.starts_with("https://doi.org/")
72        || input.starts_with("doi:")
73        || (input.starts_with("10.") && input.contains('/'))
74    {
75        return true;
76    }
77
78    // PubMed IDs
79    if input.starts_with("pmid:") || input.starts_with("pmcid:") {
80        return true;
81    }
82
83    // ORCIDs (authors)
84    if input.starts_with("https://orcid.org/") {
85        return true;
86    }
87
88    // ROR IDs (institutions)
89    if input.starts_with("https://ror.org/") {
90        return true;
91    }
92
93    // ISSNs: XXXX-XXXX pattern (digits and X)
94    if input.len() == 9 && input.as_bytes().get(4) == Some(&b'-') {
95        let (left, right) = (&input[..4], &input[5..]);
96        if left.chars().all(|c| c.is_ascii_digit() || c == 'X' || c == 'x')
97            && right.chars().all(|c| c.is_ascii_digit() || c == 'X' || c == 'x')
98        {
99            return true;
100        }
101    }
102
103    false
104}
105
/// Reduce an identifier to the bare form expected by the client's get methods.
///
/// This is the inverse direction of `normalize_id` (which adds path prefixes for
/// filter expressions): here prefixes are removed.
///
/// | input                              | entity type | output             |
/// |------------------------------------|-------------|--------------------|
/// | `https://openalex.org/W123`        | any         | `W123`             |
/// | `https://openalex.org/domains/3`   | `domains`   | `3`                |
/// | `domains/3`                        | `domains`   | `3`                |
/// | `W123`                             | any         | `W123`             |
/// | `10.1234/foo`                      | any         | `doi:10.1234/foo`  |
///
/// The `domains/`, `fields/` and `subfields/` path prefixes are only removed when
/// `entity_type` is the matching hierarchy type. DOIs that already carry `doi:` or
/// `https://doi.org/` are returned unchanged.
fn bare_id_for_get(input: &str, entity_type: &str) -> String {
    let without_host = input
        .strip_prefix("https://openalex.org/")
        .unwrap_or(input);

    // Hierarchy entities: the client prepends the path segment itself.
    let path_prefix = match entity_type {
        "domains" => Some("domains/"),
        "fields" => Some("fields/"),
        "subfields" => Some("subfields/"),
        _ => None,
    };
    let bare = path_prefix
        .and_then(|prefix| without_host.strip_prefix(prefix))
        .unwrap_or(without_host);

    // A bare DOI needs the "doi:" namespace before it can be used on a get endpoint.
    let is_bare_doi = bare.starts_with("10.") && bare.contains('/');
    if is_bare_doi {
        format!("doi:{bare}")
    } else {
        bare.to_owned()
    }
}
132
133/// Resolve an input string to an entity ID suitable for the get endpoint.
134///
135/// If the input looks like a known identifier, it is returned in bare form.
136/// Otherwise, it is treated as a search query: the list endpoint is queried
137/// for the top result by citation count, and that result's ID is used.
138async fn resolve_get_id(
139    client: &OpenAlexClient,
140    input: &str,
141    entity_type: &'static str,
142) -> Result<String, FilterError> {
143    if looks_like_identifier(input, entity_type) {
144        Ok(bare_id_for_get(input, entity_type))
145    } else {
146        // resolve_entity_id returns a normalized ID (e.g. "domains/3"); strip for get
147        let normalized = resolve_entity_id(client, input, entity_type).await?;
148        Ok(bare_id_for_get(&normalized, entity_type))
149    }
150}
151
152macro_rules! entity_get_fn {
153    ($fn_name:ident, $return_type:ident, $client_method:ident, $entity_type:literal) => {
154        pub async fn $fn_name(
155            client: &OpenAlexClient,
156            id: &str,
157            params: &GetParams,
158        ) -> Result<$return_type, FilterError> {
159            let resolved = resolve_get_id(client, id, $entity_type).await?;
160            Ok(client.$client_method(&resolved, params).await?)
161        }
162    };
163}
164
165entity_get_fn!(work_get, Work, get_work, "works");
166entity_get_fn!(author_get, Author, get_author, "authors");
167entity_get_fn!(source_get, Source, get_source, "sources");
168entity_get_fn!(institution_get, Institution, get_institution, "institutions");
169entity_get_fn!(topic_get, Topic, get_topic, "topics");
170entity_get_fn!(publisher_get, Publisher, get_publisher, "publishers");
171entity_get_fn!(funder_get, Funder, get_funder, "funders");
172entity_get_fn!(domain_get, Domain, get_domain, "domains");
173entity_get_fn!(field_get, Field, get_field, "fields");
174entity_get_fn!(subfield_get, Subfield, get_subfield, "subfields");
175
176/// Combined work response including optional Zotero library metadata.
177#[derive(Debug, Clone, Serialize)]
178pub struct WorkGetResponse {
179    pub work: Work,
180    pub in_zotero: bool,
181    pub zotero: Option<crate::text::ZoteroItemInfo>,
182}
183
184/// Get a work by ID and check if it's in the Zotero library (if Zotero is configured).
185pub async fn work_get_response(
186    client: &OpenAlexClient,
187    zotero: Option<&ZoteroClient>,
188    id: &str,
189    params: &GetParams,
190) -> Result<WorkGetResponse, FilterError> {
191    let t0 = std::time::Instant::now();
192    let work = work_get(client, id, params).await?;
193    eprintln!("[timing] openalex work_get: {:?}", t0.elapsed());
194
195    let zotero_info = if let Some(z) = zotero {
196        let t1 = std::time::Instant::now();
197        let info = crate::text::find_work_in_zotero(z, &work).await.unwrap_or(None);
198        eprintln!("[timing] zotero find_work: {:?}", t1.elapsed());
199        info
200    } else {
201        None
202    };
203    eprintln!("[timing] total: {:?}", t0.elapsed());
204    Ok(WorkGetResponse {
205        in_zotero: zotero_info.is_some(),
206        work,
207        zotero: zotero_info,
208    })
209}
210
211// ── Autocomplete ─────────────────────────────────────────────────────────
212
213pub async fn work_autocomplete(
214    client: &OpenAlexClient,
215    q: &str,
216) -> Result<AutocompleteResponse, OpenAlexError> {
217    client.autocomplete_works(q).await
218}
219
220pub async fn author_autocomplete(
221    client: &OpenAlexClient,
222    q: &str,
223) -> Result<AutocompleteResponse, OpenAlexError> {
224    client.autocomplete_authors(q).await
225}
226
227pub async fn source_autocomplete(
228    client: &OpenAlexClient,
229    q: &str,
230) -> Result<AutocompleteResponse, OpenAlexError> {
231    client.autocomplete_sources(q).await
232}
233
234pub async fn institution_autocomplete(
235    client: &OpenAlexClient,
236    q: &str,
237) -> Result<AutocompleteResponse, OpenAlexError> {
238    client.autocomplete_institutions(q).await
239}
240
241pub async fn publisher_autocomplete(
242    client: &OpenAlexClient,
243    q: &str,
244) -> Result<AutocompleteResponse, OpenAlexError> {
245    client.autocomplete_publishers(q).await
246}
247
248pub async fn funder_autocomplete(
249    client: &OpenAlexClient,
250    q: &str,
251) -> Result<AutocompleteResponse, OpenAlexError> {
252    client.autocomplete_funders(q).await
253}
254
255pub async fn subfield_autocomplete(
256    client: &OpenAlexClient,
257    q: &str,
258) -> Result<AutocompleteResponse, OpenAlexError> {
259    client.autocomplete_subfields(q).await
260}
261
262// ── Find ─────────────────────────────────────────────────────────────────
263
264/// AI semantic search for works by conceptual similarity.
265/// Automatically uses POST for queries longer than 2048 characters.
266pub async fn work_find(
267    client: &OpenAlexClient,
268    params: &FindWorksParams,
269) -> Result<FindWorksResponse, OpenAlexError> {
270    if params.query.len() > 2048 {
271        client.find_works_post(params).await
272    } else {
273        client.find_works(params).await
274    }
275}
276
/// Unit tests for the pure ID-classification helpers (`looks_like_identifier`
/// and `bare_id_for_get`). Nothing here touches the network.
#[cfg(test)]
mod tests {
    use super::*;

    // ── looks_like_identifier: OpenAlex IDs ──────────────────────────────

    #[test]
    fn id_work_short() {
        assert!(looks_like_identifier("W2741809807", "works"));
    }

    #[test]
    fn id_author_short() {
        assert!(looks_like_identifier("A5023888391", "authors"));
    }

    #[test]
    fn id_source_short() {
        assert!(looks_like_identifier("S131921510", "sources"));
    }

    #[test]
    fn id_institution_short() {
        assert!(looks_like_identifier("I136199984", "institutions"));
    }

    #[test]
    fn id_topic_short() {
        assert!(looks_like_identifier("T11636", "topics"));
    }

    #[test]
    fn id_publisher_short() {
        assert!(looks_like_identifier("P4310319798", "publishers"));
    }

    #[test]
    fn id_funder_short() {
        assert!(looks_like_identifier("F1234567", "funders"));
    }

    #[test]
    fn id_full_openalex_url() {
        assert!(looks_like_identifier("https://openalex.org/W2741809807", "works"));
        assert!(looks_like_identifier("https://openalex.org/A123", "authors"));
    }

    // ── looks_like_identifier: DOIs ──────────────────────────────────────

    #[test]
    fn id_doi_url() {
        assert!(looks_like_identifier("https://doi.org/10.1109/ipdps.2012.30", "works"));
    }

    #[test]
    fn id_doi_prefix() {
        assert!(looks_like_identifier("doi:10.1109/ipdps.2012.30", "works"));
    }

    #[test]
    fn id_bare_doi() {
        assert!(looks_like_identifier("10.1109/ipdps.2012.30", "works"));
    }

    // ── looks_like_identifier: PubMed IDs ────────────────────────────────

    #[test]
    fn id_pmid() {
        assert!(looks_like_identifier("pmid:12345678", "works"));
    }

    #[test]
    fn id_pmcid() {
        assert!(looks_like_identifier("pmcid:PMC1234567", "works"));
    }

    // ── looks_like_identifier: ORCIDs ────────────────────────────────────

    #[test]
    fn id_orcid() {
        assert!(looks_like_identifier("https://orcid.org/0000-0002-1825-0097", "authors"));
    }

    // ── looks_like_identifier: ROR IDs ───────────────────────────────────

    #[test]
    fn id_ror() {
        assert!(looks_like_identifier("https://ror.org/03vek6s52", "institutions"));
    }

    // ── looks_like_identifier: ISSNs ─────────────────────────────────────

    #[test]
    fn id_issn() {
        assert!(looks_like_identifier("0028-0836", "sources"));
    }

    #[test]
    fn id_issn_with_x() {
        assert!(looks_like_identifier("0000-000X", "sources"));
    }

    // ── looks_like_identifier: hierarchy IDs ─────────────────────────────

    #[test]
    fn id_domain_bare_digits() {
        assert!(looks_like_identifier("3", "domains"));
    }

    #[test]
    fn id_domain_path() {
        assert!(looks_like_identifier("domains/3", "domains"));
    }

    #[test]
    fn id_field_bare_digits() {
        assert!(looks_like_identifier("17", "fields"));
    }

    #[test]
    fn id_subfield_bare_digits() {
        assert!(looks_like_identifier("1702", "subfields"));
    }

    // ── looks_like_identifier: search queries (should be false) ──────────

    #[test]
    fn search_query_title() {
        assert!(!looks_like_identifier("adaptive bitonic sort", "works"));
    }

    #[test]
    fn search_query_author_name() {
        assert!(!looks_like_identifier("Albert Einstein", "authors"));
    }

    #[test]
    fn search_query_source_name() {
        assert!(!looks_like_identifier("Nature", "sources"));
    }

    #[test]
    fn search_query_institution_name() {
        assert!(!looks_like_identifier("Massachusetts Institute of Technology", "institutions"));
    }

    #[test]
    fn search_query_topic_name() {
        assert!(!looks_like_identifier("machine learning", "topics"));
    }

    #[test]
    fn search_query_publisher_name() {
        assert!(!looks_like_identifier("Elsevier", "publishers"));
    }

    #[test]
    fn search_query_funder_name() {
        assert!(!looks_like_identifier("National Science Foundation", "funders"));
    }

    #[test]
    fn search_query_domain_name() {
        assert!(!looks_like_identifier("Physical Sciences", "domains"));
    }

    // ── bare_id_for_get ──────────────────────────────────────────────────

    #[test]
    fn bare_strips_openalex_url() {
        assert_eq!(bare_id_for_get("https://openalex.org/W123", "works"), "W123");
    }

    #[test]
    fn bare_short_id_unchanged() {
        assert_eq!(bare_id_for_get("W123", "works"), "W123");
    }

    #[test]
    fn bare_strips_hierarchy_url() {
        assert_eq!(bare_id_for_get("https://openalex.org/domains/3", "domains"), "3");
    }

    #[test]
    fn bare_strips_hierarchy_path() {
        assert_eq!(bare_id_for_get("domains/3", "domains"), "3");
        assert_eq!(bare_id_for_get("fields/17", "fields"), "17");
        assert_eq!(bare_id_for_get("subfields/1702", "subfields"), "1702");
    }

    #[test]
    fn bare_hierarchy_path_kept_for_other_entity_types() {
        // The path prefix is only stripped when it matches the requested entity type.
        assert_eq!(bare_id_for_get("domains/3", "works"), "domains/3");
        assert_eq!(bare_id_for_get("fields/17", "domains"), "fields/17");
    }

    #[test]
    fn bare_doi_gets_prefix() {
        assert_eq!(bare_id_for_get("10.1234/foo", "works"), "doi:10.1234/foo");
    }

    #[test]
    fn bare_prefixed_doi_unchanged() {
        assert_eq!(bare_id_for_get("doi:10.1234/foo", "works"), "doi:10.1234/foo");
        assert_eq!(
            bare_id_for_get("https://doi.org/10.1234/foo", "works"),
            "https://doi.org/10.1234/foo"
        );
    }
}