Skip to main content

papers_core/
api.rs

1use papers_openalex::{
2    Author, AutocompleteResponse, Domain, Field, FindWorksParams, FindWorksResponse, Funder,
3    GetParams, Institution, OpenAlexClient, OpenAlexError, Publisher, Source, Subfield,
4    Topic, Work,
5};
6
7use crate::filter::{
8    AuthorListParams, DomainListParams, FieldListParams, FilterError, FunderListParams,
9    InstitutionListParams, PublisherListParams, SourceListParams, SubfieldListParams,
10    TopicListParams, WorkListParams, is_openalex_id, resolve_entity_id,
11    resolve_filters, WORK_ALIASES,
12};
13use crate::summary::{
14    AuthorSummary, DomainSummary, FieldSummary, FunderSummary, InstitutionSummary,
15    PublisherSummary, SlimListResponse, SourceSummary, SubfieldSummary, TopicSummary, WorkSummary,
16    summary_list_result,
17};
18
19// ── List ─────────────────────────────────────────────────────────────────
20
21pub async fn work_list(
22    client: &OpenAlexClient,
23    params: &WorkListParams,
24) -> Result<SlimListResponse<WorkSummary>, FilterError> {
25    let (alias_values, mut list_params) = params.into_aliases_and_list_params();
26    list_params.filter = resolve_filters(client, WORK_ALIASES, &alias_values, list_params.filter.as_deref()).await?;
27    Ok(summary_list_result(client.list_works(&list_params).await, WorkSummary::from)?)
28}
29
30macro_rules! entity_list_fn {
31    ($fn_name:ident, $params_type:ident, $summary_type:ident, $client_method:ident) => {
32        pub async fn $fn_name(
33            client: &OpenAlexClient,
34            params: &$params_type,
35        ) -> Result<SlimListResponse<$summary_type>, FilterError> {
36            let (alias_values, mut list_params) = params.into_aliases_and_list_params();
37            list_params.filter = resolve_filters(
38                client,
39                $params_type::alias_specs(),
40                &alias_values,
41                list_params.filter.as_deref(),
42            ).await?;
43            Ok(summary_list_result(client.$client_method(&list_params).await, $summary_type::from)?)
44        }
45    };
46}
47
48entity_list_fn!(author_list, AuthorListParams, AuthorSummary, list_authors);
49entity_list_fn!(source_list, SourceListParams, SourceSummary, list_sources);
50entity_list_fn!(institution_list, InstitutionListParams, InstitutionSummary, list_institutions);
51entity_list_fn!(topic_list, TopicListParams, TopicSummary, list_topics);
52entity_list_fn!(publisher_list, PublisherListParams, PublisherSummary, list_publishers);
53entity_list_fn!(funder_list, FunderListParams, FunderSummary, list_funders);
54entity_list_fn!(domain_list, DomainListParams, DomainSummary, list_domains);
55entity_list_fn!(field_list, FieldListParams, FieldSummary, list_fields);
56entity_list_fn!(subfield_list, SubfieldListParams, SubfieldSummary, list_subfields);
57
58// ── Get (smart ID resolution) ────────────────────────────────────────────
59
60/// Returns `true` if `input` looks like a known identifier for the given entity type
61/// (as opposed to a search query that needs resolution).
62fn looks_like_identifier(input: &str, entity_type: &str) -> bool {
63    // OpenAlex IDs (short or full URL)
64    if is_openalex_id(input, entity_type) {
65        return true;
66    }
67
68    // DOIs (works only, but safe to pass through for any entity — API will 404 gracefully)
69    if input.starts_with("https://doi.org/")
70        || input.starts_with("doi:")
71        || (input.starts_with("10.") && input.contains('/'))
72    {
73        return true;
74    }
75
76    // PubMed IDs
77    if input.starts_with("pmid:") || input.starts_with("pmcid:") {
78        return true;
79    }
80
81    // ORCIDs (authors)
82    if input.starts_with("https://orcid.org/") {
83        return true;
84    }
85
86    // ROR IDs (institutions)
87    if input.starts_with("https://ror.org/") {
88        return true;
89    }
90
91    // ISSNs: XXXX-XXXX pattern (digits and X)
92    if input.len() == 9 && input.as_bytes().get(4) == Some(&b'-') {
93        let (left, right) = (&input[..4], &input[5..]);
94        if left.chars().all(|c| c.is_ascii_digit() || c == 'X' || c == 'x')
95            && right.chars().all(|c| c.is_ascii_digit() || c == 'X' || c == 'x')
96        {
97            return true;
98        }
99    }
100
101    false
102}
103
/// Reduces an ID to the bare form expected by the client's get methods.
///
/// Unlike `normalize_id` (which adds path prefixes for filter expressions),
/// this removes prefixes:
/// - `https://openalex.org/W123` → `W123`
/// - `https://openalex.org/domains/3` → `3`
/// - `domains/3` → `3`
/// - `W123` → `W123` (unchanged)
///
/// Only the `domains/`, `fields/` and `subfields/` path prefixes are removed,
/// and only when `entity_type` names that same hierarchy level; each prefix
/// is stripped at most once.
fn bare_id_for_get(input: &str, entity_type: &str) -> String {
    const OPENALEX_URL: &str = "https://openalex.org/";

    let without_url = match input.strip_prefix(OPENALEX_URL) {
        Some(rest) => rest,
        None => input,
    };

    // Hierarchy entities carry a path segment that the client adds itself.
    let path_prefix = match entity_type {
        "domains" => Some("domains/"),
        "fields" => Some("fields/"),
        "subfields" => Some("subfields/"),
        _ => None,
    };

    path_prefix
        .and_then(|prefix| without_url.strip_prefix(prefix))
        .unwrap_or(without_url)
        .to_owned()
}
124
125/// Resolve an input string to an entity ID suitable for the get endpoint.
126///
127/// If the input looks like a known identifier, it is returned in bare form.
128/// Otherwise, it is treated as a search query: the list endpoint is queried
129/// for the top result by citation count, and that result's ID is used.
130async fn resolve_get_id(
131    client: &OpenAlexClient,
132    input: &str,
133    entity_type: &'static str,
134) -> Result<String, FilterError> {
135    if looks_like_identifier(input, entity_type) {
136        Ok(bare_id_for_get(input, entity_type))
137    } else {
138        // resolve_entity_id returns a normalized ID (e.g. "domains/3"); strip for get
139        let normalized = resolve_entity_id(client, input, entity_type).await?;
140        Ok(bare_id_for_get(&normalized, entity_type))
141    }
142}
143
144macro_rules! entity_get_fn {
145    ($fn_name:ident, $return_type:ident, $client_method:ident, $entity_type:literal) => {
146        pub async fn $fn_name(
147            client: &OpenAlexClient,
148            id: &str,
149            params: &GetParams,
150        ) -> Result<$return_type, FilterError> {
151            let resolved = resolve_get_id(client, id, $entity_type).await?;
152            Ok(client.$client_method(&resolved, params).await?)
153        }
154    };
155}
156
157entity_get_fn!(work_get, Work, get_work, "works");
158entity_get_fn!(author_get, Author, get_author, "authors");
159entity_get_fn!(source_get, Source, get_source, "sources");
160entity_get_fn!(institution_get, Institution, get_institution, "institutions");
161entity_get_fn!(topic_get, Topic, get_topic, "topics");
162entity_get_fn!(publisher_get, Publisher, get_publisher, "publishers");
163entity_get_fn!(funder_get, Funder, get_funder, "funders");
164entity_get_fn!(domain_get, Domain, get_domain, "domains");
165entity_get_fn!(field_get, Field, get_field, "fields");
166entity_get_fn!(subfield_get, Subfield, get_subfield, "subfields");
167
168// ── Autocomplete ─────────────────────────────────────────────────────────
169
170pub async fn work_autocomplete(
171    client: &OpenAlexClient,
172    q: &str,
173) -> Result<AutocompleteResponse, OpenAlexError> {
174    client.autocomplete_works(q).await
175}
176
177pub async fn author_autocomplete(
178    client: &OpenAlexClient,
179    q: &str,
180) -> Result<AutocompleteResponse, OpenAlexError> {
181    client.autocomplete_authors(q).await
182}
183
184pub async fn source_autocomplete(
185    client: &OpenAlexClient,
186    q: &str,
187) -> Result<AutocompleteResponse, OpenAlexError> {
188    client.autocomplete_sources(q).await
189}
190
191pub async fn institution_autocomplete(
192    client: &OpenAlexClient,
193    q: &str,
194) -> Result<AutocompleteResponse, OpenAlexError> {
195    client.autocomplete_institutions(q).await
196}
197
198pub async fn publisher_autocomplete(
199    client: &OpenAlexClient,
200    q: &str,
201) -> Result<AutocompleteResponse, OpenAlexError> {
202    client.autocomplete_publishers(q).await
203}
204
205pub async fn funder_autocomplete(
206    client: &OpenAlexClient,
207    q: &str,
208) -> Result<AutocompleteResponse, OpenAlexError> {
209    client.autocomplete_funders(q).await
210}
211
212pub async fn subfield_autocomplete(
213    client: &OpenAlexClient,
214    q: &str,
215) -> Result<AutocompleteResponse, OpenAlexError> {
216    client.autocomplete_subfields(q).await
217}
218
219// ── Find ─────────────────────────────────────────────────────────────────
220
221/// AI semantic search for works by conceptual similarity.
222/// Automatically uses POST for queries longer than 2048 characters.
223pub async fn work_find(
224    client: &OpenAlexClient,
225    params: &FindWorksParams,
226) -> Result<FindWorksResponse, OpenAlexError> {
227    if params.query.len() > 2048 {
228        client.find_works_post(params).await
229    } else {
230        client.find_works(params).await
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` is classified as an identifier for `entity_type`.
    #[track_caller]
    fn expect_identifier(input: &str, entity_type: &str) {
        assert!(looks_like_identifier(input, entity_type));
    }

    /// Asserts that `input` is classified as a search query for `entity_type`.
    #[track_caller]
    fn expect_search_query(input: &str, entity_type: &str) {
        assert!(!looks_like_identifier(input, entity_type));
    }

    // ── looks_like_identifier: OpenAlex IDs ──────────────────────────────

    #[test]
    fn id_work_short() {
        expect_identifier("W2741809807", "works");
    }

    #[test]
    fn id_author_short() {
        expect_identifier("A5023888391", "authors");
    }

    #[test]
    fn id_source_short() {
        expect_identifier("S131921510", "sources");
    }

    #[test]
    fn id_institution_short() {
        expect_identifier("I136199984", "institutions");
    }

    #[test]
    fn id_topic_short() {
        expect_identifier("T11636", "topics");
    }

    #[test]
    fn id_publisher_short() {
        expect_identifier("P4310319798", "publishers");
    }

    #[test]
    fn id_funder_short() {
        expect_identifier("F1234567", "funders");
    }

    #[test]
    fn id_full_openalex_url() {
        expect_identifier("https://openalex.org/W2741809807", "works");
        expect_identifier("https://openalex.org/A123", "authors");
    }

    // ── looks_like_identifier: DOIs ──────────────────────────────────────

    #[test]
    fn id_doi_url() {
        expect_identifier("https://doi.org/10.1109/ipdps.2012.30", "works");
    }

    #[test]
    fn id_doi_prefix() {
        expect_identifier("doi:10.1109/ipdps.2012.30", "works");
    }

    #[test]
    fn id_bare_doi() {
        expect_identifier("10.1109/ipdps.2012.30", "works");
    }

    // ── looks_like_identifier: PubMed IDs ────────────────────────────────

    #[test]
    fn id_pmid() {
        expect_identifier("pmid:12345678", "works");
    }

    #[test]
    fn id_pmcid() {
        expect_identifier("pmcid:PMC1234567", "works");
    }

    // ── looks_like_identifier: ORCIDs ────────────────────────────────────

    #[test]
    fn id_orcid() {
        expect_identifier("https://orcid.org/0000-0002-1825-0097", "authors");
    }

    // ── looks_like_identifier: ROR IDs ───────────────────────────────────

    #[test]
    fn id_ror() {
        expect_identifier("https://ror.org/03vek6s52", "institutions");
    }

    // ── looks_like_identifier: ISSNs ─────────────────────────────────────

    #[test]
    fn id_issn() {
        expect_identifier("0028-0836", "sources");
    }

    #[test]
    fn id_issn_with_x() {
        expect_identifier("0000-000X", "sources");
    }

    // ── looks_like_identifier: hierarchy IDs ─────────────────────────────

    #[test]
    fn id_domain_bare_digits() {
        expect_identifier("3", "domains");
    }

    #[test]
    fn id_domain_path() {
        expect_identifier("domains/3", "domains");
    }

    #[test]
    fn id_field_bare_digits() {
        expect_identifier("17", "fields");
    }

    #[test]
    fn id_subfield_bare_digits() {
        expect_identifier("1702", "subfields");
    }

    // ── looks_like_identifier: search queries (should be false) ──────────

    #[test]
    fn search_query_title() {
        expect_search_query("adaptive bitonic sort", "works");
    }

    #[test]
    fn search_query_author_name() {
        expect_search_query("Albert Einstein", "authors");
    }

    #[test]
    fn search_query_source_name() {
        expect_search_query("Nature", "sources");
    }

    #[test]
    fn search_query_institution_name() {
        expect_search_query("Massachusetts Institute of Technology", "institutions");
    }

    #[test]
    fn search_query_topic_name() {
        expect_search_query("machine learning", "topics");
    }

    #[test]
    fn search_query_publisher_name() {
        expect_search_query("Elsevier", "publishers");
    }

    #[test]
    fn search_query_funder_name() {
        expect_search_query("National Science Foundation", "funders");
    }

    #[test]
    fn search_query_domain_name() {
        expect_search_query("Physical Sciences", "domains");
    }
}
399}