pubmed-client 0.1.0

An async Rust client for PubMed and PMC APIs for retrieving biomedical research articles
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
//! Integration tests for batch fetch_articles using mocked HTTP responses
//!
//! These tests verify the batch fetching functionality without making real API calls.
//! They use wiremock to simulate NCBI EFetch responses.

use pubmed_client::{ClientConfig, PubMedClient};
use tracing_test::traced_test;
use wiremock::matchers::{method, path_regex};
use wiremock::{Mock, MockServer, ResponseTemplate};

/// Canned EFetch XML body holding three `<PubmedArticle>` records, used by the
/// multi-article batch tests.
///
/// PMIDs in document order: `31978945` (Nature, two authors, and the only record
/// that carries a `doi` `ArticleId`), `33515491` (Lancet Oncology) and
/// `25760099` (Science).
const BATCH_EFETCH_RESPONSE_3_ARTICLES: &str = r#"<?xml version="1.0" ?>
<PubmedArticleSet>
    <PubmedArticle>
        <MedlineCitation>
            <PMID Version="1">31978945</PMID>
            <Article>
                <Journal><Title>Nature</Title></Journal>
                <ArticleTitle>A pneumonia outbreak associated with a new coronavirus</ArticleTitle>
                <Abstract>
                    <AbstractText>In December 2019, a cluster of patients with pneumonia...</AbstractText>
                </Abstract>
                <AuthorList>
                    <Author>
                        <LastName>Wu</LastName>
                        <ForeName>Fan</ForeName>
                    </Author>
                    <Author>
                        <LastName>Zhao</LastName>
                        <ForeName>Su</ForeName>
                    </Author>
                </AuthorList>
                <PublicationTypeList>
                    <PublicationType>Journal Article</PublicationType>
                </PublicationTypeList>
            </Article>
        </MedlineCitation>
        <PubmedData>
            <ArticleIdList>
                <ArticleId IdType="pubmed">31978945</ArticleId>
                <ArticleId IdType="doi">10.1038/s41586-020-2008-3</ArticleId>
            </ArticleIdList>
        </PubmedData>
    </PubmedArticle>
    <PubmedArticle>
        <MedlineCitation>
            <PMID Version="1">33515491</PMID>
            <Article>
                <Journal><Title>Lancet Oncology</Title></Journal>
                <ArticleTitle>Cancer treatment advances in 2020</ArticleTitle>
                <Abstract>
                    <AbstractText>Recent advances in cancer treatment have shown promise...</AbstractText>
                </Abstract>
                <AuthorList>
                    <Author>
                        <LastName>Smith</LastName>
                        <ForeName>John</ForeName>
                    </Author>
                </AuthorList>
                <PublicationTypeList>
                    <PublicationType>Review</PublicationType>
                </PublicationTypeList>
            </Article>
        </MedlineCitation>
        <PubmedData>
            <ArticleIdList>
                <ArticleId IdType="pubmed">33515491</ArticleId>
            </ArticleIdList>
        </PubmedData>
    </PubmedArticle>
    <PubmedArticle>
        <MedlineCitation>
            <PMID Version="1">25760099</PMID>
            <Article>
                <Journal><Title>Science</Title></Journal>
                <ArticleTitle>CRISPR-Cas9 gene editing technology</ArticleTitle>
                <Abstract>
                    <AbstractText>The CRISPR-Cas9 system has revolutionized genome editing...</AbstractText>
                </Abstract>
                <AuthorList>
                    <Author>
                        <LastName>Doudna</LastName>
                        <ForeName>Jennifer</ForeName>
                    </Author>
                </AuthorList>
                <PublicationTypeList>
                    <PublicationType>Journal Article</PublicationType>
                </PublicationTypeList>
            </Article>
        </MedlineCitation>
        <PubmedData>
            <ArticleIdList>
                <ArticleId IdType="pubmed">25760099</ArticleId>
            </ArticleIdList>
        </PubmedData>
    </PubmedArticle>
</PubmedArticleSet>"#;

/// Canned EFetch XML body holding a single `<PubmedArticle>` (PMID `12345678`).
///
/// The record has a journal title, an article title, one author and one
/// publication type; it has no `<Abstract>` and no `<PubmedData>` section.
const SINGLE_ARTICLE_RESPONSE: &str = r#"<?xml version="1.0" ?>
<PubmedArticleSet>
    <PubmedArticle>
        <MedlineCitation>
            <PMID Version="1">12345678</PMID>
            <Article>
                <Journal><Title>Test Journal</Title></Journal>
                <ArticleTitle>Single Test Article</ArticleTitle>
                <AuthorList>
                    <Author>
                        <LastName>Test</LastName>
                        <ForeName>Author</ForeName>
                    </Author>
                </AuthorList>
                <PublicationTypeList>
                    <PublicationType>Journal Article</PublicationType>
                </PublicationTypeList>
            </Article>
        </MedlineCitation>
    </PubmedArticle>
</PubmedArticleSet>"#;

/// Helper to create a mock server with a batch efetch response.
///
/// Starts a fresh [`MockServer`] and mounts one mock that answers every `GET`
/// whose path matches `/efetch\.fcgi.*` with HTTP 200 and `body`, served as
/// `application/xml`.
///
/// The body is attached with `set_body_raw`, which takes the MIME type
/// explicitly. wiremock documents `set_body_string` as setting
/// `Content-Type: text/plain`, so combining it with a manually inserted
/// `content-type` header leaves the served type ambiguous; `set_body_raw`
/// keeps payload and declared type together.
///
/// Returns the running server; the caller must keep it alive for the duration
/// of the test.
async fn setup_batch_efetch_mock(body: &str) -> MockServer {
    let mock_server = MockServer::start().await;

    let xml_response = ResponseTemplate::new(200).set_body_raw(body, "application/xml");

    Mock::given(method("GET"))
        .and(path_regex(r"/efetch\.fcgi.*"))
        .respond_with(xml_response)
        .mount(&mock_server)
        .await;

    mock_server
}

/// Builds a [`PubMedClient`] whose base URL is the given mock server's URI.
fn create_mock_client(mock_server: &MockServer) -> PubMedClient {
    let base_url = mock_server.uri();

    // High rate limit for tests
    let test_config = ClientConfig::new()
        .with_base_url(base_url)
        .with_rate_limit(100.0);

    PubMedClient::with_config(test_config)
}

/// Fetching three PMIDs in one call returns all three parsed articles.
///
/// The mock serves [`BATCH_EFETCH_RESPONSE_3_ARTICLES`]; each article is looked
/// up by PMID and its title/journal (plus authors, abstract and DOI for the
/// first one) are checked against that fixture.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_multiple_articles() {
    let mock_server = setup_batch_efetch_mock(BATCH_EFETCH_RESPONSE_3_ARTICLES).await;
    let client = create_mock_client(&mock_server);

    let requested = ["31978945", "33515491", "25760099"];
    let fetched = client
        .fetch_articles(&requested)
        .await
        .expect("Batch fetch should succeed");

    assert_eq!(fetched.len(), 3, "Should return 3 articles");

    // First fixture record: the coronavirus paper in Nature.
    let covid = fetched
        .iter()
        .find(|article| article.pmid == "31978945")
        .unwrap();
    assert!(covid.title.contains("pneumonia"));
    assert_eq!(covid.journal, "Nature");
    assert_eq!(covid.authors.len(), 2);
    assert!(covid.abstract_text.is_some());
    // The fixture only provides the DOI under PubmedData/ArticleIdList.
    assert_eq!(covid.doi.as_deref(), Some("10.1038/s41586-020-2008-3"));

    // Second fixture record: the cancer review.
    let cancer = fetched
        .iter()
        .find(|article| article.pmid == "33515491")
        .unwrap();
    assert!(cancer.title.contains("Cancer"));
    assert_eq!(cancer.journal, "Lancet Oncology");

    // Third fixture record: the CRISPR paper.
    let crispr = fetched
        .iter()
        .find(|article| article.pmid == "25760099")
        .unwrap();
    assert!(crispr.title.contains("CRISPR"));
    assert_eq!(crispr.journal, "Science");
}

/// A one-element batch returns exactly the article from
/// [`SINGLE_ARTICLE_RESPONSE`] with its PMID, title and journal intact.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_single_article() {
    let mock_server = setup_batch_efetch_mock(SINGLE_ARTICLE_RESPONSE).await;
    let client = create_mock_client(&mock_server);

    let fetched = client
        .fetch_articles(&["12345678"])
        .await
        .expect("Single article batch fetch should succeed");

    assert_eq!(fetched.len(), 1);

    let only = &fetched[0];
    assert_eq!(only.pmid, "12345678");
    assert_eq!(only.title, "Single Test Article");
    assert_eq!(only.journal, "Test Journal");
}

/// An empty PMID slice yields `Ok` with an empty vector and sends nothing to
/// the server.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_empty_input() {
    // No mock is mounted: the test asserts the server is never contacted.
    let mock_server = MockServer::start().await;
    let client = create_mock_client(&mock_server);

    let fetched = client
        .fetch_articles(&[])
        .await
        .expect("Empty batch should return Ok");
    assert!(fetched.is_empty());

    // Inspect the server's request log to confirm nothing was sent.
    let recorded = mock_server.received_requests().await.unwrap();
    assert_eq!(
        recorded.len(),
        0,
        "No HTTP requests should be made for empty input"
    );
}

/// A non-numeric PMID produces an error and no request reaches the server.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_invalid_pmid_rejected() {
    let mock_server = MockServer::start().await;
    let client = create_mock_client(&mock_server);

    let outcome = client.fetch_articles(&["not_a_number"]).await;
    assert!(outcome.is_err(), "Invalid PMID should cause error");

    // The request log must be empty: the error has to come from validation,
    // not from the network.
    let recorded = mock_server.received_requests().await.unwrap();
    assert_eq!(
        recorded.len(),
        0,
        "No HTTP requests should be made for invalid PMIDs"
    );
}

/// One invalid PMID among valid ones fails the whole batch, and no request is
/// sent to the server.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_mixed_valid_invalid_pmids() {
    let mock_server = MockServer::start().await;
    let client = create_mock_client(&mock_server);

    let mixed_pmids = ["31978945", "invalid", "25760099"];
    let outcome = client.fetch_articles(&mixed_pmids).await;
    assert!(outcome.is_err(), "Mixed valid/invalid PMIDs should fail");

    // The request log must stay empty when validation fails.
    let recorded = mock_server.received_requests().await.unwrap();
    assert_eq!(recorded.len(), 0);
}

/// Test batch fetch rejects a zero PMID before making network requests
///
/// Checks both that `fetch_articles(&["0"])` returns an error and that the mock
/// server recorded no request, matching the other PMID-validation tests in this
/// file.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_zero_pmid_rejected() {
    let mock_server = MockServer::start().await;
    let client = create_mock_client(&mock_server);

    let result = client.fetch_articles(&["0"]).await;
    assert!(result.is_err(), "PMID 0 should be rejected");

    // No mock is mounted, so a request that did reach the server would most
    // likely surface as an error too. Checking the request log is what proves
    // the rejection came from validation rather than from the network.
    let received_requests = mock_server.received_requests().await.unwrap();
    assert_eq!(
        received_requests.len(),
        0,
        "No HTTP requests should be made for PMID 0"
    );
}

/// Test that batch fetch sends comma-separated IDs in a single request
///
/// Two things are verified:
/// * the efetch mock carries `.expect(1)`, so wiremock fails the test when the
///   server is dropped unless exactly one matching request arrived;
/// * the recorded request's `id` query parameter, split on `,`, contains every
///   requested PMID and nothing else.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_sends_single_request() {
    let mock_server = MockServer::start().await;

    // The matcher itself only looks at method and path; the ID list is checked
    // below against the recorded request.
    Mock::given(method("GET"))
        .and(path_regex(r"/efetch\.fcgi.*"))
        .respond_with(ResponseTemplate::new(200).set_body_string(BATCH_EFETCH_RESPONSE_3_ARTICLES))
        .expect(1) // Exactly one request
        .mount(&mock_server)
        .await;

    let client = create_mock_client(&mock_server);

    let requested = ["31978945", "33515491", "25760099"];
    let articles = client
        .fetch_articles(&requested)
        .await
        .expect("Batch fetch should succeed");

    assert_eq!(articles.len(), 3);

    // Verify that the single request really carried all IDs, comma-separated.
    let received_requests = mock_server.received_requests().await.unwrap();
    assert_eq!(
        received_requests.len(),
        1,
        "All PMIDs should be fetched with one HTTP request"
    );

    // `query_pairs` percent-decodes, so an encoded comma (`%2C`) is handled too.
    let id_param = received_requests[0]
        .url
        .query_pairs()
        .find(|(key, _)| key == "id")
        .map(|(_, value)| value.into_owned())
        .expect("efetch request should carry an `id` query parameter");

    let sent_ids: Vec<&str> = id_param.split(',').collect();
    assert_eq!(
        sent_ids.len(),
        requested.len(),
        "`id` parameter should list exactly the requested PMIDs, got {:?}",
        sent_ids
    );
    for pmid in &requested {
        assert!(
            sent_ids.contains(pmid),
            "PMID {} missing from `id` parameter {:?}",
            pmid,
            sent_ids
        );
    }

    // wiremock will verify expect(1) on drop
}

/// `search_and_fetch` issues one esearch request and one efetch request for a
/// three-PMID result set.
///
/// Both mocks carry `.expect(1)`, so wiremock fails the test on drop if either
/// endpoint is hit a different number of times.
#[tokio::test]
#[traced_test]
async fn test_search_and_fetch_uses_batch() {
    let mock_server = MockServer::start().await;

    // ESearch answers with a JSON result listing three PMIDs.
    let esearch_body = serde_json::json!({
        "esearchresult": {
            "count": "3",
            "retmax": "3",
            "retstart": "0",
            "idlist": ["31978945", "33515491", "25760099"]
        }
    });
    let esearch_response = ResponseTemplate::new(200)
        .set_body_json(esearch_body)
        .insert_header("content-type", "application/json");

    Mock::given(method("GET"))
        .and(path_regex(r"/esearch\.fcgi.*"))
        .respond_with(esearch_response)
        .expect(1)
        .mount(&mock_server)
        .await;

    // EFetch must be hit once for all PMIDs together, not once per PMID.
    let efetch_response =
        ResponseTemplate::new(200).set_body_string(BATCH_EFETCH_RESPONSE_3_ARTICLES);

    Mock::given(method("GET"))
        .and(path_regex(r"/efetch\.fcgi.*"))
        .respond_with(efetch_response)
        .expect(1) // Only 1 fetch request, not 3
        .mount(&mock_server)
        .await;

    let client = create_mock_client(&mock_server);

    let fetched = client
        .search_and_fetch("test query", 3, None)
        .await
        .expect("search_and_fetch should succeed");

    assert_eq!(fetched.len(), 3);

    // The expect(1) counts are checked by wiremock when `mock_server` drops.
}

/// Test batch fetch handles server error gracefully
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_server_error() {
    let mock_server = MockServer::start().await;

    Mock::given(method("GET"))
        .and(path_regex(r"/efetch\.fcgi.*"))
        .respond_with(ResponseTemplate::new(500).set_body_string("Internal Server Error"))
        .mount(&mock_server)
        .await;

    let client = create_mock_client(&mock_server);

    let result = client.fetch_articles(&["31978945", "33515491"]).await;

    assert!(result.is_err(), "Server error should propagate");
}

/// A 200 response with a zero-length body yields `Ok` with no articles.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_empty_xml_response() {
    let mock_server = setup_batch_efetch_mock("").await;
    let client = create_mock_client(&mock_server);

    let fetched = client
        .fetch_articles(&["31978945"])
        .await
        .expect("Empty response should return Ok with empty vec");

    assert!(fetched.is_empty());
}

/// A well-formed `<PubmedArticleSet>` with no children yields `Ok` with no
/// articles.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_empty_article_set() {
    let empty_set_xml = r#"<?xml version="1.0" ?>
<PubmedArticleSet>
</PubmedArticleSet>"#;

    let mock_server = setup_batch_efetch_mock(empty_set_xml).await;
    let client = create_mock_client(&mock_server);

    let fetched = client
        .fetch_articles(&["99999999"])
        .await
        .expect("Empty article set should return Ok");

    assert!(fetched.is_empty());
}

/// Passing the same PMID twice does not error and returns at least one article.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_duplicate_pmids() {
    let mock_server = setup_batch_efetch_mock(SINGLE_ARTICLE_RESPONSE).await;
    let client = create_mock_client(&mock_server);

    // The mock returns one record whatever is requested, so this only checks
    // that a duplicated input is accepted; real de-duplication is left to NCBI.
    let repeated = ["12345678", "12345678"];
    let fetched = client
        .fetch_articles(&repeated)
        .await
        .expect("Duplicate PMIDs should not cause error");

    assert!(!fetched.is_empty());
}

/// An HTTP 429 from efetch (with `retry-after: 1`) results in an `Err`.
#[tokio::test]
#[traced_test]
async fn test_batch_fetch_rate_limited() {
    let mock_server = MockServer::start().await;

    // Every efetch GET is answered with 429 and a one-second retry hint.
    let throttled = ResponseTemplate::new(429)
        .set_body_string("Too Many Requests")
        .insert_header("retry-after", "1");

    Mock::given(method("GET"))
        .and(path_regex(r"/efetch\.fcgi.*"))
        .respond_with(throttled)
        .mount(&mock_server)
        .await;

    let client = create_mock_client(&mock_server);

    let outcome = client.fetch_articles(&["31978945", "33515491"]).await;

    assert!(outcome.is_err(), "429 response should result in error");
}