Expand description
§PubMed Client
A Rust client library for accessing PubMed and PMC (PubMed Central) APIs. This crate provides easy-to-use interfaces for searching, fetching, and parsing biomedical research articles.
§Features
- PubMed API Integration: Search and fetch article metadata
- PMC Full Text: Retrieve and parse structured full-text articles
- Markdown Export: Convert PMC articles to well-formatted Markdown
- Response Caching: Reduce API quota usage with intelligent caching
- Async Support: Built on tokio for async/await support
- Error Handling: Comprehensive error types for robust error handling
- Type Safety: Strongly typed data structures for all API responses
§Quick Start
§Searching for Articles
use pubmed_client_rs::PubMedClient;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PubMedClient::new();
// Search for articles with query builder
let articles = client
.search()
.query("covid-19 treatment")
.open_access_only()
.published_after(2020)
.limit(10)
.search_and_fetch(&client)
.await?;
for article in articles {
println!("Title: {}", article.title);
let author_names: Vec<&str> = article.authors.iter().map(|a| a.full_name.as_str()).collect();
println!("Authors: {}", author_names.join(", "));
}
Ok(())
}
§Fetching Full Text from PMC
use pubmed_client_rs::PmcClient;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PmcClient::new();
// Check if PMC full text is available
if let Some(pmcid) = client.check_pmc_availability("33515491").await? {
// Fetch structured full text
let full_text = client.fetch_full_text(&pmcid).await?;
println!("Title: {}", full_text.title);
println!("Sections: {}", full_text.sections.len());
println!("References: {}", full_text.references.len());
}
Ok(())
}
§Converting PMC Articles to Markdown
use pubmed_client_rs::{PmcClient, PmcMarkdownConverter, HeadingStyle, ReferenceStyle};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PmcClient::new();
// Fetch and parse a PMC article
if let Ok(full_text) = client.fetch_full_text("PMC1234567").await {
// Create a markdown converter with custom configuration
let converter = PmcMarkdownConverter::new()
.with_include_metadata(true)
.with_include_toc(true)
.with_heading_style(HeadingStyle::ATX)
.with_reference_style(ReferenceStyle::Numbered);
// Convert to markdown
let markdown = converter.convert(&full_text);
println!("{}", markdown);
// Or save to file
std::fs::write("article.md", markdown)?;
}
Ok(())
}
§Downloading and Extracting PMC Articles as TAR files
use pubmed_client_rs::PmcClient;
use std::path::Path;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PmcClient::new();
let output_dir = Path::new("./extracted_articles");
// Download and extract a PMC article as tar.gz from the OA API
let files = client.download_and_extract_tar("PMC7906746", output_dir).await?;
println!("Extracted {} files:", files.len());
for file in files {
println!(" - {}", file);
}
Ok(())
}
§Extracting Figures with Captions
use pubmed_client_rs::PmcClient;
use std::path::Path;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = PmcClient::new();
let output_dir = Path::new("./extracted_articles");
// Extract figures and match them with captions from XML
let figures = client.extract_figures_with_captions("PMC7906746", output_dir).await?;
for figure in figures {
println!("Figure {}: {}", figure.figure.id, figure.figure.caption);
println!("File: {}", figure.extracted_file_path);
if let Some(dimensions) = figure.dimensions {
println!("Dimensions: {}x{}", dimensions.0, dimensions.1);
}
}
Ok(())
}
§Response Caching
The library supports intelligent caching to reduce API quota usage and improve performance.
§Basic Caching
use pubmed_client_rs::{PmcClient, ClientConfig};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Enable default memory caching
let config = ClientConfig::new().with_cache();
let client = PmcClient::with_config(config);
// First fetch - hits the API
let article1 = client.fetch_full_text("PMC7906746").await?;
// Second fetch - served from cache
let article2 = client.fetch_full_text("PMC7906746").await?;
Ok(())
}
§Advanced Caching Options
use pubmed_client_rs::{PmcClient, ClientConfig};
use pubmed_client_rs::cache::CacheConfig;
use std::time::Duration;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Memory cache with custom settings
let cache_config = CacheConfig {
max_capacity: 5000,
time_to_live: Duration::from_secs(24 * 60 * 60), // 24 hours
};
let config = ClientConfig::new()
.with_cache_config(cache_config);
let client = PmcClient::with_config(config);
// Use the client normally - caching happens automatically
let article = client.fetch_full_text("PMC7906746").await?;
Ok(())
}
§Hybrid Cache with Disk Persistence
#[cfg(not(target_arch = "wasm32"))]
{
use pubmed_client_rs::{PmcClient, ClientConfig};
use pubmed_client_rs::cache::CacheConfig;
use std::time::Duration;
use std::path::PathBuf;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Memory cache configuration
let cache_config = CacheConfig {
max_capacity: 1000,
time_to_live: Duration::from_secs(24 * 60 * 60),
};
let config = ClientConfig::new()
.with_cache_config(cache_config);
let client = PmcClient::with_config(config);
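// Articles are cached in memory; this snippet sets only the in-memory
// options (capacity and TTL) and does not configure a disk location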
let article = client.fetch_full_text("PMC7906746").await?;
Ok(())
}
}
§Cache Management
use pubmed_client_rs::{PmcClient, ClientConfig};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let config = ClientConfig::new().with_cache();
let client = PmcClient::with_config(config);
// Fetch some articles
client.fetch_full_text("PMC7906746").await?;
client.fetch_full_text("PMC10618641").await?;
// Check cache statistics
let count = client.cache_entry_count();
println!("Cached items: {}", count);
// Clear the cache when needed
client.clear_cache().await;
Ok(())
}
Re-exports§
pub use common::Affiliation;
pub use common::Author;
pub use common::PmcId;
pub use common::PubMedId;
pub use config::ClientConfig;
pub use error::PubMedError;
pub use error::Result;
pub use pmc::models::ExtractedFigure;
pub use pmc::parse_pmc_xml;
pub use pmc::ArticleSection;
pub use pmc::Figure;
pub use pmc::FundingInfo;
pub use pmc::HeadingStyle;
pub use pmc::JournalInfo;
pub use pmc::MarkdownConfig;
pub use pmc::OaSubsetInfo;
pub use pmc::PmcClient;
pub use pmc::PmcFullText;
pub use pmc::PmcMarkdownConverter;
pub use pmc::PmcTarClient;
pub use pmc::Reference;
pub use pmc::ReferenceStyle;
pub use pmc::Table;
pub use pubmed::parse_article_from_xml;
pub use pubmed::ArticleSummary;
pub use pubmed::ArticleType;
pub use pubmed::CitationMatch;
pub use pubmed::CitationMatchStatus;
pub use pubmed::CitationMatches;
pub use pubmed::CitationQuery;
pub use pubmed::Citations;
pub use pubmed::DatabaseCount;
pub use pubmed::DatabaseInfo;
pub use pubmed::FieldInfo;
pub use pubmed::GlobalQueryResults;
pub use pubmed::HistorySession;
pub use pubmed::Language;
pub use pubmed::LinkInfo;
pub use pubmed::PmcLinks;
pub use pubmed::PubMedArticle;
pub use pubmed::PubMedClient;
pub use pubmed::RelatedArticles;
pub use pubmed::SearchQuery;
pub use pubmed::SearchResult;
pub use pubmed::SortOrder;
pub use pubmed::SpellCheckResult;
pub use pubmed::SpelledQuerySegment;
pub use rate_limit::RateLimiter;
pub use time::sleep;
pub use time::Duration;
pub use time::Instant;
Modules§
- cache
- common
- Common data structures and utilities shared between PubMed and PMC modules
- config
- error
- pmc
- PMC (PubMed Central) client for fetching full-text articles
- pubmed
- PubMed client for searching and fetching article metadata
- rate_limit
- Rate limiting implementation for NCBI API compliance
- retry
- Retry logic with exponential backoff for handling transient network failures
- time
- Internal time management module for cross-platform compatibility
Structs§
- Client
- Convenience client that combines both PubMed and PMC functionality