Skip to main content

Crate pubmed_client

Crate pubmed_client 

Source
Expand description

§PubMed Client

A Rust client library for accessing PubMed and PMC (PubMed Central) APIs. This crate provides easy-to-use interfaces for searching, fetching, and parsing biomedical research articles.

§Features

  • PubMed API Integration: Search and fetch article metadata
  • PMC Full Text: Retrieve and parse structured full-text articles
  • Markdown Export: Convert PMC articles to well-formatted Markdown
  • Response Caching: Reduce API quota usage with intelligent caching
  • Async Support: Built on tokio; the client APIs are used with async/await
  • Error Handling: Comprehensive error types that let callers handle failures robustly
  • Type Safety: Strongly typed data structures for all API responses

§Quick Start

§Searching for Articles

use pubmed_client_rs::PubMedClient;

// Example entry point: runs one PubMed search through the query builder and
// prints the title and author names of every article that comes back.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PubMedClient::new();

    // The builder chain configures the query; `search_and_fetch` executes it
    // and yields the matching articles.
    let results = client
        .search()
        .query("covid-19 treatment")
        .open_access_only()
        .published_after(2020)
        .limit(10)
        .search_and_fetch(&client)
        .await?;

    for hit in results {
        // Borrow each author's full name and join them into one display line.
        let author_line = hit
            .authors
            .iter()
            .map(|author| author.full_name.as_str())
            .collect::<Vec<_>>()
            .join(", ");

        println!("Title: {}", hit.title);
        println!("Authors: {}", author_line);
    }

    Ok(())
}

§Fetching Full Text from PMC

use pubmed_client_rs::PmcClient;

// Example entry point: looks up whether PMC full text exists for a PubMed ID
// and, if it does, fetches it and prints a short summary.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PmcClient::new();

    // Nothing to print when no PMC ID is reported for this article.
    let Some(pmcid) = client.check_pmc_availability("33515491").await? else {
        return Ok(());
    };

    // Fetch the structured full text for the PMC ID we just obtained.
    let article = client.fetch_full_text(&pmcid).await?;

    println!("Title: {}", article.title);
    println!("Sections: {}", article.sections.len());
    println!("References: {}", article.references.len());

    Ok(())
}

§Converting PMC Articles to Markdown

use pubmed_client_rs::{PmcClient, PmcMarkdownConverter, HeadingStyle, ReferenceStyle};

// Example entry point: fetches one PMC article, renders it as Markdown, prints
// the result and writes it to `article.md`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PmcClient::new();

    // A failed fetch is not treated as an error here: the example simply
    // finishes without producing any output.
    let Ok(article) = client.fetch_full_text("PMC1234567").await else {
        return Ok(());
    };

    // Converter configured with metadata, a table of contents, ATX headings
    // and numbered references.
    let converter = PmcMarkdownConverter::new()
        .with_include_metadata(true)
        .with_include_toc(true)
        .with_heading_style(HeadingStyle::ATX)
        .with_reference_style(ReferenceStyle::Numbered);

    let rendered = converter.convert(&article);

    // Show the Markdown on stdout ...
    println!("{rendered}");

    // ... and also persist it to disk.
    std::fs::write("article.md", rendered)?;

    Ok(())
}

§Downloading and Extracting PMC Articles as TAR files

use pubmed_client_rs::PmcClient;
use std::path::Path;

// Example entry point: downloads the tar.gz archive of one PMC article,
// extracts it into `./extracted_articles` and lists the returned file entries.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PmcClient::new();
    let target_dir = Path::new("./extracted_articles");

    // One call performs both the download and the extraction.
    let extracted = client
        .download_and_extract_tar("PMC7906746", target_dir)
        .await?;

    println!("Extracted {} files:", extracted.len());
    for entry in extracted {
        println!("  - {entry}");
    }

    Ok(())
}

§Extracting Figures with Captions

use pubmed_client_rs::PmcClient;
use std::path::Path;

// Example entry point: extracts the figures of one PMC article together with
// their captions and prints the id, caption, file path and, when present,
// the dimensions of each figure.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let output_dir = Path::new("./extracted_articles");
    let client = PmcClient::new();

    // Figures come back already paired with their caption data.
    let extracted = client
        .extract_figures_with_captions("PMC7906746", output_dir)
        .await?;

    for item in extracted {
        println!("Figure {}: {}", item.figure.id, item.figure.caption);
        println!("File: {}", item.extracted_file_path);

        // Dimensions are optional; print them only when they are available.
        if let Some(size) = item.dimensions {
            println!("Dimensions: {}x{}", size.0, size.1);
        }
    }

    Ok(())
}

§Response Caching

The library supports intelligent caching to reduce API quota usage and improve performance.

§Basic Caching

use pubmed_client_rs::{PmcClient, ClientConfig};

// Example entry point: enables the default in-memory cache and requests the
// same article twice. Both results are printed so the example compiles without
// unused-variable warnings and its outcome is visible when run.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Enable default memory caching
    let config = ClientConfig::new().with_cache();
    let client = PmcClient::with_config(config);

    // First fetch - hits the API
    let article1 = client.fetch_full_text("PMC7906746").await?;
    println!("First fetch: {}", article1.title);

    // Second fetch - served from cache
    let article2 = client.fetch_full_text("PMC7906746").await?;
    println!("Second fetch: {}", article2.title);

    Ok(())
}

§Advanced Caching Options

use pubmed_client_rs::{PmcClient, ClientConfig};
use pubmed_client_rs::cache::CacheConfig;
use std::time::Duration;

// Example entry point: builds a client whose memory cache uses a custom
// capacity and time-to-live, then fetches one article through it. The fetched
// article is printed so the binding is not left unused.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Memory cache with custom settings
    let cache_config = CacheConfig {
        max_capacity: 5000,
        time_to_live: Duration::from_secs(24 * 60 * 60), // 24 hours
    };

    let config = ClientConfig::new()
        .with_cache_config(cache_config);
    let client = PmcClient::with_config(config);

    // Use the client normally - caching happens automatically
    let article = client.fetch_full_text("PMC7906746").await?;
    println!("Title: {}", article.title);

    Ok(())
}

§Hybrid Cache with Disk Persistence

// This example is compiled only for non-wasm32 targets.
//
// NOTE(review): as written, the example configures only the in-memory cache
// (capacity and time-to-live); no disk location is set anywhere in it. Confirm
// against the current `CacheConfig` fields whether disk persistence needs
// additional configuration.
#[cfg(not(target_arch = "wasm32"))]
{
use pubmed_client_rs::{PmcClient, ClientConfig};
use pubmed_client_rs::cache::CacheConfig;
use std::time::Duration;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Memory cache configuration
    let cache_config = CacheConfig {
        max_capacity: 1000,
        time_to_live: Duration::from_secs(24 * 60 * 60), // 24 hours
    };

    let config = ClientConfig::new()
        .with_cache_config(cache_config);
    let client = PmcClient::with_config(config);

    // Articles are cached in memory
    let article = client.fetch_full_text("PMC7906746").await?;
    println!("Title: {}", article.title);

    Ok(())
}
}

§Cache Management

use pubmed_client_rs::{PmcClient, ClientConfig};

// Example entry point: fetches two articles through a cache-enabled client,
// reports how many entries the cache holds, then clears the cache.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = PmcClient::with_config(ClientConfig::new().with_cache());

    // Fetch the articles one after another; the results themselves are not
    // needed here.
    for pmcid in ["PMC7906746", "PMC10618641"] {
        client.fetch_full_text(pmcid).await?;
    }

    // Report the current number of cache entries.
    let cached = client.cache_entry_count();
    println!("Cached items: {cached}");

    // Drop everything that has been cached so far.
    client.clear_cache().await;

    Ok(())
}

Re-exports§

pub use common::Affiliation;
pub use common::Author;
pub use common::PmcId;
pub use common::PubMedId;
pub use config::ClientConfig;
pub use error::PubMedError;
pub use error::Result;
pub use pmc::models::ExtractedFigure;
pub use pmc::parse_pmc_xml;
pub use pmc::ArticleSection;
pub use pmc::Figure;
pub use pmc::FundingInfo;
pub use pmc::HeadingStyle;
pub use pmc::JournalInfo;
pub use pmc::MarkdownConfig;
pub use pmc::OaSubsetInfo;
pub use pmc::PmcClient;
pub use pmc::PmcFullText;
pub use pmc::PmcMarkdownConverter;
pub use pmc::PmcTarClient;
pub use pmc::Reference;
pub use pmc::ReferenceStyle;
pub use pmc::Table;
pub use pubmed::parse_article_from_xml;
pub use pubmed::ArticleSummary;
pub use pubmed::ArticleType;
pub use pubmed::CitationMatch;
pub use pubmed::CitationMatchStatus;
pub use pubmed::CitationMatches;
pub use pubmed::CitationQuery;
pub use pubmed::Citations;
pub use pubmed::DatabaseCount;
pub use pubmed::DatabaseInfo;
pub use pubmed::FieldInfo;
pub use pubmed::GlobalQueryResults;
pub use pubmed::HistorySession;
pub use pubmed::Language;
pub use pubmed::LinkInfo;
pub use pubmed::PubMedArticle;
pub use pubmed::PubMedClient;
pub use pubmed::RelatedArticles;
pub use pubmed::SearchQuery;
pub use pubmed::SearchResult;
pub use pubmed::SortOrder;
pub use pubmed::SpellCheckResult;
pub use pubmed::SpelledQuerySegment;
pub use rate_limit::RateLimiter;
pub use time::sleep;
pub use time::Duration;
pub use time::Instant;

Modules§

cache
common
Common data structures and utilities shared between PubMed and PMC modules
config
error
pmc
PMC (PubMed Central) client for fetching full-text articles
pubmed
PubMed client for searching and fetching article metadata
rate_limit
Rate limiting implementation for NCBI API compliance
retry
Retry logic with exponential backoff for handling transient network failures
time
Internal time management module for cross-platform compatibility

Structs§

Client
Convenience client that combines both PubMed and PMC functionality