use serde::{Deserialize, Serialize};
pub use crate::common::{Affiliation, Author};
/// A single PubMed article record: identifier, bibliographic metadata,
/// authors, and optional indexing data (MeSH headings, keywords, chemicals).
///
/// Optional fields are `None` when the value is not available for the record.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct PubMedArticle {
/// PubMed identifier, kept as a string.
pub pmid: String,
/// Article title.
pub title: String,
/// Authors of the article.
pub authors: Vec<Author>,
/// Author count, stored separately from `authors`.
/// NOTE(review): presumably equals `authors.len()` — confirm against the parser.
pub author_count: u32,
/// Journal name.
pub journal: String,
/// Publication date as free-form text (the tests in this file use a bare year, e.g. "2023").
pub pub_date: String,
/// Digital Object Identifier, if any.
pub doi: Option<String>,
/// PMC identifier, if any.
pub pmc_id: Option<String>,
/// Abstract text, if any.
pub abstract_text: Option<String>,
/// Article/publication types attached to the record.
pub article_types: Vec<String>,
/// MeSH headings; `None` when the record carries no MeSH indexing.
pub mesh_headings: Option<Vec<MeshHeading>>,
/// Keywords, if any.
pub keywords: Option<Vec<String>>,
/// Chemicals listed for the article, if any.
pub chemical_list: Option<Vec<ChemicalConcept>>,
/// Journal volume, if any.
pub volume: Option<String>,
/// Journal issue, if any.
pub issue: Option<String>,
/// Page range (the tests in this file use the form "123-130").
pub pages: Option<String>,
/// Language code (the tests in this file use "eng").
pub language: Option<String>,
/// Abbreviated journal title, if any.
pub journal_abbreviation: Option<String>,
/// Journal ISSN, if any.
pub issn: Option<String>,
}
/// Descriptive metadata about one database, including its searchable fields
/// and the links it offers to other databases.
///
/// NOTE(review): presumably mirrors the NCBI EInfo response — confirm against the parser.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DatabaseInfo {
/// Database name.
pub name: String,
/// Display name of the database.
pub menu_name: String,
/// Human-readable description.
pub description: String,
/// Build identifier, if reported.
pub build: Option<String>,
/// Record count, if reported.
pub count: Option<u64>,
/// Last-update timestamp as text, if reported.
pub last_update: Option<String>,
/// Fields described for this database.
pub fields: Vec<FieldInfo>,
/// Links described for this database.
pub links: Vec<LinkInfo>,
}
/// Description of one field of a database (see `DatabaseInfo::fields`).
///
/// NOTE(review): the boolean flags are documented from their names only;
/// confirm exact semantics against the upstream field description.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FieldInfo {
/// Short field name.
pub name: String,
/// Full field name.
pub full_name: String,
/// Human-readable description.
pub description: String,
/// Number of terms in the field, if reported.
pub term_count: Option<u64>,
/// Presumably: the field holds dates.
pub is_date: bool,
/// Presumably: the field holds numeric values.
pub is_numerical: bool,
/// Presumably: values are treated as a single token.
pub single_token: bool,
/// Presumably: the field is hierarchical.
pub hierarchy: bool,
/// Presumably: the field is hidden from user-facing listings.
pub is_hidden: bool,
}
/// Description of one link from a database to a target database
/// (see `DatabaseInfo::links`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LinkInfo {
/// Link name.
pub name: String,
/// Menu/display label for the link.
pub menu: String,
/// Human-readable description.
pub description: String,
/// Name of the database the link points to.
pub target_db: String,
}
/// Result of a related-articles lookup: the queried PMIDs and the PMIDs
/// reported as related to them.
///
/// PMIDs are numeric (`u32`) here, unlike `PubMedArticle::pmid`, which is a `String`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RelatedArticles {
/// PMIDs the lookup was run for.
pub source_pmids: Vec<u32>,
/// PMIDs of the related articles.
pub related_pmids: Vec<u32>,
/// Name of the link type that produced the result.
pub link_type: String,
}
/// Result of a PMC link lookup: the queried PMIDs and the PMC identifiers
/// found for them.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct PmcLinks {
/// PMIDs the lookup was run for.
pub source_pmids: Vec<u32>,
/// PMC identifiers found, as strings.
pub pmc_ids: Vec<String>,
}
/// Result of a citation lookup: the queried PMIDs and the PMIDs of the
/// articles reported as citing them.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Citations {
/// PMIDs the lookup was run for.
pub source_pmids: Vec<u32>,
/// PMIDs of the citing articles.
pub citing_pmids: Vec<u32>,
/// Name of the link type that produced the result.
pub link_type: String,
}
/// Outcome of a search: matching PMIDs plus optional history-server handles.
///
/// `webenv` and `query_key` together identify a history session; see
/// `SearchResult::history_session`, which needs both to be present.
#[derive(Debug, Clone)]
pub struct SearchResult {
/// PMIDs returned by the search.
pub pmids: Vec<String>,
/// Total number of matches reported.
/// NOTE(review): presumably may exceed `pmids.len()` when results are paged — confirm.
pub total_count: usize,
/// History-server environment token, if any.
pub webenv: Option<String>,
/// History-server query key, if any.
pub query_key: Option<String>,
/// Query translation reported for the search, if any.
pub query_translation: Option<String>,
}
impl SearchResult {
    /// Builds a [`HistorySession`] from this result.
    ///
    /// Returns `None` unless both `webenv` and `query_key` are present; the
    /// returned session owns clones of the two strings.
    pub fn history_session(&self) -> Option<HistorySession> {
        // Bail out early as soon as either half of the session is missing.
        let env = self.webenv.as_ref()?;
        let key = self.query_key.as_ref()?;
        Some(HistorySession {
            webenv: env.clone(),
            query_key: key.clone(),
        })
    }

    /// Reports whether both `webenv` and `query_key` are present, i.e.
    /// whether [`SearchResult::history_session`] would return `Some`.
    pub fn has_history(&self) -> bool {
        matches!((&self.webenv, &self.query_key), (Some(_), Some(_)))
    }
}
/// A pair of history-server handles taken from a `SearchResult`
/// (built by `SearchResult::history_session` when both values are present).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HistorySession {
/// History-server environment token.
pub webenv: String,
/// History-server query key.
pub query_key: String,
}
/// A MeSH qualifier attached to a descriptor term (see `MeshTerm::qualifiers`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MeshQualifier {
/// Qualifier name (the tests in this file use e.g. "drug therapy").
pub qualifier_name: String,
/// Qualifier unique identifier (the tests in this file use e.g. "Q000188").
pub qualifier_ui: String,
/// Whether this qualifier is flagged as a major topic.
pub major_topic: bool,
}
/// A MeSH descriptor term together with its qualifiers.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MeshTerm {
/// Descriptor name (the tests in this file use e.g. "Hypertension").
pub descriptor_name: String,
/// Descriptor unique identifier (the tests in this file use e.g. "D006973").
pub descriptor_ui: String,
/// Whether this descriptor is flagged as a major topic; read by
/// `PubMedArticle::get_major_mesh_terms`.
pub major_topic: bool,
/// Qualifiers attached to this descriptor; may be empty.
pub qualifiers: Vec<MeshQualifier>,
}
/// A supplemental concept attached to a MeSH heading
/// (see `MeshHeading::supplemental_concepts`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SupplementalConcept {
/// Concept name.
pub name: String,
/// Concept unique identifier.
pub ui: String,
/// Concept type, if known.
pub concept_type: Option<String>,
}
/// A chemical listed for an article (see `PubMedArticle::chemical_list`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChemicalConcept {
/// Chemical name (the tests in this file use "Metformin").
pub name: String,
/// Registry number, if any (the tests in this file use "657-24-9").
pub registry_number: Option<String>,
/// Unique identifier, if any (the tests in this file use "D008687").
pub ui: Option<String>,
}
/// One MeSH heading of an article: its descriptor terms plus any
/// supplemental concepts.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MeshHeading {
/// Descriptor terms of this heading; these are what the `PubMedArticle`
/// MeSH helper methods iterate over.
pub mesh_terms: Vec<MeshTerm>,
/// Supplemental concepts of this heading; may be empty.
pub supplemental_concepts: Vec<SupplementalConcept>,
}
impl PubMedArticle {
pub fn get_major_mesh_terms(&self) -> Vec<String> {
let mut major_terms = Vec::new();
if let Some(mesh_headings) = &self.mesh_headings {
for heading in mesh_headings {
for term in &heading.mesh_terms {
if term.major_topic {
major_terms.push(term.descriptor_name.clone());
}
}
}
}
major_terms
}
pub fn has_mesh_term(&self, term: &str) -> bool {
if let Some(mesh_headings) = &self.mesh_headings {
for heading in mesh_headings {
for mesh_term in &heading.mesh_terms {
if mesh_term.descriptor_name.eq_ignore_ascii_case(term) {
return true;
}
}
}
}
false
}
pub fn get_all_mesh_terms(&self) -> Vec<String> {
let mut terms = Vec::new();
if let Some(mesh_headings) = &self.mesh_headings {
for heading in mesh_headings {
for term in &heading.mesh_terms {
terms.push(term.descriptor_name.clone());
}
}
}
terms
}
pub fn get_corresponding_authors(&self) -> Vec<&Author> {
self.authors
.iter()
.filter(|author| author.is_corresponding)
.collect()
}
pub fn get_authors_by_institution(&self, institution: &str) -> Vec<&Author> {
let institution_lower = institution.to_lowercase();
self.authors
.iter()
.filter(|author| {
author.affiliations.iter().any(|affil| {
affil
.institution
.as_ref()
.is_some_and(|inst| inst.to_lowercase().contains(&institution_lower))
})
})
.collect()
}
pub fn get_author_countries(&self) -> Vec<String> {
use std::collections::HashSet;
let mut countries: HashSet<String> = HashSet::new();
for author in &self.authors {
for affiliation in &author.affiliations {
if let Some(country) = &affiliation.country {
countries.insert(country.clone());
}
}
}
countries.into_iter().collect()
}
pub fn get_authors_with_orcid(&self) -> Vec<&Author> {
self.authors
.iter()
.filter(|author| author.orcid.is_some())
.collect()
}
pub fn has_international_collaboration(&self) -> bool {
self.get_author_countries().len() > 1
}
pub fn mesh_term_similarity(&self, other: &PubMedArticle) -> f64 {
use std::collections::HashSet;
let terms1: HashSet<String> = self
.get_all_mesh_terms()
.into_iter()
.map(|t| t.to_lowercase())
.collect();
let terms2: HashSet<String> = other
.get_all_mesh_terms()
.into_iter()
.map(|t| t.to_lowercase())
.collect();
if terms1.is_empty() && terms2.is_empty() {
return 0.0;
}
let intersection = terms1.intersection(&terms2).count();
let union = terms1.union(&terms2).count();
if union == 0 {
0.0
} else {
intersection as f64 / union as f64
}
}
pub fn get_mesh_qualifiers(&self, term: &str) -> Vec<String> {
let mut qualifiers = Vec::new();
if let Some(mesh_headings) = &self.mesh_headings {
for heading in mesh_headings {
for mesh_term in &heading.mesh_terms {
if mesh_term.descriptor_name.eq_ignore_ascii_case(term) {
for qualifier in &mesh_term.qualifiers {
qualifiers.push(qualifier.qualifier_name.clone());
}
}
}
}
}
qualifiers
}
pub fn has_mesh_terms(&self) -> bool {
self.mesh_headings
.as_ref()
.map(|h| !h.is_empty())
.unwrap_or(false)
}
pub fn get_chemical_names(&self) -> Vec<String> {
self.chemical_list
.as_ref()
.map(|chemicals| chemicals.iter().map(|c| c.name.clone()).collect())
.unwrap_or_default()
}
}
/// One segment of a spell-checked query (see `SpellCheckResult::spelled_query`).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum SpelledQuerySegment {
/// Text carried over from the query unchanged (the tests in this file
/// use e.g. " OR " and the empty string).
Original(String),
/// Replacement text suggested by the spell checker; collected by
/// `SpellCheckResult::replacements`.
Replaced(String),
}
/// Result of spell-checking a query against a database.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SpellCheckResult {
/// Database the check was run against (the tests in this file use "pubmed").
pub database: String,
/// The query as submitted.
pub query: String,
/// The corrected query; compared with `query` by `has_corrections`.
pub corrected_query: String,
/// The query broken into original and replaced segments.
pub spelled_query: Vec<SpelledQuerySegment>,
}
impl SpellCheckResult {
    /// Reports whether the corrected query differs from the submitted query
    /// (exact string comparison).
    pub fn has_corrections(&self) -> bool {
        self.corrected_query != self.query
    }

    /// Returns the text of every `Replaced` segment, in the order the
    /// segments appear in `spelled_query`. `Original` segments are skipped.
    pub fn replacements(&self) -> Vec<&str> {
        let mut replaced = Vec::new();
        for segment in &self.spelled_query {
            if let SpelledQuerySegment::Replaced(text) = segment {
                replaced.push(text.as_str());
            }
        }
        replaced
    }
}
/// One citation to look up, described by its bibliographic parts.
///
/// `CitationQuery::to_bdata` serialises these six fields, in declaration
/// order, into a single pipe-delimited string.
#[derive(Debug, Clone)]
pub struct CitationQuery {
/// Journal title.
pub journal: String,
/// Publication year, as text.
pub year: String,
/// Journal volume, as text.
pub volume: String,
/// First page, as text.
pub first_page: String,
/// Author name.
pub author_name: String,
/// Caller-chosen key for this citation.
/// NOTE(review): presumably echoed back in the match result so callers can
/// correlate queries and answers — confirm against the response parser.
pub key: String,
}
impl CitationQuery {
    /// Creates a citation query, copying each borrowed part into an owned
    /// `String`. No validation or normalisation is performed.
    pub fn new(
        journal: &str,
        year: &str,
        volume: &str,
        first_page: &str,
        author_name: &str,
        key: &str,
    ) -> Self {
        Self {
            journal: String::from(journal),
            year: String::from(year),
            volume: String::from(volume),
            first_page: String::from(first_page),
            author_name: String::from(author_name),
            key: String::from(key),
        }
    }

    /// Serialises the query as
    /// `journal|year|volume|first_page|author_name|key|`.
    ///
    /// Spaces in the journal and author name are replaced with `+`; the other
    /// fields are emitted verbatim. The string always ends with a trailing `|`.
    pub(crate) fn to_bdata(&self) -> String {
        let journal = self.journal.replace(' ', "+");
        let author_name = self.author_name.replace(' ', "+");
        let parts = [
            journal.as_str(),
            self.year.as_str(),
            self.volume.as_str(),
            self.first_page.as_str(),
            author_name.as_str(),
            self.key.as_str(),
        ];
        let mut bdata = parts.join("|");
        bdata.push('|');
        bdata
    }
}
/// Outcome of matching one citation (see `CitationMatch::status`).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum CitationMatchStatus {
/// The citation was matched; counted by `CitationMatches::found` and
/// `CitationMatches::found_count`.
Found,
/// No match was found.
NotFound,
/// Presumably: more than one candidate matched — confirm against the parser.
Ambiguous,
}
/// Result for one citation lookup: the bibliographic parts of the citation
/// (same six fields as `CitationQuery`) plus the match outcome.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CitationMatch {
/// Journal title.
pub journal: String,
/// Publication year, as text.
pub year: String,
/// Journal volume, as text.
pub volume: String,
/// First page, as text.
pub first_page: String,
/// Author name.
pub author_name: String,
/// Key identifying the citation.
pub key: String,
/// Matched PMID, if any.
/// NOTE(review): presumably `Some` only when `status` is `Found` — confirm.
pub pmid: Option<String>,
/// Outcome of the match.
pub status: CitationMatchStatus,
}
/// A batch of citation match results.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CitationMatches {
/// Individual results, one per citation.
pub matches: Vec<CitationMatch>,
}
impl CitationMatches {
    /// Returns references to the matches whose status is
    /// `CitationMatchStatus::Found`, preserving their order in `matches`.
    pub fn found(&self) -> Vec<&CitationMatch> {
        let mut hits = Vec::new();
        for candidate in &self.matches {
            if candidate.status == CitationMatchStatus::Found {
                hits.push(candidate);
            }
        }
        hits
    }

    /// Returns how many matches have status `CitationMatchStatus::Found`,
    /// without building an intermediate vector.
    pub fn found_count(&self) -> usize {
        self.matches
            .iter()
            .map(|candidate| usize::from(candidate.status == CitationMatchStatus::Found))
            .sum()
    }
}
/// Hit count for one database within a `GlobalQueryResults`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DatabaseCount {
/// Database name; matched exactly by `GlobalQueryResults::count_for`.
pub db_name: String,
/// Display name of the database.
pub menu_name: String,
/// Number of hits in this database.
pub count: u64,
/// Status text reported for this database.
pub status: String,
}
/// Per-database hit counts for a single search term.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct GlobalQueryResults {
/// The term that was searched.
pub term: String,
/// One entry per database.
pub results: Vec<DatabaseCount>,
}
impl GlobalQueryResults {
    /// Returns the entries whose hit count is greater than zero, preserving
    /// their order in `results`.
    pub fn non_zero(&self) -> Vec<&DatabaseCount> {
        let mut populated = Vec::new();
        for entry in &self.results {
            if entry.count != 0 {
                populated.push(entry);
            }
        }
        populated
    }

    /// Returns the hit count of the first entry whose `db_name` equals
    /// `db_name` exactly (case-sensitive), or `None` if there is no such entry.
    pub fn count_for(&self, db_name: &str) -> Option<u64> {
        for entry in &self.results {
            if entry.db_name == db_name {
                return Some(entry.count);
            }
        }
        None
    }
}
/// A lightweight article summary. Unlike `PubMedArticle`, most bibliographic
/// fields here are plain `String`s rather than `Option<String>`.
///
/// NOTE(review): presumably mirrors an NCBI ESummary document, with missing
/// values represented as empty strings — confirm against the parser.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ArticleSummary {
/// PubMed identifier, kept as a string.
pub pmid: String,
/// Article title.
pub title: String,
/// Author names as plain strings.
pub authors: Vec<String>,
/// Journal name (see also `full_journal_name`).
pub journal: String,
/// Full journal name.
pub full_journal_name: String,
/// Publication date, as text.
pub pub_date: String,
/// Electronic publication date, as text.
pub epub_date: String,
/// Digital Object Identifier, if any.
pub doi: Option<String>,
/// PMC identifier, if any.
pub pmc_id: Option<String>,
/// Journal volume.
pub volume: String,
/// Journal issue.
pub issue: String,
/// Page range.
pub pages: String,
/// Languages of the article.
pub languages: Vec<String>,
/// Publication types.
pub pub_types: Vec<String>,
/// Print ISSN.
pub issn: String,
/// Electronic ISSN.
pub essn: String,
/// Publication date in a sortable text form.
pub sort_pub_date: String,
/// Reference count reported for PMC.
/// NOTE(review): exact meaning taken from the field name — confirm.
pub pmc_ref_count: u64,
/// Record status text.
pub record_status: String,
}
// Unit tests for the PubMed models in this file, plus a few checks of the
// re-exported `Author` type and `format_author_name` from `crate::common`.
#[cfg(test)]
mod tests {
use super::*;
use crate::common::format_author_name;
// Fixture: a corresponding author with an ORCID and two US affiliations
// (Harvard Medical School, Massachusetts General Hospital).
fn create_test_author() -> Author {
Author {
surname: Some("Doe".to_string()),
given_names: Some("John A".to_string()),
initials: Some("JA".to_string()),
suffix: None,
full_name: "John A Doe".to_string(),
affiliations: vec![
Affiliation {
id: None,
institution: Some("Harvard Medical School".to_string()),
department: Some("Department of Medicine".to_string()),
address: Some("Boston, MA".to_string()),
country: Some("USA".to_string()),
},
Affiliation {
id: None,
institution: Some("Massachusetts General Hospital".to_string()),
department: None,
address: Some("Boston, MA".to_string()),
country: Some("USA".to_string()),
},
],
orcid: Some("0000-0001-2345-6789".to_string()),
email: Some("john.doe@hms.harvard.edu".to_string()),
is_corresponding: true,
roles: vec![
"Conceptualization".to_string(),
"Writing - original draft".to_string(),
],
}
}
// Fixture: an article with one author, two MeSH headings (a major-topic
// term with two qualifiers and a non-major term without any), one chemical,
// and all bibliographic fields populated.
fn create_test_article_with_mesh() -> PubMedArticle {
PubMedArticle {
pmid: "12345".to_string(),
title: "Test Article".to_string(),
authors: vec![create_test_author()],
author_count: 1,
journal: "Test Journal".to_string(),
pub_date: "2023".to_string(),
doi: None,
pmc_id: None,
abstract_text: None,
article_types: vec![],
mesh_headings: Some(vec![
MeshHeading {
mesh_terms: vec![MeshTerm {
descriptor_name: "Diabetes Mellitus, Type 2".to_string(),
descriptor_ui: "D003924".to_string(),
major_topic: true,
qualifiers: vec![
MeshQualifier {
qualifier_name: "drug therapy".to_string(),
qualifier_ui: "Q000188".to_string(),
major_topic: false,
},
MeshQualifier {
qualifier_name: "genetics".to_string(),
qualifier_ui: "Q000235".to_string(),
major_topic: true,
},
],
}],
supplemental_concepts: vec![],
},
MeshHeading {
mesh_terms: vec![MeshTerm {
descriptor_name: "Hypertension".to_string(),
descriptor_ui: "D006973".to_string(),
major_topic: false,
qualifiers: vec![],
}],
supplemental_concepts: vec![],
},
]),
keywords: Some(vec!["diabetes".to_string(), "treatment".to_string()]),
chemical_list: Some(vec![ChemicalConcept {
name: "Metformin".to_string(),
registry_number: Some("657-24-9".to_string()),
ui: Some("D008687".to_string()),
}]),
volume: Some("45".to_string()),
issue: Some("3".to_string()),
pages: Some("123-130".to_string()),
language: Some("eng".to_string()),
journal_abbreviation: Some("Test J".to_string()),
issn: Some("1234-5678".to_string()),
}
}
// Only the term flagged `major_topic` is returned.
#[test]
fn test_get_major_mesh_terms() {
let article = create_test_article_with_mesh();
let major_terms = article.get_major_mesh_terms();
assert_eq!(major_terms.len(), 1);
assert_eq!(major_terms[0], "Diabetes Mellitus, Type 2");
}
// Lookup is case-insensitive and covers both major and non-major terms.
#[test]
fn test_has_mesh_term() {
let article = create_test_article_with_mesh();
assert!(article.has_mesh_term("Diabetes Mellitus, Type 2"));
assert!(article.has_mesh_term("DIABETES MELLITUS, TYPE 2")); assert!(article.has_mesh_term("Hypertension"));
assert!(!article.has_mesh_term("Cancer"));
}
// Terms from every heading are collected.
#[test]
fn test_get_all_mesh_terms() {
let article = create_test_article_with_mesh();
let all_terms = article.get_all_mesh_terms();
assert_eq!(all_terms.len(), 2);
assert!(all_terms.contains(&"Diabetes Mellitus, Type 2".to_string()));
assert!(all_terms.contains(&"Hypertension".to_string()));
}
// Identical term sets score 1.0; one shared term out of three distinct
// terms scores 1/3; an article without MeSH headings scores 0.0.
#[test]
fn test_mesh_term_similarity() {
let article1 = create_test_article_with_mesh();
let mut article2 = create_test_article_with_mesh();
let similarity = article1.mesh_term_similarity(&article2);
assert_eq!(similarity, 1.0);
article2.mesh_headings = Some(vec![MeshHeading {
mesh_terms: vec![
MeshTerm {
descriptor_name: "Diabetes Mellitus, Type 2".to_string(),
descriptor_ui: "D003924".to_string(),
major_topic: true,
qualifiers: vec![],
},
MeshTerm {
descriptor_name: "Obesity".to_string(),
descriptor_ui: "D009765".to_string(),
major_topic: false,
qualifiers: vec![],
},
],
supplemental_concepts: vec![],
}]);
let similarity = article1.mesh_term_similarity(&article2);
assert!(similarity > 0.0 && similarity < 1.0);
assert_eq!(similarity, 1.0 / 3.0);
let article3 = PubMedArticle {
pmid: "54321".to_string(),
title: "Test".to_string(),
authors: vec![],
author_count: 0,
journal: "Test".to_string(),
pub_date: "2023".to_string(),
doi: None,
pmc_id: None,
abstract_text: None,
article_types: vec![],
mesh_headings: None,
keywords: None,
chemical_list: None,
volume: None,
issue: None,
pages: None,
language: None,
journal_abbreviation: None,
issn: None,
};
assert_eq!(article1.mesh_term_similarity(&article3), 0.0);
}
// Qualifiers are returned for a matching term; a term without qualifiers
// and an unknown term both yield an empty list.
#[test]
fn test_get_mesh_qualifiers() {
let article = create_test_article_with_mesh();
let qualifiers = article.get_mesh_qualifiers("Diabetes Mellitus, Type 2");
assert_eq!(qualifiers.len(), 2);
assert!(qualifiers.contains(&"drug therapy".to_string()));
assert!(qualifiers.contains(&"genetics".to_string()));
let qualifiers = article.get_mesh_qualifiers("Hypertension");
assert_eq!(qualifiers.len(), 0);
let qualifiers = article.get_mesh_qualifiers("Nonexistent Term");
assert_eq!(qualifiers.len(), 0);
}
// Both `None` and an empty heading list count as "no MeSH terms".
#[test]
fn test_has_mesh_terms() {
let article = create_test_article_with_mesh();
assert!(article.has_mesh_terms());
let mut article_no_mesh = article.clone();
article_no_mesh.mesh_headings = None;
assert!(!article_no_mesh.has_mesh_terms());
let mut article_empty_mesh = article.clone();
article_empty_mesh.mesh_headings = Some(vec![]);
assert!(!article_empty_mesh.has_mesh_terms());
}
// Chemical names are listed; a missing chemical list yields an empty vector.
#[test]
fn test_get_chemical_names() {
let article = create_test_article_with_mesh();
let chemicals = article.get_chemical_names();
assert_eq!(chemicals.len(), 1);
assert_eq!(chemicals[0], "Metformin");
let mut article_no_chemicals = article.clone();
article_no_chemicals.chemical_list = None;
let chemicals = article_no_chemicals.get_chemical_names();
assert_eq!(chemicals.len(), 0);
}
// `Author::new` builds the full name and defaults to no ORCID and
// non-corresponding.
#[test]
fn test_author_creation() {
let author = Author::new(Some("Smith".to_string()), Some("Jane".to_string()));
assert_eq!(author.surname, Some("Smith".to_string()));
assert_eq!(author.given_names, Some("Jane".to_string()));
assert_eq!(author.full_name, "Jane Smith");
assert!(!author.has_orcid());
assert!(!author.is_corresponding);
}
// Affiliation matching accepts partial institution names; the primary
// affiliation is the first one in the fixture.
#[test]
fn test_author_affiliations() {
let author = create_test_author();
assert!(author.is_affiliated_with("Harvard"));
assert!(author.is_affiliated_with("Massachusetts General"));
assert!(!author.is_affiliated_with("Stanford"));
let primary = author.primary_affiliation().unwrap();
assert_eq!(
primary.institution,
Some("Harvard Medical School".to_string())
);
assert!(author.has_orcid());
assert!(author.is_corresponding);
}
// The single fixture author is flagged as corresponding.
#[test]
fn test_get_corresponding_authors() {
let article = create_test_article_with_mesh();
let corresponding = article.get_corresponding_authors();
assert_eq!(corresponding.len(), 1);
assert_eq!(corresponding[0].full_name, "John A Doe");
}
// Institution filter matches on a partial name and returns nothing for an
// unrelated institution.
#[test]
fn test_get_authors_by_institution() {
let article = create_test_article_with_mesh();
let harvard_authors = article.get_authors_by_institution("Harvard");
assert_eq!(harvard_authors.len(), 1);
let stanford_authors = article.get_authors_by_institution("Stanford");
assert_eq!(stanford_authors.len(), 0);
}
// Two affiliations in the same country collapse to one entry.
#[test]
fn test_get_author_countries() {
let article = create_test_article_with_mesh();
let countries = article.get_author_countries();
assert_eq!(countries.len(), 1);
assert!(countries.contains(&"USA".to_string()));
}
// Adding an author with a UK affiliation makes the article international.
#[test]
fn test_international_collaboration() {
let article = create_test_article_with_mesh();
assert!(!article.has_international_collaboration());
let mut international_article = article.clone();
let mut uk_author = create_test_author();
uk_author.affiliations[0].country = Some("UK".to_string());
international_article.authors.push(uk_author);
international_article.author_count = 2;
assert!(international_article.has_international_collaboration());
}
// The fixture author carries an ORCID and is therefore returned.
#[test]
fn test_get_authors_with_orcid() {
let article = create_test_article_with_mesh();
let authors_with_orcid = article.get_authors_with_orcid();
assert_eq!(authors_with_orcid.len(), 1);
assert_eq!(
authors_with_orcid[0].orcid,
Some("0000-0001-2345-6789".to_string())
);
}
// Exercises `format_author_name` for each combination of surname, given
// names and initials, including the all-`None` fallback.
#[test]
fn test_format_author_name() {
assert_eq!(
format_author_name(&Some("Smith".to_string()), &Some("John".to_string()), &None),
"John Smith"
);
assert_eq!(
format_author_name(&Some("Doe".to_string()), &None, &Some("J".to_string())),
"J Doe"
);
assert_eq!(
format_author_name(&Some("Johnson".to_string()), &None, &None),
"Johnson"
);
assert_eq!(
format_author_name(&None, &Some("Jane".to_string()), &None),
"Jane"
);
assert_eq!(format_author_name(&None, &None, &None), "Unknown Author");
}
// `has_corrections` is true only when the corrected query differs.
#[test]
fn test_spell_check_result_has_corrections() {
let result = SpellCheckResult {
database: "pubmed".to_string(),
query: "asthmaa".to_string(),
corrected_query: "asthma".to_string(),
spelled_query: vec![SpelledQuerySegment::Replaced("asthma".to_string())],
};
assert!(result.has_corrections());
let no_correction = SpellCheckResult {
database: "pubmed".to_string(),
query: "asthma".to_string(),
corrected_query: "asthma".to_string(),
spelled_query: vec![SpelledQuerySegment::Original("asthma".to_string())],
};
assert!(!no_correction.has_corrections());
}
// Only `Replaced` segments are returned, in order.
#[test]
fn test_spell_check_result_replacements() {
let result = SpellCheckResult {
database: "pubmed".to_string(),
query: "asthmaa OR alergies".to_string(),
corrected_query: "asthma or allergies".to_string(),
spelled_query: vec![
SpelledQuerySegment::Original("".to_string()),
SpelledQuerySegment::Replaced("asthma".to_string()),
SpelledQuerySegment::Original(" OR ".to_string()),
SpelledQuerySegment::Replaced("allergies".to_string()),
],
};
let replacements = result.replacements();
assert_eq!(replacements.len(), 2);
assert_eq!(replacements[0], "asthma");
assert_eq!(replacements[1], "allergies");
}
// The fixture's bibliographic fields are stored as given.
#[test]
fn test_bibliographic_fields_on_article() {
let article = create_test_article_with_mesh();
assert_eq!(article.volume, Some("45".to_string()));
assert_eq!(article.issue, Some("3".to_string()));
assert_eq!(article.pages, Some("123-130".to_string()));
assert_eq!(article.language, Some("eng".to_string()));
assert_eq!(article.journal_abbreviation, Some("Test J".to_string()));
assert_eq!(article.issn, Some("1234-5678".to_string()));
}
}