use super::extractors::{extract_country_from_text, extract_email_from_text, format_author_name};
use super::xml_types::*;
use crate::error::{PubMedError, Result};
use crate::pubmed::models::{
Affiliation, Author, ChemicalConcept, MeshHeading, MeshQualifier, MeshTerm, PubMedArticle,
};
use tracing::debug;
impl PubmedArticleXml {
fn extract_article_id(pubmed_data: &Option<PubmedData>, id_type: &str) -> Option<String> {
pubmed_data
.as_ref()?
.article_id_list
.as_ref()?
.ids
.iter()
.find(|id| id.id_type.eq_ignore_ascii_case(id_type))
.map(|id| id.value.clone())
}
pub(super) fn into_article(self, pmid: &str) -> Result<PubMedArticle> {
let medline = self.medline_citation;
let article = medline.article;
let pmc_id = Self::extract_article_id(&self.pubmed_data, "pmc");
let title = article
.article_title
.ok_or_else(|| PubMedError::ArticleNotFound {
pmid: pmid.to_string(),
})?;
let authors = article
.author_list
.map_or(Vec::new(), |list| list.into_authors());
let journal = article
.journal
.as_ref()
.and_then(|j| j.title.clone())
.unwrap_or_default();
let pub_date = article.journal.as_ref().map_or(String::new(), |j| {
j.journal_issue
.as_ref()
.and_then(|ji| ji.pub_date.as_ref())
.map_or(String::new(), |pd| pd.to_string())
});
let volume = article
.journal
.as_ref()
.and_then(|j| j.journal_issue.as_ref())
.and_then(|ji| ji.volume.clone());
let issue = article
.journal
.as_ref()
.and_then(|j| j.journal_issue.as_ref())
.and_then(|ji| ji.issue.clone());
let pages = article
.pagination
.as_ref()
.and_then(|p| p.medline_pgn.clone());
let language = article.language.clone();
let journal_abbreviation = article
.journal
.as_ref()
.and_then(|j| j.iso_abbreviation.clone());
let issn = article
.journal
.as_ref()
.and_then(|j| j.issn.as_ref())
.map(|i| i.value.clone());
let doi = article
.elocation_ids
.and_then(|ids| {
ids.into_iter()
.find(|id| id.eid_type.as_deref() == Some("doi"))
.map(|id| id.value)
})
.or_else(|| Self::extract_article_id(&self.pubmed_data, "doi"));
let abstract_text = article.abstract_section.and_then(|s| s.to_string_opt());
let article_types = article
.publication_type_list
.map_or(Vec::new(), |list| list.into_types());
let mesh_headings = medline
.mesh_heading_list
.and_then(|list| list.into_headings());
let keywords = medline.keyword_list.and_then(|list| list.into_keywords());
let chemical_list = medline.chemical_list.and_then(|list| list.into_chemicals());
let author_count = authors.len() as u32;
debug!(
authors_parsed = authors.len(),
has_abstract = abstract_text.is_some(),
journal = %journal,
mesh_terms_count = mesh_headings.as_ref().map_or(0, |h| h.len()),
keywords_count = keywords.as_ref().map_or(0, |k| k.len()),
chemicals_count = chemical_list.as_ref().map_or(0, |c| c.len()),
"Completed XML parsing"
);
Ok(PubMedArticle {
pmid: pmid.to_string(),
title,
authors,
author_count,
journal,
pub_date,
doi,
pmc_id,
abstract_text,
article_types,
mesh_headings,
keywords,
chemical_list,
volume,
issue,
pages,
language,
journal_abbreviation,
issn,
})
}
}
impl AuthorList {
    /// Consumes the list and converts each raw entry via `into_author`,
    /// keeping only the entries that yield an author.
    ///
    /// An absent author collection produces an empty vector.
    pub(super) fn into_authors(self) -> Vec<Author> {
        let mut converted = Vec::new();
        for raw in self.authors.unwrap_or_default() {
            if let Some(author) = raw.into_author() {
                converted.push(author);
            }
        }
        converted
    }
}
impl AuthorXml {
    /// Converts this raw author record into an [`Author`].
    ///
    /// A non-blank collective name takes precedence: it becomes `full_name`
    /// and every other field is left empty. A blank (empty or whitespace-only)
    /// collective name is ignored, so the record is handled like a personal
    /// author instead of producing an author with an empty name.
    ///
    /// For personal authors the display name comes from `format_author_name`.
    /// Each affiliation text is stored as the affiliation's `institution`
    /// (`None` when the text is empty) together with the country extracted
    /// from it. The author's `email` is the first email found across the
    /// affiliation texts, and `orcid` is the value of the first identifier
    /// whose source is exactly `"ORCID"`.
    ///
    /// `is_corresponding` is always `false` and `roles` is always empty here.
    ///
    /// Returns `None` when the formatted name is blank or equals
    /// `"Unknown Author"`.
    pub(super) fn into_author(self) -> Option<Author> {
        let collective = self
            .collective_name
            .filter(|name| !name.trim().is_empty());
        if let Some(collective_name) = collective {
            return Some(Author {
                surname: None,
                given_names: None,
                initials: None,
                suffix: None,
                full_name: collective_name,
                affiliations: Vec::new(),
                orcid: None,
                email: None,
                is_corresponding: false,
                roles: Vec::new(),
            });
        }

        let full_name = format_author_name(&self.last_name, &self.fore_name, &self.initials);
        if full_name.trim().is_empty() || full_name == "Unknown Author" {
            return None;
        }

        let mut affiliations = Vec::new();
        let mut email = None;
        let affiliation_texts = self
            .affiliation_info
            .unwrap_or_default()
            .into_iter()
            .filter_map(|info| info.affiliation);
        for text in affiliation_texts {
            let found_email = extract_email_from_text(&text);
            let country = extract_country_from_text(&text);
            // Keep the first email encountered; later affiliations never override it.
            email = email.or(found_email);
            affiliations.push(Affiliation {
                id: None,
                // `text` is owned and no longer needed, so move it rather than copy it.
                institution: if text.is_empty() { None } else { Some(text) },
                department: None,
                address: None,
                country,
            });
        }

        let orcid = self.identifiers.and_then(|ids| {
            ids.into_iter()
                .find(|id| id.source.as_deref() == Some("ORCID"))
                .map(|id| id.value)
        });

        Some(Author {
            surname: self.last_name,
            given_names: self.fore_name,
            initials: self.initials,
            suffix: self.suffix,
            full_name,
            affiliations,
            orcid,
            email,
            is_corresponding: false,
            roles: Vec::new(),
        })
    }
}
impl PublicationTypeList {
    /// Consumes the list and returns the string form of every publication type.
    ///
    /// An absent collection produces an empty vector.
    pub(super) fn into_types(self) -> Vec<String> {
        let mut names = Vec::new();
        for entry in self.publication_types.unwrap_or_default() {
            names.push(entry.to_string());
        }
        names
    }
}
impl MeshHeadingList {
    /// Consumes the list and converts each entry via `into_heading`,
    /// dropping entries that yield nothing.
    ///
    /// Returns `None` when the collection is absent or when no entry
    /// converts to a heading.
    pub(super) fn into_headings(self) -> Option<Vec<MeshHeading>> {
        let raw_headings = self.mesh_headings?;

        let mut converted = Vec::new();
        for raw in raw_headings {
            if let Some(heading) = raw.into_heading() {
                converted.push(heading);
            }
        }

        if converted.is_empty() {
            return None;
        }
        Some(converted)
    }
}
impl MeshHeadingXml {
    /// Converts this entry into a [`MeshHeading`] holding a single [`MeshTerm`].
    ///
    /// Returns `None` when the entry has no descriptor. A missing descriptor
    /// UI falls back to its default value, absent qualifiers become an empty
    /// collection, and `supplemental_concepts` is always empty.
    pub(super) fn into_heading(self) -> Option<MeshHeading> {
        let descriptor = self.descriptor_name?;

        let term = MeshTerm {
            descriptor_name: descriptor.text,
            descriptor_ui: descriptor.ui.unwrap_or_default(),
            major_topic: descriptor.major_topic_yn,
            qualifiers: self
                .qualifier_names
                .unwrap_or_default()
                .into_iter()
                .map(|raw| raw.into_qualifier())
                .collect(),
        };

        Some(MeshHeading {
            mesh_terms: vec![term],
            supplemental_concepts: Vec::new(),
        })
    }
}
impl QualifierName {
    /// Converts this qualifier into a [`MeshQualifier`].
    ///
    /// A missing UI falls back to its default value; the text and the
    /// major-topic flag are carried over unchanged.
    pub(super) fn into_qualifier(self) -> MeshQualifier {
        let name = self.text;
        let ui = self.ui.unwrap_or_default();
        let is_major = self.major_topic_yn;

        MeshQualifier {
            qualifier_name: name,
            qualifier_ui: ui,
            major_topic: is_major,
        }
    }
}
impl ChemicalList {
    /// Consumes the list and converts each entry via `into_chemical`,
    /// dropping entries that yield nothing.
    ///
    /// Returns `None` when the collection is absent or when no entry
    /// converts to a chemical.
    pub(super) fn into_chemicals(self) -> Option<Vec<ChemicalConcept>> {
        let converted: Vec<ChemicalConcept> = self
            .chemicals?
            .into_iter()
            .filter_map(|raw| raw.into_chemical())
            .collect();

        Some(converted).filter(|list| !list.is_empty())
    }
}
impl ChemicalXml {
    /// Converts this entry into a [`ChemicalConcept`].
    ///
    /// Returns `None` when the entry has no substance name. The registry
    /// number is discarded when it is empty or equal to `"0"`.
    pub(super) fn into_chemical(self) -> Option<ChemicalConcept> {
        let substance = self.name_of_substance?;
        let registry_number = self
            .registry_number
            .filter(|number| !(number.is_empty() || number == "0"));

        Some(ChemicalConcept {
            name: substance.text,
            registry_number,
            ui: substance.ui,
        })
    }
}
impl KeywordList {
    /// Consumes the list and returns the string form of every keyword.
    ///
    /// Returns `None` when the collection is absent or contains no entries.
    pub(super) fn into_keywords(self) -> Option<Vec<String>> {
        let raw_keywords = self.keywords?;

        let mut texts: Vec<String> = Vec::new();
        for keyword in raw_keywords {
            texts.push(keyword.to_string());
        }

        if texts.is_empty() {
            return None;
        }
        Some(texts)
    }
}