use serde::{Deserialize, Serialize};
pub use crate::common::{Affiliation, Author};
/// Journal-level metadata attached to an article (see `PmcFullText::journal`).
///
/// Only `title` is mandatory; `JournalInfo::new` leaves every optional field
/// as `None`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct JournalInfo {
/// Journal title.
pub title: String,
/// Abbreviated journal title, if known.
pub abbreviation: Option<String>,
/// Print ISSN, if known.
pub issn_print: Option<String>,
/// Electronic ISSN, if known.
pub issn_electronic: Option<String>,
/// Publisher name, if known.
pub publisher: Option<String>,
/// Volume, stored as free-form text rather than a number.
pub volume: Option<String>,
/// Issue, stored as free-form text rather than a number.
pub issue: Option<String>,
}
/// One funding entry of an article (see `PmcFullText::funding`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FundingInfo {
/// Funding source; the only field required by `FundingInfo::new`.
pub source: String,
/// Award identifier, if known.
pub award_id: Option<String>,
/// Funding statement text, if any.
pub statement: Option<String>,
}
/// A figure attached to an article section (see `ArticleSection::figures`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Figure {
/// Identifier of the figure.
pub id: String,
/// Display label, if any.
pub label: Option<String>,
/// Caption text.
pub caption: String,
/// Alternative text, if any.
pub alt_text: Option<String>,
/// Figure type, if any.
/// NOTE(review): the set of accepted values is not defined in this file.
pub fig_type: Option<String>,
/// Path of the figure file, if known.
pub file_path: Option<String>,
/// Name of the figure file, if known.
pub file_name: Option<String>,
}
/// A [`Figure`] paired with information about a file extracted for it.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ExtractedFigure {
/// Metadata of the figure this file belongs to.
pub figure: Figure,
/// Path of the extracted file.
pub extracted_file_path: String,
/// Size of the extracted file, if known.
/// NOTE(review): presumably in bytes — confirm against the extraction code.
pub file_size: Option<u64>,
/// Image dimensions, if known.
/// NOTE(review): presumably `(width, height)` in pixels — confirm against the
/// extraction code.
pub dimensions: Option<(u32, u32)>,
}
/// A table attached to an article section (see `ArticleSection::tables`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Table {
/// Identifier of the table.
pub id: String,
/// Display label, if any.
pub label: Option<String>,
/// Caption text.
pub caption: String,
/// Footnotes of the table; empty when there are none.
pub footnotes: Vec<String>,
}
/// A supplementary material entry of an article
/// (see `PmcFullText::supplementary_materials`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SupplementaryMaterial {
/// Identifier of the material.
pub id: String,
/// Content type, if any. `PmcFullText::get_supplementary_materials_by_type`
/// matches this value exactly (the tests use `local-data`).
pub content_type: Option<String>,
/// Title, if any.
pub title: Option<String>,
/// Description, if any.
pub description: Option<String>,
/// URL or path of the file, if any. This is the value inspected by
/// `is_tar_file`, `get_file_extension` and `is_archive`.
pub file_url: Option<String>,
/// File type, if any.
/// NOTE(review): not read anywhere in this file; expected format unknown.
pub file_type: Option<String>,
/// Position, if any.
/// NOTE(review): not read anywhere in this file; meaning to be confirmed.
pub position: Option<String>,
}
impl SupplementaryMaterial {
    /// Creates an entry with the given `id`; every optional field starts as `None`.
    pub fn new(id: String) -> Self {
        Self {
            id,
            content_type: None,
            title: None,
            description: None,
            file_url: None,
            file_type: None,
            position: None,
        }
    }

    /// Returns the final path segment of `file_url` with any `?query` or
    /// `#fragment` suffix removed, or `None` when no URL is set.
    fn url_file_name(&self) -> Option<&str> {
        let url = self.file_url.as_deref()?;
        // Cut the query/fragment first so that a `/` inside either of them is
        // not mistaken for a path separator.
        let path_end = url
            .find(|c: char| c == '?' || c == '#')
            .unwrap_or(url.len());
        url[..path_end].rsplit('/').next()
    }

    /// Returns `true` when the file name in `file_url` ends with `.tar`,
    /// `.tar.gz`, `.tar.bz2` or `.tgz`.
    ///
    /// The comparison ignores ASCII case, matching the case-insensitive
    /// behaviour of [`Self::is_archive`], and ignores a trailing query string
    /// or fragment. Returns `false` when no URL is set.
    pub fn is_tar_file(&self) -> bool {
        const TAR_SUFFIXES: [&str; 4] = [".tar", ".tar.gz", ".tar.bz2", ".tgz"];

        self.url_file_name().is_some_and(|name| {
            let name = name.to_ascii_lowercase();
            TAR_SUFFIXES.iter().any(|suffix| name.ends_with(*suffix))
        })
    }

    /// Returns the lowercased text after the last `.` of the file name in
    /// `file_url`.
    ///
    /// Only the last extension is reported, so `data.tar.gz` yields `"gz"`.
    /// A trailing query string or fragment is ignored. Returns `None` when no
    /// URL is set or the file name contains no `.`.
    pub fn get_file_extension(&self) -> Option<String> {
        let name = self.url_file_name()?;
        let (_, extension) = name.rsplit_once('.')?;
        Some(extension.to_lowercase())
    }

    /// Returns `true` when [`Self::get_file_extension`] is one of `zip`,
    /// `tar`, `gz`, `bz2`, `tgz`, `rar` or `7z`.
    pub fn is_archive(&self) -> bool {
        self.get_file_extension().is_some_and(|extension| {
            matches!(
                extension.as_str(),
                "zip" | "tar" | "gz" | "bz2" | "tgz" | "rar" | "7z"
            )
        })
    }
}
/// Full-text record of a PMC article: bibliographic metadata, the section
/// tree, references and supplementary materials.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct PmcFullText {
/// PMC identifier (the tests use values such as `PMC1234567`).
pub pmcid: String,
/// PubMed identifier, if known.
pub pmid: Option<String>,
/// Article title; `PmcFullText::new` starts with an empty string.
pub title: String,
/// Authors of the article.
pub authors: Vec<Author>,
/// Journal metadata.
pub journal: JournalInfo,
/// Publication date as text; `PmcFullText::new` starts with an empty string.
/// NOTE(review): the date format is not established in this file.
pub pub_date: String,
/// DOI, if known.
pub doi: Option<String>,
/// Top-level sections; nested sections live in `ArticleSection::subsections`.
pub sections: Vec<ArticleSection>,
/// Reference list.
pub references: Vec<Reference>,
/// Article type, if known.
pub article_type: Option<String>,
/// Keywords.
pub keywords: Vec<String>,
/// Funding entries.
pub funding: Vec<FundingInfo>,
/// Conflict-of-interest statement, if any.
pub conflict_of_interest: Option<String>,
/// Acknowledgments text, if any.
pub acknowledgments: Option<String>,
/// Data-availability statement, if any.
pub data_availability: Option<String>,
/// Supplementary materials attached to the article.
pub supplementary_materials: Vec<SupplementaryMaterial>,
}
/// One node of an article's section tree.
///
/// Sections nest through `subsections`; `PmcFullText::total_sections` and
/// `PmcFullText::get_full_text` walk that tree recursively.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ArticleSection {
/// Kind of section (the tests use `abstract`, `introduction` and `method`).
pub section_type: String,
/// Section heading, if any.
pub title: Option<String>,
/// Text of this section, excluding the text of its subsections.
pub content: String,
/// Directly nested sections.
pub subsections: Vec<ArticleSection>,
/// Identifier of the section, if any.
pub id: Option<String>,
/// Figures belonging to this section.
pub figures: Vec<Figure>,
/// Tables belonging to this section.
pub tables: Vec<Table>,
}
/// One entry of an article's reference list.
///
/// `Reference::format_citation` renders the populated fields as a single
/// citation string.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Reference {
/// Identifier of the reference; used as the fallback citation text.
pub id: String,
/// Title of the cited work, if known.
pub title: Option<String>,
/// Authors of the cited work.
pub authors: Vec<Author>,
/// Journal name, if known.
pub journal: Option<String>,
/// Publication year as text; `format_citation` skips the literal `n.d.`.
pub year: Option<String>,
/// Volume, if known.
pub volume: Option<String>,
/// Issue, if known; `format_citation` only prints it next to a volume.
pub issue: Option<String>,
/// Page range, if known.
pub pages: Option<String>,
/// PubMed identifier of the cited work, if known.
pub pmid: Option<String>,
/// DOI of the cited work, if known.
pub doi: Option<String>,
/// Reference type, if known.
/// NOTE(review): the set of accepted values is not defined in this file.
pub ref_type: Option<String>,
}
impl PmcFullText {
pub fn new(pmcid: String) -> Self {
Self {
pmcid,
pmid: None,
title: String::new(),
authors: Vec::new(),
journal: JournalInfo {
title: String::new(),
abbreviation: None,
issn_print: None,
issn_electronic: None,
publisher: None,
volume: None,
issue: None,
},
pub_date: String::new(),
doi: None,
sections: Vec::new(),
references: Vec::new(),
article_type: None,
keywords: Vec::new(),
funding: Vec::new(),
conflict_of_interest: None,
acknowledgments: None,
data_availability: None,
supplementary_materials: Vec::new(),
}
}
pub fn has_content(&self) -> bool {
!self.sections.is_empty() || !self.title.is_empty()
}
pub fn total_sections(&self) -> usize {
fn count_sections(sections: &[ArticleSection]) -> usize {
sections.iter().fold(0, |acc, section| {
acc + 1 + count_sections(§ion.subsections)
})
}
count_sections(&self.sections)
}
pub fn get_full_text(&self) -> String {
fn collect_content(sections: &[ArticleSection]) -> String {
sections
.iter()
.map(|section| {
let mut content = section.content.clone();
if !section.subsections.is_empty() {
content.push('\n');
content.push_str(&collect_content(§ion.subsections));
}
content
})
.collect::<Vec<_>>()
.join("\n\n")
}
collect_content(&self.sections)
}
pub fn get_tar_files(&self) -> Vec<&SupplementaryMaterial> {
self.supplementary_materials
.iter()
.filter(|material| material.is_tar_file())
.collect()
}
pub fn get_archive_files(&self) -> Vec<&SupplementaryMaterial> {
self.supplementary_materials
.iter()
.filter(|material| material.is_archive())
.collect()
}
pub fn has_supplementary_materials(&self) -> bool {
!self.supplementary_materials.is_empty()
}
pub fn get_supplementary_materials_by_type(
&self,
content_type: &str,
) -> Vec<&SupplementaryMaterial> {
self.supplementary_materials
.iter()
.filter(|material| {
material
.content_type
.as_ref()
.is_some_and(|ct| ct == content_type)
})
.collect()
}
}
impl ArticleSection {
    /// Builds a section of the given type and text with no title, no id and
    /// no subsections, figures or tables.
    pub fn new(section_type: String, content: String) -> Self {
        let (title, id) = (None, None);
        Self {
            section_type,
            title,
            content,
            subsections: Vec::new(),
            id,
            figures: Vec::new(),
            tables: Vec::new(),
        }
    }

    /// Same as [`Self::new`], but with `title` set.
    pub fn with_title(section_type: String, title: String, content: String) -> Self {
        Self {
            title: Some(title),
            ..Self::new(section_type, content)
        }
    }

    /// Same as [`Self::new`], but with `id` set.
    pub fn with_id(section_type: String, content: String, id: String) -> Self {
        Self {
            id: Some(id),
            ..Self::new(section_type, content)
        }
    }

    /// Appends `subsection` as the last child of this section.
    pub fn add_subsection(&mut self, subsection: ArticleSection) {
        self.subsections.push(subsection);
    }

    /// Returns `true` unless the section text is blank (empty or only
    /// whitespace) and there are no subsections.
    pub fn has_content(&self) -> bool {
        let text_is_blank = self.content.trim().is_empty();
        !(text_is_blank && self.subsections.is_empty())
    }
}
impl Reference {
    /// Builds a reference that carries only its `id`.
    pub fn new(id: String) -> Self {
        Self::basic(id, None, None)
    }

    /// Builds a reference from its `id`, optional `title` and optional
    /// `journal`; the author list starts empty and all other fields are `None`.
    pub fn basic(id: String, title: Option<String>, journal: Option<String>) -> Self {
        let authors = Vec::new();
        Self {
            id,
            title,
            authors,
            journal,
            year: None,
            volume: None,
            issue: None,
            pages: None,
            pmid: None,
            doi: None,
            ref_type: None,
        }
    }

    /// Renders the reference as `Authors. Title. Journal (Year) Volume(Issue): Pages`.
    ///
    /// Blank pieces (empty or only whitespace) are left out. The year is also
    /// left out when it is exactly `n.d.`, the issue is printed only together
    /// with a volume, and year/volume/issue/pages are printed only when a
    /// journal is present. When nothing can be printed the result is
    /// `Reference <id>`.
    pub fn format_citation(&self) -> String {
        /// Keeps `text` only when it contains something other than whitespace.
        fn non_blank(text: Option<&str>) -> Option<&str> {
            text.filter(|value| !value.trim().is_empty())
        }

        let mut segments: Vec<String> = Vec::new();

        let names: Vec<&str> = self
            .authors
            .iter()
            .map(|author| author.full_name.as_str())
            .filter(|name| !name.trim().is_empty())
            .collect();
        if !names.is_empty() {
            segments.push(names.join(", "));
        }

        if let Some(title) = non_blank(self.title.as_deref()) {
            segments.push(title.to_owned());
        }

        if let Some(journal) = non_blank(self.journal.as_deref()) {
            let mut entry = String::from(journal);

            let year = non_blank(self.year.as_deref()).filter(|year| *year != "n.d.");
            if let Some(year) = year {
                entry.push_str(" (");
                entry.push_str(year);
                entry.push(')');
            }

            if let Some(volume) = non_blank(self.volume.as_deref()) {
                entry.push(' ');
                entry.push_str(volume);
                if let Some(issue) = non_blank(self.issue.as_deref()) {
                    entry.push('(');
                    entry.push_str(issue);
                    entry.push(')');
                }
            }

            if let Some(pages) = non_blank(self.pages.as_deref()) {
                entry.push_str(": ");
                entry.push_str(pages);
            }

            segments.push(entry);
        }

        // Every pushed segment holds visible text, so an empty list is the
        // only way to end up with nothing to print.
        if segments.is_empty() {
            format!("Reference {}", self.id)
        } else {
            segments.join(". ")
        }
    }
}
impl JournalInfo {
    /// Builds journal metadata that carries only a `title`; every optional
    /// field is `None`.
    pub fn new(title: String) -> Self {
        let (abbreviation, issn_print, issn_electronic) = (None, None, None);
        let (publisher, volume, issue) = (None, None, None);
        Self {
            title,
            abbreviation,
            issn_print,
            issn_electronic,
            publisher,
            volume,
            issue,
        }
    }
}
impl FundingInfo {
    /// Builds a funding entry that carries only its `source`; the award id
    /// and statement are `None`.
    pub fn new(source: String) -> Self {
        let (award_id, statement) = (None, None);
        Self {
            source,
            award_id,
            statement,
        }
    }
}
impl Figure {
    /// Builds a figure from its `id` and `caption`; every optional field is
    /// `None`.
    pub fn new(id: String, caption: String) -> Self {
        let (label, alt_text, fig_type) = (None, None, None);
        let (file_path, file_name) = (None, None);
        Self {
            id,
            label,
            caption,
            alt_text,
            fig_type,
            file_path,
            file_name,
        }
    }
}
impl Table {
    /// Builds a table from its `id` and `caption`, with no label and no
    /// footnotes.
    pub fn new(id: String, caption: String) -> Self {
        let footnotes = Vec::new();
        Self {
            id,
            label: None,
            caption,
            footnotes,
        }
    }
}
/// Result of checking whether an article belongs to the open-access (OA)
/// subset, including the error details when it does not.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct OaSubsetInfo {
/// PMC identifier the information refers to.
pub pmcid: String,
/// `true` when built by `OaSubsetInfo::available`, `false` when built by
/// `OaSubsetInfo::not_available`.
pub is_oa_subset: bool,
/// Citation text, if provided.
pub citation: Option<String>,
/// License, if provided.
pub license: Option<String>,
/// Retraction flag; both constructors start with `false`.
pub retracted: bool,
/// Download link, if provided.
pub download_link: Option<String>,
/// Format of the download, if provided.
pub download_format: Option<String>,
/// Last-updated value as text, if provided.
/// NOTE(review): the timestamp format is not established in this file.
pub updated: Option<String>,
/// Error code; set by `OaSubsetInfo::not_available`.
pub error_code: Option<String>,
/// Error message; set by `OaSubsetInfo::not_available`.
pub error_message: Option<String>,
}
impl OaSubsetInfo {
    /// Builds the record for an article that is in the OA subset: the flag is
    /// `true`, `retracted` is `false` and every optional field is `None`.
    pub fn available(pmcid: String) -> Self {
        Self {
            is_oa_subset: true,
            retracted: false,
            pmcid,
            citation: None,
            license: None,
            download_link: None,
            download_format: None,
            updated: None,
            error_code: None,
            error_message: None,
        }
    }

    /// Builds the record for an article that is not in the OA subset,
    /// carrying the given error code and message; all other optional fields
    /// are `None` and `retracted` is `false`.
    pub fn not_available(pmcid: String, error_code: String, error_message: String) -> Self {
        Self {
            is_oa_subset: false,
            error_code: Some(error_code),
            error_message: Some(error_message),
            ..Self::available(pmcid)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an untitled section of the given type and text.
    fn section(kind: &str, text: &str) -> ArticleSection {
        ArticleSection::new(kind.to_string(), text.to_string())
    }

    /// Shorthand for a supplementary material whose `file_url` is already set.
    fn material(id: &str, url: &str) -> SupplementaryMaterial {
        let mut entry = SupplementaryMaterial::new(id.to_string());
        entry.file_url = Some(url.to_string());
        entry
    }

    #[test]
    fn test_pmc_full_text_creation() {
        let empty = PmcFullText::new("PMC1234567".to_string());

        assert_eq!(empty.pmcid, "PMC1234567");
        assert!(!empty.has_content());
        assert_eq!(empty.total_sections(), 0);
    }

    #[test]
    fn test_article_section_creation() {
        let mut parent = section("abstract", "This is an abstract.");
        assert!(parent.has_content());
        assert_eq!(parent.subsections.len(), 0);

        parent.add_subsection(section("method", "Method details."));
        assert_eq!(parent.subsections.len(), 1);
    }

    #[test]
    fn test_reference_formatting() {
        let mut cited = Reference::new("ref1".to_string());
        cited.authors = ["Smith, J.", "Doe, A."]
            .into_iter()
            .map(|name| Author::from_full_name(name.to_string()))
            .collect();
        cited.title = Some("Test Article".to_string());
        cited.journal = Some("Test Journal".to_string());
        cited.year = Some("2023".to_string());

        let rendered = cited.format_citation();
        for expected in ["Smith, J., Doe, A.", "Test Article", "Test Journal (2023)"] {
            assert!(rendered.contains(expected));
        }
    }

    #[test]
    fn test_full_text_content() {
        let mut article = PmcFullText::new("PMC1234567".to_string());
        article.sections.push(section("abstract", "Abstract content."));
        article
            .sections
            .push(section("introduction", "Introduction content."));

        assert!(article.has_content());
        assert_eq!(article.total_sections(), 2);

        let text = article.get_full_text();
        assert!(text.contains("Abstract content."));
        assert!(text.contains("Introduction content."));
    }

    #[test]
    fn test_supplementary_material_creation() {
        let mut supp = material("supp1", "https://example.com/data.tar.gz");
        supp.content_type = Some("local-data".to_string());
        supp.title = Some("Supplementary Data".to_string());

        assert_eq!(supp.id, "supp1");
        assert!(supp.is_tar_file());
        assert!(supp.is_archive());
        assert_eq!(supp.get_file_extension(), Some("gz".to_string()));
    }

    #[test]
    fn test_tar_file_detection() {
        let mut probe = SupplementaryMaterial::new("tar1".to_string());

        for tar_name in ["data.tar", "data.tar.gz", "data.tar.bz2", "data.tgz"] {
            probe.file_url = Some(tar_name.to_string());
            assert!(probe.is_tar_file());
        }

        // A zip file is an archive, but not a tar archive.
        probe.file_url = Some("data.zip".to_string());
        assert!(!probe.is_tar_file());
        assert!(probe.is_archive());

        // A PDF is neither.
        probe.file_url = Some("data.pdf".to_string());
        assert!(!probe.is_tar_file());
        assert!(!probe.is_archive());
    }

    #[test]
    fn test_pmc_full_text_with_supplementary_materials() {
        let mut article = PmcFullText::new("PMC1234567".to_string());

        for (id, url) in [("supp1", "dataset.tar.gz"), ("supp2", "figures.zip")] {
            let mut entry = material(id, url);
            entry.content_type = Some("local-data".to_string());
            article.supplementary_materials.push(entry);
        }

        assert!(article.has_supplementary_materials());
        assert_eq!(article.supplementary_materials.len(), 2);

        let tarballs = article.get_tar_files();
        assert_eq!(tarballs.len(), 1);
        assert_eq!(tarballs[0].id, "supp1");

        let archives = article.get_archive_files();
        assert_eq!(archives.len(), 2);

        let local_data = article.get_supplementary_materials_by_type("local-data");
        assert_eq!(local_data.len(), 2);
    }
}