use std::collections::HashMap;
use serde::Serialize;
use crate::pmc::models::{
ArticleSection, Author, Figure, FundingInfo, PmcFullText, Reference, Table,
};
/// HTML entity references paired with the plain text substituted for each.
///
/// Both the named and the decimal numeric spelling of an entity are listed.
/// Typographic punctuation and legal/currency marks are flattened to ASCII
/// look-alikes, while the arithmetic and Greek symbols decode to the real
/// Unicode characters (written as `\u{..}` escapes so that re-encoding this
/// file cannot corrupt them).
///
/// The pairs are meant to be applied top to bottom with `str::replace`, so
/// `&amp;` is deliberately kept last: decoding it first would turn an escaped
/// `&amp;lt;` into `&lt;` and then into `<`, i.e. decode the text twice.
static HTML_ENTITIES: &[(&str, &str)] = &[
    // Markup-significant characters and quotes.
    ("&lt;", "<"),
    ("&gt;", ">"),
    ("&quot;", "\""),
    ("&#39;", "'"),
    ("&apos;", "'"),
    // Typographic quotes.
    ("&rsquo;", "'"),
    ("&lsquo;", "'"),
    ("&ldquo;", "\""),
    ("&rdquo;", "\""),
    ("&#8217;", "'"),
    ("&#8216;", "'"),
    ("&#8221;", "\""),
    ("&#8220;", "\""),
    // Dashes, spaces and ellipses.
    ("&ndash;", "-"),
    ("&mdash;", "--"),
    ("&nbsp;", " "),
    ("&#160;", " "),
    ("&#8211;", "-"),
    ("&#8212;", "--"),
    ("&hellip;", "..."),
    ("&#8230;", "..."),
    // Legal marks.
    ("&trade;", "(TM)"),
    ("&reg;", "(R)"),
    ("&copy;", "(C)"),
    ("&#8482;", "(TM)"),
    ("&#174;", "(R)"),
    ("&#169;", "(C)"),
    // Currencies.
    ("&euro;", "EUR"),
    ("&pound;", "GBP"),
    ("&yen;", "JPY"),
    // Arithmetic symbols.
    ("&minus;", "-"),
    ("&times;", "x"),
    ("&divide;", "/"),
    ("&plusmn;", "\u{00B1}"),
    ("&#215;", "x"),
    ("&#247;", "/"),
    ("&#177;", "\u{00B1}"),
    // Greek letters, named form.
    ("&alpha;", "\u{03B1}"),
    ("&beta;", "\u{03B2}"),
    ("&gamma;", "\u{03B3}"),
    ("&delta;", "\u{03B4}"),
    ("&epsilon;", "\u{03B5}"),
    ("&mu;", "\u{03BC}"),
    ("&pi;", "\u{03C0}"),
    ("&sigma;", "\u{03C3}"),
    // Greek letters, numeric form.
    ("&#945;", "\u{03B1}"),
    ("&#946;", "\u{03B2}"),
    ("&#947;", "\u{03B3}"),
    ("&#948;", "\u{03B4}"),
    ("&#949;", "\u{03B5}"),
    ("&#956;", "\u{03BC}"),
    ("&#960;", "\u{03C0}"),
    ("&#963;", "\u{03C3}"),
    // Must stay last, see the note on double decoding above.
    ("&amp;", "&"),
];
/// Article metadata in the shape that is serialized into the YAML frontmatter.
///
/// `generate_yaml_frontmatter` fills one of these from a `PmcFullText` and
/// hands it to `serde_yaml`. Empty lists and `None` values are left out of the
/// serialized output through the `skip_serializing_if` attributes below.
#[derive(Debug, Clone, Serialize)]
struct ArticleMetadata {
title: String,
// One entry per author, taken from each author's `full_name`.
#[serde(skip_serializing_if = "Vec::is_empty")]
authors: Vec<String>,
// Journal title; the abbreviation is carried separately below.
journal: String,
#[serde(skip_serializing_if = "Option::is_none")]
journal_abbrev: Option<String>,
// Left as `None` when the article's date is empty or "Unknown Date".
#[serde(skip_serializing_if = "Option::is_none")]
pub_date: Option<String>,
pmcid: String,
#[serde(skip_serializing_if = "Option::is_none")]
pmid: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
doi: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
article_type: Option<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
keywords: Vec<String>,
// Journal volume, issue and publisher, copied from the article's journal info.
#[serde(skip_serializing_if = "Option::is_none")]
volume: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
issue: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
publisher: Option<String>,
}
/// Options controlling how `PmcMarkdownConverter` renders an article.
#[derive(Debug, Clone)]
pub struct MarkdownConfig {
/// Emit the metadata block at the top of the document; when `false` only the
/// article title is written as a level-1 heading.
pub include_metadata: bool,
/// Emit a "Table of Contents" listing sections and their direct subsections.
pub include_toc: bool,
/// Heading syntax used by `format_heading`.
pub heading_style: HeadingStyle,
/// Layout of the entries in the "References" section.
pub reference_style: ReferenceStyle,
/// Deepest heading level that is emitted; deeper headings are capped to it.
pub max_heading_level: u8,
/// Append an ORCID link to author names that carry an ORCID.
pub include_orcid_links: bool,
/// Render DOI / PMID identifiers as Markdown links instead of plain text.
pub include_identifier_links: bool,
/// Emit the caption blocks of a section's figures; the same flag currently
/// also gates the section's tables.
pub include_figure_captions: bool,
/// Allow figures with a known local path to be embedded when converting
/// through `convert_with_figures`.
pub include_local_figures: bool,
/// Emit the metadata as YAML frontmatter instead of a Markdown block.
pub use_yaml_frontmatter: bool,
}
/// Heading syntax used for the generated Markdown.
///
/// The enum carries no data, so it is `Copy` and `Eq` in addition to the
/// comparison and debug traits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HeadingStyle {
    /// `#`-prefixed headings at every level.
    ATX,
    /// `=` / `-` underlined headings for levels 1 and 2; deeper levels fall
    /// back to `#` prefixes.
    Setext,
}
/// Layout of the entries in the generated "References" section.
///
/// The enum carries no data, so it is `Copy` and `Eq` in addition to the
/// comparison and debug traits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferenceStyle {
    /// Numbered list (`1. ...`) of full citations.
    Numbered,
    /// Bullet list of `First Author (Year)` entries; a reference that lacks an
    /// author or a year falls back to its full citation.
    AuthorYear,
    /// Bullet list of full citations.
    FullCitation,
}
impl Default for MarkdownConfig {
fn default() -> Self {
Self {
include_metadata: true,
include_toc: false,
heading_style: HeadingStyle::ATX,
reference_style: ReferenceStyle::Numbered,
max_heading_level: 6,
include_orcid_links: true,
include_identifier_links: true,
include_figure_captions: true,
include_local_figures: false,
use_yaml_frontmatter: false,
}
}
}
/// Converts a parsed PMC article (`PmcFullText`) into a Markdown document.
///
/// The converter only holds its `MarkdownConfig`; it is built with `new` or
/// `with_config` and adjusted through the chainable `with_*` methods.
pub struct PmcMarkdownConverter {
// Rendering options consulted by every conversion step.
config: MarkdownConfig,
}
impl PmcMarkdownConverter {
pub fn new() -> Self {
Self {
config: MarkdownConfig::default(),
}
}
pub fn with_config(config: MarkdownConfig) -> Self {
Self { config }
}
pub fn with_include_metadata(mut self, include: bool) -> Self {
self.config.include_metadata = include;
self
}
pub fn with_include_toc(mut self, include: bool) -> Self {
self.config.include_toc = include;
self
}
pub fn with_heading_style(mut self, style: HeadingStyle) -> Self {
self.config.heading_style = style;
self
}
pub fn with_reference_style(mut self, style: ReferenceStyle) -> Self {
self.config.reference_style = style;
self
}
pub fn with_max_heading_level(mut self, level: u8) -> Self {
self.config.max_heading_level = level.clamp(1, 6);
self
}
pub fn with_include_orcid_links(mut self, include: bool) -> Self {
self.config.include_orcid_links = include;
self
}
pub fn with_include_identifier_links(mut self, include: bool) -> Self {
self.config.include_identifier_links = include;
self
}
pub fn with_include_figure_captions(mut self, include: bool) -> Self {
self.config.include_figure_captions = include;
self
}
pub fn with_yaml_frontmatter(mut self, use_yaml: bool) -> Self {
self.config.use_yaml_frontmatter = use_yaml;
self
}
/// Converts `article` to Markdown, looking up each figure's id in
/// `figure_paths` so that a local path can be passed on to the figure
/// renderer.
///
/// The document is assembled in this order: metadata block (or just the
/// title heading), optional table of contents, sections, references, and the
/// trailing funding / statement sections. Surrounding whitespace is trimmed
/// from the result.
pub fn convert_with_figures(
    &self,
    article: &PmcFullText,
    figure_paths: Option<&HashMap<String, String>>,
) -> String {
    // The header is either the full metadata block or the bare title.
    let mut document = if self.config.include_metadata {
        self.convert_metadata(article)
    } else {
        self.format_heading(&self.clean_content(&article.title), 1)
    };
    document.push_str("\n\n");

    if self.config.include_toc {
        let toc = self.convert_toc(article);
        document.push_str(&toc);
        document.push_str("\n\n");
    }

    let body = self.convert_sections_with_figures(&article.sections, 1, figure_paths);
    document.push_str(&body);

    if !article.references.is_empty() {
        let references = self.convert_references(&article.references);
        document.push_str(&references);
    }

    let trailer = self.convert_additional_sections(article);
    document.push_str(&trailer);

    document.trim().to_string()
}
/// Converts `article` to Markdown without any local figure paths.
///
/// The document is assembled in this order: metadata block (or just the
/// title heading), optional table of contents, sections, references, and the
/// trailing funding / statement sections. Surrounding whitespace is trimmed
/// from the result.
pub fn convert(&self, article: &PmcFullText) -> String {
    let options = &self.config;
    let mut parts: Vec<String> = Vec::new();

    // The header is either the full metadata block or the bare title.
    parts.push(if options.include_metadata {
        self.convert_metadata(article)
    } else {
        self.format_heading(&self.clean_content(&article.title), 1)
    });
    parts.push("\n\n".to_string());

    if options.include_toc {
        parts.push(self.convert_toc(article));
        parts.push("\n\n".to_string());
    }

    parts.push(self.convert_sections(&article.sections, 1));

    if !article.references.is_empty() {
        parts.push(self.convert_references(&article.references));
    }

    parts.push(self.convert_additional_sections(article));

    parts.concat().trim().to_string()
}
/// Renders the article metadata as a YAML frontmatter block delimited by
/// `---` lines.
///
/// Every value is passed through `clean_content` first. The publication date
/// is omitted when it is empty or the placeholder "Unknown Date". If
/// serialization fails, a warning is logged and an empty frontmatter block is
/// returned.
fn generate_yaml_frontmatter(&self, article: &PmcFullText) -> String {
    // Cleans an optional text field, keeping `None` as `None`.
    let clean_opt = |field: Option<&str>| field.map(|text| self.clean_content(text));

    let journal = &article.journal;
    let has_pub_date = !article.pub_date.is_empty() && article.pub_date != "Unknown Date";

    let authors: Vec<String> = article
        .authors
        .iter()
        .map(|a| self.clean_content(&a.full_name))
        .collect();
    let keywords: Vec<String> = article
        .keywords
        .iter()
        .map(|k| self.clean_content(k))
        .collect();
    let pub_date = if has_pub_date {
        Some(self.clean_content(&article.pub_date))
    } else {
        None
    };

    let metadata = ArticleMetadata {
        title: self.clean_content(&article.title),
        authors,
        journal: self.clean_content(&journal.title),
        journal_abbrev: clean_opt(journal.abbreviation.as_deref()),
        pub_date,
        pmcid: self.clean_content(&article.pmcid),
        pmid: clean_opt(article.pmid.as_deref()),
        doi: clean_opt(article.doi.as_deref()),
        article_type: clean_opt(article.article_type.as_deref()),
        keywords,
        volume: clean_opt(journal.volume.as_deref()),
        issue: clean_opt(journal.issue.as_deref()),
        publisher: clean_opt(journal.publisher.as_deref()),
    };

    serde_yaml::to_string(&metadata)
        .map(|yaml_content| format!("---\n{}---\n", yaml_content))
        .unwrap_or_else(|e| {
            tracing::warn!("Failed to serialize YAML frontmatter: {}", e);
            "---\n---\n".to_string()
        })
}
/// Renders the metadata header of the document.
///
/// With `use_yaml_frontmatter` set this defers to
/// `generate_yaml_frontmatter`. Otherwise it emits the title heading followed
/// by bold-labelled lines for authors, journal, publication date,
/// identifiers, article type, keywords and journal details; lines whose data
/// is missing are skipped (the journal and identifier lines are always
/// present).
fn convert_metadata(&self, article: &PmcFullText) -> String {
    if self.config.use_yaml_frontmatter {
        return self.generate_yaml_frontmatter(article);
    }

    let link_identifiers = self.config.include_identifier_links;
    let journal = &article.journal;

    // Title heading.
    let mut block = self.format_heading(&self.clean_content(&article.title), 1);
    block.push('\n');

    // Authors, separated from the title by a blank line.
    if !article.authors.is_empty() {
        let authors = self.format_authors(&article.authors);
        block.push_str("\n**Authors:** ");
        block.push_str(&authors);
        block.push('\n');
    }

    // Journal title with the abbreviation in parentheses when known.
    let journal_title = &journal.title;
    block.push_str(&format!("\n**Journal:** {journal_title}"));
    if let Some(abbrev) = &journal.abbreviation {
        block.push_str(&format!(" ({abbrev})"));
    }
    block.push('\n');

    // Publication date, unless it is empty or the placeholder value.
    match article.pub_date.as_str() {
        "" | "Unknown Date" => {}
        pub_date => block.push_str(&format!("**Published:** {pub_date}\n")),
    }

    // Identifiers: DOI and PMID when present, the PMC id always.
    let mut identifiers: Vec<String> = Vec::new();
    if let Some(doi) = &article.doi {
        identifiers.push(if link_identifiers {
            format!("[DOI: {doi}](https://doi.org/{doi})")
        } else {
            format!("DOI: {doi}")
        });
    }
    if let Some(pmid) = &article.pmid {
        identifiers.push(if link_identifiers {
            format!("[PMID: {pmid}](https://pubmed.ncbi.nlm.nih.gov/{pmid})")
        } else {
            format!("PMID: {pmid}")
        });
    }
    let pmcid = &article.pmcid;
    identifiers.push(format!("PMC: {pmcid}"));
    if !identifiers.is_empty() {
        let identifiers_str = identifiers.join(" | ");
        block.push_str(&format!("**Identifiers:** {identifiers_str}\n"));
    }

    if let Some(article_type) = &article.article_type {
        block.push_str(&format!("**Article Type:** {article_type}\n"));
    }

    if !article.keywords.is_empty() {
        let keywords_str = article
            .keywords
            .iter()
            .map(|k| self.clean_content(k))
            .collect::<Vec<String>>()
            .join(", ");
        block.push_str(&format!("**Keywords:** {keywords_str}\n"));
    }

    // Volume / issue / publisher, keeping only the parts that are present.
    let journal_details: Vec<String> = vec![
        journal.volume.as_ref().map(|volume| format!("Volume {volume}")),
        journal.issue.as_ref().map(|issue| format!("Issue {issue}")),
        journal
            .publisher
            .as_ref()
            .map(|publisher| format!("Publisher: {publisher}")),
    ]
    .into_iter()
    .flatten()
    .collect();
    if !journal_details.is_empty() {
        block.push_str(&format!(
            "**Journal Details:** {}\n",
            journal_details.join(" | ")
        ));
    }

    block
}
/// Builds the "Table of Contents": a numbered entry per top-level section and
/// a `main.sub` numbered entry per direct subsection, each linking to the
/// anchor derived from its title. Sections without a title are listed as
/// "Untitled".
fn convert_toc(&self, article: &PmcFullText) -> String {
    let mut toc = self.format_heading("Table of Contents", 2);
    toc.push('\n');

    for (section_pos, section) in article.sections.iter().enumerate() {
        let index = section_pos + 1;
        let title = section.title.as_deref().unwrap_or("Untitled");
        let anchor = self.create_anchor(title);
        toc.push_str(&format!("{index}. [{title}](#{anchor})\n"));

        for (sub_pos, subsection) in section.subsections.iter().enumerate() {
            let main_index = index;
            let sub_index = sub_pos + 1;
            let sub_title = subsection.title.as_deref().unwrap_or("Untitled");
            let sub_anchor = self.create_anchor(sub_title);
            toc.push_str(&format!(
                " {main_index}.{sub_index}. [{sub_title}](#{sub_anchor})\n"
            ));
        }
    }

    toc
}
fn convert_sections_with_figures(
&self,
sections: &[ArticleSection],
level: u8,
figure_paths: Option<&HashMap<String, String>>,
) -> String {
let mut content = String::new();
for section in sections {
if let Some(title) = §ion.title {
content.push_str(&self.format_heading(title, level));
content.push_str("\n\n");
}
if !section.content.is_empty() {
content.push_str(&self.clean_content(§ion.content));
content.push_str("\n\n");
}
if self.config.include_figure_captions {
for figure in §ion.figures {
let figure_path = figure_paths.and_then(|paths| paths.get(&figure.id));
content.push_str(&self.convert_figure_with_path(figure, figure_path));
content.push_str("\n\n");
}
}
if self.config.include_figure_captions {
for table in §ion.tables {
content.push_str(&self.convert_table(table));
content.push_str("\n\n");
}
}
if !section.subsections.is_empty() {
let next_level = (level + 1).min(self.config.max_heading_level);
content.push_str(&self.convert_sections_with_figures(
§ion.subsections,
next_level,
figure_paths,
));
}
}
content
}
fn convert_sections(&self, sections: &[ArticleSection], level: u8) -> String {
let mut content = String::new();
for section in sections {
if let Some(title) = §ion.title {
content.push_str(&self.format_heading(title, level));
content.push_str("\n\n");
}
if !section.content.is_empty() {
content.push_str(&self.clean_content(§ion.content));
content.push_str("\n\n");
}
if self.config.include_figure_captions {
for figure in §ion.figures {
content.push_str(&self.convert_figure(figure));
content.push_str("\n\n");
}
}
if self.config.include_figure_captions {
for table in §ion.tables {
content.push_str(&self.convert_table(table));
content.push_str("\n\n");
}
}
if !section.subsections.is_empty() {
let next_level = (level + 1).min(self.config.max_heading_level);
content.push_str(&self.convert_sections(§ion.subsections, next_level));
}
}
content
}
/// Renders the "References" section: a level-2 heading followed by one line
/// per reference. The numbered style prefixes each entry with its 1-based
/// position; the other styles use `-` bullets. A blank line closes the list.
fn convert_references(&self, references: &[Reference]) -> String {
    let mut section = self.format_heading("References", 2);
    section.push_str("\n\n");

    for (position, reference) in references.iter().enumerate() {
        let formatted_ref = self.format_reference(reference);
        let entry = match self.config.reference_style {
            ReferenceStyle::Numbered => format!("{}. {}\n", position + 1, formatted_ref),
            ReferenceStyle::AuthorYear | ReferenceStyle::FullCitation => {
                format!("- {formatted_ref}\n")
            }
        };
        section.push_str(&entry);
    }

    section.push('\n');
    section
}
/// Renders the trailing sections of the document, each under a level-2
/// heading and only when the article has the data: "Funding" (one bullet per
/// funding entry), then "Conflict of Interest", "Acknowledgments" and
/// "Data Availability" with their cleaned text.
fn convert_additional_sections(&self, article: &PmcFullText) -> String {
    let mut trailer = String::new();

    if !article.funding.is_empty() {
        trailer.push_str(&self.format_heading("Funding", 2));
        trailer.push_str("\n\n");
        for entry in &article.funding {
            trailer.push_str(&self.format_funding(entry));
            trailer.push('\n');
        }
        trailer.push('\n');
    }

    // The three free-text statements share one layout: heading, blank line,
    // cleaned text, blank line.
    let statements = [
        ("Conflict of Interest", article.conflict_of_interest.as_deref()),
        ("Acknowledgments", article.acknowledgments.as_deref()),
        ("Data Availability", article.data_availability.as_deref()),
    ];
    for (heading, statement) in statements {
        if let Some(text) = statement {
            trailer.push_str(&self.format_heading(heading, 2));
            trailer.push_str("\n\n");
            trailer.push_str(&self.clean_content(text));
            trailer.push_str("\n\n");
        }
    }

    trailer
}
/// Formats `text` as a Markdown heading of the given `level`.
///
/// The level is capped at the configured `max_heading_level` and then forced
/// into the range 1 through 6, so a level of 0 (or a configuration with a
/// maximum of 0 or more than 6) can no longer yield a heading without `#`
/// marks or one deeper than Markdown supports.
///
/// * ATX style: `#` repeated `level` times, a space, then the text.
/// * Setext style: levels 1 and 2 are underlined with `=` / `-`; deeper
///   levels fall back to the ATX form.
fn format_heading(&self, text: &str, level: u8) -> String {
    let level = level.min(self.config.max_heading_level).clamp(1, 6);

    // Setext only defines underlines for the first two levels.
    let underline_char = match (&self.config.heading_style, level) {
        (HeadingStyle::Setext, 1) => Some("="),
        (HeadingStyle::Setext, 2) => Some("-"),
        (HeadingStyle::Setext, _) | (HeadingStyle::ATX, _) => None,
    };

    match underline_char {
        Some(mark) => {
            // Count characters rather than bytes so that the underline of a
            // non-ASCII title is not longer than the title itself.
            let underline = mark.repeat(text.chars().count());
            format!("{text}\n{underline}")
        }
        None => {
            let hashes = "#".repeat(usize::from(level));
            format!("{hashes} {text}")
        }
    }
}
/// Formats the author list as a comma-separated line.
///
/// Each name is cleaned, corresponding authors get a trailing `*`, and, when
/// `include_orcid_links` is set, authors with a plausible ORCID get an
/// ` ([ORCID](https://orcid.org/...))` link appended.
///
/// The ORCID may be given bare or as an `https://orcid.org/` or
/// `http://orcid.org/` URL; the URL prefix is removed before the link is
/// rebuilt. Previously only the `https` form was stripped, so an `http` URL
/// passed the plausibility check and ended up nested inside a second URL.
fn format_authors(&self, authors: &[Author]) -> String {
    authors
        .iter()
        .map(|author| {
            let mut name = self.clean_content(&author.full_name);
            if author.is_corresponding {
                name.push('*');
            }
            if self.config.include_orcid_links {
                if let Some(orcid) = &author.orcid {
                    let cleaned_orcid = self.clean_content(orcid);
                    let clean_orcid = cleaned_orcid
                        .trim_start_matches("https://orcid.org/")
                        .trim_start_matches("http://orcid.org/");
                    // Plausibility check only: an ORCID iD is written as four
                    // dash-separated groups (19 characters).
                    if clean_orcid.len() >= 19 && clean_orcid.matches('-').count() == 3 {
                        name.push_str(&format!(" ([ORCID](https://orcid.org/{clean_orcid}))"));
                    }
                }
            }
            name
        })
        .collect::<Vec<String>>()
        .join(", ")
}
/// Formats a single reference according to the configured reference style.
///
/// * Author-year: `First Author (Year)` when the reference has at least one
///   author and a year, otherwise its full citation.
/// * Numbered / full citation: the full citation, followed by `[DOI](...)`
///   and `[PMID](...)` links when identifier links are enabled and the
///   reference carries those identifiers.
fn format_reference(&self, reference: &Reference) -> String {
    match self.config.reference_style {
        ReferenceStyle::AuthorYear => {
            match (reference.authors.first(), reference.year.as_ref()) {
                (Some(first_author), Some(year)) => {
                    format!("{} ({})", first_author.full_name, year)
                }
                _ => reference.format_citation(),
            }
        }
        ReferenceStyle::Numbered | ReferenceStyle::FullCitation => {
            let mut formatted = reference.format_citation();
            if !self.config.include_identifier_links {
                return formatted;
            }
            if let Some(doi) = &reference.doi {
                formatted.push_str(&format!(" [DOI](https://doi.org/{doi})"));
            }
            if let Some(pmid) = &reference.pmid {
                formatted.push_str(&format!(" [PMID](https://pubmed.ncbi.nlm.nih.gov/{pmid})"));
            }
            formatted
        }
    }
}
/// Formats one funding entry as a bullet: the source in bold, then the award
/// id in parentheses and the cleaned funding statement after a colon, each
/// only when present.
fn format_funding(&self, funding: &FundingInfo) -> String {
    let source = &funding.source;
    let mut bullet = format!("- **{source}**");

    match &funding.award_id {
        Some(award_id) => bullet.push_str(&format!(" (Award ID: {award_id})")),
        None => {}
    }

    match &funding.statement {
        Some(statement) => {
            let content = self.clean_content(statement);
            bullet.push_str(&format!(": {content}"));
        }
        None => {}
    }

    bullet
}
/// Renders a figure block, embedding the image when a local path is known.
///
/// When `include_local_figures` is enabled and `figure_path` is `Some`, a
/// Markdown image (`![alt](path)`) is emitted first, followed by a blank
/// line. The image's alt text is the figure's alt text, falling back to its
/// label and then to its id, and is cleaned so that it stays on one line.
/// Previously the alt text and path were computed but only a blank line was
/// written, so the image never appeared in the output.
///
/// The caption block follows: the bold label (or `Figure <id>`), a colon and
/// the cleaned caption, plus an italic "Alt text" line when the figure has
/// alt text.
fn convert_figure_with_path(&self, figure: &Figure, figure_path: Option<&String>) -> String {
    let mut content = String::new();
    if self.config.include_local_figures {
        if let Some(path) = figure_path {
            let alt_text = figure
                .alt_text
                .as_deref()
                .or(figure.label.as_deref())
                .unwrap_or(&figure.id);
            let alt_text = self.clean_content(alt_text);
            content.push_str(&format!("![{alt_text}]({path})\n\n"));
        }
    }
    if let Some(label) = &figure.label {
        content.push_str(&format!("**{label}**"));
    } else {
        let figure_id = &figure.id;
        content.push_str(&format!("**Figure {figure_id}**"));
    }
    let caption = self.clean_content(&figure.caption);
    content.push_str(&format!(": {caption}"));
    if let Some(alt_text) = &figure.alt_text {
        let alt_content = self.clean_content(alt_text);
        content.push_str(&format!("\n\n*Alt text: {alt_content}*"));
    }
    content
}
/// Renders the caption block of a figure: the bold label (or `Figure <id>`
/// when the figure has no label), a colon and the cleaned caption, plus an
/// italic "Alt text" line when the figure has alt text.
fn convert_figure(&self, figure: &Figure) -> String {
    let mut block = match &figure.label {
        Some(label) => format!("**{label}**"),
        None => {
            let figure_id = &figure.id;
            format!("**Figure {figure_id}**")
        }
    };

    let caption = self.clean_content(&figure.caption);
    block.push_str(&format!(": {caption}"));

    if let Some(raw_alt) = &figure.alt_text {
        let alt_content = self.clean_content(raw_alt);
        block.push_str(&format!("\n\n*Alt text: {alt_content}*"));
    }

    block
}
/// Renders the caption block of a table: the bold label (or `Table <id>` when
/// the table has no label), a colon and the cleaned caption, followed by a
/// numbered "Footnotes" list when the table has footnotes.
fn convert_table(&self, table: &Table) -> String {
    let mut block = match &table.label {
        Some(label) => format!("**{label}**"),
        None => {
            let table_id = &table.id;
            format!("**Table {table_id}**")
        }
    };

    let caption = self.clean_content(&table.caption);
    block.push_str(&format!(": {caption}"));

    if table.footnotes.is_empty() {
        return block;
    }

    block.push_str("\n\n*Footnotes:*\n");
    for (position, footnote) in table.footnotes.iter().enumerate() {
        let index = position + 1;
        let footnote_content = self.clean_content(footnote);
        block.push_str(&format!("{index}. {footnote_content}\n"));
    }

    block
}
/// Turns marked-up text into plain text.
///
/// Three steps are applied in order:
/// 1. everything that looks like a tag (`<...>`) is removed;
/// 2. the entity references listed in `HTML_ENTITIES` are replaced, in table
///    order;
/// 3. every run of whitespace is collapsed to a single space and leading and
///    trailing whitespace is dropped.
///
/// The tag pattern is compiled once and cached: this method runs for nearly
/// every piece of text in a document, and compiling two regexes per call was
/// the dominant cost.
fn clean_content(&self, content: &str) -> String {
    static TAG_PATTERN: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
    let tag_pattern = TAG_PATTERN.get_or_init(|| {
        regex::Regex::new(r"<[^>]*>").expect("the tag pattern is a valid regular expression")
    });

    let mut cleaned = tag_pattern.replace_all(content, "").into_owned();

    for (entity, replacement) in HTML_ENTITIES {
        // Skip the reallocation for the many entities that do not occur.
        if cleaned.contains(*entity) {
            cleaned = cleaned.replace(*entity, replacement);
        }
    }

    // Same result as replacing `\s+` with a single space and trimming:
    // `split_whitespace` splits on Unicode whitespace and yields no empty
    // pieces at either end.
    cleaned.split_whitespace().collect::<Vec<&str>>().join(" ")
}
/// Derives a link anchor from a heading title: the title is lower-cased, every
/// run of non-alphanumeric characters becomes a single `-`, and no `-` is
/// left at the start or end.
fn create_anchor(&self, title: &str) -> String {
    let lowered = title.to_lowercase();
    // Splitting on every non-alphanumeric character and dropping the empty
    // pieces keeps exactly the alphanumeric runs.
    let words: Vec<&str> = lowered
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .collect();
    words.join("-")
}
}
impl Default for PmcMarkdownConverter {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pmc::models::{Author, JournalInfo, PmcFullText};

    /// Bare-bones article used as the base of the conversion tests: only the
    /// PMC id, title, journal title and publication year are filled in.
    fn minimal_article() -> PmcFullText {
        PmcFullText {
            pmcid: "PMC1234567".to_string(),
            pmid: None,
            title: "Test Article".to_string(),
            authors: vec![],
            journal: JournalInfo::new("Test Journal".to_string()),
            pub_date: "2023".to_string(),
            doi: None,
            sections: vec![],
            references: vec![],
            article_type: None,
            keywords: vec![],
            funding: vec![],
            conflict_of_interest: None,
            acknowledgments: None,
            data_availability: None,
            supplementary_materials: vec![],
        }
    }

    /// A fresh converter starts from the default configuration.
    #[test]
    fn test_markdown_converter_creation() {
        let config = PmcMarkdownConverter::new().config;
        assert!(config.include_metadata);
        assert_eq!(config.heading_style, HeadingStyle::ATX);
        assert_eq!(config.reference_style, ReferenceStyle::Numbered);
    }

    /// Chained builder calls each land in the configuration.
    #[test]
    fn test_configuration_builder() {
        let config = PmcMarkdownConverter::new()
            .with_include_metadata(false)
            .with_heading_style(HeadingStyle::Setext)
            .with_reference_style(ReferenceStyle::AuthorYear)
            .with_max_heading_level(4)
            .config;
        assert!(!config.include_metadata);
        assert_eq!(config.heading_style, HeadingStyle::Setext);
        assert_eq!(config.reference_style, ReferenceStyle::AuthorYear);
        assert_eq!(config.max_heading_level, 4);
    }

    /// ATX headings use `#` marks; Setext underlines levels 1 and 2 only.
    #[test]
    fn test_heading_formatting() {
        let atx = PmcMarkdownConverter::new();
        assert_eq!(atx.format_heading("Title", 1), "# Title");
        assert_eq!(atx.format_heading("Subtitle", 2), "## Subtitle");

        let setext = atx.with_heading_style(HeadingStyle::Setext);
        assert_eq!(setext.format_heading("Title", 1), "Title\n=====");
        assert_eq!(setext.format_heading("Subtitle", 2), "Subtitle\n--------");
        assert_eq!(setext.format_heading("Section", 3), "### Section");
    }

    /// Tags are stripped while the surrounding text is kept.
    #[test]
    fn test_clean_content() {
        let converter = PmcMarkdownConverter::new();
        let cleaned =
            converter.clean_content("<p>This is <em>emphasis</em> and & entities</p>");
        assert_eq!(cleaned, "This is emphasis and & entities");
    }

    /// Anchors are lower-case words joined by single dashes.
    #[test]
    fn test_anchor_creation() {
        let converter = PmcMarkdownConverter::new();
        let cases = [
            ("Introduction", "introduction"),
            ("Methods & Results", "methods-results"),
            ("Discussion (2023)", "discussion-2023"),
        ];
        for (title, expected) in cases {
            assert_eq!(converter.create_anchor(title), expected);
        }
    }

    /// The default conversion emits the Markdown metadata block.
    #[test]
    fn test_basic_conversion() {
        let article = PmcFullText {
            pmid: Some("12345".to_string()),
            authors: vec![Author::from_full_name("John Doe".to_string())],
            doi: Some("10.1000/test".to_string()),
            article_type: Some("research-article".to_string()),
            keywords: vec!["test".to_string(), "example".to_string()],
            ..minimal_article()
        };

        let markdown = PmcMarkdownConverter::new().convert(&article);

        let expected_fragments = [
            "# Test Article",
            "**Authors:** John Doe",
            "**Journal:** Test Journal",
            "DOI: 10.1000/test",
            "**Keywords:** test, example",
        ];
        for fragment in expected_fragments {
            assert!(markdown.contains(fragment), "missing fragment: {fragment:?}");
        }
    }

    /// YAML frontmatter carries every populated metadata field.
    #[test]
    fn test_yaml_frontmatter_basic() {
        let article = PmcFullText {
            pmid: Some("12345".to_string()),
            authors: vec![
                Author::from_full_name("John Doe".to_string()),
                Author::from_full_name("Jane Smith".to_string()),
            ],
            pub_date: "2023-05-15".to_string(),
            doi: Some("10.1000/test".to_string()),
            article_type: Some("research-article".to_string()),
            keywords: vec!["test".to_string(), "example".to_string()],
            ..minimal_article()
        };

        let markdown = PmcMarkdownConverter::new()
            .with_yaml_frontmatter(true)
            .convert(&article);

        assert!(markdown.starts_with("---\n"));
        assert_eq!(
            markdown.matches("---").count(),
            2,
            "Should have opening and closing YAML frontmatter delimiters"
        );

        let expected_fragments = [
            "title: Test Article",
            "authors:",
            "- John Doe",
            "- Jane Smith",
            "journal: Test Journal",
            "pub_date: 2023-05-15",
            "pmcid: PMC1234567",
            "pmid: '12345'",
            "doi: 10.1000/test",
            "article_type: research-article",
            "keywords:",
            "- test",
            "- example",
        ];
        for fragment in expected_fragments {
            assert!(markdown.contains(fragment), "missing fragment: {fragment:?}");
        }
    }

    /// Values containing YAML-significant characters come out quoted.
    #[test]
    fn test_yaml_frontmatter_with_special_characters() {
        let article = PmcFullText {
            pmcid: "PMC7890123".to_string(),
            title: "COVID-19: A Comprehensive Study".to_string(),
            authors: vec![Author::from_full_name("O'Brien, Michael".to_string())],
            journal: JournalInfo::new("Nature: Medicine & Science".to_string()),
            doi: Some("10.1038/s41591-023-01234-5".to_string()),
            article_type: Some("research-article".to_string()),
            keywords: vec![
                "#COVID-19".to_string(),
                "SARS-CoV-2".to_string(),
                "vaccine".to_string(),
            ],
            ..minimal_article()
        };

        let markdown = PmcMarkdownConverter::new()
            .with_yaml_frontmatter(true)
            .convert(&article);

        // Either quoting style is acceptable.
        let title_quoted = markdown.contains("title: 'COVID-19: A Comprehensive Study'")
            || markdown.contains("title: \"COVID-19: A Comprehensive Study\"");
        assert!(title_quoted);

        let journal_quoted = markdown.contains("journal: 'Nature: Medicine & Science'")
            || markdown.contains("journal: \"Nature: Medicine & Science\"");
        assert!(journal_quoted);

        let keyword_quoted =
            markdown.contains("'#COVID-19'") || markdown.contains("\"#COVID-19\"");
        assert!(keyword_quoted);

        assert!(markdown.contains("SARS-CoV-2"));
    }

    /// Without opting in, the output stays in the Markdown metadata format.
    #[test]
    fn test_yaml_frontmatter_backward_compatibility() {
        let converter = PmcMarkdownConverter::new();
        assert!(!converter.config.use_yaml_frontmatter);

        let markdown = converter.convert(&minimal_article());

        assert!(markdown.contains("# Test Article"));
        assert!(markdown.contains("**Journal:** Test Journal"));
        assert!(!markdown.starts_with("---\n"));
    }

    /// The YAML switch composes with the other builder methods.
    #[test]
    fn test_builder_pattern_with_yaml_frontmatter() {
        let config = PmcMarkdownConverter::new()
            .with_yaml_frontmatter(true)
            .with_include_metadata(true)
            .with_heading_style(HeadingStyle::ATX)
            .config;
        assert!(config.use_yaml_frontmatter);
        assert!(config.include_metadata);
        assert_eq!(config.heading_style, HeadingStyle::ATX);
    }
}