mod common;
use common::get_pmc_xml_test_cases;
use tracing::{info, warn};
fn count_figures_recursive(section: &pubmed_client::pmc::models::ArticleSection) -> usize {
let mut count = section.figures.len();
for subsection in §ion.subsections {
count += count_figures_recursive(subsection);
}
count
}
fn print_figures_recursive(section: &pubmed_client::pmc::models::ArticleSection, indent: &str) {
for figure in §ion.figures {
println!("{}Figure ID: {}", indent, figure.id);
println!("{} Label: {:?}", indent, figure.label);
println!("{} Caption: {} chars", indent, figure.caption.len());
println!("{} File name: {:?}", indent, figure.file_name);
println!(
"{} Caption preview: {}",
indent,
figure.caption.chars().take(100).collect::<String>()
);
}
for subsection in §ion.subsections {
print_figures_recursive(subsection, &format!("{} ", indent));
}
}
fn find_first_figure(
sections: &[pubmed_client::pmc::models::ArticleSection],
) -> Option<&pubmed_client::pmc::models::Figure> {
for section in sections {
if !section.figures.is_empty() {
return Some(§ion.figures[0]);
}
if let Some(figure) = find_first_figure(§ion.subsections) {
return Some(figure);
}
}
None
}
/// Tests covering the PMC XML fixture helpers from `common` and figure extraction
/// by `parse_pmc_xml`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that every fixture returned by `get_pmc_xml_test_cases` is non-empty,
    /// contains an `<article` tag and mentions "PMC".
    ///
    /// Logs a warning and returns without failing when no fixtures are available.
    #[test]
    fn test_parse_all_pmc_xml_files() {
        let test_cases = get_pmc_xml_test_cases();
        if test_cases.is_empty() {
            warn!("No XML test files found in tests/test_data/pmc_xml");
            return;
        }

        for test_case in test_cases {
            info!(filename = test_case.filename(), "Testing file");

            let xml_content = test_case.read_xml_content_or_panic();
            assert!(!xml_content.is_empty(), "XML file should not be empty");
            assert!(
                xml_content.contains("<article"),
                "Should contain article tag"
            );
            assert!(xml_content.contains("PMC"), "Should contain PMC reference");

            info!(filename = test_case.filename(), "Basic validation passed");
        }
    }

    /// Exercises the accessors of the first available fixture: its filename, its
    /// `pmcid`, and both content readers. Does nothing when there are no fixtures.
    #[test]
    fn test_xml_test_case_functionality() {
        let test_cases = get_pmc_xml_test_cases();
        if let Some(first_case) = test_cases.first() {
            assert!(first_case.filename().ends_with(".xml"));
            assert!(!first_case.pmcid.is_empty());

            let content = first_case.read_xml_content();
            assert!(content.is_ok());

            let content_panic = first_case.read_xml_content_or_panic();
            assert!(!content_panic.is_empty());

            info!("PmcXmlTestCase functionality validated");
        }
    }

    /// Looks the first fixture up again by filename via `get_pmc_xml_test_case` and
    /// checks that the filename and `pmcid` match. Does nothing when there are no
    /// fixtures.
    #[test]
    fn test_specific_xml_file_access() {
        use common::get_pmc_xml_test_case;

        let test_cases = get_pmc_xml_test_cases();
        if let Some(first_case) = test_cases.first() {
            let filename = first_case.filename();
            // A fixture that was just listed must also be retrievable by name.
            let specific_case = get_pmc_xml_test_case(filename)
                .expect("fixture listed by get_pmc_xml_test_cases should be found by filename");

            assert_eq!(specific_case.filename(), filename);
            assert_eq!(specific_case.pmcid, first_case.pmcid);

            info!("Specific XML file access validated");
        }
    }

    /// Checks that looking up a filename that is not a fixture yields `None`.
    #[test]
    fn test_nonexistent_file_handling() {
        use common::get_pmc_xml_test_case;

        let nonexistent = get_pmc_xml_test_case("nonexistent_file.xml");
        assert!(nonexistent.is_none());

        info!("Nonexistent file handling validated");
    }

    /// Debug-oriented test for figure extraction from `PMC7906746.xml`.
    ///
    /// The file is read with a relative path; when it cannot be read the test logs
    /// a warning and returns. When the parser extracts at least one figure, the
    /// first one found is checked against the expected id, label and caption text.
    /// When it extracts none, diagnostic output is printed but the test does not
    /// fail.
    #[test]
    fn test_pmc7906746_figure_extraction() {
        use pubmed_client::pmc::parser::parse_pmc_xml;
        use std::fs;

        println!("🔍 Starting PMC7906746 figure extraction debug test");

        let xml_content = match fs::read_to_string("PMC7906746.xml") {
            Ok(content) => content,
            Err(_) => {
                warn!("PMC7906746.xml not found - skipping this test");
                return;
            }
        };

        let article = match parse_pmc_xml(&xml_content, "PMC7906746") {
            Ok(article) => article,
            Err(err) => panic!("Failed to parse PMC7906746 XML: {:?}", err),
        };

        println!("Article title: {}", article.title);
        println!("Number of sections: {}", article.sections.len());

        let mut total_figures = 0;
        for (i, section) in article.sections.iter().enumerate() {
            let section_figures = count_figures_recursive(section);
            total_figures += section_figures;
            println!(
                "Section {}: type={}, title={:?}, figures={}",
                i, section.section_type, section.title, section_figures
            );
            if section_figures > 0 {
                print_figures_recursive(section, "");
            }
        }
        println!("Total figures found: {}", total_figures);

        if total_figures == 0 {
            println!("🔍 No figures found by library parser. Debugging...");

            // Substring count: this also matches any tag whose name merely starts
            // with "fig", so it is an upper bound on real <fig> elements.
            let fig_count = xml_content.matches("<fig").count();
            println!("Raw XML contains {} <fig> tags", fig_count);

            for (i, section) in article.sections.iter().enumerate() {
                println!(
                    "Section {}: type={}, content length={}, title={:?}",
                    i,
                    section.section_type,
                    section.content.len(),
                    section.title
                );
                if section.content.contains("fig1") || section.content.contains("Figure") {
                    println!(" ⚠️ This section mentions figures!");
                    println!(
                        " Content preview: {}",
                        section.content.chars().take(200).collect::<String>()
                    );
                }
            }

            // Deliberately reported rather than asserted: this is a diagnostic path.
            println!("❌ No figures extracted by the library parser - this indicates a bug!");
        } else {
            let figure =
                find_first_figure(&article.sections).expect("No figure found in any section");

            assert_eq!(figure.id, "fig1");
            assert_eq!(figure.label, Some("Figure".to_string()));
            assert!(figure.caption.contains("COVID-19 hospitalisations"));
            assert!(figure.caption.contains("Manaus, Brazil"));

            println!("Figure file_name: {:?}", figure.file_name);
            println!("✅ PMC7906746 figure extraction test passed!");
        }
    }
}