use oxidize_pdf::parser::{PdfDocument, PdfReader};
use oxidize_pdf::text::{ExtractionOptions, TextExtractor};
use std::path::PathBuf;
/// Location of the issue5010 PDF fixture, relative to the crate root
/// (it is joined onto `CARGO_MANIFEST_DIR` before being opened).
const FIXTURE: &str = "tests/fixtures/issue_272_issue5010_korean_usecmap.pdf";
/// Opens the issue5010 fixture and returns the text extracted from page index 0.
///
/// Extraction runs with `ExtractionOptions::default()`.
///
/// # Panics
///
/// Panics if the fixture cannot be opened by `PdfReader`, or if extracting
/// page 0 returns an error.
fn extract_page0_text() -> String {
    // Resolve the fixture against the crate root so the test does not depend
    // on the working directory it is launched from.
    let mut fixture_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture_path.push(FIXTURE);

    let document = PdfDocument::new(
        PdfReader::open(&fixture_path).expect("issue5010 fixture must be readable"),
    );

    let mut extractor = TextExtractor::with_options(ExtractionOptions::default());
    let page = extractor
        .extract_from_page(&document, 0)
        .expect("extract page 0");
    page.text
}
/// Checks that page 0 of the issue5010 fixture extracts to real Hangul.
///
/// The extracted text must contain at least one code point in
/// U+AC00..=U+D7A3 (precomposed Hangul syllables) and must not contain any
/// U+FFFD replacement characters.
#[test]
fn issue5010_usecmap_korea1_resolves_real_hangul() {
    let text = extract_page0_text();

    // Tally both categories in a single pass over the extracted characters.
    let mut hangul = 0usize;
    let mut replacement = 0usize;
    for ch in text.chars() {
        match ch {
            '\u{AC00}'..='\u{D7A3}' => hangul += 1,
            '\u{FFFD}' => replacement += 1,
            _ => {}
        }
    }

    assert!(hangul > 0, "expected real hangul, got: {text:?}");
    assert_eq!(
        replacement, 0,
        "no replacement chars expected, got: {text:?}"
    );
}