#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use lopdf::{Object, StringFormat};
use printpdf::{
cmap::ToUnicodeCMap,
text::{decode_pdf_string, decode_tj_operands},
};
/// Parses a ToUnicode CMap from its textual form, verifies every `bfchar` entry of the
/// fixture ended up in `ToUnicodeCMap::mappings`, and then decodes a PDF string object
/// through `decode_pdf_string` using the parsed CMap.
#[test]
fn test_to_unicode_cmap_parsing() {
    // Fixture with thirteen `bfchar` entries: space, comma, question mark and ten
    // Cyrillic letters. The string content is significant, so it stays unindented.
    // NOTE(review): the `codespacerange` section lists a single value (`<0000>`);
    // such a range is usually written as a low/high pair — confirm this is intentional.
    let cmap_data = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
%!PS-Adobe-3.0 Resource-CMap
%%DocumentNeededResources: procset CIDInit
%%IncludeResource: procset CIDInit
/CIDSystemInfo 3 dict dup begin
/Registry (FontSpecific) def
/Ordering (HEIGIDGCBAAHFGBHAEFHCBHGAJHCJDHF) def
/Supplement 0 def
end def
/CMapName /FontSpecific-HEIGIDGCBAAHFGBHAEFHCBHGAJHCJDHF def
/CMapVersion 1 def
/CMapType 2 def
/WMode 0 def
1 begincodespacerange
<0000>
endcodespacerange
13 beginbfchar
<0000> <0020>
<0001> <002c>
<0002> <003f>
<0003> <0432>
<0004> <0434>
<0005> <0438>
<0006> <043a>
<0007> <043b>
<0008> <0442>
<0009> <041f>
<000a> <0430>
<000b> <0435>
<000c> <0440>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let cmap = ToUnicodeCMap::parse(cmap_data).expect("Failed to parse CMap");

    assert_eq!(cmap.mappings.len(), 13, "Expected 13 mappings");

    // Every `bfchar` pair from the fixture as (character code, Unicode code point).
    let expected_mappings = [
        (0x0000, 0x0020), // ' '
        (0x0001, 0x002c), // ','
        (0x0002, 0x003f), // '?'
        (0x0003, 0x0432), // 'в'
        (0x0004, 0x0434), // 'д'
        (0x0005, 0x0438), // 'и'
        (0x0006, 0x043a), // 'к'
        (0x0007, 0x043b), // 'л'
        (0x0008, 0x0442), // 'т'
        (0x0009, 0x041f), // 'П'
        (0x000a, 0x0430), // 'а'
        (0x000b, 0x0435), // 'е'
        (0x000c, 0x0440), // 'р'
    ];
    for &(code, unicode) in expected_mappings.iter() {
        assert_eq!(
            cmap.mappings.get(&code),
            Some(&vec![unicode]),
            "Unexpected mapping for character code {:#06x}",
            code
        );
    }

    // Character codes of the expected sentence, two bytes per code
    // (e.g. `00 09` is code 0x0009, which the fixture maps to U+041F 'П').
    let bytes = [
        0x00, 0x09, 0x00, 0x0c, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x08, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0b,
        0x00, 0x07, 0x00, 0x0a, 0x00, 0x02,
    ];
    let pdf_string = Object::String(bytes.to_vec(), StringFormat::Literal);
    let decoded = decode_pdf_string(&pdf_string, Some(&cmap));
    let expected = "Привет, как дела?";
    assert_eq!(decoded, expected, "Failed to decode PDF string using CMap");
}
/// Decodes the sample byte string with a small hand-written decoder over a manually
/// built `ToUnicodeCMap`, without going through the library's decoding functions.
#[test]
fn test_manual_cmap_decoding() {
    /// Reference decoder: reads `bytes` as consecutive two-byte big-endian character
    /// codes and appends the code points mapped by `cmap` for each code.
    ///
    /// Codes without a mapping, values that are not valid Unicode scalar values, and a
    /// trailing odd byte are skipped silently.
    fn decode_with_cmap(bytes: &[u8], cmap: &ToUnicodeCMap) -> String {
        bytes
            // `chunks_exact` drops an incomplete trailing chunk, which matches
            // ignoring a dangling single byte at the end of the input.
            .chunks_exact(2)
            .map(|pair| u32::from(u16::from_be_bytes([pair[0], pair[1]])))
            .filter_map(|cid| cmap.mappings.get(&cid))
            .flatten()
            .filter_map(|&unicode| std::char::from_u32(unicode))
            .collect()
    }

    // (character code, Unicode code point) pairs; each code maps to a single code point.
    let mappings: BTreeMap<_, _> = [
        (0x0000, 0x0020), // ' '
        (0x0001, 0x002c), // ','
        (0x0002, 0x003f), // '?'
        (0x0003, 0x0432), // 'в'
        (0x0004, 0x0434), // 'д'
        (0x0005, 0x0438), // 'и'
        (0x0006, 0x043a), // 'к'
        (0x0007, 0x043b), // 'л'
        (0x0008, 0x0442), // 'т'
        (0x0009, 0x041f), // 'П'
        (0x000a, 0x0430), // 'а'
        (0x000b, 0x0435), // 'е'
        (0x000c, 0x0440), // 'р'
    ]
    .iter()
    .map(|&(cid, unicode)| (cid, vec![unicode]))
    .collect();
    let cmap = ToUnicodeCMap { mappings };

    // Character codes of the expected sentence, two bytes per code.
    let bytes = [
        0x00, 0x09, 0x00, 0x0c, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x08, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0b,
        0x00, 0x07, 0x00, 0x0a, 0x00, 0x02,
    ];
    let decoded = decode_with_cmap(&bytes, &cmap);
    let expected = "Привет, как дела?";
    assert_eq!(decoded, expected, "Manual decoding failed");
}
/// Passes a one-element operand array through `decode_tj_operands` with a manually
/// built `ToUnicodeCMap` and checks that the concatenated `TextItem::Text` segments
/// equal the expected sentence.
#[test]
fn test_tj_operator_decoding() {
    // (character code, Unicode code point) pairs; each code maps to a single code point.
    let code_table = [
        (0x0000, 0x0020), // ' '
        (0x0001, 0x002c), // ','
        (0x0002, 0x003f), // '?'
        (0x0003, 0x0432), // 'в'
        (0x0004, 0x0434), // 'д'
        (0x0005, 0x0438), // 'и'
        (0x0006, 0x043a), // 'к'
        (0x0007, 0x043b), // 'л'
        (0x0008, 0x0442), // 'т'
        (0x0009, 0x041f), // 'П'
        (0x000a, 0x0430), // 'а'
        (0x000b, 0x0435), // 'е'
        (0x000c, 0x0440), // 'р'
    ];
    let mappings: BTreeMap<_, _> = code_table
        .iter()
        .map(|&(cid, unicode)| (cid, vec![unicode]))
        .collect();
    let cmap = ToUnicodeCMap { mappings };

    // Character codes of the expected sentence, two bytes per code.
    let encoded = [
        0x00, 0x09, 0x00, 0x0c, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x08, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0b,
        0x00, 0x07, 0x00, 0x0a, 0x00, 0x02,
    ];
    let tj_array = vec![Object::String(encoded.to_vec(), StringFormat::Literal)];

    // Concatenate the text segments in order; any other kind of item is ignored.
    let result = decode_tj_operands(&tj_array, Some(&cmap))
        .into_iter()
        .fold(String::new(), |mut joined, item| {
            if let printpdf::TextItem::Text(segment) = item {
                joined.push_str(&segment);
            }
            joined
        });

    let expected = "Привет, как дела?";
    assert_eq!(result, expected, "TJ operator decoding failed");
}
}