use std::collections::HashMap;
use rpdfium_core::{Name, PdfSource};
use rpdfium_parser::{Object, ObjectStore};
use crate::error::{DocError, DocResult};
const MAX_TREE_DEPTH: usize = 64;
#[derive(Debug, Clone)]
pub struct NameTree<V> {
entries: Vec<(String, V)>,
}
impl<V> NameTree<V> {
pub fn parse<S: PdfSource, F>(
root: &Object,
store: &ObjectStore<S>,
convert: F,
) -> DocResult<Self>
where
F: Fn(&Object) -> DocResult<V>,
{
let root_dict = resolve_dict(root, store)?;
let mut entries = Vec::new();
let mut stack: Vec<(&HashMap<Name, Object>, usize)> = vec![(root_dict, 0)];
while let Some((dict, depth)) = stack.pop() {
if depth > MAX_TREE_DEPTH {
return Err(DocError::DepthExceeded);
}
if let Some(Object::Array(names_arr)) = dict.get(&Name::names()) {
let mut i = 0;
while i + 1 < names_arr.len() {
let key_obj = store
.deep_resolve(&names_arr[i])
.map_err(|e| DocError::Parser(e.to_string()))?;
let val_obj = store
.deep_resolve(&names_arr[i + 1])
.map_err(|e| DocError::Parser(e.to_string()))?;
let key = extract_string(key_obj)?;
let value = convert(val_obj)?;
entries.push((key, value));
i += 2;
}
}
if let Some(Object::Array(kids)) = dict.get(&Name::kids()) {
for kid in kids.iter().rev() {
let kid_obj = store
.deep_resolve(kid)
.map_err(|e| DocError::Parser(e.to_string()))?;
let kid_dict = kid_obj.as_dict().ok_or(DocError::UnexpectedType)?;
stack.push((kid_dict, depth + 1));
}
}
}
Ok(Self { entries })
}
pub fn count(&self) -> usize {
self.entries.len()
}
#[inline]
pub fn get_count(&self) -> usize {
self.count()
}
pub fn get(&self, name: &str) -> Option<&V> {
self.entries.iter().find(|(k, _)| k == name).map(|(_, v)| v)
}
pub fn entries(&self) -> &[(String, V)] {
&self.entries
}
pub fn add(&mut self, name: String, value: V) -> DocResult<()> {
let pos = self
.entries
.partition_point(|(k, _)| k.as_str() < name.as_str());
if self.entries.get(pos).is_some_and(|(k, _)| k == &name) {
self.entries[pos] = (name, value);
} else {
self.entries.insert(pos, (name, value));
}
Ok(())
}
pub fn delete_by_index(&mut self, index: usize) -> DocResult<()> {
if index >= self.entries.len() {
return Err(DocError::InvalidIndex(index));
}
self.entries.remove(index);
Ok(())
}
pub fn delete_by_name(&mut self, name: &str) -> DocResult<()> {
if let Some(pos) = self.entries.iter().position(|(k, _)| k == name) {
self.entries.remove(pos);
Ok(())
} else {
Err(DocError::NotFound(name.to_string()))
}
}
}
fn resolve_dict<'a, S: PdfSource>(
obj: &'a Object,
store: &'a ObjectStore<S>,
) -> DocResult<&'a HashMap<Name, Object>> {
let resolved = store
.deep_resolve(obj)
.map_err(|e| DocError::Parser(e.to_string()))?;
resolved.as_dict().ok_or(DocError::UnexpectedType)
}
fn extract_string(obj: &Object) -> DocResult<String> {
if let Some(s) = obj.as_string() {
return Ok(s.to_string_lossy());
}
if let Some(n) = obj.as_name() {
return Ok(n.as_str().into_owned());
}
Err(DocError::UnexpectedType)
}
#[cfg(test)]
mod tests {
use super::*;
use rpdfium_core::PdfString;
fn str_obj(s: &str) -> Object {
Object::String(PdfString::from_bytes(s.as_bytes().to_vec()))
}
fn int_obj(n: i64) -> Object {
Object::Integer(n)
}
fn convert_int(obj: &Object) -> DocResult<i64> {
obj.as_i64().ok_or(DocError::UnexpectedType)
}
fn build_store() -> ObjectStore<Vec<u8>> {
let pdf = build_minimal_pdf();
ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
}
fn build_minimal_pdf() -> Vec<u8> {
let mut pdf = Vec::new();
pdf.extend_from_slice(b"%PDF-1.4\n");
let obj1_offset = pdf.len();
pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
let obj2_offset = pdf.len();
pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
let xref_offset = pdf.len();
pdf.extend_from_slice(b"xref\n0 3\n");
pdf.extend_from_slice(b"0000000000 65535 f \r\n");
pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
pdf
}
#[test]
fn test_empty_tree() {
let store = build_store();
let root = Object::Dictionary(HashMap::new());
let tree = NameTree::parse(&root, &store, convert_int).unwrap();
assert!(tree.entries().is_empty());
}
#[test]
fn test_single_level_leaf() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(
Name::names(),
Object::Array(vec![
str_obj("alpha"),
int_obj(1),
str_obj("beta"),
int_obj(2),
]),
);
let root = Object::Dictionary(dict);
let tree = NameTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.entries().len(), 2);
assert_eq!(tree.get("alpha"), Some(&1));
assert_eq!(tree.get("beta"), Some(&2));
}
#[test]
fn test_two_level_tree() {
let store = build_store();
let mut leaf1 = HashMap::new();
leaf1.insert(
Name::names(),
Object::Array(vec![str_obj("a"), int_obj(10)]),
);
let mut leaf2 = HashMap::new();
leaf2.insert(
Name::names(),
Object::Array(vec![str_obj("b"), int_obj(20)]),
);
let mut root_dict = HashMap::new();
root_dict.insert(
Name::kids(),
Object::Array(vec![Object::Dictionary(leaf1), Object::Dictionary(leaf2)]),
);
let root = Object::Dictionary(root_dict);
let tree = NameTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.entries().len(), 2);
assert_eq!(tree.get("a"), Some(&10));
assert_eq!(tree.get("b"), Some(&20));
}
#[test]
fn test_lookup_miss() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(
Name::names(),
Object::Array(vec![str_obj("key"), int_obj(42)]),
);
let root = Object::Dictionary(dict);
let tree = NameTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.get("key"), Some(&42));
assert_eq!(tree.get("missing"), None);
}
#[test]
fn test_duplicate_key_keeps_first() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(
Name::names(),
Object::Array(vec![str_obj("dup"), int_obj(1), str_obj("dup"), int_obj(2)]),
);
let root = Object::Dictionary(dict);
let tree = NameTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.get("dup"), Some(&1));
assert_eq!(tree.entries().len(), 2);
}
#[test]
fn test_add_inserts_sorted() {
let store = build_store();
let root = Object::Dictionary(HashMap::new());
let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
tree.add("beta".to_string(), 2).unwrap();
tree.add("alpha".to_string(), 1).unwrap();
tree.add("gamma".to_string(), 3).unwrap();
let entries = tree.entries();
assert_eq!(entries.len(), 3);
assert_eq!(entries[0].0, "alpha");
assert_eq!(entries[1].0, "beta");
assert_eq!(entries[2].0, "gamma");
assert_eq!(tree.get("beta"), Some(&2));
}
#[test]
fn test_add_replaces_existing_key() {
let store = build_store();
let root = Object::Dictionary(HashMap::new());
let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
tree.add("key".to_string(), 1).unwrap();
tree.add("key".to_string(), 99).unwrap();
assert_eq!(tree.entries().len(), 1);
assert_eq!(tree.get("key"), Some(&99));
}
#[test]
fn test_delete_by_index_removes_entry() {
let store = build_store();
let root = Object::Dictionary(HashMap::new());
let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
tree.add("a".to_string(), 1).unwrap();
tree.add("b".to_string(), 2).unwrap();
tree.delete_by_index(0).unwrap();
assert_eq!(tree.entries().len(), 1);
assert_eq!(tree.get("b"), Some(&2));
}
#[test]
fn test_delete_by_index_out_of_range() {
let store = build_store();
let root = Object::Dictionary(HashMap::new());
let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
let result = tree.delete_by_index(0);
assert!(matches!(result.unwrap_err(), DocError::InvalidIndex(0)));
}
#[test]
fn test_delete_by_name_removes_entry() {
let store = build_store();
let root = Object::Dictionary(HashMap::new());
let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
tree.add("key".to_string(), 42).unwrap();
tree.delete_by_name("key").unwrap();
assert!(tree.entries().is_empty());
}
#[test]
fn test_delete_by_name_not_found() {
let store = build_store();
let root = Object::Dictionary(HashMap::new());
let mut tree = NameTree::parse(&root, &store, convert_int).unwrap();
let result = tree.delete_by_name("missing");
assert!(matches!(result.unwrap_err(), DocError::NotFound(_)));
}
#[test]
fn test_cpdf_name_tree_get_unicode_name_with_bom() {
let store = build_store();
let bom_key = PdfString::from_bytes(vec![0xFE, 0xFF, 0x00, 0x31]);
let mut dict = HashMap::new();
dict.insert(
Name::names(),
Object::Array(vec![Object::String(bom_key), int_obj(100)]),
);
let root = Object::Dictionary(dict);
let tree = NameTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.count(), 1);
let (stored_key, stored_val) = &tree.entries()[0];
assert_eq!(*stored_val, 100);
assert!(tree.get(stored_key).is_some());
assert_eq!(*tree.get(stored_key).unwrap(), 100);
}
#[test]
fn test_cpdf_name_tree_get_from_tree_with_limits_array_with_4_items() {
let store = build_store();
let great_grand_kid4 = {
let mut d = HashMap::new();
d.insert(
Name::names(),
Object::Array(vec![
str_obj("1.txt"),
int_obj(111),
str_obj("2.txt"),
int_obj(222),
]),
);
Object::Dictionary(d)
};
let great_grand_kid5 = {
let mut d = HashMap::new();
d.insert(
Name::names(),
Object::Array(vec![
str_obj("3.txt"),
int_obj(333),
str_obj("5.txt"),
int_obj(555),
]),
);
Object::Dictionary(d)
};
let grand_kid2 = {
let mut d = HashMap::new();
d.insert(
Name::kids(),
Object::Array(vec![great_grand_kid4, great_grand_kid5]),
);
Object::Dictionary(d)
};
let grand_kid3 = {
let mut d = HashMap::new();
d.insert(
Name::names(),
Object::Array(vec![str_obj("9.txt"), int_obj(999)]),
);
d.insert(
Name::from("Limits"),
Object::Array(vec![
str_obj("9.txt"),
str_obj("9.txt"),
int_obj(5),
int_obj(6),
]),
);
Object::Dictionary(d)
};
let kid1 = {
let mut d = HashMap::new();
d.insert(Name::kids(), Object::Array(vec![grand_kid2, grand_kid3]));
Object::Dictionary(d)
};
let mut root_dict = HashMap::new();
root_dict.insert(Name::kids(), Object::Array(vec![kid1]));
let root = Object::Dictionary(root_dict);
let tree = NameTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.count(), 5);
assert_eq!(tree.get("1.txt"), Some(&111));
assert_eq!(tree.get("2.txt"), Some(&222));
assert_eq!(tree.get("3.txt"), Some(&333));
assert_eq!(tree.get("5.txt"), Some(&555));
assert_eq!(tree.get("9.txt"), Some(&999));
}
#[test]
fn test_name_object_as_key() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(
Name::names(),
Object::Array(vec![Object::Name(Name::from("myname")), int_obj(99)]),
);
let root = Object::Dictionary(dict);
let tree = NameTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.get("myname"), Some(&99));
}
}