use std::collections::HashMap;
use rpdfium_core::{Name, PdfSource};
use rpdfium_parser::{Object, ObjectStore};
use crate::error::{DocError, DocResult};
const MAX_TREE_DEPTH: usize = 64;
#[derive(Debug, Clone)]
pub struct NumberTree<V> {
entries: Vec<(i64, V)>,
}
impl<V> NumberTree<V> {
pub fn parse<S: PdfSource, F>(
root: &Object,
store: &ObjectStore<S>,
convert: F,
) -> DocResult<Self>
where
F: Fn(&Object, &ObjectStore<S>) -> DocResult<V>,
{
let root_dict = resolve_dict(root, store)?;
let mut entries = Vec::new();
let mut stack: Vec<(&HashMap<Name, Object>, usize)> = vec![(root_dict, 0)];
while let Some((dict, depth)) = stack.pop() {
if depth > MAX_TREE_DEPTH {
return Err(DocError::DepthExceeded);
}
if let Some(Object::Array(nums_arr)) = dict.get(&Name::nums()) {
let mut i = 0;
while i + 1 < nums_arr.len() {
let key_obj = store
.deep_resolve(&nums_arr[i])
.map_err(|e| DocError::Parser(e.to_string()))?;
let val_obj = store
.deep_resolve(&nums_arr[i + 1])
.map_err(|e| DocError::Parser(e.to_string()))?;
let key = key_obj.as_i64().ok_or(DocError::UnexpectedType)?;
let value = convert(val_obj, store)?;
entries.push((key, value));
i += 2;
}
}
if let Some(Object::Array(kids)) = dict.get(&Name::kids()) {
for kid in kids.iter().rev() {
let kid_obj = store
.deep_resolve(kid)
.map_err(|e| DocError::Parser(e.to_string()))?;
let kid_dict = kid_obj.as_dict().ok_or(DocError::UnexpectedType)?;
stack.push((kid_dict, depth + 1));
}
}
}
entries.sort_by_key(|(k, _)| *k);
Ok(Self { entries })
}
pub fn get(&self, num: i64) -> Option<&V> {
self.entries
.binary_search_by_key(&num, |(k, _)| *k)
.ok()
.map(|idx| &self.entries[idx].1)
}
pub fn floor(&self, num: i64) -> Option<&V> {
match self.entries.binary_search_by_key(&num, |(k, _)| *k) {
Ok(idx) => Some(&self.entries[idx].1),
Err(idx) => {
if idx > 0 {
Some(&self.entries[idx - 1].1)
} else {
None
}
}
}
}
#[inline]
pub fn get_floor(&self, num: i64) -> Option<&V> {
self.floor(num)
}
pub fn entries(&self) -> &[(i64, V)] {
&self.entries
}
}
fn resolve_dict<'a, S: PdfSource>(
obj: &'a Object,
store: &'a ObjectStore<S>,
) -> DocResult<&'a HashMap<Name, Object>> {
let resolved = store
.deep_resolve(obj)
.map_err(|e| DocError::Parser(e.to_string()))?;
resolved.as_dict().ok_or(DocError::UnexpectedType)
}
#[cfg(test)]
mod tests {
use super::*;
fn int_obj(n: i64) -> Object {
Object::Integer(n)
}
fn convert_int<S: PdfSource>(obj: &Object, _store: &ObjectStore<S>) -> DocResult<i64> {
obj.as_i64().ok_or(DocError::UnexpectedType)
}
fn build_store() -> ObjectStore<Vec<u8>> {
let pdf = build_minimal_pdf();
ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
}
fn build_minimal_pdf() -> Vec<u8> {
let mut pdf = Vec::new();
pdf.extend_from_slice(b"%PDF-1.4\n");
let obj1_offset = pdf.len();
pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
let obj2_offset = pdf.len();
pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
let xref_offset = pdf.len();
pdf.extend_from_slice(b"xref\n0 3\n");
pdf.extend_from_slice(b"0000000000 65535 f \r\n");
pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
pdf
}
#[test]
fn test_empty_tree() {
let store = build_store();
let root = Object::Dictionary(HashMap::new());
let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
assert!(tree.entries().is_empty());
}
#[test]
fn test_single_level_leaf() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(
Name::nums(),
Object::Array(vec![int_obj(0), int_obj(100), int_obj(5), int_obj(500)]),
);
let root = Object::Dictionary(dict);
let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.entries().len(), 2);
assert_eq!(tree.get(0), Some(&100));
assert_eq!(tree.get(5), Some(&500));
}
#[test]
fn test_get_floor_exact() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(
Name::nums(),
Object::Array(vec![int_obj(0), int_obj(10), int_obj(5), int_obj(50)]),
);
let root = Object::Dictionary(dict);
let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.floor(5), Some(&50));
}
#[test]
fn test_get_floor_between() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(
Name::nums(),
Object::Array(vec![
int_obj(0),
int_obj(10),
int_obj(5),
int_obj(50),
int_obj(10),
int_obj(100),
]),
);
let root = Object::Dictionary(dict);
let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.floor(7), Some(&50));
}
#[test]
fn test_get_floor_before_first() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(Name::nums(), Object::Array(vec![int_obj(5), int_obj(50)]));
let root = Object::Dictionary(dict);
let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.floor(3), None);
}
#[test]
fn test_two_level_tree() {
let store = build_store();
let mut leaf1 = HashMap::new();
leaf1.insert(Name::nums(), Object::Array(vec![int_obj(0), int_obj(100)]));
let mut leaf2 = HashMap::new();
leaf2.insert(Name::nums(), Object::Array(vec![int_obj(10), int_obj(200)]));
let mut root_dict = HashMap::new();
root_dict.insert(
Name::kids(),
Object::Array(vec![Object::Dictionary(leaf1), Object::Dictionary(leaf2)]),
);
let root = Object::Dictionary(root_dict);
let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.entries().len(), 2);
assert_eq!(tree.get(0), Some(&100));
assert_eq!(tree.get(10), Some(&200));
}
#[test]
fn test_lookup_miss() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(Name::nums(), Object::Array(vec![int_obj(1), int_obj(11)]));
let root = Object::Dictionary(dict);
let tree = NumberTree::parse(&root, &store, convert_int).unwrap();
assert_eq!(tree.get(1), Some(&11));
assert_eq!(tree.get(99), None);
}
}