use std::collections::HashMap;
use crate::document::PdfDocument;
use crate::geometry::Rect;
use crate::object::{Object, ObjectRef};
/// One bead of an article thread: a rectangle located on a single page.
#[derive(Debug, Clone, PartialEq)]
pub struct Bead {
/// Zero-based position of the bead's page in the order returned by
/// `PdfDocument::all_page_refs`.
pub page_index: usize,
/// Rectangle built from the four numbers of the bead's `R` array.
pub rect: Rect,
}
/// An article thread: an optional title together with the beads collected for it.
#[derive(Debug, Clone, PartialEq)]
pub struct ArticleThread {
/// `Title` string taken from the thread's `I` dictionary; `None` when it is
/// absent or is not a string object.
pub title: Option<String>,
/// Beads in the order they were reached by following `N` links, starting
/// from the bead referenced by the thread's `F` entry.
pub beads: Vec<Bead>,
}
/// Upper bound on the number of beads collected for one thread; the bead walk
/// in `parse_one_thread` stops as soon as this many beads have been gathered.
const MAX_BEADS_PER_THREAD: usize = 4096;
/// Returns an owned copy of the object that `obj` stands for.
///
/// When `obj` is a reference, the referenced object is loaded from `doc` and
/// a load failure yields `None`. Any other object is simply cloned.
fn resolve(doc: &PdfDocument, obj: &Object) -> Option<Object> {
    if let Some(target) = obj.as_reference() {
        // A failed load is reported as `None`; the error itself is discarded.
        return doc.load_object(target).ok();
    }
    Some(obj.clone())
}
/// Converts a four-element array of numbers into a [`Rect`].
///
/// Returns `None` when `arr` does not hold exactly four entries or when any
/// entry is neither a real nor an integer. The values are handed to
/// `Rect::from_points` in the order they appear in the array.
fn parse_rect(arr: &[Object]) -> Option<Rect> {
    let [first, second, third, fourth] = arr else {
        return None;
    };

    // Reals are tried first; integers are accepted as a fallback.
    let to_f32 = |item: &Object| match item.as_real() {
        Some(real) => Some(real as f32),
        None => item.as_integer().map(|int| int as f32),
    };

    let llx = to_f32(first)?;
    let lly = to_f32(second)?;
    let urx = to_f32(third)?;
    let ury = to_f32(fourth)?;
    Some(Rect::from_points(llx, lly, urx, ury))
}
/// Collects the article threads listed under the catalog's `Threads` entry.
///
/// The result is empty when the catalog cannot be obtained, is not a
/// dictionary, has no `Threads` entry, or that entry does not resolve to an
/// array. Entries that fail to parse, and threads that end up with no beads,
/// are left out.
pub fn parse_article_threads(doc: &PdfDocument) -> Vec<ArticleThread> {
    let catalog = match doc.catalog() {
        Ok(value) => value,
        Err(_) => return Vec::new(),
    };
    let threads_entry = match catalog.as_dict().and_then(|dict| dict.get("Threads")) {
        Some(entry) => entry,
        None => return Vec::new(),
    };
    let threads_value = match resolve(doc, threads_entry) {
        Some(value) => value,
        None => return Vec::new(),
    };
    let entries = match threads_value.as_array() {
        Some(items) => items,
        None => return Vec::new(),
    };

    // Map each page reference to its position in document page order so that
    // beads can be tagged with a page index. If the page list cannot be
    // obtained the map stays empty.
    let mut page_index: HashMap<ObjectRef, usize> = HashMap::new();
    let page_refs = doc.all_page_refs().unwrap_or_default();
    for (position, page_ref) in page_refs.into_iter().enumerate() {
        page_index.insert(page_ref, position);
    }

    entries
        .iter()
        .filter_map(|entry| parse_one_thread(doc, entry, &page_index))
        .filter(|thread| !thread.beads.is_empty())
        .collect()
}
fn parse_one_thread(
doc: &PdfDocument,
thread_obj: &Object,
page_index: &HashMap<ObjectRef, usize>,
) -> Option<ArticleThread> {
let thread = resolve(doc, thread_obj)?;
let thread_dict = thread.as_dict()?;
let title = thread_dict
.get("I")
.and_then(|i| resolve(doc, i))
.and_then(|info| info.as_dict()?.get("Title").and_then(string_value));
let first_ref = thread_dict.get("F")?.as_reference()?;
let mut beads = Vec::new();
let mut seen = std::collections::HashSet::new();
let mut cur = Some(first_ref);
while let Some(bead_ref) = cur {
if !seen.insert(bead_ref) || beads.len() >= MAX_BEADS_PER_THREAD {
break; }
let Ok(bead_obj) = doc.load_object(bead_ref) else {
break;
};
let Some(bead_dict) = bead_obj.as_dict() else {
break;
};
if let Some(bead) = parse_bead(bead_dict, page_index) {
beads.push(bead);
}
cur = bead_dict.get("N").and_then(|n| n.as_reference());
}
Some(ArticleThread { title, beads })
}
/// Turns a bead dictionary into a [`Bead`].
///
/// Returns `None` when the `P` entry is missing or is not a reference, when
/// that reference is not a key of `page_index`, when `R` is missing or is not
/// an array, or when `parse_rect` rejects the array.
fn parse_bead(
    bead_dict: &HashMap<String, Object>,
    page_index: &HashMap<ObjectRef, usize>,
) -> Option<Bead> {
    let page_ref = bead_dict.get("P").and_then(|page| page.as_reference())?;
    let idx = page_index.get(&page_ref).copied()?;
    let rect_items = bead_dict.get("R").and_then(|raw| raw.as_array())?;
    let rect = parse_rect(rect_items)?;
    Some(Bead {
        page_index: idx,
        rect,
    })
}
/// Decodes a string object into a Rust `String`.
///
/// Returns `None` for any object that is not `Object::String`. The bytes are
/// interpreted according to their leading byte-order mark:
///
/// * `FE FF` — the remainder is decoded as UTF-16BE; invalid sequences and a
///   dangling trailing byte become U+FFFD.
/// * `EF BB BF` — the remainder is decoded as UTF-8 (lossily).
/// * anything else — the whole string is decoded as UTF-8 (lossily).
fn string_value(obj: &Object) -> Option<String> {
    let Object::String(bytes) = obj else {
        return None;
    };
    let raw: &[u8] = bytes;

    let text = match raw {
        [0xFE, 0xFF, rest @ ..] => {
            let pairs = rest.chunks_exact(2);
            let dangling = !pairs.remainder().is_empty();
            let units: Vec<u16> = pairs
                .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
                .collect();
            let mut decoded = String::from_utf16_lossy(&units);
            if dangling {
                // An odd byte count means the last code unit was cut short.
                decoded.push(char::REPLACEMENT_CHARACTER);
            }
            decoded
        }
        // Drop the UTF-8 byte-order mark so it does not show up as U+FEFF.
        [0xEF, 0xBB, 0xBF, rest @ ..] => String::from_utf8_lossy(rest).into_owned(),
        // NOTE(review): unmarked strings are still treated as UTF-8; bytes
        // >= 0x80 that are really PDFDocEncoding are not mapped — confirm
        // whether a PDFDocEncoding table is wanted here.
        _ => String::from_utf8_lossy(raw).into_owned(),
    };
    Some(text)
}