use crate::object::ObjectIdentifier;
use crate::object::Stream;
use crate::reader::ReaderContext;
use crate::sync::HashMap;
use crate::sync::{Arc, Mutex, MutexExt};
use crate::util::SegmentList;
use alloc::vec::Vec;
use core::fmt::{Debug, Formatter};
/// Offsets table for one object stream, stored as a list of `(u32, usize)` pairs.
///
/// NOTE(review): the pairs are presumably `(object number, byte offset)` — confirm
/// against the code that builds this table.
pub(crate) type ObjectStreamOffsets = Vec<(u32, usize)>;
/// Shared, type-erased handle to a byte buffer.
///
/// Any value implementing `AsRef<[u8]>` can sit behind the `Arc`; the bytes are read
/// back through this type's own `AsRef<[u8]>` implementation. Cloning clones the
/// `Arc` field, so clones refer to the same buffer.
#[derive(Clone)]
pub struct PdfData {
// With the `std` feature the erased buffer must additionally be `Send + Sync`.
#[cfg(feature = "std")]
inner: Arc<dyn AsRef<[u8]> + Send + Sync>,
// Without `std` only `AsRef<[u8]>` is required of the buffer.
#[cfg(not(feature = "std"))]
inner: Arc<dyn AsRef<[u8]>>,
}
impl Debug for PdfData {
    /// Writes a fixed placeholder; the wrapped bytes are never printed.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        f.write_fmt(format_args!("PdfData {{ ... }}"))
    }
}
impl AsRef<[u8]> for PdfData {
    /// Borrows the wrapped buffer as a byte slice.
    fn as_ref(&self) -> &[u8] {
        // Step through the `Arc` to the erased buffer, then ask it for its bytes.
        let buffer = &*self.inner;
        buffer.as_ref()
    }
}
#[cfg(feature = "std")]
impl<T> From<Arc<T>> for PdfData
where
    T: AsRef<[u8]> + Send + Sync + 'static,
{
    /// Wraps an already shared buffer; the `Arc` is moved in as-is.
    fn from(data: Arc<T>) -> Self {
        // Erase the concrete buffer type into the trait-object form held by `inner`.
        let inner: Arc<dyn AsRef<[u8]> + Send + Sync> = data;
        Self { inner }
    }
}
#[cfg(not(feature = "std"))]
impl<T> From<Arc<T>> for PdfData
where
    T: AsRef<[u8]> + 'static,
{
    /// Wraps an already shared buffer; the `Arc` is moved in as-is.
    fn from(data: Arc<T>) -> Self {
        // Erase the concrete buffer type into the trait-object form held by `inner`.
        let inner: Arc<dyn AsRef<[u8]>> = data;
        Self { inner }
    }
}
impl From<Vec<u8>> for PdfData {
    /// Takes ownership of `data` and places it behind a new `Arc`.
    fn from(data: Vec<u8>) -> Self {
        let shared = Arc::new(data);
        Self { inner: shared }
    }
}
/// Owns the raw input bytes together with two caches keyed by `ObjectIdentifier`.
pub(crate) struct Data {
/// Raw input bytes, handed out by `get`.
data: PdfData,
/// Decoded stream contents, one slot per index recorded in `map`. A slot holding
/// `None` records that looking up or decoding that stream failed.
decoded: SegmentList<Option<Vec<u8>>, 32>,
/// Maps an object id to its slot index in `decoded`.
map: Mutex<HashMap<ObjectIdentifier, usize>>,
/// Maps an object id to its shared offsets table; filled by
/// `get_object_stream_offsets_or_init`.
object_stream_offsets: Mutex<HashMap<ObjectIdentifier, Arc<ObjectStreamOffsets>>>,
}
impl Debug for Data {
    /// Writes a fixed placeholder; neither the bytes nor the caches are printed.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        f.write_fmt(format_args!("Data {{ ... }}"))
    }
}
impl Data {
    /// Wraps `data` and starts with every cache empty.
    pub(crate) fn new(data: PdfData) -> Self {
        Self {
            data,
            decoded: SegmentList::new(),
            map: Mutex::new(HashMap::new()),
            object_stream_offsets: Mutex::new(HashMap::new()),
        }
    }

    /// Returns the raw bytes this store was created with.
    pub(crate) fn get(&self) -> &PdfData {
        &self.data
    }

    /// Returns the cached offsets table for `id`, running `parse` to fill the cache
    /// on a miss.
    ///
    /// * `parse` is only called when no entry exists for `id`, and it runs while the
    ///   cache guard is not held.
    /// * A `None` from `parse` is passed through and nothing is stored, so a later
    ///   call gets to try again.
    /// * If an entry for `id` appeared while `parse` was running, that existing entry
    ///   is returned and the freshly parsed table is dropped.
    pub(crate) fn get_object_stream_offsets_or_init<F>(
        &self,
        id: ObjectIdentifier,
        parse: F,
    ) -> Option<Arc<ObjectStreamOffsets>>
    where
        F: FnOnce() -> Option<ObjectStreamOffsets>,
    {
        if let Some(hit) = self.object_stream_offsets.get().get(&id).cloned() {
            return Some(hit);
        }
        let parsed = Arc::new(parse()?);
        let mut locked = self.object_stream_offsets.get();
        Some(locked.entry(id).or_insert(parsed).clone())
    }

    /// Number of entries currently held in the offsets cache (test helper).
    #[cfg(test)]
    pub(crate) fn object_stream_offsets_cache_len(&self) -> usize {
        self.object_stream_offsets.get().len()
    }

    /// Returns the decoded bytes of stream object `id`, decoding it on first request.
    ///
    /// Every id is assigned one slot in `decoded`; the slot index is recorded in `map`.
    /// The call that reserves the slot resolves the stream through `ctx` and decodes
    /// it. Every other call only reads the slot.
    ///
    /// Returns `None` when the stream cannot be resolved or decoded, or when the slot
    /// for `id` is already reserved but `decoded.get` has nothing for it.
    pub(crate) fn get_with(&self, id: ObjectIdentifier, ctx: &ReaderContext<'_>) -> Option<&[u8]> {
        // Look up and, if needed, reserve the slot under ONE guard. The next index is
        // `len()`, which is only a fresh index if every insert adds a new key. With the
        // lookup and the insert in separate critical sections, two callers missing on
        // the same id would both insert; the second overwrites without growing the
        // map, and the next new id is then given an index that is already taken.
        // A single guard also means this code does not depend on when an `if let`
        // scrutinee temporary is dropped, which differs between Rust editions.
        let (idx, reserved_here) = {
            let mut locked = self.map.get();
            let existing = locked.get(&id).copied();
            match existing {
                Some(idx) => (idx, false),
                None => {
                    let idx = locked.len();
                    locked.insert(id, idx);
                    (idx, true)
                }
            }
        };

        if !reserved_here {
            // Read-only path: never decodes. NOTE(review): presumably this is also what
            // a nested lookup of the same id sees while its slot is still being filled
            // — confirm against `SegmentList::get`.
            return self.decoded.get(idx)?.as_deref();
        }

        // The guard is released before decoding, so nested lookups made while
        // resolving the stream can take it again.
        self.decoded
            .get_or_init(idx, || {
                let stream = ctx.xref().get_with::<Stream<'_>>(id, ctx)?;
                stream.decoded().ok()
            })
            .as_deref()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use core::sync::atomic::{AtomicUsize, Ordering};

    /// Builds a `Data` over a 16-byte zeroed buffer.
    fn make_data() -> Data {
        let bytes = alloc::vec![0u8; 16];
        Data::new(PdfData::from(bytes))
    }

    /// Builds an identifier from `n`, with the second constructor argument fixed at 0.
    fn id(n: i32) -> ObjectIdentifier {
        ObjectIdentifier::new(n, 0)
    }

    /// A miss runs the parser once; the following hit returns the very same `Arc`.
    #[test]
    fn qf2b_cache_miss_parses_once_and_returns_same_arc() {
        let store = make_data();
        let parse_calls = AtomicUsize::new(0);

        let first = store.get_object_stream_offsets_or_init(id(7), || {
            parse_calls.fetch_add(1, Ordering::SeqCst);
            Some(alloc::vec![(1, 10), (2, 20), (3, 30)])
        });
        let first = first.expect("first parse must succeed");

        let second = store.get_object_stream_offsets_or_init(id(7), || {
            parse_calls.fetch_add(1, Ordering::SeqCst);
            Some(alloc::vec![(99, 99)])
        });
        let second = second.expect("cache hit must succeed");

        assert_eq!(
            parse_calls.load(Ordering::SeqCst),
            1,
            "parse called exactly once"
        );
        assert!(Arc::ptr_eq(&first, &second), "cache returns identical Arc");
        assert_eq!(*first, alloc::vec![(1u32, 10usize), (2, 20), (3, 30)]);
        assert_eq!(store.object_stream_offsets_cache_len(), 1);
    }

    /// A parser returning `None` leaves the cache untouched, so a retry can succeed.
    #[test]
    fn qf2b_cache_miss_with_none_does_not_poison() {
        let store = make_data();

        let failed = store.get_object_stream_offsets_or_init(id(9), || None);
        assert!(failed.is_none(), "first parse may legitimately fail");
        assert_eq!(
            store.object_stream_offsets_cache_len(),
            0,
            "failed parses must not pollute the cache"
        );

        let recovered = store
            .get_object_stream_offsets_or_init(id(9), || Some(alloc::vec![(5, 50)]))
            .expect("retry after None must succeed");
        assert_eq!(*recovered, alloc::vec![(5u32, 50usize)]);
        assert_eq!(store.object_stream_offsets_cache_len(), 1);
    }

    /// Different ids get separate entries holding separate tables.
    #[test]
    fn qf2b_distinct_ids_are_isolated() {
        let store = make_data();

        let one = store
            .get_object_stream_offsets_or_init(id(1), || Some(alloc::vec![(1, 10)]))
            .unwrap();
        let two = store
            .get_object_stream_offsets_or_init(id(2), || Some(alloc::vec![(2, 20)]))
            .unwrap();

        assert!(!Arc::ptr_eq(&one, &two));
        assert_eq!(*one, alloc::vec![(1u32, 10usize)]);
        assert_eq!(*two, alloc::vec![(2u32, 20usize)]);
        assert_eq!(store.object_stream_offsets_cache_len(), 2);
    }

    /// Two `Data` values never see each other's cached tables.
    #[test]
    fn qf2b_distinct_data_instances_do_not_share_cache() {
        let left = make_data();
        let right = make_data();

        // Warm `left` only.
        let _ = left
            .get_object_stream_offsets_or_init(id(7), || Some(alloc::vec![(1, 10)]))
            .unwrap();

        let right_parses = AtomicUsize::new(0);
        let _ = right
            .get_object_stream_offsets_or_init(id(7), || {
                right_parses.fetch_add(1, Ordering::SeqCst);
                Some(alloc::vec![(2, 20)])
            })
            .unwrap();
        assert_eq!(
            right_parses.load(Ordering::SeqCst),
            1,
            "d2 must invoke its own parse — no cross-document cache"
        );

        let cached = right
            .get_object_stream_offsets_or_init(id(7), || {
                right_parses.fetch_add(1, Ordering::SeqCst);
                Some(alloc::vec![(3, 30)])
            })
            .unwrap();
        assert_eq!(
            right_parses.load(Ordering::SeqCst),
            1,
            "second lookup on d2 must hit the d2 cache"
        );
        assert_eq!(*cached, alloc::vec![(2u32, 20usize)]);
    }

    /// Dropping the `Data` releases the cache's reference to the table.
    #[test]
    fn qf2b_cache_drops_with_data() {
        let survivor = {
            let store = make_data();
            let shared = store
                .get_object_stream_offsets_or_init(id(1), || Some(alloc::vec![(1, 10)]))
                .unwrap();
            assert!(Arc::strong_count(&shared) >= 2, "cache + caller refs");
            // `store` is dropped at the end of this scope; only `shared` escapes.
            shared
        };
        assert_eq!(Arc::strong_count(&survivor), 1);
    }
}