1use crate::object::ObjectIdentifier;
2use crate::object::Stream;
3use crate::reader::ReaderContext;
4use crate::sync::HashMap;
5use crate::sync::{Arc, Mutex, MutexExt};
6use crate::util::SegmentList;
7use alloc::vec::Vec;
8use core::fmt::{Debug, Formatter};
9
/// Offset table cached per object stream by
/// `Data::get_object_stream_offsets_or_init`.
///
/// NOTE(review): each entry is presumably an (object number, byte offset)
/// pair — confirm against the parser that produces these tables.
pub(crate) type ObjectStreamOffsets = Vec<(u32, usize)>;
16
/// Type-erased, reference-counted byte buffer used as the storage of
/// [`PdfData`]. With the `std` feature the erased value must additionally be
/// `Send + Sync`.
#[cfg(feature = "std")]
type SharedBytes = Arc<dyn AsRef<[u8]> + Send + Sync>;

/// Type-erased, reference-counted byte buffer used as the storage of
/// [`PdfData`] when the `std` feature is disabled.
#[cfg(not(feature = "std"))]
type SharedBytes = Arc<dyn AsRef<[u8]>>;

/// A cheaply clonable handle to a byte buffer.
///
/// Cloning only bumps the reference count of the shared buffer; the bytes
/// themselves are never copied.
#[derive(Clone)]
pub struct PdfData {
    /// The shared buffer; read through the `AsRef<[u8]>` impl.
    inner: SharedBytes,
}
25
26impl Debug for PdfData {
27 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
28 write!(f, "PdfData {{ ... }}")
29 }
30}
31
32impl AsRef<[u8]> for PdfData {
33 fn as_ref(&self) -> &[u8] {
34 (*self.inner).as_ref()
35 }
36}
37
#[cfg(feature = "std")]
impl<T> From<Arc<T>> for PdfData
where
    T: AsRef<[u8]> + Send + Sync + 'static,
{
    /// Wraps an already shared buffer; the `Arc` is moved in, so the bytes
    /// are not copied.
    fn from(data: Arc<T>) -> Self {
        // Erase the concrete buffer type.
        let inner: Arc<dyn AsRef<[u8]> + Send + Sync> = data;
        PdfData { inner }
    }
}
44
45#[cfg(not(feature = "std"))]
46impl<T: AsRef<[u8]> + 'static> From<Arc<T>> for PdfData {
47 fn from(data: Arc<T>) -> Self {
48 Self { inner: data }
49 }
50}
51
52impl From<Vec<u8>> for PdfData {
53 fn from(data: Vec<u8>) -> Self {
54 Self {
55 inner: Arc::new(data),
56 }
57 }
58}
59
/// The bytes of one document together with the caches built on top of them.
pub(crate) struct Data {
    /// The underlying bytes handed to `Data::new`.
    data: PdfData,
    /// Decoded stream contents, addressed by the slot indices stored in
    /// `map`. A slot holding `None` records that decoding did not succeed.
    decoded: SegmentList<Option<Vec<u8>>, 32>,
    /// Maps an object identifier to its slot index in `decoded`.
    map: Mutex<HashMap<ObjectIdentifier, usize>>,
    /// Parsed offset tables, keyed by object identifier and shared via `Arc`.
    object_stream_offsets: Mutex<HashMap<ObjectIdentifier, Arc<ObjectStreamOffsets>>>,
}
84
85impl Debug for Data {
86 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
87 write!(f, "Data {{ ... }}")
88 }
89}
90
91impl Data {
92 pub(crate) fn new(data: PdfData) -> Self {
94 Self {
95 data,
96 decoded: SegmentList::new(),
97 map: Mutex::new(HashMap::new()),
98 object_stream_offsets: Mutex::new(HashMap::new()),
99 }
100 }
101
102 pub(crate) fn get(&self) -> &PdfData {
104 &self.data
105 }
106
107 pub(crate) fn get_object_stream_offsets_or_init<F>(
123 &self,
124 id: ObjectIdentifier,
125 parse: F,
126 ) -> Option<Arc<ObjectStreamOffsets>>
127 where
128 F: FnOnce() -> Option<ObjectStreamOffsets>,
129 {
130 if let Some(hit) = self.object_stream_offsets.get().get(&id).cloned() {
131 return Some(hit);
132 }
133
134 let parsed = Arc::new(parse()?);
137
138 let mut locked = self.object_stream_offsets.get();
142 Some(locked.entry(id).or_insert(parsed).clone())
143 }
144
145 #[cfg(test)]
147 pub(crate) fn object_stream_offsets_cache_len(&self) -> usize {
148 self.object_stream_offsets.get().len()
149 }
150
151 pub(crate) fn get_with(&self, id: ObjectIdentifier, ctx: &ReaderContext<'_>) -> Option<&[u8]> {
153 if let Some(&idx) = self.map.get().get(&id) {
154 self.decoded.get(idx)?.as_deref()
155 } else {
156 let idx = {
158 let mut locked = self.map.get();
159 let idx = locked.len();
160 locked.insert(id, idx);
161 idx
162 };
163 self.decoded
164 .get_or_init(idx, || {
165 let stream = ctx.xref().get_with::<Stream<'_>>(id, ctx)?;
166 stream.decoded().ok()
167 })
168 .as_deref()
169 }
170 }
171}
172
#[cfg(test)]
mod tests {
    use super::*;
    use core::sync::atomic::{AtomicUsize, Ordering};

    /// Builds a `Data` over sixteen zero bytes; the offset-table cache
    /// exercised below never reads them.
    fn make_data() -> Data {
        let bytes = alloc::vec![0u8; 16];
        Data::new(PdfData::from(bytes))
    }

    /// Shorthand for `ObjectIdentifier::new(n, 0)`.
    fn id(n: i32) -> ObjectIdentifier {
        ObjectIdentifier::new(n, 0)
    }

    /// A miss runs the parser once; the following hit returns the very same
    /// `Arc` without running its closure.
    #[test]
    fn qf2b_cache_miss_parses_once_and_returns_same_arc() {
        let data = make_data();
        let parse_calls = AtomicUsize::new(0);

        let first = data.get_object_stream_offsets_or_init(id(7), || {
            parse_calls.fetch_add(1, Ordering::SeqCst);
            Some(alloc::vec![(1, 10), (2, 20), (3, 30)])
        });
        let first = first.expect("first parse must succeed");

        // Same id again: this closure must never run.
        let second = data.get_object_stream_offsets_or_init(id(7), || {
            parse_calls.fetch_add(1, Ordering::SeqCst);
            Some(alloc::vec![(99, 99)])
        });
        let second = second.expect("cache hit must succeed");

        assert_eq!(
            parse_calls.load(Ordering::SeqCst),
            1,
            "parse called exactly once"
        );
        assert!(Arc::ptr_eq(&first, &second), "cache returns identical Arc");
        assert_eq!(*first, alloc::vec![(1u32, 10usize), (2, 20), (3, 30)]);
        assert_eq!(data.object_stream_offsets_cache_len(), 1);
    }

    /// A parser that yields `None` leaves the cache untouched, so a retry
    /// can still succeed.
    #[test]
    fn qf2b_cache_miss_with_none_does_not_poison() {
        let data = make_data();

        let failed = data.get_object_stream_offsets_or_init(id(9), || None);
        assert!(failed.is_none(), "first parse may legitimately fail");
        assert_eq!(
            data.object_stream_offsets_cache_len(),
            0,
            "failed parses must not pollute the cache"
        );

        let retried =
            data.get_object_stream_offsets_or_init(id(9), || Some(alloc::vec![(5, 50)]));
        let retried = retried.expect("retry after None must succeed");
        assert_eq!(*retried, alloc::vec![(5u32, 50usize)]);
        assert_eq!(data.object_stream_offsets_cache_len(), 1);
    }

    /// Different ids get different cache entries.
    #[test]
    fn qf2b_distinct_ids_are_isolated() {
        let data = make_data();

        let one = data.get_object_stream_offsets_or_init(id(1), || Some(alloc::vec![(1, 10)]));
        let one = one.unwrap();
        let two = data.get_object_stream_offsets_or_init(id(2), || Some(alloc::vec![(2, 20)]));
        let two = two.unwrap();

        assert!(!Arc::ptr_eq(&one, &two));
        assert_eq!(*one, alloc::vec![(1u32, 10usize)]);
        assert_eq!(*two, alloc::vec![(2u32, 20usize)]);
        assert_eq!(data.object_stream_offsets_cache_len(), 2);
    }

    /// The cache belongs to a `Data` instance: filling one instance does not
    /// satisfy lookups on another.
    #[test]
    fn qf2b_distinct_data_instances_do_not_share_cache() {
        let warm = make_data();
        let cold = make_data();

        // Populate the first instance only.
        let _ = warm
            .get_object_stream_offsets_or_init(id(7), || Some(alloc::vec![(1, 10)]))
            .unwrap();

        let parse_calls = AtomicUsize::new(0);

        // Same id on the second instance: its own parser has to run.
        let _ = cold
            .get_object_stream_offsets_or_init(id(7), || {
                parse_calls.fetch_add(1, Ordering::SeqCst);
                Some(alloc::vec![(2, 20)])
            })
            .unwrap();
        assert_eq!(
            parse_calls.load(Ordering::SeqCst),
            1,
            "d2 must invoke its own parse — no cross-document cache"
        );

        // And from now on the second instance serves the id from its cache.
        let cached = cold.get_object_stream_offsets_or_init(id(7), || {
            parse_calls.fetch_add(1, Ordering::SeqCst);
            Some(alloc::vec![(3, 30)])
        });
        let cached = cached.unwrap();
        assert_eq!(
            parse_calls.load(Ordering::SeqCst),
            1,
            "second lookup on d2 must hit the d2 cache"
        );
        assert_eq!(*cached, alloc::vec![(2u32, 20usize)]);
    }

    /// Dropping the `Data` releases the cache's reference to each table.
    #[test]
    fn qf2b_cache_drops_with_data() {
        let survivor = {
            let data = make_data();
            let handle = data
                .get_object_stream_offsets_or_init(id(1), || Some(alloc::vec![(1, 10)]))
                .unwrap();
            assert!(Arc::strong_count(&handle) >= 2, "cache + caller refs");
            // `data`, and with it the cache, is dropped at the end of this
            // scope; only the handle escapes.
            handle
        };
        assert_eq!(Arc::strong_count(&survivor), 1);
    }
}