1use rpdfium_core::error::PdfError;
13use rpdfium_core::{Name, ParsingMode};
14
15use crate::object::Object;
16use crate::object_parser::parse_object;
17use crate::tokenizer::{Token, Tokenizer};
18
/// The objects extracted from a single object stream by `parse_object_stream`.
pub struct ObjectStreamContents {
    /// `(object number, parsed object)` pairs, stored in the same order as the
    /// entries of the stream header. In lenient mode an entry that could not be
    /// parsed is kept in place with `Object::Null` as its value.
    pub objects: Vec<(u32, Object)>,
}
25
26pub fn parse_object_stream(
41 data: &[u8],
42 dict: &std::collections::HashMap<Name, Object>,
43 mode: ParsingMode,
44) -> Result<ObjectStreamContents, PdfError> {
45 let n = match dict.get(&Name::n()) {
47 Some(Object::Integer(n)) if *n >= 0 => *n as usize,
48 _ => {
49 return Err(PdfError::InvalidObjectStream);
50 }
51 };
52
53 let first = match dict.get(&Name::first()) {
55 Some(Object::Integer(f)) if *f >= 0 => *f as usize,
56 _ => {
57 return Err(PdfError::InvalidObjectStream);
58 }
59 };
60
61 if first > data.len() {
62 return Err(PdfError::InvalidObjectStream);
63 }
64
65 let mut tok = Tokenizer::new(data);
67 let mut header = Vec::with_capacity(n);
68
69 for _ in 0..n {
70 let obj_num = match tok.next_token() {
71 Some(Ok(Token::Integer(num))) if num >= 0 => num as u32,
72 _ => {
73 return Err(PdfError::InvalidObjectStream);
74 }
75 };
76
77 let offset = match tok.next_token() {
78 Some(Ok(Token::Integer(off))) if off >= 0 => off as usize,
79 _ => {
80 return Err(PdfError::InvalidObjectStream);
81 }
82 };
83
84 header.push((obj_num, offset));
85 }
86
87 let obj_data = &data[first..];
89 let mut objects = Vec::with_capacity(n);
90
91 for (obj_num, offset) in &header {
92 if *offset >= obj_data.len() {
93 if mode == ParsingMode::Lenient {
94 tracing::warn!(
95 object_number = obj_num,
96 offset = offset,
97 "object stream entry offset out of bounds, skipping"
98 );
99 objects.push((*obj_num, Object::Null));
100 continue;
101 }
102 return Err(PdfError::InvalidObjectStream);
103 }
104
105 match parse_object(obj_data, *offset as u64, mode) {
106 Ok(obj) => objects.push((*obj_num, obj)),
107 Err(e) => {
108 if mode == ParsingMode::Lenient {
109 tracing::warn!(
110 object_number = obj_num,
111 error = ?e,
112 "failed to parse object in object stream, substituting Null"
113 );
114 objects.push((*obj_num, Object::Null));
115 } else {
116 return Err(e);
117 }
118 }
119 }
120 }
121
122 Ok(ObjectStreamContents { objects })
123}
124
125pub fn get_object_from_stream(contents: &ObjectStreamContents, index: u32) -> Option<&Object> {
127 contents.objects.get(index as usize).map(|(_, obj)| obj)
128}
129
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Wraps a small integer as an `Object::Integer`.
    fn int(value: i32) -> Object {
        Object::Integer(value.into())
    }

    /// Builds a stream dictionary whose `Name::n()` and `Name::first()` entries
    /// hold the given values. Values are passed as `Object`s so that wrongly
    /// typed entries can be expressed as well.
    fn stream_dict(n: Object, first: Object) -> HashMap<Name, Object> {
        HashMap::from([(Name::n(), n), (Name::first(), first)])
    }

    /// Runs `parse_object_stream` on `data` with a dictionary from `stream_dict`.
    fn parse(
        data: &[u8],
        n: Object,
        first: Object,
        mode: ParsingMode,
    ) -> Result<ObjectStreamContents, PdfError> {
        parse_object_stream(data, &stream_dict(n, first), mode)
    }

    #[test]
    fn parse_simple_object_stream() {
        // The header "10 0 11 3 " is 10 bytes long; the object section is "42 true".
        let contents =
            parse(b"10 0 11 3 42 true", int(2), int(10), ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 2);

        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));

        assert_eq!(contents.objects[1].0, 11);
        assert_eq!(contents.objects[1].1.as_bool(), Some(true));
    }

    #[test]
    fn get_object_by_index() {
        let contents = parse(b"5 0 42", int(1), int(4), ParsingMode::Strict).unwrap();

        let obj = get_object_from_stream(&contents, 0).unwrap();
        assert_eq!(obj.as_i64(), Some(42));

        assert!(get_object_from_stream(&contents, 1).is_none());
    }

    #[test]
    fn missing_n_key() {
        let dict = HashMap::from([(Name::first(), int(0))]);
        let result = parse_object_stream(b"", &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn missing_first_key() {
        let dict = HashMap::from([(Name::n(), int(0))]);
        let result = parse_object_stream(b"", &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn empty_object_stream() {
        let contents = parse(b"", int(0), int(0), ParsingMode::Strict).unwrap();
        assert!(contents.objects.is_empty());
    }

    #[test]
    fn parse_normal_three_objects() {
        // The 16-byte header is followed by a dictionary, an array and an integer.
        let contents = parse(
            b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4",
            int(3),
            int(16),
            ParsingMode::Strict,
        )
        .unwrap();
        assert_eq!(contents.objects.len(), 3);

        assert_eq!(contents.objects[0].0, 10);
        assert!(contents.objects[0].1.as_dict().is_some());

        assert_eq!(contents.objects[1].0, 11);
        assert!(contents.objects[1].1.as_array().is_some());

        assert_eq!(contents.objects[2].0, 12);
        assert_eq!(contents.objects[2].1.as_i64(), Some(4));
    }

    #[test]
    fn missing_n_key_is_error() {
        let dict = HashMap::from([(Name::first(), int(4))]);
        let result = parse_object_stream(b"10 0 42", &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn negative_n_is_error() {
        let result = parse(b"10 0 42", int(-1), int(4), ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn float_n_is_error() {
        let result = parse(b"10 0 42", Object::Real(2.2), int(4), ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn missing_first_key_is_error() {
        let dict = HashMap::from([(Name::n(), int(1))]);
        let result = parse_object_stream(b"10 0 42", &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn negative_first_is_error() {
        let result = parse(b"10 0 42", int(1), int(-5), ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn float_first_is_error() {
        let result = parse(b"10 0 42", int(1), Object::Real(5.5), ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn first_beyond_data_is_error() {
        let result = parse(b"10 0 42", int(1), int(999), ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn too_few_count_parses_subset() {
        // The header lists three entries, but the count asks for only two.
        let contents = parse(
            b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4",
            int(2),
            int(16),
            ParsingMode::Strict,
        )
        .unwrap();
        assert_eq!(contents.objects.len(), 2);
        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[1].0, 11);
    }

    #[test]
    fn object_offset_too_big_strict() {
        let result = parse(b"10 0 11 999 42 true", int(2), int(12), ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn object_offset_too_big_lenient() {
        let contents =
            parse(b"10 0 11 999 42 true", int(2), int(12), ParsingMode::Lenient).unwrap();
        assert_eq!(contents.objects.len(), 2);
        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));
        assert_eq!(contents.objects[1].0, 11);
        // The out-of-bounds entry is kept as a Null placeholder.
        assert!(contents.objects[1].1.is_null());
    }

    #[test]
    fn garbage_in_header_obj_num() {
        let result = parse(
            b"10 0 hi 14 12 21 42 true 99",
            int(3),
            int(19),
            ParsingMode::Strict,
        );
        assert!(result.is_err());
    }

    #[test]
    fn get_object_index_out_of_bounds() {
        let contents = parse(b"5 0 42", int(1), int(4), ParsingMode::Strict).unwrap();

        assert!(get_object_from_stream(&contents, 0).is_some());
        assert!(get_object_from_stream(&contents, 1).is_none());
        assert!(get_object_from_stream(&contents, 100).is_none());
    }

    #[test]
    fn string_n_is_error() {
        let n = Object::String(rpdfium_core::PdfString::from_bytes(b"3".to_vec()));
        let result = parse(b"10 0 42", n, int(4), ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn zero_objects_zero_data() {
        let contents = parse(b"", int(0), int(0), ParsingMode::Strict).unwrap();
        assert!(contents.objects.is_empty());
    }

    #[test]
    fn duplicate_object_numbers() {
        // Both header entries use object number 10; each is kept as its own entry.
        let contents =
            parse(b"10 0 10 3 42 true", int(2), int(10), ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 2);
        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));
        assert_eq!(contents.objects[1].0, 10);
        assert_eq!(contents.objects[1].1.as_bool(), Some(true));
    }

    #[test]
    fn very_large_n_limited_by_data() {
        // The count claims 1000 entries but the data runs out of integer pairs.
        let result = parse(b"5 0 6 3 42 true", int(1000), int(10), ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn unordered_offsets() {
        // Object section is "true 42": object 10 sits at offset 5, object 11 at 0.
        let contents =
            parse(b"10 5 11 0 true 42", int(2), int(10), ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 2);

        // Results follow header order, and each object is read from its own offset.
        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));

        assert_eq!(contents.objects[1].0, 11);
        assert_eq!(contents.objects[1].1.as_bool(), Some(true));
    }

    #[test]
    fn n_zero_with_data() {
        let contents = parse(b"10 0 42", int(0), int(4), ParsingMode::Strict).unwrap();
        assert!(contents.objects.is_empty());
    }

    #[test]
    fn first_equals_data_length() {
        // The object section is empty, so offset 0 is already out of bounds.
        let data = b"5 0 42";

        let result = parse(data, int(1), int(6), ParsingMode::Strict);
        assert!(result.is_err());

        let contents = parse(data, int(1), int(6), ParsingMode::Lenient).unwrap();
        assert_eq!(contents.objects.len(), 1);
        assert!(contents.objects[0].1.is_null());
    }
}