1use rpdfium_core::error::PdfError;
13use rpdfium_core::{Name, ParsingMode};
14
15use crate::object::Object;
16use crate::object_parser::parse_object;
17use crate::tokenizer::{Token, Tokenizer};
18
19pub struct ObjectStreamContents {
21 pub objects: Vec<(u32, Object)>,
24}
25
26pub fn parse_object_stream(
41 data: &[u8],
42 dict: &std::collections::HashMap<Name, Object>,
43 mode: ParsingMode,
44) -> Result<ObjectStreamContents, PdfError> {
45 let n = match dict.get(&Name::n()) {
47 Some(Object::Integer(n)) if *n >= 0 => *n as usize,
48 _ => {
49 return Err(PdfError::InvalidObjectStream);
50 }
51 };
52
53 let first = match dict.get(&Name::first()) {
55 Some(Object::Integer(f)) if *f >= 0 => *f as usize,
56 _ => {
57 return Err(PdfError::InvalidObjectStream);
58 }
59 };
60
61 if first > data.len() {
62 return Err(PdfError::InvalidObjectStream);
63 }
64
65 let n = n.min(data.len());
69
70 let mut tok = Tokenizer::new(data);
72 let mut header = Vec::with_capacity(n);
73
74 for _ in 0..n {
75 let obj_num = match tok.next_token() {
76 Some(Ok(Token::Integer(num))) if num >= 0 => num as u32,
77 _ => {
78 return Err(PdfError::InvalidObjectStream);
79 }
80 };
81
82 let offset = match tok.next_token() {
83 Some(Ok(Token::Integer(off))) if off >= 0 => off as usize,
84 _ => {
85 return Err(PdfError::InvalidObjectStream);
86 }
87 };
88
89 header.push((obj_num, offset));
90 }
91
92 let obj_data = &data[first..];
94 let mut objects = Vec::with_capacity(n);
95
96 for (obj_num, offset) in &header {
97 if *offset >= obj_data.len() {
98 if mode == ParsingMode::Lenient {
99 tracing::warn!(
100 object_number = obj_num,
101 offset = offset,
102 "object stream entry offset out of bounds, skipping"
103 );
104 objects.push((*obj_num, Object::Null));
105 continue;
106 }
107 return Err(PdfError::InvalidObjectStream);
108 }
109
110 match parse_object(obj_data, *offset as u64, mode) {
111 Ok(obj) => objects.push((*obj_num, obj)),
112 Err(e) => {
113 if mode == ParsingMode::Lenient {
114 tracing::warn!(
115 object_number = obj_num,
116 error = ?e,
117 "failed to parse object in object stream, substituting Null"
118 );
119 objects.push((*obj_num, Object::Null));
120 } else {
121 return Err(e);
122 }
123 }
124 }
125 }
126
127 Ok(ObjectStreamContents { objects })
128}
129
130pub fn get_object_from_stream(contents: &ObjectStreamContents, index: u32) -> Option<&Object> {
132 contents.objects.get(index as usize).map(|(_, obj)| obj)
133}
134
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Header for objects 10, 11 and 12 followed by a dictionary, an array
    /// and an integer; the object section starts at byte 16.
    const DICT_ARRAY_INT: &[u8] = b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4";

    /// Builds a stream dictionary from explicit key/value pairs.
    fn dict_from(entries: Vec<(Name, Object)>) -> HashMap<Name, Object> {
        entries.into_iter().collect()
    }

    /// Builds a stream dictionary holding only the count and first-offset entries.
    fn stream_dict(count: Object, first: Object) -> HashMap<Name, Object> {
        dict_from(vec![(Name::n(), count), (Name::first(), first)])
    }

    /// Runs the parser in strict mode.
    fn strict(
        data: &[u8],
        dict: &HashMap<Name, Object>,
    ) -> Result<ObjectStreamContents, PdfError> {
        parse_object_stream(data, dict, ParsingMode::Strict)
    }

    /// Runs the parser in lenient mode.
    fn lenient(
        data: &[u8],
        dict: &HashMap<Name, Object>,
    ) -> Result<ObjectStreamContents, PdfError> {
        parse_object_stream(data, dict, ParsingMode::Lenient)
    }

    /// Collects the object numbers of the parsed entries, in order.
    fn object_numbers(contents: &ObjectStreamContents) -> Vec<u32> {
        contents.objects.iter().map(|(number, _)| *number).collect()
    }

    #[test]
    fn test_parse_simple_object_stream() {
        let dict = stream_dict(Object::Integer(2), Object::Integer(10));

        let contents = strict(b"10 0 11 3 42 true", &dict).unwrap();

        assert_eq!(object_numbers(&contents), [10, 11]);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));
        assert_eq!(contents.objects[1].1.as_bool(), Some(true));
    }

    #[test]
    fn test_get_object_by_index() {
        let dict = stream_dict(Object::Integer(1), Object::Integer(4));
        let contents = strict(b"5 0 42", &dict).unwrap();

        let found = get_object_from_stream(&contents, 0).unwrap();
        assert_eq!(found.as_i64(), Some(42));

        assert!(get_object_from_stream(&contents, 1).is_none());
    }

    #[test]
    fn test_missing_n_key() {
        let dict = dict_from(vec![(Name::first(), Object::Integer(0))]);
        assert!(strict(b"", &dict).is_err());
    }

    #[test]
    fn test_missing_first_key() {
        let dict = dict_from(vec![(Name::n(), Object::Integer(0))]);
        assert!(strict(b"", &dict).is_err());
    }

    #[test]
    fn test_empty_object_stream() {
        let dict = stream_dict(Object::Integer(0), Object::Integer(0));
        let contents = strict(b"", &dict).unwrap();
        assert!(contents.objects.is_empty());
    }

    #[test]
    fn test_object_stream_stream_dict_normal() {
        let dict = stream_dict(Object::Integer(3), Object::Integer(16));

        let contents = strict(DICT_ARRAY_INT, &dict).unwrap();

        assert_eq!(object_numbers(&contents), [10, 11, 12]);
        assert!(contents.objects[0].1.as_dict().is_some());
        assert!(contents.objects[1].1.as_array().is_some());
        assert_eq!(contents.objects[2].1.as_i64(), Some(4));
    }

    #[test]
    fn test_object_stream_stream_dict_no_count() {
        let dict = dict_from(vec![(Name::first(), Object::Integer(4))]);
        assert!(strict(b"10 0 42", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_negative_count() {
        let dict = stream_dict(Object::Integer(-1), Object::Integer(4));
        assert!(strict(b"10 0 42", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_float_count() {
        let dict = stream_dict(Object::Real(2.2), Object::Integer(4));
        assert!(strict(b"10 0 42", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_no_offset() {
        let dict = dict_from(vec![(Name::n(), Object::Integer(1))]);
        assert!(strict(b"10 0 42", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_negative_offset() {
        let dict = stream_dict(Object::Integer(1), Object::Integer(-5));
        assert!(strict(b"10 0 42", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_float_offset() {
        let dict = stream_dict(Object::Integer(1), Object::Real(5.5));
        assert!(strict(b"10 0 42", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_offset_too_big() {
        let dict = stream_dict(Object::Integer(1), Object::Integer(999));
        assert!(strict(b"10 0 42", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_too_few_count() {
        let dict = stream_dict(Object::Integer(2), Object::Integer(16));

        let contents = strict(DICT_ARRAY_INT, &dict).unwrap();

        assert_eq!(object_numbers(&contents), [10, 11]);
    }

    #[test]
    fn test_object_stream_stream_dict_object_offset_too_big_strict() {
        let dict = stream_dict(Object::Integer(2), Object::Integer(12));
        assert!(strict(b"10 0 11 999 42 true", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_object_offset_too_big_lenient() {
        let dict = stream_dict(Object::Integer(2), Object::Integer(12));

        let contents = lenient(b"10 0 11 999 42 true", &dict).unwrap();

        assert_eq!(object_numbers(&contents), [10, 11]);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));
        assert!(contents.objects[1].1.is_null());
    }

    #[test]
    fn test_object_stream_stream_dict_garbage_obj_num() {
        let dict = stream_dict(Object::Integer(3), Object::Integer(19));
        assert!(strict(b"10 0 hi 14 12 21 42 true 99", &dict).is_err());
    }

    #[test]
    fn test_get_object_index_out_of_bounds() {
        let dict = stream_dict(Object::Integer(1), Object::Integer(4));
        let contents = strict(b"5 0 42", &dict).unwrap();

        assert!(get_object_from_stream(&contents, 0).is_some());
        for missing in [1, 100] {
            assert!(get_object_from_stream(&contents, missing).is_none());
        }
    }

    #[test]
    fn test_string_n_is_error() {
        let count = Object::String(rpdfium_core::PdfString::from_bytes(b"3".to_vec()));
        let dict = stream_dict(count, Object::Integer(4));
        assert!(strict(b"10 0 42", &dict).is_err());
    }

    #[test]
    fn test_zero_objects_zero_data() {
        let dict = stream_dict(Object::Integer(0), Object::Integer(0));
        let contents = strict(b"", &dict).unwrap();
        assert!(contents.objects.is_empty());
    }

    #[test]
    fn test_duplicate_object_numbers() {
        let dict = stream_dict(Object::Integer(2), Object::Integer(10));

        let contents = strict(b"10 0 10 3 42 true", &dict).unwrap();

        assert_eq!(object_numbers(&contents), [10, 10]);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));
        assert_eq!(contents.objects[1].1.as_bool(), Some(true));
    }

    #[test]
    fn test_very_large_n_limited_by_data() {
        let dict = stream_dict(Object::Integer(1000), Object::Integer(10));
        assert!(strict(b"5 0 6 3 42 true", &dict).is_err());
    }

    #[test]
    fn test_unordered_offsets() {
        let dict = stream_dict(Object::Integer(2), Object::Integer(10));

        let contents = strict(b"10 5 11 0 true 42", &dict).unwrap();

        assert_eq!(object_numbers(&contents), [10, 11]);
    }

    #[test]
    fn test_n_zero_with_data() {
        let dict = stream_dict(Object::Integer(0), Object::Integer(4));
        let contents = strict(b"10 0 42", &dict).unwrap();
        assert!(contents.objects.is_empty());
    }

    #[test]
    fn test_object_stream_stream_empty_dict() {
        let dict = HashMap::new();
        assert!(strict(DICT_ARRAY_INT, &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_no_type() {
        let dict = stream_dict(Object::Integer(3), Object::Integer(16));
        let contents = strict(DICT_ARRAY_INT, &dict).unwrap();
        assert_eq!(contents.objects.len(), 3);
    }

    #[test]
    fn test_object_stream_stream_dict_wrong_type() {
        let dict = dict_from(vec![
            (
                Name::from_bytes(b"Type".to_vec()),
                Object::String(rpdfium_core::PdfString::from_bytes(b"ObjStm".to_vec())),
            ),
            (Name::n(), Object::Integer(3)),
            (Name::first(), Object::Integer(16)),
        ]);
        let contents = strict(DICT_ARRAY_INT, &dict).unwrap();
        assert_eq!(contents.objects.len(), 3);
    }

    #[test]
    fn test_object_stream_stream_dict_wrong_type_value() {
        let dict = dict_from(vec![
            (
                Name::from_bytes(b"Type".to_vec()),
                Object::Name(Name::from_bytes(b"ObjStmmmm".to_vec())),
            ),
            (Name::n(), Object::Integer(3)),
            (Name::first(), Object::Integer(16)),
        ]);
        let contents = strict(DICT_ARRAY_INT, &dict).unwrap();
        assert_eq!(contents.objects.len(), 3);
    }

    #[test]
    fn test_object_stream_stream_dict_count_too_big() {
        let dict = stream_dict(Object::Integer(999999999), Object::Integer(16));
        assert!(strict(DICT_ARRAY_INT, &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_negative_object_offset() {
        let dict = stream_dict(Object::Integer(3), Object::Integer(16));
        assert!(strict(b"10 0 11 -1 12 21<</Name /Foo>>[1 2 3]4", &dict).is_err());
    }

    #[test]
    fn test_object_stream_stream_dict_unordered_object_numbers() {
        let dict = stream_dict(Object::Integer(3), Object::Integer(16));

        let contents = strict(b"11 0 12 14 10 21<</Name /Foo>>[1 2 3]4", &dict).unwrap();

        assert_eq!(object_numbers(&contents), [11, 12, 10]);
        assert!(contents.objects[0].1.as_dict().is_some());
        assert!(contents.objects[1].1.as_array().is_some());
        assert_eq!(contents.objects[2].1.as_i64(), Some(4));
    }

    #[test]
    fn test_first_equals_data_length() {
        let data = b"5 0 42";
        let dict = stream_dict(Object::Integer(1), Object::Integer(6));

        // The object section is empty, so the only entry's offset is out of bounds.
        assert!(strict(data, &dict).is_err());

        let contents = lenient(data, &dict).unwrap();
        assert_eq!(contents.objects.len(), 1);
        assert!(contents.objects[0].1.is_null());
    }
}