Skip to main content

rpdfium_parser/
object_stream.rs

1// Derived from PDFium's cpdf_object_stream.cpp
2// Original: Copyright 2014 The PDFium Authors
3// Licensed under BSD-3-Clause / Apache-2.0
4// See pdfium-upstream/LICENSE for the original license.
5
6//! Object stream (ObjStm) parsing.
7//!
8//! Object streams (PDF 1.5+) pack multiple non-stream objects into a single
9//! compressed stream. The stream contains a header with object numbers and
10//! byte offsets, followed by the objects themselves.
11
12use rpdfium_core::error::PdfError;
13use rpdfium_core::{Name, ParsingMode};
14
15use crate::object::Object;
16use crate::object_parser::parse_object;
17use crate::tokenizer::{Token, Tokenizer};
18
/// Parsed contents of an object stream: a list of (object_number, object) pairs.
///
/// Built by [`parse_object_stream`]. Entries appear in the same order as the
/// (object number, offset) pairs in the stream header.
pub struct ObjectStreamContents {
    /// Objects extracted from the stream, indexed by their position.
    /// The tuple is (object_number, parsed_object).
    ///
    /// The position in this vector is the index looked up by
    /// [`get_object_from_stream`]. When parsing in lenient mode, an entry
    /// whose data could not be read holds `Object::Null` as a placeholder.
    pub objects: Vec<(u32, Object)>,
}
25
26/// Parse the decompressed contents of an object stream.
27///
28/// The stream dict must contain:
29/// - `/N`: number of objects in the stream
30/// - `/First`: byte offset of the first object data (past the header)
31///
32/// The stream data format is:
33/// ```text
34/// objnum1 offset1 objnum2 offset2 ... objnumN offsetN
35/// <object1_data> <object2_data> ... <objectN_data>
36/// ```
37///
38/// `data` is the decompressed stream bytes.
39/// `dict` is the stream's dictionary.
40pub fn parse_object_stream(
41    data: &[u8],
42    dict: &std::collections::HashMap<Name, Object>,
43    mode: ParsingMode,
44) -> Result<ObjectStreamContents, PdfError> {
45    // Extract /N (number of objects)
46    let n = match dict.get(&Name::n()) {
47        Some(Object::Integer(n)) if *n >= 0 => *n as usize,
48        _ => {
49            return Err(PdfError::InvalidObjectStream);
50        }
51    };
52
53    // Extract /First (byte offset to first object data)
54    let first = match dict.get(&Name::first()) {
55        Some(Object::Integer(f)) if *f >= 0 => *f as usize,
56        _ => {
57            return Err(PdfError::InvalidObjectStream);
58        }
59    };
60
61    if first > data.len() {
62        return Err(PdfError::InvalidObjectStream);
63    }
64
65    // Cap n to the data length: each header entry requires at least one byte,
66    // so n can never legitimately exceed data.len(). This prevents OOM from
67    // corrupt or malicious /N values before any parsing begins.
68    let n = n.min(data.len());
69
70    // Parse the header: N pairs of (object_number, byte_offset)
71    let mut tok = Tokenizer::new(data);
72    let mut header = Vec::with_capacity(n);
73
74    for _ in 0..n {
75        let obj_num = match tok.next_token() {
76            Some(Ok(Token::Integer(num))) if num >= 0 => num as u32,
77            _ => {
78                return Err(PdfError::InvalidObjectStream);
79            }
80        };
81
82        let offset = match tok.next_token() {
83            Some(Ok(Token::Integer(off))) if off >= 0 => off as usize,
84            _ => {
85                return Err(PdfError::InvalidObjectStream);
86            }
87        };
88
89        header.push((obj_num, offset));
90    }
91
92    // Parse each object from the data section
93    let obj_data = &data[first..];
94    let mut objects = Vec::with_capacity(n);
95
96    for (obj_num, offset) in &header {
97        if *offset >= obj_data.len() {
98            if mode == ParsingMode::Lenient {
99                tracing::warn!(
100                    object_number = obj_num,
101                    offset = offset,
102                    "object stream entry offset out of bounds, skipping"
103                );
104                objects.push((*obj_num, Object::Null));
105                continue;
106            }
107            return Err(PdfError::InvalidObjectStream);
108        }
109
110        match parse_object(obj_data, *offset as u64, mode) {
111            Ok(obj) => objects.push((*obj_num, obj)),
112            Err(e) => {
113                if mode == ParsingMode::Lenient {
114                    tracing::warn!(
115                        object_number = obj_num,
116                        error = ?e,
117                        "failed to parse object in object stream, substituting Null"
118                    );
119                    objects.push((*obj_num, Object::Null));
120                } else {
121                    return Err(e);
122                }
123            }
124        }
125    }
126
127    Ok(ObjectStreamContents { objects })
128}
129
130/// Get a specific object from a parsed object stream by its index.
131pub fn get_object_from_stream(contents: &ObjectStreamContents, index: u32) -> Option<&Object> {
132    contents.objects.get(index as usize).map(|(_, obj)| obj)
133}
134
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    #[test]
    fn test_parse_simple_object_stream() {
        // Two objects: obj 10 = integer 42, obj 11 = boolean true
        // Header: "10 0 11 3 " (obj 10 at offset 0, obj 11 at offset 3 relative to /First)
        // Data starts at /First=10: "42 true"
        let data = b"10 0 11 3 42 true";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(2));
        dict.insert(Name::first(), Object::Integer(10));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 2);

        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));

        assert_eq!(contents.objects[1].0, 11);
        assert_eq!(contents.objects[1].1.as_bool(), Some(true));
    }

    #[test]
    fn test_get_object_by_index() {
        let data = b"5 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(1));
        dict.insert(Name::first(), Object::Integer(4));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();

        let obj = get_object_from_stream(&contents, 0).unwrap();
        assert_eq!(obj.as_i64(), Some(42));

        assert!(get_object_from_stream(&contents, 1).is_none());
    }

    #[test]
    fn test_missing_n_key() {
        let data = b"";
        let mut dict = HashMap::new();
        dict.insert(Name::first(), Object::Integer(0));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn test_missing_first_key() {
        let data = b"";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(0));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    #[test]
    fn test_empty_object_stream() {
        let data = b"";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(0));
        dict.insert(Name::first(), Object::Integer(0));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert!(contents.objects.is_empty());
    }

    // -----------------------------------------------------------------------
    // Upstream-derived object stream tests (cpdf_object_stream_unittest.cpp)
    // -----------------------------------------------------------------------

    /// Upstream: TEST(ObjectStreamTest, StreamDictNormal)
    #[test]
    fn test_object_stream_stream_dict_normal() {
        // "10 0 11 14 12 21<</Name /Foo>>[1 2 3]4"
        // /First=16, /N=3
        let data = b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(3));
        dict.insert(Name::first(), Object::Integer(16));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 3);

        // Object 10: dictionary
        assert_eq!(contents.objects[0].0, 10);
        assert!(contents.objects[0].1.as_dict().is_some());

        // Object 11: array
        assert_eq!(contents.objects[1].0, 11);
        assert!(contents.objects[1].1.as_array().is_some());

        // Object 12: integer
        assert_eq!(contents.objects[2].0, 12);
        assert_eq!(contents.objects[2].1.as_i64(), Some(4));
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictNoCount)
    #[test]
    fn test_object_stream_stream_dict_no_count() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::first(), Object::Integer(4));
        // No /N key
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictNegativeCount)
    #[test]
    fn test_object_stream_stream_dict_negative_count() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(-1));
        dict.insert(Name::first(), Object::Integer(4));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictFloatCount)
    #[test]
    fn test_object_stream_stream_dict_float_count() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Real(2.2));
        dict.insert(Name::first(), Object::Integer(4));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictNoOffset)
    #[test]
    fn test_object_stream_stream_dict_no_offset() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(1));
        // No /First key
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictNegativeOffset)
    #[test]
    fn test_object_stream_stream_dict_negative_offset() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(1));
        dict.insert(Name::first(), Object::Integer(-5));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictFloatOffset)
    #[test]
    fn test_object_stream_stream_dict_float_offset() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(1));
        dict.insert(Name::first(), Object::Real(5.5));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictOffsetTooBig)
    #[test]
    fn test_object_stream_stream_dict_offset_too_big() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(1));
        dict.insert(Name::first(), Object::Integer(999));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictTooFewCount)
    #[test]
    fn test_object_stream_stream_dict_too_few_count() {
        let data = b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(2)); // Only 2 of 3
        dict.insert(Name::first(), Object::Integer(16));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 2);
        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[1].0, 11);
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictObjectOffsetTooBig)
    #[test]
    fn test_object_stream_stream_dict_object_offset_too_big_strict() {
        // Object at offset 999 (way beyond data)
        let data = b"10 0 11 999 42 true";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(2));
        dict.insert(Name::first(), Object::Integer(12));

        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        // Object 10 at offset 0 should parse fine, but object 11 at offset 999
        // should fail since it's beyond the data
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictObjectOffsetTooBig)
    #[test]
    fn test_object_stream_stream_dict_object_offset_too_big_lenient() {
        let data = b"10 0 11 999 42 true";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(2));
        dict.insert(Name::first(), Object::Integer(12));

        let contents = parse_object_stream(data, &dict, ParsingMode::Lenient).unwrap();
        assert_eq!(contents.objects.len(), 2);
        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));
        // Object 11 should be Null (offset out of bounds)
        assert_eq!(contents.objects[1].0, 11);
        assert!(contents.objects[1].1.is_null());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictGarbageObjNum)
    #[test]
    fn test_object_stream_stream_dict_garbage_obj_num() {
        // "10 0 hi 14 12 21..." — "hi" is not a valid integer for 2nd obj number
        let data = b"10 0 hi 14 12 21 42 true 99";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(3));
        dict.insert(Name::first(), Object::Integer(19));

        // Should fail in strict mode because header parsing expects integer pairs
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// get_object_from_stream with index beyond bounds returns None.
    #[test]
    fn test_get_object_index_out_of_bounds() {
        let data = b"5 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(1));
        dict.insert(Name::first(), Object::Integer(4));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();

        assert!(get_object_from_stream(&contents, 0).is_some());
        assert!(get_object_from_stream(&contents, 1).is_none());
        assert!(get_object_from_stream(&contents, 100).is_none());
    }

    /// Object stream with dict containing string value for /N (wrong type).
    #[test]
    fn test_string_n_is_error() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(
            Name::n(),
            Object::String(rpdfium_core::PdfString::from_bytes(b"3".to_vec())),
        );
        dict.insert(Name::first(), Object::Integer(4));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Object stream with zero-length data and /N=0 is valid.
    #[test]
    fn test_zero_objects_zero_data() {
        let data = b"";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(0));
        dict.insert(Name::first(), Object::Integer(0));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert!(contents.objects.is_empty());
    }

    /// Duplicate object numbers in header — both should parse successfully.
    /// Upstream: TEST(ObjectStreamTest, StreamDictDuplicateObjNum)
    #[test]
    fn test_duplicate_object_numbers() {
        // Two entries both claiming object number 10
        let data = b"10 0 10 3 42 true";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(2));
        dict.insert(Name::first(), Object::Integer(10));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 2);
        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));
        assert_eq!(contents.objects[1].0, 10);
        assert_eq!(contents.objects[1].1.as_bool(), Some(true));
    }

    /// /N=1000 but only 2 header entries in the data — header parsing fails
    /// instead of returning the entries that are present.
    /// Upstream: TEST(ObjectStreamTest, StreamDictTooManyObject)
    #[test]
    fn test_very_large_n_limited_by_data() {
        // Header has only 2 entries, but /N claims 1000
        let data = b"5 0 6 3 42 true";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(1000));
        dict.insert(Name::first(), Object::Integer(10));

        // The 3rd header pair cannot be read as two integers: the header
        // tokenizer runs on into the object data, where `42` is followed by
        // the non-integer token `true`.
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Non-ascending offsets in header — both should parse correctly.
    /// Upstream: TEST(ObjectStreamTest, StreamDictUnorderedObjectOffsets)
    #[test]
    fn test_unordered_offsets() {
        // Object 10 at offset 5, object 11 at offset 0 — reversed order
        let data = b"10 5 11 0 true 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(2));
        dict.insert(Name::first(), Object::Integer(10));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 2);
        // Object 10 at offset 5 in obj_data ("true 42") is the integer 42
        assert_eq!(contents.objects[0].0, 10);
        assert_eq!(contents.objects[0].1.as_i64(), Some(42));
        // Object 11 at offset 0 in obj_data is the boolean true
        assert_eq!(contents.objects[1].0, 11);
        assert_eq!(contents.objects[1].1.as_bool(), Some(true));
    }

    /// /N=0 with non-empty data — should produce empty result.
    #[test]
    fn test_n_zero_with_data() {
        let data = b"10 0 42";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(0));
        dict.insert(Name::first(), Object::Integer(4));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert!(contents.objects.is_empty());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamEmptyDict)
    ///
    /// Empty dictionary (no /Type, /N, or /First keys).
    /// Upstream rejects via CPDF_ObjectStream::Create() due to missing /Type.
    /// In rpdfium, parse_object_stream rejects due to missing /N.
    #[test]
    fn test_object_stream_stream_empty_dict() {
        let data = b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4";
        let dict = HashMap::new();
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictNoType)
    ///
    /// Dict has /N and /First but no /Type key.
    /// Upstream rejects via CPDF_ObjectStream::Create() which requires /Type = /ObjStm.
    /// In rpdfium, /Type validation is done at a higher level (stream loading),
    /// so parse_object_stream succeeds when /N and /First are valid.
    #[test]
    fn test_object_stream_stream_dict_no_type() {
        let data = b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(3));
        dict.insert(Name::first(), Object::Integer(16));
        // No /Type key — parse_object_stream does not validate /Type
        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 3);
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictWrongType)
    ///
    /// Dict has /Type as a String "ObjStm" (not a Name).
    /// Upstream rejects because GetNameFor("Type") returns empty for a String.
    /// In rpdfium, parse_object_stream does not validate /Type at all.
    #[test]
    fn test_object_stream_stream_dict_wrong_type() {
        let data = b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4";
        let mut dict = HashMap::new();
        dict.insert(
            Name::from_bytes(b"Type".to_vec()),
            Object::String(rpdfium_core::PdfString::from_bytes(b"ObjStm".to_vec())),
        );
        dict.insert(Name::n(), Object::Integer(3));
        dict.insert(Name::first(), Object::Integer(16));
        // parse_object_stream does not check /Type — succeeds
        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 3);
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictWrongTypeValue)
    ///
    /// Dict has /Type as Name "ObjStmmmm" (wrong value).
    /// Upstream rejects because type value != "ObjStm".
    /// In rpdfium, parse_object_stream does not validate /Type at all.
    #[test]
    fn test_object_stream_stream_dict_wrong_type_value() {
        let data = b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4";
        let mut dict = HashMap::new();
        dict.insert(
            Name::from_bytes(b"Type".to_vec()),
            Object::Name(Name::from_bytes(b"ObjStmmmm".to_vec())),
        );
        dict.insert(Name::n(), Object::Integer(3));
        dict.insert(Name::first(), Object::Integer(16));
        // parse_object_stream does not check /Type — succeeds
        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 3);
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictCountTooBig)
    ///
    /// /N = 999999999 but stream data only has a few bytes of header.
    /// Fails because we cannot read 999999999 header (obj_num, offset) pairs.
    #[test]
    fn test_object_stream_stream_dict_count_too_big() {
        let data = b"10 0 11 14 12 21<</Name /Foo>>[1 2 3]4";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(999999999));
        dict.insert(Name::first(), Object::Integer(16));
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictNegativeObjectOffset)
    ///
    /// Negative offset (-1) in the object metadata is rejected by the parser
    /// because the tokenizer reads it as a negative integer, which fails the
    /// non-negative offset check. In upstream PDFium, -1 wraps to u32::MAX
    /// (4294967295).
    #[test]
    fn test_object_stream_stream_dict_negative_object_offset() {
        // "10 0 11 -1 12 21..." — second object has negative offset
        let data = b"10 0 11 -1 12 21<</Name /Foo>>[1 2 3]4";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(3));
        dict.insert(Name::first(), Object::Integer(16));

        // In strict mode, negative offset fails header parsing
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());
    }

    /// Upstream: TEST(ObjectStreamTest, StreamDictUnorderedObjectNumbers)
    ///
    /// Object numbers in the header are not in ascending order (11, 12, 10).
    /// ISO 32000-1:2008 section 7.5.7, note 6 says there is no restriction
    /// on object number ordering. The parser should handle this correctly.
    #[test]
    fn test_object_stream_stream_dict_unordered_object_numbers() {
        // Header: obj 11 at offset 0, obj 12 at offset 14, obj 10 at offset 21
        let data = b"11 0 12 14 10 21<</Name /Foo>>[1 2 3]4";
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(3));
        dict.insert(Name::first(), Object::Integer(16));

        let contents = parse_object_stream(data, &dict, ParsingMode::Strict).unwrap();
        assert_eq!(contents.objects.len(), 3);

        // Object 11: dictionary (at offset 0)
        assert_eq!(contents.objects[0].0, 11);
        assert!(contents.objects[0].1.as_dict().is_some());

        // Object 12: array (at offset 14)
        assert_eq!(contents.objects[1].0, 12);
        assert!(contents.objects[1].1.as_array().is_some());

        // Object 10: integer 4 (at offset 21)
        assert_eq!(contents.objects[2].0, 10);
        assert_eq!(contents.objects[2].1.as_i64(), Some(4));
    }

    /// /First equals data length — obj_data is empty, so all offsets are out of bounds.
    /// Upstream: TEST(ObjectStreamTest, StreamDictGarbageObjectOffset)
    #[test]
    fn test_first_equals_data_length() {
        let data = b"5 0 42"; // length = 6
        let mut dict = HashMap::new();
        dict.insert(Name::n(), Object::Integer(1));
        dict.insert(Name::first(), Object::Integer(6)); // /First = data.len()

        // Object at offset 0 in empty obj_data — out of bounds
        let result = parse_object_stream(data, &dict, ParsingMode::Strict);
        assert!(result.is_err());

        // In lenient mode, should substitute Null
        let contents = parse_object_stream(data, &dict, ParsingMode::Lenient).unwrap();
        assert_eq!(contents.objects.len(), 1);
        assert!(contents.objects[0].1.is_null());
    }
}