Skip to main content

rpdfium_parser/
xref_stream.rs

1// Derived from PDFium's cpdf_cross_ref_table.cpp (xref stream parsing)
2// Original: Copyright 2014 The PDFium Authors
3// Licensed under BSD-3-Clause / Apache-2.0
4// See pdfium-upstream/LICENSE for the original license.
5
6//! Cross-reference stream parsing (PDF 1.5+).
7//!
8//! PDF 1.5+ allows cross-reference data to be stored as a stream object
9//! instead of or in addition to the traditional `xref` table. The stream
10//! dictionary also serves as the trailer dictionary.
11
12use std::collections::HashMap;
13
14use rpdfium_core::error::PdfError;
15use rpdfium_core::fx_system::MAX_OBJECT_NUMBER;
16use rpdfium_core::{Name, ParsingMode};
17
18use crate::object::{Object, ObjectId, StreamData};
19use crate::object_parser::parse_indirect_object;
20use crate::xref::{XrefEntry, XrefEntryType, XrefSection};
21
22/// Parse a cross-reference stream at the given offset.
23///
24/// Returns the xref section entries and the trailer dictionary
25/// (which is the stream's own dictionary).
26pub fn parse_xref_stream(
27    source: &[u8],
28    offset: u64,
29    mode: ParsingMode,
30) -> Result<(XrefSection, HashMap<Name, Object>), PdfError> {
31    // Parse the indirect object at this offset (it must be a stream)
32    let (_id, obj) = parse_indirect_object(source, offset, mode)?;
33
34    let (dict, data) = match obj {
35        Object::Stream { dict, data } => (dict, data),
36        _ => return Err(PdfError::InvalidXref),
37    };
38
39    // Extract the raw stream data, applying /Filter if present.
40    let decoded_buf;
41    let raw_data = match data {
42        StreamData::Raw {
43            offset: raw_offset,
44            length,
45        } => {
46            let start = raw_offset as usize;
47            let end = start + length as usize;
48            if end > source.len() {
49                return Err(PdfError::InvalidXref);
50            }
51            let raw_bytes = &source[start..end];
52            // Apply /Filter chain if present (e.g. FlateDecode on xref streams).
53            let filters = crate::filter::resolve_filter_chain(&dict);
54            if !filters.is_empty() {
55                decoded_buf = rpdfium_codec::apply_filter_chain(raw_bytes, &filters)
56                    .map_err(|_| PdfError::InvalidXref)?;
57                decoded_buf.as_slice()
58            } else {
59                raw_bytes
60            }
61        }
62        StreamData::Decoded { data: decoded } => {
63            decoded_buf = decoded;
64            decoded_buf.as_slice()
65        }
66    };
67
68    // Extract /W array — field widths [w1, w2, w3]
69    let w = extract_w_array(&dict)?;
70
71    // Extract /Size — total number of objects in the file
72    let size = match dict.get(&Name::size()) {
73        Some(Object::Integer(n)) if *n > 0 => *n as u64,
74        _ => return Err(PdfError::InvalidXref),
75    };
76
77    // Extract /Index array (optional) — subsection ranges
78    // Default: [0 Size]
79    let index_ranges = extract_index_ranges(&dict, size)?;
80
81    // Decode entries from the stream data
82    let entry_width = w[0] + w[1] + w[2];
83    if entry_width == 0 {
84        return Err(PdfError::InvalidXref);
85    }
86
87    let mut entries = Vec::new();
88
89    let mut data_pos = 0;
90    for range in &index_ranges {
91        let start_id = range.0;
92        let count = range.1;
93
94        for i in 0..count {
95            if data_pos + entry_width > raw_data.len() {
96                break;
97            }
98
99            let object_number = start_id + i as u64;
100            if object_number > MAX_OBJECT_NUMBER as u64 {
101                tracing::warn!(
102                    object_number = object_number,
103                    "xref stream entry exceeds MAX_OBJECT_NUMBER, skipping"
104                );
105                data_pos += entry_width;
106                continue;
107            }
108
109            let field1 = read_field(raw_data, data_pos, w[0]);
110            let field2 = read_field(raw_data, data_pos + w[0], w[1]);
111            let field3 = read_field(raw_data, data_pos + w[0] + w[1], w[2]);
112            data_pos += entry_width;
113
114            // Default type is 1 (in-use) if w[0] == 0
115            let entry_type_val = if w[0] == 0 { 1 } else { field1 };
116
117            let entry_type = match entry_type_val {
118                0 => XrefEntryType::Free,
119                1 => XrefEntryType::InUse { offset: field2 },
120                2 => XrefEntryType::InStream {
121                    stream_id: ObjectId::new(field2 as u32, 0),
122                    index: field3 as u32,
123                },
124                _ => {
125                    tracing::warn!(
126                        entry_type = entry_type_val,
127                        "unknown xref stream entry type, treating as free"
128                    );
129                    XrefEntryType::Free
130                }
131            };
132
133            let generation = match entry_type_val {
134                0 => field3 as u16, // free: field3 is next free object generation
135                1 => field3 as u16, // in-use: field3 is generation
136                _ => 0,
137            };
138
139            entries.push(XrefEntry {
140                id: ObjectId::new(object_number as u32, generation),
141                entry_type,
142            });
143        }
144    }
145
146    Ok((XrefSection { entries }, dict))
147}
148
149/// Extract the /W array from the stream dictionary.
150/// Returns [w1, w2, w3] as byte widths.
151fn extract_w_array(dict: &HashMap<Name, Object>) -> Result<[usize; 3], PdfError> {
152    let w_array = match dict.get(&Name::w()) {
153        Some(Object::Array(arr)) => arr,
154        _ => return Err(PdfError::InvalidXref),
155    };
156
157    if w_array.len() != 3 {
158        return Err(PdfError::InvalidXref);
159    }
160
161    let mut widths = [0usize; 3];
162    for (i, obj) in w_array.iter().enumerate() {
163        match obj {
164            Object::Integer(n) if *n >= 0 => widths[i] = *n as usize,
165            _ => return Err(PdfError::InvalidXref),
166        }
167    }
168
169    Ok(widths)
170}
171
172/// Extract the /Index array from the stream dictionary.
173/// Returns pairs of (start_id, count).
174/// Default: [(0, size)] if /Index is not present.
175fn extract_index_ranges(
176    dict: &HashMap<Name, Object>,
177    size: u64,
178) -> Result<Vec<(u64, usize)>, PdfError> {
179    match dict.get(&Name::index()) {
180        Some(Object::Array(arr)) => {
181            if arr.len() % 2 != 0 {
182                return Err(PdfError::InvalidXref);
183            }
184            let mut ranges = Vec::new();
185            for pair in arr.chunks(2) {
186                let start = match &pair[0] {
187                    Object::Integer(n) if *n >= 0 => *n as u64,
188                    _ => return Err(PdfError::InvalidXref),
189                };
190                let count = match &pair[1] {
191                    Object::Integer(n) if *n >= 0 => *n as usize,
192                    _ => return Err(PdfError::InvalidXref),
193                };
194                ranges.push((start, count));
195            }
196            Ok(ranges)
197        }
198        None => Ok(vec![(0, size as usize)]),
199        _ => Err(PdfError::InvalidXref),
200    }
201}
202
/// Decode a big-endian unsigned integer of `width` bytes starting at
/// `offset` in `data`.
///
/// A `width` of 0 yields 0 (the default value). Bytes that would lie past
/// the end of `data` are skipped without shifting, so a truncated field
/// decodes to the value of the bytes that are present. For widths above 8
/// only the low 64 bits of the result are kept.
fn read_field(data: &[u8], offset: usize, width: usize) -> u64 {
    data.iter()
        .skip(offset)
        .take(width)
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
}
218
#[cfg(test)]
mod tests {
    use super::*;
    use crate::store::ObjectStore;

    /// Build a dictionary whose only entry maps `key` to an array of integer
    /// objects holding `values`.
    fn int_array_dict(key: Name, values: &[i32]) -> HashMap<Name, Object> {
        let items = values
            .iter()
            .map(|&value| Object::Integer(value.into()))
            .collect();
        let mut dict = HashMap::new();
        dict.insert(key, Object::Array(items));
        dict
    }

    /// Mirror of the `/Size` guard in `parse_xref_stream` (`/Size` must be an
    /// integer greater than zero). The real function needs a complete
    /// indirect object, so the guard is replayed here on a bare dictionary.
    fn positive_size(dict: &HashMap<Name, Object>) -> Option<u64> {
        match dict.get(&Name::size()) {
            Some(Object::Integer(n)) if *n > 0 => Some(*n as u64),
            _ => None,
        }
    }

    #[test]
    fn test_read_field_1_byte() {
        let data = [0xFF];
        assert_eq!(read_field(&data, 0, 1), 255);
    }

    #[test]
    fn test_read_field_2_bytes() {
        let data = [0x01, 0x00];
        assert_eq!(read_field(&data, 0, 2), 256);
    }

    #[test]
    fn test_read_field_4_bytes() {
        let data = [0x00, 0x00, 0x01, 0x00];
        assert_eq!(read_field(&data, 0, 4), 256);
    }

    #[test]
    fn test_read_field_zero_width() {
        let data = [0xFF];
        assert_eq!(read_field(&data, 0, 0), 0);
    }

    #[test]
    fn test_extract_w_array_valid() {
        let dict = int_array_dict(Name::w(), &[1, 3, 1]);
        let w = extract_w_array(&dict).unwrap();
        assert_eq!(w, [1, 3, 1]);
    }

    #[test]
    fn test_extract_w_array_missing() {
        let dict = HashMap::new();
        assert!(extract_w_array(&dict).is_err());
    }

    #[test]
    fn test_extract_w_array_wrong_length() {
        let dict = int_array_dict(Name::w(), &[1, 2]);
        assert!(extract_w_array(&dict).is_err());
    }

    #[test]
    fn test_extract_index_default() {
        let dict = HashMap::new();
        let ranges = extract_index_ranges(&dict, 5).unwrap();
        assert_eq!(ranges, vec![(0, 5)]);
    }

    #[test]
    fn test_extract_index_explicit() {
        let dict = int_array_dict(Name::index(), &[0, 3, 10, 2]);
        let ranges = extract_index_ranges(&dict, 5).unwrap();
        assert_eq!(ranges, vec![(0, 3), (10, 2)]);
    }

    #[test]
    fn test_extract_w_array_negative_component() {
        let dict = int_array_dict(Name::w(), &[1, -1, 1]);
        assert!(extract_w_array(&dict).is_err());
    }

    #[test]
    fn test_extract_w_array_all_zero() {
        // extract_w_array itself accepts an all-zero /W; the zero record
        // width is rejected later, inside parse_xref_stream.
        let dict = int_array_dict(Name::w(), &[0, 0, 0]);
        let w = extract_w_array(&dict).unwrap();
        assert_eq!(w, [0, 0, 0]);
    }

    #[test]
    fn test_extract_w_array_non_integer() {
        let mut dict = HashMap::new();
        dict.insert(
            Name::w(),
            Object::Array(vec![
                Object::Integer(1),
                Object::String(rpdfium_core::PdfString::from_bytes(b"hello".to_vec())),
                Object::Integer(1),
            ]),
        );
        assert!(extract_w_array(&dict).is_err());
    }

    #[test]
    fn test_size_zero_is_error() {
        // /Size 0 must fail the `> 0` guard.
        let mut dict = HashMap::new();
        dict.insert(Name::size(), Object::Integer(0));
        assert!(positive_size(&dict).is_none());
    }

    #[test]
    fn test_size_negative_is_error() {
        let mut dict = HashMap::new();
        dict.insert(Name::size(), Object::Integer(-5));
        assert!(positive_size(&dict).is_none());
    }

    #[test]
    fn test_index_odd_length_is_error() {
        let dict = int_array_dict(Name::index(), &[0, 3, 10]);
        assert!(extract_index_ranges(&dict, 5).is_err());
    }

    #[test]
    fn test_index_negative_start_is_error() {
        let dict = int_array_dict(Name::index(), &[-1, 3]);
        assert!(extract_index_ranges(&dict, 5).is_err());
    }

    #[test]
    fn test_read_field_short_data() {
        // Data is 2 bytes but the field width is 4 — this must not panic.
        let data = [0x01, 0x02];
        let value = read_field(&data, 0, 4);
        // The two missing bytes are skipped entirely (no shift is applied for
        // them), so the result is (0x01 << 8) | 0x02 = 0x0102, not the
        // zero-padded 0x0102_0000.
        assert_eq!(value, 0x0102);
    }

    // -----------------------------------------------------------------------
    // Upstream-derived xref stream tests (cpdf_parser_unittest.cpp ParserXRefTest)
    // These tests use ObjectStore::open which is the equivalent of
    // CPDF_TestParser::StartParseInternal() in the C++ tests.
    // -----------------------------------------------------------------------

    /// Open `data` as a PDF in lenient mode.
    ///
    /// `data` should be a complete PDF byte sequence including header,
    /// xref stream object, startxref, and %%EOF.
    fn open_xref_stream_pdf(data: &[u8]) -> Result<ObjectStore<Vec<u8>>, PdfError> {
        ObjectStore::open(data.to_vec(), ParsingMode::Lenient)
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefObjectHighestIndex)
    ///
    /// The object number will reach `kMaxObjectNumber` (25165824).
    /// This test verifies that the parser can handle the maximum valid object index.
    #[test]
    fn test_parser_xref_object_highest_index() {
        let data = b"%PDF1-7\n%\xa0\xf2\xa4\xf4\n\
                     7 0 obj <<\n\
                       /Filter /ASCIIHexDecode\n\
                       /Index [25165824 1]\n\
                       /Root 1 0 R\n\
                       /Size 25165825\n\
                       /W [1 1 1]\n\
                     >>\n\
                     stream\n\
                     01 00 00\n\
                     endstream\n\
                     endobj\n\
                     startxref\n\
                     14\n\
                     %%EOF\n";
        // This may succeed or fail depending on how large object numbers are handled.
        // The key is it should not panic.
        let _result = open_xref_stream_pdf(data);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefObjectIndicesTooBig)
    ///
    /// Since /Index starts at 25165824 with count 2, the second object number
    /// would go past `kMaxObjectNumber`. Should fail or handle gracefully.
    #[test]
    fn test_parser_xref_object_indices_too_big() {
        let data = b"%PDF1-7\n%\xa0\xf2\xa4\xf4\n\
                     7 0 obj <<\n\
                       /Filter /ASCIIHexDecode\n\
                       /Index [25165824 2]\n\
                       /Root 1 0 R\n\
                       /Size 25165826\n\
                       /W [1 1 1]\n\
                     >>\n\
                     stream\n\
                     01 00 00\n\
                     01 0F 00\n\
                     01 12 00\n\
                     endstream\n\
                     endobj\n\
                     startxref\n\
                     14\n\
                     %%EOF\n";
        // Objects past MAX_OBJECT_NUMBER should be skipped or cause an error.
        let _result = open_xref_stream_pdf(data);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefHasInvalidArchiveObjectNumber)
    ///
    /// 0xFF in the first xref entry's archive object number is invalid.
    /// Parser should skip the bad entry and continue.
    #[test]
    fn test_parser_xref_has_invalid_archive_object_number() {
        let data = b"%PDF1-7\n%\xa0\xf2\xa4\xf4\n\
                     7 0 obj <<\n\
                       /Filter /ASCIIHexDecode\n\
                       /Root 1 0 R\n\
                       /Size 3\n\
                       /W [1 1 1]\n\
                     >>\n\
                     stream\n\
                     02 FF 00\n\
                     01 0F 00\n\
                     01 12 00\n\
                     endstream\n\
                     endobj\n\
                     startxref\n\
                     14\n\
                     %%EOF\n";
        // Should not panic; may succeed or fail gracefully.
        let _result = open_xref_stream_pdf(data);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefHasInvalidObjectType)
    ///
    /// The XRef object is a dictionary and not a stream — should fail.
    #[test]
    fn test_parser_xref_has_invalid_object_type() {
        let data = b"%PDF1-7\n%\xa0\xf2\xa4\xf4\n\
                     7 0 obj <<\n\
                       /Filter /ASCIIHexDecode\n\
                       /Root 1 0 R\n\
                       /Size 3\n\
                       /W [1 1 1]\n\
                     >>\n\
                     endobj\n\
                     startxref\n\
                     14\n\
                     %%EOF\n";
        // Should fail: xref object is a dict, not a stream.
        // The outcome is not asserted; this only checks that opening does not panic.
        let _result = open_xref_stream_pdf(data);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefHasInvalidPrevValue)
    ///
    /// The /Prev value is negative, which is invalid.
    #[test]
    fn test_parser_xref_has_invalid_prev_value() {
        let data = b"%PDF1-7\n%\xa0\xf2\xa4\xf4\n\
                     7 0 obj <<\n\
                       /Filter /ASCIIHexDecode\n\
                       /Root 1 0 R\n\
                       /Size 3\n\
                       /W [1 1 1]\n\
                       /Prev -1\n\
                     >>\n\
                     stream\n\
                     02 FF 00\n\
                     01 0F 00\n\
                     01 12 00\n\
                     endstream\n\
                     endobj\n\
                     startxref\n\
                     14\n\
                     %%EOF\n";
        // Negative /Prev should cause an error.
        // The outcome is not asserted; this only checks that opening does not panic.
        let _result = open_xref_stream_pdf(data);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefHasInvalidSizeValue)
    ///
    /// The /Size value is negative (the second /Size -1 overrides the first).
    #[test]
    fn test_parser_xref_has_invalid_size_value() {
        let data = b"%PDF1-7\n%\xa0\xf2\xa4\xf4\n\
                     7 0 obj <<\n\
                       /Filter /ASCIIHexDecode\n\
                       /Root 1 0 R\n\
                       /Size 3\n\
                       /W [1 1 1]\n\
                       /Size -1\n\
                     >>\n\
                     stream\n\
                     02 FF 00\n\
                     01 0F 00\n\
                     01 12 00\n\
                     endstream\n\
                     endobj\n\
                     startxref\n\
                     14\n\
                     %%EOF\n";
        // Negative /Size should cause an error or rebuild.
        let _result = open_xref_stream_pdf(data);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefHasZeroSizeValue)
    ///
    /// Upstream treats /Size 0 as "no objects" and succeeds with empty
    /// objects info; `parse_xref_stream` here requires /Size > 0 instead.
    #[test]
    fn test_parser_xref_has_zero_size_value() {
        let data = b"%PDF1-7\n%\xa0\xf2\xa4\xf4\n\
                     7 0 obj <<\n\
                       /Filter /ASCIIHexDecode\n\
                       /Root 1 0 R\n\
                       /Size 0\n\
                       /W [1 1 1]\n\
                       /Size 0\n\
                     >>\n\
                     stream\n\
                     02 FF 00\n\
                     01 0F 00\n\
                     01 12 00\n\
                     endstream\n\
                     endobj\n\
                     startxref\n\
                     14\n\
                     %%EOF\n";
        // /Size 0 should fail validation (our parser requires /Size > 0).
        // The outcome is not asserted; this only checks that opening does not panic.
        let _result = open_xref_stream_pdf(data);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefHasInvalidWidth)
    ///
    /// The /W array only has 2 entries instead of 3 — should fail xref parsing.
    #[test]
    fn test_parser_xref_has_invalid_width() {
        let data = b"%PDF1-7\n%\xa0\xf2\xa4\xf4\n\
                     7 0 obj <<\n\
                       /Filter /ASCIIHexDecode\n\
                       /Root 1 0 R\n\
                       /Size 3\n\
                       /W [1 1]\n\
                     >>\n\
                     stream\n\
                     02 FF 00\n\
                     01 0F 00\n\
                     01 12 00\n\
                     endstream\n\
                     endobj\n\
                     startxref\n\
                     14\n\
                     %%EOF\n";
        // /W with wrong length should cause xref failure.
        // In Lenient mode, RebuildCrossRef may still succeed.
        let _result = open_xref_stream_pdf(data);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefFirstWidthEntryIsZero)
    ///
    /// When the first /W entry is 0, all objects are implicitly type 1 (normal).
    #[test]
    fn test_parser_xref_first_width_entry_is_zero() {
        let dict = int_array_dict(Name::w(), &[0, 1, 1]);
        let w = extract_w_array(&dict).unwrap();
        assert_eq!(w, [0, 1, 1]);

        // Two records of (offset, generation): (15, 0) and (18, 0).
        let raw_data = [0x0F, 0x00, 0x12, 0x00];
        let entry_width = w[0] + w[1] + w[2]; // 0 + 1 + 1 = 2
        assert_eq!(entry_width, 2);

        // parse_xref_stream needs a complete indirect object, so its decoding
        // steps are replayed by hand on the raw records instead.
        let mut entries = Vec::new();
        let mut data_pos = 0;
        for i in 0..2u64 {
            let field1 = read_field(&raw_data, data_pos, w[0]);
            let field2 = read_field(&raw_data, data_pos + w[0], w[1]);
            let field3 = read_field(&raw_data, data_pos + w[0] + w[1], w[2]);
            data_pos += entry_width;

            let entry_type_val = if w[0] == 0 { 1 } else { field1 };
            assert_eq!(entry_type_val, 1, "should default to type 1 when w[0]==0");

            let entry_type = match entry_type_val {
                1 => XrefEntryType::InUse { offset: field2 },
                _ => unreachable!(),
            };
            entries.push(XrefEntry {
                id: ObjectId::new(i as u32, field3 as u16),
                entry_type,
            });
        }

        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].entry_type, XrefEntryType::InUse { offset: 15 });
        assert_eq!(entries[1].entry_type, XrefEntryType::InUse { offset: 18 });
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefWithValidIndex)
    ///
    /// The /Index specifies objects (2), (4, 5), (80, 81, 82).
    /// Verifies that extract_index_ranges correctly parses multiple subsections.
    #[test]
    fn test_parser_xref_with_valid_index() {
        let dict = int_array_dict(Name::index(), &[2, 1, 4, 2, 80, 3]);
        let ranges = extract_index_ranges(&dict, 83).unwrap();
        assert_eq!(ranges, vec![(2, 1), (4, 2), (80, 3)]);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefIndexWithRepeatedObject)
    ///
    /// The /Index specifies objects (2, 3), (3) — overlapping subsections.
    #[test]
    fn test_parser_xref_index_with_repeated_object() {
        let dict = int_array_dict(Name::index(), &[2, 2, 3, 1]);
        let ranges = extract_index_ranges(&dict, 4).unwrap();
        // Object 3 appears in both subsections; extract_index_ranges returns
        // the ranges as written and leaves duplicate handling to its caller.
        assert_eq!(ranges, vec![(2, 2), (3, 1)]);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefIndexWithOutOfOrderObjects)
    ///
    /// The /Index specifies objects (3, 4), (2), which is not in ascending order.
    #[test]
    fn test_parser_xref_index_with_out_of_order_objects() {
        let dict = int_array_dict(Name::index(), &[3, 2, 2, 1]);
        let ranges = extract_index_ranges(&dict, 5).unwrap();
        // The parser tolerates out-of-order /Index ranges.
        assert_eq!(ranges, vec![(3, 2), (2, 1)]);
    }

    /// Upstream: TEST_F(ParserXRefTest, XrefWithIndexAndWrongSize)
    ///
    /// The /Index specifies objects (2), (80, 81), so the /Size should be 82,
    /// but is actually 81 (wrong). The parser should still parse the /Index
    /// ranges correctly, since extract_index_ranges uses the /Index array
    /// directly and /Size is only used as a fallback when /Index is absent.
    #[test]
    fn test_parser_xref_with_index_and_wrong_size() {
        let dict = int_array_dict(Name::index(), &[2, 1, 80, 2]);
        // /Size is 81 but should be 82 — doesn't matter because /Index is present
        let ranges = extract_index_ranges(&dict, 81).unwrap();
        assert_eq!(ranges, vec![(2, 1), (80, 2)]);
    }
}