rpdfium_parser/
linearized_header.rs1use rpdfium_core::{Name, ParsingMode};
13
14use crate::header::parse_header;
15use crate::object::Object;
16use crate::object_parser::parse_indirect_object;
17
/// Values extracted from a PDF linearization parameter dictionary.
///
/// Produced by `detect_linearized` when the first indirect object after the
/// file header is a dictionary containing a `/Linearized` entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinearizedInfo {
    /// Value of the `/L` entry (must be a positive integer).
    pub file_length: u64,
    /// Value of the optional `/P` entry; `0` when absent or not a valid
    /// non-negative integer.
    pub primary_page: u32,
    /// Value of the `/N` entry (must be a positive integer).
    pub page_count: u32,
    /// Value of the `/O` entry (must be a positive integer).
    ///
    /// NOTE(review): ISO 32000-1 Annex F defines `/O` as the *object number*
    /// of the first page's page object rather than a byte offset — confirm
    /// that the field name matches how callers interpret it.
    pub first_page_offset: u64,
    /// First element of the `/H` array when it is a positive integer.
    pub hint_stream_offset: Option<u64>,
    /// Second element of the `/H` array when it is a positive integer that
    /// fits in `u32`.
    pub hint_stream_length: Option<u32>,
}
34
35pub fn detect_linearized(source: &[u8], mode: ParsingMode) -> Option<LinearizedInfo> {
39 let (_version, header_end) = parse_header(source, mode).ok()?;
41
42 let mut pos = header_end as usize;
44 while pos < source.len()
45 && (source[pos] == b'%'
46 || source[pos] == b'\r'
47 || source[pos] == b'\n'
48 || source[pos] > 127)
49 {
50 if source[pos] == b'%' {
52 while pos < source.len() && source[pos] != b'\r' && source[pos] != b'\n' {
53 pos += 1;
54 }
55 }
56 while pos < source.len() && (source[pos] == b'\r' || source[pos] == b'\n') {
57 pos += 1;
58 }
59 }
60
61 let (_id, obj) = parse_indirect_object(source, pos as u64, mode).ok()?;
63
64 let dict = obj.as_dict()?;
66 let _linearized = dict.get(&Name::from("Linearized"))?;
67
68 let file_length = match dict.get(&Name::from_bytes(b"L".to_vec())) {
70 Some(Object::Integer(n)) if *n > 0 => *n as u64,
71 _ => return None,
72 };
73
74 let page_count = match dict.get(&Name::n()) {
75 Some(Object::Integer(n)) if *n > 0 => *n as u32,
76 _ => return None,
77 };
78
79 let primary_page = match dict.get(&Name::from_bytes(b"P".to_vec())) {
80 Some(Object::Integer(n)) if *n >= 0 => *n as u32,
81 _ => 0, };
83
84 let first_page_offset = match dict.get(&Name::from_bytes(b"O".to_vec())) {
85 Some(Object::Integer(n)) if *n > 0 => *n as u64,
86 _ => return None,
87 };
88
89 let (hint_stream_offset, hint_stream_length) = match dict.get(&Name::h()) {
91 Some(Object::Array(arr)) if arr.len() >= 2 => {
92 let offset = arr[0].as_i64().filter(|&n| n > 0).map(|n| n as u64);
93 let length = arr[1].as_i64().filter(|&n| n > 0).map(|n| n as u32);
94 (offset, length)
95 }
96 _ => (None, None),
97 };
98
99 Some(LinearizedInfo {
100 file_length,
101 primary_page,
102 page_count,
103 first_page_offset,
104 hint_stream_offset,
105 hint_stream_length,
106 })
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal, non-linearized PDF: header, a catalog object, a
    /// classic xref table and a trailer.
    fn plain_pdf() -> Vec<u8> {
        let header: &[u8] = b"%PDF-1.4\n";
        let catalog: &[u8] = b"1 0 obj\n<< /Type /Catalog >>\nendobj\n";
        let obj_offset = header.len();
        let xref_offset = obj_offset + catalog.len();

        let mut bytes = Vec::with_capacity(256);
        bytes.extend_from_slice(header);
        bytes.extend_from_slice(catalog);
        bytes.extend_from_slice(b"xref\n0 2\n");
        bytes.extend_from_slice(b"0000000000 65535 f \r\n");
        bytes.extend_from_slice(format!("{:010} 00000 n \r\n", obj_offset).as_bytes());
        bytes.extend_from_slice(b"trailer\n<< /Size 2 /Root 1 0 R >>\n");
        bytes.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
        bytes
    }

    /// Builds a PDF whose first object is a dictionary with the given
    /// entries, optionally followed by an empty second object.
    fn pdf_with_first_dict(entries: &str, with_second_object: bool) -> Vec<u8> {
        let mut bytes = format!("%PDF-1.4\n1 0 obj\n<< {} >>\nendobj\n", entries).into_bytes();
        if with_second_object {
            bytes.extend_from_slice(b"2 0 obj\n<< >>\nendobj\n");
        }
        bytes
    }

    #[test]
    fn detect_non_linearized() {
        let pdf = plain_pdf();
        assert!(detect_linearized(&pdf, ParsingMode::Lenient).is_none());
    }

    #[test]
    fn detect_linearized_pdf() {
        let pdf = pdf_with_first_dict("/Linearized 1.0 /L 12345 /N 5 /O 100 /P 0", true);

        let result = detect_linearized(&pdf, ParsingMode::Lenient);
        assert!(result.is_some());

        let info = result.unwrap();
        assert_eq!(info.file_length, 12345);
        assert_eq!(info.page_count, 5);
        assert_eq!(info.primary_page, 0);
        assert_eq!(info.first_page_offset, 100);
        assert_eq!(info.hint_stream_offset, None);
        assert_eq!(info.hint_stream_length, None);
    }

    #[test]
    fn detect_linearized_with_h_array_2_values() {
        let pdf = pdf_with_first_dict("/Linearized 1.0 /L 50000 /N 10 /O 200 /H [500 120]", true);

        let info = detect_linearized(&pdf, ParsingMode::Lenient).unwrap();
        assert_eq!(info.hint_stream_offset, Some(500));
        assert_eq!(info.hint_stream_length, Some(120));
    }

    #[test]
    fn detect_linearized_with_h_array_4_values() {
        // Only the first offset/length pair of a four-element /H array is read.
        let pdf = pdf_with_first_dict(
            "/Linearized 1.0 /L 50000 /N 10 /O 200 /H [500 120 700 80]",
            true,
        );

        let info = detect_linearized(&pdf, ParsingMode::Lenient).unwrap();
        assert_eq!(info.hint_stream_offset, Some(500));
        assert_eq!(info.hint_stream_length, Some(120));
    }

    #[test]
    fn non_linearized_has_no_hint_fields() {
        let pdf = plain_pdf();
        assert!(detect_linearized(&pdf, ParsingMode::Lenient).is_none());
    }

    #[test]
    fn detect_linearized_missing_keys() {
        // /Linearized alone is not enough: /L, /N and /O are required.
        let pdf = pdf_with_first_dict("/Linearized 1.0", false);
        assert!(detect_linearized(&pdf, ParsingMode::Lenient).is_none());
    }
}