1use rpdfium_core::{Name, ParsingMode};
13
14use crate::header::parse_header;
15use crate::object::Object;
16use crate::object_parser::parse_indirect_object;
17
/// Parameters describing a linearized PDF, as collected by [`detect_linearized`]
/// from the linearization dictionary at the start of the file.
#[derive(Debug, Clone)]
pub struct LinearizedInfo {
    /// Total file length, taken from the `/L` entry.
    pub file_length: u64,
    /// Number of the first page to display, taken from `/P` (0 when absent).
    pub primary_page: u32,
    /// Offset of the first entry of the main cross-reference table (`/T`).
    pub main_xref_table_first_entry_offset: u64,
    /// Number of pages in the document (`/N`).
    pub page_count: u32,
    /// Offset of the end of the first page (`/E`).
    pub first_page_end_offset: u64,
    /// Object number of the first page's page object (`/O`).
    pub first_page_obj_num: u32,
    /// Offset of the last cross-reference section; [`detect_linearized`]
    /// fills this with the same value as `/T`.
    pub last_xref_offset: u64,
    /// Offset of the primary hint stream (first element of `/H`), if usable.
    pub hint_stream_offset: Option<u64>,
    /// Length of the primary hint stream (second element of `/H`), if usable.
    pub hint_stream_length: Option<u32>,
}

impl LinearizedInfo {
    // ---- Primary accessors -------------------------------------------------

    /// Returns the file length recorded in the linearization dictionary.
    pub fn file_size(&self) -> u64 {
        self.file_length
    }

    /// Returns the number of the first page to display.
    pub fn first_page_no(&self) -> u32 {
        self.primary_page
    }

    /// Returns the offset of the first entry of the main cross-reference table.
    pub fn main_xref_table_first_entry_offset(&self) -> u64 {
        self.main_xref_table_first_entry_offset
    }

    /// Returns the number of pages in the document.
    pub fn page_count(&self) -> u32 {
        self.page_count
    }

    /// Returns the offset of the end of the first page.
    pub fn first_page_end_offset(&self) -> u64 {
        self.first_page_end_offset
    }

    /// Returns the object number of the first page's page object.
    pub fn first_page_obj_num(&self) -> u32 {
        self.first_page_obj_num
    }

    /// Returns the offset of the last cross-reference section.
    pub fn last_xref_offset(&self) -> u64 {
        self.last_xref_offset
    }

    /// Returns the hint stream offset, or `None` when no usable value was found.
    pub fn hint_start(&self) -> Option<u64> {
        self.hint_stream_offset
    }

    /// Returns the hint stream length, or `None` when no usable value was found.
    pub fn hint_length(&self) -> Option<u32> {
        self.hint_stream_length
    }

    // ---- `get_*` aliases ---------------------------------------------------

    /// Alias of [`Self::file_size`].
    #[inline]
    pub fn get_file_size(&self) -> u64 {
        self.file_length
    }

    /// Alias of [`Self::first_page_no`].
    #[inline]
    pub fn get_first_page_no(&self) -> u32 {
        self.primary_page
    }

    /// Alias of [`Self::main_xref_table_first_entry_offset`].
    #[inline]
    pub fn get_main_xref_table_first_entry_offset(&self) -> u64 {
        self.main_xref_table_first_entry_offset
    }

    /// Alias of [`Self::page_count`].
    #[inline]
    pub fn get_page_count(&self) -> u32 {
        self.page_count
    }

    /// Alias of [`Self::first_page_end_offset`].
    #[inline]
    pub fn get_first_page_end_offset(&self) -> u64 {
        self.first_page_end_offset
    }

    /// Alias of [`Self::first_page_obj_num`].
    #[inline]
    pub fn get_first_page_obj_num(&self) -> u32 {
        self.first_page_obj_num
    }

    /// Alias of [`Self::last_xref_offset`].
    #[inline]
    pub fn get_last_xref_offset(&self) -> u64 {
        self.last_xref_offset
    }

    /// Alias of [`Self::hint_start`].
    #[inline]
    pub fn get_hint_start(&self) -> Option<u64> {
        self.hint_stream_offset
    }

    /// Alias of [`Self::hint_length`].
    #[inline]
    pub fn get_hint_length(&self) -> Option<u32> {
        self.hint_stream_length
    }

    // ---- Derived queries ---------------------------------------------------

    /// Reports whether a hint table is available.
    ///
    /// This is the case only when the document has more than one page and
    /// both the hint stream offset and length are present and non-zero.
    pub fn has_hint_table(&self) -> bool {
        if self.page_count <= 1 {
            return false;
        }
        matches!(
            (self.hint_stream_offset, self.hint_stream_length),
            (Some(offset), Some(length)) if offset > 0 && length > 0
        )
    }
}
200
201pub fn detect_linearized(source: &[u8], mode: ParsingMode) -> Option<LinearizedInfo> {
205 let (_version, header_end) = parse_header(source, mode).ok()?;
207
208 let mut pos = header_end as usize;
210 while pos < source.len()
211 && (source[pos] == b'%'
212 || source[pos] == b'\r'
213 || source[pos] == b'\n'
214 || source[pos] > 127)
215 {
216 if source[pos] == b'%' {
218 while pos < source.len() && source[pos] != b'\r' && source[pos] != b'\n' {
219 pos += 1;
220 }
221 }
222 while pos < source.len() && (source[pos] == b'\r' || source[pos] == b'\n') {
223 pos += 1;
224 }
225 }
226
227 let (_id, obj) = parse_indirect_object(source, pos as u64, mode).ok()?;
229
230 let dict = obj.as_dict()?;
232 let _linearized = dict.get(&Name::from("Linearized"))?;
233
234 let file_length = match dict.get(&Name::from_bytes(b"L".to_vec())) {
244 Some(Object::Integer(n)) if *n > 0 => *n as u64,
245 _ => return None,
246 };
247
248 let primary_page = match dict.get(&Name::from_bytes(b"P".to_vec())) {
249 Some(Object::Integer(n)) if *n >= 0 => *n as u32,
250 _ => 0, };
252
253 let main_xref_table_first_entry_offset = match dict.get(&Name::from_bytes(b"T".to_vec())) {
254 Some(Object::Integer(n)) if *n > 0 => *n as u64,
255 _ => return None,
256 };
257
258 let page_count = match dict.get(&Name::n()) {
259 Some(Object::Integer(n)) if *n > 0 => *n as u32,
260 _ => return None,
261 };
262
263 let first_page_end_offset = match dict.get(&Name::from_bytes(b"E".to_vec())) {
264 Some(Object::Integer(n)) if *n > 0 => *n as u64,
265 _ => return None,
266 };
267
268 let first_page_obj_num = match dict.get(&Name::from_bytes(b"O".to_vec())) {
269 Some(Object::Integer(n)) if *n > 0 => *n as u32,
270 _ => return None,
271 };
272
273 let (hint_stream_offset, hint_stream_length) = match dict.get(&Name::h()) {
275 Some(Object::Array(arr)) if arr.len() >= 2 => {
276 let offset = arr[0].as_i64().filter(|&n| n > 0).map(|n| n as u64);
277 let length = arr[1].as_i64().filter(|&n| n > 0).map(|n| n as u32);
278 (offset, length)
279 }
280 _ => (None, None),
281 };
282
283 let last_xref_offset = main_xref_table_first_entry_offset;
291
292 Some(LinearizedInfo {
293 file_length,
294 primary_page,
295 main_xref_table_first_entry_offset,
296 page_count,
297 first_page_end_offset,
298 first_page_obj_num,
299 last_xref_offset,
300 hint_stream_offset,
301 hint_stream_length,
302 })
303}
304
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal non-linearized PDF: header, one catalog object,
    /// a classic cross-reference table, trailer and `startxref`.
    fn classic_pdf() -> Vec<u8> {
        let mut bytes = b"%PDF-1.4\n".to_vec();
        let obj_offset = bytes.len();
        bytes.extend_from_slice(b"1 0 obj\n<< /Type /Catalog >>\nendobj\n");
        let xref_offset = bytes.len();
        bytes.extend_from_slice(b"xref\n0 2\n");
        bytes.extend_from_slice(b"0000000000 65535 f \r\n");
        bytes.extend_from_slice(format!("{:010} 00000 n \r\n", obj_offset).as_bytes());
        bytes.extend_from_slice(b"trailer\n<< /Size 2 /Root 1 0 R >>\n");
        bytes.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
        bytes
    }

    /// Builds a PDF consisting of the header followed by `first_object` only.
    fn pdf_starting_with(first_object: &[u8]) -> Vec<u8> {
        let mut bytes = b"%PDF-1.4\n".to_vec();
        bytes.extend_from_slice(first_object);
        bytes
    }

    /// Like [`pdf_starting_with`], with an empty second object appended.
    fn linearized_fixture(first_object: &[u8]) -> Vec<u8> {
        let mut bytes = pdf_starting_with(first_object);
        bytes.extend_from_slice(b"2 0 obj\n<< >>\nendobj\n");
        bytes
    }

    /// Builds a `LinearizedInfo` that varies only in page count and hint pair.
    fn info_with(page_count: u32, hint: Option<(u64, u32)>) -> LinearizedInfo {
        LinearizedInfo {
            file_length: 50000,
            primary_page: 0,
            main_xref_table_first_entry_offset: 2000,
            page_count,
            first_page_end_offset: 1500,
            first_page_obj_num: 2,
            last_xref_offset: 2000,
            hint_stream_offset: hint.map(|(offset, _)| offset),
            hint_stream_length: hint.map(|(_, length)| length),
        }
    }

    #[test]
    fn test_detect_non_linearized() {
        let pdf = classic_pdf();
        assert!(detect_linearized(&pdf, ParsingMode::Lenient).is_none());
    }

    #[test]
    fn test_detect_linearized_pdf() {
        let pdf = linearized_fixture(
            b"1 0 obj\n<< /Linearized 1.0 /L 12345 /T 1000 /N 5 /E 800 /O 3 /P 0 >>\nendobj\n",
        );

        let result = detect_linearized(&pdf, ParsingMode::Lenient);
        assert!(result.is_some());

        let info = result.unwrap();
        assert_eq!(info.file_size(), 12345);
        assert_eq!(info.page_count(), 5);
        assert_eq!(info.first_page_no(), 0);
        assert_eq!(info.first_page_end_offset(), 800);
        assert_eq!(info.first_page_obj_num(), 3);
        assert_eq!(info.main_xref_table_first_entry_offset(), 1000);
        assert!(info.hint_start().is_none());
        assert!(info.hint_length().is_none());
    }

    #[test]
    fn test_detect_linearized_with_h_array_2_values() {
        let pdf = linearized_fixture(
            b"1 0 obj\n<< /Linearized 1.0 /L 50000 /T 2000 /N 10 /E 1500 /O 2 /H [500 120] >>\nendobj\n",
        );

        let info = detect_linearized(&pdf, ParsingMode::Lenient).unwrap();
        assert_eq!(info.hint_start(), Some(500));
        assert_eq!(info.hint_length(), Some(120));
    }

    #[test]
    fn test_detect_linearized_with_h_array_4_values() {
        // Only the first offset/length pair is expected to be reported.
        let pdf = linearized_fixture(
            b"1 0 obj\n<< /Linearized 1.0 /L 50000 /T 2000 /N 10 /E 1500 /O 2 /H [500 120 700 80] >>\nendobj\n",
        );

        let info = detect_linearized(&pdf, ParsingMode::Lenient).unwrap();
        assert_eq!(info.hint_start(), Some(500));
        assert_eq!(info.hint_length(), Some(120));
    }

    #[test]
    fn test_non_linearized_has_no_hint_fields() {
        let pdf = classic_pdf();
        assert!(detect_linearized(&pdf, ParsingMode::Lenient).is_none());
    }

    #[test]
    fn test_detect_linearized_missing_keys() {
        // `/Linearized` alone is not enough: the required keys are absent.
        let pdf = pdf_starting_with(b"1 0 obj\n<< /Linearized 1.0 >>\nendobj\n");
        assert!(detect_linearized(&pdf, ParsingMode::Lenient).is_none());
    }

    #[test]
    fn test_has_hint_table_true() {
        let info = info_with(5, Some((500, 120)));
        assert!(info.has_hint_table());
    }

    #[test]
    fn test_has_hint_table_false_single_page() {
        let info = info_with(1, Some((500, 120)));
        assert!(!info.has_hint_table());
    }

    #[test]
    fn test_has_hint_table_false_no_hint() {
        let info = info_with(5, None);
        assert!(!info.has_hint_table());
    }
}