1use std::io::Write;
2
3use crate::error::Result;
4use crate::object::{PdfDict, PdfObject};
5use crate::writer::encode::encode_flate;
6use crate::writer::serialize::serialize_object;
7
/// Location of one object that has been packed into an object stream.
///
/// This is the information needed to emit a type-2 (compressed) entry for
/// the object in a cross-reference stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct CompressedObjInfo {
    /// Object number of the packed object.
    pub obj_num: u32,
    /// Object number of the object stream that contains it.
    pub objstm_num: u32,
    /// Zero-based position of the object inside that object stream.
    pub index: u32,
}
28
29pub struct PackResult {
31 pub objects: Vec<(u32, PdfObject)>,
33 pub compressed: Vec<CompressedObjInfo>,
35}
36
37pub fn pack_object_streams(
38 objects: &[(u32, PdfObject)],
39 max_objects_per_stream: usize,
40 catalog_obj_num: u32,
41 pages_root_obj_num: Option<u32>,
42 encrypt_obj_num: Option<u32>,
43) -> Result<PackResult> {
44 let mut eligible: Vec<(u32, &PdfObject)> = Vec::new();
45 let mut ineligible: Vec<(u32, PdfObject)> = Vec::new();
46
47 for (obj_num, obj) in objects {
48 if is_eligible(*obj_num, obj, catalog_obj_num, pages_root_obj_num, encrypt_obj_num) {
49 eligible.push((*obj_num, obj));
50 } else {
51 ineligible.push((*obj_num, obj.clone()));
52 }
53 }
54
55 if eligible.is_empty() {
56 return Ok(PackResult {
57 objects: objects.to_vec(),
58 compressed: Vec::new(),
59 });
60 }
61
62 let mut next_obj_num = objects.iter().map(|(n, _)| *n).max().unwrap_or(0) + 1;
64
65 let mut result = ineligible;
67 let mut compressed = Vec::new();
68
69 for chunk in eligible.chunks(max_objects_per_stream) {
70 let objstm_num = next_obj_num;
71 let objstm = build_object_stream(chunk)?;
72 result.push((objstm_num, objstm));
73
74 for (index, (obj_num, _)) in chunk.iter().enumerate() {
75 compressed.push(CompressedObjInfo {
76 obj_num: *obj_num,
77 objstm_num,
78 index: index as u32,
79 });
80 }
81
82 next_obj_num += 1;
83 }
84
85 Ok(PackResult { objects: result, compressed })
86}
87
88fn is_eligible(
90 obj_num: u32,
91 obj: &PdfObject,
92 catalog_obj_num: u32,
93 pages_root_obj_num: Option<u32>,
94 encrypt_obj_num: Option<u32>,
95) -> bool {
96 if obj.is_stream() {
98 return false;
99 }
100
101 if obj_num == catalog_obj_num {
103 return false;
104 }
105
106 if pages_root_obj_num == Some(obj_num) {
108 return false;
109 }
110
111 if encrypt_obj_num == Some(obj_num) {
113 return false;
114 }
115
116 if let PdfObject::Dict(d) = obj {
118 if d.get_name(b"Type") == Some(b"XRef") {
119 return false;
120 }
121 }
122
123 if obj.is_null() {
125 return false;
126 }
127
128 true
129}
130
131fn build_object_stream(objects: &[(u32, &PdfObject)]) -> Result<PdfObject> {
138 let n = objects.len();
139
140 let mut object_data: Vec<Vec<u8>> = Vec::with_capacity(n);
142 for (_obj_num, obj) in objects {
143 let mut buf = Vec::new();
144 serialize_object(&mut buf, obj)?;
145 object_data.push(buf);
146 }
147
148 let mut offsets: Vec<usize> = Vec::with_capacity(n);
150 let mut running_offset = 0usize;
151 for data in &object_data {
152 offsets.push(running_offset);
153 running_offset += data.len();
154 running_offset += 1;
156 }
157
158 let mut index_section = Vec::new();
160 for (i, (obj_num, _)) in objects.iter().enumerate() {
161 if i > 0 {
162 write!(index_section, " ")?;
163 }
164 write!(index_section, "{} {}", obj_num, offsets[i])?;
165 }
166 write!(index_section, " ")?; let first = index_section.len();
169
170 let mut content = index_section;
172 for (i, data) in object_data.iter().enumerate() {
173 content.extend_from_slice(data);
174 if i < n - 1 {
175 content.push(b' ');
176 }
177 }
178
179 let compressed = encode_flate(&content)?;
181
182 let mut dict = PdfDict::new();
183 dict.insert(b"Type".to_vec(), PdfObject::Name(b"ObjStm".to_vec()));
184 dict.insert(b"N".to_vec(), PdfObject::Integer(n as i64));
185 dict.insert(b"First".to_vec(), PdfObject::Integer(first as i64));
186 dict.insert(
187 b"Filter".to_vec(),
188 PdfObject::Name(b"FlateDecode".to_vec()),
189 );
190
191 Ok(PdfObject::Stream {
192 dict,
193 data: compressed,
194 })
195}
196
197pub fn write_xref_stream(
202 buf: &mut Vec<u8>,
203 offsets: &[(u32, usize)],
204 compressed: &[CompressedObjInfo],
205 catalog_ref: &crate::object::IndirectRef,
206 info_ref: Option<&crate::object::IndirectRef>,
207 xref_stm_obj_num: u32,
208) -> Result<()> {
209 let max_obj_num = offsets
210 .iter()
211 .map(|(n, _)| *n)
212 .max()
213 .unwrap_or(0)
214 .max(xref_stm_obj_num)
215 .max(compressed.iter().map(|c| c.obj_num).max().unwrap_or(0));
216 let size = max_obj_num + 1;
217
218 let mut offset_map = std::collections::HashMap::new();
220 for (num, off) in offsets {
221 offset_map.insert(*num, *off);
222 }
223
224 let mut compressed_map: std::collections::HashMap<u32, (u32, u32)> =
226 std::collections::HashMap::new();
227 for info in compressed {
228 compressed_map.insert(info.obj_num, (info.objstm_num, info.index));
229 }
230
231 let max_offset = offsets.iter().map(|(_, o)| *o).max().unwrap_or(0);
237 let max_objstm_num = compressed.iter().map(|c| c.objstm_num as usize).max().unwrap_or(0);
238 let w2 = bytes_needed(max_offset.max(max_objstm_num) as u64);
239 let w1 = 1u8;
240 let max_index = compressed.iter().map(|c| c.index).max().unwrap_or(0);
241 let w3 = bytes_needed(max_index.max(255) as u64);
242
243 let entry_size = (w1 + w2 + w3) as usize;
245 let mut stream_data = Vec::with_capacity(entry_size * size as usize);
246
247 for obj_num in 0..size {
248 if obj_num == 0 {
249 stream_data.push(0u8);
251 write_field(&mut stream_data, 0, w2);
252 write_field(&mut stream_data, 255, w3);
253 } else if let Some(&off) = offset_map.get(&obj_num) {
254 stream_data.push(1u8);
256 write_field(&mut stream_data, off as u64, w2);
257 write_field(&mut stream_data, 0, w3);
258 } else if let Some(&(objstm_num, index)) = compressed_map.get(&obj_num) {
259 stream_data.push(2u8);
261 write_field(&mut stream_data, objstm_num as u64, w2);
262 write_field(&mut stream_data, index as u64, w3);
263 } else if obj_num == xref_stm_obj_num {
264 stream_data.push(1u8);
266 write_field(&mut stream_data, buf.len() as u64, w2);
267 write_field(&mut stream_data, 0, w3);
268 } else {
269 stream_data.push(0u8);
271 write_field(&mut stream_data, 0, w2);
272 write_field(&mut stream_data, 0, w3);
273 }
274 }
275
276 let compressed = encode_flate(&stream_data)?;
278
279 let mut dict = PdfDict::new();
280 dict.insert(b"Type".to_vec(), PdfObject::Name(b"XRef".to_vec()));
281 dict.insert(b"Size".to_vec(), PdfObject::Integer(size as i64));
282 dict.insert(
283 b"W".to_vec(),
284 PdfObject::Array(vec![
285 PdfObject::Integer(w1 as i64),
286 PdfObject::Integer(w2 as i64),
287 PdfObject::Integer(w3 as i64),
288 ]),
289 );
290 dict.insert(
291 b"Root".to_vec(),
292 PdfObject::Reference(catalog_ref.clone()),
293 );
294 if let Some(info) = info_ref {
295 dict.insert(b"Info".to_vec(), PdfObject::Reference(info.clone()));
296 }
297 dict.insert(
298 b"Filter".to_vec(),
299 PdfObject::Name(b"FlateDecode".to_vec()),
300 );
301
302 let xref_offset = buf.len();
303
304 write!(buf, "{} 0 obj\n", xref_stm_obj_num)?;
306 let mut stream_dict = dict;
308 stream_dict.insert(
309 b"Length".to_vec(),
310 PdfObject::Integer(compressed.len() as i64),
311 );
312 crate::writer::serialize::serialize_dict(buf, &stream_dict)?;
313 buf.extend_from_slice(b"\nstream\r\n");
314 buf.extend_from_slice(&compressed);
315 buf.extend_from_slice(b"\r\nendstream");
316 write!(buf, "\nendobj\n")?;
317
318 write!(buf, "startxref\n{}\n%%EOF\n", xref_offset)?;
320
321 Ok(())
322}
323
/// Returns the minimum number of bytes needed to store `val` as an unsigned
/// big-endian integer.
///
/// The result is between 1 and 8 inclusive; zero still occupies one byte.
fn bytes_needed(val: u64) -> u8 {
    // Number of bits up to and including the highest set bit (0 for `val == 0`).
    let significant_bits = 64 - val.leading_zeros();
    // Round up to whole bytes. The result is at most 8, so the cast is lossless.
    ((significant_bits + 7) / 8).max(1) as u8
}
336
/// Appends `val` to `buf` as a big-endian integer occupying exactly `width`
/// bytes.
///
/// High-order bytes that do not fit in `width` are dropped. When `width` is
/// larger than 8 the extra leading bytes are written as zero. A `width` of
/// zero appends nothing.
fn write_field(buf: &mut Vec<u8>, val: u64, width: u8) {
    buf.extend((0..u32::from(width)).rev().map(|byte_index| {
        // `checked_shr` yields `None` once the shift reaches 64 bits, i.e.
        // for byte positions a `u64` does not have; those bytes are zero.
        val.checked_shr(8 * byte_index).unwrap_or(0) as u8
    }));
}
343
/// Unit tests for object-stream packing and cross-reference stream output.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::object::{IndirectRef, PdfDict, PdfObject};

    /// Plain dictionaries, integers and arrays can all be packed.
    #[test]
    fn test_eligible_objects() {
        let dict = PdfObject::Dict(PdfDict::new());
        assert!(is_eligible(10, &dict, 1, Some(2), None));

        assert!(is_eligible(10, &PdfObject::Integer(42), 1, Some(2), None));

        let arr = PdfObject::Array(vec![PdfObject::Integer(1)]);
        assert!(is_eligible(10, &arr, 1, Some(2), None));
    }

    /// Streams are never packed.
    #[test]
    fn test_ineligible_stream() {
        let stream = PdfObject::Stream {
            dict: PdfDict::new(),
            data: vec![1, 2, 3],
        };
        assert!(!is_eligible(10, &stream, 1, Some(2), None));
    }

    /// The catalog is excluded by object number.
    #[test]
    fn test_ineligible_catalog() {
        let dict = PdfObject::Dict(PdfDict::new());
        assert!(!is_eligible(1, &dict, 1, Some(2), None));
    }

    /// The page-tree root is excluded by object number.
    #[test]
    fn test_ineligible_pages_root() {
        let dict = PdfObject::Dict(PdfDict::new());
        assert!(!is_eligible(2, &dict, 1, Some(2), None));
    }

    /// The encryption dictionary is excluded by object number.
    #[test]
    fn test_ineligible_encrypt() {
        let dict = PdfObject::Dict(PdfDict::new());
        assert!(!is_eligible(5, &dict, 1, Some(2), Some(5)));
    }

    /// A dictionary typed `/XRef` is excluded.
    #[test]
    fn test_ineligible_xref_stream() {
        let mut d = PdfDict::new();
        d.insert(b"Type".to_vec(), PdfObject::Name(b"XRef".to_vec()));
        let obj = PdfObject::Dict(d);
        assert!(!is_eligible(10, &obj, 1, Some(2), None));
    }

    /// Null objects are excluded.
    #[test]
    fn test_ineligible_null() {
        assert!(!is_eligible(10, &PdfObject::Null, 1, Some(2), None));
    }

    /// Catalog and pages root stay top-level; the rest go into one stream.
    #[test]
    fn test_pack_object_streams_structure() {
        let mut catalog = PdfDict::new();
        catalog.insert(b"Type".to_vec(), PdfObject::Name(b"Catalog".to_vec()));

        let mut pages = PdfDict::new();
        pages.insert(b"Type".to_vec(), PdfObject::Name(b"Pages".to_vec()));

        let objects = vec![
            (1, PdfObject::Dict(catalog)),
            (2, PdfObject::Dict(pages)),
            (3, PdfObject::Integer(42)),
            (4, PdfObject::String(b"hello".to_vec())),
            (5, PdfObject::Array(vec![PdfObject::Integer(1), PdfObject::Integer(2)])),
        ];

        let packed = pack_object_streams(&objects, 100, 1, Some(2), None).unwrap();

        let catalog_entry = packed.objects.iter().find(|(n, _)| *n == 1);
        assert!(catalog_entry.is_some());
        let pages_entry = packed.objects.iter().find(|(n, _)| *n == 2);
        assert!(pages_entry.is_some());

        let obj3 = packed.objects.iter().find(|(n, _)| *n == 3);
        assert!(obj3.is_none(), "object 3 should be packed");
        let obj4 = packed.objects.iter().find(|(n, _)| *n == 4);
        assert!(obj4.is_none(), "object 4 should be packed");

        let objstm = packed.objects.iter().find(|(_, obj)| {
            if let PdfObject::Stream { dict, .. } = obj {
                dict.get_name(b"Type") == Some(b"ObjStm")
            } else {
                false
            }
        });
        assert!(objstm.is_some(), "should contain an object stream");

        let (objstm_num, stream_obj) = objstm.unwrap();
        // The stream takes the first number above the largest input number.
        assert_eq!(*objstm_num, 6);
        if let PdfObject::Stream { dict, .. } = stream_obj {
            assert_eq!(dict.get_i64(b"N"), Some(3));
            assert!(dict.get_i64(b"First").unwrap() > 0);
            assert_eq!(dict.get_name(b"Filter"), Some(b"FlateDecode".as_slice()));
        } else {
            panic!("expected stream object");
        }

        // Every packed object is recorded with its stream and position.
        let mapping: Vec<(u32, u32, u32)> = packed
            .compressed
            .iter()
            .map(|c| (c.obj_num, c.objstm_num, c.index))
            .collect();
        assert_eq!(mapping, vec![(3, 6, 0), (4, 6, 1), (5, 6, 2)]);
    }

    /// Five eligible objects with a limit of two yield three streams.
    #[test]
    fn test_pack_splits_by_max() {
        let mut objects = vec![(1, PdfObject::Dict(PdfDict::new()))];
        for i in 2..=6 {
            objects.push((i, PdfObject::Integer(i as i64)));
        }

        let packed = pack_object_streams(&objects, 2, 1, None, None).unwrap();

        let objstm_count = packed
            .objects
            .iter()
            .filter(|(_, obj)| {
                if let PdfObject::Stream { dict, .. } = obj {
                    dict.get_name(b"Type") == Some(b"ObjStm")
                } else {
                    false
                }
            })
            .count();
        assert_eq!(objstm_count, 3);

        // Streams are numbered 7, 8, 9 and indices restart in each stream.
        let mapping: Vec<(u32, u32, u32)> = packed
            .compressed
            .iter()
            .map(|c| (c.obj_num, c.objstm_num, c.index))
            .collect();
        assert_eq!(
            mapping,
            vec![(2, 7, 0), (3, 7, 1), (4, 8, 0), (5, 8, 1), (6, 9, 0)]
        );
    }

    /// With nothing to pack the input comes back unchanged.
    #[test]
    fn test_pack_no_eligible() {
        let stream = PdfObject::Stream {
            dict: PdfDict::new(),
            data: vec![1, 2, 3],
        };
        let objects = vec![(1, PdfObject::Dict(PdfDict::new())), (2, stream)];

        let packed = pack_object_streams(&objects, 100, 1, None, None).unwrap();
        assert_eq!(packed.objects.len(), 2);
        assert!(packed.compressed.is_empty());
    }

    /// The decompressed payload has a consistent header and object data.
    #[test]
    fn test_build_object_stream_content() {
        use flate2::read::ZlibDecoder;
        use std::io::Read;

        let obj1 = PdfObject::Integer(42);
        let obj2 = PdfObject::String(b"test".to_vec());
        let items: Vec<(u32, &PdfObject)> = vec![(10, &obj1), (20, &obj2)];

        let result = build_object_stream(&items).unwrap();
        if let PdfObject::Stream { dict, data } = result {
            assert_eq!(dict.get_name(b"Type"), Some(b"ObjStm".as_slice()));
            assert_eq!(dict.get_i64(b"N"), Some(2));

            let mut decoder = ZlibDecoder::new(&data[..]);
            let mut decompressed = Vec::new();
            decoder.read_to_end(&mut decompressed).unwrap();
            let text = String::from_utf8_lossy(&decompressed);

            assert!(text.starts_with("10 "), "content should start with first obj num: {}", text);
            assert!(text.contains("20 "), "content should contain second obj num");
            assert!(text.contains("42"), "content should contain integer 42");
            assert!(text.contains("(test)"), "content should contain string (test)");

            // `/First` must delimit a header of exactly two `number offset`
            // pairs, and the second offset must land right after a separator.
            let first = dict.get_i64(b"First").unwrap() as usize;
            let header = std::str::from_utf8(&decompressed[..first]).unwrap();
            let fields: Vec<usize> = header
                .split_whitespace()
                .map(|field| field.parse().unwrap())
                .collect();
            assert_eq!(fields.len(), 4, "header should hold two pairs: {:?}", header);
            assert_eq!((fields[0], fields[1], fields[2]), (10, 0, 20));
            let second_start = first + fields[3];
            assert!(second_start < decompressed.len());
            assert_eq!(decompressed[second_start - 1], b' ');
        } else {
            panic!("expected stream object");
        }
    }

    /// Width boundaries for one to four bytes.
    #[test]
    fn test_bytes_needed() {
        assert_eq!(bytes_needed(0), 1);
        assert_eq!(bytes_needed(255), 1);
        assert_eq!(bytes_needed(256), 2);
        assert_eq!(bytes_needed(65535), 2);
        assert_eq!(bytes_needed(65536), 3);
        assert_eq!(bytes_needed(0xFF_FFFF), 3);
        assert_eq!(bytes_needed(0x100_0000), 4);
    }

    /// Fields are written big-endian in exactly `width` bytes.
    #[test]
    fn test_write_field() {
        let mut buf = Vec::new();
        write_field(&mut buf, 0x1234, 2);
        assert_eq!(buf, vec![0x12, 0x34]);

        let mut buf = Vec::new();
        write_field(&mut buf, 42, 1);
        assert_eq!(buf, vec![42]);

        let mut buf = Vec::new();
        write_field(&mut buf, 0xABCDEF, 3);
        assert_eq!(buf, vec![0xAB, 0xCD, 0xEF]);
    }

    /// The trailer is well-formed and `startxref` points at the xref object.
    #[test]
    fn test_write_xref_stream() {
        let mut buf = Vec::new();
        buf.extend_from_slice(b"%PDF-1.5\n");

        let offsets = vec![(1, 20), (2, 100)];
        let catalog_ref = IndirectRef { obj_num: 1, gen_num: 0 };

        write_xref_stream(&mut buf, &offsets, &[], &catalog_ref, None, 3).unwrap();

        let text = String::from_utf8_lossy(&buf);
        assert!(text.contains("3 0 obj"));
        assert!(text.contains("/Type /XRef"));
        assert!(text.contains("/Root 1 0 R"));
        assert!(text.contains("startxref"));
        assert!(text.contains("%%EOF"));

        // Search from the end so the compressed payload cannot interfere.
        let marker = b"startxref\n";
        let marker_pos = buf
            .windows(marker.len())
            .rposition(|window| window == &marker[..])
            .expect("startxref marker should be present");
        let tail = std::str::from_utf8(&buf[marker_pos + marker.len()..]).unwrap();
        let declared: usize = tail.lines().next().unwrap().parse().unwrap();
        // The xref object starts right after the 9-byte header written above.
        assert_eq!(declared, 9);
        assert!(buf[declared..].starts_with(b"3 0 obj\n"));
        assert!(tail.ends_with("%%EOF\n"));
    }
}