1use std::collections::BTreeMap;
2use std::fmt::Write;
3
4use crate::stream::flate_encode;
5use crate::types::{ObjectRef, PdfDictionary, PdfFile, PdfObject, PdfString, PdfValue, XrefForm};
6
/// Maximum number of compressible objects packed into one object stream;
/// `serialize_with_xref_stream` splits its compressible objects into chunks of this size.
const OBJSTM_CHUNK_SIZE: usize = 100;
13
14pub fn serialize_pdf(file: &PdfFile) -> Vec<u8> {
15 match file.xref_form {
16 XrefForm::Classic => serialize_classic(file),
17 XrefForm::Stream => serialize_with_xref_stream(file),
18 }
19}
20
21fn serialize_classic(file: &PdfFile) -> Vec<u8> {
22 let mut output = Vec::new();
23 output.extend_from_slice(
24 format!("%PDF-{}\n%\u{00FF}\u{00FF}\u{00FF}\u{00FF}\n", file.version).as_bytes(),
25 );
26
27 let mut offsets = BTreeMap::new();
28 for (object_ref, object) in &file.objects {
29 let offset = output.len();
30 offsets.insert(object_ref.object_number, offset);
31 output.extend_from_slice(
32 format!(
33 "{} {} obj\n",
34 object_ref.object_number, object_ref.generation
35 )
36 .as_bytes(),
37 );
38 match object {
39 PdfObject::Value(value) => {
40 output.extend_from_slice(serialize_value(value).as_bytes());
41 output.extend_from_slice(b"\nendobj\n");
42 }
43 PdfObject::Stream(stream) => {
44 let mut dict = stream.dict.clone();
45 dict.insert(
46 "Length".to_string(),
47 PdfValue::Integer(stream.data.len() as i64),
48 );
49 output.extend_from_slice(serialize_dictionary(&dict).as_bytes());
50 output.extend_from_slice(b"\nstream\n");
51 output.extend_from_slice(&stream.data);
52 if !stream.data.ends_with(b"\n") {
53 output.push(b'\n');
54 }
55 output.extend_from_slice(b"endstream\nendobj\n");
56 }
57 }
58 }
59
60 let startxref = output.len();
61 let size = file.max_object_number + 1;
62 output.extend_from_slice(format!("xref\n0 {}\n", size).as_bytes());
63 output.extend_from_slice(b"0000000000 65535 f \n");
64 for object_number in 1..=file.max_object_number {
65 if let Some(offset) = offsets.get(&object_number).copied() {
66 output.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
67 } else {
68 output.extend_from_slice(b"0000000000 65535 f \n");
69 }
70 }
71
72 let mut trailer = file.trailer.clone();
73 trailer.insert("Size".to_string(), PdfValue::Integer(size as i64));
74 trailer.remove("Prev");
75 trailer.remove("XRefStm");
76 output.extend_from_slice(b"trailer\n");
77 output.extend_from_slice(serialize_dictionary(&trailer).as_bytes());
78 output.extend_from_slice(format!("\nstartxref\n{startxref}\n%%EOF\n").as_bytes());
79 output
80}
81
/// One row of the cross-reference stream, as encoded by `build_xref_stream_data`.
#[derive(Clone, Copy, Debug)]
enum XrefRow {
    /// Encoded as type 0 with both remaining fields zero.
    Free,
    /// Encoded as type 1: an object written directly into the output.
    Direct {
        /// Byte offset of the object in the output buffer (second field).
        offset: usize,
        /// Generation number of the object (third field).
        generation: u16,
    },
    /// Encoded as type 2: an object stored inside an object stream.
    InObjStm {
        /// Object number of the containing object stream (second field).
        stream_objnum: u32,
        /// Position of the object within that stream (third field).
        index: u32,
    },
}
91
92struct PackedObjStm {
95 container_objnum: u32,
96 body: Vec<u8>,
97 first: usize,
98 members: Vec<(ObjectRef, u32)>,
99}
100
101fn serialize_with_xref_stream(file: &PdfFile) -> Vec<u8> {
102 let mut direct: Vec<(ObjectRef, &PdfObject)> = Vec::new();
107 let mut compressible: Vec<(ObjectRef, &PdfValue)> = Vec::new();
108 for (object_ref, object) in &file.objects {
109 match object {
110 PdfObject::Value(value) if object_ref.generation == 0 => {
111 compressible.push((*object_ref, value));
112 }
113 _ => direct.push((*object_ref, object)),
114 }
115 }
116
117 let mut next_objnum = file.max_object_number + 1;
120 let mut packed_streams = Vec::new();
121 for chunk in compressible.chunks(OBJSTM_CHUNK_SIZE) {
122 let pack = pack_objstm_chunk(next_objnum, chunk);
123 next_objnum += 1;
124 packed_streams.push(pack);
125 }
126
127 let xref_stream_objnum = next_objnum;
129 let xref_size = xref_stream_objnum + 1;
130
131 let mut output = Vec::new();
134 output.extend_from_slice(
135 format!("%PDF-{}\n%\u{00FF}\u{00FF}\u{00FF}\u{00FF}\n", file.version).as_bytes(),
136 );
137
138 let mut direct_offsets: BTreeMap<u32, usize> = BTreeMap::new();
139 for (object_ref, object) in &direct {
140 let offset = output.len();
141 direct_offsets.insert(object_ref.object_number, offset);
142 write_indirect_object(&mut output, *object_ref, object);
143 }
144
145 let mut objstm_offsets: BTreeMap<u32, usize> = BTreeMap::new();
146 for pack in &packed_streams {
147 let offset = output.len();
148 objstm_offsets.insert(pack.container_objnum, offset);
149 write_objstm_container(&mut output, pack);
150 }
151
152 let mut rows: Vec<XrefRow> = vec![XrefRow::Free; xref_size as usize];
154 for (object_ref, _) in &direct {
155 if let Some(offset) = direct_offsets.get(&object_ref.object_number).copied() {
156 rows[object_ref.object_number as usize] = XrefRow::Direct {
157 offset,
158 generation: object_ref.generation,
159 };
160 }
161 }
162 for pack in &packed_streams {
163 for (member_ref, index) in &pack.members {
164 rows[member_ref.object_number as usize] = XrefRow::InObjStm {
165 stream_objnum: pack.container_objnum,
166 index: *index,
167 };
168 }
169 if let Some(offset) = objstm_offsets.get(&pack.container_objnum).copied() {
170 rows[pack.container_objnum as usize] = XrefRow::Direct {
171 offset,
172 generation: 0,
173 };
174 }
175 }
176
177 let max_offset = direct_offsets
179 .values()
180 .chain(objstm_offsets.values())
181 .copied()
182 .max()
183 .unwrap_or(0);
184 let max_member_index = packed_streams
185 .iter()
186 .flat_map(|p| p.members.iter().map(|(_, i)| *i))
187 .max()
188 .unwrap_or(0)
189 .max(file.max_object_number);
190 let widths = xref_entry_widths(max_offset, max_member_index);
191 let xref_data = build_xref_stream_data(&rows, widths);
192
193 let mut xref_dict = file.trailer.clone();
196 for key in [
197 "Prev",
198 "XRefStm",
199 "Encrypt",
200 "Length",
201 "Filter",
202 "DecodeParms",
203 "W",
204 "Index",
205 "Type",
206 ] {
207 xref_dict.remove(key);
208 }
209 xref_dict.insert("Type".to_string(), PdfValue::Name("XRef".to_string()));
210 xref_dict.insert("Size".to_string(), PdfValue::Integer(xref_size as i64));
211 xref_dict.insert(
212 "W".to_string(),
213 PdfValue::Array(
214 widths
215 .iter()
216 .map(|w| PdfValue::Integer(i64::from(*w)))
217 .collect(),
218 ),
219 );
220 xref_dict.insert(
221 "Filter".to_string(),
222 PdfValue::Name("FlateDecode".to_string()),
223 );
224
225 let compressed_xref =
227 flate_encode(&xref_data).expect("flate_encode is infallible for in-memory buffers");
228 xref_dict.insert(
229 "Length".to_string(),
230 PdfValue::Integer(compressed_xref.len() as i64),
231 );
232
233 let startxref = output.len();
235 output.extend_from_slice(format!("{} 0 obj\n", xref_stream_objnum).as_bytes());
236 output.extend_from_slice(serialize_dictionary(&xref_dict).as_bytes());
237 output.extend_from_slice(b"\nstream\n");
238 output.extend_from_slice(&compressed_xref);
239 output.extend_from_slice(b"\nendstream\nendobj\n");
240
241 output.extend_from_slice(format!("startxref\n{startxref}\n%%EOF\n").as_bytes());
243 output
244}
245
246fn write_indirect_object(output: &mut Vec<u8>, object_ref: ObjectRef, object: &PdfObject) {
247 output.extend_from_slice(
248 format!(
249 "{} {} obj\n",
250 object_ref.object_number, object_ref.generation
251 )
252 .as_bytes(),
253 );
254 match object {
255 PdfObject::Value(value) => {
256 output.extend_from_slice(serialize_value(value).as_bytes());
257 output.extend_from_slice(b"\nendobj\n");
258 }
259 PdfObject::Stream(stream) => {
260 let mut dict = stream.dict.clone();
261 dict.insert(
262 "Length".to_string(),
263 PdfValue::Integer(stream.data.len() as i64),
264 );
265 output.extend_from_slice(serialize_dictionary(&dict).as_bytes());
266 output.extend_from_slice(b"\nstream\n");
267 output.extend_from_slice(&stream.data);
268 if !stream.data.ends_with(b"\n") {
269 output.push(b'\n');
270 }
271 output.extend_from_slice(b"endstream\nendobj\n");
272 }
273 }
274}
275
276fn write_objstm_container(output: &mut Vec<u8>, pack: &PackedObjStm) {
277 let mut dict = PdfDictionary::new();
278 dict.insert("Type".to_string(), PdfValue::Name("ObjStm".to_string()));
279 dict.insert(
280 "N".to_string(),
281 PdfValue::Integer(pack.members.len() as i64),
282 );
283 dict.insert("First".to_string(), PdfValue::Integer(pack.first as i64));
284 dict.insert(
285 "Filter".to_string(),
286 PdfValue::Name("FlateDecode".to_string()),
287 );
288 dict.insert(
289 "Length".to_string(),
290 PdfValue::Integer(pack.body.len() as i64),
291 );
292 output.extend_from_slice(format!("{} 0 obj\n", pack.container_objnum).as_bytes());
293 output.extend_from_slice(serialize_dictionary(&dict).as_bytes());
294 output.extend_from_slice(b"\nstream\n");
295 output.extend_from_slice(&pack.body);
296 if !pack.body.ends_with(b"\n") {
297 output.push(b'\n');
298 }
299 output.extend_from_slice(b"endstream\nendobj\n");
300}
301
302fn pack_objstm_chunk(container_objnum: u32, chunk: &[(ObjectRef, &PdfValue)]) -> PackedObjStm {
303 let mut header = String::new();
307 let mut body_text = String::new();
308 let mut members: Vec<(ObjectRef, u32)> = Vec::new();
309 let mut running_offset = 0usize;
310 for (index, (object_ref, value)) in chunk.iter().enumerate() {
311 write!(header, "{} {} ", object_ref.object_number, running_offset)
312 .expect("string writes should succeed");
313 let serialized = serialize_value(value);
314 body_text.push_str(&serialized);
315 body_text.push(' ');
316 running_offset += serialized.len() + 1;
317 members.push((*object_ref, index as u32));
318 }
319 let header_bytes = header.into_bytes();
320 let first = header_bytes.len();
321 let mut decompressed = header_bytes;
322 decompressed.extend_from_slice(body_text.as_bytes());
323 let body =
324 flate_encode(&decompressed).expect("flate_encode is infallible for in-memory buffers");
325 PackedObjStm {
326 container_objnum,
327 body,
328 first,
329 members,
330 }
331}
332
333fn xref_entry_widths(max_offset: usize, max_member_index: u32) -> [u8; 3] {
334 let field2 = bytes_to_fit(max_offset as u64).max(1);
335 let field3 = bytes_to_fit(u64::from(max_member_index)).max(1);
336 [1, field2, field3]
337}
338
/// Returns the number of bytes needed to represent `value` as an unsigned big-endian integer.
///
/// Zero still occupies one byte, so the result is always in `1..=8`.
fn bytes_to_fit(value: u64) -> u8 {
    let significant_bits = u64::BITS - value.leading_zeros();
    significant_bits.div_ceil(8).max(1) as u8
}
351
352fn build_xref_stream_data(rows: &[XrefRow], widths: [u8; 3]) -> Vec<u8> {
353 let mut output = Vec::with_capacity(rows.len() * (widths[0] + widths[1] + widths[2]) as usize);
354 for row in rows {
355 match row {
356 XrefRow::Free => {
357 push_be(&mut output, 0, widths[0]);
358 push_be(&mut output, 0, widths[1]);
359 push_be(&mut output, 0, widths[2]);
360 }
361 XrefRow::Direct { offset, generation } => {
362 push_be(&mut output, 1, widths[0]);
363 push_be(&mut output, *offset as u64, widths[1]);
364 push_be(&mut output, u64::from(*generation), widths[2]);
365 }
366 XrefRow::InObjStm {
367 stream_objnum,
368 index,
369 } => {
370 push_be(&mut output, 2, widths[0]);
371 push_be(&mut output, u64::from(*stream_objnum), widths[1]);
372 push_be(&mut output, u64::from(*index), widths[2]);
373 }
374 }
375 }
376 output
377}
378
/// Appends `value` to `output` as a big-endian integer that is exactly `width` bytes long.
///
/// If `width` is smaller than the value needs, only the low-order `width` bytes are written.
/// If `width` exceeds eight, the extra high-order bytes are written as zero instead of
/// shifting a `u64` by 64 bits or more (which would panic in debug builds).
fn push_be(output: &mut Vec<u8>, value: u64, width: u8) {
    output.extend((0..u32::from(width)).rev().map(|byte_index| {
        value
            .checked_shr(byte_index * 8)
            .map_or(0, |shifted| (shifted & 0xff) as u8)
    }));
}
385
386pub fn serialize_value(value: &PdfValue) -> String {
387 match value {
388 PdfValue::Null => "null".to_string(),
389 PdfValue::Bool(value) => value.to_string(),
390 PdfValue::Integer(value) => value.to_string(),
391 PdfValue::Number(value) => {
392 if value.fract() == 0.0 {
393 format!("{:.0}", value)
394 } else {
395 let mut number = format!("{value:.6}");
396 while number.contains('.') && number.ends_with('0') {
397 number.pop();
398 }
399 if number.ends_with('.') {
400 number.pop();
401 }
402 number
403 }
404 }
405 PdfValue::Name(name) => {
406 let mut encoded = String::from("/");
407 for byte in name.bytes() {
408 if byte == b'#'
409 || byte <= b' '
410 || byte >= 0x7F
411 || matches!(
412 byte,
413 b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%'
414 )
415 {
416 encoded.push_str(&format!("#{:02X}", byte));
417 } else {
418 encoded.push(byte as char);
419 }
420 }
421 encoded
422 }
423 PdfValue::String(string) => serialize_string(string),
424 PdfValue::Array(values) => format!(
425 "[{}]",
426 values
427 .iter()
428 .map(serialize_value)
429 .collect::<Vec<_>>()
430 .join(" ")
431 ),
432 PdfValue::Dictionary(dictionary) => serialize_dictionary(dictionary),
433 PdfValue::Reference(object_ref) => {
434 format!("{} {} R", object_ref.object_number, object_ref.generation)
435 }
436 }
437}
438
439pub fn serialize_dictionary(dictionary: &PdfDictionary) -> String {
440 let mut output = String::from("<<");
441 for (key, value) in dictionary {
442 write!(output, "/{} {}", key, serialize_value(value))
443 .expect("string writes should succeed");
444 output.push(' ');
445 }
446 output.push_str(">>");
447 output
448}
449
450pub fn serialize_string(string: &PdfString) -> String {
451 let mut output = String::from("(");
452 for byte in &string.0 {
453 match byte {
454 b'(' | b')' | b'\\' => {
455 output.push('\\');
456 output.push(*byte as char);
457 }
458 b'\n' => output.push_str("\\n"),
459 b'\r' => output.push_str("\\r"),
460 b'\t' => output.push_str("\\t"),
461 0x08 => output.push_str("\\b"),
462 0x0C => output.push_str("\\f"),
463 byte if byte.is_ascii_graphic() || *byte == b' ' => output.push(*byte as char),
464 other => output.push_str(&format!("\\{:03o}", other)),
465 }
466 }
467 output.push(')');
468 output
469}
470
#[cfg(test)]
mod tests {
    use super::*;

    /// Width selection at every byte boundary exercised by the serializer.
    #[test]
    fn xref_entry_widths_picks_minimal_field_widths() {
        let cases: [(usize, u32, [u8; 3]); 7] = [
            (0, 0, [1, 1, 1]),
            (255, 250, [1, 1, 1]),
            (256, 0, [1, 2, 1]),
            (65_535, 65_535, [1, 2, 2]),
            (65_536, 65_536, [1, 3, 3]),
            (16_777_215, 0, [1, 3, 1]),
            (16_777_216, 0, [1, 4, 1]),
        ];
        for (max_offset, max_member_index, expected) in cases {
            assert_eq!(xref_entry_widths(max_offset, max_member_index), expected);
        }
    }

    /// Members keep their chunk order, object numbers and zero-based indices.
    #[test]
    fn pack_objstm_chunk_preserves_member_indices() {
        let values = [
            PdfValue::Integer(42),
            PdfValue::Name("Foo".to_string()),
            PdfValue::Bool(true),
        ];
        let chunk: Vec<(ObjectRef, &PdfValue)> = (7u32..)
            .zip(&values)
            .map(|(object_number, value)| (ObjectRef::new(object_number, 0), value))
            .collect();

        let pack = pack_objstm_chunk(100, &chunk);

        assert_eq!(pack.container_objnum, 100);
        assert_eq!(pack.members.len(), 3);
        for (position, (member_ref, index)) in pack.members.iter().enumerate() {
            assert_eq!(*index, position as u32);
            assert_eq!(member_ref.object_number, 7 + position as u32);
        }
        assert!(pack.first > 0, "ObjStm header must have positive length");
    }

    /// Each row is encoded as type byte, big-endian second field, third field.
    #[test]
    fn build_xref_stream_data_serialises_widths_big_endian() {
        let rows = vec![
            XrefRow::Free,
            XrefRow::Direct {
                offset: 0x1234,
                generation: 0,
            },
            XrefRow::InObjStm {
                stream_objnum: 5,
                index: 3,
            },
        ];
        let expected: [[u8; 4]; 3] = [[0, 0, 0, 0], [1, 0x12, 0x34, 0], [2, 0, 5, 3]];

        let data = build_xref_stream_data(&rows, [1u8, 2u8, 1u8]);

        assert_eq!(data.len(), 12);
        for (encoded, wanted) in data.chunks(4).zip(expected.iter()) {
            assert_eq!(encoded, &wanted[..]);
        }
    }
}