1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
6use chrono::{DateTime, Utc};
7use std::collections::HashMap;
8use std::io::{BufWriter, Write};
9use std::path::Path;
10
/// Options that decide which PDF features the writer emits.
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// When `true`, `write_document` emits a cross-reference stream instead of
    /// a classic xref table followed by a trailer.
    pub use_xref_streams: bool,
    /// When `true`, `write_document` flushes buffered object streams before
    /// the cross-reference data is written.
    pub use_object_streams: bool,
    /// Version string placed in the `%PDF-x.y` header line.
    pub pdf_version: String,
    /// Whether page content streams are Flate-compressed (only honoured when
    /// the `compression` feature is enabled).
    pub compress_streams: bool,
    /// Marks the configuration as intended for incremental updates.
    pub incremental_update: bool,
}

impl Default for WriterConfig {
    /// PDF 1.7 with a classic xref table, no object streams, compressed streams.
    fn default() -> Self {
        Self::preset("1.7", false, false)
    }
}

impl WriterConfig {
    /// Shared constructor behind every preset.
    ///
    /// `stream_features` toggles xref streams and object streams together;
    /// stream compression is always enabled.
    fn preset(pdf_version: &str, stream_features: bool, incremental_update: bool) -> Self {
        Self {
            use_xref_streams: stream_features,
            use_object_streams: stream_features,
            pdf_version: pdf_version.to_string(),
            compress_streams: true,
            incremental_update,
        }
    }

    /// PDF 1.5 output with xref streams and object streams enabled.
    pub fn modern() -> Self {
        Self::preset("1.5", true, false)
    }

    /// PDF 1.4 output with a classic xref table and no object streams.
    pub fn legacy() -> Self {
        Self::preset("1.4", false, false)
    }

    /// Same as [`WriterConfig::legacy`] but with `incremental_update` set.
    pub fn incremental() -> Self {
        Self::preset("1.4", false, true)
    }
}
72
/// Serializes a `Document` as PDF bytes into any `Write` sink.
///
/// Besides the sink itself, the struct carries the bookkeeping gathered while
/// writing: reserved object ids, the running byte position and the state
/// needed for incremental updates.
pub struct PdfWriter<W: Write> {
    /// Destination that receives the PDF bytes.
    writer: W,
    /// Byte offset per object id (presumably consumed when the
    /// cross-reference section is emitted; that code is outside this view).
    xref_positions: HashMap<ObjectId, u64>,
    /// Running byte position in the output; sampled as the xref position and
    /// seeded with the base file size by the incremental entry points.
    current_position: u64,
    /// Counter behind object id allocation; `with_config` starts it at 1.
    next_object_id: u32,
    /// Id reserved for the document catalog.
    catalog_id: Option<ObjectId>,
    /// Id reserved for the page tree root.
    pages_id: Option<ObjectId>,
    /// Id reserved for the info dictionary.
    info_id: Option<ObjectId>,
    /// NOTE(review): not read anywhere in the visible code; confirm whether
    /// the `dead_code` allowance is still needed.
    #[allow(dead_code)]
    field_widget_map: HashMap<String, Vec<ObjectId>>,
    /// NOTE(review): not read anywhere in the visible code; confirm whether
    /// the `dead_code` allowance is still needed.
    #[allow(dead_code)]
    field_id_map: HashMap<String, ObjectId>,
    /// Ids of form fields; `write_form_fields` copies them into the AcroForm.
    form_field_ids: Vec<ObjectId>,
    /// Ids of the page objects written so far, used to build `/Kids` arrays.
    page_ids: Vec<ObjectId>,
    /// Feature switches chosen by the caller.
    config: WriterConfig,
    /// Characters used by the document, captured so font writing can subset.
    document_used_chars: Option<std::collections::HashSet<char>>,
    /// Serialized objects keyed by id (presumably those waiting to be packed
    /// into object streams; TODO confirm against `flush_object_streams`).
    buffered_objects: HashMap<ObjectId, Vec<u8>>,
    /// Maps an object id to an `(ObjectId, u32)` pair (presumably the
    /// containing object stream and the index inside it; TODO confirm).
    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>,
    /// `startxref` offset parsed from the base PDF during incremental updates.
    prev_xref_offset: Option<u64>,
    /// Size in bytes of the base PDF copied in front of an incremental update.
    base_pdf_size: Option<u64>,
}
100
101impl<W: Write> PdfWriter<W> {
102 pub fn new_with_writer(writer: W) -> Self {
103 Self::with_config(writer, WriterConfig::default())
104 }
105
106 pub fn with_config(writer: W, config: WriterConfig) -> Self {
107 Self {
108 writer,
109 xref_positions: HashMap::new(),
110 current_position: 0,
111 next_object_id: 1, catalog_id: None,
113 pages_id: None,
114 info_id: None,
115 field_widget_map: HashMap::new(),
116 field_id_map: HashMap::new(),
117 form_field_ids: Vec::new(),
118 page_ids: Vec::new(),
119 config,
120 document_used_chars: None,
121 buffered_objects: HashMap::new(),
122 compressed_object_map: HashMap::new(),
123 prev_xref_offset: None,
124 base_pdf_size: None,
125 }
126 }
127
128 pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
129 if !document.used_characters.is_empty() {
131 self.document_used_chars = Some(document.used_characters.clone());
132 }
133
134 self.write_header()?;
135
136 self.catalog_id = Some(self.allocate_object_id());
138 self.pages_id = Some(self.allocate_object_id());
139 self.info_id = Some(self.allocate_object_id());
140
141 let font_refs = self.write_fonts(document)?;
143
144 self.write_pages(document, &font_refs)?;
146
147 self.write_form_fields(document)?;
149
150 self.write_catalog(document)?;
152
153 self.write_info(document)?;
155
156 if self.config.use_object_streams {
158 self.flush_object_streams()?;
159 }
160
161 let xref_position = self.current_position;
163 if self.config.use_xref_streams {
164 self.write_xref_stream()?;
165 } else {
166 self.write_xref()?;
167 }
168
169 if !self.config.use_xref_streams {
171 self.write_trailer(xref_position)?;
172 }
173
174 if let Ok(()) = self.writer.flush() {
175 }
177 Ok(())
178 }
179
180 pub fn write_incremental_update(
214 &mut self,
215 base_pdf_path: impl AsRef<std::path::Path>,
216 document: &mut Document,
217 ) -> Result<()> {
218 use std::io::{BufReader, Read, Seek, SeekFrom};
219
220 let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
222 let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
223
224 let base_catalog = pdf_reader.catalog()?;
226
227 let (base_pages_id, base_pages_gen) = base_catalog
229 .get("Pages")
230 .and_then(|obj| {
231 if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
232 Some((*id, *gen))
233 } else {
234 None
235 }
236 })
237 .ok_or_else(|| {
238 crate::error::PdfError::InvalidStructure(
239 "Base PDF catalog missing /Pages reference".to_string(),
240 )
241 })?;
242
243 let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
245 let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
246 base_pages_obj
247 {
248 dict.get("Kids")
249 .and_then(|obj| {
250 if let crate::parser::objects::PdfObject::Array(arr) = obj {
251 Some(
254 arr.0
255 .iter()
256 .filter_map(|item| {
257 if let crate::parser::objects::PdfObject::Reference(id, gen) =
258 item
259 {
260 Some(crate::objects::Object::Reference(
261 crate::objects::ObjectId::new(*id, *gen),
262 ))
263 } else {
264 None
265 }
266 })
267 .collect::<Vec<_>>(),
268 )
269 } else {
270 None
271 }
272 })
273 .unwrap_or_default()
274 } else {
275 Vec::new()
276 };
277
278 let base_page_count = base_pages_kids.len();
280
281 let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
283 let mut base_reader = BufReader::new(base_pdf);
284
285 base_reader.seek(SeekFrom::End(-100))?;
287 let mut end_buffer = vec![0u8; 100];
288 let bytes_read = base_reader.read(&mut end_buffer)?;
289 end_buffer.truncate(bytes_read);
290
291 let end_str = String::from_utf8_lossy(&end_buffer);
292 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
293 let after_startxref = &end_str[startxref_pos + 9..];
294
295 let number_str: String = after_startxref
296 .chars()
297 .skip_while(|c| c.is_whitespace())
298 .take_while(|c| c.is_ascii_digit())
299 .collect();
300
301 number_str.parse::<u64>().map_err(|_| {
302 crate::error::PdfError::InvalidStructure(
303 "Could not parse startxref offset".to_string(),
304 )
305 })?
306 } else {
307 return Err(crate::error::PdfError::InvalidStructure(
308 "startxref not found in base PDF".to_string(),
309 ));
310 };
311
312 base_reader.seek(SeekFrom::Start(0))?;
314 let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
315
316 self.prev_xref_offset = Some(prev_xref);
318 self.base_pdf_size = Some(base_size);
319 self.current_position = base_size;
320
321 if !document.used_characters.is_empty() {
323 self.document_used_chars = Some(document.used_characters.clone());
324 }
325
326 self.catalog_id = Some(self.allocate_object_id());
328 self.pages_id = Some(self.allocate_object_id());
329 self.info_id = Some(self.allocate_object_id());
330
331 let font_refs = self.write_fonts(document)?;
333
334 self.write_pages(document, &font_refs)?;
336
337 self.write_form_fields(document)?;
339
340 let catalog_id = self.get_catalog_id()?;
342 let new_pages_id = self.get_pages_id()?;
343
344 let mut catalog = crate::objects::Dictionary::new();
345 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
346 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
347
348 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
353
354 let mut all_pages_kids = base_pages_kids;
356
357 for page_id in &self.page_ids {
359 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
360 }
361
362 let mut pages_dict = crate::objects::Dictionary::new();
363 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
364 pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
365 pages_dict.set(
366 "Count",
367 crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
368 );
369
370 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
371
372 self.write_info(document)?;
374
375 let xref_position = self.current_position;
377 self.write_xref()?;
378
379 self.write_trailer(xref_position)?;
381
382 self.writer.flush()?;
383 Ok(())
384 }
385
386 pub fn write_incremental_with_page_replacement(
452 &mut self,
453 base_pdf_path: impl AsRef<std::path::Path>,
454 document: &mut Document,
455 ) -> Result<()> {
456 use std::io::Cursor;
457
458 let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
460 let base_size = base_pdf_bytes.len() as u64;
461
462 let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
464
465 let base_catalog = pdf_reader.catalog()?;
466
467 let (base_pages_id, base_pages_gen) = base_catalog
468 .get("Pages")
469 .and_then(|obj| {
470 if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
471 Some((*id, *gen))
472 } else {
473 None
474 }
475 })
476 .ok_or_else(|| {
477 crate::error::PdfError::InvalidStructure(
478 "Base PDF catalog missing /Pages reference".to_string(),
479 )
480 })?;
481
482 let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
483 let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
484 base_pages_obj
485 {
486 dict.get("Kids")
487 .and_then(|obj| {
488 if let crate::parser::objects::PdfObject::Array(arr) = obj {
489 Some(
490 arr.0
491 .iter()
492 .filter_map(|item| {
493 if let crate::parser::objects::PdfObject::Reference(id, gen) =
494 item
495 {
496 Some(crate::objects::Object::Reference(
497 crate::objects::ObjectId::new(*id, *gen),
498 ))
499 } else {
500 None
501 }
502 })
503 .collect::<Vec<_>>(),
504 )
505 } else {
506 None
507 }
508 })
509 .unwrap_or_default()
510 } else {
511 Vec::new()
512 };
513
514 let base_page_count = base_pages_kids.len();
515
516 let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
518 let end_bytes = &base_pdf_bytes[start_search..];
519 let end_str = String::from_utf8_lossy(end_bytes);
520
521 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
522 let after_startxref = &end_str[startxref_pos + 9..];
523 let number_str: String = after_startxref
524 .chars()
525 .skip_while(|c| c.is_whitespace())
526 .take_while(|c| c.is_ascii_digit())
527 .collect();
528
529 number_str.parse::<u64>().map_err(|_| {
530 crate::error::PdfError::InvalidStructure(
531 "Could not parse startxref offset".to_string(),
532 )
533 })?
534 } else {
535 return Err(crate::error::PdfError::InvalidStructure(
536 "startxref not found in base PDF".to_string(),
537 ));
538 };
539
540 self.writer.write_all(&base_pdf_bytes)?;
542
543 self.prev_xref_offset = Some(prev_xref);
544 self.base_pdf_size = Some(base_size);
545 self.current_position = base_size;
546
547 if !document.used_characters.is_empty() {
549 self.document_used_chars = Some(document.used_characters.clone());
550 }
551
552 self.catalog_id = Some(self.allocate_object_id());
553 self.pages_id = Some(self.allocate_object_id());
554 self.info_id = Some(self.allocate_object_id());
555
556 let font_refs = self.write_fonts(document)?;
557 self.write_pages(document, &font_refs)?;
558 self.write_form_fields(document)?;
559
560 let catalog_id = self.get_catalog_id()?;
562 let new_pages_id = self.get_pages_id()?;
563
564 let mut catalog = crate::objects::Dictionary::new();
565 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
566 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
567 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
568
569 let mut all_pages_kids = Vec::new();
571 let replacement_count = document.pages.len();
572
573 for page_id in &self.page_ids {
575 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
576 }
577
578 if replacement_count < base_page_count {
580 for i in replacement_count..base_page_count {
581 if let Some(page_ref) = base_pages_kids.get(i) {
582 all_pages_kids.push(page_ref.clone());
583 }
584 }
585 }
586
587 let mut pages_dict = crate::objects::Dictionary::new();
588 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
589 pages_dict.set(
590 "Kids",
591 crate::objects::Object::Array(all_pages_kids.clone()),
592 );
593 pages_dict.set(
594 "Count",
595 crate::objects::Object::Integer(all_pages_kids.len() as i64),
596 );
597
598 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
599 self.write_info(document)?;
600
601 let xref_position = self.current_position;
602 self.write_xref()?;
603 self.write_trailer(xref_position)?;
604
605 self.writer.flush()?;
606 Ok(())
607 }
608
609 pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
657 &mut self,
658 base_pdf_path: P,
659 mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
660 ) -> Result<()> {
661 use std::io::Cursor;
662
663 let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
665 let base_size = base_pdf_bytes.len() as u64;
666
667 let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
669 let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
670
671 let page_count = parsed_doc.page_count()?;
673
674 let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
676 let end_bytes = &base_pdf_bytes[start_search..];
677 let end_str = String::from_utf8_lossy(end_bytes);
678
679 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
680 let after_startxref = &end_str[startxref_pos + 9..];
681 let number_str: String = after_startxref
682 .chars()
683 .skip_while(|c| c.is_whitespace())
684 .take_while(|c| c.is_ascii_digit())
685 .collect();
686
687 number_str.parse::<u64>().map_err(|_| {
688 crate::error::PdfError::InvalidStructure(
689 "Could not parse startxref offset".to_string(),
690 )
691 })?
692 } else {
693 return Err(crate::error::PdfError::InvalidStructure(
694 "startxref not found in base PDF".to_string(),
695 ));
696 };
697
698 self.writer.write_all(&base_pdf_bytes)?;
700
701 self.prev_xref_offset = Some(prev_xref);
702 self.base_pdf_size = Some(base_size);
703 self.current_position = base_size;
704
705 let mut temp_doc = crate::Document::new();
707
708 for page_idx in 0..page_count {
709 let parsed_page = parsed_doc.get_page(page_idx)?;
711 let mut writable_page =
712 crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
713
714 overlay_fn(&mut writable_page)?;
716
717 temp_doc.add_page(writable_page);
719 }
720
721 if !temp_doc.used_characters.is_empty() {
724 self.document_used_chars = Some(temp_doc.used_characters.clone());
725 }
726
727 self.catalog_id = Some(self.allocate_object_id());
728 self.pages_id = Some(self.allocate_object_id());
729 self.info_id = Some(self.allocate_object_id());
730
731 let font_refs = self.write_fonts(&temp_doc)?;
732 self.write_pages(&temp_doc, &font_refs)?;
733 self.write_form_fields(&mut temp_doc)?;
734
735 let catalog_id = self.get_catalog_id()?;
737 let new_pages_id = self.get_pages_id()?;
738
739 let mut catalog = crate::objects::Dictionary::new();
740 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
741 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
742 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
743
744 let mut all_pages_kids = Vec::new();
746 for page_id in &self.page_ids {
747 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
748 }
749
750 let mut pages_dict = crate::objects::Dictionary::new();
751 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
752 pages_dict.set(
753 "Kids",
754 crate::objects::Object::Array(all_pages_kids.clone()),
755 );
756 pages_dict.set(
757 "Count",
758 crate::objects::Object::Integer(all_pages_kids.len() as i64),
759 );
760
761 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
762 self.write_info(&temp_doc)?;
763
764 let xref_position = self.current_position;
765 self.write_xref()?;
766 self.write_trailer(xref_position)?;
767
768 self.writer.flush()?;
769 Ok(())
770 }
771
772 fn write_header(&mut self) -> Result<()> {
773 let header = format!("%PDF-{}\n", self.config.pdf_version);
774 self.write_bytes(header.as_bytes())?;
775 self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
777 Ok(())
778 }
779
780 fn convert_pdf_objects_dict_to_writer(
783 &self,
784 pdf_dict: &crate::pdf_objects::Dictionary,
785 ) -> crate::objects::Dictionary {
786 let mut writer_dict = crate::objects::Dictionary::new();
787
788 for (key, value) in pdf_dict.iter() {
789 let writer_obj = self.convert_pdf_object_to_writer(value);
790 writer_dict.set(key.as_str(), writer_obj);
791 }
792
793 writer_dict
794 }
795
796 fn convert_pdf_object_to_writer(
797 &self,
798 obj: &crate::pdf_objects::Object,
799 ) -> crate::objects::Object {
800 use crate::objects::Object as WriterObj;
801 use crate::pdf_objects::Object as PdfObj;
802
803 match obj {
804 PdfObj::Null => WriterObj::Null,
805 PdfObj::Boolean(b) => WriterObj::Boolean(*b),
806 PdfObj::Integer(i) => WriterObj::Integer(*i),
807 PdfObj::Real(f) => WriterObj::Real(*f),
808 PdfObj::String(s) => {
809 WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
810 }
811 PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
812 PdfObj::Array(arr) => {
813 let items: Vec<WriterObj> = arr
814 .iter()
815 .map(|item| self.convert_pdf_object_to_writer(item))
816 .collect();
817 WriterObj::Array(items)
818 }
819 PdfObj::Dictionary(dict) => {
820 WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
821 }
822 PdfObj::Stream(stream) => {
823 let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
824 WriterObj::Stream(dict, stream.data.clone())
825 }
826 PdfObj::Reference(id) => {
827 WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
828 }
829 }
830 }
831
832 fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
833 let catalog_id = self.get_catalog_id()?;
834 let pages_id = self.get_pages_id()?;
835
836 let mut catalog = Dictionary::new();
837 catalog.set("Type", Object::Name("Catalog".to_string()));
838 catalog.set("Pages", Object::Reference(pages_id));
839
840 if let Some(_form_manager) = &document.form_manager {
843 if document.acro_form.is_none() {
845 document.acro_form = Some(crate::forms::AcroForm::new());
846 }
847 }
848
849 if let Some(acro_form) = &document.acro_form {
851 let acro_form_id = self.allocate_object_id();
853
854 self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
856
857 catalog.set("AcroForm", Object::Reference(acro_form_id));
859 }
860
861 if let Some(outline_tree) = &document.outline {
863 if !outline_tree.items.is_empty() {
864 let outline_root_id = self.write_outline_tree(outline_tree)?;
865 catalog.set("Outlines", Object::Reference(outline_root_id));
866 }
867 }
868
869 if let Some(struct_tree) = &document.struct_tree {
871 if !struct_tree.is_empty() {
872 let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
873 catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
874 catalog.set("MarkInfo", {
876 let mut mark_info = Dictionary::new();
877 mark_info.set("Marked", Object::Boolean(true));
878 Object::Dictionary(mark_info)
879 });
880 }
881 }
882
883 let xmp_metadata = document.create_xmp_metadata();
886 let xmp_packet = xmp_metadata.to_xmp_packet();
887 let metadata_id = self.allocate_object_id();
888
889 let mut metadata_dict = Dictionary::new();
891 metadata_dict.set("Type", Object::Name("Metadata".to_string()));
892 metadata_dict.set("Subtype", Object::Name("XML".to_string()));
893 metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
894
895 self.write_object(
897 metadata_id,
898 Object::Stream(metadata_dict, xmp_packet.into_bytes()),
899 )?;
900
901 catalog.set("Metadata", Object::Reference(metadata_id));
903
904 self.write_object(catalog_id, Object::Dictionary(catalog))?;
905 Ok(())
906 }
907
908 fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
909 let mut page_copy = page.clone();
910 let content = page_copy.generate_content()?;
911
912 #[cfg(feature = "compression")]
914 {
915 use crate::objects::Stream;
916 let mut stream = Stream::new(content);
917 if self.config.compress_streams {
919 stream.compress_flate()?;
920 }
921
922 self.write_object(
923 content_id,
924 Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
925 )?;
926 }
927
928 #[cfg(not(feature = "compression"))]
929 {
930 let mut stream_dict = Dictionary::new();
931 stream_dict.set("Length", Object::Integer(content.len() as i64));
932
933 self.write_object(content_id, Object::Stream(stream_dict, content))?;
934 }
935
936 Ok(())
937 }
938
939 fn write_outline_tree(
940 &mut self,
941 outline_tree: &crate::structure::OutlineTree,
942 ) -> Result<ObjectId> {
943 let outline_root_id = self.allocate_object_id();
945
946 let mut outline_root = Dictionary::new();
947 outline_root.set("Type", Object::Name("Outlines".to_string()));
948
949 if !outline_tree.items.is_empty() {
950 let mut item_ids = Vec::new();
952
953 fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
955 let mut count = items.len();
956 for item in items {
957 count += count_items(&item.children);
958 }
959 count
960 }
961
962 let total_items = count_items(&outline_tree.items);
963
964 for _ in 0..total_items {
966 item_ids.push(self.allocate_object_id());
967 }
968
969 let mut id_index = 0;
970
971 let first_id = item_ids[0];
973 let last_id = item_ids[outline_tree.items.len() - 1];
974
975 outline_root.set("First", Object::Reference(first_id));
976 outline_root.set("Last", Object::Reference(last_id));
977
978 let visible_count = outline_tree.visible_count();
980 outline_root.set("Count", Object::Integer(visible_count));
981
982 let mut written_items = Vec::new();
984
985 for (i, item) in outline_tree.items.iter().enumerate() {
986 let item_id = item_ids[id_index];
987 id_index += 1;
988
989 let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
990 let next_id = if i < outline_tree.items.len() - 1 {
991 Some(item_ids[i + 1])
992 } else {
993 None
994 };
995
996 let children_ids = self.write_outline_item(
998 item,
999 item_id,
1000 outline_root_id,
1001 prev_id,
1002 next_id,
1003 &mut item_ids,
1004 &mut id_index,
1005 )?;
1006
1007 written_items.extend(children_ids);
1008 }
1009 }
1010
1011 self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1012 Ok(outline_root_id)
1013 }
1014
1015 #[allow(clippy::too_many_arguments)]
1016 fn write_outline_item(
1017 &mut self,
1018 item: &crate::structure::OutlineItem,
1019 item_id: ObjectId,
1020 parent_id: ObjectId,
1021 prev_id: Option<ObjectId>,
1022 next_id: Option<ObjectId>,
1023 all_ids: &mut Vec<ObjectId>,
1024 id_index: &mut usize,
1025 ) -> Result<Vec<ObjectId>> {
1026 let mut written_ids = vec![item_id];
1027
1028 let (first_child_id, last_child_id) = if !item.children.is_empty() {
1030 let first_idx = *id_index;
1031 let first_id = all_ids[first_idx];
1032 let last_idx = first_idx + item.children.len() - 1;
1033 let last_id = all_ids[last_idx];
1034
1035 for (i, child) in item.children.iter().enumerate() {
1037 let child_id = all_ids[*id_index];
1038 *id_index += 1;
1039
1040 let child_prev = if i > 0 {
1041 Some(all_ids[first_idx + i - 1])
1042 } else {
1043 None
1044 };
1045 let child_next = if i < item.children.len() - 1 {
1046 Some(all_ids[first_idx + i + 1])
1047 } else {
1048 None
1049 };
1050
1051 let child_ids = self.write_outline_item(
1052 child, child_id, item_id, child_prev, child_next, all_ids, id_index,
1054 )?;
1055
1056 written_ids.extend(child_ids);
1057 }
1058
1059 (Some(first_id), Some(last_id))
1060 } else {
1061 (None, None)
1062 };
1063
1064 let item_dict = crate::structure::outline_item_to_dict(
1066 item,
1067 parent_id,
1068 first_child_id,
1069 last_child_id,
1070 prev_id,
1071 next_id,
1072 );
1073
1074 self.write_object(item_id, Object::Dictionary(item_dict))?;
1075
1076 Ok(written_ids)
1077 }
1078
1079 fn write_struct_tree(
1081 &mut self,
1082 struct_tree: &crate::structure::StructTree,
1083 ) -> Result<ObjectId> {
1084 let struct_tree_root_id = self.allocate_object_id();
1086 let mut element_ids = Vec::new();
1087 for _ in 0..struct_tree.len() {
1088 element_ids.push(self.allocate_object_id());
1089 }
1090
1091 let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1093 std::collections::HashMap::new();
1094
1095 if let Some(root_index) = struct_tree.root_index() {
1097 parent_map.insert(root_index, struct_tree_root_id);
1098
1099 fn map_children_parents(
1101 tree: &crate::structure::StructTree,
1102 parent_index: usize,
1103 parent_id: ObjectId,
1104 element_ids: &[ObjectId],
1105 parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1106 ) {
1107 if let Some(parent_elem) = tree.get(parent_index) {
1108 for &child_index in &parent_elem.children {
1109 parent_map.insert(child_index, parent_id);
1110 map_children_parents(
1111 tree,
1112 child_index,
1113 element_ids[child_index],
1114 element_ids,
1115 parent_map,
1116 );
1117 }
1118 }
1119 }
1120
1121 map_children_parents(
1122 struct_tree,
1123 root_index,
1124 element_ids[root_index],
1125 &element_ids,
1126 &mut parent_map,
1127 );
1128 }
1129
1130 for (index, element) in struct_tree.iter().enumerate() {
1132 let element_id = element_ids[index];
1133 let mut element_dict = Dictionary::new();
1134
1135 element_dict.set("Type", Object::Name("StructElem".to_string()));
1136 element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1137
1138 if let Some(&parent_id) = parent_map.get(&index) {
1140 element_dict.set("P", Object::Reference(parent_id));
1141 }
1142
1143 if let Some(ref id) = element.id {
1145 element_dict.set("ID", Object::String(id.clone()));
1146 }
1147
1148 if let Some(ref lang) = element.attributes.lang {
1150 element_dict.set("Lang", Object::String(lang.clone()));
1151 }
1152 if let Some(ref alt) = element.attributes.alt {
1153 element_dict.set("Alt", Object::String(alt.clone()));
1154 }
1155 if let Some(ref actual_text) = element.attributes.actual_text {
1156 element_dict.set("ActualText", Object::String(actual_text.clone()));
1157 }
1158 if let Some(ref title) = element.attributes.title {
1159 element_dict.set("T", Object::String(title.clone()));
1160 }
1161 if let Some(bbox) = element.attributes.bbox {
1162 element_dict.set(
1163 "BBox",
1164 Object::Array(vec![
1165 Object::Real(bbox[0]),
1166 Object::Real(bbox[1]),
1167 Object::Real(bbox[2]),
1168 Object::Real(bbox[3]),
1169 ]),
1170 );
1171 }
1172
1173 let mut kids = Vec::new();
1175
1176 for &child_index in &element.children {
1178 kids.push(Object::Reference(element_ids[child_index]));
1179 }
1180
1181 for mcid_ref in &element.mcids {
1183 let mut mcr = Dictionary::new();
1184 mcr.set("Type", Object::Name("MCR".to_string()));
1185 mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1186 mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1187 kids.push(Object::Dictionary(mcr));
1188 }
1189
1190 if !kids.is_empty() {
1191 element_dict.set("K", Object::Array(kids));
1192 }
1193
1194 self.write_object(element_id, Object::Dictionary(element_dict))?;
1195 }
1196
1197 let mut struct_tree_root = Dictionary::new();
1199 struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1200
1201 if let Some(root_index) = struct_tree.root_index() {
1203 struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1204 }
1205
1206 if !struct_tree.role_map.mappings().is_empty() {
1208 let mut role_map = Dictionary::new();
1209 for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1210 role_map.set(
1211 custom_type.as_str(),
1212 Object::Name(standard_type.as_pdf_name().to_string()),
1213 );
1214 }
1215 struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1216 }
1217
1218 self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1219 Ok(struct_tree_root_id)
1220 }
1221
1222 fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1223 if !self.form_field_ids.is_empty() {
1225 if let Some(acro_form) = &mut document.acro_form {
1226 acro_form.fields.clear();
1228 for field_id in &self.form_field_ids {
1229 acro_form.add_field(*field_id);
1230 }
1231
1232 acro_form.need_appearances = true;
1234 if acro_form.da.is_none() {
1235 acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1236 }
1237 }
1238 }
1239 Ok(())
1240 }
1241
1242 fn write_info(&mut self, document: &Document) -> Result<()> {
1243 let info_id = self.get_info_id()?;
1244 let mut info_dict = Dictionary::new();
1245
1246 if let Some(ref title) = document.metadata.title {
1247 info_dict.set("Title", Object::String(title.clone()));
1248 }
1249 if let Some(ref author) = document.metadata.author {
1250 info_dict.set("Author", Object::String(author.clone()));
1251 }
1252 if let Some(ref subject) = document.metadata.subject {
1253 info_dict.set("Subject", Object::String(subject.clone()));
1254 }
1255 if let Some(ref keywords) = document.metadata.keywords {
1256 info_dict.set("Keywords", Object::String(keywords.clone()));
1257 }
1258 if let Some(ref creator) = document.metadata.creator {
1259 info_dict.set("Creator", Object::String(creator.clone()));
1260 }
1261 if let Some(ref producer) = document.metadata.producer {
1262 info_dict.set("Producer", Object::String(producer.clone()));
1263 }
1264
1265 if let Some(creation_date) = document.metadata.creation_date {
1267 let date_string = format_pdf_date(creation_date);
1268 info_dict.set("CreationDate", Object::String(date_string));
1269 }
1270
1271 if let Some(mod_date) = document.metadata.modification_date {
1273 let date_string = format_pdf_date(mod_date);
1274 info_dict.set("ModDate", Object::String(date_string));
1275 }
1276
1277 let edition = if cfg!(feature = "pro") {
1280 super::Edition::Pro
1281 } else if cfg!(feature = "enterprise") {
1282 super::Edition::Enterprise
1283 } else {
1284 super::Edition::Community
1285 };
1286
1287 let signature = super::PdfSignature::new(document, edition);
1288 signature.write_to_info_dict(&mut info_dict);
1289
1290 self.write_object(info_id, Object::Dictionary(info_dict))?;
1291 Ok(())
1292 }
1293
1294 fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1295 let mut font_refs = HashMap::new();
1296
1297 for font_name in document.custom_font_names() {
1299 if let Some(font) = document.get_custom_font(&font_name) {
1300 let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1303 font_refs.insert(font_name.clone(), font_id);
1304 }
1305 }
1306
1307 Ok(font_refs)
1308 }
1309
1310 fn write_font_with_unicode_support(
1312 &mut self,
1313 font_name: &str,
1314 font: &crate::fonts::Font,
1315 ) -> Result<ObjectId> {
1316 self.write_type0_font_from_font(font_name, font)
1319 }
1320
1321 fn write_type0_font_from_font(
1323 &mut self,
1324 font_name: &str,
1325 font: &crate::fonts::Font,
1326 ) -> Result<ObjectId> {
1327 let used_chars = self.document_used_chars.clone().unwrap_or_else(|| {
1329 let mut chars = std::collections::HashSet::new();
1331 for ch in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1332 {
1333 chars.insert(ch);
1334 }
1335 chars
1336 });
1337 let font_id = self.allocate_object_id();
1339 let descendant_font_id = self.allocate_object_id();
1340 let descriptor_id = self.allocate_object_id();
1341 let font_file_id = self.allocate_object_id();
1342 let to_unicode_id = self.allocate_object_id();
1343
1344 let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths) =
1348 if font.data.len() > 100_000 && !used_chars.is_empty() {
1349 match crate::text::fonts::truetype_subsetter::subset_font(
1351 font.data.clone(),
1352 &used_chars,
1353 ) {
1354 Ok(subset_result) => {
1355 (
1358 subset_result.font_data,
1359 Some(subset_result.glyph_mapping),
1360 font.clone(),
1361 )
1362 }
1363 Err(_) => {
1364 if font.data.len() < 25_000_000 {
1366 (font.data.clone(), None, font.clone())
1367 } else {
1368 (Vec::new(), None, font.clone())
1370 }
1371 }
1372 }
1373 } else {
1374 (font.data.clone(), None, font.clone())
1376 };
1377
1378 if !font_data_to_embed.is_empty() {
1379 let mut font_file_dict = Dictionary::new();
1380 match font.format {
1382 crate::fonts::FontFormat::OpenType => {
1383 font_file_dict.set("Subtype", Object::Name("OpenType".to_string()));
1385 font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1386 }
1387 crate::fonts::FontFormat::TrueType => {
1388 font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1390 }
1391 }
1392 let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1393 self.write_object(font_file_id, font_stream_obj)?;
1394 } else {
1395 let font_file_dict = Dictionary::new();
1397 let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1398 self.write_object(font_file_id, font_stream_obj)?;
1399 }
1400
1401 let mut descriptor = Dictionary::new();
1403 descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1404 descriptor.set("FontName", Object::Name(font_name.to_string()));
1405 descriptor.set("Flags", Object::Integer(4)); descriptor.set(
1407 "FontBBox",
1408 Object::Array(vec![
1409 Object::Integer(font.descriptor.font_bbox[0] as i64),
1410 Object::Integer(font.descriptor.font_bbox[1] as i64),
1411 Object::Integer(font.descriptor.font_bbox[2] as i64),
1412 Object::Integer(font.descriptor.font_bbox[3] as i64),
1413 ]),
1414 );
1415 descriptor.set(
1416 "ItalicAngle",
1417 Object::Real(font.descriptor.italic_angle as f64),
1418 );
1419 descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1420 descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1421 descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1422 descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1423 let font_file_key = match font.format {
1425 crate::fonts::FontFormat::OpenType => "FontFile3", crate::fonts::FontFormat::TrueType => "FontFile2", };
1428 descriptor.set(font_file_key, Object::Reference(font_file_id));
1429 self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1430
1431 let mut cid_font = Dictionary::new();
1433 cid_font.set("Type", Object::Name("Font".to_string()));
1434 let cid_font_subtype =
1436 if CjkFontType::should_use_cidfonttype2_for_preview_compatibility(font_name) {
1437 "CIDFontType2" } else {
1439 match font.format {
1440 crate::fonts::FontFormat::OpenType => "CIDFontType0", crate::fonts::FontFormat::TrueType => "CIDFontType2", }
1443 };
1444 cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1445 cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1446
1447 let mut cid_system_info = Dictionary::new();
1449 let (registry, ordering, supplement) =
1450 if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1451 cjk_type.cid_system_info()
1452 } else {
1453 ("Adobe", "Identity", 0)
1454 };
1455
1456 cid_system_info.set("Registry", Object::String(registry.to_string()));
1457 cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1458 cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1459 cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1460
1461 cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1462
1463 let default_width = self.calculate_default_width(font);
1465 cid_font.set("DW", Object::Integer(default_width));
1466
1467 let w_array = self.generate_width_array(
1471 &original_font_for_widths,
1472 default_width,
1473 subset_glyph_mapping.as_ref(),
1474 );
1475 cid_font.set("W", Object::Array(w_array));
1476
1477 if cid_font_subtype == "CIDFontType2" {
1481 let cid_to_gid_map =
1483 self.generate_cid_to_gid_map(font, subset_glyph_mapping.as_ref())?;
1484 if !cid_to_gid_map.is_empty() {
1485 let cid_to_gid_map_id = self.allocate_object_id();
1487 let mut map_dict = Dictionary::new();
1488 map_dict.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1489 let map_stream = Object::Stream(map_dict, cid_to_gid_map);
1490 self.write_object(cid_to_gid_map_id, map_stream)?;
1491 cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1492 } else {
1493 cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1494 }
1495 }
1496 self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1499
1500 let cmap_data = self.generate_tounicode_cmap_from_font(font);
1502 let cmap_dict = Dictionary::new();
1503 let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1504 self.write_object(to_unicode_id, cmap_stream)?;
1505
1506 let mut type0_font = Dictionary::new();
1508 type0_font.set("Type", Object::Name("Font".to_string()));
1509 type0_font.set("Subtype", Object::Name("Type0".to_string()));
1510 type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1511 type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1512 type0_font.set(
1513 "DescendantFonts",
1514 Object::Array(vec![Object::Reference(descendant_font_id)]),
1515 );
1516 type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1517
1518 self.write_object(font_id, Object::Dictionary(type0_font))?;
1519
1520 Ok(font_id)
1521 }
1522
1523 fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1525 use crate::text::fonts::truetype::TrueTypeFont;
1526
1527 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1529 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1530 if let Some(cmap) = cmap_tables
1531 .iter()
1532 .find(|t| t.platform_id == 3 && t.encoding_id == 1)
1533 .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1534 {
1535 if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1536 let common_chars =
1540 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1541 let mut total_width = 0;
1542 let mut count = 0;
1543
1544 for ch in common_chars.chars() {
1545 let unicode = ch as u32;
1546 if let Some(&pdf_width) = widths.get(&unicode) {
1547 total_width += pdf_width as i64;
1548 count += 1;
1549 }
1550 }
1551
1552 if count > 0 {
1553 return total_width / count;
1554 }
1555 }
1556 }
1557 }
1558 }
1559
1560 500
1562 }
1563
1564 fn generate_width_array(
1566 &self,
1567 font: &crate::fonts::Font,
1568 _default_width: i64,
1569 subset_mapping: Option<&HashMap<u32, u16>>,
1570 ) -> Vec<Object> {
1571 use crate::text::fonts::truetype::TrueTypeFont;
1572
1573 let mut w_array = Vec::new();
1574
1575 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1577 let char_to_glyph = {
1581 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1583 if let Some(cmap) = cmap_tables
1584 .iter()
1585 .find(|t| t.platform_id == 3 && t.encoding_id == 1)
1586 .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1587 {
1588 if let Some(subset_map) = subset_mapping {
1590 let mut filtered = HashMap::new();
1591 for unicode in subset_map.keys() {
1592 if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1594 filtered.insert(*unicode, orig_glyph);
1595 }
1596 }
1597 filtered
1598 } else {
1599 cmap.mappings.clone()
1600 }
1601 } else {
1602 HashMap::new()
1603 }
1604 } else {
1605 HashMap::new()
1606 }
1607 };
1608
1609 if !char_to_glyph.is_empty() {
1610 if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1612 let mut sorted_chars: Vec<_> = widths.iter().collect();
1617 sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1618
1619 let mut i = 0;
1620 while i < sorted_chars.len() {
1621 let start_unicode = *sorted_chars[i].0;
1622 let pdf_width = *sorted_chars[i].1 as i64;
1624
1625 let mut end_unicode = start_unicode;
1627 let mut j = i + 1;
1628 while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1629 let next_pdf_width = *sorted_chars[j].1 as i64;
1630 if next_pdf_width == pdf_width {
1631 end_unicode = *sorted_chars[j].0;
1632 j += 1;
1633 } else {
1634 break;
1635 }
1636 }
1637
1638 if start_unicode == end_unicode {
1640 w_array.push(Object::Integer(start_unicode as i64));
1642 w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1643 } else {
1644 w_array.push(Object::Integer(start_unicode as i64));
1646 w_array.push(Object::Integer(end_unicode as i64));
1647 w_array.push(Object::Integer(pdf_width));
1648 }
1649
1650 i = j;
1651 }
1652
1653 return w_array;
1654 }
1655 }
1656 }
1657
1658 let ranges = vec![
1660 (0x20, 0x20, 250), (0x21, 0x2F, 333), (0x30, 0x39, 500), (0x3A, 0x40, 333), (0x41, 0x5A, 667), (0x5B, 0x60, 333), (0x61, 0x7A, 500), (0x7B, 0x7E, 333), (0xA0, 0xA0, 250), (0xA1, 0xBF, 333), (0xC0, 0xD6, 667), (0xD7, 0xD7, 564), (0xD8, 0xDE, 667), (0xDF, 0xF6, 500), (0xF7, 0xF7, 564), (0xF8, 0xFF, 500), (0x100, 0x17F, 500), (0x2000, 0x200F, 250), (0x2010, 0x2027, 333), (0x2028, 0x202F, 250), (0x2030, 0x206F, 500), (0x2070, 0x209F, 400), (0x20A0, 0x20CF, 600), (0x2100, 0x214F, 700), (0x2190, 0x21FF, 600), (0x2200, 0x22FF, 600), (0x2300, 0x23FF, 600), (0x2500, 0x257F, 500), (0x2580, 0x259F, 500), (0x25A0, 0x25FF, 600), (0x2600, 0x26FF, 600), (0x2700, 0x27BF, 600), ];
1697
1698 for (start, end, width) in ranges {
1700 if start == end {
1701 w_array.push(Object::Integer(start));
1703 w_array.push(Object::Array(vec![Object::Integer(width)]));
1704 } else {
1705 w_array.push(Object::Integer(start));
1707 w_array.push(Object::Integer(end));
1708 w_array.push(Object::Integer(width));
1709 }
1710 }
1711
1712 w_array
1713 }
1714
1715 fn generate_cid_to_gid_map(
1717 &mut self,
1718 font: &crate::fonts::Font,
1719 subset_mapping: Option<&HashMap<u32, u16>>,
1720 ) -> Result<Vec<u8>> {
1721 use crate::text::fonts::truetype::TrueTypeFont;
1722
1723 let cmap_mappings = if let Some(subset_map) = subset_mapping {
1726 subset_map.clone()
1728 } else {
1729 let tt_font = TrueTypeFont::parse(font.data.clone())?;
1731 let cmap_tables = tt_font.parse_cmap()?;
1732
1733 let cmap = cmap_tables
1735 .iter()
1736 .find(|t| t.platform_id == 3 && t.encoding_id == 1) .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0)) .ok_or_else(|| {
1739 crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
1740 })?;
1741
1742 cmap.mappings.clone()
1743 };
1744
1745 let used_chars = self.document_used_chars.clone().unwrap_or_default();
1752
1753 let max_unicode = if !used_chars.is_empty() {
1755 used_chars
1757 .iter()
1758 .map(|ch| *ch as u32)
1759 .max()
1760 .unwrap_or(0x00FF) .min(0xFFFF) as usize
1762 } else {
1763 cmap_mappings
1765 .keys()
1766 .max()
1767 .copied()
1768 .unwrap_or(0xFFFF)
1769 .min(0xFFFF) as usize
1770 };
1771
1772 let mut map = vec![0u8; (max_unicode + 1) * 2];
1774
1775 let mut sample_mappings = Vec::new();
1777 for (&unicode, &glyph_id) in &cmap_mappings {
1778 if unicode <= max_unicode as u32 {
1779 let idx = (unicode as usize) * 2;
1780 map[idx] = (glyph_id >> 8) as u8;
1782 map[idx + 1] = (glyph_id & 0xFF) as u8;
1783
1784 if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
1786 {
1787 sample_mappings.push((unicode, glyph_id));
1788 }
1789 }
1790 }
1791
1792 Ok(map)
1793 }
1794
1795 fn generate_tounicode_cmap_from_font(&self, font: &crate::fonts::Font) -> Vec<u8> {
1797 use crate::text::fonts::truetype::TrueTypeFont;
1798
1799 let mut cmap = String::new();
1800
1801 cmap.push_str("/CIDInit /ProcSet findresource begin\n");
1803 cmap.push_str("12 dict begin\n");
1804 cmap.push_str("begincmap\n");
1805 cmap.push_str("/CIDSystemInfo\n");
1806 cmap.push_str("<< /Registry (Adobe)\n");
1807 cmap.push_str(" /Ordering (UCS)\n");
1808 cmap.push_str(" /Supplement 0\n");
1809 cmap.push_str(">> def\n");
1810 cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
1811 cmap.push_str("/CMapType 2 def\n");
1812 cmap.push_str("1 begincodespacerange\n");
1813 cmap.push_str("<0000> <FFFF>\n");
1814 cmap.push_str("endcodespacerange\n");
1815
1816 let mut mappings = Vec::new();
1818 let mut has_font_mappings = false;
1819
1820 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1821 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1822 if let Some(cmap_table) = cmap_tables
1824 .iter()
1825 .find(|t| t.platform_id == 3 && t.encoding_id == 1) .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1827 {
1829 for (&unicode, &glyph_id) in &cmap_table.mappings {
1832 if glyph_id > 0 && unicode <= 0xFFFF {
1833 mappings.push((unicode, unicode));
1836 }
1837 }
1838 has_font_mappings = true;
1839 }
1840 }
1841 }
1842
1843 if !has_font_mappings {
1845 for i in 0x0020..=0x00FF {
1847 mappings.push((i, i));
1848 }
1849
1850 for i in 0x0100..=0x017F {
1852 mappings.push((i, i));
1853 }
1854
1855 for i in 0x3040..=0x309F {
1858 mappings.push((i, i));
1859 }
1860
1861 for i in 0x30A0..=0x30FF {
1863 mappings.push((i, i));
1864 }
1865
1866 for i in 0x4E00..=0x9FFF {
1868 mappings.push((i, i));
1869 }
1870
1871 for i in 0xAC00..=0xD7AF {
1873 mappings.push((i, i));
1874 }
1875
1876 for i in 0x2000..=0x206F {
1878 mappings.push((i, i));
1879 }
1880
1881 for i in 0x2200..=0x22FF {
1883 mappings.push((i, i));
1884 }
1885
1886 for i in 0x2190..=0x21FF {
1888 mappings.push((i, i));
1889 }
1890
1891 for i in 0x2500..=0x259F {
1893 mappings.push((i, i));
1894 }
1895
1896 for i in 0x25A0..=0x25FF {
1898 mappings.push((i, i));
1899 }
1900
1901 for i in 0x2600..=0x26FF {
1903 mappings.push((i, i));
1904 }
1905 }
1906
1907 mappings.sort_by_key(|&(cid, _)| cid);
1909
1910 let mut i = 0;
1912 while i < mappings.len() {
1913 let start_cid = mappings[i].0;
1915 let start_unicode = mappings[i].1;
1916 let mut end_idx = i;
1917
1918 while end_idx + 1 < mappings.len()
1920 && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
1921 && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
1922 && end_idx - i < 99
1923 {
1925 end_idx += 1;
1926 }
1927
1928 if end_idx > i {
1929 cmap.push_str("1 beginbfrange\n");
1931 cmap.push_str(&format!(
1932 "<{:04X}> <{:04X}> <{:04X}>\n",
1933 start_cid, mappings[end_idx].0, start_unicode
1934 ));
1935 cmap.push_str("endbfrange\n");
1936 i = end_idx + 1;
1937 } else {
1938 let mut chars = Vec::new();
1940 let chunk_end = (i + 100).min(mappings.len());
1941
1942 for item in &mappings[i..chunk_end] {
1943 chars.push(*item);
1944 }
1945
1946 if !chars.is_empty() {
1947 cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
1948 for (cid, unicode) in chars {
1949 cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
1950 }
1951 cmap.push_str("endbfchar\n");
1952 }
1953
1954 i = chunk_end;
1955 }
1956 }
1957
1958 cmap.push_str("endcmap\n");
1960 cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
1961 cmap.push_str("end\n");
1962 cmap.push_str("end\n");
1963
1964 cmap.into_bytes()
1965 }
1966
1967 #[allow(dead_code)]
1969 fn write_truetype_font(
1970 &mut self,
1971 font_name: &str,
1972 font: &crate::text::font_manager::CustomFont,
1973 ) -> Result<ObjectId> {
1974 let font_id = self.allocate_object_id();
1976 let descriptor_id = self.allocate_object_id();
1977 let font_file_id = self.allocate_object_id();
1978
1979 if let Some(ref data) = font.font_data {
1981 let mut font_file_dict = Dictionary::new();
1982 font_file_dict.set("Length1", Object::Integer(data.len() as i64));
1983 let font_stream_obj = Object::Stream(font_file_dict, data.clone());
1984 self.write_object(font_file_id, font_stream_obj)?;
1985 }
1986
1987 let mut descriptor = Dictionary::new();
1989 descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1990 descriptor.set("FontName", Object::Name(font_name.to_string()));
1991 descriptor.set("Flags", Object::Integer(32)); descriptor.set(
1993 "FontBBox",
1994 Object::Array(vec![
1995 Object::Integer(-1000),
1996 Object::Integer(-1000),
1997 Object::Integer(2000),
1998 Object::Integer(2000),
1999 ]),
2000 );
2001 descriptor.set("ItalicAngle", Object::Integer(0));
2002 descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
2003 descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
2004 descriptor.set(
2005 "CapHeight",
2006 Object::Integer(font.descriptor.cap_height as i64),
2007 );
2008 descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
2009 descriptor.set("FontFile2", Object::Reference(font_file_id));
2010 self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
2011
2012 let mut font_dict = Dictionary::new();
2014 font_dict.set("Type", Object::Name("Font".to_string()));
2015 font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2016 font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2017 font_dict.set("FirstChar", Object::Integer(0));
2018 font_dict.set("LastChar", Object::Integer(255));
2019
2020 let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2022 font_dict.set("Widths", Object::Array(widths));
2023 font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2024
2025 font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2027
2028 self.write_object(font_id, Object::Dictionary(font_dict))?;
2029
2030 Ok(font_id)
2031 }
2032
2033 fn write_pages(
2034 &mut self,
2035 document: &Document,
2036 font_refs: &HashMap<String, ObjectId>,
2037 ) -> Result<()> {
2038 let pages_id = self.get_pages_id()?;
2039 let mut pages_dict = Dictionary::new();
2040 pages_dict.set("Type", Object::Name("Pages".to_string()));
2041 pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2042
2043 let mut kids = Vec::new();
2044
2045 let mut page_ids = Vec::new();
2047 let mut content_ids = Vec::new();
2048 for _ in 0..document.pages.len() {
2049 page_ids.push(self.allocate_object_id());
2050 content_ids.push(self.allocate_object_id());
2051 }
2052
2053 for page_id in &page_ids {
2054 kids.push(Object::Reference(*page_id));
2055 }
2056
2057 pages_dict.set("Kids", Object::Array(kids));
2058
2059 self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2060
2061 self.page_ids = page_ids.clone();
2063
2064 for (i, page) in document.pages.iter().enumerate() {
2066 let page_id = page_ids[i];
2067 let content_id = content_ids[i];
2068
2069 self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2070 self.write_page_content(content_id, page)?;
2071 }
2072
2073 Ok(())
2074 }
2075
    /// Writes the page tree and all pages of `document`.
    ///
    /// Thin wrapper that forwards both arguments to `write_pages` and returns
    /// its result unchanged. Marked `dead_code`-allowed, so it may currently
    /// have no callers.
    #[allow(dead_code)]
    fn write_pages_with_fonts(
        &mut self,
        document: &Document,
        font_refs: &HashMap<String, ObjectId>,
    ) -> Result<()> {
        self.write_pages(document, font_refs)
    }
2085
2086 fn write_page_with_fonts(
2087 &mut self,
2088 page_id: ObjectId,
2089 parent_id: ObjectId,
2090 content_id: ObjectId,
2091 page: &crate::page::Page,
2092 _document: &Document,
2093 font_refs: &HashMap<String, ObjectId>,
2094 ) -> Result<()> {
2095 let mut page_dict = page.to_dict();
2097
2098 page_dict.set("Type", Object::Name("Page".to_string()));
2099 page_dict.set("Parent", Object::Reference(parent_id));
2100 page_dict.set("Contents", Object::Reference(content_id));
2101
2102 let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2104 res.clone()
2105 } else {
2106 Dictionary::new()
2107 };
2108
2109 let mut font_dict = Dictionary::new();
2111
2112 let mut helvetica_dict = Dictionary::new();
2117 helvetica_dict.set("Type", Object::Name("Font".to_string()));
2118 helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2119 helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2120 helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2121 font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2122
2123 let mut helvetica_bold_dict = Dictionary::new();
2124 helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2125 helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2126 helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2127 helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2128 font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2129
2130 let mut helvetica_oblique_dict = Dictionary::new();
2131 helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2132 helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2133 helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2134 helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2135 font_dict.set(
2136 "Helvetica-Oblique",
2137 Object::Dictionary(helvetica_oblique_dict),
2138 );
2139
2140 let mut helvetica_bold_oblique_dict = Dictionary::new();
2141 helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2142 helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2143 helvetica_bold_oblique_dict.set(
2144 "BaseFont",
2145 Object::Name("Helvetica-BoldOblique".to_string()),
2146 );
2147 helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2148 font_dict.set(
2149 "Helvetica-BoldOblique",
2150 Object::Dictionary(helvetica_bold_oblique_dict),
2151 );
2152
2153 let mut times_dict = Dictionary::new();
2155 times_dict.set("Type", Object::Name("Font".to_string()));
2156 times_dict.set("Subtype", Object::Name("Type1".to_string()));
2157 times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2158 times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2159 font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2160
2161 let mut times_bold_dict = Dictionary::new();
2162 times_bold_dict.set("Type", Object::Name("Font".to_string()));
2163 times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2164 times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2165 times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2166 font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2167
2168 let mut times_italic_dict = Dictionary::new();
2169 times_italic_dict.set("Type", Object::Name("Font".to_string()));
2170 times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2171 times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2172 times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2173 font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2174
2175 let mut times_bold_italic_dict = Dictionary::new();
2176 times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2177 times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2178 times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2179 times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2180 font_dict.set(
2181 "Times-BoldItalic",
2182 Object::Dictionary(times_bold_italic_dict),
2183 );
2184
2185 let mut courier_dict = Dictionary::new();
2187 courier_dict.set("Type", Object::Name("Font".to_string()));
2188 courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2189 courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2190 courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2191 font_dict.set("Courier", Object::Dictionary(courier_dict));
2192
2193 let mut courier_bold_dict = Dictionary::new();
2194 courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2195 courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2196 courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2197 courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2198 font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2199
2200 let mut courier_oblique_dict = Dictionary::new();
2201 courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2202 courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2203 courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2204 courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2205 font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2206
2207 let mut courier_bold_oblique_dict = Dictionary::new();
2208 courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2209 courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2210 courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2211 courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2212 font_dict.set(
2213 "Courier-BoldOblique",
2214 Object::Dictionary(courier_bold_oblique_dict),
2215 );
2216
2217 for (font_name, font_id) in font_refs {
2219 font_dict.set(font_name, Object::Reference(*font_id));
2220 }
2221
2222 resources.set("Font", Object::Dictionary(font_dict));
2223
2224 if !page.images().is_empty() {
2226 let mut xobject_dict = Dictionary::new();
2227
2228 for (name, image) in page.images() {
2229 let image_id = self.allocate_object_id();
2231
2232 if image.has_transparency() {
2234 let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2236
2237 if let Some(smask_stream) = smask_obj {
2239 let smask_id = self.allocate_object_id();
2240 self.write_object(smask_id, smask_stream)?;
2241
2242 if let Object::Stream(ref mut dict, _) = main_obj {
2244 dict.set("SMask", Object::Reference(smask_id));
2245 }
2246 }
2247
2248 self.write_object(image_id, main_obj)?;
2250 } else {
2251 self.write_object(image_id, image.to_pdf_object())?;
2253 }
2254
2255 xobject_dict.set(name, Object::Reference(image_id));
2257 }
2258
2259 resources.set("XObject", Object::Dictionary(xobject_dict));
2260 }
2261
2262 if let Some(extgstate_states) = page.get_extgstate_resources() {
2264 let mut extgstate_dict = Dictionary::new();
2265 for (name, state) in extgstate_states {
2266 let mut state_dict = Dictionary::new();
2267 state_dict.set("Type", Object::Name("ExtGState".to_string()));
2268
2269 if let Some(alpha_stroke) = state.alpha_stroke {
2271 state_dict.set("CA", Object::Real(alpha_stroke));
2272 }
2273 if let Some(alpha_fill) = state.alpha_fill {
2274 state_dict.set("ca", Object::Real(alpha_fill));
2275 }
2276
2277 if let Some(line_width) = state.line_width {
2279 state_dict.set("LW", Object::Real(line_width));
2280 }
2281 if let Some(line_cap) = state.line_cap {
2282 state_dict.set("LC", Object::Integer(line_cap as i64));
2283 }
2284 if let Some(line_join) = state.line_join {
2285 state_dict.set("LJ", Object::Integer(line_join as i64));
2286 }
2287 if let Some(dash_pattern) = &state.dash_pattern {
2288 let dash_objects: Vec<Object> = dash_pattern
2289 .array
2290 .iter()
2291 .map(|&d| Object::Real(d))
2292 .collect();
2293 state_dict.set(
2294 "D",
2295 Object::Array(vec![
2296 Object::Array(dash_objects),
2297 Object::Real(dash_pattern.phase),
2298 ]),
2299 );
2300 }
2301
2302 extgstate_dict.set(name, Object::Dictionary(state_dict));
2303 }
2304 if !extgstate_dict.is_empty() {
2305 resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2306 }
2307 }
2308
2309 if let Some(preserved_res) = page.get_preserved_resources() {
2312 let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2314
2315 if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2317 let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2319
2320 preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2322 }
2323
2324 if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2328 let mut fonts_with_refs = crate::objects::Dictionary::new();
2329
2330 for (font_name, font_obj) in fonts.iter() {
2331 if let Object::Dictionary(font_dict) = font_obj {
2332 let updated_font = self.write_embedded_font_streams(font_dict)?;
2334 fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2335 } else {
2336 fonts_with_refs.set(font_name, font_obj.clone());
2338 }
2339 }
2340
2341 preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2343 }
2344
2345 for (key, value) in preserved_writer_dict.iter() {
2347 if let Some(Object::Dictionary(existing)) = resources.get(key) {
2349 if let Object::Dictionary(preserved_dict) = value {
2350 let mut merged = existing.clone();
2351 for (res_name, res_obj) in preserved_dict.iter() {
2353 if !merged.contains_key(res_name) {
2354 merged.set(res_name, res_obj.clone());
2355 }
2356 }
2357 resources.set(key, Object::Dictionary(merged));
2358 }
2359 } else {
2360 resources.set(key, value.clone());
2362 }
2363 }
2364 }
2365
2366 page_dict.set("Resources", Object::Dictionary(resources));
2367
2368 if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2370 let mut new_annots = Vec::new();
2371
2372 for annot in annots {
2373 if let Object::Dictionary(ref annot_dict) = annot {
2374 if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2375 if subtype == "Widget" {
2376 let widget_id = self.allocate_object_id();
2378 self.write_object(widget_id, annot.clone())?;
2379 new_annots.push(Object::Reference(widget_id));
2380
2381 if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2383 if let Some(Object::String(field_name)) = annot_dict.get("T") {
2384 self.field_widget_map
2385 .entry(field_name.clone())
2386 .or_default()
2387 .push(widget_id);
2388 self.field_id_map.insert(field_name.clone(), widget_id);
2389 self.form_field_ids.push(widget_id);
2390 }
2391 }
2392 continue;
2393 }
2394 }
2395 }
2396 new_annots.push(annot.clone());
2397 }
2398
2399 if !new_annots.is_empty() {
2400 page_dict.set("Annots", Object::Array(new_annots));
2401 }
2402 }
2403
2404 self.write_object(page_id, Object::Dictionary(page_dict))?;
2405 Ok(())
2406 }
2407}
2408
2409impl PdfWriter<BufWriter<std::fs::File>> {
2410 pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2411 let file = std::fs::File::create(path)?;
2412 let writer = BufWriter::new(file);
2413
2414 Ok(Self {
2415 writer,
2416 xref_positions: HashMap::new(),
2417 current_position: 0,
2418 next_object_id: 1,
2419 catalog_id: None,
2420 pages_id: None,
2421 info_id: None,
2422 field_widget_map: HashMap::new(),
2423 field_id_map: HashMap::new(),
2424 form_field_ids: Vec::new(),
2425 page_ids: Vec::new(),
2426 config: WriterConfig::default(),
2427 document_used_chars: None,
2428 buffered_objects: HashMap::new(),
2429 compressed_object_map: HashMap::new(),
2430 prev_xref_offset: None,
2431 base_pdf_size: None,
2432 })
2433 }
2434}
2435
2436impl<W: Write> PdfWriter<W> {
2437 fn write_embedded_font_streams(
2453 &mut self,
2454 font_dict: &crate::objects::Dictionary,
2455 ) -> Result<crate::objects::Dictionary> {
2456 let mut updated_font = font_dict.clone();
2457
2458 if let Some(Object::Name(subtype)) = font_dict.get("Subtype") {
2460 if subtype == "Type0" {
2461 if let Some(Object::Array(descendants)) = font_dict.get("DescendantFonts") {
2463 let mut updated_descendants = Vec::new();
2464
2465 for descendant in descendants {
2466 match descendant {
2467 Object::Dictionary(cidfont) => {
2468 let updated_cidfont =
2470 self.write_cidfont_embedded_streams(cidfont)?;
2471 let cidfont_id = self.allocate_object_id();
2473 self.write_object(cidfont_id, Object::Dictionary(updated_cidfont))?;
2474 updated_descendants.push(Object::Reference(cidfont_id));
2476 }
2477 Object::Reference(_) => {
2478 updated_descendants.push(descendant.clone());
2480 }
2481 _ => {
2482 updated_descendants.push(descendant.clone());
2483 }
2484 }
2485 }
2486
2487 updated_font.set("DescendantFonts", Object::Array(updated_descendants));
2488 }
2489
2490 if let Some(Object::Stream(stream_dict, stream_data)) = font_dict.get("ToUnicode") {
2492 let tounicode_id = self.allocate_object_id();
2493 self.write_object(
2494 tounicode_id,
2495 Object::Stream(stream_dict.clone(), stream_data.clone()),
2496 )?;
2497 updated_font.set("ToUnicode", Object::Reference(tounicode_id));
2498 }
2499
2500 return Ok(updated_font);
2501 }
2502 }
2503
2504 if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
2507 let mut updated_descriptor = descriptor.clone();
2508 let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2509
2510 for key in &font_file_keys {
2512 if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2513 let stream_id = self.allocate_object_id();
2515 let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
2516 self.write_object(stream_id, stream_obj)?;
2517
2518 updated_descriptor.set(*key, Object::Reference(stream_id));
2520 }
2521 }
2523
2524 updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
2526 }
2527
2528 Ok(updated_font)
2529 }
2530
2531 fn write_cidfont_embedded_streams(
2533 &mut self,
2534 cidfont: &crate::objects::Dictionary,
2535 ) -> Result<crate::objects::Dictionary> {
2536 let mut updated_cidfont = cidfont.clone();
2537
2538 if let Some(Object::Dictionary(descriptor)) = cidfont.get("FontDescriptor") {
2540 let mut updated_descriptor = descriptor.clone();
2541 let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2542
2543 for key in &font_file_keys {
2545 if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2546 let stream_id = self.allocate_object_id();
2547 self.write_object(
2548 stream_id,
2549 Object::Stream(stream_dict.clone(), stream_data.clone()),
2550 )?;
2551 updated_descriptor.set(*key, Object::Reference(stream_id));
2552 }
2553 }
2554
2555 let descriptor_id = self.allocate_object_id();
2557 self.write_object(descriptor_id, Object::Dictionary(updated_descriptor))?;
2558
2559 updated_cidfont.set("FontDescriptor", Object::Reference(descriptor_id));
2561 }
2562
2563 if let Some(Object::Stream(map_dict, map_data)) = cidfont.get("CIDToGIDMap") {
2565 let map_id = self.allocate_object_id();
2566 self.write_object(map_id, Object::Stream(map_dict.clone(), map_data.clone()))?;
2567 updated_cidfont.set("CIDToGIDMap", Object::Reference(map_id));
2568 }
2569
2570 Ok(updated_cidfont)
2571 }
2572
2573 fn allocate_object_id(&mut self) -> ObjectId {
2574 let id = ObjectId::new(self.next_object_id, 0);
2575 self.next_object_id += 1;
2576 id
2577 }
2578
2579 fn get_catalog_id(&self) -> Result<ObjectId> {
2581 self.catalog_id.ok_or_else(|| {
2582 PdfError::InvalidOperation(
2583 "catalog_id not initialized - write_document() must be called first".to_string(),
2584 )
2585 })
2586 }
2587
2588 fn get_pages_id(&self) -> Result<ObjectId> {
2590 self.pages_id.ok_or_else(|| {
2591 PdfError::InvalidOperation(
2592 "pages_id not initialized - write_document() must be called first".to_string(),
2593 )
2594 })
2595 }
2596
2597 fn get_info_id(&self) -> Result<ObjectId> {
2599 self.info_id.ok_or_else(|| {
2600 PdfError::InvalidOperation(
2601 "info_id not initialized - write_document() must be called first".to_string(),
2602 )
2603 })
2604 }
2605
2606 fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
2607 use crate::writer::ObjectStreamWriter;
2608
2609 if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
2611 let mut buffer = Vec::new();
2612 self.write_object_value_to_buffer(&object, &mut buffer)?;
2613 self.buffered_objects.insert(id, buffer);
2614 return Ok(());
2615 }
2616
2617 self.xref_positions.insert(id, self.current_position);
2619
2620 let header = format!("{} {} obj\n", id.number(), id.generation());
2622 self.write_bytes(header.as_bytes())?;
2623
2624 self.write_object_value(&object)?;
2625
2626 self.write_bytes(b"\nendobj\n")?;
2627 Ok(())
2628 }
2629
2630 fn write_object_value(&mut self, object: &Object) -> Result<()> {
2631 match object {
2632 Object::Null => self.write_bytes(b"null")?,
2633 Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
2634 Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
2635 Object::Real(f) => self.write_bytes(
2636 format!("{f:.6}")
2637 .trim_end_matches('0')
2638 .trim_end_matches('.')
2639 .as_bytes(),
2640 )?,
2641 Object::String(s) => {
2642 self.write_bytes(b"(")?;
2643 self.write_bytes(s.as_bytes())?;
2644 self.write_bytes(b")")?;
2645 }
2646 Object::Name(n) => {
2647 self.write_bytes(b"/")?;
2648 self.write_bytes(n.as_bytes())?;
2649 }
2650 Object::Array(arr) => {
2651 self.write_bytes(b"[")?;
2652 for (i, obj) in arr.iter().enumerate() {
2653 if i > 0 {
2654 self.write_bytes(b" ")?;
2655 }
2656 self.write_object_value(obj)?;
2657 }
2658 self.write_bytes(b"]")?;
2659 }
2660 Object::Dictionary(dict) => {
2661 self.write_bytes(b"<<")?;
2662 for (key, value) in dict.entries() {
2663 self.write_bytes(b"\n/")?;
2664 self.write_bytes(key.as_bytes())?;
2665 self.write_bytes(b" ")?;
2666 self.write_object_value(value)?;
2667 }
2668 self.write_bytes(b"\n>>")?;
2669 }
2670 Object::Stream(dict, data) => {
2671 let mut corrected_dict = dict.clone();
2674 corrected_dict.set("Length", Object::Integer(data.len() as i64));
2675
2676 self.write_object_value(&Object::Dictionary(corrected_dict))?;
2677 self.write_bytes(b"\nstream\n")?;
2678 self.write_bytes(data)?;
2679 self.write_bytes(b"\nendstream")?;
2680 }
2681 Object::Reference(id) => {
2682 let ref_str = format!("{} {} R", id.number(), id.generation());
2683 self.write_bytes(ref_str.as_bytes())?;
2684 }
2685 }
2686 Ok(())
2687 }
2688
2689 fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
2691 match object {
2692 Object::Null => buffer.extend_from_slice(b"null"),
2693 Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
2694 Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
2695 Object::Real(f) => buffer.extend_from_slice(
2696 format!("{f:.6}")
2697 .trim_end_matches('0')
2698 .trim_end_matches('.')
2699 .as_bytes(),
2700 ),
2701 Object::String(s) => {
2702 buffer.push(b'(');
2703 buffer.extend_from_slice(s.as_bytes());
2704 buffer.push(b')');
2705 }
2706 Object::Name(n) => {
2707 buffer.push(b'/');
2708 buffer.extend_from_slice(n.as_bytes());
2709 }
2710 Object::Array(arr) => {
2711 buffer.push(b'[');
2712 for (i, obj) in arr.iter().enumerate() {
2713 if i > 0 {
2714 buffer.push(b' ');
2715 }
2716 self.write_object_value_to_buffer(obj, buffer)?;
2717 }
2718 buffer.push(b']');
2719 }
2720 Object::Dictionary(dict) => {
2721 buffer.extend_from_slice(b"<<");
2722 for (key, value) in dict.entries() {
2723 buffer.extend_from_slice(b"\n/");
2724 buffer.extend_from_slice(key.as_bytes());
2725 buffer.push(b' ');
2726 self.write_object_value_to_buffer(value, buffer)?;
2727 }
2728 buffer.extend_from_slice(b"\n>>");
2729 }
2730 Object::Stream(_, _) => {
2731 return Err(crate::error::PdfError::ObjectStreamError(
2733 "Cannot compress stream objects in object streams".to_string(),
2734 ));
2735 }
2736 Object::Reference(id) => {
2737 let ref_str = format!("{} {} R", id.number(), id.generation());
2738 buffer.extend_from_slice(ref_str.as_bytes());
2739 }
2740 }
2741 Ok(())
2742 }
2743
2744 fn flush_object_streams(&mut self) -> Result<()> {
2746 if self.buffered_objects.is_empty() {
2747 return Ok(());
2748 }
2749
2750 let config = ObjectStreamConfig {
2752 max_objects_per_stream: 100,
2753 compression_level: 6,
2754 enabled: true,
2755 };
2756 let mut os_writer = ObjectStreamWriter::new(config);
2757
2758 let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
2760 buffered.sort_by_key(|(id, _)| id.number());
2761
2762 for (id, data) in buffered {
2764 os_writer.add_object(*id, data.clone())?;
2765 }
2766
2767 let streams = os_writer.finalize()?;
2769
2770 for mut stream in streams {
2772 let stream_id = stream.stream_id;
2773
2774 let compressed_data = stream.generate_stream_data(6)?;
2776
2777 let dict = stream.generate_dictionary(&compressed_data);
2779
2780 for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
2782 self.compressed_object_map
2783 .insert(*obj_id, (stream_id, index as u32));
2784 }
2785
2786 self.xref_positions.insert(stream_id, self.current_position);
2788
2789 let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
2790 self.write_bytes(header.as_bytes())?;
2791
2792 self.write_object_value(&Object::Dictionary(dict))?;
2793
2794 self.write_bytes(b"\nstream\n")?;
2795 self.write_bytes(&compressed_data)?;
2796 self.write_bytes(b"\nendstream\nendobj\n")?;
2797 }
2798
2799 Ok(())
2800 }
2801
2802 fn write_xref(&mut self) -> Result<()> {
2803 self.write_bytes(b"xref\n")?;
2804
2805 let mut entries: Vec<_> = self
2807 .xref_positions
2808 .iter()
2809 .map(|(id, pos)| (*id, *pos))
2810 .collect();
2811 entries.sort_by_key(|(id, _)| id.number());
2812
2813 let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
2815
2816 self.write_bytes(b"0 ")?;
2819 self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
2820 self.write_bytes(b"\n")?;
2821
2822 self.write_bytes(b"0000000000 65535 f \n")?;
2824
2825 for obj_num in 1..=max_obj_num {
2828 let _obj_id = ObjectId::new(obj_num, 0);
2829 if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
2830 let entry = format!("{:010} {:05} n \n", position, 0);
2831 self.write_bytes(entry.as_bytes())?;
2832 } else {
2833 self.write_bytes(b"0000000000 00000 f \n")?;
2835 }
2836 }
2837
2838 Ok(())
2839 }
2840
2841 fn write_xref_stream(&mut self) -> Result<()> {
2842 let catalog_id = self.get_catalog_id()?;
2843 let info_id = self.get_info_id()?;
2844
2845 let xref_stream_id = self.allocate_object_id();
2847 let xref_position = self.current_position;
2848
2849 let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
2851 xref_writer.set_trailer_info(catalog_id, info_id);
2852
2853 xref_writer.add_free_entry(0, 65535);
2855
2856 let mut entries: Vec<_> = self
2858 .xref_positions
2859 .iter()
2860 .map(|(id, pos)| (*id, *pos))
2861 .collect();
2862 entries.sort_by_key(|(id, _)| id.number());
2863
2864 let max_obj_num = entries
2866 .iter()
2867 .map(|(id, _)| id.number())
2868 .max()
2869 .unwrap_or(0)
2870 .max(xref_stream_id.number());
2871
2872 for obj_num in 1..=max_obj_num {
2874 let obj_id = ObjectId::new(obj_num, 0);
2875
2876 if obj_num == xref_stream_id.number() {
2877 xref_writer.add_in_use_entry(xref_position, 0);
2879 } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
2880 xref_writer.add_compressed_entry(stream_id.number(), *index);
2882 } else if let Some((id, position)) =
2883 entries.iter().find(|(id, _)| id.number() == obj_num)
2884 {
2885 xref_writer.add_in_use_entry(*position, id.generation());
2887 } else {
2888 xref_writer.add_free_entry(0, 0);
2890 }
2891 }
2892
2893 self.xref_positions.insert(xref_stream_id, xref_position);
2895
2896 self.write_bytes(
2898 format!(
2899 "{} {} obj\n",
2900 xref_stream_id.number(),
2901 xref_stream_id.generation()
2902 )
2903 .as_bytes(),
2904 )?;
2905
2906 let uncompressed_data = xref_writer.encode_entries();
2908 let final_data = if self.config.compress_streams {
2909 crate::compression::compress(&uncompressed_data)?
2910 } else {
2911 uncompressed_data
2912 };
2913
2914 let mut dict = xref_writer.create_dictionary(None);
2916 dict.set("Length", Object::Integer(final_data.len() as i64));
2917
2918 if self.config.compress_streams {
2920 dict.set("Filter", Object::Name("FlateDecode".to_string()));
2921 }
2922 self.write_bytes(b"<<")?;
2923 for (key, value) in dict.iter() {
2924 self.write_bytes(b"\n/")?;
2925 self.write_bytes(key.as_bytes())?;
2926 self.write_bytes(b" ")?;
2927 self.write_object_value(value)?;
2928 }
2929 self.write_bytes(b"\n>>\n")?;
2930
2931 self.write_bytes(b"stream\n")?;
2933 self.write_bytes(&final_data)?;
2934 self.write_bytes(b"\nendstream\n")?;
2935 self.write_bytes(b"endobj\n")?;
2936
2937 self.write_bytes(b"\nstartxref\n")?;
2939 self.write_bytes(xref_position.to_string().as_bytes())?;
2940 self.write_bytes(b"\n%%EOF\n")?;
2941
2942 Ok(())
2943 }
2944
2945 fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
2946 let catalog_id = self.get_catalog_id()?;
2947 let info_id = self.get_info_id()?;
2948 let max_obj_num = self
2950 .xref_positions
2951 .keys()
2952 .map(|id| id.number())
2953 .max()
2954 .unwrap_or(0);
2955
2956 let mut trailer = Dictionary::new();
2957 trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
2958 trailer.set("Root", Object::Reference(catalog_id));
2959 trailer.set("Info", Object::Reference(info_id));
2960
2961 if let Some(prev_xref) = self.prev_xref_offset {
2963 trailer.set("Prev", Object::Integer(prev_xref as i64));
2964 }
2965
2966 self.write_bytes(b"trailer\n")?;
2967 self.write_object_value(&Object::Dictionary(trailer))?;
2968 self.write_bytes(b"\nstartxref\n")?;
2969 self.write_bytes(xref_position.to_string().as_bytes())?;
2970 self.write_bytes(b"\n%%EOF\n")?;
2971
2972 Ok(())
2973 }
2974
2975 fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
2976 self.writer.write_all(data)?;
2977 self.current_position += data.len() as u64;
2978 Ok(())
2979 }
2980
2981 #[allow(dead_code)]
2982 fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
2983 let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
2985 if rect_array.len() >= 4 {
2986 if let (
2987 Some(Object::Real(x1)),
2988 Some(Object::Real(y1)),
2989 Some(Object::Real(x2)),
2990 Some(Object::Real(y2)),
2991 ) = (
2992 rect_array.first(),
2993 rect_array.get(1),
2994 rect_array.get(2),
2995 rect_array.get(3),
2996 ) {
2997 (*x1, *y1, *x2, *y2)
2998 } else {
2999 (0.0, 0.0, 100.0, 20.0) }
3001 } else {
3002 (0.0, 0.0, 100.0, 20.0) }
3004 } else {
3005 (0.0, 0.0, 100.0, 20.0) };
3007
3008 let width = rect.2 - rect.0;
3009 let height = rect.3 - rect.1;
3010
3011 let mut content = String::new();
3013
3014 content.push_str("q\n");
3016
3017 content.push_str("0 0 0 RG\n"); content.push_str("1 w\n"); content.push_str(&format!("0 0 {width} {height} re\n"));
3023 content.push_str("S\n"); content.push_str("1 1 1 rg\n"); content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
3028 content.push_str("f\n"); content.push_str("Q\n");
3032
3033 let mut stream_dict = Dictionary::new();
3035 stream_dict.set("Type", Object::Name("XObject".to_string()));
3036 stream_dict.set("Subtype", Object::Name("Form".to_string()));
3037 stream_dict.set(
3038 "BBox",
3039 Object::Array(vec![
3040 Object::Real(0.0),
3041 Object::Real(0.0),
3042 Object::Real(width),
3043 Object::Real(height),
3044 ]),
3045 );
3046 stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3047 stream_dict.set("Length", Object::Integer(content.len() as i64));
3048
3049 let stream_id = self.allocate_object_id();
3051 self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3052
3053 Ok(stream_id)
3054 }
3055
3056 #[allow(dead_code)]
3057 fn create_field_appearance_stream(
3058 &mut self,
3059 field_dict: &Dictionary,
3060 widget: &crate::forms::Widget,
3061 ) -> Result<ObjectId> {
3062 let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
3063 let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
3064
3065 let mut content = String::new();
3067
3068 content.push_str("q\n");
3070
3071 if let Some(bg_color) = &widget.appearance.background_color {
3073 match bg_color {
3074 crate::graphics::Color::Gray(g) => {
3075 content.push_str(&format!("{g} g\n"));
3076 }
3077 crate::graphics::Color::Rgb(r, g, b) => {
3078 content.push_str(&format!("{r} {g} {b} rg\n"));
3079 }
3080 crate::graphics::Color::Cmyk(c, m, y, k) => {
3081 content.push_str(&format!("{c} {m} {y} {k} k\n"));
3082 }
3083 }
3084 content.push_str(&format!("0 0 {width} {height} re\n"));
3085 content.push_str("f\n");
3086 }
3087
3088 if let Some(border_color) = &widget.appearance.border_color {
3090 match border_color {
3091 crate::graphics::Color::Gray(g) => {
3092 content.push_str(&format!("{g} G\n"));
3093 }
3094 crate::graphics::Color::Rgb(r, g, b) => {
3095 content.push_str(&format!("{r} {g} {b} RG\n"));
3096 }
3097 crate::graphics::Color::Cmyk(c, m, y, k) => {
3098 content.push_str(&format!("{c} {m} {y} {k} K\n"));
3099 }
3100 }
3101 content.push_str(&format!("{} w\n", widget.appearance.border_width));
3102 content.push_str(&format!("0 0 {width} {height} re\n"));
3103 content.push_str("S\n");
3104 }
3105
3106 if let Some(Object::Name(ft)) = field_dict.get("FT") {
3108 if ft == "Btn" {
3109 if let Some(Object::Name(v)) = field_dict.get("V") {
3110 if v == "Yes" {
3111 content.push_str("0 0 0 RG\n"); content.push_str("2 w\n");
3114 let margin = width * 0.2;
3115 content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3116 content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3117 content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3118 content.push_str("S\n");
3119 }
3120 }
3121 }
3122 }
3123
3124 content.push_str("Q\n");
3126
3127 let mut stream_dict = Dictionary::new();
3129 stream_dict.set("Type", Object::Name("XObject".to_string()));
3130 stream_dict.set("Subtype", Object::Name("Form".to_string()));
3131 stream_dict.set(
3132 "BBox",
3133 Object::Array(vec![
3134 Object::Real(0.0),
3135 Object::Real(0.0),
3136 Object::Real(width),
3137 Object::Real(height),
3138 ]),
3139 );
3140 stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3141 stream_dict.set("Length", Object::Integer(content.len() as i64));
3142
3143 let stream_id = self.allocate_object_id();
3145 self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3146
3147 Ok(stream_id)
3148 }
3149}
3150
3151fn format_pdf_date(date: DateTime<Utc>) -> String {
3153 let formatted = date.format("D:%Y%m%d%H%M%S");
3156
3157 format!("{formatted}+00'00")
3159}
3160
3161#[cfg(test)]
3162mod tests;
3163
3164#[cfg(test)]
3165mod rigorous_tests;