1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
6use chrono::{DateTime, Utc};
7use std::collections::HashMap;
8use std::io::{BufWriter, Write};
9use std::path::Path;
10
/// Options controlling how [`PdfWriter`] serializes a document.
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Emit a cross-reference stream instead of a classic xref table plus trailer.
    pub use_xref_streams: bool,
    /// Use object streams; they are flushed before the cross-reference data is written.
    pub use_object_streams: bool,
    /// Version string placed in the `%PDF-<version>` header line.
    pub pdf_version: String,
    /// Flate-compress page content streams (effective with the `compression` feature).
    pub compress_streams: bool,
    /// Flags the configuration as intended for incremental updates.
    pub incremental_update: bool,
}

impl Default for WriterConfig {
    /// Classic xref table, no object streams, PDF 1.7, compressed streams.
    fn default() -> Self {
        WriterConfig {
            pdf_version: String::from("1.7"),
            compress_streams: true,
            use_xref_streams: false,
            use_object_streams: false,
            incremental_update: false,
        }
    }
}

impl WriterConfig {
    /// PDF 1.5 preset with cross-reference streams and object streams enabled.
    pub fn modern() -> Self {
        Self {
            use_xref_streams: true,
            use_object_streams: true,
            pdf_version: String::from("1.5"),
            ..Self::default()
        }
    }

    /// PDF 1.4 preset using only the classic xref table and plain objects.
    pub fn legacy() -> Self {
        Self {
            pdf_version: String::from("1.4"),
            ..Self::default()
        }
    }

    /// Same as [`WriterConfig::legacy`] but with `incremental_update` set.
    pub fn incremental() -> Self {
        Self {
            incremental_update: true,
            ..Self::legacy()
        }
    }
}
72
/// Serializes a [`Document`] as PDF into any [`Write`] sink.
pub struct PdfWriter<W: Write> {
    /// Destination for all generated bytes.
    writer: W,
    /// Position recorded per object for the cross-reference section
    /// (presumably byte offsets filled in when objects are written — confirm in `write_object`).
    xref_positions: HashMap<ObjectId, u64>,
    /// Running output offset; sampled as the xref start position and initialised to the
    /// base file size when performing an incremental update.
    current_position: u64,
    /// Next object number to hand out; starts at 1.
    next_object_id: u32,
    /// Object id reserved for the document catalog at the start of a write.
    catalog_id: Option<ObjectId>,
    /// Object id reserved for the page-tree root at the start of a write.
    pages_id: Option<ObjectId>,
    /// Object id reserved for the info dictionary at the start of a write.
    info_id: Option<ObjectId>,
    // NOTE(review): not read anywhere in the visible code; presumably maps field names to
    // their widget annotation ids — confirm.
    #[allow(dead_code)]
    field_widget_map: HashMap<String, Vec<ObjectId>>,
    // NOTE(review): not read anywhere in the visible code; presumably maps field names to
    // their field object ids — confirm.
    #[allow(dead_code)]
    field_id_map: HashMap<String, ObjectId>,
    /// Ids of written form fields; used to rebuild the AcroForm field list.
    form_field_ids: Vec<ObjectId>,
    /// Ids of written page objects, emitted as `/Kids` of the page-tree root.
    page_ids: Vec<ObjectId>,
    /// Serialization options.
    config: WriterConfig,
    /// Characters used by the document (copied from `Document::used_characters`);
    /// consulted when subsetting embedded fonts.
    document_used_chars: Option<std::collections::HashSet<char>>,
    // NOTE(review): presumably serialized objects held back for object streams — confirm.
    buffered_objects: HashMap<ObjectId, Vec<u8>>,
    // NOTE(review): presumably object id -> (containing object stream id, index) — confirm.
    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>,
    /// `startxref` offset parsed from the base PDF during an incremental update.
    prev_xref_offset: Option<u64>,
    /// Size in bytes of the base PDF copied during an incremental update.
    base_pdf_size: Option<u64>,
}
100
101impl<W: Write> PdfWriter<W> {
102 pub fn new_with_writer(writer: W) -> Self {
103 Self::with_config(writer, WriterConfig::default())
104 }
105
106 pub fn with_config(writer: W, config: WriterConfig) -> Self {
107 Self {
108 writer,
109 xref_positions: HashMap::new(),
110 current_position: 0,
111 next_object_id: 1, catalog_id: None,
113 pages_id: None,
114 info_id: None,
115 field_widget_map: HashMap::new(),
116 field_id_map: HashMap::new(),
117 form_field_ids: Vec::new(),
118 page_ids: Vec::new(),
119 config,
120 document_used_chars: None,
121 buffered_objects: HashMap::new(),
122 compressed_object_map: HashMap::new(),
123 prev_xref_offset: None,
124 base_pdf_size: None,
125 }
126 }
127
128 pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
129 if !document.used_characters.is_empty() {
131 self.document_used_chars = Some(document.used_characters.clone());
132 }
133
134 self.write_header()?;
135
136 self.catalog_id = Some(self.allocate_object_id());
138 self.pages_id = Some(self.allocate_object_id());
139 self.info_id = Some(self.allocate_object_id());
140
141 let font_refs = self.write_fonts(document)?;
143
144 self.write_pages(document, &font_refs)?;
146
147 self.write_form_fields(document)?;
149
150 self.write_catalog(document)?;
152
153 self.write_info(document)?;
155
156 if self.config.use_object_streams {
158 self.flush_object_streams()?;
159 }
160
161 let xref_position = self.current_position;
163 if self.config.use_xref_streams {
164 self.write_xref_stream()?;
165 } else {
166 self.write_xref()?;
167 }
168
169 if !self.config.use_xref_streams {
171 self.write_trailer(xref_position)?;
172 }
173
174 if let Ok(()) = self.writer.flush() {
175 }
177 Ok(())
178 }
179
180 pub fn write_incremental_update(
214 &mut self,
215 base_pdf_path: impl AsRef<std::path::Path>,
216 document: &mut Document,
217 ) -> Result<()> {
218 use std::io::{BufReader, Read, Seek, SeekFrom};
219
220 let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
222 let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
223
224 let base_catalog = pdf_reader.catalog()?;
226
227 let (base_pages_id, base_pages_gen) = base_catalog
229 .get("Pages")
230 .and_then(|obj| {
231 if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
232 Some((*id, *gen))
233 } else {
234 None
235 }
236 })
237 .ok_or_else(|| {
238 crate::error::PdfError::InvalidStructure(
239 "Base PDF catalog missing /Pages reference".to_string(),
240 )
241 })?;
242
243 let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
245 let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
246 base_pages_obj
247 {
248 dict.get("Kids")
249 .and_then(|obj| {
250 if let crate::parser::objects::PdfObject::Array(arr) = obj {
251 Some(
254 arr.0
255 .iter()
256 .filter_map(|item| {
257 if let crate::parser::objects::PdfObject::Reference(id, gen) =
258 item
259 {
260 Some(crate::objects::Object::Reference(
261 crate::objects::ObjectId::new(*id, *gen),
262 ))
263 } else {
264 None
265 }
266 })
267 .collect::<Vec<_>>(),
268 )
269 } else {
270 None
271 }
272 })
273 .unwrap_or_default()
274 } else {
275 Vec::new()
276 };
277
278 let base_page_count = base_pages_kids.len();
280
281 let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
283 let mut base_reader = BufReader::new(base_pdf);
284
285 base_reader.seek(SeekFrom::End(-100))?;
287 let mut end_buffer = vec![0u8; 100];
288 let bytes_read = base_reader.read(&mut end_buffer)?;
289 end_buffer.truncate(bytes_read);
290
291 let end_str = String::from_utf8_lossy(&end_buffer);
292 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
293 let after_startxref = &end_str[startxref_pos + 9..];
294
295 let number_str: String = after_startxref
296 .chars()
297 .skip_while(|c| c.is_whitespace())
298 .take_while(|c| c.is_ascii_digit())
299 .collect();
300
301 number_str.parse::<u64>().map_err(|_| {
302 crate::error::PdfError::InvalidStructure(
303 "Could not parse startxref offset".to_string(),
304 )
305 })?
306 } else {
307 return Err(crate::error::PdfError::InvalidStructure(
308 "startxref not found in base PDF".to_string(),
309 ));
310 };
311
312 base_reader.seek(SeekFrom::Start(0))?;
314 let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
315
316 self.prev_xref_offset = Some(prev_xref);
318 self.base_pdf_size = Some(base_size);
319 self.current_position = base_size;
320
321 if !document.used_characters.is_empty() {
323 self.document_used_chars = Some(document.used_characters.clone());
324 }
325
326 self.catalog_id = Some(self.allocate_object_id());
328 self.pages_id = Some(self.allocate_object_id());
329 self.info_id = Some(self.allocate_object_id());
330
331 let font_refs = self.write_fonts(document)?;
333
334 self.write_pages(document, &font_refs)?;
336
337 self.write_form_fields(document)?;
339
340 let catalog_id = self.get_catalog_id()?;
342 let new_pages_id = self.get_pages_id()?;
343
344 let mut catalog = crate::objects::Dictionary::new();
345 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
346 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
347
348 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
353
354 let mut all_pages_kids = base_pages_kids;
356
357 for page_id in &self.page_ids {
359 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
360 }
361
362 let mut pages_dict = crate::objects::Dictionary::new();
363 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
364 pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
365 pages_dict.set(
366 "Count",
367 crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
368 );
369
370 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
371
372 self.write_info(document)?;
374
375 let xref_position = self.current_position;
377 self.write_xref()?;
378
379 self.write_trailer(xref_position)?;
381
382 self.writer.flush()?;
383 Ok(())
384 }
385
386 pub fn write_incremental_with_page_replacement(
452 &mut self,
453 base_pdf_path: impl AsRef<std::path::Path>,
454 document: &mut Document,
455 ) -> Result<()> {
456 use std::io::Cursor;
457
458 let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
460 let base_size = base_pdf_bytes.len() as u64;
461
462 let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
464
465 let base_catalog = pdf_reader.catalog()?;
466
467 let (base_pages_id, base_pages_gen) = base_catalog
468 .get("Pages")
469 .and_then(|obj| {
470 if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
471 Some((*id, *gen))
472 } else {
473 None
474 }
475 })
476 .ok_or_else(|| {
477 crate::error::PdfError::InvalidStructure(
478 "Base PDF catalog missing /Pages reference".to_string(),
479 )
480 })?;
481
482 let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
483 let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
484 base_pages_obj
485 {
486 dict.get("Kids")
487 .and_then(|obj| {
488 if let crate::parser::objects::PdfObject::Array(arr) = obj {
489 Some(
490 arr.0
491 .iter()
492 .filter_map(|item| {
493 if let crate::parser::objects::PdfObject::Reference(id, gen) =
494 item
495 {
496 Some(crate::objects::Object::Reference(
497 crate::objects::ObjectId::new(*id, *gen),
498 ))
499 } else {
500 None
501 }
502 })
503 .collect::<Vec<_>>(),
504 )
505 } else {
506 None
507 }
508 })
509 .unwrap_or_default()
510 } else {
511 Vec::new()
512 };
513
514 let base_page_count = base_pages_kids.len();
515
516 let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
518 let end_bytes = &base_pdf_bytes[start_search..];
519 let end_str = String::from_utf8_lossy(end_bytes);
520
521 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
522 let after_startxref = &end_str[startxref_pos + 9..];
523 let number_str: String = after_startxref
524 .chars()
525 .skip_while(|c| c.is_whitespace())
526 .take_while(|c| c.is_ascii_digit())
527 .collect();
528
529 number_str.parse::<u64>().map_err(|_| {
530 crate::error::PdfError::InvalidStructure(
531 "Could not parse startxref offset".to_string(),
532 )
533 })?
534 } else {
535 return Err(crate::error::PdfError::InvalidStructure(
536 "startxref not found in base PDF".to_string(),
537 ));
538 };
539
540 self.writer.write_all(&base_pdf_bytes)?;
542
543 self.prev_xref_offset = Some(prev_xref);
544 self.base_pdf_size = Some(base_size);
545 self.current_position = base_size;
546
547 if !document.used_characters.is_empty() {
549 self.document_used_chars = Some(document.used_characters.clone());
550 }
551
552 self.catalog_id = Some(self.allocate_object_id());
553 self.pages_id = Some(self.allocate_object_id());
554 self.info_id = Some(self.allocate_object_id());
555
556 let font_refs = self.write_fonts(document)?;
557 self.write_pages(document, &font_refs)?;
558 self.write_form_fields(document)?;
559
560 let catalog_id = self.get_catalog_id()?;
562 let new_pages_id = self.get_pages_id()?;
563
564 let mut catalog = crate::objects::Dictionary::new();
565 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
566 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
567 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
568
569 let mut all_pages_kids = Vec::new();
571 let replacement_count = document.pages.len();
572
573 for page_id in &self.page_ids {
575 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
576 }
577
578 if replacement_count < base_page_count {
580 for i in replacement_count..base_page_count {
581 if let Some(page_ref) = base_pages_kids.get(i) {
582 all_pages_kids.push(page_ref.clone());
583 }
584 }
585 }
586
587 let mut pages_dict = crate::objects::Dictionary::new();
588 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
589 pages_dict.set(
590 "Kids",
591 crate::objects::Object::Array(all_pages_kids.clone()),
592 );
593 pages_dict.set(
594 "Count",
595 crate::objects::Object::Integer(all_pages_kids.len() as i64),
596 );
597
598 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
599 self.write_info(document)?;
600
601 let xref_position = self.current_position;
602 self.write_xref()?;
603 self.write_trailer(xref_position)?;
604
605 self.writer.flush()?;
606 Ok(())
607 }
608
609 pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
657 &mut self,
658 base_pdf_path: P,
659 mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
660 ) -> Result<()> {
661 use std::io::Cursor;
662
663 let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
665 let base_size = base_pdf_bytes.len() as u64;
666
667 let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
669 let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
670
671 let page_count = parsed_doc.page_count()?;
673
674 let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
676 let end_bytes = &base_pdf_bytes[start_search..];
677 let end_str = String::from_utf8_lossy(end_bytes);
678
679 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
680 let after_startxref = &end_str[startxref_pos + 9..];
681 let number_str: String = after_startxref
682 .chars()
683 .skip_while(|c| c.is_whitespace())
684 .take_while(|c| c.is_ascii_digit())
685 .collect();
686
687 number_str.parse::<u64>().map_err(|_| {
688 crate::error::PdfError::InvalidStructure(
689 "Could not parse startxref offset".to_string(),
690 )
691 })?
692 } else {
693 return Err(crate::error::PdfError::InvalidStructure(
694 "startxref not found in base PDF".to_string(),
695 ));
696 };
697
698 self.writer.write_all(&base_pdf_bytes)?;
700
701 self.prev_xref_offset = Some(prev_xref);
702 self.base_pdf_size = Some(base_size);
703 self.current_position = base_size;
704
705 let mut temp_doc = crate::Document::new();
707
708 for page_idx in 0..page_count {
709 let parsed_page = parsed_doc.get_page(page_idx)?;
711 let mut writable_page =
712 crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
713
714 overlay_fn(&mut writable_page)?;
716
717 temp_doc.add_page(writable_page);
719 }
720
721 if !temp_doc.used_characters.is_empty() {
724 self.document_used_chars = Some(temp_doc.used_characters.clone());
725 }
726
727 self.catalog_id = Some(self.allocate_object_id());
728 self.pages_id = Some(self.allocate_object_id());
729 self.info_id = Some(self.allocate_object_id());
730
731 let font_refs = self.write_fonts(&temp_doc)?;
732 self.write_pages(&temp_doc, &font_refs)?;
733 self.write_form_fields(&mut temp_doc)?;
734
735 let catalog_id = self.get_catalog_id()?;
737 let new_pages_id = self.get_pages_id()?;
738
739 let mut catalog = crate::objects::Dictionary::new();
740 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
741 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
742 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
743
744 let mut all_pages_kids = Vec::new();
746 for page_id in &self.page_ids {
747 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
748 }
749
750 let mut pages_dict = crate::objects::Dictionary::new();
751 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
752 pages_dict.set(
753 "Kids",
754 crate::objects::Object::Array(all_pages_kids.clone()),
755 );
756 pages_dict.set(
757 "Count",
758 crate::objects::Object::Integer(all_pages_kids.len() as i64),
759 );
760
761 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
762 self.write_info(&temp_doc)?;
763
764 let xref_position = self.current_position;
765 self.write_xref()?;
766 self.write_trailer(xref_position)?;
767
768 self.writer.flush()?;
769 Ok(())
770 }
771
772 fn write_header(&mut self) -> Result<()> {
773 let header = format!("%PDF-{}\n", self.config.pdf_version);
774 self.write_bytes(header.as_bytes())?;
775 self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
777 Ok(())
778 }
779
780 fn convert_pdf_objects_dict_to_writer(
783 &self,
784 pdf_dict: &crate::pdf_objects::Dictionary,
785 ) -> crate::objects::Dictionary {
786 let mut writer_dict = crate::objects::Dictionary::new();
787
788 for (key, value) in pdf_dict.iter() {
789 let writer_obj = self.convert_pdf_object_to_writer(value);
790 writer_dict.set(key.as_str(), writer_obj);
791 }
792
793 writer_dict
794 }
795
796 fn convert_pdf_object_to_writer(
797 &self,
798 obj: &crate::pdf_objects::Object,
799 ) -> crate::objects::Object {
800 use crate::objects::Object as WriterObj;
801 use crate::pdf_objects::Object as PdfObj;
802
803 match obj {
804 PdfObj::Null => WriterObj::Null,
805 PdfObj::Boolean(b) => WriterObj::Boolean(*b),
806 PdfObj::Integer(i) => WriterObj::Integer(*i),
807 PdfObj::Real(f) => WriterObj::Real(*f),
808 PdfObj::String(s) => {
809 WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
810 }
811 PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
812 PdfObj::Array(arr) => {
813 let items: Vec<WriterObj> = arr
814 .iter()
815 .map(|item| self.convert_pdf_object_to_writer(item))
816 .collect();
817 WriterObj::Array(items)
818 }
819 PdfObj::Dictionary(dict) => {
820 WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
821 }
822 PdfObj::Stream(stream) => {
823 let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
824 WriterObj::Stream(dict, stream.data.clone())
825 }
826 PdfObj::Reference(id) => {
827 WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
828 }
829 }
830 }
831
832 fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
833 let catalog_id = self.get_catalog_id()?;
834 let pages_id = self.get_pages_id()?;
835
836 let mut catalog = Dictionary::new();
837 catalog.set("Type", Object::Name("Catalog".to_string()));
838 catalog.set("Pages", Object::Reference(pages_id));
839
840 if let Some(_form_manager) = &document.form_manager {
843 if document.acro_form.is_none() {
845 document.acro_form = Some(crate::forms::AcroForm::new());
846 }
847 }
848
849 if let Some(acro_form) = &document.acro_form {
851 let acro_form_id = self.allocate_object_id();
853
854 self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
856
857 catalog.set("AcroForm", Object::Reference(acro_form_id));
859 }
860
861 if let Some(outline_tree) = &document.outline {
863 if !outline_tree.items.is_empty() {
864 let outline_root_id = self.write_outline_tree(outline_tree)?;
865 catalog.set("Outlines", Object::Reference(outline_root_id));
866 }
867 }
868
869 if let Some(struct_tree) = &document.struct_tree {
871 if !struct_tree.is_empty() {
872 let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
873 catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
874 catalog.set("MarkInfo", {
876 let mut mark_info = Dictionary::new();
877 mark_info.set("Marked", Object::Boolean(true));
878 Object::Dictionary(mark_info)
879 });
880 }
881 }
882
883 let xmp_metadata = document.create_xmp_metadata();
886 let xmp_packet = xmp_metadata.to_xmp_packet();
887 let metadata_id = self.allocate_object_id();
888
889 let mut metadata_dict = Dictionary::new();
891 metadata_dict.set("Type", Object::Name("Metadata".to_string()));
892 metadata_dict.set("Subtype", Object::Name("XML".to_string()));
893 metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
894
895 self.write_object(
897 metadata_id,
898 Object::Stream(metadata_dict, xmp_packet.into_bytes()),
899 )?;
900
901 catalog.set("Metadata", Object::Reference(metadata_id));
903
904 self.write_object(catalog_id, Object::Dictionary(catalog))?;
905 Ok(())
906 }
907
908 fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
909 let mut page_copy = page.clone();
910 let content = page_copy.generate_content()?;
911
912 #[cfg(feature = "compression")]
914 {
915 use crate::objects::Stream;
916 let mut stream = Stream::new(content);
917 if self.config.compress_streams {
919 stream.compress_flate()?;
920 }
921
922 self.write_object(
923 content_id,
924 Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
925 )?;
926 }
927
928 #[cfg(not(feature = "compression"))]
929 {
930 let mut stream_dict = Dictionary::new();
931 stream_dict.set("Length", Object::Integer(content.len() as i64));
932
933 self.write_object(content_id, Object::Stream(stream_dict, content))?;
934 }
935
936 Ok(())
937 }
938
939 fn write_outline_tree(
940 &mut self,
941 outline_tree: &crate::structure::OutlineTree,
942 ) -> Result<ObjectId> {
943 let outline_root_id = self.allocate_object_id();
945
946 let mut outline_root = Dictionary::new();
947 outline_root.set("Type", Object::Name("Outlines".to_string()));
948
949 if !outline_tree.items.is_empty() {
950 let mut item_ids = Vec::new();
952
953 fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
955 let mut count = items.len();
956 for item in items {
957 count += count_items(&item.children);
958 }
959 count
960 }
961
962 let total_items = count_items(&outline_tree.items);
963
964 for _ in 0..total_items {
966 item_ids.push(self.allocate_object_id());
967 }
968
969 let mut id_index = 0;
970
971 let first_id = item_ids[0];
973 let last_id = item_ids[outline_tree.items.len() - 1];
974
975 outline_root.set("First", Object::Reference(first_id));
976 outline_root.set("Last", Object::Reference(last_id));
977
978 let visible_count = outline_tree.visible_count();
980 outline_root.set("Count", Object::Integer(visible_count));
981
982 let mut written_items = Vec::new();
984
985 for (i, item) in outline_tree.items.iter().enumerate() {
986 let item_id = item_ids[id_index];
987 id_index += 1;
988
989 let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
990 let next_id = if i < outline_tree.items.len() - 1 {
991 Some(item_ids[i + 1])
992 } else {
993 None
994 };
995
996 let children_ids = self.write_outline_item(
998 item,
999 item_id,
1000 outline_root_id,
1001 prev_id,
1002 next_id,
1003 &mut item_ids,
1004 &mut id_index,
1005 )?;
1006
1007 written_items.extend(children_ids);
1008 }
1009 }
1010
1011 self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1012 Ok(outline_root_id)
1013 }
1014
1015 #[allow(clippy::too_many_arguments)]
1016 fn write_outline_item(
1017 &mut self,
1018 item: &crate::structure::OutlineItem,
1019 item_id: ObjectId,
1020 parent_id: ObjectId,
1021 prev_id: Option<ObjectId>,
1022 next_id: Option<ObjectId>,
1023 all_ids: &mut Vec<ObjectId>,
1024 id_index: &mut usize,
1025 ) -> Result<Vec<ObjectId>> {
1026 let mut written_ids = vec![item_id];
1027
1028 let (first_child_id, last_child_id) = if !item.children.is_empty() {
1030 let first_idx = *id_index;
1031 let first_id = all_ids[first_idx];
1032 let last_idx = first_idx + item.children.len() - 1;
1033 let last_id = all_ids[last_idx];
1034
1035 for (i, child) in item.children.iter().enumerate() {
1037 let child_id = all_ids[*id_index];
1038 *id_index += 1;
1039
1040 let child_prev = if i > 0 {
1041 Some(all_ids[first_idx + i - 1])
1042 } else {
1043 None
1044 };
1045 let child_next = if i < item.children.len() - 1 {
1046 Some(all_ids[first_idx + i + 1])
1047 } else {
1048 None
1049 };
1050
1051 let child_ids = self.write_outline_item(
1052 child, child_id, item_id, child_prev, child_next, all_ids, id_index,
1054 )?;
1055
1056 written_ids.extend(child_ids);
1057 }
1058
1059 (Some(first_id), Some(last_id))
1060 } else {
1061 (None, None)
1062 };
1063
1064 let item_dict = crate::structure::outline_item_to_dict(
1066 item,
1067 parent_id,
1068 first_child_id,
1069 last_child_id,
1070 prev_id,
1071 next_id,
1072 );
1073
1074 self.write_object(item_id, Object::Dictionary(item_dict))?;
1075
1076 Ok(written_ids)
1077 }
1078
1079 fn write_struct_tree(
1081 &mut self,
1082 struct_tree: &crate::structure::StructTree,
1083 ) -> Result<ObjectId> {
1084 let struct_tree_root_id = self.allocate_object_id();
1086 let mut element_ids = Vec::new();
1087 for _ in 0..struct_tree.len() {
1088 element_ids.push(self.allocate_object_id());
1089 }
1090
1091 let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1093 std::collections::HashMap::new();
1094
1095 if let Some(root_index) = struct_tree.root_index() {
1097 parent_map.insert(root_index, struct_tree_root_id);
1098
1099 fn map_children_parents(
1101 tree: &crate::structure::StructTree,
1102 parent_index: usize,
1103 parent_id: ObjectId,
1104 element_ids: &[ObjectId],
1105 parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1106 ) {
1107 if let Some(parent_elem) = tree.get(parent_index) {
1108 for &child_index in &parent_elem.children {
1109 parent_map.insert(child_index, parent_id);
1110 map_children_parents(
1111 tree,
1112 child_index,
1113 element_ids[child_index],
1114 element_ids,
1115 parent_map,
1116 );
1117 }
1118 }
1119 }
1120
1121 map_children_parents(
1122 struct_tree,
1123 root_index,
1124 element_ids[root_index],
1125 &element_ids,
1126 &mut parent_map,
1127 );
1128 }
1129
1130 for (index, element) in struct_tree.iter().enumerate() {
1132 let element_id = element_ids[index];
1133 let mut element_dict = Dictionary::new();
1134
1135 element_dict.set("Type", Object::Name("StructElem".to_string()));
1136 element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1137
1138 if let Some(&parent_id) = parent_map.get(&index) {
1140 element_dict.set("P", Object::Reference(parent_id));
1141 }
1142
1143 if let Some(ref id) = element.id {
1145 element_dict.set("ID", Object::String(id.clone()));
1146 }
1147
1148 if let Some(ref lang) = element.attributes.lang {
1150 element_dict.set("Lang", Object::String(lang.clone()));
1151 }
1152 if let Some(ref alt) = element.attributes.alt {
1153 element_dict.set("Alt", Object::String(alt.clone()));
1154 }
1155 if let Some(ref actual_text) = element.attributes.actual_text {
1156 element_dict.set("ActualText", Object::String(actual_text.clone()));
1157 }
1158 if let Some(ref title) = element.attributes.title {
1159 element_dict.set("T", Object::String(title.clone()));
1160 }
1161 if let Some(bbox) = element.attributes.bbox {
1162 element_dict.set(
1163 "BBox",
1164 Object::Array(vec![
1165 Object::Real(bbox[0]),
1166 Object::Real(bbox[1]),
1167 Object::Real(bbox[2]),
1168 Object::Real(bbox[3]),
1169 ]),
1170 );
1171 }
1172
1173 let mut kids = Vec::new();
1175
1176 for &child_index in &element.children {
1178 kids.push(Object::Reference(element_ids[child_index]));
1179 }
1180
1181 for mcid_ref in &element.mcids {
1183 let mut mcr = Dictionary::new();
1184 mcr.set("Type", Object::Name("MCR".to_string()));
1185 mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1186 mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1187 kids.push(Object::Dictionary(mcr));
1188 }
1189
1190 if !kids.is_empty() {
1191 element_dict.set("K", Object::Array(kids));
1192 }
1193
1194 self.write_object(element_id, Object::Dictionary(element_dict))?;
1195 }
1196
1197 let mut struct_tree_root = Dictionary::new();
1199 struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1200
1201 if let Some(root_index) = struct_tree.root_index() {
1203 struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1204 }
1205
1206 if !struct_tree.role_map.mappings().is_empty() {
1208 let mut role_map = Dictionary::new();
1209 for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1210 role_map.set(
1211 custom_type.as_str(),
1212 Object::Name(standard_type.as_pdf_name().to_string()),
1213 );
1214 }
1215 struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1216 }
1217
1218 self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1219 Ok(struct_tree_root_id)
1220 }
1221
1222 fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1223 if !self.form_field_ids.is_empty() {
1225 if let Some(acro_form) = &mut document.acro_form {
1226 acro_form.fields.clear();
1228 for field_id in &self.form_field_ids {
1229 acro_form.add_field(*field_id);
1230 }
1231
1232 acro_form.need_appearances = true;
1234 if acro_form.da.is_none() {
1235 acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1236 }
1237 }
1238 }
1239 Ok(())
1240 }
1241
1242 fn write_info(&mut self, document: &Document) -> Result<()> {
1243 let info_id = self.get_info_id()?;
1244 let mut info_dict = Dictionary::new();
1245
1246 if let Some(ref title) = document.metadata.title {
1247 info_dict.set("Title", Object::String(title.clone()));
1248 }
1249 if let Some(ref author) = document.metadata.author {
1250 info_dict.set("Author", Object::String(author.clone()));
1251 }
1252 if let Some(ref subject) = document.metadata.subject {
1253 info_dict.set("Subject", Object::String(subject.clone()));
1254 }
1255 if let Some(ref keywords) = document.metadata.keywords {
1256 info_dict.set("Keywords", Object::String(keywords.clone()));
1257 }
1258 if let Some(ref creator) = document.metadata.creator {
1259 info_dict.set("Creator", Object::String(creator.clone()));
1260 }
1261 if let Some(ref producer) = document.metadata.producer {
1262 info_dict.set("Producer", Object::String(producer.clone()));
1263 }
1264
1265 if let Some(creation_date) = document.metadata.creation_date {
1267 let date_string = format_pdf_date(creation_date);
1268 info_dict.set("CreationDate", Object::String(date_string));
1269 }
1270
1271 if let Some(mod_date) = document.metadata.modification_date {
1273 let date_string = format_pdf_date(mod_date);
1274 info_dict.set("ModDate", Object::String(date_string));
1275 }
1276
1277 let edition = if cfg!(feature = "pro") {
1280 super::Edition::Pro
1281 } else if cfg!(feature = "enterprise") {
1282 super::Edition::Enterprise
1283 } else {
1284 super::Edition::Community
1285 };
1286
1287 let signature = super::PdfSignature::new(document, edition);
1288 signature.write_to_info_dict(&mut info_dict);
1289
1290 self.write_object(info_id, Object::Dictionary(info_dict))?;
1291 Ok(())
1292 }
1293
1294 fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1295 let mut font_refs = HashMap::new();
1296
1297 for font_name in document.custom_font_names() {
1299 if let Some(font) = document.get_custom_font(&font_name) {
1300 let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1303 font_refs.insert(font_name.clone(), font_id);
1304 }
1305 }
1306
1307 Ok(font_refs)
1308 }
1309
1310 fn write_font_with_unicode_support(
1312 &mut self,
1313 font_name: &str,
1314 font: &crate::fonts::Font,
1315 ) -> Result<ObjectId> {
1316 self.write_type0_font_from_font(font_name, font)
1319 }
1320
1321 fn write_type0_font_from_font(
1323 &mut self,
1324 font_name: &str,
1325 font: &crate::fonts::Font,
1326 ) -> Result<ObjectId> {
1327 let used_chars = self.document_used_chars.clone().unwrap_or_else(|| {
1329 let mut chars = std::collections::HashSet::new();
1331 for ch in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1332 {
1333 chars.insert(ch);
1334 }
1335 chars
1336 });
1337 let font_id = self.allocate_object_id();
1339 let descendant_font_id = self.allocate_object_id();
1340 let descriptor_id = self.allocate_object_id();
1341 let font_file_id = self.allocate_object_id();
1342 let to_unicode_id = self.allocate_object_id();
1343
1344 let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths) =
1348 if font.data.len() > 100_000 && !used_chars.is_empty() {
1349 match crate::text::fonts::truetype_subsetter::subset_font(
1351 font.data.clone(),
1352 &used_chars,
1353 ) {
1354 Ok(subset_result) => {
1355 (
1358 subset_result.font_data,
1359 Some(subset_result.glyph_mapping),
1360 font.clone(),
1361 )
1362 }
1363 Err(_) => {
1364 if font.data.len() < 25_000_000 {
1366 (font.data.clone(), None, font.clone())
1367 } else {
1368 (Vec::new(), None, font.clone())
1370 }
1371 }
1372 }
1373 } else {
1374 (font.data.clone(), None, font.clone())
1376 };
1377
1378 if !font_data_to_embed.is_empty() {
1379 let mut font_file_dict = Dictionary::new();
1380 match font.format {
1382 crate::fonts::FontFormat::OpenType => {
1383 font_file_dict.set("Subtype", Object::Name("OpenType".to_string()));
1385 font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1386 }
1387 crate::fonts::FontFormat::TrueType => {
1388 font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1390 }
1391 }
1392 let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1393 self.write_object(font_file_id, font_stream_obj)?;
1394 } else {
1395 let font_file_dict = Dictionary::new();
1397 let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1398 self.write_object(font_file_id, font_stream_obj)?;
1399 }
1400
1401 let mut descriptor = Dictionary::new();
1403 descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1404 descriptor.set("FontName", Object::Name(font_name.to_string()));
1405 descriptor.set("Flags", Object::Integer(4)); descriptor.set(
1407 "FontBBox",
1408 Object::Array(vec![
1409 Object::Integer(font.descriptor.font_bbox[0] as i64),
1410 Object::Integer(font.descriptor.font_bbox[1] as i64),
1411 Object::Integer(font.descriptor.font_bbox[2] as i64),
1412 Object::Integer(font.descriptor.font_bbox[3] as i64),
1413 ]),
1414 );
1415 descriptor.set(
1416 "ItalicAngle",
1417 Object::Real(font.descriptor.italic_angle as f64),
1418 );
1419 descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1420 descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1421 descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1422 descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1423 let font_file_key = match font.format {
1425 crate::fonts::FontFormat::OpenType => "FontFile3", crate::fonts::FontFormat::TrueType => "FontFile2", };
1428 descriptor.set(font_file_key, Object::Reference(font_file_id));
1429 self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1430
1431 let mut cid_font = Dictionary::new();
1433 cid_font.set("Type", Object::Name("Font".to_string()));
1434 let cid_font_subtype =
1436 if CjkFontType::should_use_cidfonttype2_for_preview_compatibility(font_name) {
1437 "CIDFontType2" } else {
1439 match font.format {
1440 crate::fonts::FontFormat::OpenType => "CIDFontType0", crate::fonts::FontFormat::TrueType => "CIDFontType2", }
1443 };
1444 cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1445 cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1446
1447 let mut cid_system_info = Dictionary::new();
1449 let (registry, ordering, supplement) =
1450 if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1451 cjk_type.cid_system_info()
1452 } else {
1453 ("Adobe", "Identity", 0)
1454 };
1455
1456 cid_system_info.set("Registry", Object::String(registry.to_string()));
1457 cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1458 cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1459 cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1460
1461 cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1462
1463 let default_width = self.calculate_default_width(font);
1465 cid_font.set("DW", Object::Integer(default_width));
1466
1467 let w_array = self.generate_width_array(
1471 &original_font_for_widths,
1472 default_width,
1473 subset_glyph_mapping.as_ref(),
1474 );
1475 cid_font.set("W", Object::Array(w_array));
1476
1477 let cid_to_gid_map = self.generate_cid_to_gid_map(font, subset_glyph_mapping.as_ref())?;
1481 if !cid_to_gid_map.is_empty() {
1482 let cid_to_gid_map_id = self.allocate_object_id();
1484 let mut map_dict = Dictionary::new();
1485 map_dict.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1486 let map_stream = Object::Stream(map_dict, cid_to_gid_map);
1487 self.write_object(cid_to_gid_map_id, map_stream)?;
1488 cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1489 } else {
1490 cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1491 }
1492
1493 self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1494
1495 let cmap_data = self.generate_tounicode_cmap_from_font(font);
1497 let cmap_dict = Dictionary::new();
1498 let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1499 self.write_object(to_unicode_id, cmap_stream)?;
1500
1501 let mut type0_font = Dictionary::new();
1503 type0_font.set("Type", Object::Name("Font".to_string()));
1504 type0_font.set("Subtype", Object::Name("Type0".to_string()));
1505 type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1506 type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1507 type0_font.set(
1508 "DescendantFonts",
1509 Object::Array(vec![Object::Reference(descendant_font_id)]),
1510 );
1511 type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1512
1513 self.write_object(font_id, Object::Dictionary(type0_font))?;
1514
1515 Ok(font_id)
1516 }
1517
1518 fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1520 use crate::text::fonts::truetype::TrueTypeFont;
1521
1522 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1524 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1525 if let Some(cmap) = cmap_tables
1526 .iter()
1527 .find(|t| t.platform_id == 3 && t.encoding_id == 1)
1528 .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1529 {
1530 if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1531 let common_chars =
1535 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1536 let mut total_width = 0;
1537 let mut count = 0;
1538
1539 for ch in common_chars.chars() {
1540 let unicode = ch as u32;
1541 if let Some(&pdf_width) = widths.get(&unicode) {
1542 total_width += pdf_width as i64;
1543 count += 1;
1544 }
1545 }
1546
1547 if count > 0 {
1548 return total_width / count;
1549 }
1550 }
1551 }
1552 }
1553 }
1554
1555 500
1557 }
1558
1559 fn generate_width_array(
1561 &self,
1562 font: &crate::fonts::Font,
1563 _default_width: i64,
1564 subset_mapping: Option<&HashMap<u32, u16>>,
1565 ) -> Vec<Object> {
1566 use crate::text::fonts::truetype::TrueTypeFont;
1567
1568 let mut w_array = Vec::new();
1569
1570 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1572 let char_to_glyph = {
1576 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1578 if let Some(cmap) = cmap_tables
1579 .iter()
1580 .find(|t| t.platform_id == 3 && t.encoding_id == 1)
1581 .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1582 {
1583 if let Some(subset_map) = subset_mapping {
1585 let mut filtered = HashMap::new();
1586 for unicode in subset_map.keys() {
1587 if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1589 filtered.insert(*unicode, orig_glyph);
1590 }
1591 }
1592 filtered
1593 } else {
1594 cmap.mappings.clone()
1595 }
1596 } else {
1597 HashMap::new()
1598 }
1599 } else {
1600 HashMap::new()
1601 }
1602 };
1603
1604 if !char_to_glyph.is_empty() {
1605 if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1607 let mut sorted_chars: Vec<_> = widths.iter().collect();
1612 sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1613
1614 let mut i = 0;
1615 while i < sorted_chars.len() {
1616 let start_unicode = *sorted_chars[i].0;
1617 let pdf_width = *sorted_chars[i].1 as i64;
1619
1620 let mut end_unicode = start_unicode;
1622 let mut j = i + 1;
1623 while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1624 let next_pdf_width = *sorted_chars[j].1 as i64;
1625 if next_pdf_width == pdf_width {
1626 end_unicode = *sorted_chars[j].0;
1627 j += 1;
1628 } else {
1629 break;
1630 }
1631 }
1632
1633 if start_unicode == end_unicode {
1635 w_array.push(Object::Integer(start_unicode as i64));
1637 w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1638 } else {
1639 w_array.push(Object::Integer(start_unicode as i64));
1641 w_array.push(Object::Integer(end_unicode as i64));
1642 w_array.push(Object::Integer(pdf_width));
1643 }
1644
1645 i = j;
1646 }
1647
1648 return w_array;
1649 }
1650 }
1651 }
1652
1653 let ranges = vec![
1655 (0x20, 0x20, 250), (0x21, 0x2F, 333), (0x30, 0x39, 500), (0x3A, 0x40, 333), (0x41, 0x5A, 667), (0x5B, 0x60, 333), (0x61, 0x7A, 500), (0x7B, 0x7E, 333), (0xA0, 0xA0, 250), (0xA1, 0xBF, 333), (0xC0, 0xD6, 667), (0xD7, 0xD7, 564), (0xD8, 0xDE, 667), (0xDF, 0xF6, 500), (0xF7, 0xF7, 564), (0xF8, 0xFF, 500), (0x100, 0x17F, 500), (0x2000, 0x200F, 250), (0x2010, 0x2027, 333), (0x2028, 0x202F, 250), (0x2030, 0x206F, 500), (0x2070, 0x209F, 400), (0x20A0, 0x20CF, 600), (0x2100, 0x214F, 700), (0x2190, 0x21FF, 600), (0x2200, 0x22FF, 600), (0x2300, 0x23FF, 600), (0x2500, 0x257F, 500), (0x2580, 0x259F, 500), (0x25A0, 0x25FF, 600), (0x2600, 0x26FF, 600), (0x2700, 0x27BF, 600), ];
1692
1693 for (start, end, width) in ranges {
1695 if start == end {
1696 w_array.push(Object::Integer(start));
1698 w_array.push(Object::Array(vec![Object::Integer(width)]));
1699 } else {
1700 w_array.push(Object::Integer(start));
1702 w_array.push(Object::Integer(end));
1703 w_array.push(Object::Integer(width));
1704 }
1705 }
1706
1707 w_array
1708 }
1709
1710 fn generate_cid_to_gid_map(
1712 &mut self,
1713 font: &crate::fonts::Font,
1714 subset_mapping: Option<&HashMap<u32, u16>>,
1715 ) -> Result<Vec<u8>> {
1716 use crate::text::fonts::truetype::TrueTypeFont;
1717
1718 let cmap_mappings = if let Some(subset_map) = subset_mapping {
1721 subset_map.clone()
1723 } else {
1724 let tt_font = TrueTypeFont::parse(font.data.clone())?;
1726 let cmap_tables = tt_font.parse_cmap()?;
1727
1728 let cmap = cmap_tables
1730 .iter()
1731 .find(|t| t.platform_id == 3 && t.encoding_id == 1) .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0)) .ok_or_else(|| {
1734 crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
1735 })?;
1736
1737 cmap.mappings.clone()
1738 };
1739
1740 let used_chars = self.document_used_chars.clone().unwrap_or_default();
1747
1748 let max_unicode = if !used_chars.is_empty() {
1750 used_chars
1752 .iter()
1753 .map(|ch| *ch as u32)
1754 .max()
1755 .unwrap_or(0x00FF) .min(0xFFFF) as usize
1757 } else {
1758 cmap_mappings
1760 .keys()
1761 .max()
1762 .copied()
1763 .unwrap_or(0xFFFF)
1764 .min(0xFFFF) as usize
1765 };
1766
1767 let mut map = vec![0u8; (max_unicode + 1) * 2];
1769
1770 let mut sample_mappings = Vec::new();
1772 for (&unicode, &glyph_id) in &cmap_mappings {
1773 if unicode <= max_unicode as u32 {
1774 let idx = (unicode as usize) * 2;
1775 map[idx] = (glyph_id >> 8) as u8;
1777 map[idx + 1] = (glyph_id & 0xFF) as u8;
1778
1779 if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
1781 {
1782 sample_mappings.push((unicode, glyph_id));
1783 }
1784 }
1785 }
1786
1787 Ok(map)
1788 }
1789
1790 fn generate_tounicode_cmap_from_font(&self, font: &crate::fonts::Font) -> Vec<u8> {
1792 use crate::text::fonts::truetype::TrueTypeFont;
1793
1794 let mut cmap = String::new();
1795
1796 cmap.push_str("/CIDInit /ProcSet findresource begin\n");
1798 cmap.push_str("12 dict begin\n");
1799 cmap.push_str("begincmap\n");
1800 cmap.push_str("/CIDSystemInfo\n");
1801 cmap.push_str("<< /Registry (Adobe)\n");
1802 cmap.push_str(" /Ordering (UCS)\n");
1803 cmap.push_str(" /Supplement 0\n");
1804 cmap.push_str(">> def\n");
1805 cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
1806 cmap.push_str("/CMapType 2 def\n");
1807 cmap.push_str("1 begincodespacerange\n");
1808 cmap.push_str("<0000> <FFFF>\n");
1809 cmap.push_str("endcodespacerange\n");
1810
1811 let mut mappings = Vec::new();
1813 let mut has_font_mappings = false;
1814
1815 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1816 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1817 if let Some(cmap_table) = cmap_tables
1819 .iter()
1820 .find(|t| t.platform_id == 3 && t.encoding_id == 1) .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1822 {
1824 for (&unicode, &glyph_id) in &cmap_table.mappings {
1827 if glyph_id > 0 && unicode <= 0xFFFF {
1828 mappings.push((unicode, unicode));
1831 }
1832 }
1833 has_font_mappings = true;
1834 }
1835 }
1836 }
1837
1838 if !has_font_mappings {
1840 for i in 0x0020..=0x00FF {
1842 mappings.push((i, i));
1843 }
1844
1845 for i in 0x0100..=0x017F {
1847 mappings.push((i, i));
1848 }
1849
1850 for i in 0x3040..=0x309F {
1853 mappings.push((i, i));
1854 }
1855
1856 for i in 0x30A0..=0x30FF {
1858 mappings.push((i, i));
1859 }
1860
1861 for i in 0x4E00..=0x9FFF {
1863 mappings.push((i, i));
1864 }
1865
1866 for i in 0xAC00..=0xD7AF {
1868 mappings.push((i, i));
1869 }
1870
1871 for i in 0x2000..=0x206F {
1873 mappings.push((i, i));
1874 }
1875
1876 for i in 0x2200..=0x22FF {
1878 mappings.push((i, i));
1879 }
1880
1881 for i in 0x2190..=0x21FF {
1883 mappings.push((i, i));
1884 }
1885
1886 for i in 0x2500..=0x259F {
1888 mappings.push((i, i));
1889 }
1890
1891 for i in 0x25A0..=0x25FF {
1893 mappings.push((i, i));
1894 }
1895
1896 for i in 0x2600..=0x26FF {
1898 mappings.push((i, i));
1899 }
1900 }
1901
1902 mappings.sort_by_key(|&(cid, _)| cid);
1904
1905 let mut i = 0;
1907 while i < mappings.len() {
1908 let start_cid = mappings[i].0;
1910 let start_unicode = mappings[i].1;
1911 let mut end_idx = i;
1912
1913 while end_idx + 1 < mappings.len()
1915 && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
1916 && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
1917 && end_idx - i < 99
1918 {
1920 end_idx += 1;
1921 }
1922
1923 if end_idx > i {
1924 cmap.push_str("1 beginbfrange\n");
1926 cmap.push_str(&format!(
1927 "<{:04X}> <{:04X}> <{:04X}>\n",
1928 start_cid, mappings[end_idx].0, start_unicode
1929 ));
1930 cmap.push_str("endbfrange\n");
1931 i = end_idx + 1;
1932 } else {
1933 let mut chars = Vec::new();
1935 let chunk_end = (i + 100).min(mappings.len());
1936
1937 for item in &mappings[i..chunk_end] {
1938 chars.push(*item);
1939 }
1940
1941 if !chars.is_empty() {
1942 cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
1943 for (cid, unicode) in chars {
1944 cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
1945 }
1946 cmap.push_str("endbfchar\n");
1947 }
1948
1949 i = chunk_end;
1950 }
1951 }
1952
1953 cmap.push_str("endcmap\n");
1955 cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
1956 cmap.push_str("end\n");
1957 cmap.push_str("end\n");
1958
1959 cmap.into_bytes()
1960 }
1961
1962 #[allow(dead_code)]
1964 fn write_truetype_font(
1965 &mut self,
1966 font_name: &str,
1967 font: &crate::text::font_manager::CustomFont,
1968 ) -> Result<ObjectId> {
1969 let font_id = self.allocate_object_id();
1971 let descriptor_id = self.allocate_object_id();
1972 let font_file_id = self.allocate_object_id();
1973
1974 if let Some(ref data) = font.font_data {
1976 let mut font_file_dict = Dictionary::new();
1977 font_file_dict.set("Length1", Object::Integer(data.len() as i64));
1978 let font_stream_obj = Object::Stream(font_file_dict, data.clone());
1979 self.write_object(font_file_id, font_stream_obj)?;
1980 }
1981
1982 let mut descriptor = Dictionary::new();
1984 descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1985 descriptor.set("FontName", Object::Name(font_name.to_string()));
1986 descriptor.set("Flags", Object::Integer(32)); descriptor.set(
1988 "FontBBox",
1989 Object::Array(vec![
1990 Object::Integer(-1000),
1991 Object::Integer(-1000),
1992 Object::Integer(2000),
1993 Object::Integer(2000),
1994 ]),
1995 );
1996 descriptor.set("ItalicAngle", Object::Integer(0));
1997 descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
1998 descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
1999 descriptor.set(
2000 "CapHeight",
2001 Object::Integer(font.descriptor.cap_height as i64),
2002 );
2003 descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
2004 descriptor.set("FontFile2", Object::Reference(font_file_id));
2005 self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
2006
2007 let mut font_dict = Dictionary::new();
2009 font_dict.set("Type", Object::Name("Font".to_string()));
2010 font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2011 font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2012 font_dict.set("FirstChar", Object::Integer(0));
2013 font_dict.set("LastChar", Object::Integer(255));
2014
2015 let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2017 font_dict.set("Widths", Object::Array(widths));
2018 font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2019
2020 font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2022
2023 self.write_object(font_id, Object::Dictionary(font_dict))?;
2024
2025 Ok(font_id)
2026 }
2027
2028 fn write_pages(
2029 &mut self,
2030 document: &Document,
2031 font_refs: &HashMap<String, ObjectId>,
2032 ) -> Result<()> {
2033 let pages_id = self.get_pages_id()?;
2034 let mut pages_dict = Dictionary::new();
2035 pages_dict.set("Type", Object::Name("Pages".to_string()));
2036 pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2037
2038 let mut kids = Vec::new();
2039
2040 let mut page_ids = Vec::new();
2042 let mut content_ids = Vec::new();
2043 for _ in 0..document.pages.len() {
2044 page_ids.push(self.allocate_object_id());
2045 content_ids.push(self.allocate_object_id());
2046 }
2047
2048 for page_id in &page_ids {
2049 kids.push(Object::Reference(*page_id));
2050 }
2051
2052 pages_dict.set("Kids", Object::Array(kids));
2053
2054 self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2055
2056 self.page_ids = page_ids.clone();
2058
2059 for (i, page) in document.pages.iter().enumerate() {
2061 let page_id = page_ids[i];
2062 let content_id = content_ids[i];
2063
2064 self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2065 self.write_page_content(content_id, page)?;
2066 }
2067
2068 Ok(())
2069 }
2070
2071 #[allow(dead_code)]
2073 fn write_pages_with_fonts(
2074 &mut self,
2075 document: &Document,
2076 font_refs: &HashMap<String, ObjectId>,
2077 ) -> Result<()> {
2078 self.write_pages(document, font_refs)
2079 }
2080
2081 fn write_page_with_fonts(
2082 &mut self,
2083 page_id: ObjectId,
2084 parent_id: ObjectId,
2085 content_id: ObjectId,
2086 page: &crate::page::Page,
2087 _document: &Document,
2088 font_refs: &HashMap<String, ObjectId>,
2089 ) -> Result<()> {
2090 let mut page_dict = page.to_dict();
2092
2093 page_dict.set("Type", Object::Name("Page".to_string()));
2094 page_dict.set("Parent", Object::Reference(parent_id));
2095 page_dict.set("Contents", Object::Reference(content_id));
2096
2097 let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2099 res.clone()
2100 } else {
2101 Dictionary::new()
2102 };
2103
2104 let mut font_dict = Dictionary::new();
2106
2107 let mut helvetica_dict = Dictionary::new();
2112 helvetica_dict.set("Type", Object::Name("Font".to_string()));
2113 helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2114 helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2115 helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2116 font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2117
2118 let mut helvetica_bold_dict = Dictionary::new();
2119 helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2120 helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2121 helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2122 helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2123 font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2124
2125 let mut helvetica_oblique_dict = Dictionary::new();
2126 helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2127 helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2128 helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2129 helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2130 font_dict.set(
2131 "Helvetica-Oblique",
2132 Object::Dictionary(helvetica_oblique_dict),
2133 );
2134
2135 let mut helvetica_bold_oblique_dict = Dictionary::new();
2136 helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2137 helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2138 helvetica_bold_oblique_dict.set(
2139 "BaseFont",
2140 Object::Name("Helvetica-BoldOblique".to_string()),
2141 );
2142 helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2143 font_dict.set(
2144 "Helvetica-BoldOblique",
2145 Object::Dictionary(helvetica_bold_oblique_dict),
2146 );
2147
2148 let mut times_dict = Dictionary::new();
2150 times_dict.set("Type", Object::Name("Font".to_string()));
2151 times_dict.set("Subtype", Object::Name("Type1".to_string()));
2152 times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2153 times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2154 font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2155
2156 let mut times_bold_dict = Dictionary::new();
2157 times_bold_dict.set("Type", Object::Name("Font".to_string()));
2158 times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2159 times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2160 times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2161 font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2162
2163 let mut times_italic_dict = Dictionary::new();
2164 times_italic_dict.set("Type", Object::Name("Font".to_string()));
2165 times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2166 times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2167 times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2168 font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2169
2170 let mut times_bold_italic_dict = Dictionary::new();
2171 times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2172 times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2173 times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2174 times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2175 font_dict.set(
2176 "Times-BoldItalic",
2177 Object::Dictionary(times_bold_italic_dict),
2178 );
2179
2180 let mut courier_dict = Dictionary::new();
2182 courier_dict.set("Type", Object::Name("Font".to_string()));
2183 courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2184 courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2185 courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2186 font_dict.set("Courier", Object::Dictionary(courier_dict));
2187
2188 let mut courier_bold_dict = Dictionary::new();
2189 courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2190 courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2191 courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2192 courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2193 font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2194
2195 let mut courier_oblique_dict = Dictionary::new();
2196 courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2197 courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2198 courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2199 courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2200 font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2201
2202 let mut courier_bold_oblique_dict = Dictionary::new();
2203 courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2204 courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2205 courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2206 courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2207 font_dict.set(
2208 "Courier-BoldOblique",
2209 Object::Dictionary(courier_bold_oblique_dict),
2210 );
2211
2212 for (font_name, font_id) in font_refs {
2214 font_dict.set(font_name, Object::Reference(*font_id));
2215 }
2216
2217 resources.set("Font", Object::Dictionary(font_dict));
2218
2219 if !page.images().is_empty() {
2221 let mut xobject_dict = Dictionary::new();
2222
2223 for (name, image) in page.images() {
2224 let image_id = self.allocate_object_id();
2226
2227 if image.has_transparency() {
2229 let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2231
2232 if let Some(smask_stream) = smask_obj {
2234 let smask_id = self.allocate_object_id();
2235 self.write_object(smask_id, smask_stream)?;
2236
2237 if let Object::Stream(ref mut dict, _) = main_obj {
2239 dict.set("SMask", Object::Reference(smask_id));
2240 }
2241 }
2242
2243 self.write_object(image_id, main_obj)?;
2245 } else {
2246 self.write_object(image_id, image.to_pdf_object())?;
2248 }
2249
2250 xobject_dict.set(name, Object::Reference(image_id));
2252 }
2253
2254 resources.set("XObject", Object::Dictionary(xobject_dict));
2255 }
2256
2257 if let Some(extgstate_states) = page.get_extgstate_resources() {
2259 let mut extgstate_dict = Dictionary::new();
2260 for (name, state) in extgstate_states {
2261 let mut state_dict = Dictionary::new();
2262 state_dict.set("Type", Object::Name("ExtGState".to_string()));
2263
2264 if let Some(alpha_stroke) = state.alpha_stroke {
2266 state_dict.set("CA", Object::Real(alpha_stroke));
2267 }
2268 if let Some(alpha_fill) = state.alpha_fill {
2269 state_dict.set("ca", Object::Real(alpha_fill));
2270 }
2271
2272 if let Some(line_width) = state.line_width {
2274 state_dict.set("LW", Object::Real(line_width));
2275 }
2276 if let Some(line_cap) = state.line_cap {
2277 state_dict.set("LC", Object::Integer(line_cap as i64));
2278 }
2279 if let Some(line_join) = state.line_join {
2280 state_dict.set("LJ", Object::Integer(line_join as i64));
2281 }
2282 if let Some(dash_pattern) = &state.dash_pattern {
2283 let dash_objects: Vec<Object> = dash_pattern
2284 .array
2285 .iter()
2286 .map(|&d| Object::Real(d))
2287 .collect();
2288 state_dict.set(
2289 "D",
2290 Object::Array(vec![
2291 Object::Array(dash_objects),
2292 Object::Real(dash_pattern.phase),
2293 ]),
2294 );
2295 }
2296
2297 extgstate_dict.set(name, Object::Dictionary(state_dict));
2298 }
2299 if !extgstate_dict.is_empty() {
2300 resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2301 }
2302 }
2303
2304 if let Some(preserved_res) = page.get_preserved_resources() {
2307 let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2309
2310 if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2312 let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2314
2315 preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2317 }
2318
2319 if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2323 let mut fonts_with_refs = crate::objects::Dictionary::new();
2324
2325 for (font_name, font_obj) in fonts.iter() {
2326 if let Object::Dictionary(font_dict) = font_obj {
2327 let updated_font = self.write_embedded_font_streams(font_dict)?;
2329 fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2330 } else {
2331 fonts_with_refs.set(font_name, font_obj.clone());
2333 }
2334 }
2335
2336 preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2338 }
2339
2340 for (key, value) in preserved_writer_dict.iter() {
2342 if let Some(Object::Dictionary(existing)) = resources.get(key) {
2344 if let Object::Dictionary(preserved_dict) = value {
2345 let mut merged = existing.clone();
2346 for (res_name, res_obj) in preserved_dict.iter() {
2348 if !merged.contains_key(res_name) {
2349 merged.set(res_name, res_obj.clone());
2350 }
2351 }
2352 resources.set(key, Object::Dictionary(merged));
2353 }
2354 } else {
2355 resources.set(key, value.clone());
2357 }
2358 }
2359 }
2360
2361 page_dict.set("Resources", Object::Dictionary(resources));
2362
2363 if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2365 let mut new_annots = Vec::new();
2366
2367 for annot in annots {
2368 if let Object::Dictionary(ref annot_dict) = annot {
2369 if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2370 if subtype == "Widget" {
2371 let widget_id = self.allocate_object_id();
2373 self.write_object(widget_id, annot.clone())?;
2374 new_annots.push(Object::Reference(widget_id));
2375
2376 if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2378 if let Some(Object::String(field_name)) = annot_dict.get("T") {
2379 self.field_widget_map
2380 .entry(field_name.clone())
2381 .or_default()
2382 .push(widget_id);
2383 self.field_id_map.insert(field_name.clone(), widget_id);
2384 self.form_field_ids.push(widget_id);
2385 }
2386 }
2387 continue;
2388 }
2389 }
2390 }
2391 new_annots.push(annot.clone());
2392 }
2393
2394 if !new_annots.is_empty() {
2395 page_dict.set("Annots", Object::Array(new_annots));
2396 }
2397 }
2398
2399 self.write_object(page_id, Object::Dictionary(page_dict))?;
2400 Ok(())
2401 }
2402}
2403
2404impl PdfWriter<BufWriter<std::fs::File>> {
2405 pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2406 let file = std::fs::File::create(path)?;
2407 let writer = BufWriter::new(file);
2408
2409 Ok(Self {
2410 writer,
2411 xref_positions: HashMap::new(),
2412 current_position: 0,
2413 next_object_id: 1,
2414 catalog_id: None,
2415 pages_id: None,
2416 info_id: None,
2417 field_widget_map: HashMap::new(),
2418 field_id_map: HashMap::new(),
2419 form_field_ids: Vec::new(),
2420 page_ids: Vec::new(),
2421 config: WriterConfig::default(),
2422 document_used_chars: None,
2423 buffered_objects: HashMap::new(),
2424 compressed_object_map: HashMap::new(),
2425 prev_xref_offset: None,
2426 base_pdf_size: None,
2427 })
2428 }
2429}
2430
2431impl<W: Write> PdfWriter<W> {
2432 fn write_embedded_font_streams(
2448 &mut self,
2449 font_dict: &crate::objects::Dictionary,
2450 ) -> Result<crate::objects::Dictionary> {
2451 let mut updated_font = font_dict.clone();
2452
2453 if let Some(Object::Name(subtype)) = font_dict.get("Subtype") {
2455 if subtype == "Type0" {
2456 if let Some(Object::Array(descendants)) = font_dict.get("DescendantFonts") {
2458 let mut updated_descendants = Vec::new();
2459
2460 for descendant in descendants {
2461 match descendant {
2462 Object::Dictionary(cidfont) => {
2463 let updated_cidfont =
2465 self.write_cidfont_embedded_streams(cidfont)?;
2466 let cidfont_id = self.allocate_object_id();
2468 self.write_object(cidfont_id, Object::Dictionary(updated_cidfont))?;
2469 updated_descendants.push(Object::Reference(cidfont_id));
2471 }
2472 Object::Reference(_) => {
2473 updated_descendants.push(descendant.clone());
2475 }
2476 _ => {
2477 updated_descendants.push(descendant.clone());
2478 }
2479 }
2480 }
2481
2482 updated_font.set("DescendantFonts", Object::Array(updated_descendants));
2483 }
2484
2485 if let Some(Object::Stream(stream_dict, stream_data)) = font_dict.get("ToUnicode") {
2487 let tounicode_id = self.allocate_object_id();
2488 self.write_object(
2489 tounicode_id,
2490 Object::Stream(stream_dict.clone(), stream_data.clone()),
2491 )?;
2492 updated_font.set("ToUnicode", Object::Reference(tounicode_id));
2493 }
2494
2495 return Ok(updated_font);
2496 }
2497 }
2498
2499 if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
2502 let mut updated_descriptor = descriptor.clone();
2503 let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2504
2505 for key in &font_file_keys {
2507 if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2508 let stream_id = self.allocate_object_id();
2510 let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
2511 self.write_object(stream_id, stream_obj)?;
2512
2513 updated_descriptor.set(*key, Object::Reference(stream_id));
2515 }
2516 }
2518
2519 updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
2521 }
2522
2523 Ok(updated_font)
2524 }
2525
2526 fn write_cidfont_embedded_streams(
2528 &mut self,
2529 cidfont: &crate::objects::Dictionary,
2530 ) -> Result<crate::objects::Dictionary> {
2531 let mut updated_cidfont = cidfont.clone();
2532
2533 if let Some(Object::Dictionary(descriptor)) = cidfont.get("FontDescriptor") {
2535 let mut updated_descriptor = descriptor.clone();
2536 let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2537
2538 for key in &font_file_keys {
2540 if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2541 let stream_id = self.allocate_object_id();
2542 self.write_object(
2543 stream_id,
2544 Object::Stream(stream_dict.clone(), stream_data.clone()),
2545 )?;
2546 updated_descriptor.set(*key, Object::Reference(stream_id));
2547 }
2548 }
2549
2550 let descriptor_id = self.allocate_object_id();
2552 self.write_object(descriptor_id, Object::Dictionary(updated_descriptor))?;
2553
2554 updated_cidfont.set("FontDescriptor", Object::Reference(descriptor_id));
2556 }
2557
2558 if let Some(Object::Stream(map_dict, map_data)) = cidfont.get("CIDToGIDMap") {
2560 let map_id = self.allocate_object_id();
2561 self.write_object(map_id, Object::Stream(map_dict.clone(), map_data.clone()))?;
2562 updated_cidfont.set("CIDToGIDMap", Object::Reference(map_id));
2563 }
2564
2565 Ok(updated_cidfont)
2566 }
2567
2568 fn allocate_object_id(&mut self) -> ObjectId {
2569 let id = ObjectId::new(self.next_object_id, 0);
2570 self.next_object_id += 1;
2571 id
2572 }
2573
2574 fn get_catalog_id(&self) -> Result<ObjectId> {
2576 self.catalog_id.ok_or_else(|| {
2577 PdfError::InvalidOperation(
2578 "catalog_id not initialized - write_document() must be called first".to_string(),
2579 )
2580 })
2581 }
2582
2583 fn get_pages_id(&self) -> Result<ObjectId> {
2585 self.pages_id.ok_or_else(|| {
2586 PdfError::InvalidOperation(
2587 "pages_id not initialized - write_document() must be called first".to_string(),
2588 )
2589 })
2590 }
2591
2592 fn get_info_id(&self) -> Result<ObjectId> {
2594 self.info_id.ok_or_else(|| {
2595 PdfError::InvalidOperation(
2596 "info_id not initialized - write_document() must be called first".to_string(),
2597 )
2598 })
2599 }
2600
2601 fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
2602 use crate::writer::ObjectStreamWriter;
2603
2604 if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
2606 let mut buffer = Vec::new();
2607 self.write_object_value_to_buffer(&object, &mut buffer)?;
2608 self.buffered_objects.insert(id, buffer);
2609 return Ok(());
2610 }
2611
2612 self.xref_positions.insert(id, self.current_position);
2614
2615 let header = format!("{} {} obj\n", id.number(), id.generation());
2617 self.write_bytes(header.as_bytes())?;
2618
2619 self.write_object_value(&object)?;
2620
2621 self.write_bytes(b"\nendobj\n")?;
2622 Ok(())
2623 }
2624
2625 fn write_object_value(&mut self, object: &Object) -> Result<()> {
2626 match object {
2627 Object::Null => self.write_bytes(b"null")?,
2628 Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
2629 Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
2630 Object::Real(f) => self.write_bytes(
2631 format!("{f:.6}")
2632 .trim_end_matches('0')
2633 .trim_end_matches('.')
2634 .as_bytes(),
2635 )?,
2636 Object::String(s) => {
2637 self.write_bytes(b"(")?;
2638 self.write_bytes(s.as_bytes())?;
2639 self.write_bytes(b")")?;
2640 }
2641 Object::Name(n) => {
2642 self.write_bytes(b"/")?;
2643 self.write_bytes(n.as_bytes())?;
2644 }
2645 Object::Array(arr) => {
2646 self.write_bytes(b"[")?;
2647 for (i, obj) in arr.iter().enumerate() {
2648 if i > 0 {
2649 self.write_bytes(b" ")?;
2650 }
2651 self.write_object_value(obj)?;
2652 }
2653 self.write_bytes(b"]")?;
2654 }
2655 Object::Dictionary(dict) => {
2656 self.write_bytes(b"<<")?;
2657 for (key, value) in dict.entries() {
2658 self.write_bytes(b"\n/")?;
2659 self.write_bytes(key.as_bytes())?;
2660 self.write_bytes(b" ")?;
2661 self.write_object_value(value)?;
2662 }
2663 self.write_bytes(b"\n>>")?;
2664 }
2665 Object::Stream(dict, data) => {
2666 let mut corrected_dict = dict.clone();
2669 corrected_dict.set("Length", Object::Integer(data.len() as i64));
2670
2671 self.write_object_value(&Object::Dictionary(corrected_dict))?;
2672 self.write_bytes(b"\nstream\n")?;
2673 self.write_bytes(data)?;
2674 self.write_bytes(b"\nendstream")?;
2675 }
2676 Object::Reference(id) => {
2677 let ref_str = format!("{} {} R", id.number(), id.generation());
2678 self.write_bytes(ref_str.as_bytes())?;
2679 }
2680 }
2681 Ok(())
2682 }
2683
2684 fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
2686 match object {
2687 Object::Null => buffer.extend_from_slice(b"null"),
2688 Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
2689 Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
2690 Object::Real(f) => buffer.extend_from_slice(
2691 format!("{f:.6}")
2692 .trim_end_matches('0')
2693 .trim_end_matches('.')
2694 .as_bytes(),
2695 ),
2696 Object::String(s) => {
2697 buffer.push(b'(');
2698 buffer.extend_from_slice(s.as_bytes());
2699 buffer.push(b')');
2700 }
2701 Object::Name(n) => {
2702 buffer.push(b'/');
2703 buffer.extend_from_slice(n.as_bytes());
2704 }
2705 Object::Array(arr) => {
2706 buffer.push(b'[');
2707 for (i, obj) in arr.iter().enumerate() {
2708 if i > 0 {
2709 buffer.push(b' ');
2710 }
2711 self.write_object_value_to_buffer(obj, buffer)?;
2712 }
2713 buffer.push(b']');
2714 }
2715 Object::Dictionary(dict) => {
2716 buffer.extend_from_slice(b"<<");
2717 for (key, value) in dict.entries() {
2718 buffer.extend_from_slice(b"\n/");
2719 buffer.extend_from_slice(key.as_bytes());
2720 buffer.push(b' ');
2721 self.write_object_value_to_buffer(value, buffer)?;
2722 }
2723 buffer.extend_from_slice(b"\n>>");
2724 }
2725 Object::Stream(_, _) => {
2726 return Err(crate::error::PdfError::ObjectStreamError(
2728 "Cannot compress stream objects in object streams".to_string(),
2729 ));
2730 }
2731 Object::Reference(id) => {
2732 let ref_str = format!("{} {} R", id.number(), id.generation());
2733 buffer.extend_from_slice(ref_str.as_bytes());
2734 }
2735 }
2736 Ok(())
2737 }
2738
2739 fn flush_object_streams(&mut self) -> Result<()> {
2741 if self.buffered_objects.is_empty() {
2742 return Ok(());
2743 }
2744
2745 let config = ObjectStreamConfig {
2747 max_objects_per_stream: 100,
2748 compression_level: 6,
2749 enabled: true,
2750 };
2751 let mut os_writer = ObjectStreamWriter::new(config);
2752
2753 let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
2755 buffered.sort_by_key(|(id, _)| id.number());
2756
2757 for (id, data) in buffered {
2759 os_writer.add_object(*id, data.clone())?;
2760 }
2761
2762 let streams = os_writer.finalize()?;
2764
2765 for mut stream in streams {
2767 let stream_id = stream.stream_id;
2768
2769 let compressed_data = stream.generate_stream_data(6)?;
2771
2772 let dict = stream.generate_dictionary(&compressed_data);
2774
2775 for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
2777 self.compressed_object_map
2778 .insert(*obj_id, (stream_id, index as u32));
2779 }
2780
2781 self.xref_positions.insert(stream_id, self.current_position);
2783
2784 let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
2785 self.write_bytes(header.as_bytes())?;
2786
2787 self.write_object_value(&Object::Dictionary(dict))?;
2788
2789 self.write_bytes(b"\nstream\n")?;
2790 self.write_bytes(&compressed_data)?;
2791 self.write_bytes(b"\nendstream\nendobj\n")?;
2792 }
2793
2794 Ok(())
2795 }
2796
2797 fn write_xref(&mut self) -> Result<()> {
2798 self.write_bytes(b"xref\n")?;
2799
2800 let mut entries: Vec<_> = self
2802 .xref_positions
2803 .iter()
2804 .map(|(id, pos)| (*id, *pos))
2805 .collect();
2806 entries.sort_by_key(|(id, _)| id.number());
2807
2808 let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
2810
2811 self.write_bytes(b"0 ")?;
2814 self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
2815 self.write_bytes(b"\n")?;
2816
2817 self.write_bytes(b"0000000000 65535 f \n")?;
2819
2820 for obj_num in 1..=max_obj_num {
2823 let _obj_id = ObjectId::new(obj_num, 0);
2824 if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
2825 let entry = format!("{:010} {:05} n \n", position, 0);
2826 self.write_bytes(entry.as_bytes())?;
2827 } else {
2828 self.write_bytes(b"0000000000 00000 f \n")?;
2830 }
2831 }
2832
2833 Ok(())
2834 }
2835
2836 fn write_xref_stream(&mut self) -> Result<()> {
2837 let catalog_id = self.get_catalog_id()?;
2838 let info_id = self.get_info_id()?;
2839
2840 let xref_stream_id = self.allocate_object_id();
2842 let xref_position = self.current_position;
2843
2844 let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
2846 xref_writer.set_trailer_info(catalog_id, info_id);
2847
2848 xref_writer.add_free_entry(0, 65535);
2850
2851 let mut entries: Vec<_> = self
2853 .xref_positions
2854 .iter()
2855 .map(|(id, pos)| (*id, *pos))
2856 .collect();
2857 entries.sort_by_key(|(id, _)| id.number());
2858
2859 let max_obj_num = entries
2861 .iter()
2862 .map(|(id, _)| id.number())
2863 .max()
2864 .unwrap_or(0)
2865 .max(xref_stream_id.number());
2866
2867 for obj_num in 1..=max_obj_num {
2869 let obj_id = ObjectId::new(obj_num, 0);
2870
2871 if obj_num == xref_stream_id.number() {
2872 xref_writer.add_in_use_entry(xref_position, 0);
2874 } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
2875 xref_writer.add_compressed_entry(stream_id.number(), *index);
2877 } else if let Some((id, position)) =
2878 entries.iter().find(|(id, _)| id.number() == obj_num)
2879 {
2880 xref_writer.add_in_use_entry(*position, id.generation());
2882 } else {
2883 xref_writer.add_free_entry(0, 0);
2885 }
2886 }
2887
2888 self.xref_positions.insert(xref_stream_id, xref_position);
2890
2891 self.write_bytes(
2893 format!(
2894 "{} {} obj\n",
2895 xref_stream_id.number(),
2896 xref_stream_id.generation()
2897 )
2898 .as_bytes(),
2899 )?;
2900
2901 let uncompressed_data = xref_writer.encode_entries();
2903 let final_data = if self.config.compress_streams {
2904 crate::compression::compress(&uncompressed_data)?
2905 } else {
2906 uncompressed_data
2907 };
2908
2909 let mut dict = xref_writer.create_dictionary(None);
2911 dict.set("Length", Object::Integer(final_data.len() as i64));
2912
2913 if self.config.compress_streams {
2915 dict.set("Filter", Object::Name("FlateDecode".to_string()));
2916 }
2917 self.write_bytes(b"<<")?;
2918 for (key, value) in dict.iter() {
2919 self.write_bytes(b"\n/")?;
2920 self.write_bytes(key.as_bytes())?;
2921 self.write_bytes(b" ")?;
2922 self.write_object_value(value)?;
2923 }
2924 self.write_bytes(b"\n>>\n")?;
2925
2926 self.write_bytes(b"stream\n")?;
2928 self.write_bytes(&final_data)?;
2929 self.write_bytes(b"\nendstream\n")?;
2930 self.write_bytes(b"endobj\n")?;
2931
2932 self.write_bytes(b"\nstartxref\n")?;
2934 self.write_bytes(xref_position.to_string().as_bytes())?;
2935 self.write_bytes(b"\n%%EOF\n")?;
2936
2937 Ok(())
2938 }
2939
2940 fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
2941 let catalog_id = self.get_catalog_id()?;
2942 let info_id = self.get_info_id()?;
2943 let max_obj_num = self
2945 .xref_positions
2946 .keys()
2947 .map(|id| id.number())
2948 .max()
2949 .unwrap_or(0);
2950
2951 let mut trailer = Dictionary::new();
2952 trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
2953 trailer.set("Root", Object::Reference(catalog_id));
2954 trailer.set("Info", Object::Reference(info_id));
2955
2956 if let Some(prev_xref) = self.prev_xref_offset {
2958 trailer.set("Prev", Object::Integer(prev_xref as i64));
2959 }
2960
2961 self.write_bytes(b"trailer\n")?;
2962 self.write_object_value(&Object::Dictionary(trailer))?;
2963 self.write_bytes(b"\nstartxref\n")?;
2964 self.write_bytes(xref_position.to_string().as_bytes())?;
2965 self.write_bytes(b"\n%%EOF\n")?;
2966
2967 Ok(())
2968 }
2969
2970 fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
2971 self.writer.write_all(data)?;
2972 self.current_position += data.len() as u64;
2973 Ok(())
2974 }
2975
2976 #[allow(dead_code)]
2977 fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
2978 let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
2980 if rect_array.len() >= 4 {
2981 if let (
2982 Some(Object::Real(x1)),
2983 Some(Object::Real(y1)),
2984 Some(Object::Real(x2)),
2985 Some(Object::Real(y2)),
2986 ) = (
2987 rect_array.first(),
2988 rect_array.get(1),
2989 rect_array.get(2),
2990 rect_array.get(3),
2991 ) {
2992 (*x1, *y1, *x2, *y2)
2993 } else {
2994 (0.0, 0.0, 100.0, 20.0) }
2996 } else {
2997 (0.0, 0.0, 100.0, 20.0) }
2999 } else {
3000 (0.0, 0.0, 100.0, 20.0) };
3002
3003 let width = rect.2 - rect.0;
3004 let height = rect.3 - rect.1;
3005
3006 let mut content = String::new();
3008
3009 content.push_str("q\n");
3011
3012 content.push_str("0 0 0 RG\n"); content.push_str("1 w\n"); content.push_str(&format!("0 0 {width} {height} re\n"));
3018 content.push_str("S\n"); content.push_str("1 1 1 rg\n"); content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
3023 content.push_str("f\n"); content.push_str("Q\n");
3027
3028 let mut stream_dict = Dictionary::new();
3030 stream_dict.set("Type", Object::Name("XObject".to_string()));
3031 stream_dict.set("Subtype", Object::Name("Form".to_string()));
3032 stream_dict.set(
3033 "BBox",
3034 Object::Array(vec![
3035 Object::Real(0.0),
3036 Object::Real(0.0),
3037 Object::Real(width),
3038 Object::Real(height),
3039 ]),
3040 );
3041 stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3042 stream_dict.set("Length", Object::Integer(content.len() as i64));
3043
3044 let stream_id = self.allocate_object_id();
3046 self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3047
3048 Ok(stream_id)
3049 }
3050
3051 #[allow(dead_code)]
3052 fn create_field_appearance_stream(
3053 &mut self,
3054 field_dict: &Dictionary,
3055 widget: &crate::forms::Widget,
3056 ) -> Result<ObjectId> {
3057 let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
3058 let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
3059
3060 let mut content = String::new();
3062
3063 content.push_str("q\n");
3065
3066 if let Some(bg_color) = &widget.appearance.background_color {
3068 match bg_color {
3069 crate::graphics::Color::Gray(g) => {
3070 content.push_str(&format!("{g} g\n"));
3071 }
3072 crate::graphics::Color::Rgb(r, g, b) => {
3073 content.push_str(&format!("{r} {g} {b} rg\n"));
3074 }
3075 crate::graphics::Color::Cmyk(c, m, y, k) => {
3076 content.push_str(&format!("{c} {m} {y} {k} k\n"));
3077 }
3078 }
3079 content.push_str(&format!("0 0 {width} {height} re\n"));
3080 content.push_str("f\n");
3081 }
3082
3083 if let Some(border_color) = &widget.appearance.border_color {
3085 match border_color {
3086 crate::graphics::Color::Gray(g) => {
3087 content.push_str(&format!("{g} G\n"));
3088 }
3089 crate::graphics::Color::Rgb(r, g, b) => {
3090 content.push_str(&format!("{r} {g} {b} RG\n"));
3091 }
3092 crate::graphics::Color::Cmyk(c, m, y, k) => {
3093 content.push_str(&format!("{c} {m} {y} {k} K\n"));
3094 }
3095 }
3096 content.push_str(&format!("{} w\n", widget.appearance.border_width));
3097 content.push_str(&format!("0 0 {width} {height} re\n"));
3098 content.push_str("S\n");
3099 }
3100
3101 if let Some(Object::Name(ft)) = field_dict.get("FT") {
3103 if ft == "Btn" {
3104 if let Some(Object::Name(v)) = field_dict.get("V") {
3105 if v == "Yes" {
3106 content.push_str("0 0 0 RG\n"); content.push_str("2 w\n");
3109 let margin = width * 0.2;
3110 content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3111 content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3112 content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3113 content.push_str("S\n");
3114 }
3115 }
3116 }
3117 }
3118
3119 content.push_str("Q\n");
3121
3122 let mut stream_dict = Dictionary::new();
3124 stream_dict.set("Type", Object::Name("XObject".to_string()));
3125 stream_dict.set("Subtype", Object::Name("Form".to_string()));
3126 stream_dict.set(
3127 "BBox",
3128 Object::Array(vec![
3129 Object::Real(0.0),
3130 Object::Real(0.0),
3131 Object::Real(width),
3132 Object::Real(height),
3133 ]),
3134 );
3135 stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3136 stream_dict.set("Length", Object::Integer(content.len() as i64));
3137
3138 let stream_id = self.allocate_object_id();
3140 self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3141
3142 Ok(stream_id)
3143 }
3144}
3145
3146fn format_pdf_date(date: DateTime<Utc>) -> String {
3148 let formatted = date.format("D:%Y%m%d%H%M%S");
3151
3152 format!("{formatted}+00'00")
3154}
3155
3156#[cfg(test)]
3157mod tests;
3158
3159#[cfg(test)]
3160mod rigorous_tests;