1use crate::document::Document;
2use crate::error::{PdfError, Result};
3use crate::objects::{Dictionary, Object, ObjectId};
4use crate::text::fonts::embedding::CjkFontType;
5use crate::writer::{ObjectStreamConfig, ObjectStreamWriter, XRefStreamWriter};
6use chrono::{DateTime, Utc};
7use std::collections::HashMap;
8use std::io::{BufWriter, Write};
9use std::path::Path;
10
/// Options controlling how [`PdfWriter`] serializes a document.
#[derive(Debug, Clone)]
pub struct WriterConfig {
    /// Emit a cross-reference stream instead of a classic xref table plus trailer.
    pub use_xref_streams: bool,
    /// Flush buffered objects as object streams before the xref is written.
    pub use_object_streams: bool,
    /// Version string placed in the `%PDF-x.y` header line.
    pub pdf_version: String,
    /// Whether page content streams are Flate-compressed.
    pub compress_streams: bool,
    /// Flag set by the [`WriterConfig::incremental`] preset.
    pub incremental_update: bool,
}

impl Default for WriterConfig {
    /// Classic xref table, no object streams, PDF 1.7, compression on.
    fn default() -> Self {
        WriterConfig {
            pdf_version: String::from("1.7"),
            compress_streams: true,
            use_xref_streams: false,
            use_object_streams: false,
            incremental_update: false,
        }
    }
}

impl WriterConfig {
    /// Preset enabling xref streams and object streams (PDF 1.5).
    pub fn modern() -> Self {
        Self {
            use_xref_streams: true,
            use_object_streams: true,
            pdf_version: String::from("1.5"),
            ..Self::default()
        }
    }

    /// Preset using a classic xref table and no object streams (PDF 1.4).
    pub fn legacy() -> Self {
        Self {
            pdf_version: String::from("1.4"),
            ..Self::default()
        }
    }

    /// Same as [`WriterConfig::legacy`] but with `incremental_update` set.
    pub fn incremental() -> Self {
        Self {
            incremental_update: true,
            ..Self::legacy()
        }
    }
}
72
/// Serializes a `Document` into PDF bytes on an arbitrary `Write` sink.
pub struct PdfWriter<W: Write> {
    /// Output sink; flushed at the end of each top-level write method.
    writer: W,
    /// Per-object `u64` values keyed by object id — presumably byte offsets
    /// used for the xref table (populated outside this view; confirm).
    xref_positions: HashMap<ObjectId, u64>,
    /// Current byte offset in the output. Sampled as the xref position and
    /// initialised to the base file size by the incremental writers.
    current_position: u64,
    /// Initialised to 1; presumably the next number handed out by
    /// `allocate_object_id` (not visible here).
    next_object_id: u32,
    /// Id reserved for the catalog at the start of a write.
    catalog_id: Option<ObjectId>,
    /// Id reserved for the page tree root at the start of a write.
    pages_id: Option<ObjectId>,
    /// Id reserved for the info dictionary at the start of a write.
    info_id: Option<ObjectId>,
    // NOTE(review): marked dead_code — appears unused at present.
    #[allow(dead_code)]
    field_widget_map: HashMap<String, Vec<ObjectId>>,
    // NOTE(review): marked dead_code — appears unused at present.
    #[allow(dead_code)]
    field_id_map: HashMap<String, ObjectId>,
    /// Field ids that `write_form_fields` registers on the document's AcroForm.
    form_field_ids: Vec<ObjectId>,
    /// Page object ids; the incremental writers turn these into `/Kids` entries.
    page_ids: Vec<ObjectId>,
    /// Output options supplied at construction.
    config: WriterConfig,
    /// Copy of `document.used_characters` (when non-empty), consulted when
    /// choosing the character set for font subsetting.
    document_used_chars: Option<std::collections::HashSet<char>>,
    /// Serialized bytes per object id — presumably awaiting packing into
    /// object streams (usage not visible here; confirm).
    buffered_objects: HashMap<ObjectId, Vec<u8>>,
    /// Object id -> (`ObjectId`, `u32`) — presumably (containing stream, index)
    /// for compressed objects (usage not visible here; confirm).
    compressed_object_map: HashMap<ObjectId, (ObjectId, u32)>,
    /// `startxref` offset parsed from the base PDF by the incremental writers.
    prev_xref_offset: Option<u64>,
    /// Size in bytes of the base PDF copied by the incremental writers.
    base_pdf_size: Option<u64>,
}
100
101impl<W: Write> PdfWriter<W> {
102 pub fn new_with_writer(writer: W) -> Self {
103 Self::with_config(writer, WriterConfig::default())
104 }
105
106 pub fn with_config(writer: W, config: WriterConfig) -> Self {
107 Self {
108 writer,
109 xref_positions: HashMap::new(),
110 current_position: 0,
111 next_object_id: 1, catalog_id: None,
113 pages_id: None,
114 info_id: None,
115 field_widget_map: HashMap::new(),
116 field_id_map: HashMap::new(),
117 form_field_ids: Vec::new(),
118 page_ids: Vec::new(),
119 config,
120 document_used_chars: None,
121 buffered_objects: HashMap::new(),
122 compressed_object_map: HashMap::new(),
123 prev_xref_offset: None,
124 base_pdf_size: None,
125 }
126 }
127
128 pub fn write_document(&mut self, document: &mut Document) -> Result<()> {
129 if !document.used_characters.is_empty() {
131 self.document_used_chars = Some(document.used_characters.clone());
132 }
133
134 self.write_header()?;
135
136 self.catalog_id = Some(self.allocate_object_id());
138 self.pages_id = Some(self.allocate_object_id());
139 self.info_id = Some(self.allocate_object_id());
140
141 let font_refs = self.write_fonts(document)?;
143
144 self.write_pages(document, &font_refs)?;
146
147 self.write_form_fields(document)?;
149
150 self.write_catalog(document)?;
152
153 self.write_info(document)?;
155
156 if self.config.use_object_streams {
158 self.flush_object_streams()?;
159 }
160
161 let xref_position = self.current_position;
163 if self.config.use_xref_streams {
164 self.write_xref_stream()?;
165 } else {
166 self.write_xref()?;
167 }
168
169 if !self.config.use_xref_streams {
171 self.write_trailer(xref_position)?;
172 }
173
174 if let Ok(()) = self.writer.flush() {
175 }
177 Ok(())
178 }
179
180 pub fn write_incremental_update(
214 &mut self,
215 base_pdf_path: impl AsRef<std::path::Path>,
216 document: &mut Document,
217 ) -> Result<()> {
218 use std::io::{BufReader, Read, Seek, SeekFrom};
219
220 let base_pdf_file = std::fs::File::open(base_pdf_path.as_ref())?;
222 let mut pdf_reader = crate::parser::PdfReader::new(BufReader::new(base_pdf_file))?;
223
224 let base_catalog = pdf_reader.catalog()?;
226
227 let (base_pages_id, base_pages_gen) = base_catalog
229 .get("Pages")
230 .and_then(|obj| {
231 if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
232 Some((*id, *gen))
233 } else {
234 None
235 }
236 })
237 .ok_or_else(|| {
238 crate::error::PdfError::InvalidStructure(
239 "Base PDF catalog missing /Pages reference".to_string(),
240 )
241 })?;
242
243 let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
245 let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
246 base_pages_obj
247 {
248 dict.get("Kids")
249 .and_then(|obj| {
250 if let crate::parser::objects::PdfObject::Array(arr) = obj {
251 Some(
254 arr.0
255 .iter()
256 .filter_map(|item| {
257 if let crate::parser::objects::PdfObject::Reference(id, gen) =
258 item
259 {
260 Some(crate::objects::Object::Reference(
261 crate::objects::ObjectId::new(*id, *gen),
262 ))
263 } else {
264 None
265 }
266 })
267 .collect::<Vec<_>>(),
268 )
269 } else {
270 None
271 }
272 })
273 .unwrap_or_default()
274 } else {
275 Vec::new()
276 };
277
278 let base_page_count = base_pages_kids.len();
280
281 let base_pdf = std::fs::File::open(base_pdf_path.as_ref())?;
283 let mut base_reader = BufReader::new(base_pdf);
284
285 base_reader.seek(SeekFrom::End(-100))?;
287 let mut end_buffer = vec![0u8; 100];
288 let bytes_read = base_reader.read(&mut end_buffer)?;
289 end_buffer.truncate(bytes_read);
290
291 let end_str = String::from_utf8_lossy(&end_buffer);
292 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
293 let after_startxref = &end_str[startxref_pos + 9..];
294
295 let number_str: String = after_startxref
296 .chars()
297 .skip_while(|c| c.is_whitespace())
298 .take_while(|c| c.is_ascii_digit())
299 .collect();
300
301 number_str.parse::<u64>().map_err(|_| {
302 crate::error::PdfError::InvalidStructure(
303 "Could not parse startxref offset".to_string(),
304 )
305 })?
306 } else {
307 return Err(crate::error::PdfError::InvalidStructure(
308 "startxref not found in base PDF".to_string(),
309 ));
310 };
311
312 base_reader.seek(SeekFrom::Start(0))?;
314 let base_size = std::io::copy(&mut base_reader, &mut self.writer)? as u64;
315
316 self.prev_xref_offset = Some(prev_xref);
318 self.base_pdf_size = Some(base_size);
319 self.current_position = base_size;
320
321 if !document.used_characters.is_empty() {
323 self.document_used_chars = Some(document.used_characters.clone());
324 }
325
326 self.catalog_id = Some(self.allocate_object_id());
328 self.pages_id = Some(self.allocate_object_id());
329 self.info_id = Some(self.allocate_object_id());
330
331 let font_refs = self.write_fonts(document)?;
333
334 self.write_pages(document, &font_refs)?;
336
337 self.write_form_fields(document)?;
339
340 let catalog_id = self.get_catalog_id()?;
342 let new_pages_id = self.get_pages_id()?;
343
344 let mut catalog = crate::objects::Dictionary::new();
345 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
346 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
347
348 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
353
354 let mut all_pages_kids = base_pages_kids;
356
357 for page_id in &self.page_ids {
359 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
360 }
361
362 let mut pages_dict = crate::objects::Dictionary::new();
363 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
364 pages_dict.set("Kids", crate::objects::Object::Array(all_pages_kids));
365 pages_dict.set(
366 "Count",
367 crate::objects::Object::Integer((base_page_count + self.page_ids.len()) as i64),
368 );
369
370 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
371
372 self.write_info(document)?;
374
375 let xref_position = self.current_position;
377 self.write_xref()?;
378
379 self.write_trailer(xref_position)?;
381
382 self.writer.flush()?;
383 Ok(())
384 }
385
386 pub fn write_incremental_with_page_replacement(
452 &mut self,
453 base_pdf_path: impl AsRef<std::path::Path>,
454 document: &mut Document,
455 ) -> Result<()> {
456 use std::io::Cursor;
457
458 let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
460 let base_size = base_pdf_bytes.len() as u64;
461
462 let mut pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
464
465 let base_catalog = pdf_reader.catalog()?;
466
467 let (base_pages_id, base_pages_gen) = base_catalog
468 .get("Pages")
469 .and_then(|obj| {
470 if let crate::parser::objects::PdfObject::Reference(id, gen) = obj {
471 Some((*id, *gen))
472 } else {
473 None
474 }
475 })
476 .ok_or_else(|| {
477 crate::error::PdfError::InvalidStructure(
478 "Base PDF catalog missing /Pages reference".to_string(),
479 )
480 })?;
481
482 let base_pages_obj = pdf_reader.get_object(base_pages_id, base_pages_gen)?;
483 let base_pages_kids = if let crate::parser::objects::PdfObject::Dictionary(dict) =
484 base_pages_obj
485 {
486 dict.get("Kids")
487 .and_then(|obj| {
488 if let crate::parser::objects::PdfObject::Array(arr) = obj {
489 Some(
490 arr.0
491 .iter()
492 .filter_map(|item| {
493 if let crate::parser::objects::PdfObject::Reference(id, gen) =
494 item
495 {
496 Some(crate::objects::Object::Reference(
497 crate::objects::ObjectId::new(*id, *gen),
498 ))
499 } else {
500 None
501 }
502 })
503 .collect::<Vec<_>>(),
504 )
505 } else {
506 None
507 }
508 })
509 .unwrap_or_default()
510 } else {
511 Vec::new()
512 };
513
514 let base_page_count = base_pages_kids.len();
515
516 let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
518 let end_bytes = &base_pdf_bytes[start_search..];
519 let end_str = String::from_utf8_lossy(end_bytes);
520
521 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
522 let after_startxref = &end_str[startxref_pos + 9..];
523 let number_str: String = after_startxref
524 .chars()
525 .skip_while(|c| c.is_whitespace())
526 .take_while(|c| c.is_ascii_digit())
527 .collect();
528
529 number_str.parse::<u64>().map_err(|_| {
530 crate::error::PdfError::InvalidStructure(
531 "Could not parse startxref offset".to_string(),
532 )
533 })?
534 } else {
535 return Err(crate::error::PdfError::InvalidStructure(
536 "startxref not found in base PDF".to_string(),
537 ));
538 };
539
540 self.writer.write_all(&base_pdf_bytes)?;
542
543 self.prev_xref_offset = Some(prev_xref);
544 self.base_pdf_size = Some(base_size);
545 self.current_position = base_size;
546
547 if !document.used_characters.is_empty() {
549 self.document_used_chars = Some(document.used_characters.clone());
550 }
551
552 self.catalog_id = Some(self.allocate_object_id());
553 self.pages_id = Some(self.allocate_object_id());
554 self.info_id = Some(self.allocate_object_id());
555
556 let font_refs = self.write_fonts(document)?;
557 self.write_pages(document, &font_refs)?;
558 self.write_form_fields(document)?;
559
560 let catalog_id = self.get_catalog_id()?;
562 let new_pages_id = self.get_pages_id()?;
563
564 let mut catalog = crate::objects::Dictionary::new();
565 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
566 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
567 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
568
569 let mut all_pages_kids = Vec::new();
571 let replacement_count = document.pages.len();
572
573 for page_id in &self.page_ids {
575 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
576 }
577
578 if replacement_count < base_page_count {
580 for i in replacement_count..base_page_count {
581 if let Some(page_ref) = base_pages_kids.get(i) {
582 all_pages_kids.push(page_ref.clone());
583 }
584 }
585 }
586
587 let mut pages_dict = crate::objects::Dictionary::new();
588 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
589 pages_dict.set(
590 "Kids",
591 crate::objects::Object::Array(all_pages_kids.clone()),
592 );
593 pages_dict.set(
594 "Count",
595 crate::objects::Object::Integer(all_pages_kids.len() as i64),
596 );
597
598 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
599 self.write_info(document)?;
600
601 let xref_position = self.current_position;
602 self.write_xref()?;
603 self.write_trailer(xref_position)?;
604
605 self.writer.flush()?;
606 Ok(())
607 }
608
609 pub fn write_incremental_with_overlay<P: AsRef<std::path::Path>>(
657 &mut self,
658 base_pdf_path: P,
659 mut overlay_fn: impl FnMut(&mut crate::Page) -> Result<()>,
660 ) -> Result<()> {
661 use std::io::Cursor;
662
663 let base_pdf_bytes = std::fs::read(base_pdf_path.as_ref())?;
665 let base_size = base_pdf_bytes.len() as u64;
666
667 let pdf_reader = crate::parser::PdfReader::new(Cursor::new(&base_pdf_bytes))?;
669 let parsed_doc = crate::parser::PdfDocument::new(pdf_reader);
670
671 let page_count = parsed_doc.page_count()?;
673
674 let start_search = if base_size > 100 { base_size - 100 } else { 0 } as usize;
676 let end_bytes = &base_pdf_bytes[start_search..];
677 let end_str = String::from_utf8_lossy(end_bytes);
678
679 let prev_xref = if let Some(startxref_pos) = end_str.find("startxref") {
680 let after_startxref = &end_str[startxref_pos + 9..];
681 let number_str: String = after_startxref
682 .chars()
683 .skip_while(|c| c.is_whitespace())
684 .take_while(|c| c.is_ascii_digit())
685 .collect();
686
687 number_str.parse::<u64>().map_err(|_| {
688 crate::error::PdfError::InvalidStructure(
689 "Could not parse startxref offset".to_string(),
690 )
691 })?
692 } else {
693 return Err(crate::error::PdfError::InvalidStructure(
694 "startxref not found in base PDF".to_string(),
695 ));
696 };
697
698 self.writer.write_all(&base_pdf_bytes)?;
700
701 self.prev_xref_offset = Some(prev_xref);
702 self.base_pdf_size = Some(base_size);
703 self.current_position = base_size;
704
705 let mut temp_doc = crate::Document::new();
707
708 for page_idx in 0..page_count {
709 let parsed_page = parsed_doc.get_page(page_idx)?;
711 let mut writable_page =
712 crate::Page::from_parsed_with_content(&parsed_page, &parsed_doc)?;
713
714 overlay_fn(&mut writable_page)?;
716
717 temp_doc.add_page(writable_page);
719 }
720
721 if !temp_doc.used_characters.is_empty() {
724 self.document_used_chars = Some(temp_doc.used_characters.clone());
725 }
726
727 self.catalog_id = Some(self.allocate_object_id());
728 self.pages_id = Some(self.allocate_object_id());
729 self.info_id = Some(self.allocate_object_id());
730
731 let font_refs = self.write_fonts(&temp_doc)?;
732 self.write_pages(&temp_doc, &font_refs)?;
733 self.write_form_fields(&mut temp_doc)?;
734
735 let catalog_id = self.get_catalog_id()?;
737 let new_pages_id = self.get_pages_id()?;
738
739 let mut catalog = crate::objects::Dictionary::new();
740 catalog.set("Type", crate::objects::Object::Name("Catalog".to_string()));
741 catalog.set("Pages", crate::objects::Object::Reference(new_pages_id));
742 self.write_object(catalog_id, crate::objects::Object::Dictionary(catalog))?;
743
744 let mut all_pages_kids = Vec::new();
746 for page_id in &self.page_ids {
747 all_pages_kids.push(crate::objects::Object::Reference(*page_id));
748 }
749
750 let mut pages_dict = crate::objects::Dictionary::new();
751 pages_dict.set("Type", crate::objects::Object::Name("Pages".to_string()));
752 pages_dict.set(
753 "Kids",
754 crate::objects::Object::Array(all_pages_kids.clone()),
755 );
756 pages_dict.set(
757 "Count",
758 crate::objects::Object::Integer(all_pages_kids.len() as i64),
759 );
760
761 self.write_object(new_pages_id, crate::objects::Object::Dictionary(pages_dict))?;
762 self.write_info(&temp_doc)?;
763
764 let xref_position = self.current_position;
765 self.write_xref()?;
766 self.write_trailer(xref_position)?;
767
768 self.writer.flush()?;
769 Ok(())
770 }
771
772 fn write_header(&mut self) -> Result<()> {
773 let header = format!("%PDF-{}\n", self.config.pdf_version);
774 self.write_bytes(header.as_bytes())?;
775 self.write_bytes(&[b'%', 0xE2, 0xE3, 0xCF, 0xD3, b'\n'])?;
777 Ok(())
778 }
779
780 fn convert_pdf_objects_dict_to_writer(
783 &self,
784 pdf_dict: &crate::pdf_objects::Dictionary,
785 ) -> crate::objects::Dictionary {
786 let mut writer_dict = crate::objects::Dictionary::new();
787
788 for (key, value) in pdf_dict.iter() {
789 let writer_obj = self.convert_pdf_object_to_writer(value);
790 writer_dict.set(key.as_str(), writer_obj);
791 }
792
793 writer_dict
794 }
795
796 fn convert_pdf_object_to_writer(
797 &self,
798 obj: &crate::pdf_objects::Object,
799 ) -> crate::objects::Object {
800 use crate::objects::Object as WriterObj;
801 use crate::pdf_objects::Object as PdfObj;
802
803 match obj {
804 PdfObj::Null => WriterObj::Null,
805 PdfObj::Boolean(b) => WriterObj::Boolean(*b),
806 PdfObj::Integer(i) => WriterObj::Integer(*i),
807 PdfObj::Real(f) => WriterObj::Real(*f),
808 PdfObj::String(s) => {
809 WriterObj::String(String::from_utf8_lossy(s.as_bytes()).to_string())
810 }
811 PdfObj::Name(n) => WriterObj::Name(n.as_str().to_string()),
812 PdfObj::Array(arr) => {
813 let items: Vec<WriterObj> = arr
814 .iter()
815 .map(|item| self.convert_pdf_object_to_writer(item))
816 .collect();
817 WriterObj::Array(items)
818 }
819 PdfObj::Dictionary(dict) => {
820 WriterObj::Dictionary(self.convert_pdf_objects_dict_to_writer(dict))
821 }
822 PdfObj::Stream(stream) => {
823 let dict = self.convert_pdf_objects_dict_to_writer(&stream.dict);
824 WriterObj::Stream(dict, stream.data.clone())
825 }
826 PdfObj::Reference(id) => {
827 WriterObj::Reference(crate::objects::ObjectId::new(id.number(), id.generation()))
828 }
829 }
830 }
831
832 fn write_catalog(&mut self, document: &mut Document) -> Result<()> {
833 let catalog_id = self.get_catalog_id()?;
834 let pages_id = self.get_pages_id()?;
835
836 let mut catalog = Dictionary::new();
837 catalog.set("Type", Object::Name("Catalog".to_string()));
838 catalog.set("Pages", Object::Reference(pages_id));
839
840 if let Some(_form_manager) = &document.form_manager {
843 if document.acro_form.is_none() {
845 document.acro_form = Some(crate::forms::AcroForm::new());
846 }
847 }
848
849 if let Some(acro_form) = &document.acro_form {
851 let acro_form_id = self.allocate_object_id();
853
854 self.write_object(acro_form_id, Object::Dictionary(acro_form.to_dict()))?;
856
857 catalog.set("AcroForm", Object::Reference(acro_form_id));
859 }
860
861 if let Some(outline_tree) = &document.outline {
863 if !outline_tree.items.is_empty() {
864 let outline_root_id = self.write_outline_tree(outline_tree)?;
865 catalog.set("Outlines", Object::Reference(outline_root_id));
866 }
867 }
868
869 if let Some(struct_tree) = &document.struct_tree {
871 if !struct_tree.is_empty() {
872 let struct_tree_root_id = self.write_struct_tree(struct_tree)?;
873 catalog.set("StructTreeRoot", Object::Reference(struct_tree_root_id));
874 catalog.set("MarkInfo", {
876 let mut mark_info = Dictionary::new();
877 mark_info.set("Marked", Object::Boolean(true));
878 Object::Dictionary(mark_info)
879 });
880 }
881 }
882
883 let xmp_metadata = document.create_xmp_metadata();
886 let xmp_packet = xmp_metadata.to_xmp_packet();
887 let metadata_id = self.allocate_object_id();
888
889 let mut metadata_dict = Dictionary::new();
891 metadata_dict.set("Type", Object::Name("Metadata".to_string()));
892 metadata_dict.set("Subtype", Object::Name("XML".to_string()));
893 metadata_dict.set("Length", Object::Integer(xmp_packet.len() as i64));
894
895 self.write_object(
897 metadata_id,
898 Object::Stream(metadata_dict, xmp_packet.into_bytes()),
899 )?;
900
901 catalog.set("Metadata", Object::Reference(metadata_id));
903
904 self.write_object(catalog_id, Object::Dictionary(catalog))?;
905 Ok(())
906 }
907
908 fn write_page_content(&mut self, content_id: ObjectId, page: &crate::page::Page) -> Result<()> {
909 let mut page_copy = page.clone();
910 let content = page_copy.generate_content()?;
911
912 #[cfg(feature = "compression")]
914 {
915 use crate::objects::Stream;
916 let mut stream = Stream::new(content);
917 if self.config.compress_streams {
919 stream.compress_flate()?;
920 }
921
922 self.write_object(
923 content_id,
924 Object::Stream(stream.dictionary().clone(), stream.data().to_vec()),
925 )?;
926 }
927
928 #[cfg(not(feature = "compression"))]
929 {
930 let mut stream_dict = Dictionary::new();
931 stream_dict.set("Length", Object::Integer(content.len() as i64));
932
933 self.write_object(content_id, Object::Stream(stream_dict, content))?;
934 }
935
936 Ok(())
937 }
938
939 fn write_outline_tree(
940 &mut self,
941 outline_tree: &crate::structure::OutlineTree,
942 ) -> Result<ObjectId> {
943 let outline_root_id = self.allocate_object_id();
945
946 let mut outline_root = Dictionary::new();
947 outline_root.set("Type", Object::Name("Outlines".to_string()));
948
949 if !outline_tree.items.is_empty() {
950 let mut item_ids = Vec::new();
952
953 fn count_items(items: &[crate::structure::OutlineItem]) -> usize {
955 let mut count = items.len();
956 for item in items {
957 count += count_items(&item.children);
958 }
959 count
960 }
961
962 let total_items = count_items(&outline_tree.items);
963
964 for _ in 0..total_items {
966 item_ids.push(self.allocate_object_id());
967 }
968
969 let mut id_index = 0;
970
971 let first_id = item_ids[0];
973 let last_id = item_ids[outline_tree.items.len() - 1];
974
975 outline_root.set("First", Object::Reference(first_id));
976 outline_root.set("Last", Object::Reference(last_id));
977
978 let visible_count = outline_tree.visible_count();
980 outline_root.set("Count", Object::Integer(visible_count));
981
982 let mut written_items = Vec::new();
984
985 for (i, item) in outline_tree.items.iter().enumerate() {
986 let item_id = item_ids[id_index];
987 id_index += 1;
988
989 let prev_id = if i > 0 { Some(item_ids[i - 1]) } else { None };
990 let next_id = if i < outline_tree.items.len() - 1 {
991 Some(item_ids[i + 1])
992 } else {
993 None
994 };
995
996 let children_ids = self.write_outline_item(
998 item,
999 item_id,
1000 outline_root_id,
1001 prev_id,
1002 next_id,
1003 &mut item_ids,
1004 &mut id_index,
1005 )?;
1006
1007 written_items.extend(children_ids);
1008 }
1009 }
1010
1011 self.write_object(outline_root_id, Object::Dictionary(outline_root))?;
1012 Ok(outline_root_id)
1013 }
1014
1015 #[allow(clippy::too_many_arguments)]
1016 fn write_outline_item(
1017 &mut self,
1018 item: &crate::structure::OutlineItem,
1019 item_id: ObjectId,
1020 parent_id: ObjectId,
1021 prev_id: Option<ObjectId>,
1022 next_id: Option<ObjectId>,
1023 all_ids: &mut Vec<ObjectId>,
1024 id_index: &mut usize,
1025 ) -> Result<Vec<ObjectId>> {
1026 let mut written_ids = vec![item_id];
1027
1028 let (first_child_id, last_child_id) = if !item.children.is_empty() {
1030 let first_idx = *id_index;
1031 let first_id = all_ids[first_idx];
1032 let last_idx = first_idx + item.children.len() - 1;
1033 let last_id = all_ids[last_idx];
1034
1035 for (i, child) in item.children.iter().enumerate() {
1037 let child_id = all_ids[*id_index];
1038 *id_index += 1;
1039
1040 let child_prev = if i > 0 {
1041 Some(all_ids[first_idx + i - 1])
1042 } else {
1043 None
1044 };
1045 let child_next = if i < item.children.len() - 1 {
1046 Some(all_ids[first_idx + i + 1])
1047 } else {
1048 None
1049 };
1050
1051 let child_ids = self.write_outline_item(
1052 child, child_id, item_id, child_prev, child_next, all_ids, id_index,
1054 )?;
1055
1056 written_ids.extend(child_ids);
1057 }
1058
1059 (Some(first_id), Some(last_id))
1060 } else {
1061 (None, None)
1062 };
1063
1064 let item_dict = crate::structure::outline_item_to_dict(
1066 item,
1067 parent_id,
1068 first_child_id,
1069 last_child_id,
1070 prev_id,
1071 next_id,
1072 );
1073
1074 self.write_object(item_id, Object::Dictionary(item_dict))?;
1075
1076 Ok(written_ids)
1077 }
1078
1079 fn write_struct_tree(
1081 &mut self,
1082 struct_tree: &crate::structure::StructTree,
1083 ) -> Result<ObjectId> {
1084 let struct_tree_root_id = self.allocate_object_id();
1086 let mut element_ids = Vec::new();
1087 for _ in 0..struct_tree.len() {
1088 element_ids.push(self.allocate_object_id());
1089 }
1090
1091 let mut parent_map: std::collections::HashMap<usize, ObjectId> =
1093 std::collections::HashMap::new();
1094
1095 if let Some(root_index) = struct_tree.root_index() {
1097 parent_map.insert(root_index, struct_tree_root_id);
1098
1099 fn map_children_parents(
1101 tree: &crate::structure::StructTree,
1102 parent_index: usize,
1103 parent_id: ObjectId,
1104 element_ids: &[ObjectId],
1105 parent_map: &mut std::collections::HashMap<usize, ObjectId>,
1106 ) {
1107 if let Some(parent_elem) = tree.get(parent_index) {
1108 for &child_index in &parent_elem.children {
1109 parent_map.insert(child_index, parent_id);
1110 map_children_parents(
1111 tree,
1112 child_index,
1113 element_ids[child_index],
1114 element_ids,
1115 parent_map,
1116 );
1117 }
1118 }
1119 }
1120
1121 map_children_parents(
1122 struct_tree,
1123 root_index,
1124 element_ids[root_index],
1125 &element_ids,
1126 &mut parent_map,
1127 );
1128 }
1129
1130 for (index, element) in struct_tree.iter().enumerate() {
1132 let element_id = element_ids[index];
1133 let mut element_dict = Dictionary::new();
1134
1135 element_dict.set("Type", Object::Name("StructElem".to_string()));
1136 element_dict.set("S", Object::Name(element.structure_type.as_pdf_name()));
1137
1138 if let Some(&parent_id) = parent_map.get(&index) {
1140 element_dict.set("P", Object::Reference(parent_id));
1141 }
1142
1143 if let Some(ref id) = element.id {
1145 element_dict.set("ID", Object::String(id.clone()));
1146 }
1147
1148 if let Some(ref lang) = element.attributes.lang {
1150 element_dict.set("Lang", Object::String(lang.clone()));
1151 }
1152 if let Some(ref alt) = element.attributes.alt {
1153 element_dict.set("Alt", Object::String(alt.clone()));
1154 }
1155 if let Some(ref actual_text) = element.attributes.actual_text {
1156 element_dict.set("ActualText", Object::String(actual_text.clone()));
1157 }
1158 if let Some(ref title) = element.attributes.title {
1159 element_dict.set("T", Object::String(title.clone()));
1160 }
1161 if let Some(bbox) = element.attributes.bbox {
1162 element_dict.set(
1163 "BBox",
1164 Object::Array(vec![
1165 Object::Real(bbox[0]),
1166 Object::Real(bbox[1]),
1167 Object::Real(bbox[2]),
1168 Object::Real(bbox[3]),
1169 ]),
1170 );
1171 }
1172
1173 let mut kids = Vec::new();
1175
1176 for &child_index in &element.children {
1178 kids.push(Object::Reference(element_ids[child_index]));
1179 }
1180
1181 for mcid_ref in &element.mcids {
1183 let mut mcr = Dictionary::new();
1184 mcr.set("Type", Object::Name("MCR".to_string()));
1185 mcr.set("Pg", Object::Integer(mcid_ref.page_index as i64));
1186 mcr.set("MCID", Object::Integer(mcid_ref.mcid as i64));
1187 kids.push(Object::Dictionary(mcr));
1188 }
1189
1190 if !kids.is_empty() {
1191 element_dict.set("K", Object::Array(kids));
1192 }
1193
1194 self.write_object(element_id, Object::Dictionary(element_dict))?;
1195 }
1196
1197 let mut struct_tree_root = Dictionary::new();
1199 struct_tree_root.set("Type", Object::Name("StructTreeRoot".to_string()));
1200
1201 if let Some(root_index) = struct_tree.root_index() {
1203 struct_tree_root.set("K", Object::Reference(element_ids[root_index]));
1204 }
1205
1206 if !struct_tree.role_map.mappings().is_empty() {
1208 let mut role_map = Dictionary::new();
1209 for (custom_type, standard_type) in struct_tree.role_map.mappings() {
1210 role_map.set(
1211 custom_type.as_str(),
1212 Object::Name(standard_type.as_pdf_name().to_string()),
1213 );
1214 }
1215 struct_tree_root.set("RoleMap", Object::Dictionary(role_map));
1216 }
1217
1218 self.write_object(struct_tree_root_id, Object::Dictionary(struct_tree_root))?;
1219 Ok(struct_tree_root_id)
1220 }
1221
1222 fn write_form_fields(&mut self, document: &mut Document) -> Result<()> {
1223 if !self.form_field_ids.is_empty() {
1225 if let Some(acro_form) = &mut document.acro_form {
1226 acro_form.fields.clear();
1228 for field_id in &self.form_field_ids {
1229 acro_form.add_field(*field_id);
1230 }
1231
1232 acro_form.need_appearances = true;
1234 if acro_form.da.is_none() {
1235 acro_form.da = Some("/Helv 12 Tf 0 g".to_string());
1236 }
1237 }
1238 }
1239 Ok(())
1240 }
1241
1242 fn write_info(&mut self, document: &Document) -> Result<()> {
1243 let info_id = self.get_info_id()?;
1244 let mut info_dict = Dictionary::new();
1245
1246 if let Some(ref title) = document.metadata.title {
1247 info_dict.set("Title", Object::String(title.clone()));
1248 }
1249 if let Some(ref author) = document.metadata.author {
1250 info_dict.set("Author", Object::String(author.clone()));
1251 }
1252 if let Some(ref subject) = document.metadata.subject {
1253 info_dict.set("Subject", Object::String(subject.clone()));
1254 }
1255 if let Some(ref keywords) = document.metadata.keywords {
1256 info_dict.set("Keywords", Object::String(keywords.clone()));
1257 }
1258 if let Some(ref creator) = document.metadata.creator {
1259 info_dict.set("Creator", Object::String(creator.clone()));
1260 }
1261 if let Some(ref producer) = document.metadata.producer {
1262 info_dict.set("Producer", Object::String(producer.clone()));
1263 }
1264
1265 if let Some(creation_date) = document.metadata.creation_date {
1267 let date_string = format_pdf_date(creation_date);
1268 info_dict.set("CreationDate", Object::String(date_string));
1269 }
1270
1271 if let Some(mod_date) = document.metadata.modification_date {
1273 let date_string = format_pdf_date(mod_date);
1274 info_dict.set("ModDate", Object::String(date_string));
1275 }
1276
1277 let edition = super::Edition::OpenSource;
1280
1281 let signature = super::PdfSignature::new(document, edition);
1282 signature.write_to_info_dict(&mut info_dict);
1283
1284 self.write_object(info_id, Object::Dictionary(info_dict))?;
1285 Ok(())
1286 }
1287
1288 fn write_fonts(&mut self, document: &Document) -> Result<HashMap<String, ObjectId>> {
1289 let mut font_refs = HashMap::new();
1290
1291 for font_name in document.custom_font_names() {
1293 if let Some(font) = document.get_custom_font(&font_name) {
1294 let font_id = self.write_font_with_unicode_support(&font_name, &font)?;
1297 font_refs.insert(font_name.clone(), font_id);
1298 }
1299 }
1300
1301 Ok(font_refs)
1302 }
1303
1304 fn write_font_with_unicode_support(
1306 &mut self,
1307 font_name: &str,
1308 font: &crate::fonts::Font,
1309 ) -> Result<ObjectId> {
1310 self.write_type0_font_from_font(font_name, font)
1313 }
1314
1315 fn write_type0_font_from_font(
1317 &mut self,
1318 font_name: &str,
1319 font: &crate::fonts::Font,
1320 ) -> Result<ObjectId> {
1321 let used_chars = self.document_used_chars.clone().unwrap_or_else(|| {
1323 let mut chars = std::collections::HashSet::new();
1325 for ch in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?".chars()
1326 {
1327 chars.insert(ch);
1328 }
1329 chars
1330 });
1331 let font_id = self.allocate_object_id();
1333 let descendant_font_id = self.allocate_object_id();
1334 let descriptor_id = self.allocate_object_id();
1335 let font_file_id = self.allocate_object_id();
1336 let to_unicode_id = self.allocate_object_id();
1337
1338 let (font_data_to_embed, subset_glyph_mapping, original_font_for_widths) =
1342 if font.data.len() > 100_000 && !used_chars.is_empty() {
1343 match crate::text::fonts::truetype_subsetter::subset_font(
1345 font.data.clone(),
1346 &used_chars,
1347 ) {
1348 Ok(subset_result) => {
1349 (
1352 subset_result.font_data,
1353 Some(subset_result.glyph_mapping),
1354 font.clone(),
1355 )
1356 }
1357 Err(_) => {
1358 if font.data.len() < 25_000_000 {
1360 (font.data.clone(), None, font.clone())
1361 } else {
1362 (Vec::new(), None, font.clone())
1364 }
1365 }
1366 }
1367 } else {
1368 (font.data.clone(), None, font.clone())
1370 };
1371
1372 if !font_data_to_embed.is_empty() {
1373 let mut font_file_dict = Dictionary::new();
1374 match font.format {
1376 crate::fonts::FontFormat::OpenType => {
1377 font_file_dict.set("Subtype", Object::Name("OpenType".to_string()));
1379 font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1380 }
1381 crate::fonts::FontFormat::TrueType => {
1382 font_file_dict.set("Length1", Object::Integer(font_data_to_embed.len() as i64));
1384 }
1385 }
1386 let font_stream_obj = Object::Stream(font_file_dict, font_data_to_embed);
1387 self.write_object(font_file_id, font_stream_obj)?;
1388 } else {
1389 let font_file_dict = Dictionary::new();
1391 let font_stream_obj = Object::Stream(font_file_dict, Vec::new());
1392 self.write_object(font_file_id, font_stream_obj)?;
1393 }
1394
1395 let mut descriptor = Dictionary::new();
1397 descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1398 descriptor.set("FontName", Object::Name(font_name.to_string()));
1399 descriptor.set("Flags", Object::Integer(4)); descriptor.set(
1401 "FontBBox",
1402 Object::Array(vec![
1403 Object::Integer(font.descriptor.font_bbox[0] as i64),
1404 Object::Integer(font.descriptor.font_bbox[1] as i64),
1405 Object::Integer(font.descriptor.font_bbox[2] as i64),
1406 Object::Integer(font.descriptor.font_bbox[3] as i64),
1407 ]),
1408 );
1409 descriptor.set(
1410 "ItalicAngle",
1411 Object::Real(font.descriptor.italic_angle as f64),
1412 );
1413 descriptor.set("Ascent", Object::Real(font.descriptor.ascent as f64));
1414 descriptor.set("Descent", Object::Real(font.descriptor.descent as f64));
1415 descriptor.set("CapHeight", Object::Real(font.descriptor.cap_height as f64));
1416 descriptor.set("StemV", Object::Real(font.descriptor.stem_v as f64));
1417 let font_file_key = match font.format {
1419 crate::fonts::FontFormat::OpenType => "FontFile3", crate::fonts::FontFormat::TrueType => "FontFile2", };
1422 descriptor.set(font_file_key, Object::Reference(font_file_id));
1423 self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
1424
1425 let mut cid_font = Dictionary::new();
1427 cid_font.set("Type", Object::Name("Font".to_string()));
1428 let cid_font_subtype =
1430 if CjkFontType::should_use_cidfonttype2_for_preview_compatibility(font_name) {
1431 "CIDFontType2" } else {
1433 match font.format {
1434 crate::fonts::FontFormat::OpenType => "CIDFontType0", crate::fonts::FontFormat::TrueType => "CIDFontType2", }
1437 };
1438 cid_font.set("Subtype", Object::Name(cid_font_subtype.to_string()));
1439 cid_font.set("BaseFont", Object::Name(font_name.to_string()));
1440
1441 let mut cid_system_info = Dictionary::new();
1443 let (registry, ordering, supplement) =
1444 if let Some(cjk_type) = CjkFontType::detect_from_name(font_name) {
1445 cjk_type.cid_system_info()
1446 } else {
1447 ("Adobe", "Identity", 0)
1448 };
1449
1450 cid_system_info.set("Registry", Object::String(registry.to_string()));
1451 cid_system_info.set("Ordering", Object::String(ordering.to_string()));
1452 cid_system_info.set("Supplement", Object::Integer(supplement as i64));
1453 cid_font.set("CIDSystemInfo", Object::Dictionary(cid_system_info));
1454
1455 cid_font.set("FontDescriptor", Object::Reference(descriptor_id));
1456
1457 let default_width = self.calculate_default_width(font);
1459 cid_font.set("DW", Object::Integer(default_width));
1460
1461 let w_array = self.generate_width_array(
1465 &original_font_for_widths,
1466 default_width,
1467 subset_glyph_mapping.as_ref(),
1468 );
1469 cid_font.set("W", Object::Array(w_array));
1470
1471 if cid_font_subtype == "CIDFontType2" {
1475 let cid_to_gid_map =
1477 self.generate_cid_to_gid_map(font, subset_glyph_mapping.as_ref())?;
1478 if !cid_to_gid_map.is_empty() {
1479 let cid_to_gid_map_id = self.allocate_object_id();
1481 let mut map_dict = Dictionary::new();
1482 map_dict.set("Length", Object::Integer(cid_to_gid_map.len() as i64));
1483 let map_stream = Object::Stream(map_dict, cid_to_gid_map);
1484 self.write_object(cid_to_gid_map_id, map_stream)?;
1485 cid_font.set("CIDToGIDMap", Object::Reference(cid_to_gid_map_id));
1486 } else {
1487 cid_font.set("CIDToGIDMap", Object::Name("Identity".to_string()));
1488 }
1489 }
1490 self.write_object(descendant_font_id, Object::Dictionary(cid_font))?;
1493
1494 let cmap_data = self.generate_tounicode_cmap_from_font(font);
1496 let cmap_dict = Dictionary::new();
1497 let cmap_stream = Object::Stream(cmap_dict, cmap_data);
1498 self.write_object(to_unicode_id, cmap_stream)?;
1499
1500 let mut type0_font = Dictionary::new();
1502 type0_font.set("Type", Object::Name("Font".to_string()));
1503 type0_font.set("Subtype", Object::Name("Type0".to_string()));
1504 type0_font.set("BaseFont", Object::Name(font_name.to_string()));
1505 type0_font.set("Encoding", Object::Name("Identity-H".to_string()));
1506 type0_font.set(
1507 "DescendantFonts",
1508 Object::Array(vec![Object::Reference(descendant_font_id)]),
1509 );
1510 type0_font.set("ToUnicode", Object::Reference(to_unicode_id));
1511
1512 self.write_object(font_id, Object::Dictionary(type0_font))?;
1513
1514 Ok(font_id)
1515 }
1516
1517 fn calculate_default_width(&self, font: &crate::fonts::Font) -> i64 {
1519 use crate::text::fonts::truetype::TrueTypeFont;
1520
1521 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1523 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1524 if let Some(cmap) = cmap_tables
1525 .iter()
1526 .find(|t| t.platform_id == 3 && t.encoding_id == 1)
1527 .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1528 {
1529 if let Ok(widths) = tt_font.get_glyph_widths(&cmap.mappings) {
1530 let common_chars =
1534 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
1535 let mut total_width = 0;
1536 let mut count = 0;
1537
1538 for ch in common_chars.chars() {
1539 let unicode = ch as u32;
1540 if let Some(&pdf_width) = widths.get(&unicode) {
1541 total_width += pdf_width as i64;
1542 count += 1;
1543 }
1544 }
1545
1546 if count > 0 {
1547 return total_width / count;
1548 }
1549 }
1550 }
1551 }
1552 }
1553
1554 500
1556 }
1557
1558 fn generate_width_array(
1560 &self,
1561 font: &crate::fonts::Font,
1562 _default_width: i64,
1563 subset_mapping: Option<&HashMap<u32, u16>>,
1564 ) -> Vec<Object> {
1565 use crate::text::fonts::truetype::TrueTypeFont;
1566
1567 let mut w_array = Vec::new();
1568
1569 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1571 let char_to_glyph = {
1575 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1577 if let Some(cmap) = cmap_tables
1578 .iter()
1579 .find(|t| t.platform_id == 3 && t.encoding_id == 1)
1580 .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1581 {
1582 if let Some(subset_map) = subset_mapping {
1584 let mut filtered = HashMap::new();
1585 for unicode in subset_map.keys() {
1586 if let Some(&orig_glyph) = cmap.mappings.get(unicode) {
1588 filtered.insert(*unicode, orig_glyph);
1589 }
1590 }
1591 filtered
1592 } else {
1593 cmap.mappings.clone()
1594 }
1595 } else {
1596 HashMap::new()
1597 }
1598 } else {
1599 HashMap::new()
1600 }
1601 };
1602
1603 if !char_to_glyph.is_empty() {
1604 if let Ok(widths) = tt_font.get_glyph_widths(&char_to_glyph) {
1606 let mut sorted_chars: Vec<_> = widths.iter().collect();
1611 sorted_chars.sort_by_key(|(unicode, _)| *unicode);
1612
1613 let mut i = 0;
1614 while i < sorted_chars.len() {
1615 let start_unicode = *sorted_chars[i].0;
1616 let pdf_width = *sorted_chars[i].1 as i64;
1618
1619 let mut end_unicode = start_unicode;
1621 let mut j = i + 1;
1622 while j < sorted_chars.len() && *sorted_chars[j].0 == end_unicode + 1 {
1623 let next_pdf_width = *sorted_chars[j].1 as i64;
1624 if next_pdf_width == pdf_width {
1625 end_unicode = *sorted_chars[j].0;
1626 j += 1;
1627 } else {
1628 break;
1629 }
1630 }
1631
1632 if start_unicode == end_unicode {
1634 w_array.push(Object::Integer(start_unicode as i64));
1636 w_array.push(Object::Array(vec![Object::Integer(pdf_width)]));
1637 } else {
1638 w_array.push(Object::Integer(start_unicode as i64));
1640 w_array.push(Object::Integer(end_unicode as i64));
1641 w_array.push(Object::Integer(pdf_width));
1642 }
1643
1644 i = j;
1645 }
1646
1647 return w_array;
1648 }
1649 }
1650 }
1651
1652 let ranges = vec![
1654 (0x20, 0x20, 250), (0x21, 0x2F, 333), (0x30, 0x39, 500), (0x3A, 0x40, 333), (0x41, 0x5A, 667), (0x5B, 0x60, 333), (0x61, 0x7A, 500), (0x7B, 0x7E, 333), (0xA0, 0xA0, 250), (0xA1, 0xBF, 333), (0xC0, 0xD6, 667), (0xD7, 0xD7, 564), (0xD8, 0xDE, 667), (0xDF, 0xF6, 500), (0xF7, 0xF7, 564), (0xF8, 0xFF, 500), (0x100, 0x17F, 500), (0x2000, 0x200F, 250), (0x2010, 0x2027, 333), (0x2028, 0x202F, 250), (0x2030, 0x206F, 500), (0x2070, 0x209F, 400), (0x20A0, 0x20CF, 600), (0x2100, 0x214F, 700), (0x2190, 0x21FF, 600), (0x2200, 0x22FF, 600), (0x2300, 0x23FF, 600), (0x2500, 0x257F, 500), (0x2580, 0x259F, 500), (0x25A0, 0x25FF, 600), (0x2600, 0x26FF, 600), (0x2700, 0x27BF, 600), ];
1691
1692 for (start, end, width) in ranges {
1694 if start == end {
1695 w_array.push(Object::Integer(start));
1697 w_array.push(Object::Array(vec![Object::Integer(width)]));
1698 } else {
1699 w_array.push(Object::Integer(start));
1701 w_array.push(Object::Integer(end));
1702 w_array.push(Object::Integer(width));
1703 }
1704 }
1705
1706 w_array
1707 }
1708
1709 fn generate_cid_to_gid_map(
1711 &mut self,
1712 font: &crate::fonts::Font,
1713 subset_mapping: Option<&HashMap<u32, u16>>,
1714 ) -> Result<Vec<u8>> {
1715 use crate::text::fonts::truetype::TrueTypeFont;
1716
1717 let cmap_mappings = if let Some(subset_map) = subset_mapping {
1720 subset_map.clone()
1722 } else {
1723 let tt_font = TrueTypeFont::parse(font.data.clone())?;
1725 let cmap_tables = tt_font.parse_cmap()?;
1726
1727 let cmap = cmap_tables
1729 .iter()
1730 .find(|t| t.platform_id == 3 && t.encoding_id == 1) .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0)) .ok_or_else(|| {
1733 crate::error::PdfError::FontError("No Unicode cmap table found".to_string())
1734 })?;
1735
1736 cmap.mappings.clone()
1737 };
1738
1739 let used_chars = self.document_used_chars.clone().unwrap_or_default();
1746
1747 let max_unicode = if !used_chars.is_empty() {
1749 used_chars
1751 .iter()
1752 .map(|ch| *ch as u32)
1753 .max()
1754 .unwrap_or(0x00FF) .min(0xFFFF) as usize
1756 } else {
1757 cmap_mappings
1759 .keys()
1760 .max()
1761 .copied()
1762 .unwrap_or(0xFFFF)
1763 .min(0xFFFF) as usize
1764 };
1765
1766 let mut map = vec![0u8; (max_unicode + 1) * 2];
1768
1769 let mut sample_mappings = Vec::new();
1771 for (&unicode, &glyph_id) in &cmap_mappings {
1772 if unicode <= max_unicode as u32 {
1773 let idx = (unicode as usize) * 2;
1774 map[idx] = (glyph_id >> 8) as u8;
1776 map[idx + 1] = (glyph_id & 0xFF) as u8;
1777
1778 if unicode == 0x0041 || unicode == 0x0061 || unicode == 0x00E1 || unicode == 0x00F1
1780 {
1781 sample_mappings.push((unicode, glyph_id));
1782 }
1783 }
1784 }
1785
1786 Ok(map)
1787 }
1788
1789 fn generate_tounicode_cmap_from_font(&self, font: &crate::fonts::Font) -> Vec<u8> {
1791 use crate::text::fonts::truetype::TrueTypeFont;
1792
1793 let mut cmap = String::new();
1794
1795 cmap.push_str("/CIDInit /ProcSet findresource begin\n");
1797 cmap.push_str("12 dict begin\n");
1798 cmap.push_str("begincmap\n");
1799 cmap.push_str("/CIDSystemInfo\n");
1800 cmap.push_str("<< /Registry (Adobe)\n");
1801 cmap.push_str(" /Ordering (UCS)\n");
1802 cmap.push_str(" /Supplement 0\n");
1803 cmap.push_str(">> def\n");
1804 cmap.push_str("/CMapName /Adobe-Identity-UCS def\n");
1805 cmap.push_str("/CMapType 2 def\n");
1806 cmap.push_str("1 begincodespacerange\n");
1807 cmap.push_str("<0000> <FFFF>\n");
1808 cmap.push_str("endcodespacerange\n");
1809
1810 let mut mappings = Vec::new();
1812 let mut has_font_mappings = false;
1813
1814 if let Ok(tt_font) = TrueTypeFont::parse(font.data.clone()) {
1815 if let Ok(cmap_tables) = tt_font.parse_cmap() {
1816 if let Some(cmap_table) = cmap_tables
1818 .iter()
1819 .find(|t| t.platform_id == 3 && t.encoding_id == 1) .or_else(|| cmap_tables.iter().find(|t| t.platform_id == 0))
1821 {
1823 for (&unicode, &glyph_id) in &cmap_table.mappings {
1826 if glyph_id > 0 && unicode <= 0xFFFF {
1827 mappings.push((unicode, unicode));
1830 }
1831 }
1832 has_font_mappings = true;
1833 }
1834 }
1835 }
1836
1837 if !has_font_mappings {
1839 for i in 0x0020..=0x00FF {
1841 mappings.push((i, i));
1842 }
1843
1844 for i in 0x0100..=0x017F {
1846 mappings.push((i, i));
1847 }
1848
1849 for i in 0x3040..=0x309F {
1852 mappings.push((i, i));
1853 }
1854
1855 for i in 0x30A0..=0x30FF {
1857 mappings.push((i, i));
1858 }
1859
1860 for i in 0x4E00..=0x9FFF {
1862 mappings.push((i, i));
1863 }
1864
1865 for i in 0xAC00..=0xD7AF {
1867 mappings.push((i, i));
1868 }
1869
1870 for i in 0x2000..=0x206F {
1872 mappings.push((i, i));
1873 }
1874
1875 for i in 0x2200..=0x22FF {
1877 mappings.push((i, i));
1878 }
1879
1880 for i in 0x2190..=0x21FF {
1882 mappings.push((i, i));
1883 }
1884
1885 for i in 0x2500..=0x259F {
1887 mappings.push((i, i));
1888 }
1889
1890 for i in 0x25A0..=0x25FF {
1892 mappings.push((i, i));
1893 }
1894
1895 for i in 0x2600..=0x26FF {
1897 mappings.push((i, i));
1898 }
1899 }
1900
1901 mappings.sort_by_key(|&(cid, _)| cid);
1903
1904 let mut i = 0;
1906 while i < mappings.len() {
1907 let start_cid = mappings[i].0;
1909 let start_unicode = mappings[i].1;
1910 let mut end_idx = i;
1911
1912 while end_idx + 1 < mappings.len()
1914 && mappings[end_idx + 1].0 == mappings[end_idx].0 + 1
1915 && mappings[end_idx + 1].1 == mappings[end_idx].1 + 1
1916 && end_idx - i < 99
1917 {
1919 end_idx += 1;
1920 }
1921
1922 if end_idx > i {
1923 cmap.push_str("1 beginbfrange\n");
1925 cmap.push_str(&format!(
1926 "<{:04X}> <{:04X}> <{:04X}>\n",
1927 start_cid, mappings[end_idx].0, start_unicode
1928 ));
1929 cmap.push_str("endbfrange\n");
1930 i = end_idx + 1;
1931 } else {
1932 let mut chars = Vec::new();
1934 let chunk_end = (i + 100).min(mappings.len());
1935
1936 for item in &mappings[i..chunk_end] {
1937 chars.push(*item);
1938 }
1939
1940 if !chars.is_empty() {
1941 cmap.push_str(&format!("{} beginbfchar\n", chars.len()));
1942 for (cid, unicode) in chars {
1943 cmap.push_str(&format!("<{:04X}> <{:04X}>\n", cid, unicode));
1944 }
1945 cmap.push_str("endbfchar\n");
1946 }
1947
1948 i = chunk_end;
1949 }
1950 }
1951
1952 cmap.push_str("endcmap\n");
1954 cmap.push_str("CMapName currentdict /CMap defineresource pop\n");
1955 cmap.push_str("end\n");
1956 cmap.push_str("end\n");
1957
1958 cmap.into_bytes()
1959 }
1960
1961 #[allow(dead_code)]
1963 fn write_truetype_font(
1964 &mut self,
1965 font_name: &str,
1966 font: &crate::text::font_manager::CustomFont,
1967 ) -> Result<ObjectId> {
1968 let font_id = self.allocate_object_id();
1970 let descriptor_id = self.allocate_object_id();
1971 let font_file_id = self.allocate_object_id();
1972
1973 if let Some(ref data) = font.font_data {
1975 let mut font_file_dict = Dictionary::new();
1976 font_file_dict.set("Length1", Object::Integer(data.len() as i64));
1977 let font_stream_obj = Object::Stream(font_file_dict, data.clone());
1978 self.write_object(font_file_id, font_stream_obj)?;
1979 }
1980
1981 let mut descriptor = Dictionary::new();
1983 descriptor.set("Type", Object::Name("FontDescriptor".to_string()));
1984 descriptor.set("FontName", Object::Name(font_name.to_string()));
1985 descriptor.set("Flags", Object::Integer(32)); descriptor.set(
1987 "FontBBox",
1988 Object::Array(vec![
1989 Object::Integer(-1000),
1990 Object::Integer(-1000),
1991 Object::Integer(2000),
1992 Object::Integer(2000),
1993 ]),
1994 );
1995 descriptor.set("ItalicAngle", Object::Integer(0));
1996 descriptor.set("Ascent", Object::Integer(font.descriptor.ascent as i64));
1997 descriptor.set("Descent", Object::Integer(font.descriptor.descent as i64));
1998 descriptor.set(
1999 "CapHeight",
2000 Object::Integer(font.descriptor.cap_height as i64),
2001 );
2002 descriptor.set("StemV", Object::Integer(font.descriptor.stem_v as i64));
2003 descriptor.set("FontFile2", Object::Reference(font_file_id));
2004 self.write_object(descriptor_id, Object::Dictionary(descriptor))?;
2005
2006 let mut font_dict = Dictionary::new();
2008 font_dict.set("Type", Object::Name("Font".to_string()));
2009 font_dict.set("Subtype", Object::Name("TrueType".to_string()));
2010 font_dict.set("BaseFont", Object::Name(font_name.to_string()));
2011 font_dict.set("FirstChar", Object::Integer(0));
2012 font_dict.set("LastChar", Object::Integer(255));
2013
2014 let widths: Vec<Object> = (0..256).map(|_| Object::Integer(600)).collect();
2016 font_dict.set("Widths", Object::Array(widths));
2017 font_dict.set("FontDescriptor", Object::Reference(descriptor_id));
2018
2019 font_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2021
2022 self.write_object(font_id, Object::Dictionary(font_dict))?;
2023
2024 Ok(font_id)
2025 }
2026
2027 fn write_pages(
2028 &mut self,
2029 document: &Document,
2030 font_refs: &HashMap<String, ObjectId>,
2031 ) -> Result<()> {
2032 let pages_id = self.get_pages_id()?;
2033 let mut pages_dict = Dictionary::new();
2034 pages_dict.set("Type", Object::Name("Pages".to_string()));
2035 pages_dict.set("Count", Object::Integer(document.pages.len() as i64));
2036
2037 let mut kids = Vec::new();
2038
2039 let mut page_ids = Vec::new();
2041 let mut content_ids = Vec::new();
2042 for _ in 0..document.pages.len() {
2043 page_ids.push(self.allocate_object_id());
2044 content_ids.push(self.allocate_object_id());
2045 }
2046
2047 for page_id in &page_ids {
2048 kids.push(Object::Reference(*page_id));
2049 }
2050
2051 pages_dict.set("Kids", Object::Array(kids));
2052
2053 self.write_object(pages_id, Object::Dictionary(pages_dict))?;
2054
2055 self.page_ids = page_ids.clone();
2057
2058 for (i, page) in document.pages.iter().enumerate() {
2060 let page_id = page_ids[i];
2061 let content_id = content_ids[i];
2062
2063 self.write_page_with_fonts(page_id, pages_id, content_id, page, document, font_refs)?;
2064 self.write_page_content(content_id, page)?;
2065 }
2066
2067 Ok(())
2068 }
2069
2070 #[allow(dead_code)]
2072 fn write_pages_with_fonts(
2073 &mut self,
2074 document: &Document,
2075 font_refs: &HashMap<String, ObjectId>,
2076 ) -> Result<()> {
2077 self.write_pages(document, font_refs)
2078 }
2079
2080 fn write_page_with_fonts(
2081 &mut self,
2082 page_id: ObjectId,
2083 parent_id: ObjectId,
2084 content_id: ObjectId,
2085 page: &crate::page::Page,
2086 _document: &Document,
2087 font_refs: &HashMap<String, ObjectId>,
2088 ) -> Result<()> {
2089 let mut page_dict = page.to_dict();
2091
2092 page_dict.set("Type", Object::Name("Page".to_string()));
2093 page_dict.set("Parent", Object::Reference(parent_id));
2094 page_dict.set("Contents", Object::Reference(content_id));
2095
2096 let mut resources = if let Some(Object::Dictionary(res)) = page_dict.get("Resources") {
2098 res.clone()
2099 } else {
2100 Dictionary::new()
2101 };
2102
2103 let mut font_dict = Dictionary::new();
2105
2106 let mut helvetica_dict = Dictionary::new();
2111 helvetica_dict.set("Type", Object::Name("Font".to_string()));
2112 helvetica_dict.set("Subtype", Object::Name("Type1".to_string()));
2113 helvetica_dict.set("BaseFont", Object::Name("Helvetica".to_string()));
2114 helvetica_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2115 font_dict.set("Helvetica", Object::Dictionary(helvetica_dict));
2116
2117 let mut helvetica_bold_dict = Dictionary::new();
2118 helvetica_bold_dict.set("Type", Object::Name("Font".to_string()));
2119 helvetica_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2120 helvetica_bold_dict.set("BaseFont", Object::Name("Helvetica-Bold".to_string()));
2121 helvetica_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2122 font_dict.set("Helvetica-Bold", Object::Dictionary(helvetica_bold_dict));
2123
2124 let mut helvetica_oblique_dict = Dictionary::new();
2125 helvetica_oblique_dict.set("Type", Object::Name("Font".to_string()));
2126 helvetica_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2127 helvetica_oblique_dict.set("BaseFont", Object::Name("Helvetica-Oblique".to_string()));
2128 helvetica_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2129 font_dict.set(
2130 "Helvetica-Oblique",
2131 Object::Dictionary(helvetica_oblique_dict),
2132 );
2133
2134 let mut helvetica_bold_oblique_dict = Dictionary::new();
2135 helvetica_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2136 helvetica_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2137 helvetica_bold_oblique_dict.set(
2138 "BaseFont",
2139 Object::Name("Helvetica-BoldOblique".to_string()),
2140 );
2141 helvetica_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2142 font_dict.set(
2143 "Helvetica-BoldOblique",
2144 Object::Dictionary(helvetica_bold_oblique_dict),
2145 );
2146
2147 let mut times_dict = Dictionary::new();
2149 times_dict.set("Type", Object::Name("Font".to_string()));
2150 times_dict.set("Subtype", Object::Name("Type1".to_string()));
2151 times_dict.set("BaseFont", Object::Name("Times-Roman".to_string()));
2152 times_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2153 font_dict.set("Times-Roman", Object::Dictionary(times_dict));
2154
2155 let mut times_bold_dict = Dictionary::new();
2156 times_bold_dict.set("Type", Object::Name("Font".to_string()));
2157 times_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2158 times_bold_dict.set("BaseFont", Object::Name("Times-Bold".to_string()));
2159 times_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2160 font_dict.set("Times-Bold", Object::Dictionary(times_bold_dict));
2161
2162 let mut times_italic_dict = Dictionary::new();
2163 times_italic_dict.set("Type", Object::Name("Font".to_string()));
2164 times_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2165 times_italic_dict.set("BaseFont", Object::Name("Times-Italic".to_string()));
2166 times_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2167 font_dict.set("Times-Italic", Object::Dictionary(times_italic_dict));
2168
2169 let mut times_bold_italic_dict = Dictionary::new();
2170 times_bold_italic_dict.set("Type", Object::Name("Font".to_string()));
2171 times_bold_italic_dict.set("Subtype", Object::Name("Type1".to_string()));
2172 times_bold_italic_dict.set("BaseFont", Object::Name("Times-BoldItalic".to_string()));
2173 times_bold_italic_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2174 font_dict.set(
2175 "Times-BoldItalic",
2176 Object::Dictionary(times_bold_italic_dict),
2177 );
2178
2179 let mut courier_dict = Dictionary::new();
2181 courier_dict.set("Type", Object::Name("Font".to_string()));
2182 courier_dict.set("Subtype", Object::Name("Type1".to_string()));
2183 courier_dict.set("BaseFont", Object::Name("Courier".to_string()));
2184 courier_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2185 font_dict.set("Courier", Object::Dictionary(courier_dict));
2186
2187 let mut courier_bold_dict = Dictionary::new();
2188 courier_bold_dict.set("Type", Object::Name("Font".to_string()));
2189 courier_bold_dict.set("Subtype", Object::Name("Type1".to_string()));
2190 courier_bold_dict.set("BaseFont", Object::Name("Courier-Bold".to_string()));
2191 courier_bold_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2192 font_dict.set("Courier-Bold", Object::Dictionary(courier_bold_dict));
2193
2194 let mut courier_oblique_dict = Dictionary::new();
2195 courier_oblique_dict.set("Type", Object::Name("Font".to_string()));
2196 courier_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2197 courier_oblique_dict.set("BaseFont", Object::Name("Courier-Oblique".to_string()));
2198 courier_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2199 font_dict.set("Courier-Oblique", Object::Dictionary(courier_oblique_dict));
2200
2201 let mut courier_bold_oblique_dict = Dictionary::new();
2202 courier_bold_oblique_dict.set("Type", Object::Name("Font".to_string()));
2203 courier_bold_oblique_dict.set("Subtype", Object::Name("Type1".to_string()));
2204 courier_bold_oblique_dict.set("BaseFont", Object::Name("Courier-BoldOblique".to_string()));
2205 courier_bold_oblique_dict.set("Encoding", Object::Name("WinAnsiEncoding".to_string()));
2206 font_dict.set(
2207 "Courier-BoldOblique",
2208 Object::Dictionary(courier_bold_oblique_dict),
2209 );
2210
2211 for (font_name, font_id) in font_refs {
2213 font_dict.set(font_name, Object::Reference(*font_id));
2214 }
2215
2216 resources.set("Font", Object::Dictionary(font_dict));
2217
2218 if !page.images().is_empty() {
2220 let mut xobject_dict = Dictionary::new();
2221
2222 for (name, image) in page.images() {
2223 let image_id = self.allocate_object_id();
2225
2226 if image.has_transparency() {
2228 let (mut main_obj, smask_obj) = image.to_pdf_object_with_transparency()?;
2230
2231 if let Some(smask_stream) = smask_obj {
2233 let smask_id = self.allocate_object_id();
2234 self.write_object(smask_id, smask_stream)?;
2235
2236 if let Object::Stream(ref mut dict, _) = main_obj {
2238 dict.set("SMask", Object::Reference(smask_id));
2239 }
2240 }
2241
2242 self.write_object(image_id, main_obj)?;
2244 } else {
2245 self.write_object(image_id, image.to_pdf_object())?;
2247 }
2248
2249 xobject_dict.set(name, Object::Reference(image_id));
2251 }
2252
2253 resources.set("XObject", Object::Dictionary(xobject_dict));
2254 }
2255
2256 if let Some(extgstate_states) = page.get_extgstate_resources() {
2258 let mut extgstate_dict = Dictionary::new();
2259 for (name, state) in extgstate_states {
2260 let mut state_dict = Dictionary::new();
2261 state_dict.set("Type", Object::Name("ExtGState".to_string()));
2262
2263 if let Some(alpha_stroke) = state.alpha_stroke {
2265 state_dict.set("CA", Object::Real(alpha_stroke));
2266 }
2267 if let Some(alpha_fill) = state.alpha_fill {
2268 state_dict.set("ca", Object::Real(alpha_fill));
2269 }
2270
2271 if let Some(line_width) = state.line_width {
2273 state_dict.set("LW", Object::Real(line_width));
2274 }
2275 if let Some(line_cap) = state.line_cap {
2276 state_dict.set("LC", Object::Integer(line_cap as i64));
2277 }
2278 if let Some(line_join) = state.line_join {
2279 state_dict.set("LJ", Object::Integer(line_join as i64));
2280 }
2281 if let Some(dash_pattern) = &state.dash_pattern {
2282 let dash_objects: Vec<Object> = dash_pattern
2283 .array
2284 .iter()
2285 .map(|&d| Object::Real(d))
2286 .collect();
2287 state_dict.set(
2288 "D",
2289 Object::Array(vec![
2290 Object::Array(dash_objects),
2291 Object::Real(dash_pattern.phase),
2292 ]),
2293 );
2294 }
2295
2296 extgstate_dict.set(name, Object::Dictionary(state_dict));
2297 }
2298 if !extgstate_dict.is_empty() {
2299 resources.set("ExtGState", Object::Dictionary(extgstate_dict));
2300 }
2301 }
2302
2303 if let Some(preserved_res) = page.get_preserved_resources() {
2306 let mut preserved_writer_dict = self.convert_pdf_objects_dict_to_writer(preserved_res);
2308
2309 if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2311 let renamed_fonts = crate::writer::rename_preserved_fonts(fonts);
2313
2314 preserved_writer_dict.set("Font", Object::Dictionary(renamed_fonts));
2316 }
2317
2318 if let Some(Object::Dictionary(fonts)) = preserved_writer_dict.get("Font") {
2322 let mut fonts_with_refs = crate::objects::Dictionary::new();
2323
2324 for (font_name, font_obj) in fonts.iter() {
2325 if let Object::Dictionary(font_dict) = font_obj {
2326 let updated_font = self.write_embedded_font_streams(font_dict)?;
2328 fonts_with_refs.set(font_name, Object::Dictionary(updated_font));
2329 } else {
2330 fonts_with_refs.set(font_name, font_obj.clone());
2332 }
2333 }
2334
2335 preserved_writer_dict.set("Font", Object::Dictionary(fonts_with_refs));
2337 }
2338
2339 for (key, value) in preserved_writer_dict.iter() {
2341 if let Some(Object::Dictionary(existing)) = resources.get(key) {
2343 if let Object::Dictionary(preserved_dict) = value {
2344 let mut merged = existing.clone();
2345 for (res_name, res_obj) in preserved_dict.iter() {
2347 if !merged.contains_key(res_name) {
2348 merged.set(res_name, res_obj.clone());
2349 }
2350 }
2351 resources.set(key, Object::Dictionary(merged));
2352 }
2353 } else {
2354 resources.set(key, value.clone());
2356 }
2357 }
2358 }
2359
2360 page_dict.set("Resources", Object::Dictionary(resources));
2361
2362 if let Some(Object::Array(annots)) = page_dict.get("Annots") {
2364 let mut new_annots = Vec::new();
2365
2366 for annot in annots {
2367 if let Object::Dictionary(ref annot_dict) = annot {
2368 if let Some(Object::Name(subtype)) = annot_dict.get("Subtype") {
2369 if subtype == "Widget" {
2370 let widget_id = self.allocate_object_id();
2372 self.write_object(widget_id, annot.clone())?;
2373 new_annots.push(Object::Reference(widget_id));
2374
2375 if let Some(Object::Name(_ft)) = annot_dict.get("FT") {
2377 if let Some(Object::String(field_name)) = annot_dict.get("T") {
2378 self.field_widget_map
2379 .entry(field_name.clone())
2380 .or_default()
2381 .push(widget_id);
2382 self.field_id_map.insert(field_name.clone(), widget_id);
2383 self.form_field_ids.push(widget_id);
2384 }
2385 }
2386 continue;
2387 }
2388 }
2389 }
2390 new_annots.push(annot.clone());
2391 }
2392
2393 if !new_annots.is_empty() {
2394 page_dict.set("Annots", Object::Array(new_annots));
2395 }
2396 }
2397
2398 self.write_object(page_id, Object::Dictionary(page_dict))?;
2399 Ok(())
2400 }
2401}
2402
2403impl PdfWriter<BufWriter<std::fs::File>> {
2404 pub fn new(path: impl AsRef<Path>) -> Result<Self> {
2405 let file = std::fs::File::create(path)?;
2406 let writer = BufWriter::new(file);
2407
2408 Ok(Self {
2409 writer,
2410 xref_positions: HashMap::new(),
2411 current_position: 0,
2412 next_object_id: 1,
2413 catalog_id: None,
2414 pages_id: None,
2415 info_id: None,
2416 field_widget_map: HashMap::new(),
2417 field_id_map: HashMap::new(),
2418 form_field_ids: Vec::new(),
2419 page_ids: Vec::new(),
2420 config: WriterConfig::default(),
2421 document_used_chars: None,
2422 buffered_objects: HashMap::new(),
2423 compressed_object_map: HashMap::new(),
2424 prev_xref_offset: None,
2425 base_pdf_size: None,
2426 })
2427 }
2428}
2429
2430impl<W: Write> PdfWriter<W> {
2431 fn write_embedded_font_streams(
2447 &mut self,
2448 font_dict: &crate::objects::Dictionary,
2449 ) -> Result<crate::objects::Dictionary> {
2450 let mut updated_font = font_dict.clone();
2451
2452 if let Some(Object::Name(subtype)) = font_dict.get("Subtype") {
2454 if subtype == "Type0" {
2455 if let Some(Object::Array(descendants)) = font_dict.get("DescendantFonts") {
2457 let mut updated_descendants = Vec::new();
2458
2459 for descendant in descendants {
2460 match descendant {
2461 Object::Dictionary(cidfont) => {
2462 let updated_cidfont =
2464 self.write_cidfont_embedded_streams(cidfont)?;
2465 let cidfont_id = self.allocate_object_id();
2467 self.write_object(cidfont_id, Object::Dictionary(updated_cidfont))?;
2468 updated_descendants.push(Object::Reference(cidfont_id));
2470 }
2471 Object::Reference(_) => {
2472 updated_descendants.push(descendant.clone());
2474 }
2475 _ => {
2476 updated_descendants.push(descendant.clone());
2477 }
2478 }
2479 }
2480
2481 updated_font.set("DescendantFonts", Object::Array(updated_descendants));
2482 }
2483
2484 if let Some(Object::Stream(stream_dict, stream_data)) = font_dict.get("ToUnicode") {
2486 let tounicode_id = self.allocate_object_id();
2487 self.write_object(
2488 tounicode_id,
2489 Object::Stream(stream_dict.clone(), stream_data.clone()),
2490 )?;
2491 updated_font.set("ToUnicode", Object::Reference(tounicode_id));
2492 }
2493
2494 return Ok(updated_font);
2495 }
2496 }
2497
2498 if let Some(Object::Dictionary(descriptor)) = font_dict.get("FontDescriptor") {
2501 let mut updated_descriptor = descriptor.clone();
2502 let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2503
2504 for key in &font_file_keys {
2506 if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2507 let stream_id = self.allocate_object_id();
2509 let stream_obj = Object::Stream(stream_dict.clone(), stream_data.clone());
2510 self.write_object(stream_id, stream_obj)?;
2511
2512 updated_descriptor.set(*key, Object::Reference(stream_id));
2514 }
2515 }
2517
2518 updated_font.set("FontDescriptor", Object::Dictionary(updated_descriptor));
2520 }
2521
2522 Ok(updated_font)
2523 }
2524
2525 fn write_cidfont_embedded_streams(
2527 &mut self,
2528 cidfont: &crate::objects::Dictionary,
2529 ) -> Result<crate::objects::Dictionary> {
2530 let mut updated_cidfont = cidfont.clone();
2531
2532 if let Some(Object::Dictionary(descriptor)) = cidfont.get("FontDescriptor") {
2534 let mut updated_descriptor = descriptor.clone();
2535 let font_file_keys = ["FontFile", "FontFile2", "FontFile3"];
2536
2537 for key in &font_file_keys {
2539 if let Some(Object::Stream(stream_dict, stream_data)) = descriptor.get(*key) {
2540 let stream_id = self.allocate_object_id();
2541 self.write_object(
2542 stream_id,
2543 Object::Stream(stream_dict.clone(), stream_data.clone()),
2544 )?;
2545 updated_descriptor.set(*key, Object::Reference(stream_id));
2546 }
2547 }
2548
2549 let descriptor_id = self.allocate_object_id();
2551 self.write_object(descriptor_id, Object::Dictionary(updated_descriptor))?;
2552
2553 updated_cidfont.set("FontDescriptor", Object::Reference(descriptor_id));
2555 }
2556
2557 if let Some(Object::Stream(map_dict, map_data)) = cidfont.get("CIDToGIDMap") {
2559 let map_id = self.allocate_object_id();
2560 self.write_object(map_id, Object::Stream(map_dict.clone(), map_data.clone()))?;
2561 updated_cidfont.set("CIDToGIDMap", Object::Reference(map_id));
2562 }
2563
2564 Ok(updated_cidfont)
2565 }
2566
2567 fn allocate_object_id(&mut self) -> ObjectId {
2568 let id = ObjectId::new(self.next_object_id, 0);
2569 self.next_object_id += 1;
2570 id
2571 }
2572
2573 fn get_catalog_id(&self) -> Result<ObjectId> {
2575 self.catalog_id.ok_or_else(|| {
2576 PdfError::InvalidOperation(
2577 "catalog_id not initialized - write_document() must be called first".to_string(),
2578 )
2579 })
2580 }
2581
2582 fn get_pages_id(&self) -> Result<ObjectId> {
2584 self.pages_id.ok_or_else(|| {
2585 PdfError::InvalidOperation(
2586 "pages_id not initialized - write_document() must be called first".to_string(),
2587 )
2588 })
2589 }
2590
2591 fn get_info_id(&self) -> Result<ObjectId> {
2593 self.info_id.ok_or_else(|| {
2594 PdfError::InvalidOperation(
2595 "info_id not initialized - write_document() must be called first".to_string(),
2596 )
2597 })
2598 }
2599
2600 fn write_object(&mut self, id: ObjectId, object: Object) -> Result<()> {
2601 use crate::writer::ObjectStreamWriter;
2602
2603 if self.config.use_object_streams && ObjectStreamWriter::can_compress(&object) {
2605 let mut buffer = Vec::new();
2606 self.write_object_value_to_buffer(&object, &mut buffer)?;
2607 self.buffered_objects.insert(id, buffer);
2608 return Ok(());
2609 }
2610
2611 self.xref_positions.insert(id, self.current_position);
2613
2614 let header = format!("{} {} obj\n", id.number(), id.generation());
2616 self.write_bytes(header.as_bytes())?;
2617
2618 self.write_object_value(&object)?;
2619
2620 self.write_bytes(b"\nendobj\n")?;
2621 Ok(())
2622 }
2623
2624 fn write_object_value(&mut self, object: &Object) -> Result<()> {
2625 match object {
2626 Object::Null => self.write_bytes(b"null")?,
2627 Object::Boolean(b) => self.write_bytes(if *b { b"true" } else { b"false" })?,
2628 Object::Integer(i) => self.write_bytes(i.to_string().as_bytes())?,
2629 Object::Real(f) => self.write_bytes(
2630 format!("{f:.6}")
2631 .trim_end_matches('0')
2632 .trim_end_matches('.')
2633 .as_bytes(),
2634 )?,
2635 Object::String(s) => {
2636 self.write_bytes(b"(")?;
2637 self.write_bytes(s.as_bytes())?;
2638 self.write_bytes(b")")?;
2639 }
2640 Object::Name(n) => {
2641 self.write_bytes(b"/")?;
2642 self.write_bytes(n.as_bytes())?;
2643 }
2644 Object::Array(arr) => {
2645 self.write_bytes(b"[")?;
2646 for (i, obj) in arr.iter().enumerate() {
2647 if i > 0 {
2648 self.write_bytes(b" ")?;
2649 }
2650 self.write_object_value(obj)?;
2651 }
2652 self.write_bytes(b"]")?;
2653 }
2654 Object::Dictionary(dict) => {
2655 self.write_bytes(b"<<")?;
2656 for (key, value) in dict.entries() {
2657 self.write_bytes(b"\n/")?;
2658 self.write_bytes(key.as_bytes())?;
2659 self.write_bytes(b" ")?;
2660 self.write_object_value(value)?;
2661 }
2662 self.write_bytes(b"\n>>")?;
2663 }
2664 Object::Stream(dict, data) => {
2665 let mut corrected_dict = dict.clone();
2668 corrected_dict.set("Length", Object::Integer(data.len() as i64));
2669
2670 self.write_object_value(&Object::Dictionary(corrected_dict))?;
2671 self.write_bytes(b"\nstream\n")?;
2672 self.write_bytes(data)?;
2673 self.write_bytes(b"\nendstream")?;
2674 }
2675 Object::Reference(id) => {
2676 let ref_str = format!("{} {} R", id.number(), id.generation());
2677 self.write_bytes(ref_str.as_bytes())?;
2678 }
2679 }
2680 Ok(())
2681 }
2682
2683 fn write_object_value_to_buffer(&self, object: &Object, buffer: &mut Vec<u8>) -> Result<()> {
2685 match object {
2686 Object::Null => buffer.extend_from_slice(b"null"),
2687 Object::Boolean(b) => buffer.extend_from_slice(if *b { b"true" } else { b"false" }),
2688 Object::Integer(i) => buffer.extend_from_slice(i.to_string().as_bytes()),
2689 Object::Real(f) => buffer.extend_from_slice(
2690 format!("{f:.6}")
2691 .trim_end_matches('0')
2692 .trim_end_matches('.')
2693 .as_bytes(),
2694 ),
2695 Object::String(s) => {
2696 buffer.push(b'(');
2697 buffer.extend_from_slice(s.as_bytes());
2698 buffer.push(b')');
2699 }
2700 Object::Name(n) => {
2701 buffer.push(b'/');
2702 buffer.extend_from_slice(n.as_bytes());
2703 }
2704 Object::Array(arr) => {
2705 buffer.push(b'[');
2706 for (i, obj) in arr.iter().enumerate() {
2707 if i > 0 {
2708 buffer.push(b' ');
2709 }
2710 self.write_object_value_to_buffer(obj, buffer)?;
2711 }
2712 buffer.push(b']');
2713 }
2714 Object::Dictionary(dict) => {
2715 buffer.extend_from_slice(b"<<");
2716 for (key, value) in dict.entries() {
2717 buffer.extend_from_slice(b"\n/");
2718 buffer.extend_from_slice(key.as_bytes());
2719 buffer.push(b' ');
2720 self.write_object_value_to_buffer(value, buffer)?;
2721 }
2722 buffer.extend_from_slice(b"\n>>");
2723 }
2724 Object::Stream(_, _) => {
2725 return Err(crate::error::PdfError::ObjectStreamError(
2727 "Cannot compress stream objects in object streams".to_string(),
2728 ));
2729 }
2730 Object::Reference(id) => {
2731 let ref_str = format!("{} {} R", id.number(), id.generation());
2732 buffer.extend_from_slice(ref_str.as_bytes());
2733 }
2734 }
2735 Ok(())
2736 }
2737
2738 fn flush_object_streams(&mut self) -> Result<()> {
2740 if self.buffered_objects.is_empty() {
2741 return Ok(());
2742 }
2743
2744 let config = ObjectStreamConfig {
2746 max_objects_per_stream: 100,
2747 compression_level: 6,
2748 enabled: true,
2749 };
2750 let mut os_writer = ObjectStreamWriter::new(config);
2751
2752 let mut buffered: Vec<_> = self.buffered_objects.iter().collect();
2754 buffered.sort_by_key(|(id, _)| id.number());
2755
2756 for (id, data) in buffered {
2758 os_writer.add_object(*id, data.clone())?;
2759 }
2760
2761 let streams = os_writer.finalize()?;
2763
2764 for mut stream in streams {
2766 let stream_id = stream.stream_id;
2767
2768 let compressed_data = stream.generate_stream_data(6)?;
2770
2771 let dict = stream.generate_dictionary(&compressed_data);
2773
2774 for (index, (obj_id, _)) in stream.objects.iter().enumerate() {
2776 self.compressed_object_map
2777 .insert(*obj_id, (stream_id, index as u32));
2778 }
2779
2780 self.xref_positions.insert(stream_id, self.current_position);
2782
2783 let header = format!("{} {} obj\n", stream_id.number(), stream_id.generation());
2784 self.write_bytes(header.as_bytes())?;
2785
2786 self.write_object_value(&Object::Dictionary(dict))?;
2787
2788 self.write_bytes(b"\nstream\n")?;
2789 self.write_bytes(&compressed_data)?;
2790 self.write_bytes(b"\nendstream\nendobj\n")?;
2791 }
2792
2793 Ok(())
2794 }
2795
2796 fn write_xref(&mut self) -> Result<()> {
2797 self.write_bytes(b"xref\n")?;
2798
2799 let mut entries: Vec<_> = self
2801 .xref_positions
2802 .iter()
2803 .map(|(id, pos)| (*id, *pos))
2804 .collect();
2805 entries.sort_by_key(|(id, _)| id.number());
2806
2807 let max_obj_num = entries.iter().map(|(id, _)| id.number()).max().unwrap_or(0);
2809
2810 self.write_bytes(b"0 ")?;
2813 self.write_bytes((max_obj_num + 1).to_string().as_bytes())?;
2814 self.write_bytes(b"\n")?;
2815
2816 self.write_bytes(b"0000000000 65535 f \n")?;
2818
2819 for obj_num in 1..=max_obj_num {
2822 let _obj_id = ObjectId::new(obj_num, 0);
2823 if let Some((_, position)) = entries.iter().find(|(id, _)| id.number() == obj_num) {
2824 let entry = format!("{:010} {:05} n \n", position, 0);
2825 self.write_bytes(entry.as_bytes())?;
2826 } else {
2827 self.write_bytes(b"0000000000 00000 f \n")?;
2829 }
2830 }
2831
2832 Ok(())
2833 }
2834
2835 fn write_xref_stream(&mut self) -> Result<()> {
2836 let catalog_id = self.get_catalog_id()?;
2837 let info_id = self.get_info_id()?;
2838
2839 let xref_stream_id = self.allocate_object_id();
2841 let xref_position = self.current_position;
2842
2843 let mut xref_writer = XRefStreamWriter::new(xref_stream_id);
2845 xref_writer.set_trailer_info(catalog_id, info_id);
2846
2847 xref_writer.add_free_entry(0, 65535);
2849
2850 let mut entries: Vec<_> = self
2852 .xref_positions
2853 .iter()
2854 .map(|(id, pos)| (*id, *pos))
2855 .collect();
2856 entries.sort_by_key(|(id, _)| id.number());
2857
2858 let max_obj_num = entries
2860 .iter()
2861 .map(|(id, _)| id.number())
2862 .max()
2863 .unwrap_or(0)
2864 .max(xref_stream_id.number());
2865
2866 for obj_num in 1..=max_obj_num {
2868 let obj_id = ObjectId::new(obj_num, 0);
2869
2870 if obj_num == xref_stream_id.number() {
2871 xref_writer.add_in_use_entry(xref_position, 0);
2873 } else if let Some((stream_id, index)) = self.compressed_object_map.get(&obj_id) {
2874 xref_writer.add_compressed_entry(stream_id.number(), *index);
2876 } else if let Some((id, position)) =
2877 entries.iter().find(|(id, _)| id.number() == obj_num)
2878 {
2879 xref_writer.add_in_use_entry(*position, id.generation());
2881 } else {
2882 xref_writer.add_free_entry(0, 0);
2884 }
2885 }
2886
2887 self.xref_positions.insert(xref_stream_id, xref_position);
2889
2890 self.write_bytes(
2892 format!(
2893 "{} {} obj\n",
2894 xref_stream_id.number(),
2895 xref_stream_id.generation()
2896 )
2897 .as_bytes(),
2898 )?;
2899
2900 let uncompressed_data = xref_writer.encode_entries();
2902 let final_data = if self.config.compress_streams {
2903 crate::compression::compress(&uncompressed_data)?
2904 } else {
2905 uncompressed_data
2906 };
2907
2908 let mut dict = xref_writer.create_dictionary(None);
2910 dict.set("Length", Object::Integer(final_data.len() as i64));
2911
2912 if self.config.compress_streams {
2914 dict.set("Filter", Object::Name("FlateDecode".to_string()));
2915 }
2916 self.write_bytes(b"<<")?;
2917 for (key, value) in dict.iter() {
2918 self.write_bytes(b"\n/")?;
2919 self.write_bytes(key.as_bytes())?;
2920 self.write_bytes(b" ")?;
2921 self.write_object_value(value)?;
2922 }
2923 self.write_bytes(b"\n>>\n")?;
2924
2925 self.write_bytes(b"stream\n")?;
2927 self.write_bytes(&final_data)?;
2928 self.write_bytes(b"\nendstream\n")?;
2929 self.write_bytes(b"endobj\n")?;
2930
2931 self.write_bytes(b"\nstartxref\n")?;
2933 self.write_bytes(xref_position.to_string().as_bytes())?;
2934 self.write_bytes(b"\n%%EOF\n")?;
2935
2936 Ok(())
2937 }
2938
2939 fn write_trailer(&mut self, xref_position: u64) -> Result<()> {
2940 let catalog_id = self.get_catalog_id()?;
2941 let info_id = self.get_info_id()?;
2942 let max_obj_num = self
2944 .xref_positions
2945 .keys()
2946 .map(|id| id.number())
2947 .max()
2948 .unwrap_or(0);
2949
2950 let mut trailer = Dictionary::new();
2951 trailer.set("Size", Object::Integer((max_obj_num + 1) as i64));
2952 trailer.set("Root", Object::Reference(catalog_id));
2953 trailer.set("Info", Object::Reference(info_id));
2954
2955 if let Some(prev_xref) = self.prev_xref_offset {
2957 trailer.set("Prev", Object::Integer(prev_xref as i64));
2958 }
2959
2960 self.write_bytes(b"trailer\n")?;
2961 self.write_object_value(&Object::Dictionary(trailer))?;
2962 self.write_bytes(b"\nstartxref\n")?;
2963 self.write_bytes(xref_position.to_string().as_bytes())?;
2964 self.write_bytes(b"\n%%EOF\n")?;
2965
2966 Ok(())
2967 }
2968
2969 fn write_bytes(&mut self, data: &[u8]) -> Result<()> {
2970 self.writer.write_all(data)?;
2971 self.current_position += data.len() as u64;
2972 Ok(())
2973 }
2974
2975 #[allow(dead_code)]
2976 fn create_widget_appearance_stream(&mut self, widget_dict: &Dictionary) -> Result<ObjectId> {
2977 let rect = if let Some(Object::Array(rect_array)) = widget_dict.get("Rect") {
2979 if rect_array.len() >= 4 {
2980 if let (
2981 Some(Object::Real(x1)),
2982 Some(Object::Real(y1)),
2983 Some(Object::Real(x2)),
2984 Some(Object::Real(y2)),
2985 ) = (
2986 rect_array.first(),
2987 rect_array.get(1),
2988 rect_array.get(2),
2989 rect_array.get(3),
2990 ) {
2991 (*x1, *y1, *x2, *y2)
2992 } else {
2993 (0.0, 0.0, 100.0, 20.0) }
2995 } else {
2996 (0.0, 0.0, 100.0, 20.0) }
2998 } else {
2999 (0.0, 0.0, 100.0, 20.0) };
3001
3002 let width = rect.2 - rect.0;
3003 let height = rect.3 - rect.1;
3004
3005 let mut content = String::new();
3007
3008 content.push_str("q\n");
3010
3011 content.push_str("0 0 0 RG\n"); content.push_str("1 w\n"); content.push_str(&format!("0 0 {width} {height} re\n"));
3017 content.push_str("S\n"); content.push_str("1 1 1 rg\n"); content.push_str(&format!("0.5 0.5 {} {} re\n", width - 1.0, height - 1.0));
3022 content.push_str("f\n"); content.push_str("Q\n");
3026
3027 let mut stream_dict = Dictionary::new();
3029 stream_dict.set("Type", Object::Name("XObject".to_string()));
3030 stream_dict.set("Subtype", Object::Name("Form".to_string()));
3031 stream_dict.set(
3032 "BBox",
3033 Object::Array(vec![
3034 Object::Real(0.0),
3035 Object::Real(0.0),
3036 Object::Real(width),
3037 Object::Real(height),
3038 ]),
3039 );
3040 stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3041 stream_dict.set("Length", Object::Integer(content.len() as i64));
3042
3043 let stream_id = self.allocate_object_id();
3045 self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3046
3047 Ok(stream_id)
3048 }
3049
3050 #[allow(dead_code)]
3051 fn create_field_appearance_stream(
3052 &mut self,
3053 field_dict: &Dictionary,
3054 widget: &crate::forms::Widget,
3055 ) -> Result<ObjectId> {
3056 let width = widget.rect.upper_right.x - widget.rect.lower_left.x;
3057 let height = widget.rect.upper_right.y - widget.rect.lower_left.y;
3058
3059 let mut content = String::new();
3061
3062 content.push_str("q\n");
3064
3065 if let Some(bg_color) = &widget.appearance.background_color {
3067 match bg_color {
3068 crate::graphics::Color::Gray(g) => {
3069 content.push_str(&format!("{g} g\n"));
3070 }
3071 crate::graphics::Color::Rgb(r, g, b) => {
3072 content.push_str(&format!("{r} {g} {b} rg\n"));
3073 }
3074 crate::graphics::Color::Cmyk(c, m, y, k) => {
3075 content.push_str(&format!("{c} {m} {y} {k} k\n"));
3076 }
3077 }
3078 content.push_str(&format!("0 0 {width} {height} re\n"));
3079 content.push_str("f\n");
3080 }
3081
3082 if let Some(border_color) = &widget.appearance.border_color {
3084 match border_color {
3085 crate::graphics::Color::Gray(g) => {
3086 content.push_str(&format!("{g} G\n"));
3087 }
3088 crate::graphics::Color::Rgb(r, g, b) => {
3089 content.push_str(&format!("{r} {g} {b} RG\n"));
3090 }
3091 crate::graphics::Color::Cmyk(c, m, y, k) => {
3092 content.push_str(&format!("{c} {m} {y} {k} K\n"));
3093 }
3094 }
3095 content.push_str(&format!("{} w\n", widget.appearance.border_width));
3096 content.push_str(&format!("0 0 {width} {height} re\n"));
3097 content.push_str("S\n");
3098 }
3099
3100 if let Some(Object::Name(ft)) = field_dict.get("FT") {
3102 if ft == "Btn" {
3103 if let Some(Object::Name(v)) = field_dict.get("V") {
3104 if v == "Yes" {
3105 content.push_str("0 0 0 RG\n"); content.push_str("2 w\n");
3108 let margin = width * 0.2;
3109 content.push_str(&format!("{} {} m\n", margin, height / 2.0));
3110 content.push_str(&format!("{} {} l\n", width / 2.0, margin));
3111 content.push_str(&format!("{} {} l\n", width - margin, height - margin));
3112 content.push_str("S\n");
3113 }
3114 }
3115 }
3116 }
3117
3118 content.push_str("Q\n");
3120
3121 let mut stream_dict = Dictionary::new();
3123 stream_dict.set("Type", Object::Name("XObject".to_string()));
3124 stream_dict.set("Subtype", Object::Name("Form".to_string()));
3125 stream_dict.set(
3126 "BBox",
3127 Object::Array(vec![
3128 Object::Real(0.0),
3129 Object::Real(0.0),
3130 Object::Real(width),
3131 Object::Real(height),
3132 ]),
3133 );
3134 stream_dict.set("Resources", Object::Dictionary(Dictionary::new()));
3135 stream_dict.set("Length", Object::Integer(content.len() as i64));
3136
3137 let stream_id = self.allocate_object_id();
3139 self.write_object(stream_id, Object::Stream(stream_dict, content.into_bytes()))?;
3140
3141 Ok(stream_id)
3142 }
3143}
3144
3145fn format_pdf_date(date: DateTime<Utc>) -> String {
3147 let formatted = date.format("D:%Y%m%d%H%M%S");
3150
3151 format!("{formatted}+00'00")
3153}
3154
3155#[cfg(test)]
3156mod tests;
3157
3158#[cfg(test)]
3159mod rigorous_tests;