use std::fs::File;
use std::io::{BufWriter, Result, Write};
use std::path::Path;
use std::vec;
use super::Object::*;
use super::{Dictionary, Document, Object, Stream, StringFormat};
use crate::{xref::*, IncrementalDocument};
impl Document {
#[inline]
pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<File> {
let mut file = BufWriter::new(File::create(path)?);
self.save_internal(&mut file)?;
Ok(file.into_inner()?)
}
#[inline]
pub fn save_to<W: Write>(&mut self, target: &mut W) -> Result<()> {
self.save_internal(target)
}
pub fn save_with_options<W: Write>(&mut self, target: &mut W, options: crate::SaveOptions) -> Result<()> {
if options.use_object_streams {
self.save_with_object_streams(target, options)
} else {
self.save_internal(target)
}
}
pub fn save_modern<W: Write>(&mut self, target: &mut W) -> Result<()> {
let options = crate::SaveOptions {
use_object_streams: true,
use_xref_streams: true,
..Default::default()
};
self.save_with_options(target, options)
}
fn save_internal<W: Write>(&mut self, target: &mut W) -> Result<()> {
let mut target = CountingWrite {
inner: target,
bytes_written: 0,
};
let mut xref = Xref::new(self.max_id + 1, self.reference_table.cross_reference_type);
writeln!(target, "%PDF-{}", self.version)?;
Writer::write_binary_mark(&mut target, &self.binary_mark)?;
for (&(id, generation), object) in &self.objects {
if object
.type_name()
.map(|name| [b"ObjStm".as_slice(), b"XRef".as_slice(), b"Linearized".as_slice()].contains(&name))
.ok()
!= Some(true)
{
Writer::write_indirect_object(&mut target, id, generation, object, &mut xref)?;
}
}
let xref_start = target.bytes_written;
match xref.cross_reference_type {
XrefType::CrossReferenceTable => {
Writer::write_xref(&mut target, &xref)?;
self.write_trailer(&mut target)?;
}
XrefType::CrossReferenceStream => {
self.write_cross_reference_stream(&mut target, &mut xref, xref_start as u32)?;
}
}
write!(target, "\nstartxref\n{xref_start}\n%%EOF")?;
Ok(())
}
fn save_with_object_streams<W: Write>(&mut self, target: &mut W, options: crate::SaveOptions) -> Result<()> {
use crate::ObjectStream;
use std::collections::HashMap;
let mut target = CountingWrite {
inner: target,
bytes_written: 0,
};
if self.version.as_str() < "1.5" {
self.version = "1.5".to_string();
}
if options.use_xref_streams {
self.reference_table.cross_reference_type = XrefType::CrossReferenceStream;
}
let mut xref = Xref::new(self.max_id + 1, self.reference_table.cross_reference_type);
writeln!(target, "%PDF-{}", self.version)?;
Writer::write_binary_mark(&mut target, &self.binary_mark)?;
let mut object_streams: Vec<crate::ObjectStream> = Vec::new();
let mut objects_to_write_directly = Vec::new();
let mut object_to_stream_map = HashMap::new();
for (&(id, generation), object) in &self.objects {
if let Object::Stream(stream) = object {
if let Ok(type_obj) = stream.dict.get(b"Type") {
if let Ok(type_name) = type_obj.as_name() {
if type_name == b"ObjStm" {
continue; }
}
}
}
if generation == 0 && ObjectStream::can_be_compressed((id, generation), object, self) {
let stream_index = object_streams.len().saturating_sub(1);
if object_streams.is_empty() ||
object_streams[stream_index].object_count() >= options.object_stream_config.max_objects_per_stream {
let new_stream = ObjectStream::builder()
.max_objects(options.object_stream_config.max_objects_per_stream)
.compression_level(options.object_stream_config.compression_level)
.build();
object_streams.push(new_stream);
}
let stream_index = object_streams.len() - 1;
object_streams[stream_index].add_object((id, generation), object.clone()).ok();
object_to_stream_map.insert((id, generation), stream_index);
} else {
objects_to_write_directly.push(((id, generation), object));
}
}
for ((id, generation), object) in objects_to_write_directly {
Writer::write_indirect_object(&mut target, id, generation, object, &mut xref)?;
}
let mut stream_count = 0;
for obj_stream in object_streams.into_iter() {
let stream_id = self.max_id + 1 + stream_count;
let stream_obj = obj_stream.to_stream_object().map_err(std::io::Error::other)?;
let mut sorted_objects: Vec<_> = obj_stream.objects.keys().cloned().collect();
sorted_objects.sort_by_key(|id| *id);
for (index_in_stream, (obj_id, _gen)) in sorted_objects.iter().enumerate() {
xref.insert(*obj_id, XrefEntry::Compressed {
container: stream_id,
index: index_in_stream as u16,
});
}
Writer::write_indirect_object(&mut target, stream_id, 0, &Object::Stream(stream_obj), &mut xref)?;
stream_count += 1;
}
self.max_id += stream_count;
let xref_start = target.bytes_written;
match xref.cross_reference_type {
XrefType::CrossReferenceTable => {
Writer::write_xref(&mut target, &xref)?;
self.write_trailer(&mut target)?;
}
XrefType::CrossReferenceStream => {
self.write_cross_reference_stream(&mut target, &mut xref, xref_start as u32)?;
}
}
write!(target, "\nstartxref\n{xref_start}\n%%EOF")?;
Ok(())
}
fn write_cross_reference_stream<W: Write>(
&mut self, file: &mut CountingWrite<&mut W>, xref: &mut Xref, xref_start: u32,
) -> Result<()> {
self.max_id += 1;
let new_obj_id_for_crs = self.max_id;
xref.insert(
new_obj_id_for_crs,
XrefEntry::Normal {
offset: xref_start,
generation: 0,
},
);
self.trailer.set("Type", Name(b"XRef".to_vec()));
self.trailer.set("Size", i64::from(self.max_id + 1));
self.trailer.set("W", Array(vec![Integer(1), Integer(4), Integer(2)]));
let filter = XRefStreamFilter::None;
let (stream, stream_length, indexes) = Writer::create_xref_steam(xref, filter)?;
self.trailer.set("Index", indexes);
if filter == XRefStreamFilter::ASCIIHexDecode {
self.trailer.set("Filter", Name(b"ASCIIHexDecode".to_vec()));
} else {
self.trailer.remove(b"Filter");
}
self.trailer.set("Length", stream_length as i64);
let trailer = &self.trailer;
let cross_reference_stream = Stream(Stream {
dict: trailer.clone(),
allows_compression: true,
content: stream,
start_position: None,
});
Writer::write_indirect_object(file, new_obj_id_for_crs, 0, &cross_reference_stream, xref)?;
Ok(())
}
fn write_trailer(&mut self, file: &mut dyn Write) -> Result<()> {
self.trailer.set("Size", i64::from(self.max_id + 1));
file.write_all(b"trailer\n")?;
Writer::write_dictionary(file, &self.trailer)?;
Ok(())
}
}
impl IncrementalDocument {
#[inline]
pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<File> {
let mut file = BufWriter::new(File::create(path)?);
self.save_internal(&mut file)?;
Ok(file.into_inner()?)
}
#[inline]
pub fn save_to<W: Write>(&mut self, target: &mut W) -> Result<()> {
self.save_internal(target)
}
fn save_internal<W: Write>(&mut self, target: &mut W) -> Result<()> {
let mut target = CountingWrite {
inner: target,
bytes_written: 0,
};
let prev_document_bytes = self.get_prev_documents_bytes();
target.inner.write_all(prev_document_bytes)?;
target.bytes_written += prev_document_bytes.len();
let mut xref = Xref::new(
self.new_document.max_id + 1,
self.get_prev_documents().reference_table.cross_reference_type,
);
if let Some(last_byte) = prev_document_bytes.last() {
if *last_byte != b'\n' {
writeln!(target)?;
}
}
writeln!(target, "%PDF-{}", self.new_document.version)?;
Writer::write_binary_mark(&mut target, &self.new_document.binary_mark)?;
for (&(id, generation), object) in &self.new_document.objects {
if object
.type_name()
.map(|name| [b"ObjStm".as_slice(), b"XRef".as_slice(), b"Linearized".as_slice()].contains(&name))
.ok()
!= Some(true)
{
Writer::write_indirect_object(&mut target, id, generation, object, &mut xref)?;
}
}
let xref_start = target.bytes_written;
match xref.cross_reference_type {
XrefType::CrossReferenceTable => {
Writer::write_xref(&mut target, &xref)?;
self.new_document.write_trailer(&mut target)?;
}
XrefType::CrossReferenceStream => {
self.new_document
.write_cross_reference_stream(&mut target, &mut xref, xref_start as u32)?;
}
}
write!(target, "\nstartxref\n{xref_start}\n%%EOF")?;
Ok(())
}
}
pub struct Writer;
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum XRefStreamFilter {
ASCIIHexDecode,
_FlateDecode, None,
}
impl Writer {
fn need_separator(object: &Object) -> bool {
matches!(*object, Null | Boolean(_) | Integer(_) | Real(_) | Reference(_))
}
fn need_end_separator(object: &Object) -> bool {
matches!(
*object,
Null | Boolean(_) | Integer(_) | Real(_) | Name(_) | Reference(_) | Object::Stream(_)
)
}
fn write_xref(file: &mut dyn Write, xref: &Xref) -> Result<()> {
writeln!(file, "xref")?;
let mut xref_section = XrefSection::new(0);
xref_section.add_unusable_free_entry();
for obj_id in 1..xref.size {
if xref_section.is_empty() {
xref_section = XrefSection::new(obj_id);
}
if let Some(entry) = xref.get(obj_id) {
match *entry {
XrefEntry::Normal { offset, generation } => {
xref_section.add_entry(XrefEntry::Normal { offset, generation });
}
XrefEntry::Compressed { container: _, index: _ } => {
xref_section.add_unusable_free_entry();
}
XrefEntry::Free => {
xref_section.add_entry(XrefEntry::Free);
}
XrefEntry::UnusableFree => {
xref_section.add_unusable_free_entry();
}
}
} else {
if !xref_section.is_empty() {
xref_section.write_xref_section(file)?;
xref_section = XrefSection::new(obj_id);
}
}
}
if !xref_section.is_empty() {
xref_section.write_xref_section(file)?;
}
Ok(())
}
fn create_xref_steam(xref: &Xref, filter: XRefStreamFilter) -> Result<(Vec<u8>, usize, Object)> {
let mut xref_sections = Vec::new();
let mut xref_section = XrefSection::new(0);
for obj_id in 1..xref.size + 1 {
if xref_section.is_empty() {
xref_section = XrefSection::new(obj_id);
}
if let Some(entry) = xref.get(obj_id) {
xref_section.add_entry(entry.clone());
} else {
if !xref_section.is_empty() {
xref_sections.push(xref_section);
xref_section = XrefSection::new(obj_id);
}
}
}
if !xref_section.is_empty() {
xref_sections.push(xref_section);
}
let mut xref_stream = Vec::new();
let mut xref_index = Vec::new();
for section in xref_sections {
xref_index.push(Integer(section.starting_id as i64));
xref_index.push(Integer(section.entries.len() as i64));
let mut obj_id = section.starting_id;
for entry in section.entries {
match entry {
XrefEntry::Free => {
xref_stream.push(0);
xref_stream.extend(obj_id.to_be_bytes());
xref_stream.extend(vec![0, 0]); }
XrefEntry::UnusableFree => {
xref_stream.push(0);
xref_stream.extend(obj_id.to_be_bytes());
xref_stream.extend(65535_u16.to_be_bytes());
}
XrefEntry::Normal { offset, generation } => {
xref_stream.push(1);
xref_stream.extend(offset.to_be_bytes());
xref_stream.extend(generation.to_be_bytes());
}
XrefEntry::Compressed { container, index } => {
xref_stream.push(2);
xref_stream.extend(container.to_be_bytes());
xref_stream.extend(index.to_be_bytes());
}
}
obj_id += 1;
}
}
let stream_length = xref_stream.len();
if filter == XRefStreamFilter::ASCIIHexDecode {
xref_stream = xref_stream
.iter()
.flat_map(|c| format!("{c:02X}").as_bytes().to_vec())
.collect::<Vec<u8>>();
}
Ok((xref_stream, stream_length, Array(xref_index)))
}
fn write_indirect_object<W: Write>(
file: &mut CountingWrite<&mut W>, id: u32, generation: u16, object: &Object, xref: &mut Xref,
) -> Result<()> {
let offset = file.bytes_written as u32;
xref.insert(id, XrefEntry::Normal { offset, generation });
write!(
file,
"{} {} obj\n{}",
id,
generation,
if Writer::need_separator(object) { " " } else { "" }
)?;
Writer::write_object(file, object)?;
writeln!(
file,
"{}\nendobj",
if Writer::need_end_separator(object) { " " } else { "" }
)?;
Ok(())
}
pub fn write_object(file: &mut dyn Write, object: &Object) -> Result<()> {
match object {
Null => file.write_all(b"null"),
Boolean(value) => {
if *value {
file.write_all(b"true")
} else {
file.write_all(b"false")
}
}
Integer(value) => {
let mut buf = itoa::Buffer::new();
file.write_all(buf.format(*value).as_bytes())
}
Real(value) => write!(file, "{value}"),
Name(name) => Writer::write_name(file, name),
String(text, format) => Writer::write_string(file, text, format),
Array(array) => Writer::write_array(file, array),
Object::Dictionary(dict) => Writer::write_dictionary(file, dict),
Object::Stream(stream) => Writer::write_stream(file, stream),
Reference(id) => write!(file, "{} {} R", id.0, id.1),
}
}
fn write_name(file: &mut dyn Write, name: &[u8]) -> Result<()> {
file.write_all(b"/")?;
for &byte in name {
if b" \t\n\r\x0C()<>[]{}/%#".contains(&byte) || !(33..=126).contains(&byte) {
write!(file, "#{byte:02X}")?;
} else {
file.write_all(&[byte])?;
}
}
Ok(())
}
fn write_string(file: &mut dyn Write, text: &[u8], format: &StringFormat) -> Result<()> {
match *format {
StringFormat::Literal => {
let mut escape_indice = Vec::new();
let mut parentheses = Vec::new();
for (index, &byte) in text.iter().enumerate() {
match byte {
b'(' => parentheses.push(index),
b')' => {
if !parentheses.is_empty() {
parentheses.pop();
} else {
escape_indice.push(index);
}
}
b'\\' | b'\r' => escape_indice.push(index),
_ => continue,
}
}
escape_indice.append(&mut parentheses);
file.write_all(b"(")?;
if !escape_indice.is_empty() {
for (index, &byte) in text.iter().enumerate() {
if escape_indice.contains(&index) {
file.write_all(b"\\")?;
file.write_all(&[if byte == b'\r' { b'r' } else { byte }])?;
} else {
file.write_all(&[byte])?;
}
}
} else {
file.write_all(text)?;
}
file.write_all(b")")?;
}
StringFormat::Hexadecimal => {
file.write_all(b"<")?;
for &byte in text {
write!(file, "{byte:02X}")?;
}
file.write_all(b">")?;
}
}
Ok(())
}
fn write_array(file: &mut dyn Write, array: &[Object]) -> Result<()> {
file.write_all(b"[")?;
let mut first = true;
for object in array {
if first {
first = false;
} else if Writer::need_separator(object) {
file.write_all(b" ")?;
}
Writer::write_object(file, object)?;
}
file.write_all(b"]")?;
Ok(())
}
fn write_dictionary(file: &mut dyn Write, dictionary: &Dictionary) -> Result<()> {
file.write_all(b"<<")?;
for (key, value) in dictionary {
Writer::write_name(file, key)?;
if Writer::need_separator(value) {
file.write_all(b" ")?;
}
Writer::write_object(file, value)?;
}
file.write_all(b">>")?;
Ok(())
}
fn write_stream(file: &mut dyn Write, stream: &Stream) -> Result<()> {
Writer::write_dictionary(file, &stream.dict)?;
file.write_all(b"stream\n")?;
file.write_all(&stream.content)?;
file.write_all(b"\nendstream")?;
Ok(())
}
fn write_binary_mark(file: &mut dyn Write, binary_mark: &[u8]) -> Result<()> {
if binary_mark.iter().all(|&byte| byte >= 128) {
file.write_all(b"%")?;
file.write_all(binary_mark)?;
file.write_all(b"\n")?;
} else {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Invalid binary mark",
));
}
Ok(())
}
}
pub struct CountingWrite<W: Write> {
inner: W,
bytes_written: usize,
}
impl<W: Write> Write for CountingWrite<W> {
#[inline]
fn write(&mut self, buffer: &[u8]) -> Result<usize> {
let result = self.inner.write(buffer);
if let Ok(bytes) = result {
self.bytes_written += bytes;
}
result
}
#[inline]
fn write_all(&mut self, buffer: &[u8]) -> Result<()> {
self.bytes_written += buffer.len();
self.inner.write_all(buffer)
}
#[inline]
fn flush(&mut self) -> Result<()> {
self.inner.flush()
}
}
#[test]
fn save_document() {
let mut doc = Document::with_version("1.5");
doc.objects.insert((1, 0), Null);
doc.objects.insert((2, 0), Boolean(true));
doc.objects.insert((3, 0), Integer(3));
doc.objects.insert((4, 0), Real(0.5));
doc.objects
.insert((5, 0), String("text((\r)".as_bytes().to_vec(), StringFormat::Literal));
doc.objects.insert(
(6, 0),
String("text((\r)".as_bytes().to_vec(), StringFormat::Hexadecimal),
);
doc.objects.insert((7, 0), Name(b"name \t".to_vec()));
doc.objects.insert((8, 0), Reference((1, 0)));
doc.objects
.insert((9, 2), Array(vec![Integer(1), Integer(2), Integer(3)]));
doc.objects
.insert((11, 0), Stream(Stream::new(Dictionary::new(), vec![0x41, 0x42, 0x43])));
let mut dict = Dictionary::new();
dict.set("A", Null);
dict.set("B", false);
dict.set("C", Name(b"name".to_vec()));
doc.objects.insert((12, 0), Object::Dictionary(dict));
doc.max_id = 12;
let temp_dir = tempfile::tempdir().unwrap();
let file_path = temp_dir.path().join("test_0_save.pdf");
doc.save(&file_path).unwrap();
assert!(file_path.exists());
assert!(file_path.is_file());
assert!(file_path.metadata().unwrap().len() > 400);
}