use crate::ffi::Handle;
use std::collections::{HashMap, HashSet};
use std::ffi::{CStr, CString, c_char};
use std::io::{Read, Write};
use std::ptr;
// Opaque FFI handle aliases. All share the underlying `Handle` type; the
// names only document which registry the handle indexes (contexts,
// documents, or outputs).
type ContextHandle = Handle;
type DocumentHandle = Handle;
type OutputHandle = Handle;
/// Returns the index of the first occurrence of `pattern` in `data`, or
/// `None` when the pattern is empty or longer than the data.
fn find_pattern(data: &[u8], pattern: &[u8]) -> Option<usize> {
    let plen = pattern.len();
    if plen == 0 || plen > data.len() {
        return None;
    }
    data.windows(plen).position(|window| window == pattern)
}
/// Returns the start indices of every (possibly overlapping) occurrence of
/// `pattern` in `data`, in ascending order. Empty or oversized patterns
/// yield an empty vector.
fn find_all_patterns(data: &[u8], pattern: &[u8]) -> Vec<usize> {
    let plen = pattern.len();
    if plen == 0 || plen > data.len() {
        return Vec::new();
    }
    data.windows(plen)
        .enumerate()
        .filter(|(_, window)| *window == pattern)
        .map(|(idx, _)| idx)
        .collect()
}
/// Returns the index of the last occurrence of `pattern` in `data`, or
/// `None` when the pattern is empty or longer than the data.
fn rfind_pattern(data: &[u8], pattern: &[u8]) -> Option<usize> {
    let plen = pattern.len();
    if plen == 0 || plen > data.len() {
        return None;
    }
    data.windows(plen).rposition(|window| window == pattern)
}
/// Parses an optionally negative ASCII decimal integer starting at `pos`,
/// skipping leading whitespace. Returns `None` when no digits follow (a
/// bare `-` does not parse) or the value does not fit in `i32`.
///
/// Fix: the original parsed the magnitude alone and then negated it, which
/// rejected `-2147483648` (i32::MIN, whose magnitude overflows i32). The
/// sign and digits are now parsed together.
fn extract_int_after(data: &[u8], pos: usize) -> Option<i32> {
    let mut i = pos;
    // Skip leading whitespace before the number.
    while i < data.len() && data[i].is_ascii_whitespace() {
        i += 1;
    }
    let sign_start = i;
    if i < data.len() && data[i] == b'-' {
        i += 1;
    }
    let digits_start = i;
    while i < data.len() && data[i].is_ascii_digit() {
        i += 1;
    }
    if i == digits_start {
        // No digits after the optional sign.
        return None;
    }
    // Sign + digits are guaranteed ASCII, so from_utf8 cannot fail here,
    // but we propagate errors defensively rather than unwrap.
    std::str::from_utf8(&data[sign_start..i])
        .ok()?
        .parse::<i32>()
        .ok()
}
/// Given `start` pointing at a `<<` token, returns the index of the
/// matching `>>` (tracking nested dictionaries), or `None` when `start`
/// is not on `<<` or the dictionary is unterminated.
fn find_dict_end(data: &[u8], start: usize) -> Option<usize> {
    if data.len() < start + 2 || &data[start..start + 2] != b"<<" {
        return None;
    }
    let mut depth: i32 = 0;
    let mut i = start;
    while i + 1 < data.len() {
        match (data[i], data[i + 1]) {
            (b'<', b'<') => {
                depth += 1;
                i += 2;
            }
            (b'>', b'>') => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
                i += 2;
            }
            _ => i += 1,
        }
    }
    None
}
/// Locates the trailer dictionary: the `<<` following the *last* `trailer`
/// keyword through to just past its matching `>>`. Returns the half-open
/// byte range `(dict_start, dict_end_exclusive)`.
fn find_trailer_region(data: &[u8]) -> Option<(usize, usize)> {
    // Updated files may contain several trailers; the last one wins.
    let trailer_pos = rfind_pattern(data, b"trailer")?;
    let dict_start = trailer_pos + find_pattern(&data[trailer_pos..], b"<<")?;
    find_dict_end(data, dict_start).map(|end| (dict_start, end + 2))
}
/// Searches `data[region_start..region_end]` for `key` and returns the
/// absolute index just *past* the key (i.e. where its value begins), or
/// `None` when the key is absent or the region is empty/invalid.
fn find_dict_key(data: &[u8], region_start: usize, region_end: usize, key: &[u8]) -> Option<usize> {
    let clamped_end = region_end.min(data.len());
    if region_start >= clamped_end {
        return None;
    }
    let offset = find_pattern(&data[region_start..clamped_end], key)?;
    Some(region_start + offset + key.len())
}
/// Reads the object number of an indirect reference (`N 0 R`) whose value
/// starts at `pos`. Only the leading integer is consumed; the generation
/// number and `R` keyword are not validated.
fn resolve_indirect_ref(data: &[u8], pos: usize) -> Option<i32> {
    extract_int_after(data, pos)
}
/// Finds the dictionary of object `obj_num` and returns its half-open byte
/// range (from `<<` through just past the matching `>>`). When the object
/// header appears several times (incremental updates), later occurrences
/// are tried first.
fn find_object_dict(data: &[u8], obj_num: i32) -> Option<(usize, usize)> {
    let header = format!("{} 0 obj", obj_num);
    for &pos in find_all_patterns(data, header.as_bytes()).iter().rev() {
        let Some(rel) = find_pattern(&data[pos..], b"<<") else {
            continue;
        };
        let dict_start = pos + rel;
        if let Some(dict_end) = find_dict_end(data, dict_start) {
            return Some((dict_start, dict_end + 2));
        }
    }
    None
}
/// Returns the object number of the document catalog, read from the
/// trailer's `/Root` entry.
fn find_root_obj_num(data: &[u8]) -> Option<i32> {
    let (region_start, region_end) = find_trailer_region(data)?;
    let value_pos = find_dict_key(data, region_start, region_end, b"/Root")?;
    resolve_indirect_ref(data, value_pos)
}
/// Returns the byte offset of the start of the `N 0 obj` line that defines
/// the zero-based `page_num`-th page object, located by counting
/// `/Type /Page` markers (excluding `/Type /Pages` tree nodes) and walking
/// back at most 500 bytes to the enclosing object header.
fn find_page_obj_position(data: &[u8], page_num: i32) -> Option<usize> {
    let marker: &[u8] = b"/Type /Page";
    let mut seen = 0i32;
    let mut i = 0usize;
    while i + marker.len() <= data.len() {
        // Match "/Type /Page" but reject "/Type /Pages" (a tree node).
        if &data[i..i + marker.len()] == marker && data.get(i + marker.len()) != Some(&b's') {
            if seen == page_num {
                let window_start = i.saturating_sub(500);
                if let Some(obj_rel) = rfind_pattern(&data[window_start..i], b" obj") {
                    let keyword_pos = window_start + obj_rel;
                    // Back up to the beginning of the "N 0 obj" line.
                    let line_start = data[..keyword_pos]
                        .iter()
                        .rposition(|&b| matches!(b, b'\n' | b'\r'))
                        .map_or(0, |p| p + 1);
                    return Some(line_start);
                }
            }
            seen += 1;
        }
        i += 1;
    }
    None
}
/// Returns the half-open byte range of object `obj_num`, from its
/// `N 0 obj` header through the end of the `endobj` keyword. Later header
/// occurrences are preferred when the object appears multiple times.
fn find_object_range(data: &[u8], obj_num: i32) -> Option<(usize, usize)> {
    let header = format!("{} 0 obj", obj_num);
    find_all_patterns(data, header.as_bytes())
        .into_iter()
        .rev()
        .find_map(|pos| {
            find_pattern(&data[pos..], b"endobj").map(|rel| (pos, pos + rel + b"endobj".len()))
        })
}
/// Extracts the raw stream payload of object `obj_num`. Returns the copied
/// bytes plus the half-open `(start, end)` range they occupy in `data`.
/// The end-of-line after the `stream` keyword and the one before
/// `endstream` are excluded, per the PDF stream syntax.
fn extract_stream_data(data: &[u8], obj_num: i32) -> Option<(Vec<u8>, usize, usize)> {
    let header = format!("{} 0 obj", obj_num);
    let obj_pos = find_pattern(data, header.as_bytes())?;
    let stream_rel = find_pattern(&data[obj_pos..], b"stream")?;
    let mut start = obj_pos + stream_rel + b"stream".len();
    // Skip a CRLF or lone LF following the "stream" keyword.
    if data.get(start) == Some(&b'\r') {
        start += 1;
    }
    if data.get(start) == Some(&b'\n') {
        start += 1;
    }
    let endstream_rel = find_pattern(&data[start..], b"endstream")?;
    let mut end = start + endstream_rel;
    // Strip one trailing EOL before "endstream".
    if end > start && data[end - 1] == b'\n' {
        end -= 1;
    }
    if end > start && data[end - 1] == b'\r' {
        end -= 1;
    }
    Some((data[start..end].to_vec(), start, end))
}
/// Collects the positive object numbers of every `N 0 obj` header found at
/// the start of a (trimmed) line, sorted ascending and deduplicated.
fn collect_all_object_numbers(data: &[u8]) -> Vec<i32> {
    let text = String::from_utf8_lossy(data);
    let mut numbers: Vec<i32> = text
        .lines()
        .filter_map(|raw| {
            let line = raw.trim();
            let prefix = &line[..line.find(" 0 obj")?];
            prefix.trim().parse::<i32>().ok().filter(|&n| n > 0)
        })
        .collect();
    numbers.sort_unstable();
    numbers.dedup();
    numbers
}
/// Gathers the set of object numbers the document references: the catalog
/// (`/Root`), trailer-level `/Info` and `/Encrypt` entries, and every
/// `N 0 R` indirect-reference token triple found anywhere in the file.
fn collect_referenced_objects(data: &[u8]) -> HashSet<i32> {
    let mut referenced = HashSet::new();
    referenced.extend(find_root_obj_num(data));
    if let Some((ts, te)) = find_trailer_region(data) {
        for key in [&b"/Info"[..], &b"/Encrypt"[..]] {
            if let Some(value_pos) = find_dict_key(data, ts, te, key) {
                referenced.extend(resolve_indirect_ref(data, value_pos));
            }
        }
    }
    // Any "N 0 R" whitespace-token triple is an indirect reference.
    let content = String::from_utf8_lossy(data);
    let tokens: Vec<&str> = content.split_whitespace().collect();
    for triple in tokens.windows(3) {
        if triple[1] == "0" && triple[2] == "R" {
            if let Ok(num) = triple[0].parse::<i32>() {
                if num > 0 {
                    referenced.insert(num);
                }
            }
        }
    }
    referenced
}
/// Returns `true` when object `obj_num` has a dictionary containing a
/// `/Filter` key.
fn object_has_filter(data: &[u8], obj_num: i32) -> bool {
    find_object_dict(data, obj_num)
        .and_then(|(ds, de)| find_dict_key(data, ds, de, b"/Filter"))
        .is_some()
}
/// Returns `true` when the text between object `obj_num`'s header and its
/// `endobj` keyword contains a `stream` keyword.
fn object_has_stream(data: &[u8], obj_num: i32) -> bool {
    let header = format!("{} 0 obj", obj_num);
    let Some(pos) = find_pattern(data, header.as_bytes()) else {
        return false;
    };
    let body = &data[pos..];
    match find_pattern(body, b"endobj") {
        Some(endobj_rel) => find_pattern(&body[..endobj_rel], b"stream").is_some(),
        None => false,
    }
}
/// Rebuilds the cross-reference table: keeps the body bytes up to the end
/// of the last `endobj`, then appends a fresh `xref` section, the existing
/// trailer dictionary (or a minimal `/Size`-only one), and a `startxref`
/// pointer.
///
/// Fix: removed the dead `header_end` computation — it was calculated and
/// then discarded via `let _ = header_end;` without ever being used.
fn rebuild_pdf_xref(data: &[u8]) -> Vec<u8> {
    let objects = collect_all_object_numbers(data);
    // Offset of each object's first "N 0 obj" header occurrence.
    let mut obj_offsets: Vec<(i32, usize)> = Vec::new();
    for &obj_num in &objects {
        let header = format!("{} 0 obj", obj_num);
        if let Some(pos) = find_pattern(data, header.as_bytes()) {
            obj_offsets.push((obj_num, pos));
        }
    }
    obj_offsets.sort_by_key(|&(num, _)| num);
    // Body content runs through the end of the last "endobj".
    let content_end = objects
        .iter()
        .filter_map(|&n| find_object_range(data, n))
        .map(|(_, end)| end)
        .max()
        .unwrap_or(0);
    let mut output = Vec::with_capacity(content_end + 64);
    output.extend_from_slice(&data[..content_end]);
    if output.last().is_some_and(|&b| b != b'\n') {
        output.push(b'\n');
    }
    let xref_start = output.len();
    let max_obj = obj_offsets.iter().map(|&(n, _)| n).max().unwrap_or(0) as usize;
    output.extend_from_slice(b"xref\n");
    output.extend_from_slice(format!("0 {}\n", max_obj + 1).as_bytes());
    // Entry 0 is the conventional head of the free list.
    output.extend_from_slice(b"0000000000 65535 f \n");
    let offset_map: HashMap<i32, usize> = obj_offsets.iter().copied().collect();
    for i in 1..=max_obj {
        match offset_map.get(&(i as i32)) {
            Some(&offset) => {
                output.extend_from_slice(format!("{:010} 00000 n \n", offset).as_bytes());
            }
            // Gaps in the numbering become free entries.
            None => output.extend_from_slice(b"0000000000 00000 f \n"),
        }
    }
    output.extend_from_slice(b"trailer\n");
    if let Some((ts, te)) = find_trailer_region(data) {
        output.extend_from_slice(&data[ts..te]);
    } else {
        // No trailer found: synthesize a minimal one.
        output.extend_from_slice(format!("<< /Size {} >>\n", max_obj + 1).as_bytes());
    }
    output.push(b'\n');
    output.extend_from_slice(format!("startxref\n{}\n%%EOF\n", xref_start).as_bytes());
    output
}
/// Whether the document's structure tree (tagging/accessibility data) is
/// kept when cleaning; consumed by `pdf_clean_file`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[repr(C)]
pub enum CleanStructureOption {
    /// Remove the structure tree (the default).
    #[default]
    Drop = 0,
    /// Preserve the structure tree.
    Keep = 1,
}
/// Whether page content should be vectorized during cleaning.
/// NOTE(review): currently has no effect — `pdf_vectorize_pages` in this
/// file is an empty stub.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[repr(C)]
pub enum CleanVectorizeOption {
    /// Leave page content as-is (the default).
    #[default]
    No = 0,
    /// Request vectorization.
    Yes = 1,
}
/// Encryption scheme to apply when saving.
/// NOTE(review): not consumed anywhere in this file — presumably read by
/// the save pipeline elsewhere; verify before relying on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[repr(C)]
pub enum EncryptionMethod {
    /// Keep the document's existing encryption (the default).
    #[default]
    Keep = 0,
    /// Remove encryption entirely.
    None = 1,
    /// 40-bit RC4.
    Rc4_40 = 2,
    /// 128-bit RC4.
    Rc4_128 = 3,
    /// 128-bit AES.
    Aes128 = 4,
    /// 256-bit AES.
    Aes256 = 5,
}
/// Stream-compression algorithm selector. The discriminants match the
/// integer `method` accepted by `pdf_compress_streams` /
/// `compress_streams_data` (2 selects brotli, other values >= 1 zlib).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[repr(C)]
pub enum CompressionMethod {
    /// No recompression (the default).
    #[default]
    None = 0,
    /// zlib / FlateDecode.
    Zlib = 1,
    /// Brotli (non-standard "/BrotliDecode" filter name).
    Brotli = 2,
}
/// C-layout document write options. All `do_*` fields are boolean-ish
/// integers (0 = off); see `WriteOptions::parse`/`format` for the flag
/// characters. Only `do_garbage`, `do_decompress`, `do_compress`, and
/// `do_sanitize` are consumed by `apply_write_options` in this file; the
/// remaining fields are carried for ABI compatibility.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct WriteOptions {
    /// Append changes as an incremental update (flag 'g' family unrelated).
    pub do_incremental: i32,
    /// Pretty-print object syntax (flag 'p').
    pub do_pretty: i32,
    /// ASCII-safe output (flag 'a').
    pub do_ascii: i32,
    /// Stream compression: 1 = standard ('z'), 2 = extra effort ('Z').
    pub do_compress: i32,
    /// Compress image streams (cleared by flag 'i').
    pub do_compress_images: i32,
    /// Compress font streams (cleared by flag 'f').
    pub do_compress_fonts: i32,
    /// Decompress all streams (flag 'd').
    pub do_decompress: i32,
    /// Garbage-collection level 1..=3 (flags 'g'/'G'/'D').
    pub do_garbage: i32,
    /// Linearize for fast web view (flag 'l').
    pub do_linear: i32,
    /// Clean content streams (flag 'c').
    pub do_clean: i32,
    /// Sanitize (strip active content) (flag 's').
    pub do_sanitize: i32,
    /// Regenerate appearance streams (flag 'A').
    pub do_appearance: i32,
    /// Apply encryption on save (not settable via `parse`).
    pub do_encrypt: i32,
    /// Keep the existing document /ID instead of regenerating it.
    pub dont_regenerate_id: i32,
    /// Permission bit mask; -1 (all bits set) is the `new()` default.
    pub permissions: i32,
    /// NUL-terminated UTF-8 owner password (see `set_owner_password`).
    pub opwd_utf8: [u8; 128],
    /// NUL-terminated UTF-8 user password (see `set_user_password`).
    pub upwd_utf8: [u8; 128],
    /// Write a snapshot rather than a full save.
    pub do_snapshot: i32,
    /// Preserve metadata (flag 'm').
    pub do_preserve_metadata: i32,
    /// Pack objects into object streams (flag 'o').
    pub do_use_objstms: i32,
    /// Compression effort hint; 0 = library default.
    pub compression_effort: i32,
    /// Emit page labels (flag 'L').
    pub do_labels: i32,
}
impl Default for WriteOptions {
    /// Delegates to [`WriteOptions::new`] so `Default` and `new` agree.
    fn default() -> Self {
        Self::new()
    }
}
impl WriteOptions {
    /// Default options: compress streams, images, and fonts; all permission
    /// bits granted (`permissions: -1`); every other feature disabled.
    pub fn new() -> Self {
        Self {
            do_incremental: 0,
            do_pretty: 0,
            do_ascii: 0,
            do_compress: 1,
            do_compress_images: 1,
            do_compress_fonts: 1,
            do_decompress: 0,
            do_garbage: 0,
            do_linear: 0,
            do_clean: 0,
            do_sanitize: 0,
            do_appearance: 0,
            do_encrypt: 0,
            dont_regenerate_id: 0,
            // -1 sets every permission bit.
            permissions: -1,
            opwd_utf8: [0; 128],
            upwd_utf8: [0; 128],
            do_snapshot: 0,
            do_preserve_metadata: 0,
            do_use_objstms: 0,
            compression_effort: 0,
            do_labels: 0,
        }
    }

    /// Stores `password` as the NUL-terminated owner password, truncated to
    /// 127 bytes.
    ///
    /// Fix: the buffer is now zeroed first. The original only wrote a NUL
    /// after the new bytes, so setting a shorter password left trailing
    /// bytes of a previously stored longer one in the buffer.
    pub fn set_owner_password(&mut self, password: &str) {
        Self::store_password(&mut self.opwd_utf8, password);
    }

    /// Stores `password` as the NUL-terminated user password; same
    /// truncation and zeroing rules as [`Self::set_owner_password`].
    pub fn set_user_password(&mut self, password: &str) {
        Self::store_password(&mut self.upwd_utf8, password);
    }

    /// Zeroes `buf`, then copies at most 127 bytes of `password`, so the
    /// result is always NUL-terminated with no stale bytes. Note:
    /// truncation is byte-based and may split a multi-byte UTF-8 character
    /// (unchanged from the original behavior).
    fn store_password(buf: &mut [u8; 128], password: &str) {
        buf.fill(0);
        let len = password.len().min(127);
        buf[..len].copy_from_slice(&password.as_bytes()[..len]);
    }

    /// Parses a flag string, enabling one option per character; unknown
    /// characters are silently ignored.
    pub fn parse(&mut self, args: &str) {
        for c in args.chars() {
            match c {
                'g' => self.do_garbage = 1,
                'G' => self.do_garbage = 2,
                'D' => self.do_garbage = 3,
                'd' => self.do_decompress = 1,
                'i' => {
                    // Decompress, but leave images uncompressed.
                    self.do_decompress = 1;
                    self.do_compress_images = 0;
                }
                'f' => {
                    // Decompress, but leave fonts uncompressed.
                    self.do_decompress = 1;
                    self.do_compress_fonts = 0;
                }
                'l' => self.do_linear = 1,
                'a' => self.do_ascii = 1,
                'z' => self.do_compress = 1,
                'Z' => self.do_compress = 2,
                'c' => self.do_clean = 1,
                's' => self.do_sanitize = 1,
                'p' => self.do_pretty = 1,
                'A' => self.do_appearance = 1,
                'm' => self.do_preserve_metadata = 1,
                'o' => self.do_use_objstms = 1,
                'L' => self.do_labels = 1,
                _ => {}
            }
        }
    }

    /// Formats the enabled options back into the flag-string form accepted
    /// by [`Self::parse`]. The 'i'/'f' states are not representable here
    /// and are not round-tripped (unchanged from the original behavior).
    pub fn format(&self) -> String {
        let mut s = String::new();
        match self.do_garbage {
            1 => s.push('g'),
            2 => s.push('G'),
            3 => s.push('D'),
            _ => {}
        }
        // Emission order matches the original: d l a z Z c s p A m o L.
        let flags = [
            (self.do_decompress != 0, 'd'),
            (self.do_linear != 0, 'l'),
            (self.do_ascii != 0, 'a'),
            (self.do_compress == 1, 'z'),
            (self.do_compress == 2, 'Z'),
            (self.do_clean != 0, 'c'),
            (self.do_sanitize != 0, 's'),
            (self.do_pretty != 0, 'p'),
            (self.do_appearance != 0, 'A'),
            (self.do_preserve_metadata != 0, 'm'),
            (self.do_use_objstms != 0, 'o'),
            (self.do_labels != 0, 'L'),
        ];
        for (enabled, ch) in flags {
            if enabled {
                s.push(ch);
            }
        }
        s
    }
}
/// Image recompression parameters carried inside [`CleanOptions`].
/// NOTE(review): not consumed anywhere in this file — presumably used by
/// an image rewriter elsewhere; field semantics below are unverified.
#[derive(Debug, Clone, Default)]
#[repr(C)]
pub struct ImageRewriterOptions {
    // Target bits per component; 0 presumably means "keep" — TODO confirm.
    pub color_depth: i32,
    // Target resolution for downsampling; 0 presumably disables — TODO confirm.
    pub dpi: i32,
    // JPEG quality (conventionally 1-100) — TODO confirm range.
    pub jpeg_quality: i32,
    // Non-zero requests recompression of already-compressed images.
    pub recompress: i32,
}
/// Aggregate options for `pdf_clean_file`: serialization settings plus
/// image, font, structure-tree, and vectorization handling.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct CleanOptions {
    /// How the cleaned document is serialized.
    pub write: WriteOptions,
    /// Image recompression parameters (not consumed in this file).
    pub image: ImageRewriterOptions,
    /// Non-zero requests font subsetting (not consumed in this file).
    pub subset_fonts: i32,
    /// Drop or keep the structure tree (consumed by `pdf_clean_file`).
    pub structure: CleanStructureOption,
    /// Vectorization request (ignored; `pdf_vectorize_pages` is a stub).
    pub vectorize: CleanVectorizeOption,
}
impl Default for CleanOptions {
    /// Delegates to [`CleanOptions::new`] so `Default` and `new` agree.
    fn default() -> Self {
        Self::new()
    }
}
impl CleanOptions {
    /// Baseline clean configuration: default write options, no image
    /// rewriting, fonts untouched, structure tree dropped, no vectorization.
    pub fn new() -> Self {
        Self {
            write: WriteOptions::new(),
            image: ImageRewriterOptions::default(),
            subset_fonts: 0,
            structure: CleanStructureOption::Drop,
            vectorize: CleanVectorizeOption::No,
        }
    }

    /// Preset aimed at minimum file size: maximum garbage collection plus
    /// compression, cleaning, sanitization, and font subsetting.
    pub fn optimize() -> Self {
        let mut write = WriteOptions::new();
        write.do_garbage = 3;
        write.do_compress = 1;
        write.do_clean = 1;
        write.do_sanitize = 1;
        Self {
            write,
            subset_fonts: 1,
            ..Self::new()
        }
    }

    /// Preset for linearized ("fast web view") output with light garbage
    /// collection and compression.
    pub fn linearize() -> Self {
        let mut write = WriteOptions::new();
        write.do_linear = 1;
        write.do_garbage = 1;
        write.do_compress = 1;
        Self {
            write,
            ..Self::new()
        }
    }
}
/// C ABI: returns the default [`WriteOptions`] by value.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_default_write_options() -> WriteOptions {
    WriteOptions::new()
}
/// C ABI: returns the default [`CleanOptions`] by value.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_default_clean_options() -> CleanOptions {
    CleanOptions::new()
}
/// C ABI: parses the NUL-terminated flag string `args` into `*opts` and
/// returns `opts` unchanged. Null pointers make this a no-op; invalid
/// UTF-8 in `args` is treated as an empty flag string.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_parse_write_options(
    _ctx: ContextHandle,
    opts: *mut WriteOptions,
    args: *const c_char,
) -> *mut WriteOptions {
    if !opts.is_null() && !args.is_null() {
        let flags = unsafe { CStr::from_ptr(args) }.to_str().unwrap_or_default();
        unsafe { (*opts).parse(flags) };
    }
    opts
}
/// C ABI: writes the flag-string form of `*opts` into `buffer` as a
/// NUL-terminated C string, truncating to `buffer_len - 1` bytes, and
/// returns `buffer`. Null/zero-length arguments make this a no-op.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_format_write_options(
    _ctx: ContextHandle,
    buffer: *mut c_char,
    buffer_len: usize,
    opts: *const WriteOptions,
) -> *mut c_char {
    if buffer.is_null() || buffer_len == 0 || opts.is_null() {
        return buffer;
    }
    let flags = unsafe { &*opts }.format();
    // Always reserve one byte for the NUL terminator.
    let copy_len = flags.len().min(buffer_len - 1);
    unsafe {
        ptr::copy_nonoverlapping(flags.as_ptr(), buffer.cast::<u8>(), copy_len);
        *buffer.add(copy_len) = 0;
    }
    buffer
}
/// C ABI: returns 1 when the document looks incrementally saveable — it
/// has a `%PDF-` header and at least one `startxref` keyword — else 0.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_can_be_saved_incrementally(_ctx: ContextHandle, doc: DocumentHandle) -> i32 {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return 0;
    };
    let Ok(guard) = document.lock() else {
        return 0;
    };
    let data = guard.data();
    let saveable = data.starts_with(b"%PDF-") && rfind_pattern(data, b"startxref").is_some();
    saveable as i32
}
/// C ABI: returns 1 when the document contains a signature field
/// (`/FT /Sig`) with no `/ByteRange` entry within a window around it —
/// i.e. a signature that has not been committed — and 0 otherwise.
///
/// Fix: removed the dead `region` slice, which was computed from the
/// 500-byte look-behind window but never used (only `around` is checked).
#[unsafe(no_mangle)]
pub extern "C" fn pdf_has_unsaved_sigs(_ctx: ContextHandle, doc: DocumentHandle) -> i32 {
    if let Some(document) = super::DOCUMENTS.get(doc) {
        if let Ok(guard) = document.lock() {
            let data = guard.data();
            for &pos in &find_all_patterns(data, b"/FT /Sig") {
                // A committed signature carries /ByteRange near its field;
                // inspect 500 bytes before through 1000 bytes after.
                let window_start = pos.saturating_sub(500);
                let window_end = (pos + 1000).min(data.len());
                if find_pattern(&data[window_start..window_end], b"/ByteRange").is_none() {
                    return 1;
                }
            }
        }
    }
    0
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_save_document(
_ctx: ContextHandle,
doc: DocumentHandle,
filename: *const c_char,
opts: *const WriteOptions,
) {
if filename.is_null() {
return;
}
if let Some(document) = super::DOCUMENTS.get(doc) {
if let Ok(mut guard) = document.lock() {
let mut data = guard.data().to_vec();
if !opts.is_null() {
let write_opts = unsafe { &*opts };
data = apply_write_options(&data, write_opts);
}
let c_str = unsafe { std::ffi::CStr::from_ptr(filename) };
if let Ok(path) = c_str.to_str() {
if std::fs::write(path, &data).is_ok() {
guard.set_data(data);
}
}
}
}
}
/// C ABI: serializes the document to output handle `out`, first applying
/// `opts` (when non-null). Unlike `pdf_save_document`, the in-memory
/// document is never modified. Write failures are ignored.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_write_document(
    _ctx: ContextHandle,
    doc: DocumentHandle,
    out: OutputHandle,
    opts: *const WriteOptions,
) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(guard) = document.lock() else {
        return;
    };
    let mut data = guard.data().to_vec();
    if let Some(write_opts) = unsafe { opts.as_ref() } {
        data = apply_write_options(&data, write_opts);
    }
    let Some(output_arc) = super::output::OUTPUTS.get(out) else {
        return;
    };
    if let Ok(mut output_guard) = output_arc.lock() {
        // Best-effort write; errors are deliberately discarded.
        let _ = output_guard.write_data(&data);
    }
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_save_snapshot(
_ctx: ContextHandle,
doc: DocumentHandle,
filename: *const c_char,
) {
if filename.is_null() {
return;
}
if let Some(document) = super::DOCUMENTS.get(doc) {
if let Ok(guard) = document.lock() {
let c_str = unsafe { std::ffi::CStr::from_ptr(filename) };
if let Ok(path) = c_str.to_str() {
let _ = std::fs::write(path, guard.data());
}
}
}
}
/// C ABI: writes the document's current bytes verbatim to output handle
/// `out`. Failures are silently ignored.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_write_snapshot(_ctx: ContextHandle, doc: DocumentHandle, out: OutputHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(guard) = document.lock() else {
        return;
    };
    let Some(output_arc) = super::output::OUTPUTS.get(out) else {
        return;
    };
    if let Ok(mut output_guard) = output_arc.lock() {
        let _ = output_guard.write_data(guard.data());
    }
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_save_journal(
_ctx: ContextHandle,
doc: DocumentHandle,
filename: *const c_char,
) {
if filename.is_null() {
return;
}
if let Some(document) = super::DOCUMENTS.get(doc) {
if let Ok(guard) = document.lock() {
let data = guard.data();
let journal = build_journal_data(data);
let c_str = unsafe { std::ffi::CStr::from_ptr(filename) };
if let Ok(path) = c_str.to_str() {
let _ = std::fs::write(path, journal);
}
}
}
}
/// C ABI: writes a JSON journal record (see `build_journal_data`) for the
/// document to output handle `out`. Failures are silently ignored.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_write_journal(_ctx: ContextHandle, doc: DocumentHandle, out: OutputHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(guard) = document.lock() else {
        return;
    };
    let journal = build_journal_data(guard.data());
    let Some(output_arc) = super::output::OUTPUTS.get(out) else {
        return;
    };
    if let Ok(mut output_guard) = output_arc.lock() {
        let _ = output_guard.write_data(&journal);
    }
}
/// Serializes a one-line JSON journal record describing `data`: object
/// count, estimated page count, byte size, a 64-bit content hash, and the
/// list of object numbers.
fn build_journal_data(data: &[u8]) -> Vec<u8> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let objects = collect_all_object_numbers(data);
    // Count "/Type /Page" markers, excluding "/Type /Pages" tree nodes.
    let marker: &[u8] = b"/Type /Page";
    let mut page_count_est = 0i32;
    if data.len() >= marker.len() {
        for (i, window) in data.windows(marker.len()).enumerate() {
            if window == marker && data.get(i + marker.len()) != Some(&b's') {
                page_count_est += 1;
            }
        }
    }
    let data_hash = {
        let mut hasher = DefaultHasher::new();
        data.hash(&mut hasher);
        hasher.finish()
    };
    let object_list = objects
        .iter()
        .map(i32::to_string)
        .collect::<Vec<_>>()
        .join(",");
    format!(
        "{{\"type\":\"pdf_journal\",\"version\":1,\"object_count\":{},\"page_count\":{},\"data_size\":{},\"data_hash\":\"{:016x}\",\"objects\":[{}]}}\n",
        objects.len(),
        page_count_est,
        data.len(),
        data_hash,
        object_list
    )
    .into_bytes()
}
/// C ABI: reads the PDF at `infile`, applies write options (from `opts`,
/// or a default garbage-collect/compress/sanitize set when `opts` is
/// null) plus optional structure-tree removal, and writes the result to
/// `outfile`. `_password`, `_retainlen`, and `_retainlist` are accepted
/// for ABI compatibility but ignored. All failures are silent no-ops.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_clean_file(
    _ctx: ContextHandle,
    infile: *const c_char,
    outfile: *const c_char,
    _password: *const c_char,
    opts: *const CleanOptions,
    _retainlen: i32,
    _retainlist: *const *const c_char,
) {
    if infile.is_null() || outfile.is_null() {
        return;
    }
    let in_path = match unsafe { CStr::from_ptr(infile).to_str() } {
        Ok(s) => s,
        Err(_) => return,
    };
    let out_path = match unsafe { CStr::from_ptr(outfile).to_str() } {
        Ok(s) => s,
        Err(_) => return,
    };
    let data = match std::fs::read(in_path) {
        Ok(d) => d,
        Err(_) => return,
    };
    // Refuse anything that does not carry a PDF header.
    if !data.starts_with(b"%PDF-") {
        return;
    }
    let write_opts = if !opts.is_null() {
        let clean_opts = unsafe { &*opts };
        clean_opts.write.clone()
    } else {
        // No options supplied: light garbage collection, compression,
        // and sanitization.
        let mut w = WriteOptions::new();
        w.do_garbage = 1;
        w.do_compress = 1;
        w.do_sanitize = 1;
        w
    };
    let processed = apply_write_options(&data, &write_opts);
    // The structure tree is dropped unless the caller explicitly set
    // `structure` to `Keep`.
    let final_data = if !opts.is_null() {
        let clean_opts = unsafe { &*opts };
        if clean_opts.structure == CleanStructureOption::Drop {
            remove_structure_tree(&processed)
        } else {
            processed
        }
    } else {
        remove_structure_tree(&processed)
    };
    let _ = std::fs::write(out_path, final_data);
}
/// Removes tagging metadata entries from the document catalog: the
/// /StructTreeRoot entry, then the /MarkInfo entry. Only the catalog
/// entries are dropped; the objects they referenced remain in the file.
fn remove_structure_tree(data: &[u8]) -> Vec<u8> {
    let mut result = data.to_vec();
    if let Some(root_num) = find_root_obj_num(&result) {
        if let Some((ds, de)) = find_object_dict(&result, root_num) {
            if let Some(key_pos) = find_dict_key(&result, ds, de, b"/StructTreeRoot") {
                // key_pos points just past the key; back up to include it.
                let remove_start = key_pos - b"/StructTreeRoot".len();
                let mut remove_end = key_pos;
                // Consume the value: stop at the next '/' key or at the
                // dictionary close ">>".
                while remove_end < de && result[remove_end] != b'/' {
                    if remove_end + 1 < result.len()
                        && result[remove_end] == b'>'
                        && result[remove_end + 1] == b'>'
                    {
                        break;
                    }
                    remove_end += 1;
                }
                if remove_end > remove_start && remove_start < result.len() {
                    result.drain(remove_start..remove_end.min(result.len()));
                }
            }
        }
    }
    // Re-resolve the catalog from scratch: the drain above shifted every
    // later byte offset.
    if let Some(root_num) = find_root_obj_num(&result) {
        if let Some((ds, de)) = find_object_dict(&result, root_num) {
            if let Some(key_pos) = find_dict_key(&result, ds, de, b"/MarkInfo") {
                let remove_start = key_pos - b"/MarkInfo".len();
                let mut remove_end = key_pos;
                // NOTE(review): a dictionary-valued /MarkInfo
                // (<< /Marked true >>) stops this scan at the nested
                // '/Marked' key, leaving part of the value behind — confirm
                // whether that is intended.
                while remove_end < de {
                    if remove_end + 1 < result.len()
                        && result[remove_end] == b'>'
                        && result[remove_end + 1] == b'>'
                    {
                        break;
                    }
                    if result[remove_end] == b'/' {
                        break;
                    }
                    remove_end += 1;
                }
                if remove_end > remove_start && remove_start < result.len() {
                    result.drain(remove_start..remove_end.min(result.len()));
                }
            }
        }
    }
    result
}
/// Applies the subset of `WriteOptions` this backend supports, in a fixed
/// order: garbage collection, stream decompression, stream compression,
/// then content sanitization. Other option fields are ignored here.
/// NOTE(review): `do_decompress` combined with the default
/// `do_compress == 1` means decompressed streams are recompressed in the
/// same pass — confirm that is intended.
fn apply_write_options(data: &[u8], opts: &WriteOptions) -> Vec<u8> {
    let mut result = data.to_vec();
    // Levels 1..=3 are all forwarded to the collector.
    if opts.do_garbage >= 1 {
        result = garbage_collect_data(&result, opts.do_garbage);
    }
    if opts.do_decompress != 0 {
        result = decompress_streams_data(&result);
    }
    // do_compress: 1 = zlib, 2 = brotli (see compress_streams_data).
    if opts.do_compress >= 1 {
        result = compress_streams_data(&result, opts.do_compress);
    }
    if opts.do_sanitize != 0 {
        result = sanitize_content_streams(&result);
    }
    result
}
/// C ABI: reorders the document's pages according to `pages`, an array of
/// `count` zero-based page indices. The /Kids array (and /Count) of the
/// catalog's /Pages node is rewritten; the page objects themselves stay in
/// place. Invalid indices or any lookup failure leaves the document
/// untouched. `_structure` is currently ignored.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_rearrange_pages(
    _ctx: ContextHandle,
    doc: DocumentHandle,
    count: i32,
    pages: *const i32,
    _structure: CleanStructureOption,
) {
    if count <= 0 || pages.is_null() {
        return;
    }
    // Caller contract: `pages` must point at least `count` valid i32s.
    let page_order = unsafe { std::slice::from_raw_parts(pages, count as usize) };
    if let Some(document) = super::DOCUMENTS.get(doc) {
        if let Ok(mut guard) = document.lock() {
            let data = guard.data().to_vec();
            if !data.starts_with(b"%PDF-") {
                return;
            }
            // Count "/Type /Page" markers, excluding "/Type /Pages".
            let pattern = b"/Type /Page";
            let mut existing_pages = 0i32;
            let mut i = 0;
            while i + pattern.len() <= data.len() {
                if &data[i..i + pattern.len()] == pattern
                    && data.get(i + pattern.len()) != Some(&b's')
                {
                    existing_pages += 1;
                }
                i += 1;
            }
            if existing_pages == 0 {
                return;
            }
            // Every requested index must name an existing page.
            for &pg in page_order {
                if pg < 0 || pg >= existing_pages {
                    return;
                }
            }
            // Resolve each page index to (obj_start, obj_end, obj_num).
            let mut page_ranges: Vec<(usize, usize, i32)> = Vec::new();
            for pg_idx in 0..existing_pages {
                if let Some(pos) = find_page_obj_position(&data, pg_idx) {
                    if let Some(obj_num) = extract_int_after(&data, pos) {
                        if let Some((start, end)) = find_object_range(&data, obj_num) {
                            page_ranges.push((start, end, obj_num));
                        }
                    }
                }
            }
            if page_ranges.len() != existing_pages as usize {
                // Bail out unless every page object was located.
                return;
            }
            // The new /Kids order, expressed as object numbers.
            let new_kids: Vec<i32> = page_order
                .iter()
                .map(|&idx| page_ranges[idx as usize].2)
                .collect();
            if let Some(root_num) = find_root_obj_num(&data) {
                if let Some((ds, de)) = find_object_dict(&data, root_num) {
                    if let Some(pages_key) = find_dict_key(&data, ds, de, b"/Pages") {
                        if let Some(pages_num) = resolve_indirect_ref(&data, pages_key) {
                            let mut new_data = data.clone();
                            if let Some((pds, pde)) = find_object_dict(&new_data, pages_num) {
                                let kids_str = new_kids
                                    .iter()
                                    .map(|n| format!("{} 0 R", n))
                                    .collect::<Vec<_>>()
                                    .join(" ");
                                let new_kids_entry =
                                    format!("/Kids [{}] /Count {}", kids_str, new_kids.len());
                                if let Some(kids_pos) = find_dict_key(&new_data, pds, pde, b"/Kids")
                                {
                                    let kids_key_start = kids_pos - b"/Kids".len();
                                    // Advance to just past the closing ']' of
                                    // the old /Kids array.
                                    let mut bracket_end = kids_pos;
                                    while bracket_end < pde
                                        && new_data.get(bracket_end) != Some(&b']')
                                    {
                                        bracket_end += 1;
                                    }
                                    if bracket_end < pde {
                                        bracket_end += 1;
                                    }
                                    // Locate any existing "/Count N" after the
                                    // array so the replacement entry (which
                                    // carries its own /Count) does not
                                    // duplicate it.
                                    let remaining_region_end = pde.min(new_data.len());
                                    let mut count_start = bracket_end;
                                    let mut count_end = bracket_end;
                                    if let Some(count_pos) = find_dict_key(
                                        &new_data,
                                        bracket_end,
                                        remaining_region_end,
                                        b"/Count",
                                    ) {
                                        count_start = count_pos - b"/Count".len();
                                        count_end = count_pos;
                                        while count_end < remaining_region_end
                                            && new_data[count_end].is_ascii_whitespace()
                                        {
                                            count_end += 1;
                                        }
                                        while count_end < remaining_region_end
                                            && new_data[count_end].is_ascii_digit()
                                        {
                                            count_end += 1;
                                        }
                                    }
                                    // Remove the old /Count first: it lies after
                                    // the /Kids span, so the splice indices
                                    // below remain valid.
                                    if count_end > bracket_end && count_start >= bracket_end {
                                        new_data.drain(count_start..count_end);
                                    }
                                    new_data.splice(
                                        kids_key_start..bracket_end.min(new_data.len()),
                                        new_kids_entry.bytes(),
                                    );
                                }
                            }
                            guard.set_data(new_data);
                        }
                    }
                }
            }
        }
    }
}
/// C ABI: stub — vectorization is not implemented. All arguments are
/// accepted for ABI compatibility and ignored.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_vectorize_pages(
    _ctx: ContextHandle,
    _doc: DocumentHandle,
    _count: i32,
    _pages: *const i32,
    _vectorize: CleanVectorizeOption,
) {
}
/// C ABI: prunes redundant entries from a dictionary object: `/Null`
/// values, and explicit `0` values for keys whose PDF default is already 0
/// (Rotate, StructParents, Tabs). Non-dictionary objects are untouched.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_clean_object_entries(_ctx: ContextHandle, obj: Handle) {
    use crate::ffi::pdf_object::types::{PDF_OBJECTS, PdfObjType};
    let Some(obj_arc) = PDF_OBJECTS.get(obj) else {
        return;
    };
    let Ok(mut guard) = obj_arc.lock() else {
        return;
    };
    if let PdfObjType::Dict(ref mut entries) = guard.obj_type {
        // Keys whose value of 0 matches the default and can be dropped.
        const DEFAULTING_KEYS: [&str; 3] = ["Rotate", "StructParents", "Tabs"];
        entries.retain(|(key, value)| match value.obj_type {
            // Null entries carry no information.
            PdfObjType::Null => false,
            // An explicit 0 on a defaulting key is redundant.
            PdfObjType::Int(0) => !DEFAULTING_KEYS.contains(&key.as_str()),
            _ => true,
        });
    }
}
/// C ABI: saves `doc` to `filename` using the size-optimizing preset
/// (see [`CleanOptions::optimize`]).
#[unsafe(no_mangle)]
pub extern "C" fn pdf_optimize(ctx: ContextHandle, doc: DocumentHandle, filename: *const c_char) {
    let opts = CleanOptions::optimize();
    pdf_save_document(ctx, doc, filename, &opts.write);
}
/// C ABI: saves `doc` to `filename` using the linearization preset
/// (see [`CleanOptions::linearize`]).
#[unsafe(no_mangle)]
pub extern "C" fn pdf_linearize(ctx: ContextHandle, doc: DocumentHandle, filename: *const c_char) {
    let opts = CleanOptions::linearize();
    pdf_save_document(ctx, doc, filename, &opts.write);
}
/// C ABI: compresses the document's unfiltered streams in place.
/// `method` 2 selects brotli, any other value >= 1 selects zlib; values
/// below 1 are a no-op, as is a document without a `%PDF-` header.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_compress_streams(_ctx: ContextHandle, doc: DocumentHandle, method: i32) {
    if method < 1 {
        return;
    }
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };
    let data = guard.data().to_vec();
    if data.starts_with(b"%PDF-") {
        guard.set_data(compress_streams_data(&data, method));
    }
}
/// Compresses every stream object that does not already declare a
/// /Filter. `method` 2 selects brotli (written with the non-standard
/// "/BrotliDecode" filter name); any other value selects zlib
/// ("/FlateDecode"). A compressed payload is kept only when it is
/// strictly smaller than the original.
fn compress_streams_data(data: &[u8], method: i32) -> Vec<u8> {
    let objects = collect_all_object_numbers(data);
    let mut result = data.to_vec();
    for &obj_num in &objects {
        if object_has_stream(&result, obj_num) && !object_has_filter(&result, obj_num) {
            if let Some((stream_data, stream_start, stream_end)) =
                extract_stream_data(&result, obj_num)
            {
                // Each arm yields the compressed bytes plus the dictionary
                // entry naming the matching decode filter.
                let compressed = match method {
                    2 => {
                        let mut output = Vec::new();
                        let params = brotli::enc::BrotliEncoderParams {
                            quality: 6,
                            ..Default::default()
                        };
                        let mut encoder =
                            brotli::CompressorWriter::with_params(&mut output, 4096, &params);
                        if encoder.write_all(&stream_data).is_ok() {
                            // Dropping the writer flushes it before `output`
                            // is read.
                            drop(encoder);
                            Some((output, "/Filter /BrotliDecode"))
                        } else {
                            None
                        }
                    }
                    _ => {
                        let mut encoder = flate2::write::ZlibEncoder::new(
                            Vec::new(),
                            flate2::Compression::default(),
                        );
                        if encoder.write_all(&stream_data).is_ok() {
                            if let Ok(compressed) = encoder.finish() {
                                Some((compressed, "/Filter /FlateDecode"))
                            } else {
                                None
                            }
                        } else {
                            None
                        }
                    }
                };
                if let Some((compressed_data, filter_name)) = compressed {
                    if compressed_data.len() < stream_data.len() {
                        // Replace the payload in place; later dictionary
                        // offsets shift, so the dict is re-found below.
                        result.splice(stream_start..stream_end, compressed_data.iter().copied());
                        if let Some((ds, _de)) = find_object_dict(&result, obj_num) {
                            let insert_pos = ds + 2;
                            let new_length = compressed_data.len();
                            let filter_entry = format!(" {} /Length {}", filter_name, new_length);
                            let filter_bytes = filter_entry.as_bytes();
                            // Preferred path: helper updates the dictionary;
                            // otherwise fall back to a raw insert just after
                            // the opening "<<".
                            let updated =
                                insert_filter_and_update_length(&result, obj_num, filter_bytes);
                            if let Some(u) = updated {
                                result = u;
                            } else {
                                for (i, &b) in filter_bytes.iter().enumerate() {
                                    result.insert(insert_pos + i, b);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    result
}
/// Inserts `filter_bytes` (e.g. " /Filter /FlateDecode /Length N") just
/// after the opening `<<` of `obj_num`'s dictionary and synchronizes any
/// existing `/Length` entry with the actual stream payload length.
/// Returns `None` when the object, its dictionary, or its stream cannot
/// be located.
///
/// Fix: the previous implementation ignored its filter parameter entirely
/// (`_filter_bytes`) and only updated `/Length`; because it still returned
/// `Some`, the caller's fallback insertion never ran and compressed
/// streams ended up with no `/Filter` entry at all.
fn insert_filter_and_update_length(
    data: &[u8],
    obj_num: i32,
    filter_bytes: &[u8],
) -> Option<Vec<u8>> {
    let (ds, de) = find_object_dict(data, obj_num)?;
    let has_length = find_dict_key(data, ds, de, b"/Length").is_some();
    // Locate the raw stream payload, mirroring extract_stream_data's EOL
    // handling around the "stream"/"endstream" keywords.
    let pattern = format!("{} 0 obj", obj_num);
    let obj_pos = find_pattern(data, pattern.as_bytes())?;
    let stream_rel = find_pattern(&data[obj_pos..], b"stream")?;
    let mut stream_start = obj_pos + stream_rel + b"stream".len();
    if data.get(stream_start) == Some(&b'\r') {
        stream_start += 1;
    }
    if data.get(stream_start) == Some(&b'\n') {
        stream_start += 1;
    }
    let endstream_rel = find_pattern(&data[stream_start..], b"endstream")?;
    let mut stream_end = stream_start + endstream_rel;
    if stream_end > stream_start && data[stream_end - 1] == b'\n' {
        stream_end -= 1;
    }
    if stream_end > stream_start && data[stream_end - 1] == b'\r' {
        stream_end -= 1;
    }
    let actual_length = stream_end - stream_start;
    let mut result = data.to_vec();
    if has_length {
        // Rewrite the digits of the existing /Length value in place. This
        // edit sits after ds + 2, so the insert position below stays valid.
        if let Some(len_pos) = find_dict_key(&result, ds, de, b"/Length") {
            let mut val_start = len_pos;
            while val_start < de && result[val_start].is_ascii_whitespace() {
                val_start += 1;
            }
            let mut val_end = val_start;
            while val_end < de && result[val_end].is_ascii_digit() {
                val_end += 1;
            }
            if val_end > val_start {
                result.splice(val_start..val_end, actual_length.to_string().bytes());
            }
        }
    }
    // When the dictionary already carries /Length, strip the trailing
    // " /Length N" portion of `filter_bytes` to avoid a duplicate key.
    let insert_slice = if has_length {
        match find_pattern(filter_bytes, b" /Length") {
            Some(cut) => &filter_bytes[..cut],
            None => filter_bytes,
        }
    } else {
        filter_bytes
    };
    // Insert the filter entry immediately after the opening "<<".
    let insert_pos = (ds + 2).min(result.len());
    result.splice(insert_pos..insert_pos, insert_slice.iter().copied());
    Some(result)
}
/// C ABI: inflates every /FlateDecode stream in the document in place.
/// Documents without a `%PDF-` header are left untouched.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_decompress_streams(_ctx: ContextHandle, doc: DocumentHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };
    let data = guard.data().to_vec();
    if data.starts_with(b"%PDF-") {
        guard.set_data(decompress_streams_data(&data));
    }
}
/// Inflates every object whose dictionary mentions /FlateDecode: replaces
/// the stream payload with the inflated bytes, removes the /Filter entry,
/// and rewrites /Length. Objects whose payload fails to inflate are left
/// unchanged.
fn decompress_streams_data(data: &[u8]) -> Vec<u8> {
    let objects = collect_all_object_numbers(data);
    let mut result = data.to_vec();
    for &obj_num in &objects {
        if let Some((ds, de)) = find_object_dict(&result, obj_num) {
            let has_flate = find_dict_key(&result, ds, de, b"/FlateDecode").is_some();
            if !has_flate {
                continue;
            }
            if let Some((stream_data, stream_start, stream_end)) =
                extract_stream_data(&result, obj_num)
            {
                let mut decoder = flate2::read::ZlibDecoder::new(&stream_data[..]);
                let mut decompressed = Vec::new();
                if decoder.read_to_end(&mut decompressed).is_ok() {
                    // Swap the payload first; every later offset shifts, so
                    // the dictionary is re-resolved after each edit below.
                    result.splice(stream_start..stream_end, decompressed.iter().copied());
                    if let Some((ds2, de2)) = find_object_dict(&result, obj_num) {
                        if let Some(filter_pos) = find_dict_key(&result, ds2, de2, b"/Filter") {
                            // Remove "/Filter" plus the single name following
                            // it. NOTE(review): an array-valued /Filter
                            // ([/FlateDecode ...]) would be only partially
                            // removed — confirm whether that case occurs.
                            let filter_start = filter_pos - b"/Filter".len();
                            let mut filter_end = filter_pos;
                            while filter_end < de2 && result[filter_end].is_ascii_whitespace() {
                                filter_end += 1;
                            }
                            if filter_end < de2 && result[filter_end] == b'/' {
                                filter_end += 1;
                                while filter_end < de2 && result[filter_end].is_ascii_alphanumeric()
                                {
                                    filter_end += 1;
                                }
                            }
                            result.drain(filter_start..filter_end.min(result.len()));
                        }
                        let new_len = decompressed.len();
                        // Re-find the dictionary (the drain above shifted
                        // offsets) and rewrite the /Length digits in place.
                        if let Some((ds3, de3)) = find_object_dict(&result, obj_num) {
                            if let Some(len_pos) = find_dict_key(&result, ds3, de3, b"/Length") {
                                let mut val_start = len_pos;
                                while val_start < de3 && result[val_start].is_ascii_whitespace() {
                                    val_start += 1;
                                }
                                let mut val_end = val_start;
                                while val_end < de3 && result[val_end].is_ascii_digit() {
                                    val_end += 1;
                                }
                                if val_end > val_start {
                                    let new_val = format!("{}", new_len);
                                    result.splice(val_start..val_end, new_val.bytes());
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    result
}
/// Strips active content: /JS entries (with a literal-string or hex-string
/// value) and /AA entries (with an indirect-reference value). Matches are
/// processed back-to-front so earlier drains do not invalidate the
/// remaining positions.
/// NOTE(review): /JS or /AA values that are inline dictionaries are only
/// removed up to the key itself, leaving the value behind — confirm this
/// is intended.
fn sanitize_content_streams(data: &[u8]) -> Vec<u8> {
    let mut result = data.to_vec();
    let js_positions = find_all_patterns(&result, b"/JS");
    for &pos in js_positions.iter().rev() {
        // Require a delimiter before the key so longer names containing
        // "/JS" are not matched.
        if pos > 0 && (result[pos - 1].is_ascii_whitespace() || result[pos - 1] == b'<') {
            let mut end = pos + 3;
            while end < result.len() && result[end].is_ascii_whitespace() {
                end += 1;
            }
            if end < result.len() {
                if result[end] == b'(' {
                    // Literal string: track paren nesting, honoring '\'
                    // escapes (the escaped byte is skipped as well).
                    let mut depth = 0i32;
                    while end < result.len() {
                        match result[end] {
                            b'(' => depth += 1,
                            b')' => {
                                depth -= 1;
                                if depth == 0 {
                                    end += 1;
                                    break;
                                }
                            }
                            b'\\' => end += 1,
                            _ => {}
                        }
                        end += 1;
                    }
                } else if result[end] == b'<' && end + 1 < result.len() && result[end + 1] != b'<' {
                    // Hex string (single '<'): consume through closing '>'.
                    while end < result.len() && result[end] != b'>' {
                        end += 1;
                    }
                    if end < result.len() {
                        end += 1;
                    }
                }
            }
            result.drain(pos..end.min(result.len()));
        }
    }
    let aa_positions = find_all_patterns(&result, b"/AA");
    for &pos in aa_positions.iter().rev() {
        if pos > 0 && (result[pos - 1].is_ascii_whitespace() || result[pos - 1] == b'<') {
            let mut end = pos + 3;
            while end < result.len() && result[end].is_ascii_whitespace() {
                end += 1;
            }
            // Consume an indirect-reference value ("N 0 R") when present.
            if end < result.len() && result[end].is_ascii_digit() {
                while end < result.len()
                    && (result[end].is_ascii_digit()
                        || result[end].is_ascii_whitespace()
                        || result[end] == b'R')
                {
                    if result[end] == b'R' {
                        end += 1;
                        break;
                    }
                    end += 1;
                }
            }
            result.drain(pos..end.min(result.len()));
        }
    }
    result
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_create_object_streams(_ctx: ContextHandle, doc: DocumentHandle) {
    // Packs small (< 4 KiB) non-stream objects into a single compressed
    // object stream (/Type /ObjStm) and inserts it just before the xref
    // table. No-op for unknown handles, non-PDF data, or fewer than three
    // candidate objects.
    if let Some(document) = super::DOCUMENTS.get(doc) {
        if let Ok(mut guard) = document.lock() {
            let data = guard.data().to_vec();
            if !data.starts_with(b"%PDF-") {
                return;
            }
            let objects = collect_all_object_numbers(&data);
            // Candidates: small objects that carry no stream data.
            let mut non_stream_objects: Vec<(i32, Vec<u8>)> = Vec::new();
            for &obj_num in &objects {
                if !object_has_stream(&data, obj_num) {
                    if let Some((start, end)) = find_object_range(&data, obj_num) {
                        let obj_data = data[start..end].to_vec();
                        if obj_data.len() < 4096 {
                            non_stream_objects.push((obj_num, obj_data));
                        }
                    }
                }
            }
            // The ObjStm header overhead isn't worth it below three objects.
            if non_stream_objects.len() < 3 {
                return;
            }
            // Build the payload: a header of "<num> <offset>" pairs followed
            // by the concatenated object bodies (headers/endobj stripped).
            let mut offsets_header = String::new();
            let mut objects_data = Vec::new();
            let mut current_offset = 0usize;
            for (obj_num, obj_bytes) in &non_stream_objects {
                let obj_str = String::from_utf8_lossy(obj_bytes);
                let value_start = obj_str.find("obj").map(|p| p + 3).unwrap_or(0);
                let value_end = obj_str.rfind("endobj").unwrap_or(obj_str.len());
                let value_bytes = obj_str[value_start..value_end].trim().as_bytes();
                if !offsets_header.is_empty() {
                    offsets_header.push(' ');
                }
                offsets_header.push_str(&format!("{} {}", obj_num, current_offset));
                objects_data.extend_from_slice(value_bytes);
                objects_data.push(b' ');
                current_offset = objects_data.len();
            }
            let mut encoder =
                flate2::write::ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            let mut combined = offsets_header.as_bytes().to_vec();
            combined.push(b' ');
            combined.extend_from_slice(&objects_data);
            if encoder.write_all(&combined).is_ok() {
                if let Ok(compressed) = encoder.finish() {
                    // The new object gets the next unused object number.
                    let max_obj = objects.iter().max().copied().unwrap_or(0);
                    let new_obj_num = max_obj + 1;
                    let objstm = format!(
                        "{} 0 obj\n<< /Type /ObjStm /N {} /First {} /Length {} /Filter /FlateDecode >>\nstream\n",
                        new_obj_num,
                        non_stream_objects.len(),
                        offsets_header.len() + 1,
                        compressed.len(),
                    );
                    let mut new_data = data.clone();
                    if let Some(xref_pos) = rfind_pattern(&new_data, b"xref") {
                        let mut objstm_bytes = objstm.into_bytes();
                        objstm_bytes.extend_from_slice(&compressed);
                        objstm_bytes.extend_from_slice(b"\nendstream\nendobj\n");
                        // Splice the whole stream in at once; the previous
                        // byte-by-byte Vec::insert loop was O(n * m) in the
                        // document size.
                        new_data.splice(xref_pos..xref_pos, objstm_bytes);
                        guard.set_data(new_data);
                    }
                }
            }
        }
    }
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_remove_object_streams(_ctx: ContextHandle, doc: DocumentHandle) {
    // Strips every object stream (/Type /ObjStm) object from the document,
    // then rebuilds the cross-reference table if anything was deleted.
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };
    let original = guard.data().to_vec();
    if !original.starts_with(b"%PDF-") {
        return;
    }
    let numbers = collect_all_object_numbers(&original);
    let mut buf = original.clone();
    let mut deleted = false;
    // Walk object numbers from highest to lowest so byte offsets of objects
    // not yet processed remain valid after each removal.
    for &num in numbers.iter().rev() {
        let Some((ds, de)) = find_object_dict(&buf, num) else {
            continue;
        };
        let is_objstm = find_dict_key(&buf, ds, de, b"/Type /ObjStm").is_some()
            || find_dict_key(&buf, ds, de, b"/ObjStm").is_some();
        if !is_objstm {
            continue;
        }
        if let Some((start, end)) = find_object_range(&buf, num) {
            // Swallow any whitespace that trailed the object as well.
            let mut stop = end;
            while stop < buf.len() && buf[stop].is_ascii_whitespace() {
                stop += 1;
            }
            buf.drain(start..stop.min(buf.len()));
            deleted = true;
        }
    }
    if deleted {
        guard.set_data(rebuild_pdf_xref(&buf));
    }
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_garbage_collect(_ctx: ContextHandle, doc: DocumentHandle, level: i32) {
    // Runs garbage collection on the document. Levels below 1 are a no-op;
    // higher levels delegate to garbage_collect_data (1 = drop unreferenced
    // objects, 2 = also renumber, 3 = also deduplicate).
    if level < 1 {
        return;
    }
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    if let Ok(mut guard) = document.lock() {
        let bytes = guard.data().to_vec();
        if bytes.starts_with(b"%PDF-") {
            let cleaned = garbage_collect_data(&bytes, level);
            guard.set_data(cleaned);
        }
    }
}
/// Core garbage-collection pass over raw PDF bytes.
///
/// * level 1: remove objects that nothing references.
/// * level 2: additionally renumber objects into a dense 1..=n sequence.
/// * level 3: additionally deduplicate identical objects.
///
/// Returns the input unchanged when there is nothing to remove and the
/// level does not request renumbering; otherwise ends by rebuilding the
/// xref table.
fn garbage_collect_data(data: &[u8], level: i32) -> Vec<u8> {
    let all_objects: HashSet<i32> = collect_all_object_numbers(data).into_iter().collect();
    let referenced = collect_referenced_objects(data);
    let mut unreferenced: Vec<i32> = all_objects.difference(&referenced).copied().collect();
    if unreferenced.is_empty() && level < 2 {
        return data.to_vec();
    }
    let mut result = data.to_vec();
    // Delete from the highest object number down so byte offsets of objects
    // not yet processed stay valid. (Sort in place; the previous version
    // cloned the vector for no reason.)
    unreferenced.sort_unstable_by(|a, b| b.cmp(a));
    for &obj_num in &unreferenced {
        if let Some((start, end)) = find_object_range(&result, obj_num) {
            // Also remove trailing whitespace after the object.
            let mut actual_end = end;
            while actual_end < result.len() && result[actual_end].is_ascii_whitespace() {
                actual_end += 1;
            }
            result.drain(start..actual_end.min(result.len()));
        }
    }
    if level >= 2 {
        result = renumber_objects_data(&result);
    }
    if level >= 3 {
        result = deduplicate_objects_data(&result);
    }
    rebuild_pdf_xref(&result)
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_deduplicate_objects(_ctx: ContextHandle, doc: DocumentHandle) {
    // Collapses byte-identical objects in the document to a single copy.
    // No-op for unknown handles or non-PDF data.
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    if let Ok(mut guard) = document.lock() {
        let bytes = guard.data().to_vec();
        if bytes.starts_with(b"%PDF-") {
            let deduped = deduplicate_objects_data(&bytes);
            guard.set_data(deduped);
        }
    }
}
/// Finds objects with byte-identical bodies, redirects all references to the
/// first (canonical) copy, deletes the duplicates, and rebuilds the xref.
/// Returns the input unchanged when no duplicates exist.
fn deduplicate_objects_data(data: &[u8]) -> Vec<u8> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    // Replace every occurrence of `from` with `to` in `buf`, but only when
    // the match is not preceded by an ASCII digit — otherwise "1 0 R" would
    // match inside "11 0 R" and corrupt that reference. Scanning resumes
    // *after* each replacement, which also guarantees termination when `to`
    // contains `from` as a substring (e.g. rewriting "1 0 R" to "11 0 R"
    // would otherwise make the naive `while let find_pattern` loop re-find
    // its own output forever).
    fn replace_refs(buf: &mut Vec<u8>, from: &[u8], to: &[u8]) {
        let mut search_from = 0usize;
        while search_from + from.len() <= buf.len() {
            let window = &buf[search_from..];
            let Some(rel) = find_pattern(window, from) else {
                break;
            };
            let pos = search_from + rel;
            if pos > 0 && buf[pos - 1].is_ascii_digit() {
                // Part of a longer object number; skip past this match.
                search_from = pos + 1;
                continue;
            }
            buf.splice(pos..pos + from.len(), to.iter().copied());
            search_from = pos + to.len();
        }
    }
    let objects = collect_all_object_numbers(data);
    let mut hash_to_canonical: HashMap<u64, i32> = HashMap::new();
    let mut duplicates: Vec<(i32, i32)> = Vec::new();
    for &obj_num in &objects {
        if let Some((start, end)) = find_object_range(data, obj_num) {
            let obj_bytes = &data[start..end];
            let pattern = format!("{} 0 obj", obj_num);
            let content_start = pattern.len();
            if content_start < obj_bytes.len() {
                let content = &obj_bytes[content_start..];
                let mut hasher = DefaultHasher::new();
                content.hash(&mut hasher);
                let hash = hasher.finish();
                if let Some(&canonical) = hash_to_canonical.get(&hash) {
                    // Hash-collision guard: confirm the bodies really match
                    // before treating this object as a duplicate.
                    if let Some((cstart, cend)) = find_object_range(data, canonical) {
                        let canonical_bytes = &data[cstart..cend];
                        let c_content_start = format!("{} 0 obj", canonical).len();
                        if c_content_start < canonical_bytes.len()
                            && content == &canonical_bytes[c_content_start..]
                        {
                            duplicates.push((obj_num, canonical));
                        }
                    }
                } else {
                    hash_to_canonical.insert(hash, obj_num);
                }
            }
        }
    }
    if duplicates.is_empty() {
        return data.to_vec();
    }
    let mut result = data.to_vec();
    // Redirect all references to each duplicate at its canonical object.
    for &(dup, canonical) in &duplicates {
        let old_ref = format!("{} 0 R", dup);
        let new_ref = format!("{} 0 R", canonical);
        replace_refs(&mut result, old_ref.as_bytes(), new_ref.as_bytes());
    }
    // Then delete the duplicate objects, highest number first so earlier
    // byte offsets stay valid.
    let mut sorted_dups: Vec<i32> = duplicates.iter().map(|&(dup, _)| dup).collect();
    sorted_dups.sort_unstable_by(|a, b| b.cmp(a));
    sorted_dups.dedup();
    for &dup in &sorted_dups {
        if let Some((start, end)) = find_object_range(&result, dup) {
            let mut actual_end = end;
            while actual_end < result.len() && result[actual_end].is_ascii_whitespace() {
                actual_end += 1;
            }
            result.drain(start..actual_end.min(result.len()));
        }
    }
    rebuild_pdf_xref(&result)
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_renumber_objects(_ctx: ContextHandle, doc: DocumentHandle) {
    // Renumbers all objects into a dense 1..=n sequence.
    // No-op for unknown handles or non-PDF data.
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    if let Ok(mut guard) = document.lock() {
        let bytes = guard.data().to_vec();
        if bytes.starts_with(b"%PDF-") {
            let renumbered = renumber_objects_data(&bytes);
            guard.set_data(renumbered);
        }
    }
}
/// Renumbers all objects into a dense 1..=n sequence, rewriting every
/// "<num> 0 obj" header and "<num> 0 R" reference, then rebuilds the xref
/// table. Returns the input unchanged when numbering is already sequential.
///
/// Uses a two-phase placeholder scheme ("__OBJ_<new>__") so overlapping
/// renumberings (e.g. 3 -> 1 while 1 -> 2) cannot clobber earlier rewrites.
fn renumber_objects_data(data: &[u8]) -> Vec<u8> {
    // Replace `from` with `to` wherever the match is not preceded by an
    // ASCII digit: "1 0 obj"/"1 0 R" must not match inside "21 0 obj" or
    // "21 0 R". (Descending processing order alone does not prevent this,
    // because numbers that map to themselves are never rewritten and so are
    // still present verbatim when smaller numbers are processed.) Scanning
    // resumes after each replacement so the loop always terminates.
    fn replace_tokens(buf: &mut Vec<u8>, from: &[u8], to: &[u8]) {
        let mut search_from = 0usize;
        while search_from + from.len() <= buf.len() {
            let Some(rel) = find_pattern(&buf[search_from..], from) else {
                break;
            };
            let pos = search_from + rel;
            if pos > 0 && buf[pos - 1].is_ascii_digit() {
                search_from = pos + 1;
                continue;
            }
            buf.splice(pos..pos + from.len(), to.iter().copied());
            search_from = pos + to.len();
        }
    }
    let objects = collect_all_object_numbers(data);
    let mut mapping: HashMap<i32, i32> = HashMap::new();
    for (new_num, &old_num) in objects.iter().enumerate() {
        mapping.insert(old_num, (new_num as i32) + 1);
    }
    let already_sequential = objects
        .iter()
        .enumerate()
        .all(|(i, &n)| n == (i as i32) + 1);
    if already_sequential {
        return data.to_vec();
    }
    let mut result = data.to_vec();
    // Phase 1: rewrite each changed number to its unique placeholder,
    // processing larger numbers first.
    let mut old_nums: Vec<i32> = mapping
        .keys()
        .filter(|&&old| mapping[&old] != old)
        .copied()
        .collect();
    old_nums.sort_unstable_by(|a, b| b.cmp(a));
    for &old in &old_nums {
        let placeholder = format!("__OBJ_{}__", mapping[&old]);
        let old_obj = format!("{} 0 obj", old);
        let new_obj = format!("{} 0 obj", placeholder);
        replace_tokens(&mut result, old_obj.as_bytes(), new_obj.as_bytes());
        let old_ref = format!("{} 0 R", old);
        let new_ref = format!("{} 0 R", placeholder);
        replace_tokens(&mut result, old_ref.as_bytes(), new_ref.as_bytes());
    }
    // Phase 2: collapse placeholders into the final numbers. Placeholders
    // for identity mappings simply never occur, so those passes are no-ops.
    for &new in mapping.values() {
        let placeholder = format!("__OBJ_{}__", new);
        let final_num = format!("{}", new);
        replace_tokens(&mut result, placeholder.as_bytes(), final_num.as_bytes());
    }
    rebuild_pdf_xref(&result)
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_remove_unused_resources(_ctx: ContextHandle, doc: DocumentHandle) {
    // Deletes every object that no other object references, then rebuilds
    // the xref table. No-op when nothing is unreferenced.
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };
    let bytes = guard.data().to_vec();
    if !bytes.starts_with(b"%PDF-") {
        return;
    }
    let known: HashSet<i32> = collect_all_object_numbers(&bytes).into_iter().collect();
    let referenced = collect_referenced_objects(&bytes);
    let mut orphans: Vec<i32> = known.difference(&referenced).copied().collect();
    if orphans.is_empty() {
        return;
    }
    // Remove highest object number first so earlier byte offsets stay valid.
    orphans.sort_unstable_by(|a, b| b.cmp(a));
    let mut buf = bytes;
    for &num in &orphans {
        if let Some((start, end)) = find_object_range(&buf, num) {
            // Include any whitespace trailing the object in the removal.
            let mut stop = end;
            while stop < buf.len() && buf[stop].is_ascii_whitespace() {
                stop += 1;
            }
            buf.drain(start..stop.min(buf.len()));
        }
    }
    guard.set_data(rebuild_pdf_xref(&buf));
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_set_encryption(
    _ctx: ContextHandle,
    opts: *mut WriteOptions,
    method: i32,
    permissions: i32,
    owner_pwd: *const c_char,
    user_pwd: *const c_char,
) {
    // Configures encryption on a write-options struct: method, permission
    // bits, and optional owner/user passwords. A null `opts` is a no-op;
    // a null or non-UTF-8 password leaves that field untouched.
    //
    // SAFETY: `as_mut` handles the null case; otherwise the caller
    // guarantees `opts` points at a live, exclusively-accessed WriteOptions
    // for the duration of this call.
    let Some(options) = (unsafe { opts.as_mut() }) else {
        return;
    };
    options.do_encrypt = method;
    options.permissions = permissions;
    if !owner_pwd.is_null() {
        // SAFETY: non-null password pointers are NUL-terminated C strings.
        if let Ok(pwd) = unsafe { CStr::from_ptr(owner_pwd) }.to_str() {
            options.set_owner_password(pwd);
        }
    }
    if !user_pwd.is_null() {
        // SAFETY: as above.
        if let Ok(pwd) = unsafe { CStr::from_ptr(user_pwd) }.to_str() {
            options.set_user_password(pwd);
        }
    }
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_remove_encryption(_ctx: ContextHandle, opts: *mut WriteOptions) {
    // Clears any encryption setting on the write options (no-op for null).
    // SAFETY: `as_mut` rejects null; otherwise the caller guarantees `opts`
    // points at a valid, exclusively-accessed WriteOptions.
    if let Some(options) = unsafe { opts.as_mut() } {
        options.do_encrypt = EncryptionMethod::None as i32;
    }
}
#[unsafe(no_mangle)]
pub extern "C" fn pdf_clean_free_string(_ctx: ContextHandle, s: *mut c_char) {
    // Releases a C string previously handed out by this library.
    // Passing null is allowed and does nothing.
    if s.is_null() {
        return;
    }
    // SAFETY: the caller must pass a pointer obtained from
    // CString::into_raw that has not been freed before; reconstituting the
    // CString lets Rust reclaim the allocation.
    drop(unsafe { CString::from_raw(s) });
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_write_options_default() {
let opts = WriteOptions::new();
assert_eq!(opts.do_incremental, 0);
assert_eq!(opts.do_compress, 1);
assert_eq!(opts.do_garbage, 0);
assert_eq!(opts.do_linear, 0);
}
#[test]
fn test_write_options_parse() {
let mut opts = WriteOptions::new();
opts.parse("glzcs");
assert_eq!(opts.do_garbage, 1);
assert_eq!(opts.do_linear, 1);
assert_eq!(opts.do_compress, 1);
assert_eq!(opts.do_clean, 1);
assert_eq!(opts.do_sanitize, 1);
}
#[test]
fn test_write_options_format() {
let mut opts = WriteOptions::new();
opts.do_garbage = 1;
opts.do_linear = 1;
opts.do_compress = 1;
let formatted = opts.format();
assert!(formatted.contains('g'));
assert!(formatted.contains('l'));
assert!(formatted.contains('z'));
}
#[test]
fn test_write_options_password() {
let mut opts = WriteOptions::new();
opts.set_owner_password("owner123");
opts.set_user_password("user456");
let owner = std::str::from_utf8(&opts.opwd_utf8[..8]).unwrap();
assert_eq!(owner, "owner123");
let user = std::str::from_utf8(&opts.upwd_utf8[..7]).unwrap();
assert_eq!(user, "user456");
}
#[test]
fn test_clean_options_default() {
let opts = CleanOptions::new();
assert_eq!(opts.subset_fonts, 0);
assert_eq!(opts.structure, CleanStructureOption::Drop);
assert_eq!(opts.vectorize, CleanVectorizeOption::No);
}
#[test]
fn test_clean_options_optimize() {
let opts = CleanOptions::optimize();
assert_eq!(opts.write.do_garbage, 3);
assert_eq!(opts.write.do_compress, 1);
assert_eq!(opts.write.do_clean, 1);
assert_eq!(opts.subset_fonts, 1);
}
#[test]
fn test_clean_options_linearize() {
let opts = CleanOptions::linearize();
assert_eq!(opts.write.do_linear, 1);
assert_eq!(opts.write.do_garbage, 1);
}
#[test]
fn test_structure_option() {
assert_eq!(CleanStructureOption::Drop as i32, 0);
assert_eq!(CleanStructureOption::Keep as i32, 1);
}
#[test]
fn test_vectorize_option() {
assert_eq!(CleanVectorizeOption::No as i32, 0);
assert_eq!(CleanVectorizeOption::Yes as i32, 1);
}
#[test]
fn test_encryption_method() {
assert_eq!(EncryptionMethod::Keep as i32, 0);
assert_eq!(EncryptionMethod::None as i32, 1);
assert_eq!(EncryptionMethod::Aes256 as i32, 5);
}
#[test]
fn test_ffi_default_options() {
let write_opts = pdf_default_write_options();
assert_eq!(write_opts.do_compress, 1);
let clean_opts = pdf_default_clean_options();
assert_eq!(clean_opts.structure, CleanStructureOption::Drop);
}
#[test]
fn test_ffi_parse_options() {
let mut opts = WriteOptions::new();
let args = CString::new("glzcs").unwrap();
pdf_parse_write_options(0, &mut opts, args.as_ptr());
assert_eq!(opts.do_garbage, 1);
assert_eq!(opts.do_linear, 1);
}
#[test]
fn test_ffi_format_options() {
let mut opts = WriteOptions::new();
opts.do_garbage = 1;
opts.do_linear = 1;
let mut buffer = [0u8; 64];
pdf_format_write_options(0, buffer.as_mut_ptr() as *mut c_char, 64, &opts);
let result = unsafe { CStr::from_ptr(buffer.as_ptr() as *const c_char) };
let s = result.to_str().unwrap();
assert!(s.contains('g'));
assert!(s.contains('l'));
}
#[test]
fn test_ffi_can_save_incrementally() {
let result = pdf_can_be_saved_incrementally(0, 0);
assert_eq!(result, 0);
}
#[test]
fn test_ffi_has_unsaved_sigs() {
let result = pdf_has_unsaved_sigs(0, 0);
assert_eq!(result, 0);
}
#[test]
fn test_ffi_set_encryption() {
let mut opts = WriteOptions::new();
let owner = CString::new("owner").unwrap();
let user = CString::new("user").unwrap();
pdf_set_encryption(0, &mut opts, 5, 0xFFFF, owner.as_ptr(), user.as_ptr());
assert_eq!(opts.do_encrypt, 5); assert_eq!(opts.permissions, 0xFFFF);
}
#[test]
fn test_ffi_remove_encryption() {
let mut opts = WriteOptions::new();
opts.do_encrypt = 5;
pdf_remove_encryption(0, &mut opts);
assert_eq!(opts.do_encrypt, 1); }
#[test]
fn test_ffi_parse_options_null() {
let mut opts = WriteOptions::new();
let result =
pdf_parse_write_options(0, std::ptr::null_mut(), CString::new("g").unwrap().as_ptr());
assert!(result.is_null());
}
#[test]
fn test_ffi_format_options_null() {
let opts = WriteOptions::new();
assert!(pdf_format_write_options(0, std::ptr::null_mut(), 64, &opts).is_null());
}
#[test]
fn test_write_options_parse_all() {
let mut opts = WriteOptions::new();
opts.parse("gGDdifzlazZcspAmoL");
assert_eq!(opts.do_garbage, 3);
assert_eq!(opts.do_decompress, 1);
assert_eq!(opts.do_compress_images, 0);
assert_eq!(opts.do_compress_fonts, 0);
assert_eq!(opts.do_linear, 1);
assert_eq!(opts.do_ascii, 1);
assert_eq!(opts.do_compress, 2);
assert_eq!(opts.do_clean, 1);
assert_eq!(opts.do_sanitize, 1);
assert_eq!(opts.do_pretty, 1);
assert_eq!(opts.do_appearance, 1);
assert_eq!(opts.do_preserve_metadata, 1);
assert_eq!(opts.do_use_objstms, 1);
assert_eq!(opts.do_labels, 1);
}
#[test]
fn test_write_options_format_all() {
let mut opts = WriteOptions::new();
opts.do_garbage = 1;
let s = opts.format();
assert!(s.contains('g'));
opts.do_garbage = 2;
let s = opts.format();
assert!(s.contains('G'));
opts.do_garbage = 3;
let s = opts.format();
assert!(s.contains('D'));
opts.do_decompress = 1;
opts.do_linear = 1;
opts.do_ascii = 1;
opts.do_compress = 1;
opts.do_clean = 1;
opts.do_sanitize = 1;
opts.do_pretty = 1;
opts.do_appearance = 1;
opts.do_preserve_metadata = 1;
opts.do_use_objstms = 1;
opts.do_labels = 1;
let s = opts.format();
assert!(s.contains('d'));
assert!(s.contains('l'));
assert!(s.contains('a'));
assert!(s.contains('z'));
assert!(s.contains('c'));
assert!(s.contains('s'));
assert!(s.contains('p'));
assert!(s.contains('A'));
assert!(s.contains('m'));
assert!(s.contains('o'));
assert!(s.contains('L'));
}
#[test]
fn test_ffi_can_save_incrementally_valid() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let result = pdf_can_be_saved_incrementally(0, doc_handle);
assert_eq!(result, 1);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_can_save_incrementally_no_startxref() {
let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let result = pdf_can_be_saved_incrementally(0, doc_handle);
assert_eq!(result, 0);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_has_unsaved_sigs_with_sig() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog >> endobj
2 0 obj << /FT /Sig /T (sig1) >> endobj
xref
0 3
0000000000 65535 f
0000000009 00000 n
0000000050 00000 n
trailer << /Size 3 /Root 1 0 R >>
startxref
120
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let result = pdf_has_unsaved_sigs(0, doc_handle);
assert_eq!(result, 1);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_save_document_null_filename() {
let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
pdf_save_document(0, doc_handle, std::ptr::null(), std::ptr::null());
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_save_document_valid() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let tmp = std::env::temp_dir().join("micropdf_clean_test_save.pdf");
let path = CString::new(tmp.to_str().unwrap()).unwrap();
pdf_save_document(0, doc_handle, path.as_ptr(), std::ptr::null());
assert!(std::fs::read(&tmp).unwrap().starts_with(b"%PDF-"));
let _ = std::fs::remove_file(&tmp);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_write_document() {
use super::super::output;
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let tmp = std::env::temp_dir().join("micropdf_write_doc_test.pdf");
let path = CString::new(tmp.to_str().unwrap()).unwrap();
let out_handle = super::super::output::fz_new_output_with_path(0, path.as_ptr(), 0);
pdf_write_document(0, doc_handle, out_handle, std::ptr::null());
let data = std::fs::read(&tmp).unwrap();
assert!(data.starts_with(b"%PDF-"));
super::super::output::fz_drop_output(0, out_handle);
let _ = std::fs::remove_file(&tmp);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_save_snapshot_null() {
pdf_save_snapshot(0, 0, std::ptr::null());
}
#[test]
fn test_ffi_save_snapshot_valid() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog >> endobj
xref
0 2
0000000000 65535 f
0000000009 00000 n
trailer << /Size 2 /Root 1 0 R >>
startxref
100
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let tmp = std::env::temp_dir().join("micropdf_snapshot_test.pdf");
let path = CString::new(tmp.to_str().unwrap()).unwrap();
pdf_save_snapshot(0, doc_handle, path.as_ptr());
assert!(std::fs::read(&tmp).unwrap().starts_with(b"%PDF-"));
let _ = std::fs::remove_file(&tmp);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_write_snapshot() {
let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let tmp = std::env::temp_dir().join("micropdf_snapshot_out_test.pdf");
let path = CString::new(tmp.to_str().unwrap()).unwrap();
let out_handle = super::super::output::fz_new_output_with_path(0, path.as_ptr(), 0);
pdf_write_snapshot(0, doc_handle, out_handle);
let data = std::fs::read(&tmp).unwrap();
assert!(data.starts_with(b"%PDF-"));
super::super::output::fz_drop_output(0, out_handle);
let _ = std::fs::remove_file(&tmp);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_save_journal_null() {
pdf_save_journal(0, 0, std::ptr::null());
}
#[test]
fn test_ffi_save_journal_valid() {
let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let tmp = std::env::temp_dir().join("micropdf_journal_test.json");
let path = CString::new(tmp.to_str().unwrap()).unwrap();
pdf_save_journal(0, doc_handle, path.as_ptr());
let content = std::fs::read_to_string(&tmp).unwrap();
assert!(content.contains("pdf_journal"));
let _ = std::fs::remove_file(&tmp);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_write_journal() {
let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let tmp = std::env::temp_dir().join("micropdf_journal_out_test.json");
let path = CString::new(tmp.to_str().unwrap()).unwrap();
let out_handle = super::super::output::fz_new_output_with_path(0, path.as_ptr(), 0);
pdf_write_journal(0, doc_handle, out_handle);
let data = std::fs::read_to_string(&tmp).unwrap();
assert!(data.contains("pdf_journal"));
super::super::output::fz_drop_output(0, out_handle);
let _ = std::fs::remove_file(&tmp);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_clean_file_null() {
pdf_clean_file(
0,
std::ptr::null(),
std::ptr::null(),
std::ptr::null(),
std::ptr::null(),
0,
std::ptr::null(),
);
}
#[test]
fn test_ffi_clean_file_valid() {
let in_tmp = std::env::temp_dir().join("micropdf_clean_in.pdf");
let out_tmp = std::env::temp_dir().join("micropdf_clean_out.pdf");
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
std::fs::write(&in_tmp, pdf_data).unwrap();
let in_path = CString::new(in_tmp.to_str().unwrap()).unwrap();
let out_path = CString::new(out_tmp.to_str().unwrap()).unwrap();
pdf_clean_file(
0,
in_path.as_ptr(),
out_path.as_ptr(),
std::ptr::null(),
std::ptr::null(),
0,
std::ptr::null(),
);
assert!(std::fs::read(&out_tmp).unwrap().starts_with(b"%PDF-"));
let _ = std::fs::remove_file(&in_tmp);
let _ = std::fs::remove_file(&out_tmp);
}
#[test]
fn test_ffi_clean_file_non_pdf() {
let in_tmp = std::env::temp_dir().join("micropdf_clean_invalid.txt");
let out_tmp = std::env::temp_dir().join("micropdf_clean_out_invalid.pdf");
std::fs::write(&in_tmp, b"not a pdf").unwrap();
let in_path = CString::new(in_tmp.to_str().unwrap()).unwrap();
let out_path = CString::new(out_tmp.to_str().unwrap()).unwrap();
pdf_clean_file(
0,
in_path.as_ptr(),
out_path.as_ptr(),
std::ptr::null(),
std::ptr::null(),
0,
std::ptr::null(),
);
assert!(!out_tmp.exists() || std::fs::read(&out_tmp).unwrap_or_default().is_empty());
let _ = std::fs::remove_file(&in_tmp);
let _ = std::fs::remove_file(&out_tmp);
}
#[test]
fn test_ffi_rearrange_pages_null() {
pdf_rearrange_pages(0, 0, 0, std::ptr::null(), CleanStructureOption::Drop);
pdf_rearrange_pages(0, 0, 1, std::ptr::null(), CleanStructureOption::Drop);
}
#[test]
fn test_ffi_vectorize_pages() {
pdf_vectorize_pages(0, 0, 0, std::ptr::null(), CleanVectorizeOption::No);
}
#[test]
fn test_ffi_compress_streams_invalid() {
pdf_compress_streams(0, 0, 0);
pdf_compress_streams(0, 0, -1);
}
#[test]
fn test_ffi_compress_streams_valid() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
4 0 obj << /Length 10 >>
stream
1234567890
endstream
endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000150 00000 n
trailer << /Size 5 /Root 1 0 R >>
startxref
250
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
pdf_compress_streams(0, doc_handle, 1);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_decompress_streams() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
pdf_decompress_streams(0, doc_handle);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_garbage_collect_invalid() {
pdf_garbage_collect(0, 0, 0);
pdf_garbage_collect(0, 0, -1);
}
#[test]
fn test_ffi_garbage_collect_valid() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
4 0 obj << /Unused >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000145 00000 n
trailer << /Size 5 /Root 1 0 R /Info 4 0 R >>
startxref
220
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
pdf_garbage_collect(0, doc_handle, 1);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_deduplicate_objects() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R 4 0 R] /Count 2 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> endobj
4 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000127 00000 n
0000000196 00000 n
trailer << /Size 5 /Root 1 0 R >>
startxref
280
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
pdf_deduplicate_objects(0, doc_handle);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_renumber_objects() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
pdf_renumber_objects(0, doc_handle);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_remove_unused_resources() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
4 0 obj << /Unused >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000145 00000 n
trailer << /Size 5 /Root 1 0 R >>
startxref
220
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
pdf_remove_unused_resources(0, doc_handle);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_set_encryption_null_passwords() {
let mut opts = WriteOptions::new();
pdf_set_encryption(0, &mut opts, 5, 0, std::ptr::null(), std::ptr::null());
assert_eq!(opts.do_encrypt, 5);
}
#[test]
fn test_ffi_clean_free_string() {
pdf_clean_free_string(0, std::ptr::null_mut());
let s = CString::new("test").unwrap();
let ptr = s.into_raw();
pdf_clean_free_string(0, ptr);
}
#[test]
fn test_ffi_optimize() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let tmp = std::env::temp_dir().join("micropdf_optimize_test.pdf");
let path = CString::new(tmp.to_str().unwrap()).unwrap();
pdf_optimize(0, doc_handle, path.as_ptr());
assert!(std::fs::read(&tmp).unwrap().starts_with(b"%PDF-"));
let _ = std::fs::remove_file(&tmp);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_linearize() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
let tmp = std::env::temp_dir().join("micropdf_linearize_test.pdf");
let path = CString::new(tmp.to_str().unwrap()).unwrap();
pdf_linearize(0, doc_handle, path.as_ptr());
assert!(std::fs::read(&tmp).unwrap().starts_with(b"%PDF-"));
let _ = std::fs::remove_file(&tmp);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_create_object_streams() {
let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R 4 0 R 5 0 R] /Count 3 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
4 0 obj << /Type /Page /Parent 2 0 R >> endobj
5 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 6
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000088 00000 n
0000000118 00000 n
0000000148 00000 n
trailer << /Size 6 /Root 1 0 R >>
startxref
250
%%EOF";
let doc = super::super::document::Document::new(pdf_data.to_vec());
let doc_handle = super::super::DOCUMENTS.insert(doc);
pdf_create_object_streams(0, doc_handle);
super::super::DOCUMENTS.remove(doc_handle);
}
#[test]
fn test_ffi_remove_object_streams() {
    // Minimal one-page fixture (no object streams present).
    let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
    let document = super::super::document::Document::new(pdf_data.to_vec());
    let handle = super::super::DOCUMENTS.insert(document);
    // Smoke test: removing object streams from a plain PDF must not panic.
    pdf_remove_object_streams(0, handle);
    super::super::DOCUMENTS.remove(handle);
}
#[test]
fn test_ffi_rearrange_pages_valid() {
    // Two-page fixture with distinct MediaBoxes so the pages are
    // distinguishable after reordering.
    let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R 4 0 R] /Count 2 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> endobj
4 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 595 842] >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000127 00000 n
0000000196 00000 n
trailer << /Size 5 /Root 1 0 R >>
startxref
270
%%EOF";
    let document = super::super::document::Document::new(pdf_data.to_vec());
    let handle = super::super::DOCUMENTS.insert(document);
    // Swap the two pages.
    let order: [i32; 2] = [1, 0];
    pdf_rearrange_pages(0, handle, 2, order.as_ptr(), CleanStructureOption::Drop);
    super::super::DOCUMENTS.remove(handle);
}
#[test]
fn test_ffi_clean_with_structure_keep() {
    let in_tmp = std::env::temp_dir().join("micropdf_clean_keep_in.pdf");
    let out_tmp = std::env::temp_dir().join("micropdf_clean_keep_out.pdf");
    // Minimal one-page PDF fixture written to disk as the clean input.
    let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
    std::fs::write(&in_tmp, pdf_data).unwrap();
    let in_path = CString::new(in_tmp.to_str().unwrap()).unwrap();
    let out_path = CString::new(out_tmp.to_str().unwrap()).unwrap();
    let mut opts = CleanOptions::new();
    opts.structure = CleanStructureOption::Keep;
    pdf_clean_file(
        0,
        in_path.as_ptr(),
        out_path.as_ptr(),
        std::ptr::null(),
        &opts,
        0,
        std::ptr::null(),
    );
    // Capture the output and delete BOTH temp files before asserting, so a
    // failed assertion does not leave stray files in the temp directory.
    let cleaned = std::fs::read(&out_tmp).unwrap();
    let _ = std::fs::remove_file(&in_tmp);
    let _ = std::fs::remove_file(&out_tmp);
    assert!(cleaned.starts_with(b"%PDF-"));
}
#[test]
fn test_find_pattern() {
    let haystack = b"hello world";
    // Matches are reported by offset, including a match at offset 0.
    assert_eq!(find_pattern(haystack, b"world"), Some(6));
    assert_eq!(find_pattern(haystack, b"hello"), Some(0));
    // Absent needle, empty needle, and a needle longer than the haystack
    // are all "not found".
    assert_eq!(find_pattern(haystack, b"xyz"), None);
    assert_eq!(find_pattern(haystack, b""), None);
    assert_eq!(find_pattern(b"ab", b"abc"), None);
}
#[test]
fn test_rfind_pattern() {
    let data = b"foo bar foo";
    // Reports the LAST occurrence, unlike find_pattern.
    assert_eq!(rfind_pattern(data, b"foo"), Some(8));
    assert_eq!(rfind_pattern(data, b"bar"), Some(4));
    // Guard cases, mirroring test_find_pattern: an absent, empty, or
    // oversized needle is "not found".
    assert!(rfind_pattern(data, b"xyz").is_none());
    assert!(rfind_pattern(data, b"").is_none());
    assert!(rfind_pattern(b"ab", b"abc").is_none());
}
#[test]
fn test_find_dict_end() {
    // Pin the exact offset of the closing ">>" rather than just "found".
    let data = b"<< /Key /Value >>";
    assert_eq!(find_dict_end(data, 0), Some(15));
    // For nested dictionaries the OUTER closing ">>" is reported.
    let nested = b"<< /Outer << /Inner >> >>";
    assert_eq!(find_dict_end(nested, 0), Some(23));
    // `start` not pointing at "<<", and an unterminated dictionary,
    // both yield None.
    assert!(find_dict_end(b"/Key 1", 0).is_none());
    assert!(find_dict_end(b"<< /Open", 0).is_none());
}
#[test]
fn test_extract_int_after() {
    // Leading whitespace is skipped; parsing stops at the first non-digit.
    assert_eq!(extract_int_after(b" 123 ", 0), Some(123));
    assert_eq!(extract_int_after(b" -42", 0), Some(-42));
    // Scanning starts at `pos`, not at the beginning of the buffer.
    assert_eq!(extract_int_after(b"x 7", 1), Some(7));
    // No digits at all — even after a bare minus sign — yields None.
    assert_eq!(extract_int_after(b"abc", 0), None);
    assert_eq!(extract_int_after(b"- ", 0), None);
}
#[test]
fn test_collect_all_object_numbers() {
    // Two distinct object numbers, one of them appearing twice.
    let input = b"1 0 obj\n2 0 obj\n1 0 obj\n";
    let numbers = collect_all_object_numbers(input);
    assert!(numbers.contains(&1));
    assert!(numbers.contains(&2));
}
#[test]
fn test_object_has_filter() {
    // Object 5 carries a /Filter entry in its dictionary.
    let body = b"5 0 obj << /Filter /FlateDecode >> endobj";
    assert!(object_has_filter(body, 5));
}
#[test]
fn test_object_has_stream() {
    // Object 4 has a stream ... endstream section following its dictionary.
    let body = b"4 0 obj << /Length 5 >>\nstream\nxxxxx\nendstream\nendobj";
    assert!(object_has_stream(body, 4));
}
}