use crate::error::{PdfError, Result};
use crate::parser::objects::{PdfDictionary, PdfName, PdfObject, PdfString};
use crate::parser::PdfReader;
use crate::text::TextEncoding;
use std::collections::HashMap;
use std::io::Cursor;
pub struct IncrementalFormFiller<'a> {
base_bytes: &'a [u8],
}
impl<'a> IncrementalFormFiller<'a> {
pub fn new(base_bytes: &'a [u8]) -> Self {
Self { base_bytes }
}
pub fn fill(&self, field_name: &str, value: &str) -> Result<Vec<u8>> {
self.fill_many(&[(field_name, value)])
}
pub fn fill_many(&self, fields: &[(&str, &str)]) -> Result<Vec<u8>> {
fill_many_impl(self.base_bytes, fields)
}
}
const MAX_FIELD_DEPTH: u8 = 32;
fn resolve_acroform_fields(
reader: &mut PdfReader<Cursor<&[u8]>>,
) -> Result<HashMap<String, (u32, u16)>> {
let acroform_dict = {
let catalog = reader
.catalog()
.map_err(|e| PdfError::InvalidStructure(format!("read catalog: {e}")))?
.clone();
match catalog.get("AcroForm") {
Some(PdfObject::Reference(n, g)) => reader
.get_object(*n, *g)
.map_err(|e| PdfError::InvalidStructure(format!("resolve /AcroForm: {e}")))?
.as_dict()
.cloned()
.ok_or_else(|| {
PdfError::InvalidStructure("/AcroForm is not a dictionary".to_string())
})?,
Some(PdfObject::Dictionary(d)) => d.clone(),
_ => {
return Err(PdfError::InvalidStructure(
"document has no /AcroForm".to_string(),
))
}
}
};
let field_refs: Vec<(u32, u16)> = match acroform_dict.get("Fields") {
Some(PdfObject::Array(arr)) => arr.0.iter().filter_map(|o| o.as_reference()).collect(),
_ => Vec::new(),
};
let mut out = HashMap::new();
for (n, g) in field_refs {
collect_fields(reader, (n, g), "", &mut out, 0)?;
}
Ok(out)
}
fn collect_fields(
reader: &mut PdfReader<Cursor<&[u8]>>,
node_ref: (u32, u16),
parent_prefix: &str,
out: &mut HashMap<String, (u32, u16)>,
depth: u8,
) -> Result<()> {
if depth >= MAX_FIELD_DEPTH {
return Err(PdfError::InvalidStructure(
"AcroForm field tree exceeds maximum depth".to_string(),
));
}
let node = reader
.get_object(node_ref.0, node_ref.1)
.map_err(|e| PdfError::InvalidStructure(format!("resolve field object: {e}")))?
.as_dict()
.cloned()
.ok_or_else(|| {
PdfError::InvalidStructure("field object is not a dictionary".to_string())
})?;
let partial = node
.get("T")
.and_then(|o| o.as_string())
.map(|s| String::from_utf8_lossy(s.as_bytes()).into_owned());
let full_name = match (&partial, parent_prefix.is_empty()) {
(Some(t), true) => t.clone(),
(Some(t), false) => format!("{parent_prefix}.{t}"),
(None, _) => parent_prefix.to_string(),
};
let kids: Vec<(u32, u16)> = match node.get("Kids") {
Some(PdfObject::Array(arr)) => arr.0.iter().filter_map(|o| o.as_reference()).collect(),
_ => Vec::new(),
};
let kids_are_subfields = kids.iter().any(|(n, g)| {
reader
.get_object(*n, *g)
.ok()
.and_then(|o| o.as_dict())
.map(|d| d.contains_key("T"))
.unwrap_or(false)
});
if kids.is_empty() || !kids_are_subfields {
if partial.is_some() {
out.insert(full_name, node_ref);
}
} else {
for kid in kids {
collect_fields(reader, kid, &full_name, out, depth + 1)?;
}
}
Ok(())
}
fn fill_many_impl(base_bytes: &[u8], fields: &[(&str, &str)]) -> Result<Vec<u8>> {
let mut reader = PdfReader::new(Cursor::new(base_bytes))
.map_err(|e| PdfError::InvalidStructure(format!("parse base PDF: {e}")))?;
if reader.is_encrypted() {
return Err(PdfError::PermissionDenied(
"incremental form fill is not supported on encrypted PDFs".to_string(),
));
}
let base_startxref = reader.trailer().xref_offset;
let base_root = reader
.trailer()
.root()
.map_err(|e| PdfError::InvalidStructure(format!("base /Root: {e}")))?;
let base_size = reader
.trailer()
.size()
.map_err(|e| PdfError::InvalidStructure(format!("base /Size: {e}")))?;
let base_id_first: Option<Vec<u8>> = first_id_bytes(reader.trailer().id());
let (acro_ref, mut acro_dict) = resolve_acroform_object(&mut reader)?;
let field_map = resolve_acroform_fields(&mut reader)?;
let mut modified: Vec<(u32, u16, PdfDictionary)> = Vec::new();
for (name, value) in fields {
let (num, gen) = field_map
.get(*name)
.copied()
.ok_or_else(|| PdfError::FieldNotFound((*name).to_string()))?;
let mut field_dict = reader
.get_object(num, gen)
.map_err(|e| PdfError::InvalidStructure(format!("resolve field {name}: {e}")))?
.as_dict()
.cloned()
.ok_or_else(|| {
PdfError::InvalidStructure(format!("field {name} is not a dictionary"))
})?;
field_dict.insert(
"V".to_string(),
PdfObject::String(PdfString(value.as_bytes().to_vec())),
);
match modified.iter_mut().find(|(n, g, _)| *n == num && *g == gen) {
Some(slot) => slot.2 = field_dict,
None => modified.push((num, gen, field_dict)),
}
}
acro_dict.insert("NeedAppearances".to_string(), PdfObject::Boolean(true));
let acro_da = da_of(&acro_dict);
let widgets_by_parent = collect_widgets_by_parent(&mut reader);
let mut new_streams: Vec<(u32, Vec<u8>, [f64; 4], Vec<u8>)> = Vec::new();
let mut extra_widgets: Vec<(u32, u16, PdfDictionary)> = Vec::new();
let mut next_id = base_size;
for (num, gen, dict) in modified.iter_mut() {
let ft = dict
.get("FT")
.and_then(|o| o.as_name())
.map(|n| n.0.clone());
match ft.as_deref() {
Some("Tx") => {
synthesize_text_field_ap(
(*num, *gen),
dict,
&acro_da,
&mut reader,
&widgets_by_parent,
&mut next_id,
&mut new_streams,
&mut extra_widgets,
)?;
}
Some("Btn") => {
if let Some(PdfObject::String(v)) = dict.get("V").cloned() {
let state = String::from_utf8_lossy(v.as_bytes()).into_owned();
dict.insert("AS".to_string(), PdfObject::Name(PdfName(state)));
}
}
_ => {}
}
}
let total_size = next_id;
let mut out = Vec::with_capacity(base_bytes.len() + 1024);
out.extend_from_slice(base_bytes);
let mut changed: Vec<(u32, u16, u64)> = Vec::new();
for (num, gen, dict) in &modified {
let offset = out.len() as u64;
write_indirect_object(&mut out, *num, *gen, dict)?;
changed.push((*num, *gen, offset));
}
for (num, gen, dict) in &extra_widgets {
let offset = out.len() as u64;
write_indirect_object(&mut out, *num, *gen, dict)?;
changed.push((*num, *gen, offset));
}
let acro_offset = out.len() as u64;
write_indirect_object(&mut out, acro_ref.0, acro_ref.1, &acro_dict)?;
changed.push((acro_ref.0, acro_ref.1, acro_offset));
for (id, content, bbox, resources) in &new_streams {
let offset = out.len() as u64;
write_indirect_stream_object(&mut out, *id, 0, content, *bbox, resources)?;
changed.push((*id, 0, offset));
}
let xref_pos = out.len() as u64;
let id_pair = base_id_first.map(|first| {
let second = derive_revision_id(&first, fields, xref_pos);
(first, second)
});
out.extend_from_slice(&write_partial_xref_section(&changed));
out.extend_from_slice(&write_incremental_trailer(
base_startxref,
base_root,
total_size,
xref_pos,
id_pair,
));
Ok(out)
}
fn resolve_acroform_object(
reader: &mut PdfReader<Cursor<&[u8]>>,
) -> Result<((u32, u16), PdfDictionary)> {
let catalog = reader
.catalog()
.map_err(|e| PdfError::InvalidStructure(format!("read catalog: {e}")))?
.clone();
match catalog.get("AcroForm") {
Some(PdfObject::Reference(n, g)) => {
let dict = reader
.get_object(*n, *g)
.map_err(|e| PdfError::InvalidStructure(format!("resolve /AcroForm: {e}")))?
.as_dict()
.cloned()
.ok_or_else(|| {
PdfError::InvalidStructure("/AcroForm is not a dictionary".to_string())
})?;
Ok(((*n, *g), dict))
}
_ => Err(PdfError::InvalidStructure(
"/AcroForm must be an indirect reference for incremental fill".to_string(),
)),
}
}
fn first_id_bytes(id: Option<&PdfObject>) -> Option<Vec<u8>> {
match id {
Some(PdfObject::Array(arr)) => arr
.0
.first()
.and_then(|o| o.as_string())
.map(|s| s.as_bytes().to_vec()),
_ => None,
}
}
fn da_font_to_base_font(name: &str) -> &str {
match name {
"Helv" => "Helvetica",
"HeBO" | "HeBo" => "Helvetica-Bold",
"HeOb" => "Helvetica-Oblique",
"Cour" => "Courier",
"CoBO" | "CoBo" => "Courier-Bold",
"TiRo" => "Times-Roman",
"TiBo" => "Times-Bold",
"TiIt" => "Times-Italic",
"Symb" => "Symbol",
"ZaDb" => "ZapfDingbats",
other => other,
}
}
fn rect_of(dict: &PdfDictionary) -> Option<[f64; 4]> {
let arr = dict.get("Rect").and_then(|o| o.as_array())?;
if arr.0.len() != 4 {
return None;
}
let mut r = [0.0f64; 4];
for (i, slot) in r.iter_mut().enumerate() {
*slot = arr.0[i].as_real()?;
}
Some(r)
}
fn da_of(dict: &PdfDictionary) -> Option<String> {
dict.get("DA")
.and_then(|o| o.as_string())
.map(|s| String::from_utf8_lossy(s.as_bytes()).into_owned())
}
fn build_text_ap(
value: &str,
da_font_name: &str,
da_font_size: f64,
rect: [f64; 4],
) -> Result<(Vec<u8>, [f64; 4], Vec<u8>)> {
let width = (rect[2] - rect[0]).abs();
let height = (rect[3] - rect[1]).abs();
let font_size = if da_font_size > 0.0 {
da_font_size
} else {
12.0
};
let encoded = TextEncoding::WinAnsiEncoding
.encode_strict(value)
.map_err(|ch| {
PdfError::EncodingError(format!(
"form value contains character {:?} (U+{:04X}) not representable in \
WinAnsiEncoding used by built-in font {:?}; an embedded Type0 font \
would be required (not yet supported on the incremental fill path)",
ch, ch as u32, da_font_name,
))
})?;
let pad = 2.0;
let text_y = (height - font_size) / 2.0 + font_size * 0.3;
let mut content = Vec::new();
content.extend_from_slice(b"q\n");
content.extend_from_slice(b"BT\n");
content
.extend_from_slice(format!("/{da_font_name} {} Tf\n", format_real(font_size)).as_bytes());
content.extend_from_slice(b"0 g\n");
content
.extend_from_slice(format!("{} {} Td\n", format_real(pad), format_real(text_y)).as_bytes());
write_literal_string(&mut content, &encoded);
content.extend_from_slice(b" Tj\n");
content.extend_from_slice(b"ET\n");
content.extend_from_slice(b"Q");
let mut font_entry = PdfDictionary::new();
font_entry.insert(
"Type".to_string(),
PdfObject::Name(PdfName("Font".to_string())),
);
font_entry.insert(
"Subtype".to_string(),
PdfObject::Name(PdfName("Type1".to_string())),
);
font_entry.insert(
"BaseFont".to_string(),
PdfObject::Name(PdfName(da_font_to_base_font(da_font_name).to_string())),
);
let mut font_dict = PdfDictionary::new();
font_dict.insert(da_font_name.to_string(), PdfObject::Dictionary(font_entry));
let mut resources = PdfDictionary::new();
resources.insert("Font".to_string(), PdfObject::Dictionary(font_dict));
let mut resources_bytes = Vec::new();
write_dict(&mut resources_bytes, &resources)?;
Ok((content, [0.0, 0.0, width, height], resources_bytes))
}
fn resolve_da(
widget_da: Option<&str>,
field_da: Option<&str>,
acro_da: Option<&str>,
) -> (String, f64) {
widget_da
.and_then(parse_da_string)
.or_else(|| field_da.and_then(parse_da_string))
.or_else(|| acro_da.and_then(parse_da_string))
.unwrap_or_else(|| ("Helv".to_string(), 12.0))
}
fn collect_widgets_by_parent(
reader: &mut PdfReader<Cursor<&[u8]>>,
) -> HashMap<(u32, u16), Vec<(u32, u16)>> {
let mut map: HashMap<(u32, u16), Vec<(u32, u16)>> = HashMap::new();
let root = match reader.pages() {
Ok(p) => p.clone(),
Err(_) => return map,
};
let mut stack: Vec<(u32, u16)> = match root.get("Kids") {
Some(PdfObject::Array(arr)) => arr.0.iter().filter_map(|o| o.as_reference()).collect(),
_ => Vec::new(),
};
let mut visited: std::collections::HashSet<(u32, u16)> = std::collections::HashSet::new();
let mut budget = 100_000u32;
while let Some(node_ref) = stack.pop() {
if budget == 0 || !visited.insert(node_ref) {
continue;
}
budget -= 1;
let node = match reader
.get_object(node_ref.0, node_ref.1)
.ok()
.and_then(|o| o.as_dict().cloned())
{
Some(d) => d,
None => continue,
};
if let Some(PdfObject::Array(kids)) = node.get("Kids") {
let is_pages = node
.get("Type")
.and_then(|o| o.as_name())
.map(|n| n.0 == "Pages")
.unwrap_or(false);
if is_pages || node.get("Count").is_some() {
for k in &kids.0 {
if let Some(r) = k.as_reference() {
stack.push(r);
}
}
continue;
}
}
if let Some(PdfObject::Array(annots)) = node.get("Annots") {
let annot_refs: Vec<(u32, u16)> =
annots.0.iter().filter_map(|o| o.as_reference()).collect();
for (an, ag) in annot_refs {
if let Some(annot) = reader
.get_object(an, ag)
.ok()
.and_then(|o| o.as_dict().cloned())
{
if let Some(parent) = annot.get("Parent").and_then(|o| o.as_reference()) {
map.entry(parent).or_default().push((an, ag));
}
}
}
}
}
map
}
fn synthesize_text_field_ap(
field_id: (u32, u16),
field_dict: &mut PdfDictionary,
acro_da: &Option<String>,
reader: &mut PdfReader<Cursor<&[u8]>>,
widgets_by_parent: &HashMap<(u32, u16), Vec<(u32, u16)>>,
next_id: &mut u32,
new_streams: &mut Vec<(u32, Vec<u8>, [f64; 4], Vec<u8>)>,
extra_widgets: &mut Vec<(u32, u16, PdfDictionary)>,
) -> Result<()> {
let value = match field_dict.get("V") {
Some(PdfObject::String(s)) => String::from_utf8_lossy(s.as_bytes()).into_owned(),
_ => return Ok(()),
};
let field_da = da_of(field_dict);
if let Some(rect) = rect_of(field_dict) {
let (font_name, font_size) =
resolve_da(field_da.as_deref(), field_da.as_deref(), acro_da.as_deref());
let (content, bbox, resources) = build_text_ap(&value, &font_name, font_size, rect)?;
let ap_id = *next_id;
*next_id += 1;
new_streams.push((ap_id, content, bbox, resources));
let mut ap = PdfDictionary::new();
ap.insert("N".to_string(), PdfObject::Reference(ap_id, 0));
field_dict.insert("AP".to_string(), PdfObject::Dictionary(ap));
return Ok(());
}
let mut widget_refs: Vec<(u32, u16)> = match field_dict.get("Kids") {
Some(PdfObject::Array(arr)) => arr.0.iter().filter_map(|o| o.as_reference()).collect(),
_ => Vec::new(),
};
if widget_refs.is_empty() {
if let Some(refs) = widgets_by_parent.get(&field_id) {
widget_refs = refs.clone();
}
}
for (kn, kg) in widget_refs {
let mut kid = match reader
.get_object(kn, kg)
.ok()
.and_then(|o| o.as_dict().cloned())
{
Some(d) => d,
None => continue,
};
let is_widget = kid
.get("Subtype")
.and_then(|o| o.as_name())
.map(|n| n.0 == "Widget")
.unwrap_or(false);
let rect = match rect_of(&kid) {
Some(r) if is_widget => r,
_ => continue,
};
let widget_da = da_of(&kid);
let (font_name, font_size) = resolve_da(
widget_da.as_deref(),
field_da.as_deref(),
acro_da.as_deref(),
);
let (content, bbox, resources) = build_text_ap(&value, &font_name, font_size, rect)?;
let ap_id = *next_id;
*next_id += 1;
new_streams.push((ap_id, content, bbox, resources));
let mut ap = PdfDictionary::new();
ap.insert("N".to_string(), PdfObject::Reference(ap_id, 0));
kid.insert("AP".to_string(), PdfObject::Dictionary(ap));
extra_widgets.push((kn, kg, kid));
}
Ok(())
}
fn parse_da_string(da: &str) -> Option<(String, f64)> {
let tokens: Vec<&str> = da.split_whitespace().collect();
let tf_idx = tokens.iter().position(|t| *t == "Tf")?;
if tf_idx < 2 {
return None;
}
let name_tok = tokens[tf_idx - 2];
let size_tok = tokens[tf_idx - 1];
let name = name_tok.strip_prefix('/')?;
if name.is_empty() {
return None;
}
let size: f64 = size_tok.parse().ok()?;
Some((name.to_string(), size))
}
fn write_indirect_stream_object(
out: &mut Vec<u8>,
num: u32,
gen: u16,
content: &[u8],
bbox: [f64; 4],
resources_snippet: &[u8],
) -> Result<()> {
out.extend_from_slice(format!("{num} {gen} obj\n").as_bytes());
out.extend_from_slice(b"<< /Type /XObject /Subtype /Form /BBox [");
out.extend_from_slice(
format!(
"{} {} {} {}",
format_real(bbox[0]),
format_real(bbox[1]),
format_real(bbox[2]),
format_real(bbox[3])
)
.as_bytes(),
);
out.extend_from_slice(b"] /Resources ");
out.extend_from_slice(resources_snippet);
out.extend_from_slice(format!(" /Length {} >>\n", content.len()).as_bytes());
out.extend_from_slice(b"stream\n");
out.extend_from_slice(content);
out.extend_from_slice(b"\nendstream\nendobj\n");
Ok(())
}
fn write_indirect_object(
out: &mut Vec<u8>,
num: u32,
gen: u16,
dict: &PdfDictionary,
) -> Result<()> {
out.extend_from_slice(format!("{num} {gen} obj\n").as_bytes());
write_object_value(out, &PdfObject::Dictionary(dict.clone()))?;
out.extend_from_slice(b"\nendobj\n");
Ok(())
}
fn write_object_value(out: &mut Vec<u8>, obj: &PdfObject) -> Result<()> {
match obj {
PdfObject::Null => out.extend_from_slice(b"null"),
PdfObject::Boolean(b) => out.extend_from_slice(if *b { b"true" } else { b"false" }),
PdfObject::Integer(i) => out.extend_from_slice(i.to_string().as_bytes()),
PdfObject::Real(f) => out.extend_from_slice(format_real(*f).as_bytes()),
PdfObject::String(s) => write_literal_string(out, s.as_bytes()),
PdfObject::Name(n) => write_name(out, n),
PdfObject::Reference(num, gen) => {
out.extend_from_slice(format!("{num} {gen} R").as_bytes())
}
PdfObject::Array(arr) => {
out.extend_from_slice(b"[");
for (i, item) in arr.0.iter().enumerate() {
if i > 0 {
out.extend_from_slice(b" ");
}
write_object_value(out, item)?;
}
out.extend_from_slice(b"]");
}
PdfObject::Dictionary(d) => write_dict(out, d)?,
PdfObject::Stream(_) => {
return Err(PdfError::InvalidStructure(
"unexpected stream object in AcroForm field dictionary".to_string(),
))
}
}
Ok(())
}
fn write_dict(out: &mut Vec<u8>, dict: &PdfDictionary) -> Result<()> {
out.extend_from_slice(b"<< ");
let mut keys: Vec<&PdfName> = dict.0.keys().collect();
keys.sort_by(|a, b| a.0.cmp(&b.0));
for key in keys {
write_name(out, key);
out.extend_from_slice(b" ");
write_object_value(out, &dict.0[key])?;
out.extend_from_slice(b" ");
}
out.extend_from_slice(b">>");
Ok(())
}
fn write_name(out: &mut Vec<u8>, name: &PdfName) {
out.extend_from_slice(b"/");
for &b in name.0.as_bytes() {
if b.is_ascii_alphanumeric()
|| matches!(
b,
b'+' | b'-' | b'.' | b'_' | b'@' | b'$' | b':' | b';' | b'*' | b'?'
)
{
out.push(b);
} else {
out.extend_from_slice(format!("#{b:02X}").as_bytes());
}
}
}
fn write_literal_string(out: &mut Vec<u8>, bytes: &[u8]) {
out.push(b'(');
for &b in bytes {
match b {
b'(' => out.extend_from_slice(b"\\("),
b')' => out.extend_from_slice(b"\\)"),
b'\\' => out.extend_from_slice(b"\\\\"),
b'\n' => out.extend_from_slice(b"\\n"),
b'\r' => out.extend_from_slice(b"\\r"),
b'\t' => out.extend_from_slice(b"\\t"),
0x20..=0x7E => out.push(b),
_ => out.extend_from_slice(format!("\\{b:03o}").as_bytes()),
}
}
out.push(b')');
}
fn format_real(f: f64) -> String {
if !f.is_finite() {
return "0".to_string();
}
if f == f.trunc() {
return format!("{}", f as i64);
}
let mut s = format!("{f:.6}");
while s.ends_with('0') {
s.pop();
}
if s.ends_with('.') {
s.pop();
}
s
}
fn write_partial_xref_section(changed: &[(u32, u16, u64)]) -> Vec<u8> {
let mut entries = changed.to_vec();
entries.sort_by_key(|(num, _, _)| *num);
let mut out = Vec::new();
out.extend_from_slice(b"xref\n");
let mut i = 0;
while i < entries.len() {
let start = entries[i].0;
let mut j = i;
while j + 1 < entries.len() && entries[j + 1].0 == entries[j].0 + 1 {
j += 1;
}
let count = j - i + 1;
out.extend_from_slice(format!("{start} {count}\n").as_bytes());
for entry in &entries[i..=j] {
out.extend_from_slice(format!("{:010} {:05} n \n", entry.2, entry.1).as_bytes());
}
i = j + 1;
}
out
}
fn write_incremental_trailer(
base_prev_xref: u64,
base_root: (u32, u16),
base_size: u32,
new_xref_pos: u64,
id_pair: Option<(Vec<u8>, Vec<u8>)>,
) -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(b"trailer\n<< ");
out.extend_from_slice(format!("/Size {base_size} ").as_bytes());
out.extend_from_slice(format!("/Root {} {} R ", base_root.0, base_root.1).as_bytes());
out.extend_from_slice(format!("/Prev {base_prev_xref} ").as_bytes());
if let Some((first, second)) = id_pair {
let hex = |bytes: &[u8]| -> String { bytes.iter().map(|b| format!("{b:02X}")).collect() };
out.extend_from_slice(format!("/ID [<{}> <{}>] ", hex(&first), hex(&second)).as_bytes());
}
out.extend_from_slice(b">>\n");
out.extend_from_slice(b"startxref\n");
out.extend_from_slice(format!("{new_xref_pos}\n").as_bytes());
out.extend_from_slice(b"%%EOF\n");
out
}
fn derive_revision_id(first: &[u8], fields: &[(&str, &str)], xref_pos: u64) -> Vec<u8> {
let mut buf = Vec::new();
buf.extend_from_slice(first);
for (name, value) in fields {
buf.extend_from_slice(name.as_bytes());
buf.push(0);
buf.extend_from_slice(value.as_bytes());
buf.push(0);
}
buf.extend_from_slice(&xref_pos.to_le_bytes());
md5::compute(&buf).0.to_vec()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn partial_xref_groups_contiguous_subsections() {
let bytes = write_partial_xref_section(&[(5, 0, 1024), (7, 0, 2048)]);
let s = String::from_utf8(bytes).unwrap();
assert!(s.starts_with("xref\n"), "must start with xref keyword");
assert!(s.contains("5 1\n0000001024 00000 n \n"), "obj 5 subsection");
assert!(s.contains("7 1\n0000002048 00000 n \n"), "obj 7 subsection");
assert!(!s.contains("\n6 "), "gap object 6 must not appear");
}
#[test]
fn partial_xref_merges_adjacent_ids() {
let bytes = write_partial_xref_section(&[(7, 0, 2048), (5, 0, 1024), (6, 0, 1536)]);
let s = String::from_utf8(bytes).unwrap();
assert!(
s.contains("5 3\n"),
"contiguous ids form one subsection: {s}"
);
assert!(s.contains("0000001024 00000 n \n0000001536 00000 n \n0000002048 00000 n \n"));
}
#[test]
fn incremental_trailer_carries_root_prev_size() {
let bytes = write_incremental_trailer(312, (1, 0), 8, 5000, None);
let s = String::from_utf8(bytes).unwrap();
assert!(s.contains("/Prev 312"), "must chain /Prev: {s}");
assert!(s.contains("/Root 1 0 R"), "must reuse base /Root");
assert!(s.contains("/Size 8"), "must carry /Size");
assert!(s.ends_with("startxref\n5000\n%%EOF\n"), "suffix: {s}");
assert!(!s.contains("/Info"), "no /Info in incremental trailer");
}
#[test]
fn incremental_trailer_emits_distinct_id_pair() {
let bytes = write_incremental_trailer(
10,
(2, 0),
5,
99,
Some((vec![0xDE, 0xAD], vec![0xBE, 0xEF])),
);
let s = String::from_utf8(bytes).unwrap();
assert!(s.contains("/ID [<DEAD> <BEEF>]"), "distinct id pair: {s}");
}
#[test]
fn revision_id_differs_from_permanent_and_is_deterministic() {
let first = vec![1u8, 2, 3, 4];
let a = derive_revision_id(&first, &[("name", "Ada")], 100);
let b = derive_revision_id(&first, &[("name", "Ada")], 100);
let c = derive_revision_id(&first, &[("name", "Grace")], 100);
assert_eq!(a, b, "same inputs -> same id (reproducible)");
assert_ne!(
a, first,
"second element must differ from the permanent one"
);
assert_ne!(a, c, "different content -> different revision id");
assert_eq!(a.len(), 16, "PDF /ID elements are 16 bytes");
}
#[test]
fn literal_string_escapes_reserved_bytes() {
let mut out = Vec::new();
write_literal_string(&mut out, b"a(b)c\\d");
assert_eq!(out, b"(a\\(b\\)c\\\\d)");
}
#[test]
fn parse_da_string_extracts_font_and_size() {
assert_eq!(
parse_da_string("/Helv 12 Tf 0 g"),
Some(("Helv".to_string(), 12.0))
);
assert_eq!(
parse_da_string("/Helvetica-Bold 9.5 Tf 0 g"),
Some(("Helvetica-Bold".to_string(), 9.5))
);
assert_eq!(
parse_da_string("0 0 1 rg /Cour 8 Tf"),
Some(("Cour".to_string(), 8.0))
);
assert_eq!(parse_da_string("/F1 0 Tf"), Some(("F1".to_string(), 0.0)));
assert_eq!(parse_da_string(""), None);
assert_eq!(parse_da_string("garbage"), None);
assert_eq!(parse_da_string("Tf"), None);
}
#[test]
fn write_indirect_stream_object_produces_valid_form_xobject() {
let content = b"q BT /Helv 12 Tf 2 5 Td (Hi) Tj ET Q";
let mut resources = Vec::new();
resources.extend_from_slice(b"<< /Font << /Helv << /Type /Font >> >> >>");
let mut out = Vec::new();
write_indirect_stream_object(&mut out, 9, 0, content, [0.0, 0.0, 200.0, 20.0], &resources)
.unwrap();
let s = String::from_utf8(out.clone()).unwrap();
assert!(s.starts_with("9 0 obj\n<<"), "object header: {s}");
assert!(s.contains("/Type /XObject"), "must be XObject: {s}");
assert!(s.contains("/Subtype /Form"), "must be Form: {s}");
assert!(s.contains("/BBox [0 0 200 20]"), "bbox: {s}");
assert!(
s.contains(&format!("/Length {}", content.len())),
"length must equal raw content byte count: {s}"
);
assert!(s.contains("/Resources << /Font"), "resources inlined: {s}");
let marker = format!(
"stream\n{}\nendstream\nendobj\n",
String::from_utf8_lossy(content)
);
assert!(s.ends_with(&marker), "stream framing: {s}");
let mut out2 = Vec::new();
write_indirect_stream_object(
&mut out2,
9,
0,
content,
[0.0, 0.0, 200.0, 20.0],
&resources,
)
.unwrap();
assert_eq!(out, out2, "serialization must be deterministic");
}
}