use crate::error::{Error, Result};
use crate::object::Object;
use crate::parser::parse_object;
use std::collections::{HashMap, HashSet};
use std::io::{Read, Seek, SeekFrom};
/// Classification of a cross-reference entry, mirroring the three entry
/// types defined for PDF xref data: free-list entries, ordinary byte-offset
/// entries, and entries stored inside an object stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum XRefEntryType {
    /// Entry on the free list (`f` rows / type 0 in xref streams).
    Free,
    /// Object stored directly in the file at a byte offset (`n` rows / type 1).
    Uncompressed,
    /// Object stored inside an object stream (type 2; xref streams only).
    Compressed,
}
/// A single cross-reference entry.
///
/// The `offset` and `generation` fields are overloaded depending on
/// `entry_type` — see the constructors ([`XRefEntry::uncompressed`],
/// [`XRefEntry::compressed`], [`XRefEntry::free`]) for the exact meaning
/// of each field per type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XRefEntry {
    pub entry_type: XRefEntryType,
    // Byte offset for uncompressed entries; next-free object number for free
    // entries; containing stream's object number for compressed entries.
    pub offset: u64,
    // Generation number; for compressed entries, the index within the stream.
    pub generation: u16,
    // Convenience flag: true for Uncompressed/Compressed, false for Free.
    pub in_use: bool,
}
impl XRefEntry {
    /// Builds an entry from a traditional xref table row: `n` rows become
    /// uncompressed entries, `f` rows become free-list entries.
    pub fn new(offset: u64, generation: u16, in_use: bool) -> Self {
        if in_use {
            Self::uncompressed(offset, generation)
        } else {
            Self::free(offset, generation)
        }
    }

    /// An in-use object stored at `offset` in the file (type 1).
    pub fn uncompressed(offset: u64, generation: u16) -> Self {
        Self {
            offset,
            generation,
            in_use: true,
            entry_type: XRefEntryType::Uncompressed,
        }
    }

    /// An object packed inside object stream `stream_obj_num` at position
    /// `index_in_stream` (type 2). The stream's object number is carried in
    /// `offset` and the index in `generation`.
    pub fn compressed(stream_obj_num: u64, index_in_stream: u16) -> Self {
        Self {
            offset: stream_obj_num,
            generation: index_in_stream,
            in_use: true,
            entry_type: XRefEntryType::Compressed,
        }
    }

    /// A free-list entry (type 0); `next_free` is the next free object number.
    pub fn free(next_free: u64, generation: u16) -> Self {
        Self {
            offset: next_free,
            generation,
            in_use: false,
            entry_type: XRefEntryType::Free,
        }
    }
}
/// The merged cross-reference table for a document: a map from object number
/// to its entry, plus the trailer dictionary from the newest xref section
/// that supplied one.
#[derive(Debug, Clone)]
pub struct CrossRefTable {
    pub(crate) entries: HashMap<u32, XRefEntry>,
    // None until a trailer dictionary has been recorded via set_trailer().
    trailer: Option<HashMap<String, Object>>,
}
impl CrossRefTable {
    /// Creates an empty table with no trailer.
    pub fn new() -> Self {
        Self {
            entries: HashMap::new(),
            trailer: None,
        }
    }

    /// Records the trailer dictionary, replacing any previous one.
    pub fn set_trailer(&mut self, trailer: HashMap<String, Object>) {
        self.trailer = Some(trailer);
    }

    /// Borrows the trailer dictionary, if one has been recorded.
    pub fn trailer(&self) -> Option<&HashMap<String, Object>> {
        self.trailer.as_ref()
    }

    /// Inserts (or overwrites) the entry for `object_number`.
    pub fn add_entry(&mut self, object_number: u32, entry: XRefEntry) {
        self.entries.insert(object_number, entry);
    }

    /// Looks up the entry for `object_number`.
    pub fn get(&self, object_number: u32) -> Option<&XRefEntry> {
        self.entries.get(&object_number)
    }

    /// Returns true when an entry exists for `object_number`.
    pub fn contains(&self, object_number: u32) -> bool {
        self.entries.contains_key(&object_number)
    }

    /// Iterates over every known object number, in arbitrary order.
    pub fn all_object_numbers(&self) -> impl Iterator<Item = u32> + '_ {
        self.entries.keys().copied()
    }

    /// Folds `other` into `self`, with existing entries taking precedence.
    /// Merging older xref sections into newer ones this way preserves the
    /// newest definition of each object. The trailer is adopted from `other`
    /// only when `self` has none yet.
    pub fn merge_from(&mut self, other: CrossRefTable) {
        for (obj_num, entry) in other.entries {
            self.entries.entry(obj_num).or_insert(entry);
        }
        if self.trailer.is_none() {
            self.trailer = other.trailer;
        }
    }

    /// Number of entries in the table.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// True when the table holds no entries.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Shifts every in-use, uncompressed entry's byte offset forward by
    /// `delta`. Free and compressed entries are untouched, since their
    /// `offset` field is not a file position.
    pub fn shift_offsets(&mut self, delta: u64) {
        self.entries
            .values_mut()
            .filter(|e| e.in_use && e.entry_type == XRefEntryType::Uncompressed)
            .for_each(|e| e.offset += delta);
    }
}
impl Default for CrossRefTable {
fn default() -> Self {
Self::new()
}
}
pub fn find_xref_offset<R: Read + Seek>(reader: &mut R) -> Result<u64> {
let file_size = reader.seek(SeekFrom::End(0))?;
let read_size = std::cmp::min(2048, file_size);
reader.seek(SeekFrom::End(-(read_size as i64)))?;
let mut buf = Vec::new();
reader.take(read_size).read_to_end(&mut buf)?;
let content = String::from_utf8_lossy(&buf);
let startxref_pos = content.rfind("startxref").ok_or(Error::InvalidXref)?;
let after_keyword = &content[startxref_pos + 9..];
let lines = split_lines(after_keyword);
for line in lines {
let trimmed = line.trim();
if !trimmed.is_empty() && trimmed.chars().all(|c| c.is_ascii_digit()) {
return trimmed.parse::<u64>().map_err(|_| Error::InvalidXref);
}
}
Err(Error::InvalidXref)
}
/// Parses the cross-reference data starting at `offset`, following any
/// `/Prev` chain so the returned table covers all incremental updates.
pub fn parse_xref<R: Read + Seek>(reader: &mut R, offset: u64) -> Result<CrossRefTable> {
    parse_xref_iterative(reader, offset)
}
/// Best-effort scan for a direct-integer `/Length NNN` value in raw
/// (undecoded) stream-object bytes, used only to size the read for an
/// xref stream.
///
/// Returns `None` when no direct-integer `/Length` value is found. An
/// indirect reference such as `/Length 5 0 R` yields `Some(5)`; callers
/// treat the result as a sizing hint, so that is acceptable.
fn find_stream_length(data: &[u8]) -> Option<usize> {
    let keyword = b"/Length";
    let mut from = 0;
    while let Some(rel) = data[from..].windows(keyword.len()).position(|w| w == keyword) {
        let pos = from + rel;
        let after = &data[pos + keyword.len()..];
        // A digit immediately after the keyword means we actually matched a
        // longer name such as /Length1 or /Length2 (font stream keys): skip
        // it and keep searching for a genuine /Length key. (Previously the
        // trailing digit itself was misread as the length value.)
        if matches!(after.first(), Some(b) if b.is_ascii_digit()) {
            from = pos + keyword.len();
            continue;
        }
        let start = after.iter().position(|&b| !b.is_ascii_whitespace())?;
        let after = &after[start..];
        return if after.first()?.is_ascii_digit() {
            let end = after
                .iter()
                .position(|b| !b.is_ascii_digit())
                .unwrap_or(after.len());
            let num_str = std::str::from_utf8(&after[..end]).ok()?;
            num_str.parse::<usize>().ok()
        } else {
            // Value after /Length is not a direct integer.
            None
        };
    }
    None
}
/// Validates `offset` as the start of xref data and, when the file's
/// `startxref` value is slightly wrong, scans a small window around it for
/// a plausible anchor to recover from off-by-a-few corruption.
///
/// Returns a corrected offset when a better anchor is found; otherwise the
/// original `offset` (even when nothing was recognized — the caller's parse
/// will surface the failure).
fn find_actual_xref_offset<R: Read + Seek>(reader: &mut R, offset: u64) -> Result<u64> {
    // Fast path: data at `offset` already looks like a traditional table
    // ("xref") or an xref stream object header (leading digit of "N G obj").
    reader.seek(SeekFrom::Start(offset))?;
    let mut peek = [0u8; 64];
    let n = reader.read(&mut peek)?;
    let peek_str = String::from_utf8_lossy(&peek[..n]);
    let trimmed = peek_str.trim_start();
    if trimmed.starts_with("xref") || trimmed.chars().next().is_some_and(|c| c.is_ascii_digit()) {
        return Ok(offset);
    }
    // Otherwise scan a window of [offset-32, offset+64) bytes.
    const SCAN_BACK: u64 = 32;
    const SCAN_FWD: u64 = 64;
    let scan_start = offset.saturating_sub(SCAN_BACK);
    let scan_len = (SCAN_BACK + SCAN_FWD) as usize;
    reader.seek(SeekFrom::Start(scan_start))?;
    let mut buf = vec![0u8; scan_len];
    let bytes_read = reader.read(&mut buf)?;
    buf.truncate(bytes_read);
    // First preference: an "xref" keyword sitting at the start of a line.
    for i in 0..bytes_read.saturating_sub(3) {
        if &buf[i..i + 4] == b"xref" {
            if i == 0 || buf[i - 1] == b'\r' || buf[i - 1] == b'\n' {
                let found_offset = scan_start + i as u64;
                log::debug!(
                    "Corrected xref offset: {} -> {} (found 'xref' keyword)",
                    offset,
                    found_offset
                );
                return Ok(found_offset);
            }
        }
    }
    // Second preference: an object header "N G obj" at the start of a line,
    // which would be an xref *stream* object.
    for i in 0..bytes_read {
        let at_line_start = i == 0 || buf[i - 1] == b'\r' || buf[i - 1] == b'\n';
        if !at_line_start || !buf[i].is_ascii_digit() {
            continue;
        }
        let remaining = &buf[i..bytes_read];
        let remaining_str = String::from_utf8_lossy(remaining);
        if let Some(obj_pos) = remaining_str.find(" obj") {
            let before_obj = &remaining_str[..obj_pos];
            // Must be exactly two integers ("obj-number generation") before " obj".
            let parts: Vec<&str> = before_obj.split_whitespace().collect();
            if parts.len() == 2
                && parts[0].chars().all(|c| c.is_ascii_digit())
                && parts[1].chars().all(|c| c.is_ascii_digit())
            {
                let found_offset = scan_start + i as u64;
                log::debug!(
                    "Corrected xref offset: {} -> {} (found object header '{} obj')",
                    offset,
                    found_offset,
                    before_obj.trim()
                );
                return Ok(found_offset);
            }
        }
    }
    log::debug!("Could not find xref near offset {}, using original", offset);
    Ok(offset)
}
/// Walks the xref chain starting at `start_offset`, parsing each section
/// (traditional table or xref stream) and following `/Prev` pointers until
/// the chain ends, repeats itself, or a section fails to parse.
///
/// Sections encountered earlier (newer) take precedence over later (older)
/// ones via [`CrossRefTable::merge_from`], matching PDF incremental-update
/// semantics.
fn parse_xref_iterative<R: Read + Seek>(
    reader: &mut R,
    start_offset: u64,
) -> Result<CrossRefTable> {
    // Guards against /Prev cycles in malformed files.
    let mut visited = HashSet::new();
    let mut offset = start_offset;
    let mut result_xref: Option<CrossRefTable> = None;
    loop {
        if !visited.insert(offset) {
            log::warn!(
                "Circular /Prev chain detected at offset {}, stopping xref traversal",
                offset
            );
            break;
        }
        // Repair slightly-wrong offsets before classifying the section.
        let actual_offset = find_actual_xref_offset(reader, offset)?;
        reader.seek(SeekFrom::Start(actual_offset))?;
        let mut peek_buf = [0u8; 64];
        let bytes_read = reader.read(&mut peek_buf)?;
        // Rewind so the chosen parser starts from the section's beginning.
        reader.seek(SeekFrom::Start(actual_offset))?;
        let peek_str = String::from_utf8_lossy(&peek_buf[..bytes_read]);
        let trimmed = peek_str.trim_start();
        log::debug!(
            "Parsing xref at offset {} (original: {}), peek: {:?} [chain depth: {}]",
            actual_offset,
            offset,
            crate::utils::safe_prefix(&peek_str, 15),
            visited.len()
        );
        let xref = if trimmed.starts_with("xref") {
            log::debug!("Detected traditional xref at offset {}", actual_offset);
            parse_traditional_xref(reader, actual_offset)?
        } else if trimmed.chars().next().is_some_and(|c| c.is_ascii_digit()) {
            // A leading digit suggests an "N G obj" header, i.e. an xref
            // stream; fall back to a traditional parse if that fails.
            match parse_xref_stream(reader, actual_offset) {
                Ok(xref) => xref,
                Err(e) => {
                    log::debug!("Failed to parse as xref stream: {}", e);
                    reader.seek(SeekFrom::Start(actual_offset))?;
                    match parse_traditional_xref(reader, actual_offset) {
                        Ok(xref) => xref,
                        Err(trad_err) => {
                            log::debug!("Failed to parse as traditional xref: {}", trad_err);
                            return Err(Error::InvalidPdf(format!(
                                "failed to parse xref (stream attempt: {}, traditional attempt: {})",
                                e, trad_err
                            )));
                        },
                    }
                },
            }
        } else {
            log::debug!(
                "Xref at offset {} starts with unexpected data: {:?}",
                actual_offset,
                crate::utils::safe_prefix(trimmed, 20)
            );
            return Err(Error::InvalidXref);
        };
        // Capture /Prev before the section is consumed by the merge.
        let prev_offset = xref
            .trailer()
            .and_then(|t| t.get("Prev"))
            .and_then(|o| o.as_integer())
            .map(|v| v as u64);
        // First section seeds the result; later (older) ones merge beneath it.
        match &mut result_xref {
            Some(result) => result.merge_from(xref),
            None => result_xref = Some(xref),
        }
        match prev_offset {
            Some(prev) => {
                log::debug!(
                    "Following /Prev pointer to offset {} from xref at offset {}",
                    prev,
                    offset
                );
                offset = prev;
            },
            None => break,
        }
    }
    result_xref.ok_or(Error::InvalidXref)
}
/// Parses a traditional `xref` table (and its `trailer` dictionary) at
/// `offset`.
///
/// The parser is deliberately lenient: a handful of junk lines before the
/// `xref` keyword are skipped, and malformed entries are recorded as free
/// placeholders instead of aborting the whole table.
fn parse_traditional_xref<R: Read + Seek>(reader: &mut R, offset: u64) -> Result<CrossRefTable> {
    log::debug!("parse_traditional_xref: Starting at offset {}", offset);
    reader.seek(SeekFrom::Start(offset))?;
    // Pull everything up to (and slightly past) the trailer into memory.
    let lines = read_until_trailer(reader).map_err(|e| {
        log::error!("Failed to read xref lines: {}", e);
        Error::InvalidXref
    })?;
    log::debug!("parse_traditional_xref: Read {} lines", lines.len());
    let mut xref = CrossRefTable::new();
    let mut line_idx = 0;
    // Phase 1: find the "xref" keyword, tolerating a little garbage first.
    let mut skipped_lines = 0;
    const MAX_SKIP_LINES: usize = 10;
    while line_idx < lines.len() {
        let trimmed = lines[line_idx].trim();
        if trimmed.is_empty() {
            line_idx += 1;
            continue;
        }
        if trimmed.starts_with("xref") {
            line_idx += 1;
            break;
        }
        skipped_lines += 1;
        if skipped_lines > MAX_SKIP_LINES {
            return Err(Error::InvalidXref);
        }
        log::debug!("Skipping unexpected line before xref: {:?}", trimmed);
        line_idx += 1;
    }
    // Phase 2: subsections — each is a "start count" header followed by
    // `count` entry lines of the form "offset generation flag".
    while line_idx < lines.len() {
        let trimmed = lines[line_idx].trim();
        line_idx += 1;
        if trimmed.starts_with("trailer") {
            break;
        }
        if trimmed.is_empty() || trimmed.starts_with('%') {
            continue;
        }
        let parts: Vec<&str> = trimmed.split_whitespace().collect();
        if parts.len() != 2 {
            // Not a subsection header; ignore and keep scanning.
            continue;
        }
        let start_obj: u32 = parts[0].parse().map_err(|_| Error::InvalidXref)?;
        let count: u32 = parts[1].parse().map_err(|_| Error::InvalidXref)?;
        // Defensive cap so a corrupt header can't make us loop excessively.
        if count > 1_000_000 {
            return Err(Error::InvalidPdf("xref subsection count exceeds limit".to_string()));
        }
        let mut i = 0;
        while i < count && line_idx < lines.len() {
            let trimmed = lines[line_idx].trim();
            line_idx += 1;
            if trimmed.is_empty() {
                continue;
            }
            if trimmed.starts_with("trailer") {
                // Truncated subsection: back up so the outer loop sees the
                // trailer line and terminates.
                log::warn!("Expected {} entries but only found {} before trailer", count, i);
                line_idx -= 1;
                break;
            }
            let parts: Vec<&str> = trimmed.split_whitespace().collect();
            if parts.len() < 3 {
                // Malformed entry — record a free placeholder and move on.
                log::warn!("Malformed xref entry (too few parts) at index {}: {:?}", i, trimmed);
                let entry = XRefEntry::free(0, 65535);
                xref.add_entry(start_obj + i, entry);
                i += 1;
                continue;
            }
            if parts.len() > 3 {
                log::debug!("XRef entry has {} parts (expected 3): {:?}", parts.len(), trimmed);
            }
            let offset: u64 = match parts[0].parse() {
                Ok(v) => v,
                Err(_) => {
                    log::warn!("Failed to parse offset at index {}: {:?}", i, parts[0]);
                    let entry = XRefEntry::free(0, 65535);
                    xref.add_entry(start_obj + i, entry);
                    i += 1;
                    continue;
                },
            };
            let generation: u16 = match parts[1].parse() {
                Ok(v) => v,
                Err(_) => {
                    log::warn!("Failed to parse generation at index {}: {:?}", i, parts[1]);
                    let entry = XRefEntry::free(0, 65535);
                    xref.add_entry(start_obj + i, entry);
                    i += 1;
                    continue;
                },
            };
            let type_flag = parts[2];
            // Accept upper-case flags ('N'/'F') produced by some generators.
            let type_flag_normalized = type_flag.to_lowercase();
            let type_char = type_flag_normalized.chars().next().unwrap_or('?');
            let in_use = match type_char {
                'n' => true,
                'f' => false,
                _ => {
                    log::warn!(
                        "Invalid type flag at index {}: {:?}, treating as free",
                        i,
                        type_flag
                    );
                    false
                },
            };
            let entry = XRefEntry::new(offset, generation, in_use);
            xref.add_entry(start_obj + i, entry);
            i += 1;
        }
    }
    // Phase 3: whatever follows the entries should be the trailer dictionary.
    let remaining_text: String = lines[line_idx..].join("\n");
    if !remaining_text.trim().is_empty() {
        let trimmed = remaining_text.trim();
        // NOTE(review): the first starts_with("<<") already subsumes the
        // three variants below; they are kept for byte-identical behavior.
        if trimmed.starts_with("<<")
            || trimmed.starts_with("<< ")
            || trimmed.starts_with("<<\n")
            || trimmed.starts_with("<<\r")
        {
            // A trailer that fails to parse is silently dropped — the table
            // itself is still returned.
            if let Ok((_, trailer_obj)) = parse_object(trimmed.as_bytes()) {
                if let Some(dict) = trailer_obj.as_dict() {
                    xref.set_trailer(dict.clone());
                }
            }
        }
    }
    Ok(xref)
}
/// Parses a PDF 1.5+ cross-reference *stream* object located at `offset`.
///
/// The object is read (sized via a `/Length` scan when possible), its stream
/// data decoded per `/Filter`/`/DecodeParms`, and the packed entries
/// described by `/W` and `/Index` expanded into a [`CrossRefTable`]. The
/// stream dictionary itself doubles as the trailer.
///
/// # Errors
/// Returns `Error::InvalidPdf` for any structural problem: the object is not
/// a stream, `/W`/`/Size`/`/Index` are malformed, the decoded data is
/// truncated, or an entry has an unknown type.
fn parse_xref_stream<R: Read + Seek>(reader: &mut R, offset: u64) -> Result<CrossRefTable> {
    use crate::lexer::token;
    reader.seek(SeekFrom::Start(offset))?;
    let file_len = reader.seek(SeekFrom::End(0))?;
    reader.seek(SeekFrom::Start(offset))?;
    // A corrupt startxref can point past EOF; saturate instead of letting
    // `file_len - offset` underflow (debug panic / huge wrap in release).
    // With remaining == 0 the token parse below fails cleanly.
    let remaining = file_len.saturating_sub(offset) as usize;
    // Read an initial chunk; grow it only if /Length says the stream is bigger.
    let initial_read = remaining.min(256 * 1024);
    let mut content = vec![0u8; initial_read];
    let bytes_read = reader.read(&mut content)?;
    content.truncate(bytes_read);
    let needs_more = if let Some(length_val) = find_stream_length(&content) {
        let stream_kw_pos = content.windows(6).position(|w| w == b"stream").unwrap_or(0);
        // Slack for the "stream" EOL and the trailing "endstream endobj".
        let needed = stream_kw_pos + 20 + length_val + 30;
        if needed > bytes_read {
            Some(needed)
        } else {
            None
        }
    } else if content.windows(6).any(|w| w == b"endobj") {
        // No /Length found but the object terminator is present — enough.
        None
    } else {
        // No sizing information at all: read up to a 16 MiB cap.
        Some(remaining.min(16 * 1024 * 1024))
    };
    if let Some(needed) = needs_more {
        let total = needed.min(remaining);
        reader.seek(SeekFrom::Start(offset))?;
        content = vec![0u8; total];
        // read() may return short counts; loop until filled or EOF.
        let mut total_read = 0;
        while total_read < total {
            let n = reader.read(&mut content[total_read..])?;
            if n == 0 {
                break;
            }
            total_read += n;
        }
        content.truncate(total_read);
    }
    // Consume the "N G obj" header, then parse the stream object itself.
    let input = &content[..];
    let (rest, _obj_num_token) = token(input)
        .map_err(|e| Error::InvalidPdf(format!("failed to parse xref object number: {}", e)))?;
    let (rest, _gen_token) = token(rest)
        .map_err(|e| Error::InvalidPdf(format!("failed to parse xref generation: {}", e)))?;
    let (rest, obj_keyword_token) = token(rest)
        .map_err(|e| Error::InvalidPdf(format!("failed to parse 'obj' keyword: {}", e)))?;
    if !matches!(obj_keyword_token, crate::lexer::Token::ObjStart) {
        return Err(Error::InvalidPdf("expected 'obj' keyword in xref stream".to_string()));
    }
    let parse_result = parse_object(rest)
        .map_err(|e| Error::InvalidPdf(format!("failed to parse xref stream object: {}", e)))?;
    let (_remaining, obj) = parse_result;
    let (stream_dict, stream_data) = match obj {
        Object::Stream { dict, data } => (dict, data),
        _ => return Err(Error::InvalidPdf("xref stream is not a stream object".to_string())),
    };
    // /Type is optional in practice; reject only an explicit wrong type.
    if let Some(type_obj) = stream_dict.get("Type") {
        if let Some(type_name) = type_obj.as_name() {
            if type_name != "XRef" {
                return Err(Error::InvalidPdf(format!(
                    "expected /Type /XRef, got /Type /{}",
                    type_name
                )));
            }
        }
    }
    // /W gives the byte widths of the three fields of each packed entry.
    let w_array = stream_dict
        .get("W")
        .and_then(|o| o.as_array())
        .ok_or_else(|| Error::InvalidPdf("missing /W array in xref stream".to_string()))?;
    if w_array.len() != 3 {
        return Err(Error::InvalidPdf("invalid /W array length".to_string()));
    }
    let w1 = w_array[0]
        .as_integer()
        .ok_or_else(|| Error::InvalidPdf("invalid /W[0]".to_string()))? as usize;
    let w2 = w_array[1]
        .as_integer()
        .ok_or_else(|| Error::InvalidPdf("invalid /W[1]".to_string()))? as usize;
    let w3 = w_array[2]
        .as_integer()
        .ok_or_else(|| Error::InvalidPdf("invalid /W[2]".to_string()))? as usize;
    let entry_size = w1 + w2 + w3;
    if entry_size == 0 {
        return Err(Error::InvalidPdf("xref stream entry size is 0".to_string()));
    }
    let size = stream_dict
        .get("Size")
        .and_then(|o| o.as_integer())
        .ok_or_else(|| Error::InvalidPdf("missing /Size in xref stream".to_string()))?
        as u32;
    // /Index lists (start, count) pairs; its absence means one run [0, Size).
    let index_ranges = if let Some(index_obj) = stream_dict.get("Index") {
        let index_array = index_obj
            .as_array()
            .ok_or_else(|| Error::InvalidPdf("invalid /Index".to_string()))?;
        if index_array.len() % 2 != 0 {
            return Err(Error::InvalidPdf("xref stream /Index array has odd length".to_string()));
        }
        let mut ranges = Vec::new();
        for i in (0..index_array.len()).step_by(2) {
            let start = index_array[i]
                .as_integer()
                .ok_or_else(|| Error::InvalidPdf("invalid index start".to_string()))?
                as u32;
            let count = index_array[i + 1]
                .as_integer()
                .ok_or_else(|| Error::InvalidPdf("invalid index count".to_string()))?
                as u32;
            ranges.push((start, count));
        }
        ranges
    } else {
        vec![(0, size)]
    };
    let decode_params = if let Some(decode_params_obj) = stream_dict.get("DecodeParms") {
        extract_decode_params(decode_params_obj)?
    } else {
        None
    };
    // Only the first filter of a filter array is honored.
    let decoded_data = if let Some(filter_obj) = stream_dict.get("Filter") {
        let filter_name = match filter_obj {
            Object::Name(name) => name.clone(),
            Object::Array(arr) => {
                if let Some(Object::Name(name)) = arr.first() {
                    name.clone()
                } else {
                    return Err(Error::InvalidPdf("invalid filter array".to_string()));
                }
            },
            _ => return Err(Error::InvalidPdf("invalid /Filter in xref stream".to_string())),
        };
        crate::decoders::decode_stream_with_params(
            &stream_data,
            &[filter_name],
            decode_params.as_ref(),
        )?
    } else {
        stream_data.to_vec()
    };
    // Expand the packed entries: each is (type, field2, field3) big-endian.
    let mut xref = CrossRefTable::new();
    let mut data_pos = 0;
    for (start_obj, count) in index_ranges {
        for i in 0..count {
            if data_pos + entry_size > decoded_data.len() {
                return Err(Error::InvalidPdf("truncated xref stream data".to_string()));
            }
            let entry_data = &decoded_data[data_pos..data_pos + entry_size];
            data_pos += entry_size;
            // Per the spec, a missing type field defaults to type 1.
            let entry_type = if w1 > 0 {
                read_int(&entry_data[0..w1])
            } else {
                1
            };
            let field2 = read_int(&entry_data[w1..w1 + w2]);
            let field3 = read_int(&entry_data[w1 + w2..w1 + w2 + w3]);
            let entry = match entry_type {
                // Type 0: free — field2 = next free obj, field3 = generation.
                0 => XRefEntry::free(field2, field3 as u16),
                // Type 1: uncompressed — field2 = offset, field3 = generation.
                1 => XRefEntry::uncompressed(field2, field3 as u16),
                // Type 2: compressed — field2 = stream obj num, field3 = index.
                2 => XRefEntry::compressed(field2, field3 as u16),
                _ => {
                    return Err(Error::InvalidPdf(format!(
                        "invalid xref entry type: {}",
                        entry_type
                    )));
                },
            };
            xref.add_entry(start_obj + i, entry);
        }
    }
    // The stream dictionary doubles as the trailer (Root, Prev, Size, ...).
    xref.set_trailer(stream_dict);
    Ok(xref)
}
/// Extracts predictor parameters from a `/DecodeParms` object.
///
/// Accepts either a bare dictionary or an array whose first element is one;
/// any other shape yields `Ok(None)` rather than an error. Missing keys fall
/// back to the defaults Predictor=1, Columns=1, Colors=1, BitsPerComponent=8.
fn extract_decode_params(
    decode_params_obj: &Object,
) -> Result<Option<crate::decoders::DecodeParams>> {
    let dict = match decode_params_obj {
        Object::Dictionary(d) => d,
        Object::Array(arr) => match arr.first() {
            Some(Object::Dictionary(d)) => d,
            _ => return Ok(None),
        },
        _ => return Ok(None),
    };
    // Integer lookup with a per-key default.
    let int_or = |key: &str, default| dict.get(key).and_then(|o| o.as_integer()).unwrap_or(default);
    Ok(Some(crate::decoders::DecodeParams {
        predictor: int_or("Predictor", 1),
        columns: int_or("Columns", 1) as usize,
        colors: int_or("Colors", 1) as usize,
        bits_per_component: int_or("BitsPerComponent", 8) as usize,
    }))
}
/// Interprets `bytes` as a big-endian unsigned integer. An empty slice
/// yields 0; inputs longer than 8 bytes keep only the low 64 bits.
fn read_int(bytes: &[u8]) -> u64 {
    bytes.iter().fold(0u64, |acc, &b| (acc << 8) | u64::from(b))
}
/// Splits `text` into lines on any PDF end-of-line marker: LF, CR, or CRLF
/// (CRLF counts as a single terminator).
///
/// Unlike `str::lines`, a lone CR also ends a line. A trailing line without
/// a terminator is kept only when non-empty, so "x\n" and "x" both yield
/// `["x"]`.
fn split_lines(text: &str) -> Vec<String> {
    let mut lines = Vec::new();
    let mut line = String::new();
    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\n' => lines.push(std::mem::take(&mut line)),
            '\r' => {
                // Swallow the LF of a CRLF pair so it doesn't end a second line.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                lines.push(std::mem::take(&mut line));
            },
            other => line.push(other),
        }
    }
    if !line.is_empty() {
        lines.push(line);
    }
    lines
}
/// Reads from the current position in chunks until the end of a trailer
/// dictionary (`trailer ... >>`) appears to have been consumed, returning
/// the accumulated text split into lines.
///
/// Reading stops once `startxref` is visible after the dictionary close (or
/// at least 20 bytes follow it), and is capped at 32 MiB as a safety limit.
/// The buffer is truncated to 50 bytes past the dictionary close so callers
/// need not scan trailing data.
///
/// NOTE(review): the ">>" search is not nesting-aware, so a trailer whose
/// dictionary contains a nested dictionary could be truncated at the inner
/// ">>" — confirm such trailers do not occur in practice.
fn read_until_trailer<R: Read + Seek>(reader: &mut R) -> std::io::Result<Vec<String>> {
    const CHUNK_SIZE: usize = 256 * 1024;
    const MAX_TOTAL: usize = 32 * 1024 * 1024;
    let mut data = Vec::with_capacity(CHUNK_SIZE);
    let mut total_read = 0usize;
    let mut found_end = false;
    loop {
        // Grow the buffer by one chunk and read into the fresh tail.
        let prev_len = data.len();
        data.resize(prev_len + CHUNK_SIZE, 0);
        let n = reader.read(&mut data[prev_len..])?;
        data.truncate(prev_len + n);
        total_read += n;
        if n == 0 {
            break;
        }
        if let Some(trailer_pos) = find_bytes(&data, b"trailer") {
            // 7 == b"trailer".len(); 2 == b">>".len().
            let after_trailer = &data[trailer_pos + 7..];
            if let Some(dict_end) = find_bytes(after_trailer, b">>") {
                let after_dict = &after_trailer[dict_end + 2..];
                // Require some context after the dict so a ">>" split across
                // chunk boundaries isn't mistaken for the end.
                if find_bytes(after_dict, b"startxref").is_some() || after_dict.len() > 20 {
                    found_end = true;
                    let end_pos = trailer_pos + 7 + dict_end + 2 + 50.min(after_dict.len());
                    data.truncate(end_pos);
                    break;
                }
            }
        }
        if total_read >= MAX_TOTAL {
            break;
        }
    }
    if !found_end {
        log::warn!(
            "Could not find trailer end marker within {}MB of xref",
            total_read / (1024 * 1024)
        );
    }
    // Lossy conversion is fine: xref/trailer regions are ASCII in practice.
    let text = String::from_utf8_lossy(&data);
    Ok(split_lines(&text))
}
/// Returns the index of the first occurrence of `needle` in `haystack`,
/// or `None` when absent (including when `needle` is longer than
/// `haystack`).
///
/// An empty needle matches at index 0; previously `windows(0)` would panic
/// on that input.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack.windows(needle.len()).position(|w| w == needle)
}
#[cfg(test)]
mod tests {
    // Unit tests covering the xref entry/table types, startxref discovery,
    // traditional table parsing (including lenient recovery paths), and the
    // small byte/line scanning helpers.
    use super::*;
    use std::io::Cursor;

    // --- XRefEntry construction ---

    #[test]
    fn test_xref_entry_creation() {
        let entry = XRefEntry::new(1234, 0, true);
        assert_eq!(entry.offset, 1234);
        assert_eq!(entry.generation, 0);
        assert!(entry.in_use);
    }

    #[test]
    fn test_xref_entry_free() {
        let entry = XRefEntry::new(0, 65535, false);
        assert_eq!(entry.offset, 0);
        assert_eq!(entry.generation, 65535);
        assert!(!entry.in_use);
    }

    // --- CrossRefTable basics ---

    #[test]
    fn test_cross_ref_table_new() {
        let table = CrossRefTable::new();
        assert_eq!(table.len(), 0);
        assert!(table.is_empty());
    }

    #[test]
    fn test_cross_ref_table_add_and_get() {
        let mut table = CrossRefTable::new();
        let entry = XRefEntry::new(1234, 0, true);
        table.add_entry(5, entry.clone());
        assert_eq!(table.len(), 1);
        assert!(!table.is_empty());
        let retrieved = table.get(5).unwrap();
        assert_eq!(retrieved, &entry);
    }

    #[test]
    fn test_cross_ref_table_get_missing() {
        let table = CrossRefTable::new();
        assert!(table.get(999).is_none());
    }

    // --- startxref offset discovery ---
    // The '\'-continuation escape in these literals eats the newline and any
    // leading indentation, so each literal is a contiguous PDF body.

    #[test]
    fn test_find_xref_offset_valid() {
        let pdf = b"%PDF-1.4\n\
            1 0 obj\n\
            << /Type /Catalog >>\n\
            endobj\n\
            xref\n\
            0 2\n\
            0000000000 65535 f\n\
            0000000009 00000 n\n\
            trailer\n\
            << /Size 2 >>\n\
            startxref\n\
            50\n\
            %%EOF";
        let mut cursor = Cursor::new(pdf);
        let offset = find_xref_offset(&mut cursor).unwrap();
        assert_eq!(offset, 50);
    }

    #[test]
    fn test_find_xref_offset_no_startxref() {
        let pdf = b"%PDF-1.4\n\
            xref\n\
            0 1\n\
            0000000000 65535 f\n\
            trailer\n\
            << /Size 1 >>\n";
        let mut cursor = Cursor::new(pdf);
        let result = find_xref_offset(&mut cursor);
        assert!(result.is_err());
    }

    #[test]
    fn test_find_xref_offset_with_whitespace() {
        // Blank lines between "startxref" and the number must be skipped.
        let pdf = b"%PDF-1.4\n\
            startxref\n\
            \n\
            12345\n\
            %%EOF";
        let mut cursor = Cursor::new(pdf);
        let offset = find_xref_offset(&mut cursor).unwrap();
        assert_eq!(offset, 12345);
    }

    // --- Traditional xref table parsing ---

    #[test]
    fn test_parse_xref_single_subsection() {
        let xref_data = b"xref\n\
            0 3\n\
            0000000000 65535 f\n\
            0000000018 00000 n\n\
            0000000154 00000 n\n\
            trailer\n";
        let mut cursor = Cursor::new(xref_data);
        let table = parse_xref(&mut cursor, 0).unwrap();
        assert_eq!(table.len(), 3);
        let entry0 = table.get(0).unwrap();
        assert_eq!(entry0.offset, 0);
        assert_eq!(entry0.generation, 65535);
        assert!(!entry0.in_use);
        let entry1 = table.get(1).unwrap();
        assert_eq!(entry1.offset, 18);
        assert_eq!(entry1.generation, 0);
        assert!(entry1.in_use);
        let entry2 = table.get(2).unwrap();
        assert_eq!(entry2.offset, 154);
        assert_eq!(entry2.generation, 0);
        assert!(entry2.in_use);
    }

    #[test]
    fn test_parse_xref_multiple_subsections() {
        let xref_data = b"xref\n\
            0 2\n\
            0000000000 65535 f\n\
            0000000018 00000 n\n\
            5 3\n\
            0000000200 00000 n\n\
            0000000300 00000 n\n\
            0000000400 00000 n\n\
            trailer\n";
        let mut cursor = Cursor::new(xref_data);
        let table = parse_xref(&mut cursor, 0).unwrap();
        assert_eq!(table.len(), 5);
        assert!(table.get(0).is_some());
        assert!(table.get(1).is_some());
        let entry5 = table.get(5).unwrap();
        assert_eq!(entry5.offset, 200);
        let entry6 = table.get(6).unwrap();
        assert_eq!(entry6.offset, 300);
        let entry7 = table.get(7).unwrap();
        assert_eq!(entry7.offset, 400);
        // The gap between subsections stays unpopulated.
        assert!(table.get(2).is_none());
        assert!(table.get(3).is_none());
        assert!(table.get(4).is_none());
    }

    #[test]
    fn test_parse_xref_no_xref_keyword() {
        let xref_data = b"notxref\n\
            0 1\n\
            0000000000 65535 f\n\
            trailer\n";
        let mut cursor = Cursor::new(xref_data);
        let result = parse_xref(&mut cursor, 0);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_xref_malformed_entry() {
        // A garbage entry line becomes a free placeholder, not an error.
        let xref_data = b"xref\n\
            0 2\n\
            0000000000 65535 f\n\
            invalid entry here\n\
            trailer\n";
        let mut cursor = Cursor::new(xref_data);
        let result = parse_xref(&mut cursor, 0);
        assert!(result.is_ok());
        let table = result.unwrap();
        assert_eq!(table.len(), 2);
        assert!(table.get(0).is_some());
        assert!(!table.get(0).unwrap().in_use);
        assert!(table.get(1).is_some());
        assert!(!table.get(1).unwrap().in_use);
    }

    #[test]
    fn test_parse_xref_invalid_flag() {
        // An unknown type flag ('x') is treated as free.
        let xref_data = b"xref\n\
            0 1\n\
            0000000000 65535 x\n\
            trailer\n";
        let mut cursor = Cursor::new(xref_data);
        let result = parse_xref(&mut cursor, 0);
        assert!(result.is_ok());
        let table = result.unwrap();
        assert_eq!(table.len(), 1);
        assert!(table.get(0).is_some());
        assert!(!table.get(0).unwrap().in_use);
    }

    #[test]
    fn test_parse_xref_empty_table() {
        let xref_data = b"xref\n\
            trailer\n";
        let mut cursor = Cursor::new(xref_data);
        let table = parse_xref(&mut cursor, 0).unwrap();
        assert!(table.is_empty());
    }

    #[test]
    fn test_cross_ref_table_default() {
        let table = CrossRefTable::default();
        assert!(table.is_empty());
    }

    #[test]
    fn test_parse_xref_with_comments() {
        // '%' comment lines inside the table body are ignored.
        let xref_data = b"xref\n\
            % This is a comment\n\
            0 2\n\
            0000000000 65535 f\n\
            0000000018 00000 n\n\
            % Another comment\n\
            trailer\n";
        let mut cursor = Cursor::new(xref_data);
        let table = parse_xref(&mut cursor, 0).unwrap();
        assert_eq!(table.len(), 2);
    }

    #[test]
    fn test_parse_xref_excessive_count() {
        // Subsection counts above the 1M cap are rejected outright.
        let xref_data = b"xref\n\
            0 2000000\n\
            0000000000 65535 f\n\
            trailer\n";
        let mut cursor = Cursor::new(xref_data);
        let result = parse_xref(&mut cursor, 0);
        assert!(result.is_err());
    }

    // --- CR-only (classic Mac) line endings ---

    #[test]
    fn test_find_xref_offset_cr_only_line_endings() {
        let pdf_data = b"some content\r\
            startxref\r\
            173\r\
            %%EOF\r";
        let mut cursor = Cursor::new(pdf_data);
        let offset = find_xref_offset(&mut cursor).unwrap();
        assert_eq!(offset, 173);
    }

    #[test]
    fn test_parse_xref_cr_only_line_endings() {
        let xref_data = b"xref\r\
            0 2\r\
            0000000000 65535 f\r\
            0000000018 00000 n\r\
            trailer\r";
        let mut cursor = Cursor::new(xref_data);
        let table = parse_xref(&mut cursor, 0).unwrap();
        assert_eq!(table.len(), 2);
        let entry0 = table.get(0).unwrap();
        assert!(!entry0.in_use);
        let entry1 = table.get(1).unwrap();
        assert_eq!(entry1.offset, 18);
        assert!(entry1.in_use);
    }

    #[test]
    fn test_split_lines_mixed_endings() {
        let text = "line1\rline2\nline3\r\nline4";
        let lines = split_lines(text);
        assert_eq!(lines, vec!["line1", "line2", "line3", "line4"]);
    }

    // --- /Prev chains ---

    #[test]
    fn test_parse_xref_with_prev_chain() {
        let xref_data = b"xref\n\
            0 2\n\
            0000000000 65535 f\n\
            0000000500 00000 n\n\
            trailer\n\
            << /Size 2 >>\n";
        let mut cursor = Cursor::new(xref_data);
        let table = parse_xref(&mut cursor, 0).unwrap();
        assert_eq!(table.len(), 2);
        assert_eq!(table.get(1).unwrap().offset, 500);
    }

    #[test]
    fn test_parse_xref_circular_prev_chain() {
        // /Prev 0 points back at this same section; the visited-set guard
        // must terminate the traversal instead of looping forever.
        let xref_data = b"xref\n\
            0 1\n\
            0000000000 65535 f\n\
            trailer\n\
            << /Size 1 /Prev 0 >>\n";
        let mut cursor = Cursor::new(xref_data);
        let table = parse_xref(&mut cursor, 0).unwrap();
        assert_eq!(table.len(), 1);
    }

    // --- Typed constructors and trait derivations ---

    #[test]
    fn test_xref_entry_uncompressed() {
        let entry = XRefEntry::uncompressed(5000, 0);
        assert_eq!(entry.entry_type, XRefEntryType::Uncompressed);
        assert_eq!(entry.offset, 5000);
        assert_eq!(entry.generation, 0);
        assert!(entry.in_use);
    }

    #[test]
    fn test_xref_entry_compressed() {
        let entry = XRefEntry::compressed(42, 3);
        assert_eq!(entry.entry_type, XRefEntryType::Compressed);
        assert_eq!(entry.offset, 42);
        assert_eq!(entry.generation, 3);
        assert!(entry.in_use);
    }

    #[test]
    fn test_xref_entry_free_constructor() {
        let entry = XRefEntry::free(7, 65535);
        assert_eq!(entry.entry_type, XRefEntryType::Free);
        assert_eq!(entry.offset, 7);
        assert_eq!(entry.generation, 65535);
        assert!(!entry.in_use);
    }

    #[test]
    fn test_xref_entry_type_equality() {
        assert_eq!(XRefEntryType::Free, XRefEntryType::Free);
        assert_eq!(XRefEntryType::Uncompressed, XRefEntryType::Uncompressed);
        assert_eq!(XRefEntryType::Compressed, XRefEntryType::Compressed);
        assert_ne!(XRefEntryType::Free, XRefEntryType::Uncompressed);
    }

    #[test]
    fn test_xref_entry_clone_debug() {
        let entry = XRefEntry::uncompressed(100, 0);
        let cloned = entry.clone();
        assert_eq!(entry, cloned);
        let debug = format!("{:?}", entry);
        assert!(debug.contains("Uncompressed"));
    }

    // --- Trailer handling and merge semantics ---

    #[test]
    fn test_cross_ref_table_set_trailer() {
        let mut table = CrossRefTable::new();
        assert!(table.trailer().is_none());
        let mut trailer = HashMap::new();
        trailer.insert("Size".to_string(), Object::Integer(10));
        table.set_trailer(trailer);
        assert!(table.trailer().is_some());
        assert!(table.trailer().unwrap().contains_key("Size"));
    }

    #[test]
    fn test_cross_ref_table_contains() {
        let mut table = CrossRefTable::new();
        assert!(!table.contains(1));
        table.add_entry(1, XRefEntry::uncompressed(100, 0));
        assert!(table.contains(1));
        assert!(!table.contains(2));
    }

    #[test]
    fn test_cross_ref_table_all_object_numbers() {
        let mut table = CrossRefTable::new();
        table.add_entry(1, XRefEntry::uncompressed(100, 0));
        table.add_entry(5, XRefEntry::uncompressed(200, 0));
        table.add_entry(10, XRefEntry::uncompressed(300, 0));
        let mut nums: Vec<u32> = table.all_object_numbers().collect();
        nums.sort();
        assert_eq!(nums, vec![1, 5, 10]);
    }

    #[test]
    fn test_cross_ref_table_merge_from() {
        let mut table1 = CrossRefTable::new();
        table1.add_entry(1, XRefEntry::uncompressed(100, 0));
        table1.add_entry(2, XRefEntry::uncompressed(200, 0));
        let mut table2 = CrossRefTable::new();
        table2.add_entry(2, XRefEntry::uncompressed(999, 0));
        table2.add_entry(3, XRefEntry::uncompressed(300, 0));
        table1.merge_from(table2);
        assert_eq!(table1.len(), 3);
        // Existing entry for object 2 wins over the merged-in one.
        assert_eq!(table1.get(2).unwrap().offset, 200);
        assert_eq!(table1.get(3).unwrap().offset, 300);
    }

    #[test]
    fn test_cross_ref_table_merge_trailer() {
        let mut table1 = CrossRefTable::new();
        let mut table2 = CrossRefTable::new();
        let mut trailer = HashMap::new();
        trailer.insert("Size".to_string(), Object::Integer(5));
        table2.set_trailer(trailer);
        table1.merge_from(table2);
        assert!(table1.trailer().is_some());
    }

    #[test]
    fn test_cross_ref_table_merge_preserves_existing_trailer() {
        let mut table1 = CrossRefTable::new();
        let mut trailer1 = HashMap::new();
        trailer1.insert("Size".to_string(), Object::Integer(10));
        table1.set_trailer(trailer1);
        let mut table2 = CrossRefTable::new();
        let mut trailer2 = HashMap::new();
        trailer2.insert("Size".to_string(), Object::Integer(5));
        table2.set_trailer(trailer2);
        table1.merge_from(table2);
        let size = table1.trailer().unwrap().get("Size").unwrap();
        assert_eq!(*size, Object::Integer(10));
    }

    #[test]
    fn test_cross_ref_table_clone() {
        let mut table = CrossRefTable::new();
        table.add_entry(1, XRefEntry::uncompressed(100, 0));
        let cloned = table.clone();
        assert_eq!(cloned.len(), 1);
        assert_eq!(cloned.get(1).unwrap().offset, 100);
    }

    // --- Stream-length and offset-repair helpers ---

    #[test]
    fn test_find_stream_length_basic() {
        let data = b"/Type /XRef /Length 1234 /W [1 2 2]";
        let result = find_stream_length(data);
        assert_eq!(result, Some(1234));
    }

    #[test]
    fn test_find_stream_length_no_length() {
        let data = b"/Type /XRef /W [1 2 2]";
        let result = find_stream_length(data);
        assert!(result.is_none());
    }

    #[test]
    fn test_find_stream_length_indirect_reference() {
        // Indirect /Length yields the reference's object number — callers
        // only use the result as a read-size hint.
        let data = b"/Type /XRef /Length 5 0 R";
        let result = find_stream_length(data);
        assert_eq!(result, Some(5));
    }

    #[test]
    fn test_find_stream_length_zero() {
        let data = b"/Length 0";
        let result = find_stream_length(data);
        assert_eq!(result, Some(0));
    }

    #[test]
    fn test_find_actual_xref_offset_correct() {
        let data = b"xref\n0 1\n0000000000 65535 f\ntrailer\n";
        let mut cursor = Cursor::new(data);
        let offset = find_actual_xref_offset(&mut cursor, 0).unwrap();
        assert_eq!(offset, 0);
    }

    #[test]
    fn test_find_actual_xref_offset_digit_start() {
        // A leading digit (object header) counts as already-correct.
        let data = b"1 0 obj\n<< /Type /XRef >>\nstream\n";
        let mut cursor = Cursor::new(data);
        let offset = find_actual_xref_offset(&mut cursor, 0).unwrap();
        assert_eq!(offset, 0);
    }

    // --- split_lines edge cases ---

    #[test]
    fn test_split_lines_lf() {
        let lines = split_lines("a\nb\nc");
        assert_eq!(lines, vec!["a", "b", "c"]);
    }

    #[test]
    fn test_split_lines_cr() {
        let lines = split_lines("a\rb\rc");
        assert_eq!(lines, vec!["a", "b", "c"]);
    }

    #[test]
    fn test_split_lines_crlf() {
        let lines = split_lines("a\r\nb\r\nc");
        assert_eq!(lines, vec!["a", "b", "c"]);
    }

    #[test]
    fn test_split_lines_empty() {
        let lines = split_lines("");
        assert!(lines.is_empty());
    }

    #[test]
    fn test_split_lines_single_line() {
        let lines = split_lines("hello");
        assert_eq!(lines, vec!["hello"]);
    }

    #[test]
    fn test_xref_entry_type_debug() {
        let debug = format!("{:?}", XRefEntryType::Compressed);
        assert!(debug.contains("Compressed"));
    }
}