use std::collections::HashMap;
use rpdfium_core::{Name, PdfSource};
use rpdfium_parser::{Object, ObjectId, ObjectStore};
use crate::error::{DocError, DocResult};
use crate::name_tree::NameTree;
/// In-memory view of a PDF file specification dictionary.
///
/// Every field is optional: `parse_file_spec` leaves a field as `None` when
/// the corresponding dictionary entry is missing, cannot be resolved, or has
/// an unexpected type.
#[derive(Debug, Clone)]
pub struct FileSpec {
/// Name value stored under the `Name::fs()` key.
pub file_system: Option<String>,
/// String stored under the `Name::f()` key.
pub filename: Option<String>,
/// String stored under the `Name::uf()` key; preferred by `FileSpec::name`.
pub unicode_filename: Option<String>,
/// String stored under the `Name::dos()` key.
pub dos_filename: Option<String>,
/// String stored under the `Name::unix_name()` key.
pub unix_filename: Option<String>,
/// Reference found under the `Name::f()` key of the `Name::ef()` sub-dictionary.
pub embedded_file: Option<ObjectId>,
/// String stored under the `Name::desc()` key.
pub description: Option<String>,
/// Decoded bytes of the object referenced by `embedded_file`; `None` when
/// there is no reference or the object could not be resolved or decoded.
pub data: Option<Vec<u8>>,
}
/// Parses a file specification object into a [`FileSpec`].
///
/// `obj` is resolved through `store` first. Returns `None` when it cannot be
/// resolved or when the resolved object is not a dictionary. Each individual
/// entry is optional: one that is missing, fails to resolve, or has an
/// unexpected type simply leaves the matching field as `None`.
///
/// When the `Name::ef()` sub-dictionary holds a reference under `Name::f()`,
/// that reference is recorded in `embedded_file` and the referenced object is
/// resolved and decoded into `data`. A failure to resolve or decode only
/// leaves `data` empty; the reference itself is still reported.
pub fn parse_file_spec<S: PdfSource>(obj: &Object, store: &ObjectStore<S>) -> Option<FileSpec> {
    let resolved = store.deep_resolve(obj).ok()?;
    let dict = resolved.as_dict()?;

    let file_system = dict
        .get(&Name::fs())
        .and_then(|o| store.deep_resolve(o).ok())
        .and_then(|o| o.as_name().map(|n| n.as_str().into_owned()));
    let filename = extract_string(dict, &Name::f(), store);
    let unicode_filename = extract_string(dict, &Name::uf(), store);
    let dos_filename = extract_string(dict, &Name::dos(), store);
    let unix_filename = extract_string(dict, &Name::unix_name(), store);

    // Keep the resolved EF object alive in a local so its dictionary can be
    // borrowed for the lookup instead of cloning the whole map.
    let ef_resolved = dict
        .get(&Name::ef())
        .and_then(|o| store.deep_resolve(o).ok());
    let embedded_file = ef_resolved
        .as_ref()
        .and_then(|ef| ef.as_dict())
        .and_then(|ef_dict| ef_dict.get(&Name::f()))
        .and_then(|target| target.as_reference());

    // Best effort: an unresolvable or undecodable stream yields `None` here
    // without failing the whole parse.
    let data: Option<Vec<u8>> = embedded_file.and_then(|stream_id| {
        let stream_obj = store.resolve(stream_id).ok()?;
        store.decode_stream(stream_obj).ok()
    });

    let description = extract_string(dict, &Name::desc(), store);

    Some(FileSpec {
        file_system,
        filename,
        unicode_filename,
        dos_filename,
        unix_filename,
        embedded_file,
        description,
        data,
    })
}
impl FileSpec {
    /// Returns the first available name, trying the Unicode name, then the
    /// plain filename, then the Unix name, and finally the DOS name.
    pub fn name(&self) -> Option<&str> {
        let candidates = [
            self.unicode_filename.as_deref(),
            self.filename.as_deref(),
            self.unix_filename.as_deref(),
            self.dos_filename.as_deref(),
        ];
        candidates.iter().copied().flatten().next()
    }

    /// Alias of [`FileSpec::name`].
    #[inline]
    pub fn attachment_get_name(&self) -> Option<&str> {
        self.name()
    }

    /// Deprecated alias of [`FileSpec::name`].
    #[deprecated(note = "use `attachment_get_name()` — matches upstream `FPDFAttachment_GetName`")]
    #[inline]
    pub fn get_name(&self) -> Option<&str> {
        self.name()
    }

    /// Returns the decoded embedded bytes held in `data`, if any.
    pub fn file_data(&self) -> Option<&[u8]> {
        self.data.as_ref().map(|bytes| bytes.as_slice())
    }

    /// Alias of [`FileSpec::file_data`].
    #[inline]
    pub fn attachment_get_file(&self) -> Option<&[u8]> {
        self.file_data()
    }

    /// Deprecated alias of [`FileSpec::file_data`].
    #[deprecated(note = "use `attachment_get_file()` — matches upstream `FPDFAttachment_GetFile`")]
    #[inline]
    pub fn get_file(&self) -> Option<&[u8]> {
        self.file_data()
    }

    /// Subtype of the embedded file. Currently always `None`: no subtype is
    /// stored on [`FileSpec`].
    pub fn subtype(&self) -> Option<&str> {
        None
    }

    /// Deprecated alias of [`FileSpec::subtype`].
    #[deprecated(note = "use `subtype()` — there is no public `FPDFAttachment_GetSubtype` API")]
    #[inline]
    pub fn get_subtype(&self) -> Option<&str> {
        self.subtype()
    }

    /// Returns the same bytes as [`FileSpec::file_data`].
    pub fn underlying_bytes(&self) -> Option<&[u8]> {
        self.file_data()
    }

    /// Deprecated alias of [`FileSpec::underlying_bytes`].
    #[deprecated(
        note = "use `underlying_bytes()` — there is no public `FPDFAttachment_GetUnderlyingFile` API"
    )]
    #[inline]
    pub fn get_underlying_bytes(&self) -> Option<&[u8]> {
        self.underlying_bytes()
    }

    /// Replaces both name fields in memory: `filename` receives the
    /// [`encode_filename`] form of `filename`, `unicode_filename` receives it
    /// verbatim. Always returns `Ok(())`.
    pub fn set_filename(&mut self, filename: &str) -> DocResult<()> {
        let encoded = encode_filename(filename);
        let verbatim = filename.to_owned();
        self.filename = Some(encoded);
        self.unicode_filename = Some(verbatim);
        Ok(())
    }

    /// Deprecated alias of [`FileSpec::name`].
    #[deprecated(since = "0.1.0", note = "use name() instead")]
    #[inline]
    pub fn best_filename(&self) -> Option<&str> {
        self.name()
    }
}
/// Converts a platform path into the slash-separated form used in PDF file
/// specification strings.
///
/// Every backslash becomes `/`. A leading `X:` drive prefix, where `X` is an
/// ASCII letter, is rewritten to `/X/…`:
///
/// * `C:\Users\doc.pdf` becomes `/C/Users/doc.pdf`
/// * `C:doc.pdf` (drive-relative) becomes `/C/doc.pdf`
///
/// A separator is always placed after the drive letter so the drive never
/// fuses with the first path component and [`decode_filename`] can recognise
/// the result. Paths without such a prefix, including ones whose first
/// character is not a letter (for example `1:notes.txt`), are returned with
/// only the backslash normalisation applied.
pub fn encode_filename(path: &str) -> String {
    let normalized = path.replace('\\', "/");
    let bytes = normalized.as_bytes();

    // Require an ASCII letter so this mirrors the check in `decode_filename`.
    // It also guarantees that byte offsets 1 and 2 are char boundaries.
    let has_drive = bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':';
    if !has_drive {
        return normalized;
    }

    let drive = &normalized[..1];
    let rest = &normalized[2..];
    // Drive-relative input ("C:doc.pdf") carries no separator of its own.
    let separator = if rest.starts_with('/') { "" } else { "/" };
    format!("/{drive}{separator}{rest}")
}
/// Converts a PDF-style path of the form `/X/…` (where `X` is an ASCII
/// letter) into `X:/…`. Any other input is returned unchanged.
pub fn decode_filename(path: &str) -> String {
    // Shape check on raw bytes: slash, one ASCII letter, slash.
    let is_drive_form = matches!(
        path.as_bytes(),
        [b'/', letter, b'/', ..] if letter.is_ascii_alphabetic()
    );

    if is_drive_form {
        // Offsets 1 and 2 are char boundaries because bytes 0..=2 are ASCII.
        let (letter, tail) = (&path[1..2], &path[2..]);
        format!("{letter}:{tail}")
    } else {
        path.to_string()
    }
}
/// Collects every file specification reachable from the catalog's
/// embedded-files name tree.
///
/// The lookup path is `catalog` → `Name::names()` → `Name::embedded_files()`.
/// If any step is missing, fails to resolve, or is not a dictionary where one
/// is required, an empty vector is returned rather than an error.
///
/// # Errors
///
/// Propagates any error from `NameTree::parse`. A tree value that
/// `parse_file_spec` rejects is reported to the tree parser as
/// `DocError::UnexpectedType`.
pub fn collect_attachments<S: PdfSource>(
    catalog: &Object,
    store: &ObjectStore<S>,
) -> DocResult<Vec<FileSpec>> {
    let Some(catalog_entries) = catalog.as_dict() else {
        return Ok(Vec::new());
    };

    let names_lookup = catalog_entries
        .get(&Name::names())
        .and_then(|raw| store.deep_resolve(raw).ok());
    let Some(names_obj) = names_lookup else {
        return Ok(Vec::new());
    };
    let Some(names_entries) = names_obj.as_dict() else {
        return Ok(Vec::new());
    };

    let ef_lookup = names_entries
        .get(&Name::embedded_files())
        .and_then(|raw| store.deep_resolve(raw).ok());
    let Some(ef_root) = ef_lookup else {
        return Ok(Vec::new());
    };

    let tree = NameTree::parse(ef_root, store, |value| {
        parse_file_spec(value, store).ok_or(DocError::UnexpectedType)
    })?;

    // Only the values are returned; the tree keys are dropped.
    let specs: Vec<FileSpec> = tree
        .entries()
        .iter()
        .map(|(_, spec)| spec.clone())
        .collect();
    Ok(specs)
}
/// Looks up `key` in `dict`, resolves the value through `store`, and returns
/// it as a lossily converted `String`.
///
/// Returns `None` when the key is absent, resolution fails, or the resolved
/// object is not a string.
fn extract_string<S: PdfSource>(
    dict: &HashMap<Name, Object>,
    key: &Name,
    store: &ObjectStore<S>,
) -> Option<String> {
    let raw = dict.get(key)?;
    let resolved = store.deep_resolve(raw).ok()?;
    let text = resolved.as_string()?;
    Some(text.to_string_lossy())
}
// Unit tests for file specification parsing, the `FileSpec` accessors, and
// the filename encode/decode helpers.
#[cfg(test)]
mod tests {
use super::*;
use rpdfium_core::PdfString;
// Opens an object store over the minimal in-memory PDF built below.
fn build_store() -> ObjectStore<Vec<u8>> {
let pdf = build_minimal_pdf();
ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
}
// Builds a two-object PDF (catalog + empty page tree) with a classic xref
// table. Statement order matters: each offset is captured from `pdf.len()`
// immediately before the corresponding section is appended.
fn build_minimal_pdf() -> Vec<u8> {
let mut pdf = Vec::new();
pdf.extend_from_slice(b"%PDF-1.4\n");
let obj1_offset = pdf.len();
pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
let obj2_offset = pdf.len();
pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
let xref_offset = pdf.len();
pdf.extend_from_slice(b"xref\n0 3\n");
pdf.extend_from_slice(b"0000000000 65535 f \r\n");
pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
pdf
}
// Wraps the UTF-8 bytes of `s` in a PDF string object.
fn str_obj(s: &str) -> Object {
Object::String(PdfString::from_bytes(s.as_bytes().to_vec()))
}
// Every supported entry is present; each must land in its matching field.
// Object 10 is not defined in the minimal PDF, yet the reference is still
// reported in `embedded_file`.
#[test]
fn test_parse_file_spec_full() {
let store = build_store();
let mut ef_dict = HashMap::new();
ef_dict.insert(Name::f(), Object::Reference(ObjectId::new(10, 0)));
let mut dict = HashMap::new();
dict.insert(Name::fs(), Object::Name(Name::from("URL")));
dict.insert(Name::f(), str_obj("report.pdf"));
dict.insert(Name::uf(), str_obj("report.pdf"));
dict.insert(Name::dos(), str_obj("REPORT.PDF"));
dict.insert(Name::unix_name(), str_obj("/home/user/report.pdf"));
dict.insert(Name::ef(), Object::Dictionary(ef_dict));
dict.insert(Name::desc(), str_obj("Annual report"));
let obj = Object::Dictionary(dict);
let spec = parse_file_spec(&obj, &store).unwrap();
assert_eq!(spec.file_system.as_deref(), Some("URL"));
assert_eq!(spec.filename.as_deref(), Some("report.pdf"));
assert_eq!(spec.unicode_filename.as_deref(), Some("report.pdf"));
assert_eq!(spec.dos_filename.as_deref(), Some("REPORT.PDF"));
assert_eq!(spec.unix_filename.as_deref(), Some("/home/user/report.pdf"));
assert_eq!(spec.embedded_file, Some(ObjectId::new(10, 0)));
assert_eq!(spec.description.as_deref(), Some("Annual report"));
}
// Only the plain filename entry is present; all other fields stay `None`.
#[test]
fn test_parse_file_spec_minimal() {
let store = build_store();
let mut dict = HashMap::new();
dict.insert(Name::f(), str_obj("data.txt"));
let obj = Object::Dictionary(dict);
let spec = parse_file_spec(&obj, &store).unwrap();
assert!(spec.file_system.is_none());
assert_eq!(spec.filename.as_deref(), Some("data.txt"));
assert!(spec.unicode_filename.is_none());
assert!(spec.embedded_file.is_none());
}
// A non-dictionary object is rejected.
#[test]
fn test_parse_file_spec_not_dict_returns_none() {
let store = build_store();
let obj = Object::Integer(42);
assert!(parse_file_spec(&obj, &store).is_none());
}
// `set_filename` overwrites both the plain and the Unicode name.
#[test]
fn test_set_filename_updates_in_memory() {
let mut spec = FileSpec {
file_system: None,
filename: Some("test.pdf".into()),
unicode_filename: None,
dos_filename: None,
unix_filename: None,
embedded_file: None,
description: None,
data: None,
};
spec.set_filename("new.pdf").unwrap();
assert_eq!(spec.filename.as_deref(), Some("new.pdf"));
assert_eq!(spec.unicode_filename.as_deref(), Some("new.pdf"));
}
// `name()` prefers the Unicode name over the plain filename.
#[test]
fn test_best_filename_prefers_unicode() {
let spec = FileSpec {
file_system: None,
filename: Some("fallback.pdf".into()),
unicode_filename: Some("unicode.pdf".into()),
dos_filename: None,
unix_filename: None,
embedded_file: None,
description: None,
data: None,
};
assert_eq!(spec.name(), Some("unicode.pdf"));
}
// With only a DOS name set, `name()` falls through to it.
#[test]
fn test_best_filename_falls_back() {
let spec = FileSpec {
file_system: None,
filename: None,
unicode_filename: None,
dos_filename: Some("DOS.PDF".into()),
unix_filename: None,
embedded_file: None,
description: None,
data: None,
};
assert_eq!(spec.name(), Some("DOS.PDF"));
}
// With no name fields set, `name()` returns `None`.
#[test]
fn test_best_filename_none() {
let spec = FileSpec {
file_system: None,
filename: None,
unicode_filename: None,
dos_filename: None,
unix_filename: None,
embedded_file: None,
description: None,
data: None,
};
assert!(spec.name().is_none());
}
// A Unix-style absolute path passes through encoding unchanged.
#[test]
fn test_encode_filename_unix() {
assert_eq!(encode_filename("/home/user/doc.pdf"), "/home/user/doc.pdf");
}
// A Windows path has its backslashes and drive prefix rewritten.
#[test]
fn test_encode_filename_windows() {
assert_eq!(encode_filename("C:\\Users\\doc.pdf"), "/C/Users/doc.pdf");
}
// A path already in slash form is left alone.
#[test]
fn test_encode_filename_already_pdf() {
assert_eq!(encode_filename("/path/to/file.pdf"), "/path/to/file.pdf");
}
// `/C/...` decodes back to `C:/...`.
#[test]
fn test_decode_filename_drive_letter() {
assert_eq!(decode_filename("/C/Users/doc.pdf"), "C:/Users/doc.pdf");
}
// A multi-character first component is not treated as a drive letter.
#[test]
fn test_decode_filename_unix() {
assert_eq!(decode_filename("/home/user/doc.pdf"), "/home/user/doc.pdf");
}
// A relative path without a leading slash is returned unchanged.
#[test]
fn test_decode_filename_no_drive() {
assert_eq!(decode_filename("relative/path.pdf"), "relative/path.pdf");
}
// No `data` means no underlying bytes.
#[test]
fn test_underlying_bytes_returns_none_when_no_data() {
let spec = FileSpec {
file_system: None,
filename: Some("report.pdf".into()),
unicode_filename: None,
dos_filename: None,
unix_filename: None,
embedded_file: None,
description: None,
data: None,
};
assert!(spec.underlying_bytes().is_none());
}
// Three cases for the embedded-file reference: no EF entry, an empty EF
// dictionary, and an EF dictionary that carries a reference.
#[test]
fn test_cpdf_file_spec_get_file_stream() {
let store = build_store();
let mut dict1 = HashMap::new();
dict1.insert(Name::f(), str_obj("test.pdf"));
let spec1 = parse_file_spec(&Object::Dictionary(dict1), &store).unwrap();
assert!(spec1.embedded_file.is_none());
let mut dict2 = HashMap::new();
dict2.insert(Name::f(), str_obj("test.pdf"));
dict2.insert(Name::ef(), Object::Dictionary(HashMap::new()));
let spec2 = parse_file_spec(&Object::Dictionary(dict2), &store).unwrap();
assert!(spec2.embedded_file.is_none());
let mut ef_dict = HashMap::new();
ef_dict.insert(Name::f(), Object::Reference(ObjectId::new(10, 0)));
let mut dict3 = HashMap::new();
dict3.insert(Name::f(), str_obj("test.pdf"));
dict3.insert(Name::ef(), Object::Dictionary(ef_dict));
let spec3 = parse_file_spec(&Object::Dictionary(dict3), &store).unwrap();
assert_eq!(spec3.embedded_file, Some(ObjectId::new(10, 0)));
}
// A name object is not a valid file spec. A reference to object 999, which
// the minimal PDF does not define, is still recorded but yields no data.
#[test]
fn test_cpdf_file_spec_get_params_dict() {
let store = build_store();
let spec = parse_file_spec(&Object::Name(Name::from("test.pdf")), &store);
assert!(spec.is_none());
let mut ef_dict = HashMap::new();
ef_dict.insert(Name::f(), Object::Reference(ObjectId::new(999, 0)));
let mut dict = HashMap::new();
dict.insert(Name::uf(), str_obj("test.pdf"));
dict.insert(Name::ef(), Object::Dictionary(ef_dict));
let spec = parse_file_spec(&Object::Dictionary(dict), &store).unwrap();
assert!(spec.file_data().is_none());
assert_eq!(spec.embedded_file, Some(ObjectId::new(999, 0)));
}
// When `data` is populated, `underlying_bytes()` exposes it as a slice.
#[test]
fn test_underlying_bytes_returns_data_when_present() {
let payload = b"Hello, embedded file!".to_vec();
let spec = FileSpec {
file_system: None,
filename: Some("doc.txt".into()),
unicode_filename: None,
dos_filename: None,
unix_filename: None,
embedded_file: None,
description: None,
data: Some(payload.clone()),
};
assert_eq!(spec.underlying_bytes(), Some(payload.as_slice()));
}
}