1use std::collections::HashMap;
8
9use rpdfium_core::{Name, PdfSource};
10use rpdfium_parser::{Object, ObjectId, ObjectStore};
11
12use crate::error::{DocError, DocResult};
13use crate::name_tree::NameTree;
14
/// In-memory view of a PDF file specification dictionary, as produced by
/// [`parse_file_spec`].
///
/// Every field is optional: each one is filled only when the corresponding
/// dictionary entry is present, resolvable, and of the expected object type.
#[derive(Debug, Clone)]
pub struct FileSpec {
    /// Name value of the entry looked up with `Name::fs()` (the tests use
    /// `"URL"` here).
    pub file_system: Option<String>,
    /// String value of the entry looked up with `Name::f()`.
    pub filename: Option<String>,
    /// String value of the entry looked up with `Name::uf()`.
    pub unicode_filename: Option<String>,
    /// String value of the entry looked up with `Name::dos()`.
    pub dos_filename: Option<String>,
    /// String value of the entry looked up with `Name::unix_name()`.
    pub unix_filename: Option<String>,
    /// Object id of the embedded file stream referenced from the `Name::ef()`
    /// sub-dictionary, if any.
    pub embedded_file: Option<ObjectId>,
    /// String value of the entry looked up with `Name::desc()`.
    pub description: Option<String>,
    /// Decoded bytes of the embedded file stream. `None` when there is no
    /// `embedded_file` or when that stream could not be resolved or decoded.
    pub data: Option<Vec<u8>>,
}
39
40pub fn parse_file_spec<S: PdfSource>(obj: &Object, store: &ObjectStore<S>) -> Option<FileSpec> {
44 let resolved = store.deep_resolve(obj).ok()?;
45 let dict = resolved.as_dict()?;
46
47 let file_system = dict
48 .get(&Name::fs())
49 .and_then(|o| store.deep_resolve(o).ok())
50 .and_then(|o| o.as_name().map(|n| n.as_str().into_owned()));
51
52 let filename = extract_string(dict, &Name::f(), store);
53
54 let unicode_filename = extract_string(dict, &Name::uf(), store);
55
56 let dos_filename = extract_string(dict, &Name::dos(), store);
57
58 let unix_filename = extract_string(dict, &Name::unix_name(), store);
59
60 let ef_resolved = dict
61 .get(&Name::ef())
62 .and_then(|o| store.deep_resolve(o).ok());
63
64 let embedded_file = ef_resolved
65 .as_ref()
66 .and_then(|o| o.as_dict().cloned())
67 .and_then(|ef_dict| ef_dict.get(&Name::f()).and_then(|o| o.as_reference()));
68
69 let data: Option<Vec<u8>> = embedded_file.and_then(|stream_id| {
71 let stream_obj = store.resolve(stream_id).ok()?;
72 store.decode_stream(stream_obj).ok()
73 });
74
75 let description = extract_string(dict, &Name::desc(), store);
76
77 Some(FileSpec {
78 file_system,
79 filename,
80 unicode_filename,
81 dos_filename,
82 unix_filename,
83 embedded_file,
84 description,
85 data,
86 })
87}
88
89impl FileSpec {
90 pub fn name(&self) -> Option<&str> {
97 self.unicode_filename
98 .as_deref()
99 .or(self.filename.as_deref())
100 .or(self.unix_filename.as_deref())
101 .or(self.dos_filename.as_deref())
102 }
103
104 #[inline]
108 pub fn attachment_get_name(&self) -> Option<&str> {
109 self.name()
110 }
111
112 #[deprecated(note = "use `attachment_get_name()` — matches upstream `FPDFAttachment_GetName`")]
116 #[inline]
117 pub fn get_name(&self) -> Option<&str> {
118 self.name()
119 }
120
121 pub fn file_data(&self) -> Option<&[u8]> {
129 self.data.as_deref()
130 }
131
132 #[inline]
136 pub fn attachment_get_file(&self) -> Option<&[u8]> {
137 self.file_data()
138 }
139
140 #[deprecated(note = "use `attachment_get_file()` — matches upstream `FPDFAttachment_GetFile`")]
144 #[inline]
145 pub fn get_file(&self) -> Option<&[u8]> {
146 self.file_data()
147 }
148
149 pub fn subtype(&self) -> Option<&str> {
161 None
162 }
163
164 #[deprecated(note = "use `subtype()` — there is no public `FPDFAttachment_GetSubtype` API")]
166 #[inline]
167 pub fn get_subtype(&self) -> Option<&str> {
168 self.subtype()
169 }
170
171 pub fn underlying_bytes(&self) -> Option<&[u8]> {
179 self.data.as_deref()
180 }
181
182 #[deprecated(
184 note = "use `underlying_bytes()` — there is no public `FPDFAttachment_GetUnderlyingFile` API"
185 )]
186 #[inline]
187 pub fn get_underlying_bytes(&self) -> Option<&[u8]> {
188 self.underlying_bytes()
189 }
190
191 pub fn set_filename(&mut self, filename: &str) -> DocResult<()> {
197 self.filename = Some(encode_filename(filename));
198 self.unicode_filename = Some(filename.to_string());
199 Ok(())
200 }
201
202 #[deprecated(since = "0.1.0", note = "use name() instead")]
207 #[inline]
208 pub fn best_filename(&self) -> Option<&str> {
209 self.name()
210 }
211}
212
/// Converts a platform path into the slash-separated form stored in a PDF
/// file specification.
///
/// * Every backslash is replaced with `/`.
/// * A leading drive prefix (`X:` where `X` is an ASCII letter) becomes `/X`,
///   and a `/` is inserted after the drive when the remainder does not already
///   start with one, so `C:doc.pdf` becomes `/C/doc.pdf` rather than having the
///   drive letter fused to the file name.
/// * Anything else is returned with only the slash normalization applied. In
///   particular a leading `1:` or similar is not treated as a drive, which
///   keeps this function consistent with what `decode_filename` recognizes.
pub fn encode_filename(path: &str) -> String {
    let normalized = path.replace('\\', "/");

    let has_drive_prefix = matches!(
        normalized.as_bytes(),
        [letter, b':', ..] if letter.is_ascii_alphabetic()
    );
    if !has_drive_prefix {
        return normalized;
    }

    // The first two bytes are ASCII, so offsets 1 and 2 are char boundaries.
    let drive = &normalized[..1];
    let rest = &normalized[2..];
    let separator = if rest.starts_with('/') { "" } else { "/" };
    format!("/{drive}{separator}{rest}")
}
228
/// Converts a PDF-style path back into drive-letter form.
///
/// A path that starts with `/X/`, where `X` is a single ASCII letter, is
/// rewritten as `X:/…`. Every other input is returned unchanged.
pub fn decode_filename(path: &str) -> String {
    match path.as_bytes() {
        [b'/', drive, b'/', ..] if drive.is_ascii_alphabetic() => {
            // The first three bytes are ASCII, so offset 2 is a char boundary.
            format!("{}:{}", char::from(*drive), &path[2..])
        }
        _ => path.to_owned(),
    }
}
246
247pub fn collect_attachments<S: PdfSource>(
256 catalog: &Object,
257 store: &ObjectStore<S>,
258) -> DocResult<Vec<FileSpec>> {
259 let catalog_dict = match catalog.as_dict() {
261 Some(d) => d,
262 None => return Ok(Vec::new()),
263 };
264
265 let names_obj = match catalog_dict
266 .get(&Name::names())
267 .and_then(|o| store.deep_resolve(o).ok())
268 {
269 Some(o) => o,
270 None => return Ok(Vec::new()),
271 };
272
273 let names_dict = match names_obj.as_dict() {
274 Some(d) => d,
275 None => return Ok(Vec::new()),
276 };
277
278 let ef_obj = match names_dict
279 .get(&Name::embedded_files())
280 .and_then(|o| store.deep_resolve(o).ok())
281 {
282 Some(o) => o,
283 None => return Ok(Vec::new()),
284 };
285
286 let tree = NameTree::parse(ef_obj, store, |val_obj| {
288 parse_file_spec(val_obj, store).ok_or(DocError::UnexpectedType)
289 })?;
290
291 Ok(tree.entries().iter().map(|(_, v)| v.clone()).collect())
292}
293
294fn extract_string<S: PdfSource>(
296 dict: &HashMap<Name, Object>,
297 key: &Name,
298 store: &ObjectStore<S>,
299) -> Option<String> {
300 dict.get(key)
301 .and_then(|o| store.deep_resolve(o).ok())
302 .and_then(|o| o.as_string().map(|s| s.to_string_lossy()))
303}
304
#[cfg(test)]
mod tests {
    use super::*;
    use rpdfium_core::PdfString;

    /// Opens an object store over the minimal in-memory PDF.
    fn build_store() -> ObjectStore<Vec<u8>> {
        ObjectStore::open(build_minimal_pdf(), rpdfium_core::ParsingMode::Lenient).unwrap()
    }

    /// Assembles a two-object PDF (catalog + empty page tree) with a classic
    /// cross-reference table.
    fn build_minimal_pdf() -> Vec<u8> {
        let bodies: [&[u8]; 2] = [
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
            b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
        ];

        let mut pdf = Vec::new();
        pdf.extend_from_slice(b"%PDF-1.4\n");

        // Remember where each object starts so the xref table can point at it.
        let mut offsets = Vec::new();
        for body in bodies.iter() {
            offsets.push(pdf.len());
            pdf.extend_from_slice(body);
        }

        let xref_offset = pdf.len();
        pdf.extend_from_slice(b"xref\n0 3\n");
        pdf.extend_from_slice(b"0000000000 65535 f \r\n");
        for offset in &offsets {
            pdf.extend_from_slice(format!("{:010} 00000 n \r\n", offset).as_bytes());
        }
        pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
        pdf
    }

    /// Wraps `s` in a PDF string object.
    fn str_obj(s: &str) -> Object {
        Object::String(PdfString::from_bytes(s.as_bytes().to_vec()))
    }

    /// Builds a dictionary object from key/value pairs.
    fn dict_obj(entries: Vec<(Name, Object)>) -> Object {
        Object::Dictionary(entries.into_iter().collect())
    }

    /// A `FileSpec` with every field unset; tests override what they need.
    fn blank_spec() -> FileSpec {
        FileSpec {
            file_system: None,
            filename: None,
            unicode_filename: None,
            dos_filename: None,
            unix_filename: None,
            embedded_file: None,
            description: None,
            data: None,
        }
    }

    #[test]
    fn test_parse_file_spec_full() {
        let store = build_store();

        let ef = dict_obj(vec![(Name::f(), Object::Reference(ObjectId::new(10, 0)))]);
        let obj = dict_obj(vec![
            (Name::fs(), Object::Name(Name::from("URL"))),
            (Name::f(), str_obj("report.pdf")),
            (Name::uf(), str_obj("report.pdf")),
            (Name::dos(), str_obj("REPORT.PDF")),
            (Name::unix_name(), str_obj("/home/user/report.pdf")),
            (Name::ef(), ef),
            (Name::desc(), str_obj("Annual report")),
        ]);

        let spec = parse_file_spec(&obj, &store).unwrap();

        assert_eq!(spec.file_system.as_deref(), Some("URL"));
        assert_eq!(spec.filename.as_deref(), Some("report.pdf"));
        assert_eq!(spec.unicode_filename.as_deref(), Some("report.pdf"));
        assert_eq!(spec.dos_filename.as_deref(), Some("REPORT.PDF"));
        assert_eq!(spec.unix_filename.as_deref(), Some("/home/user/report.pdf"));
        assert_eq!(spec.embedded_file, Some(ObjectId::new(10, 0)));
        assert_eq!(spec.description.as_deref(), Some("Annual report"));
    }

    #[test]
    fn test_parse_file_spec_minimal() {
        let store = build_store();

        let obj = dict_obj(vec![(Name::f(), str_obj("data.txt"))]);
        let spec = parse_file_spec(&obj, &store).unwrap();

        assert!(spec.file_system.is_none());
        assert_eq!(spec.filename.as_deref(), Some("data.txt"));
        assert!(spec.unicode_filename.is_none());
        assert!(spec.embedded_file.is_none());
    }

    #[test]
    fn test_parse_file_spec_not_dict_returns_none() {
        let store = build_store();
        let obj = Object::Integer(42);
        assert!(parse_file_spec(&obj, &store).is_none());
    }

    #[test]
    fn test_set_filename_updates_in_memory() {
        let mut spec = FileSpec {
            filename: Some("test.pdf".into()),
            ..blank_spec()
        };
        spec.set_filename("new.pdf").unwrap();
        assert_eq!(spec.filename.as_deref(), Some("new.pdf"));
        assert_eq!(spec.unicode_filename.as_deref(), Some("new.pdf"));
    }

    #[test]
    fn test_best_filename_prefers_unicode() {
        let spec = FileSpec {
            filename: Some("fallback.pdf".into()),
            unicode_filename: Some("unicode.pdf".into()),
            ..blank_spec()
        };
        assert_eq!(spec.name(), Some("unicode.pdf"));
    }

    #[test]
    fn test_best_filename_falls_back() {
        let spec = FileSpec {
            dos_filename: Some("DOS.PDF".into()),
            ..blank_spec()
        };
        assert_eq!(spec.name(), Some("DOS.PDF"));
    }

    #[test]
    fn test_best_filename_none() {
        let spec = blank_spec();
        assert!(spec.name().is_none());
    }

    #[test]
    fn test_encode_filename_unix() {
        assert_eq!(encode_filename("/home/user/doc.pdf"), "/home/user/doc.pdf");
    }

    #[test]
    fn test_encode_filename_windows() {
        assert_eq!(encode_filename("C:\\Users\\doc.pdf"), "/C/Users/doc.pdf");
    }

    #[test]
    fn test_encode_filename_already_pdf() {
        assert_eq!(encode_filename("/path/to/file.pdf"), "/path/to/file.pdf");
    }

    #[test]
    fn test_decode_filename_drive_letter() {
        assert_eq!(decode_filename("/C/Users/doc.pdf"), "C:/Users/doc.pdf");
    }

    #[test]
    fn test_decode_filename_unix() {
        assert_eq!(decode_filename("/home/user/doc.pdf"), "/home/user/doc.pdf");
    }

    #[test]
    fn test_decode_filename_no_drive() {
        assert_eq!(decode_filename("relative/path.pdf"), "relative/path.pdf");
    }

    #[test]
    fn test_underlying_bytes_returns_none_when_no_data() {
        let spec = FileSpec {
            filename: Some("report.pdf".into()),
            ..blank_spec()
        };
        assert!(spec.underlying_bytes().is_none());
    }

    #[test]
    fn test_cpdf_file_spec_get_file_stream() {
        let store = build_store();

        // No EF entry at all.
        let without_ef = dict_obj(vec![(Name::f(), str_obj("test.pdf"))]);
        let spec1 = parse_file_spec(&without_ef, &store).unwrap();
        assert!(spec1.embedded_file.is_none());

        // EF entry present but empty.
        let empty_ef = dict_obj(vec![
            (Name::f(), str_obj("test.pdf")),
            (Name::ef(), dict_obj(Vec::new())),
        ]);
        let spec2 = parse_file_spec(&empty_ef, &store).unwrap();
        assert!(spec2.embedded_file.is_none());

        // EF entry carrying a stream reference.
        let with_stream = dict_obj(vec![
            (Name::f(), str_obj("test.pdf")),
            (
                Name::ef(),
                dict_obj(vec![(Name::f(), Object::Reference(ObjectId::new(10, 0)))]),
            ),
        ]);
        let spec3 = parse_file_spec(&with_stream, &store).unwrap();
        assert_eq!(spec3.embedded_file, Some(ObjectId::new(10, 0)));
    }

    #[test]
    fn test_cpdf_file_spec_get_params_dict() {
        let store = build_store();

        // A bare name is not a file specification dictionary.
        let spec = parse_file_spec(&Object::Name(Name::from("test.pdf")), &store);
        assert!(spec.is_none());

        // The reference is kept even though object 999 does not exist, so no
        // data can be loaded for it.
        let obj = dict_obj(vec![
            (Name::uf(), str_obj("test.pdf")),
            (
                Name::ef(),
                dict_obj(vec![(Name::f(), Object::Reference(ObjectId::new(999, 0)))]),
            ),
        ]);
        let spec = parse_file_spec(&obj, &store).unwrap();
        assert!(spec.file_data().is_none());
        assert_eq!(spec.embedded_file, Some(ObjectId::new(999, 0)));
    }

    #[test]
    fn test_underlying_bytes_returns_data_when_present() {
        let payload = b"Hello, embedded file!".to_vec();
        let spec = FileSpec {
            filename: Some("doc.txt".into()),
            data: Some(payload.clone()),
            ..blank_spec()
        };
        assert_eq!(spec.underlying_bytes(), Some(payload.as_slice()));
    }
}