1use std::collections::HashMap;
4
5use rpdfium_core::{Name, PdfSource};
6use rpdfium_parser::{Object, ObjectStore};
7
8use crate::error::{DocError, DocResult};
9
/// Document-level metadata fields produced by [`parse_metadata`].
///
/// Every field is optional. A field is `None` when the corresponding entry is
/// missing, cannot be resolved, or does not hold a string value. Values are
/// stored exactly as extracted; no further parsing (for example of the date
/// fields) happens in this type.
///
/// `PartialEq`/`Eq` are derived so whole metadata values can be compared
/// directly (e.g. in `assert_eq!`) instead of field by field.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DocumentMetadata {
    /// Value looked up under `Name::title()`.
    pub title: Option<String>,
    /// Value looked up under `Name::author()`.
    pub author: Option<String>,
    /// Value looked up under `Name::subject()`.
    pub subject: Option<String>,
    /// Value looked up under `Name::keywords()`.
    pub keywords: Option<String>,
    /// Value looked up under `Name::creator()`.
    pub creator: Option<String>,
    /// Value looked up under `Name::producer()`.
    pub producer: Option<String>,
    /// Value looked up under `Name::creation_date()`, kept as the raw string
    /// (for example `D:20240101120000`).
    pub creation_date: Option<String>,
    /// Value looked up under `Name::mod_date()`, kept as the raw string
    /// (for example `D:20240615090000`).
    pub mod_date: Option<String>,
}
30
31pub fn parse_metadata<S: PdfSource>(
33 info_obj: &Object,
34 store: &ObjectStore<S>,
35) -> DocResult<DocumentMetadata> {
36 let resolved = store
37 .deep_resolve(info_obj)
38 .map_err(|e| DocError::Parser(e.to_string()))?;
39 let dict = resolved.as_dict().ok_or(DocError::UnexpectedType)?;
40
41 Ok(DocumentMetadata {
42 title: extract_string_field(dict, &Name::title(), store),
43 author: extract_string_field(dict, &Name::author(), store),
44 subject: extract_string_field(dict, &Name::subject(), store),
45 keywords: extract_string_field(dict, &Name::keywords(), store),
46 creator: extract_string_field(dict, &Name::creator(), store),
47 producer: extract_string_field(dict, &Name::producer(), store),
48 creation_date: extract_string_field(dict, &Name::creation_date(), store),
49 mod_date: extract_string_field(dict, &Name::mod_date(), store),
50 })
51}
52
53fn extract_string_field<S: PdfSource>(
56 dict: &HashMap<Name, Object>,
57 key: &Name,
58 store: &ObjectStore<S>,
59) -> Option<String> {
60 let obj = dict.get(key)?;
61 let resolved = store.deep_resolve(obj).ok()?;
62 resolved.as_string().map(|s| s.to_string_lossy())
63}
64
#[cfg(test)]
mod tests {
    use super::*;
    use rpdfium_core::PdfString;

    /// Opens an `ObjectStore` in lenient mode over the in-memory PDF produced
    /// by `build_minimal_pdf`. Panics if the store cannot be opened.
    fn build_store() -> ObjectStore<Vec<u8>> {
        let pdf = build_minimal_pdf();
        ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
    }

    /// Assembles a tiny PDF 1.4 byte stream: header, a catalog object, an
    /// empty pages object, a three-entry xref table, trailer and `startxref`.
    fn build_minimal_pdf() -> Vec<u8> {
        let mut pdf = Vec::new();
        pdf.extend_from_slice(b"%PDF-1.4\n");
        // Offsets are captured before each object is appended so the xref
        // entries below point at the start of the matching object.
        let obj1_offset = pdf.len();
        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
        let obj2_offset = pdf.len();
        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
        let xref_offset = pdf.len();
        pdf.extend_from_slice(b"xref\n0 3\n");
        pdf.extend_from_slice(b"0000000000 65535 f \r\n");
        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
        pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
        pdf
    }

    /// Wraps the UTF-8 bytes of `s` in an `Object::String`.
    fn str_obj(s: &str) -> Object {
        Object::String(PdfString::from_bytes(s.as_bytes().to_vec()))
    }

    /// All eight fields present as strings are extracted verbatim.
    #[test]
    fn test_full_metadata() {
        let store = build_store();
        let mut dict = HashMap::new();
        dict.insert(Name::title(), str_obj("My Document"));
        dict.insert(Name::author(), str_obj("John Doe"));
        dict.insert(Name::subject(), str_obj("Testing"));
        dict.insert(Name::keywords(), str_obj("pdf rust"));
        dict.insert(Name::creator(), str_obj("TestApp"));
        dict.insert(Name::producer(), str_obj("rpdfium"));
        dict.insert(Name::creation_date(), str_obj("D:20240101120000"));
        dict.insert(Name::mod_date(), str_obj("D:20240615090000"));
        let obj = Object::Dictionary(dict);
        let meta = parse_metadata(&obj, &store).unwrap();
        assert_eq!(meta.title.as_deref(), Some("My Document"));
        assert_eq!(meta.author.as_deref(), Some("John Doe"));
        assert_eq!(meta.subject.as_deref(), Some("Testing"));
        assert_eq!(meta.keywords.as_deref(), Some("pdf rust"));
        assert_eq!(meta.creator.as_deref(), Some("TestApp"));
        assert_eq!(meta.producer.as_deref(), Some("rpdfium"));
        assert_eq!(meta.creation_date.as_deref(), Some("D:20240101120000"));
        assert_eq!(meta.mod_date.as_deref(), Some("D:20240615090000"));
    }

    /// Entries that are absent from the dictionary come back as `None`.
    #[test]
    fn test_partial_metadata() {
        let store = build_store();
        let mut dict = HashMap::new();
        dict.insert(Name::title(), str_obj("Partial"));
        dict.insert(Name::producer(), str_obj("rpdfium"));
        let obj = Object::Dictionary(dict);
        let meta = parse_metadata(&obj, &store).unwrap();
        assert_eq!(meta.title.as_deref(), Some("Partial"));
        assert!(meta.author.is_none());
        assert!(meta.subject.is_none());
        assert!(meta.keywords.is_none());
        assert!(meta.creator.is_none());
        assert_eq!(meta.producer.as_deref(), Some("rpdfium"));
        assert!(meta.creation_date.is_none());
        assert!(meta.mod_date.is_none());
    }

    /// An empty dictionary parses successfully with every field `None`.
    #[test]
    fn test_empty_info_dict() {
        let store = build_store();
        let obj = Object::Dictionary(HashMap::new());
        let meta = parse_metadata(&obj, &store).unwrap();
        assert!(meta.title.is_none());
        assert!(meta.author.is_none());
        assert!(meta.subject.is_none());
        assert!(meta.keywords.is_none());
        assert!(meta.creator.is_none());
        assert!(meta.producer.is_none());
        assert!(meta.creation_date.is_none());
        assert!(meta.mod_date.is_none());
    }

    /// Non-string values are skipped without affecting valid sibling entries.
    #[test]
    fn test_non_string_values_ignored() {
        let store = build_store();
        let mut dict = HashMap::new();
        dict.insert(Name::title(), Object::Integer(42));
        dict.insert(Name::author(), Object::Boolean(true));
        dict.insert(Name::subject(), str_obj("Valid Subject"));
        let obj = Object::Dictionary(dict);
        let meta = parse_metadata(&obj, &store).unwrap();
        assert!(meta.title.is_none());
        assert!(meta.author.is_none());
        assert_eq!(meta.subject.as_deref(), Some("Valid Subject"));
    }

    /// A top-level object that is not a dictionary must be rejected rather
    /// than silently producing empty metadata.
    #[test]
    fn test_non_dictionary_info_is_error() {
        let store = build_store();
        assert!(parse_metadata(&Object::Integer(42), &store).is_err());
    }

    /// `Default` leaves every one of the eight fields unset.
    #[test]
    fn test_metadata_default() {
        let meta = DocumentMetadata::default();
        assert!(meta.title.is_none());
        assert!(meta.author.is_none());
        assert!(meta.subject.is_none());
        assert!(meta.keywords.is_none());
        assert!(meta.creator.is_none());
        assert!(meta.producer.is_none());
        assert!(meta.creation_date.is_none());
        assert!(meta.mod_date.is_none());
    }

    // The tests below are placeholders: each is ignored and panics via
    // `todo!()` if run explicitly, until CheckForSharedForm is implemented.
    // NOTE(review): the names suggest ports of upstream CPDF_Metadata tests —
    // confirm against the reference suite when filling them in.

    /// Placeholder: shared-form check, "email" case at top level.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_email_at_top_level() {
        todo!()
    }

    /// Placeholder: shared-form check, "acrobat" case at top level.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_acrobat_at_top_level() {
        todo!()
    }

    /// Placeholder: shared-form check, "filesystem" case at top level.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_filesystem_at_top_level() {
        todo!()
    }

    /// Placeholder: shared-form check when no workflow is present.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_without_workflow() {
        todo!()
    }

    /// Placeholder: shared-form check when the marker appears as a child.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_as_child() {
        todo!()
    }

    /// Placeholder: shared-form check, "no adhoc" case.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_as_no_adhoc() {
        todo!()
    }

    /// Placeholder: shared-form check when the maximum depth is exceeded.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_exceed_max_depth() {
        todo!()
    }

    /// Placeholder: shared-form check with a wrong namespace.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_wrong_namespace() {
        todo!()
    }

    /// Placeholder: shared-form check with multiple errors.
    #[test]
    #[ignore = "CheckForSharedForm not yet implemented"]
    fn test_cpdf_metadata_check_shared_form_multiple_errors() {
        todo!()
    }
}