/// Classification returned by [`check_for_shared_form`] for an XMP packet.
///
/// The variants mirror the numeric `workflowType` value found in the Acrobat
/// ad-hoc workflow metadata (prefix `adhocwf` / `AcrobatAdhocWorkflow`).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum SharedFormType {
    /// The packet contains no ad-hoc workflow marker at all.
    #[default]
    None,
    /// Workflow type `0`. Also reported when a workflow marker is present but
    /// the type is missing or not one of the recognised values.
    EmailBased,
    /// Workflow type `1` (presumably a form shared through Acrobat.com).
    AcrobatCom,
    /// Workflow type `2` (presumably a form shared through a file system
    /// location).
    FilesystemBased,
}
24
25pub fn check_for_shared_form(xml: &[u8]) -> SharedFormType {
30 let text = String::from_utf8_lossy(xml);
31
32 if !text.contains("adhocwf") && !text.contains("AcrobatAdhocWorkflow") {
34 return SharedFormType::None;
35 }
36
37 let workflow_value = extract_simple_tag(&text, "adhocwf:workflowType")
39 .or_else(|| extract_simple_tag(&text, "AcrobatAdhocWorkflow:workflowType"));
40
41 match workflow_value.as_deref() {
42 Some("0") => SharedFormType::EmailBased,
43 Some("1") => SharedFormType::AcrobatCom,
44 Some("2") => SharedFormType::FilesystemBased,
45 Some(_) => SharedFormType::EmailBased,
47 None => SharedFormType::EmailBased,
48 }
49}
50
/// Document properties pulled out of an XMP packet by [`XmpMetadata::from_xml`].
///
/// Every field is `None` when the corresponding element is absent or holds
/// only whitespace. Values are stored as trimmed text exactly as found in the
/// packet; dates are not parsed.
#[derive(Clone, Debug, Default)]
pub struct XmpMetadata {
    /// First `rdf:li` entry inside `dc:title`.
    pub title: Option<String>,
    /// First `rdf:li` entry inside `dc:creator`.
    pub creator: Option<String>,
    /// First `rdf:li` entry inside `dc:description`.
    pub description: Option<String>,
    /// Text of `xmp:CreateDate`.
    pub create_date: Option<String>,
    /// Text of `xmp:ModifyDate`.
    pub modify_date: Option<String>,
    /// Text of `pdf:Producer`.
    pub producer: Option<String>,
    /// Text of `pdf:Keywords`.
    pub keywords: Option<String>,
}
69
70impl XmpMetadata {
71 pub fn from_xml(xml: &[u8]) -> Self {
76 let text = String::from_utf8_lossy(xml);
77
78 Self {
79 title: extract_rdf_li_value(&text, "dc:title"),
80 creator: extract_rdf_li_value(&text, "dc:creator"),
81 description: extract_rdf_li_value(&text, "dc:description"),
82 create_date: extract_simple_tag(&text, "xmp:CreateDate"),
83 modify_date: extract_simple_tag(&text, "xmp:ModifyDate"),
84 producer: extract_simple_tag(&text, "pdf:Producer"),
85 keywords: extract_simple_tag(&text, "pdf:Keywords"),
86 }
87 }
88}
89
90fn extract_rdf_li_value(text: &str, tag: &str) -> Option<String> {
101 let open = format!("<{tag}");
102 let close = format!("</{tag}>");
103
104 let start = text.find(&open)?;
105 let end = text[start..].find(&close)?;
106 let section = &text[start..start + end];
107
108 extract_inner_rdf_li(section)
110}
111
/// Returns the trimmed text of the first `<rdf:li …>…</rdf:li>` in `text`.
///
/// Attributes on the `rdf:li` start tag are skipped by cutting at the first
/// `>` that follows it. Yields `None` when there is no `rdf:li`, when the
/// start tag or the closing tag is incomplete, or when the content is empty
/// after trimming.
fn extract_inner_rdf_li(text: &str) -> Option<String> {
    let (_, after_name) = text.split_once("<rdf:li")?;
    let (_, body) = after_name.split_once('>')?;
    let (raw, _) = body.split_once("</rdf:li>")?;

    match raw.trim() {
        "" => None,
        value => Some(value.to_string()),
    }
}
127
/// Returns the trimmed text content of the first `tag` element in `text`.
///
/// The start tag may be written as `<tag>`, with trailing whitespace
/// (`<tag >`) or with attributes (`<tag a="1">`). Elements whose name only
/// begins with `tag`, and empty-element tags (`<tag/>`, `<tag a="1"/>`), are
/// skipped. The content ends at the first `</tag>` after the start tag.
///
/// Returns `None` when no such element is found, when the start or closing
/// tag is incomplete, or when the content is empty after trimming.
///
/// This is a plain text scan, not an XML parser: entity references are
/// returned undecoded and a `>` inside an attribute value is taken as the end
/// of the start tag.
fn extract_simple_tag(text: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}");
    let close = format!("</{tag}>");

    let mut cursor = 0;
    while let Some(offset) = text[cursor..].find(&open) {
        let after_name = cursor + offset + open.len();
        // Resume after this candidate if it turns out not to be usable.
        cursor = after_name;
        let rest = &text[after_name..];

        let content = match rest.chars().next() {
            // `<tag>`: content begins right after the bracket.
            Some('>') => &rest[1..],
            // `<tag …>`: skip attributes / whitespace up to the bracket.
            Some(' ' | '\t' | '\r' | '\n') => {
                let tag_end = rest.find('>')?;
                if rest[..tag_end].ends_with('/') {
                    // Empty-element tag: no text content, keep looking.
                    continue;
                }
                &rest[tag_end + 1..]
            }
            // A longer element name or `<tag/>`: not what we are after.
            _ => continue,
        };

        let content_end = content.find(&close)?;
        let value = content[..content_end].trim();
        return if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        };
    }

    None
}
144
#[cfg(test)]
mod tests {
    use super::*;

    /// A complete packet populates every field of `XmpMetadata`.
    #[test]
    fn test_parse_basic_xmp() {
        let packet: &[u8] = br#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns:dc="http://purl.org/dc/elements/1.1/"
 xmlns:xmp="http://ns.adobe.com/xap/1.0/"
 xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
 <rdf:Description rdf:about="">
 <dc:title>
 <rdf:Alt>
 <rdf:li xml:lang="x-default">Test Document</rdf:li>
 </rdf:Alt>
 </dc:title>
 <dc:creator>
 <rdf:Seq>
 <rdf:li>John Doe</rdf:li>
 </rdf:Seq>
 </dc:creator>
 <dc:description>
 <rdf:Alt>
 <rdf:li xml:lang="x-default">A test PDF document</rdf:li>
 </rdf:Alt>
 </dc:description>
 <xmp:CreateDate>2024-01-15T10:30:00Z</xmp:CreateDate>
 <xmp:ModifyDate>2024-06-01T14:00:00Z</xmp:ModifyDate>
 <pdf:Producer>rpdfium 0.1</pdf:Producer>
 <pdf:Keywords>test, pdf, document</pdf:Keywords>
 </rdf:Description>
 </rdf:RDF>
</x:xmpmeta>
<?xpacket end="w"?>"#;

        let parsed = XmpMetadata::from_xml(packet);

        assert_eq!(parsed.title, Some("Test Document".to_string()));
        assert_eq!(parsed.creator, Some("John Doe".to_string()));
        assert_eq!(parsed.description, Some("A test PDF document".to_string()));
        assert_eq!(parsed.create_date, Some("2024-01-15T10:30:00Z".to_string()));
        assert_eq!(parsed.modify_date, Some("2024-06-01T14:00:00Z".to_string()));
        assert_eq!(parsed.producer, Some("rpdfium 0.1".to_string()));
        assert_eq!(parsed.keywords, Some("test, pdf, document".to_string()));
    }

    /// A packet with only the `xpacket` wrapper yields no properties.
    #[test]
    fn test_parse_empty_xmp() {
        let packet: &[u8] =
            b"<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?><?xpacket end=\"w\"?>";

        let parsed = XmpMetadata::from_xml(packet);

        assert_eq!(parsed.title, None);
        assert_eq!(parsed.creator, None);
        assert_eq!(parsed.create_date, None);
    }

    /// Properties that are present are read even when others are missing.
    #[test]
    fn test_parse_partial_xmp() {
        let packet: &[u8] = br#"<x:xmpmeta>
<rdf:RDF>
<rdf:Description>
 <pdf:Producer>Test Producer</pdf:Producer>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>"#;

        let parsed = XmpMetadata::from_xml(packet);

        assert_eq!(parsed.title, None);
        assert_eq!(parsed.producer, Some("Test Producer".to_string()));
    }

    /// Whitespace surrounding an element's text is trimmed away.
    #[test]
    fn test_parse_xmp_with_whitespace() {
        let packet: &[u8] = br#"<x:xmpmeta>
<rdf:RDF>
<rdf:Description>
 <xmp:CreateDate>
 2024-03-15
 </xmp:CreateDate>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>"#;

        let parsed = XmpMetadata::from_xml(packet);

        assert_eq!(parsed.create_date, Some("2024-03-15".to_string()));
    }

    /// Without any workflow marker the packet is not a shared form.
    #[test]
    fn test_shared_form_none() {
        let packet: &[u8] =
            b"<x:xmpmeta><rdf:RDF><rdf:Description></rdf:Description></rdf:RDF></x:xmpmeta>";

        let detected = check_for_shared_form(packet);

        assert_eq!(detected, SharedFormType::None);
    }

    /// Workflow type `0` is the e-mail based variant.
    #[test]
    fn test_shared_form_email() {
        let packet: &[u8] = br#"<x:xmpmeta>
<rdf:RDF>
<rdf:Description xmlns:adhocwf="http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/">
 <adhocwf:workflowType>0</adhocwf:workflowType>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>"#;

        let detected = check_for_shared_form(packet);

        assert_eq!(detected, SharedFormType::EmailBased);
    }

    /// Workflow type `1` is the Acrobat.com variant.
    #[test]
    fn test_shared_form_acrobat_com() {
        let packet: &[u8] = br#"<x:xmpmeta>
<rdf:RDF>
<rdf:Description xmlns:adhocwf="http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/">
 <adhocwf:workflowType>1</adhocwf:workflowType>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>"#;

        let detected = check_for_shared_form(packet);

        assert_eq!(detected, SharedFormType::AcrobatCom);
    }

    /// Workflow type `2` is the filesystem based variant.
    #[test]
    fn test_shared_form_filesystem() {
        let packet: &[u8] =
            br#"<rdf:Description xmlns:adhocwf="http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/">
 <adhocwf:workflowType>2</adhocwf:workflowType>
</rdf:Description>"#;

        let detected = check_for_shared_form(packet);

        assert_eq!(detected, SharedFormType::FilesystemBased);
    }

    /// A namespace declaration without a workflow type falls back to e-mail.
    #[test]
    fn test_shared_form_namespace_no_type() {
        let packet: &[u8] =
            br#"<rdf:Description xmlns:adhocwf="http://ns.adobe.com/AcrobatAdhocWorkflow/1.0/">
</rdf:Description>"#;

        let detected = check_for_shared_form(packet);

        assert_eq!(detected, SharedFormType::EmailBased);
    }

    /// The long `AcrobatAdhocWorkflow` prefix is recognised as well.
    #[test]
    fn test_shared_form_alternate_namespace() {
        let packet: &[u8] = br#"<rdf:Description xmlns:AcrobatAdhocWorkflow="http://example.com/">
 <AcrobatAdhocWorkflow:workflowType>1</AcrobatAdhocWorkflow:workflowType>
</rdf:Description>"#;

        let detected = check_for_shared_form(packet);

        assert_eq!(detected, SharedFormType::AcrobatCom);
    }

    /// Invalid UTF-8 bytes are replaced rather than aborting the parse.
    #[test]
    fn test_parse_invalid_utf8() {
        let packet: &[u8] = b"\xff\xfe<xmp:CreateDate>2024-01-01</xmp:CreateDate>";

        let parsed = XmpMetadata::from_xml(packet);

        assert_eq!(parsed.create_date, Some("2024-01-01".to_string()));
    }
}