Skip to main content

pdfplumber_core/
metadata.rs

1//! Document-level metadata types.
2//!
3//! Provides [`DocumentMetadata`] for PDF document information dictionary fields
4//! such as title, author, creation date, etc.
5
/// Document-level metadata extracted from the PDF /Info dictionary.
///
/// Every field is optional because a PDF may omit the /Info dictionary
/// entirely or populate only a subset of its entries. A value built with
/// [`Default::default`] therefore has every field set to `None`.
///
/// # PDF Date Format
///
/// Date fields (`creation_date`, `mod_date`) are stored as raw PDF date
/// strings in the format `D:YYYYMMDDHHmmSSOHH'mm'` (for example
/// `D:20240101120000+00'00'`). Use
/// [`DocumentMetadata::parse_pdf_date`] to extract components.
// NOTE(review): `parse_pdf_date` is not defined in the visible part of this
// file — confirm it exists elsewhere in the crate, otherwise the intra-doc
// link above does not resolve.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DocumentMetadata {
    /// Document title.
    pub title: Option<String>,
    /// Document author.
    pub author: Option<String>,
    /// Document subject / description.
    pub subject: Option<String>,
    /// Keywords associated with the document, kept as one unsplit string.
    pub keywords: Option<String>,
    /// Application that created the original document.
    pub creator: Option<String>,
    /// Application that produced the PDF.
    pub producer: Option<String>,
    /// Date the document was created (raw PDF date string, not parsed).
    pub creation_date: Option<String>,
    /// Date the document was last modified (raw PDF date string, not parsed).
    pub mod_date: Option<String>,
}
36
37impl DocumentMetadata {
38    /// Returns `true` if all metadata fields are `None`.
39    pub fn is_empty(&self) -> bool {
40        self.title.is_none()
41            && self.author.is_none()
42            && self.subject.is_none()
43            && self.keywords.is_none()
44            && self.creator.is_none()
45            && self.producer.is_none()
46            && self.creation_date.is_none()
47            && self.mod_date.is_none()
48    }
49}
50
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly defaulted value has no fields set and reports itself empty.
    #[test]
    fn default_metadata_is_empty() {
        let meta = DocumentMetadata::default();
        assert!(meta.is_empty());
        assert_eq!(meta.title, None);
        assert_eq!(meta.author, None);
        assert_eq!(meta.subject, None);
        assert_eq!(meta.keywords, None);
        assert_eq!(meta.creator, None);
        assert_eq!(meta.producer, None);
        assert_eq!(meta.creation_date, None);
        assert_eq!(meta.mod_date, None);
    }

    /// Every field round-trips the value it was constructed with.
    #[test]
    fn metadata_with_all_fields() {
        let meta = DocumentMetadata {
            title: Some("Test Document".to_string()),
            author: Some("John Doe".to_string()),
            subject: Some("Testing".to_string()),
            keywords: Some("test, pdf, rust".to_string()),
            creator: Some("LibreOffice".to_string()),
            producer: Some("pdfplumber-rs".to_string()),
            creation_date: Some("D:20240101120000+00'00'".to_string()),
            mod_date: Some("D:20240615153000+00'00'".to_string()),
        };
        assert!(!meta.is_empty());
        assert_eq!(meta.title.as_deref(), Some("Test Document"));
        assert_eq!(meta.author.as_deref(), Some("John Doe"));
        assert_eq!(meta.subject.as_deref(), Some("Testing"));
        assert_eq!(meta.keywords.as_deref(), Some("test, pdf, rust"));
        assert_eq!(meta.creator.as_deref(), Some("LibreOffice"));
        assert_eq!(meta.producer.as_deref(), Some("pdfplumber-rs"));
        assert_eq!(meta.creation_date.as_deref(), Some("D:20240101120000+00'00'"));
        assert_eq!(meta.mod_date.as_deref(), Some("D:20240615153000+00'00'"));
    }

    /// Setting one field leaves the others at `None`.
    #[test]
    fn metadata_with_partial_fields() {
        let meta = DocumentMetadata {
            title: Some("Only Title".to_string()),
            ..Default::default()
        };
        assert!(!meta.is_empty());
        assert_eq!(meta.title.as_deref(), Some("Only Title"));
        assert_eq!(meta.author, None);
        assert_eq!(meta.subject, None);
        assert_eq!(meta.keywords, None);
        assert_eq!(meta.creator, None);
        assert_eq!(meta.producer, None);
        assert_eq!(meta.creation_date, None);
        assert_eq!(meta.mod_date, None);
    }

    /// Each field on its own must be enough to make `is_empty` return `false`;
    /// this catches a field being left out of the emptiness check.
    #[test]
    fn any_single_field_makes_metadata_non_empty() {
        let value = || Some("x".to_string());
        let cases = [
            DocumentMetadata { title: value(), ..Default::default() },
            DocumentMetadata { author: value(), ..Default::default() },
            DocumentMetadata { subject: value(), ..Default::default() },
            DocumentMetadata { keywords: value(), ..Default::default() },
            DocumentMetadata { creator: value(), ..Default::default() },
            DocumentMetadata { producer: value(), ..Default::default() },
            DocumentMetadata { creation_date: value(), ..Default::default() },
            DocumentMetadata { mod_date: value(), ..Default::default() },
        ];
        for meta in cases.iter() {
            assert!(!meta.is_empty(), "expected non-empty metadata: {:?}", meta);
        }
    }

    /// A clone compares equal to its source and unequal to a different value.
    #[test]
    fn metadata_clone_and_eq() {
        let meta1 = DocumentMetadata {
            title: Some("Test".to_string()),
            ..Default::default()
        };
        let meta2 = meta1.clone();
        assert_eq!(meta1, meta2);
        assert_ne!(meta1, DocumentMetadata::default());
    }
}