Skip to main content

edgeparse_core/models/
document.rs

1//! PdfDocument — top-level extracted document.
2
3use serde::{Deserialize, Serialize};
4
5use super::content::ContentElement;
6
7/// The top-level extracted PDF document.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct PdfDocument {
10    /// Original file name
11    pub file_name: String,
12    /// Number of pages
13    pub number_of_pages: u32,
14    /// Document author
15    pub author: Option<String>,
16    /// Document title
17    pub title: Option<String>,
18    /// Creation date
19    pub creation_date: Option<String>,
20    /// Modification date
21    pub modification_date: Option<String>,
22    /// PDF producer application
23    pub producer: Option<String>,
24    /// Creator application
25    pub creator: Option<String>,
26    /// Document subject
27    pub subject: Option<String>,
28    /// Comma-separated keywords
29    pub keywords: Option<String>,
30    /// Top-level content elements (reading order)
31    pub kids: Vec<ContentElement>,
32}
33
34impl PdfDocument {
35    /// Create a new empty PdfDocument.
36    pub fn new(file_name: String) -> Self {
37        Self {
38            file_name,
39            number_of_pages: 0,
40            author: None,
41            title: None,
42            creation_date: None,
43            modification_date: None,
44            producer: None,
45            creator: None,
46            subject: None,
47            keywords: None,
48            kids: Vec::new(),
49        }
50    }
51
52    /// Return a list of (key, value) pairs for non-empty metadata fields.
53    pub fn metadata_pairs(&self) -> Vec<(&str, &str)> {
54        let mut pairs = Vec::new();
55        pairs.push(("File", self.file_name.as_str()));
56        if let Some(ref v) = self.title {
57            pairs.push(("Title", v.as_str()));
58        }
59        if let Some(ref v) = self.author {
60            pairs.push(("Author", v.as_str()));
61        }
62        if let Some(ref v) = self.subject {
63            pairs.push(("Subject", v.as_str()));
64        }
65        if let Some(ref v) = self.keywords {
66            pairs.push(("Keywords", v.as_str()));
67        }
68        if let Some(ref v) = self.creator {
69            pairs.push(("Creator", v.as_str()));
70        }
71        if let Some(ref v) = self.producer {
72            pairs.push(("Producer", v.as_str()));
73        }
74        if let Some(ref v) = self.creation_date {
75            pairs.push(("Created", v.as_str()));
76        }
77        if let Some(ref v) = self.modification_date {
78            pairs.push(("Modified", v.as_str()));
79        }
80        pairs
81    }
82}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed document has its file name and nothing else set.
    #[test]
    fn test_new_document() {
        let doc = PdfDocument::new("test.pdf".to_string());
        assert_eq!(doc.file_name, "test.pdf");
        assert_eq!(doc.number_of_pages, 0);
        assert!(doc.kids.is_empty());
        assert!(doc.title.is_none());
        assert!(doc.author.is_none());
    }

    /// Only the fields that are set appear, after the leading "File" pair.
    #[test]
    fn test_metadata_pairs() {
        let mut doc = PdfDocument::new("report.pdf".to_string());
        doc.title = Some("Annual Report".to_string());
        doc.author = Some("Alice".to_string());
        doc.keywords = Some("finance, report".to_string());

        // Compare the whole vector so a short or long result fails with a
        // readable diff instead of an index-out-of-bounds panic.
        assert_eq!(
            doc.metadata_pairs(),
            vec![
                ("File", "report.pdf"),
                ("Title", "Annual Report"),
                ("Author", "Alice"),
                ("Keywords", "finance, report"),
            ]
        );
    }

    /// With no optional metadata, only the "File" pair is present.
    #[test]
    fn test_metadata_pairs_empty() {
        let doc = PdfDocument::new("test.pdf".to_string());
        assert_eq!(doc.metadata_pairs(), vec![("File", "test.pdf")]);
    }

    /// Every field set: pins the full label order, and that `Some("")` is kept.
    #[test]
    fn test_metadata_pairs_all_fields_in_order() {
        let mut doc = PdfDocument::new("all.pdf".to_string());
        doc.title = Some("t".to_string());
        doc.author = Some("a".to_string());
        doc.subject = Some(String::new());
        doc.keywords = Some("k".to_string());
        doc.creator = Some("cr".to_string());
        doc.producer = Some("p".to_string());
        doc.creation_date = Some("c".to_string());
        doc.modification_date = Some("m".to_string());

        assert_eq!(
            doc.metadata_pairs(),
            vec![
                ("File", "all.pdf"),
                ("Title", "t"),
                ("Author", "a"),
                ("Subject", ""),
                ("Keywords", "k"),
                ("Creator", "cr"),
                ("Producer", "p"),
                ("Created", "c"),
                ("Modified", "m"),
            ]
        );
    }
}