Skip to main content

edgeparse_core/models/document.rs

1//! PdfDocument — top-level extracted document.
2
3use serde::{Deserialize, Serialize};
4
5use super::content::ContentElement;
6
7/// The top-level extracted PDF document.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct PdfDocument {
10    /// Original file name
11    pub file_name: String,
12    /// Original source path when available.
13    #[serde(skip_serializing_if = "Option::is_none")]
14    pub source_path: Option<String>,
15    /// Number of pages
16    pub number_of_pages: u32,
17    /// Document author
18    pub author: Option<String>,
19    /// Document title
20    pub title: Option<String>,
21    /// Creation date
22    pub creation_date: Option<String>,
23    /// Modification date
24    pub modification_date: Option<String>,
25    /// PDF producer application
26    pub producer: Option<String>,
27    /// Creator application
28    pub creator: Option<String>,
29    /// Document subject
30    pub subject: Option<String>,
31    /// Comma-separated keywords
32    pub keywords: Option<String>,
33    /// Top-level content elements (reading order)
34    pub kids: Vec<ContentElement>,
35}
36
37impl PdfDocument {
38    /// Create a new empty PdfDocument.
39    pub fn new(file_name: String) -> Self {
40        Self {
41            file_name,
42            source_path: None,
43            number_of_pages: 0,
44            author: None,
45            title: None,
46            creation_date: None,
47            modification_date: None,
48            producer: None,
49            creator: None,
50            subject: None,
51            keywords: None,
52            kids: Vec::new(),
53        }
54    }
55
56    /// Return a list of (key, value) pairs for non-empty metadata fields.
57    pub fn metadata_pairs(&self) -> Vec<(&str, &str)> {
58        let mut pairs = Vec::new();
59        pairs.push(("File", self.file_name.as_str()));
60        if let Some(ref v) = self.title {
61            pairs.push(("Title", v.as_str()));
62        }
63        if let Some(ref v) = self.author {
64            pairs.push(("Author", v.as_str()));
65        }
66        if let Some(ref v) = self.subject {
67            pairs.push(("Subject", v.as_str()));
68        }
69        if let Some(ref v) = self.keywords {
70            pairs.push(("Keywords", v.as_str()));
71        }
72        if let Some(ref v) = self.creator {
73            pairs.push(("Creator", v.as_str()));
74        }
75        if let Some(ref v) = self.producer {
76            pairs.push(("Producer", v.as_str()));
77        }
78        if let Some(ref v) = self.creation_date {
79            pairs.push(("Created", v.as_str()));
80        }
81        if let Some(ref v) = self.modification_date {
82            pairs.push(("Modified", v.as_str()));
83        }
84        pairs
85    }
86}
87
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed document keeps its file name and is otherwise empty.
    #[test]
    fn test_new_document() {
        let document = PdfDocument::new(String::from("test.pdf"));

        assert_eq!(document.file_name, "test.pdf");
        assert!(document.source_path.is_none());
        assert_eq!(document.number_of_pages, 0);
        assert!(document.kids.is_empty());
    }

    /// Fields that are set show up after "File", in the fixed label order;
    /// unset fields are skipped.
    #[test]
    fn test_metadata_pairs() {
        let document = PdfDocument {
            title: Some(String::from("Annual Report")),
            author: Some(String::from("Alice")),
            keywords: Some(String::from("finance, report")),
            ..PdfDocument::new(String::from("report.pdf"))
        };

        // Comparing the whole vector checks both the length and every entry.
        assert_eq!(
            document.metadata_pairs(),
            [
                ("File", "report.pdf"),
                ("Title", "Annual Report"),
                ("Author", "Alice"),
                ("Keywords", "finance, report"),
            ]
        );
    }

    /// With no metadata set, the only pair reported is the "File" entry.
    #[test]
    fn test_metadata_pairs_empty() {
        let document = PdfDocument::new(String::from("test.pdf"));

        let labels: Vec<&str> = document
            .metadata_pairs()
            .iter()
            .map(|pair| pair.0)
            .collect();
        assert_eq!(labels, ["File"]);
    }
}