1use crate::models::content::ContentElement;
6use crate::models::document::PdfDocument;
7
/// Classification of a single element-level difference between two documents.
///
/// The enum carries no data, so it is `Copy` and can be compared, hashed, and
/// used as a set or map key (for example to tally changes per kind).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChangeKind {
    /// The element is present only in the new document.
    Added,
    /// The element is present only in the old document.
    Removed,
    /// The element is present in both documents at the same index but differs.
    Modified,
}
18
19#[derive(Debug, Clone)]
21pub struct Change {
22 pub kind: ChangeKind,
24 pub index: usize,
26 pub description: String,
28}
29
30#[derive(Debug, Clone)]
32pub struct DiffResult {
33 pub changes: Vec<Change>,
35 pub metadata_changed: bool,
37 pub page_count_changed: bool,
39}
40
41impl DiffResult {
42 pub fn is_identical(&self) -> bool {
44 self.changes.is_empty() && !self.metadata_changed && !self.page_count_changed
45 }
46
47 pub fn change_count(&self) -> usize {
49 self.changes.len()
50 }
51
52 pub fn summary(&self) -> String {
54 if self.is_identical() {
55 return "Documents are identical.".to_string();
56 }
57 let added = self
58 .changes
59 .iter()
60 .filter(|c| c.kind == ChangeKind::Added)
61 .count();
62 let removed = self
63 .changes
64 .iter()
65 .filter(|c| c.kind == ChangeKind::Removed)
66 .count();
67 let modified = self
68 .changes
69 .iter()
70 .filter(|c| c.kind == ChangeKind::Modified)
71 .count();
72 format!(
73 "{} change(s): {} added, {} removed, {} modified{}{}",
74 self.changes.len(),
75 added,
76 removed,
77 modified,
78 if self.metadata_changed {
79 ", metadata changed"
80 } else {
81 ""
82 },
83 if self.page_count_changed {
84 ", page count changed"
85 } else {
86 ""
87 },
88 )
89 }
90}
91
92pub fn diff_documents(old: &PdfDocument, new: &PdfDocument) -> DiffResult {
94 let mut changes = Vec::new();
95
96 let metadata_changed = old.author != new.author
97 || old.title != new.title
98 || old.creation_date != new.creation_date
99 || old.producer != new.producer
100 || old.creator != new.creator
101 || old.subject != new.subject
102 || old.keywords != new.keywords;
103
104 let page_count_changed = old.number_of_pages != new.number_of_pages;
105
106 let max_len = old.kids.len().max(new.kids.len());
108 for i in 0..max_len {
109 match (old.kids.get(i), new.kids.get(i)) {
110 (Some(old_elem), Some(new_elem)) => {
111 if !elements_equal(old_elem, new_elem) {
112 changes.push(Change {
113 kind: ChangeKind::Modified,
114 index: i,
115 description: format!(
116 "Element {} changed: {:?} -> {:?}",
117 i,
118 element_tag(old_elem),
119 element_tag(new_elem)
120 ),
121 });
122 }
123 }
124 (Some(_), None) => {
125 changes.push(Change {
126 kind: ChangeKind::Removed,
127 index: i,
128 description: format!("Element {} removed", i),
129 });
130 }
131 (None, Some(_)) => {
132 changes.push(Change {
133 kind: ChangeKind::Added,
134 index: i,
135 description: format!("Element {} added", i),
136 });
137 }
138 (None, None) => unreachable!(),
139 }
140 }
141
142 DiffResult {
143 changes,
144 metadata_changed,
145 page_count_changed,
146 }
147}
148
149fn elements_equal(a: &ContentElement, b: &ContentElement) -> bool {
151 element_tag(a) == element_tag(b) && a.bbox() == b.bbox()
154}
155
156fn element_tag(elem: &ContentElement) -> &'static str {
158 match elem {
159 ContentElement::TextChunk(_) => "TextChunk",
160 ContentElement::TextLine(_) => "TextLine",
161 ContentElement::TextBlock(_) => "TextBlock",
162 ContentElement::Paragraph(_) => "Paragraph",
163 ContentElement::Heading(_) => "Heading",
164 ContentElement::NumberHeading(_) => "NumberHeading",
165 ContentElement::Table(_) => "Table",
166 ContentElement::Figure(_) => "Figure",
167 ContentElement::Formula(_) => "Formula",
168 ContentElement::Picture(_) => "Picture",
169 ContentElement::Caption(_) => "Caption",
170 ContentElement::HeaderFooter(_) => "HeaderFooter",
171 ContentElement::Image(_) => "Image",
172 ContentElement::Line(_) => "Line",
173 ContentElement::LineArt(_) => "LineArt",
174 ContentElement::List(_) => "List",
175 ContentElement::TableBorder(_) => "TableBorder",
176 }
177}
178
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::bbox::BoundingBox;
    use crate::models::chunks::TextChunk;
    use crate::models::document::PdfDocument;
    use crate::models::enums::{PdfLayer, TextFormat, TextType};

    /// Creates the fresh document every test starts from.
    fn blank_document() -> PdfDocument {
        PdfDocument::new("test.pdf".to_string())
    }

    /// Wraps `text` in a `TextChunk` element with fixed styling and a fixed bounding box.
    fn make_text_chunk(text: &str) -> ContentElement {
        let chunk = TextChunk {
            value: text.to_string(),
            bbox: BoundingBox::new(None, 0.0, 0.0, 100.0, 10.0),
            font_name: "Helvetica".to_string(),
            font_size: 12.0,
            font_weight: 400.0,
            italic_angle: 0.0,
            font_color: "#000000".to_string(),
            contrast_ratio: 21.0,
            symbol_ends: vec![],
            text_format: TextFormat::Normal,
            text_type: TextType::Regular,
            pdf_layer: PdfLayer::Main,
            ocg_visible: true,
            index: None,
            page_number: Some(1),
            level: None,
            mcid: None,
        };
        ContentElement::TextChunk(chunk)
    }

    /// A document compared with itself produces no changes at all.
    #[test]
    fn test_identical_documents() {
        let document = blank_document();
        let diff = diff_documents(&document, &document);
        assert!(diff.is_identical());
        assert_eq!(diff.change_count(), 0);
    }

    /// Differing authors set the metadata flag but leave the page-count flag clear.
    #[test]
    fn test_metadata_changed() {
        let mut before = blank_document();
        let mut after = blank_document();
        before.author = Some("Alice".to_string());
        after.author = Some("Bob".to_string());
        let diff = diff_documents(&before, &after);
        assert!(diff.metadata_changed);
        assert!(!diff.page_count_changed);
    }

    /// An element present only in the new document is reported as `Added`.
    #[test]
    fn test_element_added() {
        let before = blank_document();
        let mut after = blank_document();
        after.kids.push(make_text_chunk("hello"));
        let diff = diff_documents(&before, &after);
        assert_eq!(diff.change_count(), 1);
        assert_eq!(diff.changes[0].kind, ChangeKind::Added);
    }

    /// An element present only in the old document is reported as `Removed`.
    #[test]
    fn test_element_removed() {
        let mut before = blank_document();
        before.kids.push(make_text_chunk("hello"));
        let after = blank_document();
        let diff = diff_documents(&before, &after);
        assert_eq!(diff.change_count(), 1);
        assert_eq!(diff.changes[0].kind, ChangeKind::Removed);
    }

    /// The summary of an empty diff is the fixed "identical" message.
    #[test]
    fn test_summary() {
        let document = blank_document();
        let diff = diff_documents(&document, &document);
        assert_eq!(diff.summary(), "Documents are identical.");
    }
}