1use crate::annotation::*;
18use crate::error::{PdfXmlError, Result};
19use log::{debug, warn};
20use quick_xml::events::Event;
21use quick_xml::Reader;
22use std::collections::HashMap;
23
24fn extract_plain_text_from_richtext(input: &str) -> String {
37 let normalized_breaks = regex::Regex::new(r"(?is)<\s*br\s*/?\s*>")
40 .unwrap()
41 .replace_all(input, "\n")
42 .to_string();
43 let normalized_blocks = regex::Regex::new(r"(?is)</\s*(p|div|li|tr|h[1-6])\s*>")
44 .unwrap()
45 .replace_all(&normalized_breaks, "\n")
46 .to_string();
47 let no_tags = regex::Regex::new(r"(?is)<[^>]+>")
48 .unwrap()
49 .replace_all(&normalized_blocks, "")
50 .to_string();
51 let decoded = no_tags
52 .replace(" ", " ")
53 .replace(" ", " ")
54 .replace("<", "<")
55 .replace(">", ">")
56 .replace("&", "&")
57 .replace(""", "\"")
58 .replace(""", "\"")
59 .replace("'", "'")
60 .replace("'", "'");
61
62 decoded
63 .lines()
64 .map(|line| line.split_whitespace().collect::<Vec<_>>().join(" "))
65 .filter(|line| !line.is_empty())
66 .collect::<Vec<_>>()
67 .join("\n")
68}
69
/// In-memory representation of an XFDF document as produced by
/// `XfdfDocument::parse`.
#[derive(Debug, Clone)]
pub struct XfdfDocument {
    /// Value of the `xmlns` attribute on the root `<xfdf>` element, if present.
    pub xmlns: Option<String>,

    /// Top-level form fields; nested fields live in each field's `children`.
    pub fields: Vec<XfdfField>,

    /// Annotations that were successfully built while parsing.
    pub annotations: Vec<Annotation>,

    /// Every attribute of the root `<xfdf>` element other than `xmlns`,
    /// keyed by attribute name.
    pub metadata: HashMap<String, String>,
}
91
/// A form field read from an `<f>` / `<field>` element.
#[derive(Debug, Clone)]
pub struct XfdfField {
    /// The element's `name` attribute; the parser substitutes a generated
    /// `field_N` name when the attribute is missing.
    #[allow(dead_code)]
    pub name: String,
    /// Text captured for the field; `None` when no value was seen.
    pub value: Option<String>,
    /// Fields nested inside this field's element.
    pub children: Vec<XfdfField>,
}
101
102impl XfdfDocument {
103 pub fn parse(xml_str: &str) -> Result<Self> {
129 let mut reader = Reader::from_str(xml_str);
130 reader.config_mut().trim_text(true);
131
132 let mut doc = XfdfDocument {
133 xmlns: None,
134 fields: Vec::new(),
135 annotations: Vec::new(),
136 metadata: HashMap::new(),
137 };
138
139 let mut buf = Vec::new();
143 let mut in_annots = false;
144 let mut in_fields = false;
145 let mut current_field_stack: Vec<XfdfField> = Vec::new();
146 let mut current_annotation_attrs: HashMap<String, String> = HashMap::new();
147 let mut current_annotation_content: String = String::new();
148 let mut current_annotation_type: Option<String> = None;
149
150 let mut in_inklist = false;
152 let mut current_gesture_data: String = String::new();
153 let mut inklist_gestures: Vec<String> = Vec::new();
154 let mut current_child_tag: Option<String> = None; let mut child_tag_content: String = String::new(); loop {
158 match reader.read_event_into(&mut buf) {
162 Ok(Event::Start(ref e)) => {
163 let tag_name = String::from_utf8_lossy(e.local_name().as_ref()).into_owned();
164
165 debug!("开始标签: <{}>", tag_name);
166
167 match tag_name.as_ref() {
168 "xfdf" => {
169 for attr in e.attributes().filter_map(|a| a.ok()) {
170 let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
171 let value = String::from_utf8_lossy(attr.value.as_ref()).to_string();
172
173 match key.as_str() {
174 "xmlns" => doc.xmlns = Some(value),
175 _ => { doc.metadata.insert(key, value); }
176 }
177 }
178 }
179 "f" | "field" => {
180 in_fields = true;
181 let field_name = e.attributes()
182 .filter_map(|a| a.ok())
183 .find(|a| String::from_utf8_lossy(a.key.as_ref()) == "name")
184 .map(|a| String::from_utf8_lossy(&a.value).to_string())
185 .unwrap_or_else(|| format!("field_{}", current_field_stack.len()));
186
187 current_field_stack.push(XfdfField {
188 name: field_name,
189 value: None,
190 children: Vec::new(),
191 });
192 }
193 "value" => {}
194 "annots" => {
195 in_annots = true;
196 debug!("进入 annots 区域");
197 }
198 annot_type if Self::is_annotation_tag(annot_type) && in_annots => {
200 debug!("发现注释类型: {}", annot_type);
201 current_annotation_type = Some(annot_type.to_string());
202 current_annotation_content.clear();
203 inklist_gestures.clear();
204
205 current_annotation_attrs.clear();
207 for attr in e.attributes().filter_map(|a| a.ok()) {
208 let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
209 let value = String::from_utf8_lossy(attr.value.as_ref()).to_string();
210 debug!(" 属性: {} = {}", key, value);
211 current_annotation_attrs.insert(key, value);
212 }
213 }
214 child_tag if current_annotation_type.is_some() && in_annots => {
216 match child_tag {
217 "inklist" => {
218 in_inklist = true;
219 inklist_gestures.clear();
220 debug!("进入 inklist");
221 }
222 "gesture" if in_inklist => {
223 current_gesture_data.clear();
224 debug!("开始 gesture");
225 }
226 "contents" | "contents-richtext" | "defaultstyle" | "defaultappearance"
227 | "trn-custom-data" | "imagedata" => {
228 current_child_tag = Some(child_tag.to_string());
230 child_tag_content.clear();
231 }
232 "popup" => {
233 debug!("跳过嵌套 popup");
235 }
236 _ => {
237 debug!("未知子元素: {}", child_tag);
238 }
239 }
240 }
241 "popup" if in_annots && current_annotation_type.is_none() => {
242 current_annotation_type = Some("popup".to_string());
243 current_annotation_content.clear();
244 current_annotation_attrs.clear();
245 for attr in e.attributes().filter_map(|a| a.ok()) {
246 let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
247 let value = String::from_utf8_lossy(attr.value.as_ref()).to_string();
248 current_annotation_attrs.insert(key, value);
249 }
250 }
251 _ => {}
252 }
253 }
254 Ok(Event::Empty(ref e)) => {
255 let tag_name = String::from_utf8_lossy(e.local_name().as_ref()).into_owned();
256 debug!("空标签: <{}/>", tag_name);
257
258 if tag_name == "value" && !current_field_stack.is_empty() {
259 if let Some(field) = current_field_stack.last_mut() {
260 field.value = Some(String::new());
261 }
262 }
263
264 if tag_name == "popup" && current_annotation_type.is_some() {
266 debug!("自闭合 popup 子元素,跳过");
267 continue;
268 }
269
270 if in_annots && Self::is_annotation_tag(&tag_name) {
271 let mut attrs = HashMap::new();
272 for attr in e.attributes().filter_map(|a| a.ok()) {
273 let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
274 let value = String::from_utf8_lossy(attr.value.as_ref()).to_string();
275 attrs.insert(key, value);
276 }
277
278 match Self::build_annotation(&tag_name, &attrs, "") {
279 Ok(annotation) => {
280 doc.annotations.push(annotation);
281 debug!("成功解析自闭合 {} 注释", tag_name);
282 }
283 Err(err) => {
284 warn!("解析自闭合 {} 注解失败: {}", tag_name, err);
285 }
286 }
287 }
288 }
289 Ok(Event::Text(e)) => {
290 let text = e.unescape()?;
291 debug!("文本内容: [{}]", if text.len() > 100 { &text[..100] } else { &text });
292
293 if in_fields {
294 if let Some(field) = current_field_stack.last_mut() {
295 if field.value.is_none() || field.value.as_deref() == Some("") {
296 field.value = Some(text.to_string());
297 }
298 }
299 }
300
301 if current_annotation_type.is_some() {
302 if current_child_tag.is_some() {
303 child_tag_content.push_str(&text);
305 } else if in_inklist {
306 current_gesture_data.push_str(text.trim());
308 } else {
309 if !text.trim().is_empty() {
311 current_annotation_content.push_str(&text);
312 }
313 }
314 }
315 }
316 Ok(Event::End(ref e)) => {
317 let tag_name = String::from_utf8_lossy(e.local_name().as_ref()).into_owned();
318 debug!("结束标签: </{}>", tag_name);
319
320 match tag_name.as_ref() {
321 "f" | "field" => {
322 if let Some(field) = current_field_stack.pop() {
323 if let Some(parent) = current_field_stack.last_mut() {
324 parent.children.push(field);
325 } else {
326 doc.fields.push(field);
327 }
328 }
329 }
330 "fields" => {
331 in_fields = false;
332 }
333 "annots" => {
334 in_annots = false;
335 }
336 "inklist" => {
337 in_inklist = false;
338 if !inklist_gestures.is_empty() {
340 current_annotation_attrs.insert(
341 "_inklist".to_string(),
342 inklist_gestures.join("\x1b")
343 );
344 }
345 debug!("inklist 结束, 共 {} 条手势", inklist_gestures.len());
346 }
347 "gesture" if in_inklist => {
348 if !current_gesture_data.is_empty() {
349 inklist_gestures.push(current_gesture_data.clone());
350 debug!("gesture 数据: {} 个点",
351 current_gesture_data.matches(';').count());
352 }
353 current_gesture_data.clear();
354 }
355 "contents" | "contents-richtext" | "defaultstyle" | "defaultappearance"
357 | "trn-custom-data" | "imagedata" => {
358 if current_child_tag.as_deref() == Some(tag_name.as_str()) {
359 if tag_name == "contents" {
360 current_annotation_content = child_tag_content.clone();
362 } else if tag_name == "contents-richtext" {
363 current_annotation_attrs.insert(tag_name.to_string(), child_tag_content.clone());
364 } else {
365 current_annotation_attrs.insert(tag_name.to_string(), child_tag_content.clone());
367 }
368 current_child_tag = None;
369 child_tag_content.clear();
370 }
371 }
372 annot_type if Self::is_annotation_tag(annot_type) && current_annotation_type.as_deref() == Some(annot_type) => {
373 if let Some(typ) = current_annotation_type.take() {
375 let result = Self::build_annotation_with_children(
377 &typ,
378 ¤t_annotation_attrs,
379 ¤t_annotation_content,
380 );
381 match result {
382 Ok(annotation) => {
383 doc.annotations.push(annotation);
384 debug!("成功解析 {} 注释", typ);
385 }
386 Err(e) => {
387 warn!("解析 {} 注解失败: {}", typ, e);
388 }
389 }
390 }
391 current_annotation_attrs.clear();
392 current_annotation_content.clear();
393 inklist_gestures.clear();
394 current_child_tag = None;
395 child_tag_content.clear();
396 }
397 _ => {}
398 }
399 }
400 Ok(Event::Eof) => break,
401 Err(e) => {
402 return Err(PdfXmlError::XmlParse(e));
403 }
404 _ => {}
405 }
406
407 buf.clear();
408 }
409
410 debug!(
411 "解析完成: {} 个字段, {} 条注释",
412 doc.fields.len(),
413 doc.annotations.len()
414 );
415
416 Ok(doc)
417 }
418
419 fn is_annotation_tag(tag: &str) -> bool {
424 matches!(tag,
425 "text" | "highlight" | "underline" | "strikeout" | "squiggly" |
426 "freetext" | "square" | "circle" | "line" |
427 "polygon" | "polyline" | "ink" | "stamp" |
428 "caret" | "fileattachment" | "sound" | "link" |
429 "popup" | "widget"
430 )
431 }
432
433 fn build_annotation(
439 annotation_type: &str,
440 attrs: &HashMap<String, String>,
441 content: &str,
442 ) -> Result<Annotation> {
443 let base = Self::build_base(attrs, content)?;
444
445 Ok(match annotation_type {
446 "text" => Annotation::Text(TextAnnotation {
447 base,
448 open: attrs.get("open").map(|v| v == "yes").unwrap_or(false),
449 icon_type: attrs.get("icon").cloned().unwrap_or_else(|| "Note".to_string()),
450 }),
451 "highlight" => Annotation::Highlight(HighlightAnnotation {
452 base,
453 coords: attrs.get("coords").cloned(),
454 }),
455 "underline" => Annotation::Underline(UnderlineAnnotation {
456 base,
457 coords: attrs.get("coords").cloned(),
458 }),
459 "strikeout" => Annotation::StrikeOut(StrikeOutAnnotation {
460 base,
461 coords: attrs.get("coords").cloned(),
462 }),
463 "squiggly" => Annotation::Squiggly(SquigglyAnnotation {
464 base,
465 coords: attrs.get("coords").cloned(),
466 }),
467 "freetext" => Annotation::FreeText(FreeTextAnnotation {
468 base,
469 default_style: attrs.get("defaultstyle").cloned(),
470 default_appearance: attrs.get("defaultappearance").cloned(),
471 text_color: attrs.get("TextColor").cloned(),
472 align: attrs.get("align")
473 .and_then(|v| v.parse().ok())
474 .unwrap_or(0),
475 }),
476 "square" => Annotation::Square(SquareAnnotation {
477 base,
478 width: attrs.get("width")
479 .and_then(|v| v.parse().ok())
480 .unwrap_or(1.0),
481 }),
482 "circle" => Annotation::Circle(CircleAnnotation {
483 base,
484 width: attrs.get("width")
485 .and_then(|v| v.parse().ok())
486 .unwrap_or(1.0),
487 interior_color: attrs.get("interiorcolor").cloned(),
488 }),
489 "line" => Annotation::Line(LineAnnotation {
490 base,
491 start: attrs.get("start").cloned(),
492 end: attrs.get("end").cloned(),
493 head_style: attrs.get("head").cloned().unwrap_or_default(),
494 tail_style: attrs.get("tail").cloned().unwrap_or_default(),
495 width: attrs.get("width")
496 .and_then(|v| v.parse().ok())
497 .unwrap_or(1.0),
498 }),
499 "polygon" | "polyline" => Annotation::Polygon(PolygonAnnotation {
500 base,
501 vertices: attrs.get("vertices").cloned(),
502 is_closed: annotation_type == "polygon",
503 }),
504 "ink" => {
505 let ink_list_data = attrs.get("_inklist")
507 .map(|s| s.split('\x1b').map(String::from).collect())
508 .unwrap_or_default();
509 Annotation::Ink(InkAnnotation {
510 base,
511 ink_list: ink_list_data,
512 width: attrs.get("width")
513 .and_then(|v| v.parse().ok())
514 .unwrap_or(1.0),
515 })},
516 "stamp" => Annotation::Stamp(StampAnnotation {
517 base,
518 icon: attrs.get("icon").cloned().unwrap_or_default(),
519 image_data: attrs.get("imagedata").cloned(),
520 }),
521 "popup" => Annotation::Popup(PopupAnnotation {
522 base,
523 open: attrs.get("open").map(|v| v == "yes").unwrap_or(false),
524 parent_name: attrs.get("parent").cloned(),
525 }),
526 other => {
527 return Err(PdfXmlError::UnsupportedAnnotationType(other.to_string()));
528 }
529 })
530 }
531
    /// Builds an annotation for an element that had a separate end tag.
    ///
    /// Currently a plain delegate to `build_annotation`: `parse` stores the
    /// data of recognized child elements in `attrs` (and the `contents` text
    /// in `content`) before calling this, so no extra handling happens here.
    fn build_annotation_with_children(
        annotation_type: &str,
        attrs: &HashMap<String, String>,
        content: &str,
    ) -> Result<Annotation> {
        Self::build_annotation(annotation_type, attrs, content)
    }
541 fn build_base(attrs: &HashMap<String, String>, content: &str) -> Result<AnnotationBase> {
545 let contents = if content.trim().is_empty() {
549 attrs.get("contents-richtext")
550 .map(|rich| extract_plain_text_from_richtext(rich))
551 .filter(|text| !text.trim().is_empty())
552 } else {
553 Some(content.to_string())
554 };
555
556 Ok(AnnotationBase {
557 name: attrs.get("name").cloned(),
558 page: attrs.get("page")
559 .and_then(|v| v.parse::<usize>().ok())
560 .unwrap_or(0),
561 rect: attrs.get("rect").and_then(|r| Rect::from_string(r)),
562 title: attrs.get("title").cloned(),
563 subject: attrs.get("subject").cloned(),
564 contents,
565 creation_date: attrs.get("creationdate").cloned(),
566 modification_date: attrs.get("date").cloned(),
567 color: attrs.get("color").cloned(),
568 opacity: attrs.get("opacity")
569 .and_then(|v| v.parse::<f32>().ok())
570 .unwrap_or(1.0),
571 flags: attrs.get("flags")
572 .and_then(|v| u32::from_str_radix(v, 16).ok())
573 .unwrap_or_default(),
574 extra: attrs.iter()
575 .filter(|(k, _)| !Self::is_known_attr(k))
576 .map(|(k, v)| (k.clone(), v.clone()))
577 .collect(),
578 })
579 }
580
581 fn is_known_attr(key: &str) -> bool {
583 matches!(key,
584 "name" | "page" | "rect" | "title" | "subject" |
585 "creationdate" | "date" | "color" | "opacity" | "flags" |
586 "open" | "icon" | "width" | "defaultstyle" | "defaultappearance" | "align" |
587 "start" | "end" | "head" | "tail" | "vertices" |
588 "interiorcolor" | "parent" | "coords" | "TextColor" |
589 "contents-richtext" | "_inklist" | "imagedata"
590 )
591 }
592
593 pub fn get_annotations_for_page(&self, page: usize) -> Vec<&Annotation> {
612 self.annotations.iter().filter(|a| a.page() == page).collect()
613 }
614
615 pub fn total_pages(&self) -> usize {
635 self.annotations
636 .iter()
637 .map(|a| a.page())
638 .max()
639 .map(|p| p + 1)
640 .unwrap_or(1)
641 }
642 pub fn to_xfdf_string(&self) -> Result<String> {
664 let mut xml = String::from("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
668 xml.push_str("<xfdf xmlns=\"http://ns.adobe.com/xfdf/\" xml:space=\"preserve\">\n");
669
670 if !self.annotations.is_empty() {
671 xml.push_str(" <annots>\n");
672 for annotation in &self.annotations {
673 xml.push_str(&annotation_to_xfdf_element(annotation)?);
674 }
675 xml.push_str(" </annots>\n");
676 }
677
678 xml.push_str("</xfdf>\n");
679 Ok(xml)
680 }
681}
682
/// Escapes `value` for use as XML character data.
///
/// `&`, `<` and `>` are replaced by `&amp;`, `&lt;` and `&gt;`; every other
/// character is copied unchanged. The input is scanned once, so an ampersand
/// produced by one replacement is never escaped again.
fn escape_xml_text(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
689
/// Escapes `value` for use inside a quoted XML attribute.
///
/// In addition to the characters escaped in character data (`&`, `<`, `>`),
/// both quote characters are replaced (`"` by `&quot;`, `'` by `&apos;`), so
/// the result is safe regardless of which quote delimits the attribute.
/// The input is scanned once, so no replacement is escaped a second time.
fn escape_xml_attr(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
695
696fn format_rect(rect: &Rect) -> String {
697 format!("{},{},{},{}", rect.left, rect.bottom, rect.right, rect.top)
698}
699
/// Formats a value with at most three decimal places, dropping trailing
/// zeros and a dangling decimal point (`0.500` → `0.5`, `1.000` → `1`).
///
/// Despite the name it is also used for stroke widths.
fn format_opacity(opacity: f32) -> String {
    let fixed = format!("{:.3}", opacity);
    // Without a decimal point there is nothing to trim; trailing zeros would
    // be significant digits in that case.
    if !fixed.contains('.') {
        return fixed;
    }
    fixed
        .trim_end_matches('0')
        .trim_end_matches('.')
        .to_string()
}
710
711fn push_base_attrs(attrs: &mut Vec<(String, String)>, base: &AnnotationBase) {
712 if let Some(name) = &base.name {
713 attrs.push(("name".to_string(), name.clone()));
714 }
715 attrs.push(("page".to_string(), base.page.to_string()));
716 if let Some(rect) = &base.rect {
717 attrs.push(("rect".to_string(), format_rect(rect)));
718 }
719 if let Some(title) = &base.title {
720 attrs.push(("title".to_string(), title.clone()));
721 }
722 if let Some(subject) = &base.subject {
723 attrs.push(("subject".to_string(), subject.clone()));
724 }
725 if let Some(creation_date) = &base.creation_date {
726 attrs.push(("creationdate".to_string(), creation_date.clone()));
727 }
728 if let Some(modification_date) = &base.modification_date {
729 attrs.push(("date".to_string(), modification_date.clone()));
730 }
731 if let Some(color) = &base.color {
732 attrs.push(("color".to_string(), color.clone()));
733 }
734 if (base.opacity - 1.0).abs() > f32::EPSILON {
735 attrs.push(("opacity".to_string(), format_opacity(base.opacity)));
736 }
737 if base.flags != 0 {
738 attrs.push(("flags".to_string(), format!("{:X}", base.flags)));
739 }
740 for (key, value) in &base.extra {
741 attrs.push((key.clone(), value.clone()));
742 }
743}
744
745fn attrs_to_string(attrs: &[(String, String)]) -> String {
746 attrs
747 .iter()
748 .map(|(key, value)| format!(" {}=\"{}\"", key, escape_xml_attr(value)))
749 .collect::<String>()
750}
751
752fn annotation_to_xfdf_element(annotation: &Annotation) -> Result<String> {
753 let (tag, attrs, contents) = match annotation {
754 Annotation::Text(text) => {
755 let mut attrs = Vec::new();
756 push_base_attrs(&mut attrs, &text.base);
757 if text.open {
758 attrs.push(("open".to_string(), "yes".to_string()));
759 }
760 if text.icon_type != "Note" {
761 attrs.push(("icon".to_string(), text.icon_type.clone()));
762 }
763 ("text", attrs, text.base.contents.clone())
764 }
765 Annotation::Highlight(highlight) => {
766 let mut attrs = Vec::new();
767 push_base_attrs(&mut attrs, &highlight.base);
768 if let Some(coords) = &highlight.coords {
769 attrs.push(("coords".to_string(), coords.clone()));
770 }
771 ("highlight", attrs, highlight.base.contents.clone())
772 }
773 Annotation::Underline(underline) => {
774 let mut attrs = Vec::new();
775 push_base_attrs(&mut attrs, &underline.base);
776 if let Some(coords) = &underline.coords {
777 attrs.push(("coords".to_string(), coords.clone()));
778 }
779 ("underline", attrs, underline.base.contents.clone())
780 }
781 Annotation::StrikeOut(strikeout) => {
782 let mut attrs = Vec::new();
783 push_base_attrs(&mut attrs, &strikeout.base);
784 if let Some(coords) = &strikeout.coords {
785 attrs.push(("coords".to_string(), coords.clone()));
786 }
787 ("strikeout", attrs, strikeout.base.contents.clone())
788 }
789 Annotation::Squiggly(squiggly) => {
790 let mut attrs = Vec::new();
791 push_base_attrs(&mut attrs, &squiggly.base);
792 if let Some(coords) = &squiggly.coords {
793 attrs.push(("coords".to_string(), coords.clone()));
794 }
795 ("squiggly", attrs, squiggly.base.contents.clone())
796 }
797 Annotation::FreeText(freetext) => {
798 let mut attrs = Vec::new();
799 push_base_attrs(&mut attrs, &freetext.base);
800 if let Some(default_style) = &freetext.default_style {
801 attrs.push(("defaultstyle".to_string(), default_style.clone()));
802 }
803 if let Some(default_appearance) = &freetext.default_appearance {
804 attrs.push(("defaultappearance".to_string(), default_appearance.clone()));
805 }
806 if let Some(text_color) = &freetext.text_color {
807 attrs.push(("TextColor".to_string(), text_color.clone()));
808 }
809 if freetext.align != 0 {
810 attrs.push(("align".to_string(), freetext.align.to_string()));
811 }
812 ("freetext", attrs, freetext.base.contents.clone())
813 }
814 Annotation::Square(square) => {
815 let mut attrs = Vec::new();
816 push_base_attrs(&mut attrs, &square.base);
817 if (square.width - 1.0).abs() > f32::EPSILON {
818 attrs.push(("width".to_string(), format_opacity(square.width)));
819 }
820 ("square", attrs, square.base.contents.clone())
821 }
822 Annotation::Circle(circle) => {
823 let mut attrs = Vec::new();
824 push_base_attrs(&mut attrs, &circle.base);
825 if (circle.width - 1.0).abs() > f32::EPSILON {
826 attrs.push(("width".to_string(), format_opacity(circle.width)));
827 }
828 if let Some(interior_color) = &circle.interior_color {
829 attrs.push(("interiorcolor".to_string(), interior_color.clone()));
830 }
831 ("circle", attrs, circle.base.contents.clone())
832 }
833 Annotation::Line(line) => {
834 let mut attrs = Vec::new();
835 push_base_attrs(&mut attrs, &line.base);
836 if let Some(start) = &line.start {
837 attrs.push(("start".to_string(), start.clone()));
838 }
839 if let Some(end) = &line.end {
840 attrs.push(("end".to_string(), end.clone()));
841 }
842 if !line.head_style.is_empty() {
843 attrs.push(("head".to_string(), line.head_style.clone()));
844 }
845 if !line.tail_style.is_empty() {
846 attrs.push(("tail".to_string(), line.tail_style.clone()));
847 }
848 if (line.width - 1.0).abs() > f32::EPSILON {
849 attrs.push(("width".to_string(), format_opacity(line.width)));
850 }
851 ("line", attrs, line.base.contents.clone())
852 }
853 Annotation::Polygon(polygon) => {
854 let mut attrs = Vec::new();
855 push_base_attrs(&mut attrs, &polygon.base);
856 if let Some(vertices) = &polygon.vertices {
857 attrs.push(("vertices".to_string(), vertices.clone()));
858 }
859 let tag = if polygon.is_closed { "polygon" } else { "polyline" };
860 (tag, attrs, polygon.base.contents.clone())
861 }
862 Annotation::Ink(ink) => {
863 let mut attrs = Vec::new();
864 push_base_attrs(&mut attrs, &ink.base);
865 if (ink.width - 1.0).abs() > f32::EPSILON {
866 attrs.push(("width".to_string(), format_opacity(ink.width)));
867 }
868 let mut xml = format!(" <ink{}>\n", attrs_to_string(&attrs));
869 if let Some(contents) = &ink.base.contents {
870 xml.push_str(&format!(" {}\n", escape_xml_text(contents)));
871 }
872 if !ink.ink_list.is_empty() {
873 xml.push_str(" <inklist>\n");
874 for gesture in &ink.ink_list {
875 xml.push_str(&format!(" <gesture>{}</gesture>\n", escape_xml_text(gesture)));
876 }
877 xml.push_str(" </inklist>\n");
878 }
879 xml.push_str(" </ink>\n");
880 return Ok(xml);
881 }
882 Annotation::Stamp(stamp) => {
883 let mut attrs = Vec::new();
884 push_base_attrs(&mut attrs, &stamp.base);
885 if !stamp.icon.is_empty() {
886 attrs.push(("icon".to_string(), stamp.icon.clone()));
887 }
888 if let Some(image_data) = &stamp.image_data {
889 attrs.push(("imagedata".to_string(), image_data.clone()));
890 }
891 ("stamp", attrs, stamp.base.contents.clone())
892 }
893 Annotation::Popup(popup) => {
894 let mut attrs = Vec::new();
895 push_base_attrs(&mut attrs, &popup.base);
896 if popup.open {
897 attrs.push(("open".to_string(), "yes".to_string()));
898 }
899 if let Some(parent_name) = &popup.parent_name {
900 attrs.push(("parent".to_string(), parent_name.clone()));
901 }
902 ("popup", attrs, popup.base.contents.clone())
903 }
904 };
905
906 let attrs = attrs_to_string(&attrs);
907 match contents {
908 Some(contents) if !contents.is_empty() => Ok(format!(
909 " <{tag}{attrs}>{}</{tag}>\n",
910 escape_xml_text(&contents)
911 )),
912 _ => Ok(format!(" <{tag}{attrs}/>\n")),
913 }
914}
915
#[cfg(test)]
mod tests {
    use super::*;

    /// A single `<text>` annotation with inline content is parsed into a
    /// `Text` annotation carrying its attributes and trimmed contents.
    #[test]
    fn test_parse_simple_xfdf() {
        let xml = concat!(
            "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",
            "<xfdf xmlns=\"http://ns.adobe.com/xfdf/\" xml:space=\"preserve\">",
            " <annots>",
            " <text subject=\"Test Comment\" page=\"0\" rect=\"100,600,300,650\"",
            " title=\"Author\" date=\"D:20240101120000\" color=\"#FFFF00\">",
            " This is a test comment",
            " </text>",
            " </annots>",
            "</xfdf>"
        );

        let doc = XfdfDocument::parse(xml).unwrap();
        assert_eq!(doc.annotations.len(), 1);

        match &doc.annotations[0] {
            Annotation::Text(text) => {
                assert_eq!(text.base.subject.as_deref(), Some("Test Comment"));
                assert_eq!(text.base.page, 0);
                assert_eq!(text.base.contents.as_deref(), Some("This is a test comment"));
            }
            _ => panic!("Expected Text annotation"),
        }
    }

    /// Self-closing annotation elements are each parsed, and the page count
    /// is derived from the highest page index.
    #[test]
    fn test_parse_multiple_annotations() {
        let xml = concat!(
            "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",
            "<xfdf xmlns=\"http://ns.adobe.com/xfdf/\" xml:space=\"preserve\">",
            " <annots>",
            " <highlight page=\"0\" rect=\"50,700,200,720\" color=\"#FFFF00\"/>",
            " <underline page=\"1\" rect=\"50,500,150,520\"/>",
            " <square page=\"0\" rect=\"300,400,450,550\" width=\"2\" color=\"#0000FF\"/>",
            " </annots>",
            "</xfdf>"
        );

        let doc = XfdfDocument::parse(xml).unwrap();
        assert_eq!(doc.annotations.len(), 3);
        // Pages 0 and 1 are used, so two pages are reported.
        assert_eq!(doc.total_pages(), 2);
    }

    /// The four comma-separated numbers map to left, bottom, right, top.
    #[test]
    fn test_rect_parsing() {
        let rect = Rect::from_string("100,200,300,400").unwrap();
        assert!((rect.left - 100.0).abs() < f64::EPSILON);
        assert!((rect.bottom - 200.0).abs() < f64::EPSILON);
        assert!((rect.right - 300.0).abs() < f64::EPSILON);
        assert!((rect.top - 400.0).abs() < f64::EPSILON);
    }

    /// Serialization writes attributes in a fixed order, omits defaults, and
    /// escapes markup characters in the annotation contents.
    #[test]
    fn test_to_xfdf_string() {
        let doc = XfdfDocument {
            xmlns: Some("http://ns.adobe.com/xfdf/".to_string()),
            fields: Vec::new(),
            annotations: vec![Annotation::Text(TextAnnotation {
                base: AnnotationBase {
                    name: Some("annot-1".to_string()),
                    page: 0,
                    rect: Some(Rect {
                        left: 100.0,
                        bottom: 600.0,
                        right: 300.0,
                        top: 650.0,
                    }),
                    title: Some("Author".to_string()),
                    subject: Some("Test Comment".to_string()),
                    contents: Some("Hello <XFDF> & PDF".to_string()),
                    creation_date: None,
                    modification_date: Some("D:20240101120000".to_string()),
                    color: Some("#FFFF00".to_string()),
                    opacity: 1.0,
                    flags: 0,
                    extra: HashMap::new(),
                },
                open: false,
                icon_type: "Note".to_string(),
            })],
            metadata: HashMap::new(),
        };

        let xml = doc.to_xfdf_string().unwrap();
        assert!(xml.contains("<xfdf xmlns=\"http://ns.adobe.com/xfdf/\" xml:space=\"preserve\">"));
        assert!(xml.contains("<annots>"));
        // The contents must appear escaped in the serialized output.
        assert!(xml.contains("<text name=\"annot-1\" page=\"0\" rect=\"100,600,300,650\" title=\"Author\" subject=\"Test Comment\" date=\"D:20240101120000\" color=\"#FFFF00\">Hello &lt;XFDF&gt; &amp; PDF</text>"));
    }

    /// Block boundaries and `<br/>` become line breaks, and character
    /// entities in the rich text are decoded.
    #[test]
    fn test_extract_plain_text_from_richtext_preserves_line_breaks() {
        let rich = "<body><p>Hello world</p><div>Second<br/>line &amp; more</div></body>";
        let text = extract_plain_text_from_richtext(rich);
        assert_eq!(text, "Hello world\nSecond\nline & more");
    }
}