1use std::collections::HashMap;
16
17use rpdfium_core::{Name, PdfSource};
18use rpdfium_parser::{Object, ObjectId, ObjectStore};
19
/// One element of a structure tree, as built by `parse_struct_element`.
#[derive(Debug, Clone)]
pub struct StructElement {
    /// Structure type name, taken from the `Name::s()` entry; empty when that
    /// entry is missing or is not a name.
    pub struct_type: String,
    /// Object type name, taken from the `Name::obj_type()` entry, if present.
    pub obj_type: Option<String>,
    /// Alternate description string (`Name::alt()` entry), if present.
    pub alt_text: Option<String>,
    /// Replacement text string (`Name::actual_text()` entry), if present.
    pub actual_text: Option<String>,
    /// Language string (`Name::lang()` entry), if present.
    pub lang: Option<String>,
    /// Title string (`Name::t()` entry), if present.
    pub title: Option<String>,
    /// Element identifier string (`Name::id()` entry), if present.
    pub id: Option<String>,
    /// Reference stored under `Name::pg()`, when that entry is a reference.
    pub page_ref: Option<ObjectId>,
    /// Marked-content identifiers attached to this element. The parser in this
    /// file leaves the list empty; presumably a later tree-building pass fills
    /// it in — TODO confirm.
    pub mcids: Vec<i64>,
    /// Child elements. Left empty by `parse_struct_element`.
    pub children: Vec<StructElement>,
    /// Attribute objects parsed by `parse_attributes`.
    pub attributes: Vec<StructAttribute>,
    /// Index associated with the parent element, or `None`. The parser always
    /// stores `None`; what the index refers to is not defined in this file.
    pub parent_index: Option<usize>,
}
57
/// A single attribute object attached to a [`StructElement`].
#[derive(Debug, Clone)]
pub struct StructAttribute {
    /// Owner name read from the `Name::o()` entry; empty when absent or not a name.
    pub owner: String,
    /// Remaining `(key, value)` pairs of the attribute dictionary, limited to
    /// the values that `convert_attribute_value` can represent.
    pub entries: Vec<(String, AttributeValue)>,
}
66
/// Value of one entry inside a [`StructAttribute`].
///
/// `PartialEq` is derived so values can be compared directly (for example
/// with `assert_eq!`). `Eq` is not available because of the `f64` payloads.
#[derive(Debug, Clone, PartialEq)]
pub enum AttributeValue {
    /// A numeric value.
    Number(f64),
    /// A string value, decoded lossily into a Rust `String`.
    Text(String),
    /// An array reduced to its numeric items.
    Array(Vec<f64>),
    /// A name value.
    Name(String),
}
79
80pub(crate) fn parse_struct_element<S: PdfSource>(
82 dict: &HashMap<Name, Object>,
83 store: &ObjectStore<S>,
84) -> StructElement {
85 let struct_type = dict
86 .get(&Name::s())
87 .and_then(|obj| obj.as_name())
88 .map(|n| n.as_str().into_owned())
89 .unwrap_or_default();
90
91 let obj_type = dict
92 .get(&Name::obj_type())
93 .and_then(|obj| obj.as_name())
94 .map(|n| n.as_str().into_owned());
95
96 let alt_text = dict
97 .get(&Name::alt())
98 .and_then(|obj| store.deep_resolve(obj).ok())
99 .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
100
101 let actual_text = dict
102 .get(&Name::actual_text())
103 .and_then(|obj| store.deep_resolve(obj).ok())
104 .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
105
106 let lang = dict
107 .get(&Name::lang())
108 .and_then(|obj| store.deep_resolve(obj).ok())
109 .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
110
111 let title = dict
112 .get(&Name::t())
113 .and_then(|obj| store.deep_resolve(obj).ok())
114 .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
115
116 let id = dict
117 .get(&Name::id())
118 .and_then(|obj| store.deep_resolve(obj).ok())
119 .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
120
121 let page_ref = dict.get(&Name::pg()).and_then(|obj| obj.as_reference());
122
123 let attributes = parse_attributes(dict, store);
125
126 StructElement {
127 struct_type,
128 obj_type,
129 alt_text,
130 actual_text,
131 lang,
132 title,
133 id,
134 page_ref,
135 mcids: Vec::new(),
136 children: Vec::new(),
137 attributes,
138 parent_index: None,
139 }
140}
141
142pub(crate) fn parse_attributes<S: PdfSource>(
146 dict: &HashMap<Name, Object>,
147 store: &ObjectStore<S>,
148) -> Vec<StructAttribute> {
149 let a_obj = match dict.get(&Name::a()) {
150 Some(obj) => obj,
151 None => return Vec::new(),
152 };
153 let resolved = match store.deep_resolve(a_obj) {
154 Ok(r) => r,
155 Err(_) => return Vec::new(),
156 };
157
158 match resolved {
159 Object::Dictionary(attr_dict) => {
160 if let Some(attr) = parse_single_attribute(attr_dict, store) {
161 vec![attr]
162 } else {
163 Vec::new()
164 }
165 }
166 Object::Array(arr) => {
167 let mut attrs = Vec::new();
168 for item in arr {
169 let item_resolved = match store.deep_resolve(item) {
170 Ok(r) => r,
171 Err(_) => continue,
172 };
173 if let Some(attr_dict) = item_resolved.as_dict() {
174 if let Some(attr) = parse_single_attribute(attr_dict, store) {
175 attrs.push(attr);
176 }
177 }
178 }
179 attrs
180 }
181 _ => Vec::new(),
182 }
183}
184
185fn parse_single_attribute<S: PdfSource>(
187 dict: &HashMap<Name, Object>,
188 store: &ObjectStore<S>,
189) -> Option<StructAttribute> {
190 let owner = dict
191 .get(&Name::o())
192 .and_then(|obj| obj.as_name().map(|n| n.as_str().into_owned()))
193 .unwrap_or_default();
194
195 let o_name = Name::o();
196 let entries: Vec<(String, AttributeValue)> = dict
197 .iter()
198 .filter(|(k, _)| **k != o_name)
199 .filter_map(|(k, v)| {
200 let key = k.as_str().into_owned();
201 let resolved = store.deep_resolve(v).ok()?;
202 let val = convert_attribute_value(resolved)?;
203 Some((key, val))
204 })
205 .collect();
206
207 Some(StructAttribute { owner, entries })
208}
209
210pub(crate) fn convert_attribute_value(obj: &Object) -> Option<AttributeValue> {
212 if let Some(n) = obj.as_f64() {
213 return Some(AttributeValue::Number(n));
214 }
215 if let Some(s) = obj.as_string() {
216 return Some(AttributeValue::Text(s.to_string_lossy()));
217 }
218 if let Some(n) = obj.as_name() {
219 return Some(AttributeValue::Name(n.as_str().into_owned()));
220 }
221 if let Some(arr) = obj.as_array() {
222 let values: Vec<f64> = arr.iter().filter_map(|o| o.as_f64()).collect();
223 if !values.is_empty() {
224 return Some(AttributeValue::Array(values));
225 }
226 }
227 None
228}
229
230impl StructElement {
235 pub fn struct_type(&self) -> &str {
241 &self.struct_type
242 }
243
244 #[inline]
248 pub fn struct_element_get_type(&self) -> &str {
249 self.struct_type()
250 }
251
252 #[deprecated(
255 since = "0.1.0",
256 note = "use `struct_element_get_type()` — matches upstream `FPDF_StructElement_GetType`"
257 )]
258 #[inline]
259 pub fn get_type(&self) -> &str {
260 self.struct_type()
261 }
262
263 pub fn obj_type(&self) -> Option<&str> {
268 self.obj_type.as_deref()
269 }
270
271 #[inline]
275 pub fn struct_element_get_obj_type(&self) -> Option<&str> {
276 self.obj_type()
277 }
278
279 #[deprecated(
282 since = "0.1.0",
283 note = "use `struct_element_get_obj_type()` — matches upstream `FPDF_StructElement_GetObjType`"
284 )]
285 #[inline]
286 pub fn get_obj_type(&self) -> Option<&str> {
287 self.obj_type()
288 }
289
290 pub fn alt_text(&self) -> Option<&str> {
296 self.alt_text.as_deref()
297 }
298
299 #[inline]
303 pub fn struct_element_get_alt_text(&self) -> Option<&str> {
304 self.alt_text()
305 }
306
307 #[deprecated(
310 since = "0.1.0",
311 note = "use `struct_element_get_alt_text()` — matches upstream `FPDF_StructElement_GetAltText`"
312 )]
313 #[inline]
314 pub fn get_alt_text(&self) -> Option<&str> {
315 self.alt_text()
316 }
317
318 pub fn actual_text(&self) -> Option<&str> {
322 self.actual_text.as_deref()
323 }
324
325 #[inline]
329 pub fn struct_element_get_actual_text(&self) -> Option<&str> {
330 self.actual_text()
331 }
332
333 #[deprecated(
336 since = "0.1.0",
337 note = "use `struct_element_get_actual_text()` — matches upstream `FPDF_StructElement_GetActualText`"
338 )]
339 #[inline]
340 pub fn get_actual_text(&self) -> Option<&str> {
341 self.actual_text()
342 }
343
344 pub fn title(&self) -> Option<&str> {
348 self.title.as_deref()
349 }
350
351 #[inline]
355 pub fn struct_element_get_title(&self) -> Option<&str> {
356 self.title()
357 }
358
359 #[deprecated(
362 since = "0.1.0",
363 note = "use `struct_element_get_title()` — matches upstream `FPDF_StructElement_GetTitle`"
364 )]
365 #[inline]
366 pub fn get_title(&self) -> Option<&str> {
367 self.title()
368 }
369
370 pub fn id(&self) -> Option<&str> {
374 self.id.as_deref()
375 }
376
377 #[inline]
381 pub fn struct_element_get_id(&self) -> Option<&str> {
382 self.id()
383 }
384
385 #[deprecated(
388 since = "0.1.0",
389 note = "use `struct_element_get_id()` — matches upstream `FPDF_StructElement_GetID`"
390 )]
391 #[inline]
392 pub fn get_id(&self) -> Option<&str> {
393 self.id()
394 }
395
396 pub fn lang(&self) -> Option<&str> {
400 self.lang.as_deref()
401 }
402
403 #[inline]
407 pub fn struct_element_get_lang(&self) -> Option<&str> {
408 self.lang()
409 }
410
411 #[deprecated(
414 since = "0.1.0",
415 note = "use `struct_element_get_lang()` — matches upstream `FPDF_StructElement_GetLang`"
416 )]
417 #[inline]
418 pub fn get_lang(&self) -> Option<&str> {
419 self.lang()
420 }
421
422 pub fn string_attribute(&self, attr_name: &str) -> Option<&str> {
432 for attr in &self.attributes {
433 for (k, v) in &attr.entries {
434 if k == attr_name {
435 return match v {
436 AttributeValue::Text(s) => Some(s.as_str()),
437 AttributeValue::Name(n) => Some(n.as_str()),
438 _ => None,
439 };
440 }
441 }
442 }
443 None
444 }
445
446 #[inline]
450 pub fn struct_element_get_string_attribute(&self, attr_name: &str) -> Option<&str> {
451 self.string_attribute(attr_name)
452 }
453
454 #[deprecated(
457 since = "0.1.0",
458 note = "use `struct_element_get_string_attribute()` — matches upstream `FPDF_StructElement_GetStringAttribute`"
459 )]
460 #[inline]
461 pub fn get_string_attribute(&self, attr_name: &str) -> Option<&str> {
462 self.string_attribute(attr_name)
463 }
464
465 pub fn marked_content_id(&self) -> i64 {
474 self.mcids.first().copied().unwrap_or(-1)
475 }
476
477 #[inline]
481 pub fn struct_element_get_marked_content_id(&self) -> i64 {
482 self.marked_content_id()
483 }
484
485 #[deprecated(
488 since = "0.1.0",
489 note = "use `struct_element_get_marked_content_id()` — matches upstream `FPDF_StructElement_GetMarkedContentID`"
490 )]
491 #[inline]
492 pub fn get_marked_content_id(&self) -> i64 {
493 self.marked_content_id()
494 }
495
496 pub fn marked_content_id_count(&self) -> i64 {
502 if self.mcids.is_empty() {
503 -1
504 } else {
505 self.mcids.len() as i64
506 }
507 }
508
509 #[deprecated(
512 since = "0.1.0",
513 note = "use struct_element_get_marked_content_id_count() or marked_content_id_count() instead"
514 )]
515 #[inline]
516 pub fn mcid_count(&self) -> i64 {
517 self.marked_content_id_count()
518 }
519
520 #[inline]
524 pub fn struct_element_get_marked_content_id_count(&self) -> i64 {
525 self.marked_content_id_count()
526 }
527
528 #[deprecated(
531 since = "0.1.0",
532 note = "use `struct_element_get_marked_content_id_count()` — matches upstream `FPDF_StructElement_GetMarkedContentIdCount`"
533 )]
534 #[inline]
535 pub fn get_marked_content_id_count(&self) -> i64 {
536 self.marked_content_id_count()
537 }
538
539 pub fn marked_content_id_at_index(&self, index: usize) -> i64 {
544 self.mcids.get(index).copied().unwrap_or(-1)
545 }
546
547 #[deprecated(
550 since = "0.1.0",
551 note = "use struct_element_get_marked_content_id_at_index() or marked_content_id_at_index() instead"
552 )]
553 #[inline]
554 pub fn mcid_at_index(&self, index: usize) -> i64 {
555 self.marked_content_id_at_index(index)
556 }
557
558 #[inline]
562 pub fn struct_element_get_marked_content_id_at_index(&self, index: usize) -> i64 {
563 self.marked_content_id_at_index(index)
564 }
565
566 #[deprecated(
569 since = "0.1.0",
570 note = "use `struct_element_get_marked_content_id_at_index()` — matches upstream `FPDF_StructElement_GetMarkedContentIdAtIndex`"
571 )]
572 #[inline]
573 pub fn get_marked_content_id_at_index(&self, index: usize) -> i64 {
574 self.marked_content_id_at_index(index)
575 }
576
577 pub fn child_count(&self) -> usize {
583 self.children.len()
584 }
585
586 #[inline]
590 pub fn struct_element_count_children(&self) -> usize {
591 self.child_count()
592 }
593
594 #[deprecated(
597 since = "0.1.0",
598 note = "use `struct_element_count_children()` — matches upstream `FPDF_StructElement_CountChildren`"
599 )]
600 #[inline]
601 pub fn count_children(&self) -> usize {
602 self.child_count()
603 }
604
605 pub fn child_at_index(&self, index: usize) -> Option<&StructElement> {
610 self.children.get(index)
611 }
612
613 #[inline]
617 pub fn struct_element_get_child_at_index(&self, index: usize) -> Option<&StructElement> {
618 self.child_at_index(index)
619 }
620
621 #[deprecated(
624 since = "0.1.0",
625 note = "use `struct_element_get_child_at_index()` — matches upstream `FPDF_StructElement_GetChildAtIndex`"
626 )]
627 #[inline]
628 pub fn get_child_at_index(&self, index: usize) -> Option<&StructElement> {
629 self.child_at_index(index)
630 }
631
632 pub fn child_marked_content_id(&self, index: usize) -> i64 {
637 self.children
638 .get(index)
639 .and_then(|c| c.mcids.first().copied())
640 .unwrap_or(-1)
641 }
642
643 #[inline]
648 pub fn struct_element_get_child_marked_content_id(&self, index: usize) -> i64 {
649 self.child_marked_content_id(index)
650 }
651
652 #[deprecated(
655 since = "0.1.0",
656 note = "use `struct_element_get_child_marked_content_id()` — matches upstream `FPDF_StructElement_GetChildMarkedContentID`"
657 )]
658 #[inline]
659 pub fn get_child_marked_content_id(&self, index: usize) -> i64 {
660 self.child_marked_content_id(index)
661 }
662
663 pub fn attribute_count(&self) -> usize {
669 self.attributes.len()
670 }
671
672 #[inline]
676 pub fn struct_element_get_attribute_count(&self) -> usize {
677 self.attribute_count()
678 }
679
680 #[deprecated(
683 since = "0.1.0",
684 note = "use `struct_element_get_attribute_count()` — matches upstream `FPDF_StructElement_GetAttributeCount`"
685 )]
686 #[inline]
687 pub fn get_attribute_count(&self) -> usize {
688 self.attribute_count()
689 }
690
691 pub fn attribute_at_index(&self, index: usize) -> Option<&StructAttribute> {
695 self.attributes.get(index)
696 }
697
698 #[inline]
703 pub fn struct_element_get_attribute_at_index(&self, index: usize) -> Option<&StructAttribute> {
704 self.attribute_at_index(index)
705 }
706
707 #[deprecated(
710 since = "0.1.0",
711 note = "use `struct_element_get_attribute_at_index()` — matches upstream `FPDF_StructElement_GetAttributeAtIndex`"
712 )]
713 #[inline]
714 pub fn get_attribute_at_index(&self, index: usize) -> Option<&StructAttribute> {
715 self.attribute_at_index(index)
716 }
717
718 pub fn page_ref(&self) -> Option<ObjectId> {
722 self.page_ref
723 }
724
725 pub fn parent_index(&self) -> Option<usize> {
735 self.parent_index
736 }
737
738 #[inline]
749 pub fn struct_element_get_parent(&self) -> Option<usize> {
750 self.parent_index()
751 }
752
753 #[deprecated(
756 since = "0.1.0",
757 note = "use `struct_element_get_parent()` — matches upstream `FPDF_StructElement_GetParent`"
758 )]
759 #[inline]
760 pub fn get_parent(&self) -> Option<usize> {
761 self.parent_index()
762 }
763
764 #[deprecated(
767 since = "0.1.0",
768 note = "use struct_element_get_parent() or parent_index() instead"
769 )]
770 #[inline]
771 pub fn get_parent_index(&self) -> Option<usize> {
772 self.parent_index()
773 }
774}
775
776impl StructAttribute {
781 pub fn owner(&self) -> &str {
783 &self.owner
784 }
785
786 pub fn entry_count(&self) -> usize {
790 self.entries.len()
791 }
792
793 #[inline]
797 pub fn struct_element_attr_get_count(&self) -> usize {
798 self.entry_count()
799 }
800
801 #[deprecated(
804 since = "0.1.0",
805 note = "use `struct_element_attr_get_count()` — matches upstream `FPDF_StructElement_Attr_GetCount`"
806 )]
807 #[inline]
808 pub fn get_count(&self) -> usize {
809 self.entry_count()
810 }
811
812 pub fn entry_name_at_index(&self, index: usize) -> Option<&str> {
816 self.entries.get(index).map(|(k, _)| k.as_str())
817 }
818
819 #[inline]
824 pub fn struct_element_attr_get_name(&self, index: usize) -> Option<&str> {
825 self.entry_name_at_index(index)
826 }
827
828 #[deprecated(
831 since = "0.1.0",
832 note = "use `struct_element_attr_get_name()` — matches upstream `FPDF_StructElement_Attr_GetName`"
833 )]
834 #[inline]
835 pub fn get_name(&self, index: usize) -> Option<&str> {
836 self.entry_name_at_index(index)
837 }
838
839 pub fn value_for_key(&self, name: &str) -> Option<&AttributeValue> {
843 self.entries.iter().find(|(k, _)| k == name).map(|(_, v)| v)
844 }
845
846 #[inline]
851 pub fn struct_element_attr_get_value(&self, name: &str) -> Option<&AttributeValue> {
852 self.value_for_key(name)
853 }
854
855 #[deprecated(
858 since = "0.1.0",
859 note = "use `struct_element_attr_get_value()` — matches upstream `FPDF_StructElement_Attr_GetValue`"
860 )]
861 #[inline]
862 pub fn get_value(&self, name: &str) -> Option<&AttributeValue> {
863 self.value_for_key(name)
864 }
865
866 pub fn value_at_index(&self, index: usize) -> Option<&AttributeValue> {
868 self.entries.get(index).map(|(_, v)| v)
869 }
870}
871
872impl AttributeValue {
877 pub fn as_number(&self) -> Option<f64> {
881 match self {
882 AttributeValue::Number(n) => Some(*n),
883 _ => None,
884 }
885 }
886
887 #[inline]
891 pub fn struct_element_attr_get_number_value(&self) -> Option<f64> {
892 self.as_number()
893 }
894
895 #[deprecated(
898 since = "0.1.0",
899 note = "use `struct_element_attr_get_number_value()` — matches upstream `FPDF_StructElement_Attr_GetNumberValue`"
900 )]
901 #[inline]
902 pub fn get_number_value(&self) -> Option<f64> {
903 self.as_number()
904 }
905
906 pub fn as_text(&self) -> Option<&str> {
910 match self {
911 AttributeValue::Text(s) => Some(s.as_str()),
912 _ => None,
913 }
914 }
915
916 #[inline]
920 pub fn struct_element_attr_get_string_value(&self) -> Option<&str> {
921 self.as_text()
922 }
923
924 #[deprecated(
927 since = "0.1.0",
928 note = "use `struct_element_attr_get_string_value()` — matches upstream `FPDF_StructElement_Attr_GetStringValue`"
929 )]
930 #[inline]
931 pub fn get_string_value(&self) -> Option<&str> {
932 self.as_text()
933 }
934
935 pub fn as_name_str(&self) -> Option<&str> {
939 match self {
940 AttributeValue::Name(n) => Some(n.as_str()),
941 _ => None,
942 }
943 }
944
945 #[deprecated(
947 note = "use `as_name_str()` — no public `FPDF_StructElement_Attr_GetNameValue` API"
948 )]
949 #[inline]
950 pub fn get_name_value(&self) -> Option<&str> {
951 self.as_name_str()
952 }
953
954 pub fn as_array(&self) -> Option<&[f64]> {
959 match self {
960 AttributeValue::Array(arr) => Some(arr.as_slice()),
961 _ => None,
962 }
963 }
964
965 #[deprecated(note = "use `as_array()` — no public `FPDF_StructElement_Attr_GetArrayValue` API")]
967 #[inline]
968 pub fn get_array_value(&self) -> Option<&[f64]> {
969 self.as_array()
970 }
971
972 pub fn child_count(&self) -> i64 {
976 match self {
977 AttributeValue::Array(arr) => arr.len() as i64,
978 _ => -1,
979 }
980 }
981
982 #[inline]
986 pub fn struct_element_attr_count_children(&self) -> i64 {
987 self.child_count()
988 }
989
990 #[deprecated(
993 since = "0.1.0",
994 note = "use `struct_element_attr_count_children()` — matches upstream `FPDF_StructElement_Attr_CountChildren`"
995 )]
996 #[inline]
997 pub fn count_children(&self) -> i64 {
998 self.child_count()
999 }
1000
1001 pub fn child_at_index(&self, index: usize) -> Option<f64> {
1006 match self {
1007 AttributeValue::Array(arr) => arr.get(index).copied(),
1008 _ => None,
1009 }
1010 }
1011
1012 #[inline]
1016 pub fn struct_element_attr_get_child_at_index(&self, index: usize) -> Option<f64> {
1017 self.child_at_index(index)
1018 }
1019
1020 #[deprecated(
1023 since = "0.1.0",
1024 note = "use `struct_element_attr_get_child_at_index()` — matches upstream `FPDF_StructElement_Attr_GetChildAtIndex`"
1025 )]
1026 #[inline]
1027 pub fn get_child_at_index(&self, index: usize) -> Option<f64> {
1028 self.child_at_index(index)
1029 }
1030
1031 pub fn type_name(&self) -> &'static str {
1040 match self {
1041 AttributeValue::Number(_) => "Number",
1042 AttributeValue::Text(_) => "String",
1043 AttributeValue::Name(_) => "Name",
1044 AttributeValue::Array(_) => "Array",
1045 }
1046 }
1047
1048 #[inline]
1052 pub fn struct_element_attr_get_type(&self) -> &'static str {
1053 self.type_name()
1054 }
1055
1056 #[deprecated(
1059 since = "0.1.0",
1060 note = "use `struct_element_attr_get_type()` — matches upstream `FPDF_StructElement_Attr_GetType`"
1061 )]
1062 #[inline]
1063 pub fn get_type(&self) -> &'static str {
1064 self.type_name()
1065 }
1066
1067 pub fn is_string_like(&self) -> bool {
1073 matches!(self, AttributeValue::Text(_) | AttributeValue::Name(_))
1074 }
1075
1076 pub fn as_bool(&self) -> Option<bool> {
1087 match self {
1088 AttributeValue::Name(n) | AttributeValue::Text(n) => {
1089 if n.eq_ignore_ascii_case("true") {
1090 Some(true)
1091 } else if n.eq_ignore_ascii_case("false") {
1092 Some(false)
1093 } else {
1094 None
1095 }
1096 }
1097 AttributeValue::Number(n) => Some(*n != 0.0),
1098 AttributeValue::Array(_) => None,
1099 }
1100 }
1101
1102 #[inline]
1106 pub fn struct_element_attr_get_boolean_value(&self) -> Option<bool> {
1107 self.as_bool()
1108 }
1109
1110 #[deprecated(
1113 since = "0.1.0",
1114 note = "use `struct_element_attr_get_boolean_value()` — matches upstream `FPDF_StructElement_Attr_GetBooleanValue`"
1115 )]
1116 #[inline]
1117 pub fn get_boolean_value(&self) -> Option<bool> {
1118 self.as_bool()
1119 }
1120
1121 pub fn as_blob(&self) -> Option<&[u8]> {
1126 match self {
1127 AttributeValue::Text(s) => Some(s.as_bytes()),
1128 AttributeValue::Name(n) => Some(n.as_bytes()),
1129 _ => None,
1130 }
1131 }
1132
1133 #[inline]
1137 pub fn struct_element_attr_get_blob_value(&self) -> Option<&[u8]> {
1138 self.as_blob()
1139 }
1140
1141 #[deprecated(
1144 since = "0.1.0",
1145 note = "use `struct_element_attr_get_blob_value()` — matches upstream `FPDF_StructElement_Attr_GetBlobValue`"
1146 )]
1147 #[inline]
1148 pub fn get_blob_value(&self) -> Option<&[u8]> {
1149 self.as_blob()
1150 }
1151}
1152
1153#[cfg(test)]
1154mod tests {
1155 use super::*;
1156
1157 fn build_store() -> ObjectStore<Vec<u8>> {
1158 let pdf = build_minimal_pdf();
1159 ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
1160 }
1161
1162 fn build_minimal_pdf() -> Vec<u8> {
1163 let mut pdf = Vec::new();
1164 pdf.extend_from_slice(b"%PDF-1.4\n");
1165 let obj1_offset = pdf.len();
1166 pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1167 let obj2_offset = pdf.len();
1168 pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1169 let xref_offset = pdf.len();
1170 pdf.extend_from_slice(b"xref\n0 3\n");
1171 pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1172 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1173 pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1174 pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
1175 pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1176 pdf
1177 }
1178
1179 fn name_obj(s: &str) -> Object {
1180 Object::Name(Name::from(s))
1181 }
1182
1183 fn struct_elem_dict(tag: &str) -> HashMap<Name, Object> {
1184 let mut d = HashMap::new();
1185 d.insert(Name::s(), name_obj(tag));
1186 d
1187 }
1188
1189 #[test]
1190 fn test_parse_struct_element_basic() {
1191 let store = build_store();
1192 let dict = struct_elem_dict("P");
1193 let elem = parse_struct_element(&dict, &store);
1194 assert_eq!(elem.struct_type, "P");
1195 assert!(elem.obj_type.is_none());
1196 assert!(elem.alt_text.is_none());
1197 assert!(elem.attributes.is_empty());
1198 assert!(elem.mcids.is_empty());
1199 assert!(elem.children.is_empty());
1200 }
1201
1202 #[test]
1203 fn test_parse_struct_element_obj_type() {
1204 let store = build_store();
1205 let mut dict = struct_elem_dict("Span");
1206 dict.insert(Name::obj_type(), name_obj("Elem"));
1207 let elem = parse_struct_element(&dict, &store);
1208 assert_eq!(elem.obj_type.as_deref(), Some("Elem"));
1209 }
1210
1211 #[test]
1212 fn test_parse_struct_element_obj_type_none() {
1213 let store = build_store();
1214 let dict = struct_elem_dict("P");
1215 let elem = parse_struct_element(&dict, &store);
1216 assert!(elem.obj_type.is_none());
1217 }
1218
1219 #[test]
1220 fn test_parse_attributes_single_dict() {
1221 let store = build_store();
1222 let mut attr_dict = HashMap::new();
1223 attr_dict.insert(Name::o(), name_obj("Layout"));
1224 attr_dict.insert(Name::from("WritingMode"), name_obj("LrTb"));
1225 attr_dict.insert(Name::from("SpaceBefore"), Object::Real(12.0));
1226
1227 let mut elem_dict = struct_elem_dict("TD");
1228 elem_dict.insert(Name::a(), Object::Dictionary(attr_dict));
1229
1230 let attrs = parse_attributes(&elem_dict, &store);
1231 assert_eq!(attrs.len(), 1);
1232 assert_eq!(attrs[0].owner, "Layout");
1233 assert!(attrs[0].entries.len() >= 2);
1234
1235 let wm = attrs[0].entries.iter().find(|(k, _)| k == "WritingMode");
1236 assert!(wm.is_some());
1237 match &wm.unwrap().1 {
1238 AttributeValue::Name(n) => assert_eq!(n, "LrTb"),
1239 _ => panic!("expected Name"),
1240 }
1241 }
1242
1243 #[test]
1244 fn test_parse_attributes_array() {
1245 let store = build_store();
1246 let mut attr1 = HashMap::new();
1247 attr1.insert(Name::o(), name_obj("Layout"));
1248 attr1.insert(Name::from("TextAlign"), name_obj("Center"));
1249
1250 let mut attr2 = HashMap::new();
1251 attr2.insert(Name::o(), name_obj("Table"));
1252 attr2.insert(Name::from("RowSpan"), Object::Integer(2));
1253
1254 let mut elem_dict = struct_elem_dict("TD");
1255 elem_dict.insert(
1256 Name::a(),
1257 Object::Array(vec![Object::Dictionary(attr1), Object::Dictionary(attr2)]),
1258 );
1259
1260 let attrs = parse_attributes(&elem_dict, &store);
1261 assert_eq!(attrs.len(), 2);
1262 assert_eq!(attrs[0].owner, "Layout");
1263 assert_eq!(attrs[1].owner, "Table");
1264 }
1265
1266 #[test]
1267 fn test_parse_attributes_none() {
1268 let store = build_store();
1269 let dict = struct_elem_dict("P");
1270 let attrs = parse_attributes(&dict, &store);
1271 assert!(attrs.is_empty());
1272 }
1273
1274 #[test]
1275 fn test_convert_attribute_value_number() {
1276 let obj = Object::Real(3.14);
1277 match convert_attribute_value(&obj) {
1278 Some(AttributeValue::Number(n)) => assert!((n - 3.14).abs() < 0.001),
1279 _ => panic!("expected Number"),
1280 }
1281 }
1282
1283 #[test]
1284 fn test_convert_attribute_value_name() {
1285 let obj = name_obj("LrTb");
1286 match convert_attribute_value(&obj) {
1287 Some(AttributeValue::Name(n)) => assert_eq!(n, "LrTb"),
1288 _ => panic!("expected Name"),
1289 }
1290 }
1291
1292 #[test]
1293 fn test_convert_attribute_value_array() {
1294 let obj = Object::Array(vec![
1295 Object::Real(1.0),
1296 Object::Real(2.0),
1297 Object::Real(3.0),
1298 ]);
1299 match convert_attribute_value(&obj) {
1300 Some(AttributeValue::Array(arr)) => {
1301 assert_eq!(arr.len(), 3);
1302 assert!((arr[0] - 1.0).abs() < 0.001);
1303 }
1304 _ => panic!("expected Array"),
1305 }
1306 }
1307
1308 fn make_full_element() -> StructElement {
1311 StructElement {
1312 struct_type: "Figure".to_string(),
1313 obj_type: Some("Elem".to_string()),
1314 alt_text: Some("A cat photo".to_string()),
1315 actual_text: Some("Cat".to_string()),
1316 title: Some("My Figure".to_string()),
1317 id: Some("fig-001".to_string()),
1318 lang: Some("en-US".to_string()),
1319 page_ref: Some(ObjectId::new(5, 0)),
1320 mcids: vec![10, 20, 30],
1321 parent_index: None,
1322 children: vec![StructElement {
1323 struct_type: "Span".to_string(),
1324 obj_type: None,
1325 alt_text: None,
1326 actual_text: None,
1327 title: None,
1328 id: None,
1329 lang: None,
1330 page_ref: None,
1331 mcids: vec![42],
1332 children: Vec::new(),
1333 attributes: Vec::new(),
1334 parent_index: None,
1335 }],
1336 attributes: vec![StructAttribute {
1337 owner: "Layout".to_string(),
1338 entries: vec![
1339 (
1340 "WritingMode".to_string(),
1341 AttributeValue::Name("LrTb".to_string()),
1342 ),
1343 ("SpaceBefore".to_string(), AttributeValue::Number(12.0)),
1344 (
1345 "TextLabel".to_string(),
1346 AttributeValue::Text("hello".to_string()),
1347 ),
1348 (
1349 "Padding".to_string(),
1350 AttributeValue::Array(vec![1.0, 2.0, 3.0, 4.0]),
1351 ),
1352 ],
1353 }],
1354 }
1355 }
1356
1357 #[test]
1358 fn test_struct_element_struct_type_getter() {
1359 let elem = make_full_element();
1360 assert_eq!(elem.struct_type(), "Figure");
1361 assert_eq!(elem.struct_element_get_type(), "Figure");
1362 }
1363
1364 #[test]
1365 fn test_struct_element_obj_type_getter() {
1366 let elem = make_full_element();
1367 assert_eq!(elem.obj_type(), Some("Elem"));
1368 assert_eq!(elem.struct_element_get_obj_type(), Some("Elem"));
1369
1370 let empty = StructElement {
1371 struct_type: "P".to_string(),
1372 obj_type: None,
1373 alt_text: None,
1374 actual_text: None,
1375 title: None,
1376 id: None,
1377 lang: None,
1378 page_ref: None,
1379 mcids: Vec::new(),
1380 children: Vec::new(),
1381 attributes: Vec::new(),
1382 parent_index: None,
1383 };
1384 assert!(empty.obj_type().is_none());
1385 }
1386
1387 #[test]
1388 fn test_struct_element_alt_text_getter() {
1389 let elem = make_full_element();
1390 assert_eq!(elem.alt_text(), Some("A cat photo"));
1391 assert_eq!(elem.struct_element_get_alt_text(), Some("A cat photo"));
1392 }
1393
1394 #[test]
1395 fn test_struct_element_actual_text_getter() {
1396 let elem = make_full_element();
1397 assert_eq!(elem.actual_text(), Some("Cat"));
1398 assert_eq!(elem.struct_element_get_actual_text(), Some("Cat"));
1399 }
1400
1401 #[test]
1402 fn test_struct_element_title_getter() {
1403 let elem = make_full_element();
1404 assert_eq!(elem.title(), Some("My Figure"));
1405 assert_eq!(elem.struct_element_get_title(), Some("My Figure"));
1406 }
1407
1408 #[test]
1409 fn test_struct_element_id_getter() {
1410 let elem = make_full_element();
1411 assert_eq!(elem.id(), Some("fig-001"));
1412 assert_eq!(elem.struct_element_get_id(), Some("fig-001"));
1413 }
1414
1415 #[test]
1416 fn test_struct_element_lang_getter() {
1417 let elem = make_full_element();
1418 assert_eq!(elem.lang(), Some("en-US"));
1419 assert_eq!(elem.struct_element_get_lang(), Some("en-US"));
1420 }
1421
1422 #[test]
1423 fn test_struct_element_string_attribute_found() {
1424 let elem = make_full_element();
1425 assert_eq!(elem.string_attribute("WritingMode"), Some("LrTb"));
1427 assert_eq!(
1428 elem.struct_element_get_string_attribute("WritingMode"),
1429 Some("LrTb")
1430 );
1431 assert_eq!(elem.string_attribute("TextLabel"), Some("hello"));
1433 }
1434
1435 #[test]
1436 fn test_struct_element_string_attribute_not_found() {
1437 let elem = make_full_element();
1438 assert!(elem.string_attribute("NoSuchAttr").is_none());
1439 assert!(elem.string_attribute("SpaceBefore").is_none());
1441 }
1442
1443 #[test]
1444 fn test_struct_element_marked_content_id() {
1445 let elem = make_full_element();
1446 assert_eq!(elem.marked_content_id(), 10);
1447 assert_eq!(elem.struct_element_get_marked_content_id(), 10);
1448
1449 let mut empty = make_full_element();
1451 empty.mcids.clear();
1452 assert_eq!(empty.marked_content_id(), -1);
1453 }
1454
1455 #[test]
1456 fn test_struct_element_mcid_count() {
1457 let elem = make_full_element();
1458 assert_eq!(elem.struct_element_get_marked_content_id_count(), 3);
1459 assert_eq!(elem.struct_element_get_marked_content_id_count(), 3);
1460
1461 let mut empty = make_full_element();
1462 empty.mcids.clear();
1463 assert_eq!(empty.struct_element_get_marked_content_id_count(), -1);
1464 }
1465
1466 #[test]
1467 fn test_struct_element_mcid_at_index() {
1468 let elem = make_full_element();
1469 assert_eq!(elem.struct_element_get_marked_content_id_at_index(0), 10);
1470 assert_eq!(elem.struct_element_get_marked_content_id_at_index(1), 20);
1471 assert_eq!(elem.struct_element_get_marked_content_id_at_index(2), 30);
1472 assert_eq!(elem.struct_element_get_marked_content_id_at_index(99), -1);
1473 assert_eq!(elem.struct_element_get_marked_content_id_at_index(0), 10);
1474 }
1475
1476 #[test]
1477 fn test_struct_element_child_count() {
1478 let elem = make_full_element();
1479 assert_eq!(elem.child_count(), 1);
1480 assert_eq!(elem.struct_element_count_children(), 1);
1481 }
1482
1483 #[test]
1484 fn test_struct_element_child_at_index() {
1485 let elem = make_full_element();
1486 let child = elem.child_at_index(0).unwrap();
1487 assert_eq!(child.struct_type(), "Span");
1488 assert!(elem.child_at_index(99).is_none());
1489 assert_eq!(
1490 elem.struct_element_get_child_at_index(0)
1491 .unwrap()
1492 .struct_type(),
1493 "Span"
1494 );
1495 }
1496
1497 #[test]
1498 fn test_struct_element_child_marked_content_id() {
1499 let elem = make_full_element();
1500 assert_eq!(elem.child_marked_content_id(0), 42);
1501 assert_eq!(elem.child_marked_content_id(99), -1);
1502 assert_eq!(elem.struct_element_get_child_marked_content_id(0), 42);
1503 }
1504
1505 #[test]
1506 fn test_struct_element_attribute_count() {
1507 let elem = make_full_element();
1508 assert_eq!(elem.attribute_count(), 1);
1509 assert_eq!(elem.struct_element_get_attribute_count(), 1);
1510 }
1511
#[test]
fn test_struct_element_attribute_at_index() {
    let element = make_full_element();

    // In-range lookup through the short accessor.
    let first = element.attribute_at_index(0).unwrap();
    assert_eq!(first.owner(), "Layout");

    // An out-of-range index yields nothing.
    assert!(element.attribute_at_index(99).is_none());

    // The same in-range lookup through the prefixed method.
    let first_prefixed = element.struct_element_get_attribute_at_index(0).unwrap();
    assert_eq!(first_prefixed.owner(), "Layout");
}
1525
#[test]
fn test_struct_element_page_ref() {
    // The fixture is expected to reference the page object `5 0`.
    let expected_page = ObjectId::new(5, 0);
    let element = make_full_element();
    assert_eq!(element.page_ref(), Some(expected_page));
}
1531
#[test]
fn test_struct_element_parent_index_root_is_none() {
    let element = make_full_element();

    // Neither method spelling may report a parent for the fixture element.
    let parent = element.parent_index();
    assert!(parent.is_none());
    let parent_prefixed = element.struct_element_get_parent();
    assert!(parent_prefixed.is_none());
}
1545
#[test]
fn test_struct_element_parent_index_child_has_parent() {
    // Builds a bare element: only the structure type and the parent index are
    // populated; every other field is empty.
    let bare = |tag: &str, parent: usize| StructElement {
        struct_type: tag.to_string(),
        obj_type: None,
        alt_text: None,
        actual_text: None,
        lang: None,
        title: None,
        id: None,
        page_ref: None,
        mcids: Vec::new(),
        children: Vec::new(),
        attributes: Vec::new(),
        parent_index: Some(parent),
    };

    let span = bare("Span", 0);
    let link = bare("Link", 1);

    // Each element must hand back the parent index it was built with, through
    // both the short accessor and the prefixed method.
    assert_eq!(span.parent_index(), Some(0));
    assert_eq!(span.struct_element_get_parent(), Some(0));
    assert_eq!(link.parent_index(), Some(1));
    assert_eq!(link.struct_element_get_parent(), Some(1));
}
1589
1590 fn make_attr() -> StructAttribute {
1593 StructAttribute {
1594 owner: "Table".to_string(),
1595 entries: vec![
1596 ("RowSpan".to_string(), AttributeValue::Number(2.0)),
1597 ("ColSpan".to_string(), AttributeValue::Number(1.0)),
1598 ("Scope".to_string(), AttributeValue::Name("Row".to_string())),
1599 ],
1600 }
1601 }
1602
#[test]
fn test_struct_attribute_owner() {
    // The fixture built by `make_attr` is owned by "Table".
    let attribute = make_attr();
    let owner = attribute.owner();
    assert_eq!(owner, "Table");
}
1608
#[test]
fn test_struct_attribute_entry_count() {
    let attribute = make_attr();

    // The fixture holds three entries; both method spellings must say so.
    let short_form = attribute.entry_count();
    assert_eq!(short_form, 3);
    let prefixed_form = attribute.struct_element_attr_get_count();
    assert_eq!(prefixed_form, 3);
}
1615
#[test]
fn test_struct_attribute_entry_name_at_index() {
    let attribute = make_attr();

    // In-range indices return the entry key stored at that position.
    for (index, expected) in [(0, "RowSpan"), (2, "Scope")] {
        assert_eq!(attribute.entry_name_at_index(index), Some(expected));
    }

    // An out-of-range index yields nothing.
    assert!(attribute.entry_name_at_index(99).is_none());

    // The prefixed method must agree for the first entry.
    let first_prefixed = attribute.struct_element_attr_get_name(0);
    assert_eq!(first_prefixed, Some("RowSpan"));
}
1624
#[test]
fn test_struct_attribute_value_for_key() {
    let attribute = make_attr();

    // A key present in the fixture resolves to a value.
    let present = attribute.value_for_key("RowSpan");
    assert!(present.is_some());

    // A key the fixture does not contain resolves to nothing.
    let absent = attribute.value_for_key("NoSuch");
    assert!(absent.is_none());

    // The prefixed method also finds a key that is present.
    let present_prefixed = attribute.struct_element_attr_get_value("Scope");
    assert!(present_prefixed.is_some());
}
1632
#[test]
fn test_struct_attribute_value_at_index() {
    let attribute = make_attr();

    // Index 0 is within the fixture's three entries; index 99 is not.
    let in_range = attribute.value_at_index(0);
    assert!(in_range.is_some());
    let out_of_range = attribute.value_at_index(99);
    assert!(out_of_range.is_none());
}
1639
#[test]
fn test_attribute_value_as_number() {
    // 2.5 is used rather than 3.14: a literal approximating PI trips the
    // deny-by-default `clippy::approx_constant` lint, and the specific value is
    // irrelevant to what is being checked here.
    let v = AttributeValue::Number(2.5);

    // The numeric payload is returned by `as_number`.
    assert!((v.as_number().unwrap() - 2.5).abs() < 0.001);

    // Both type-name spellings report "Number".
    assert_eq!(v.type_name(), "Number");
    assert_eq!(v.struct_element_attr_get_type(), "Number");

    // Every accessor for a different variant must come back empty.
    assert!(v.struct_element_attr_get_string_value().is_none());
    assert!(v.as_name_str().is_none());
    assert!(v.as_array().is_none());
    assert!(v.as_blob().is_none());

    // A number has no children, which is reported as -1.
    assert_eq!(v.child_count(), -1);
}
1654
#[test]
fn test_attribute_value_as_text() {
    let value = AttributeValue::Text("hello".to_string());
    let expected_bytes: &[u8] = b"hello";

    // The text payload is visible through both string accessors.
    assert_eq!(value.as_text(), Some("hello"));
    assert_eq!(value.struct_element_attr_get_string_value(), Some("hello"));

    // Type reporting: named "String", string-like, and not a number.
    assert_eq!(value.type_name(), "String");
    assert!(value.as_number().is_none());
    assert!(value.is_string_like());

    // The blob accessors expose the same text as raw bytes.
    assert_eq!(value.as_blob(), Some(expected_bytes));
    assert_eq!(
        value.struct_element_attr_get_blob_value(),
        Some(expected_bytes)
    );
}
1669
#[test]
fn test_attribute_value_as_name() {
    let value = AttributeValue::Name("LrTb".to_string());
    let expected_bytes: &[u8] = b"LrTb";

    // The name payload is returned, and a repeated call returns it again.
    let first_read = value.as_name_str();
    assert_eq!(first_read, Some("LrTb"));
    let second_read = value.as_name_str();
    assert_eq!(second_read, Some("LrTb"));

    // Type reporting: named "Name", string-like, and not a number.
    assert_eq!(value.type_name(), "Name");
    assert!(value.is_string_like());
    assert!(value.as_number().is_none());

    // The blob accessor exposes the name as raw bytes.
    assert_eq!(value.as_blob(), Some(expected_bytes));
}
1680
#[test]
fn test_attribute_value_as_array() {
    let expected = [1.0_f64, 2.0, 3.0];
    let value = AttributeValue::Array(expected.to_vec());

    // The whole slice is returned, and a repeated call returns it again.
    let first_read = value.as_array();
    assert_eq!(first_read, Some(expected.as_slice()));
    let second_read = value.as_array();
    assert_eq!(second_read, Some(expected.as_slice()));

    assert_eq!(value.type_name(), "Array");

    // Child count through both method spellings.
    assert_eq!(value.child_count(), 3);
    assert_eq!(value.struct_element_attr_count_children(), 3);

    // Per-element access: in range through both spellings, then out of range.
    assert_eq!(value.child_at_index(1), Some(2.0));
    assert_eq!(value.struct_element_attr_get_child_at_index(2), Some(3.0));
    assert!(value.child_at_index(99).is_none());

    // An array is neither string-like, nor a blob, nor a number.
    assert!(!value.is_string_like());
    assert!(value.as_blob().is_none());
    assert!(value.as_number().is_none());
}
1696
#[test]
fn test_attribute_value_as_bool_name_true() {
    // Lower-case spelling, checked through both method spellings.
    let lower = AttributeValue::Name("true".to_string());
    assert_eq!(lower.as_bool(), Some(true));
    assert_eq!(lower.struct_element_attr_get_boolean_value(), Some(true));

    // Capitalised and upper-case spellings must also read as `true`.
    for spelling in ["True", "TRUE"] {
        let value = AttributeValue::Name(spelling.to_string());
        assert_eq!(value.as_bool(), Some(true));
    }
}
1712
#[test]
fn test_attribute_value_as_bool_name_false() {
    // Both the lower-case and the capitalised spelling must read as `false`.
    for spelling in ["false", "False"] {
        let value = AttributeValue::Name(spelling.to_string());
        assert_eq!(value.as_bool(), Some(false));
    }
}
1721
#[test]
fn test_attribute_value_as_bool_text() {
    // Text payload -> expected boolean reading. Text that is neither "true"
    // nor "false" has no boolean reading at all.
    let cases = [
        ("true", Some(true)),
        ("false", Some(false)),
        ("maybe", None),
    ];

    for (text, expected) in cases {
        let value = AttributeValue::Text(text.to_string());
        assert_eq!(value.as_bool(), expected);
    }
}
1734
#[test]
fn test_attribute_value_as_bool_number() {
    // Numeric payload -> expected boolean reading: zero reads as false, and
    // each non-zero value probed here (including a negative one) reads as true.
    let cases = [(0.0, false), (1.0, true), (-1.0, true), (42.0, true)];

    for (number, expected) in cases {
        assert_eq!(AttributeValue::Number(number).as_bool(), Some(expected));
    }
}
1742
#[test]
fn test_attribute_value_as_bool_array_returns_none() {
    // An array has no boolean reading through either method spelling.
    let value = AttributeValue::Array(vec![1.0, 0.0]);
    assert!(value.as_bool().is_none());
    assert!(value.struct_element_attr_get_boolean_value().is_none());
}
1749
#[test]
fn test_attribute_value_as_bool_non_boolean_name_returns_none() {
    // A name that spells neither "true" nor "false" has no boolean reading.
    let value = AttributeValue::Name("LrTb".to_string());
    let reading = value.as_bool();
    assert!(reading.is_none());
}
1755}