Skip to main content

rpdfium_doc/
struct_element.rs

1// Derived from PDFium's cpdf_structelement.h/cpp
2// Original: Copyright 2014 The PDFium Authors
3// Licensed under BSD-3-Clause / Apache-2.0
4// See pdfium-upstream/LICENSE for the original license.
5
6//! Structure element — `CPDF_StructElement`.
7//!
8//! A single node in the tagged PDF structure tree, carrying semantic
9//! information such as type tag, alternative text, language, attributes,
10//! and marked content IDs (ISO 32000-2 section 14.7.2).
11//!
12//! Tree building is in [`crate::struct_tree`] (corresponds to
13//! `CPDF_StructTree`).
14
15use std::collections::HashMap;
16
17use rpdfium_core::{Name, PdfSource};
18use rpdfium_parser::{Object, ObjectId, ObjectStore};
19
/// A single node in the structure tree.
///
/// All fields are public; the accessor methods on this type expose the same
/// data under names that mirror the upstream `FPDF_StructElement_*` API.
#[derive(Debug, Clone)]
pub struct StructElement {
    /// Structure type tag (e.g., "Document", "P", "H1", "Table", "Figure").
    ///
    /// `parse_struct_element` stores an empty string here when the entry is
    /// missing or is not a name object.
    pub struct_type: String,
    /// Object type from `/ObjType` (e.g., "Elem", "MCR", "OBJR").
    ///
    /// `None` when the entry is missing or is not a name object.
    ///
    /// Corresponds to upstream `CPDF_StructElement::GetObjType()`.
    pub obj_type: Option<String>,
    /// Alternative text (`/Alt`).
    pub alt_text: Option<String>,
    /// Actual text (`/ActualText`).
    pub actual_text: Option<String>,
    /// Language tag (`/Lang`).
    pub lang: Option<String>,
    /// Title (`/T`).
    pub title: Option<String>,
    /// Element identifier (`/ID`).
    pub id: Option<String>,
    /// Page reference (`/Pg`) — indirect object ID.
    ///
    /// Only set when the entry is itself a reference; the referenced page
    /// object is not loaded here.
    pub page_ref: Option<ObjectId>,
    /// Marked content IDs associated with this element.
    ///
    /// `parse_struct_element` leaves this empty.
    /// NOTE(review): presumably filled in by the tree builder in
    /// `crate::struct_tree` — confirm there.
    pub mcids: Vec<i64>,
    /// Child structure elements.
    ///
    /// `parse_struct_element` leaves this empty; it parses only the element's
    /// own entries, not its kids.
    pub children: Vec<StructElement>,
    /// Attributes from the `/A` key.
    ///
    /// One item per attribute dictionary that could be parsed.
    pub attributes: Vec<StructAttribute>,
    /// Index of this element within its parent's `children` vec.
    ///
    /// `None` for root elements (direct children of `StructTree::root_elements`).
    /// For non-root elements, this is the zero-based index in the parent's
    /// `children` list, which can be used together with the parent reference to
    /// navigate the tree upward.
    ///
    /// `parse_struct_element` always initialises this to `None`.
    ///
    /// Corresponds to upstream `FPDF_StructElement_GetParent`.
    pub parent_index: Option<usize>,
}
57
/// A single attribute dictionary associated with a structure element.
///
/// Built by `parse_single_attribute` from one dictionary found under the
/// element's `/A` key.
#[derive(Debug, Clone)]
pub struct StructAttribute {
    /// The attribute owner (from `/O`).
    ///
    /// Empty when `/O` is missing or is not a name object.
    pub owner: String,
    /// Key-value entries from the attribute dictionary (excluding `/O`).
    ///
    /// Entries whose value cannot be resolved or converted to an
    /// [`AttributeValue`] are omitted. The source dictionary is a `HashMap`,
    /// so the order of entries is unspecified and index-based lookups should
    /// not rely on it.
    pub entries: Vec<(String, AttributeValue)>,
}
66
/// A value in a structure attribute dictionary.
///
/// Produced by `convert_attribute_value`, which tries the variants in the
/// order number, text, name, array and yields nothing for any other object.
#[derive(Debug, Clone)]
pub enum AttributeValue {
    /// A numeric value.
    Number(f64),
    /// A text (string) value, decoded lossily.
    Text(String),
    /// An array of numbers.
    ///
    /// Non-numeric items of the source array are dropped, and an array with
    /// no numeric items at all is not represented.
    Array(Vec<f64>),
    /// A name value.
    Name(String),
}
79
80/// Parse a single structure element's own attributes (not children).
81pub(crate) fn parse_struct_element<S: PdfSource>(
82    dict: &HashMap<Name, Object>,
83    store: &ObjectStore<S>,
84) -> StructElement {
85    let struct_type = dict
86        .get(&Name::s())
87        .and_then(|obj| obj.as_name())
88        .map(|n| n.as_str().into_owned())
89        .unwrap_or_default();
90
91    let obj_type = dict
92        .get(&Name::obj_type())
93        .and_then(|obj| obj.as_name())
94        .map(|n| n.as_str().into_owned());
95
96    let alt_text = dict
97        .get(&Name::alt())
98        .and_then(|obj| store.deep_resolve(obj).ok())
99        .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
100
101    let actual_text = dict
102        .get(&Name::actual_text())
103        .and_then(|obj| store.deep_resolve(obj).ok())
104        .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
105
106    let lang = dict
107        .get(&Name::lang())
108        .and_then(|obj| store.deep_resolve(obj).ok())
109        .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
110
111    let title = dict
112        .get(&Name::t())
113        .and_then(|obj| store.deep_resolve(obj).ok())
114        .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
115
116    let id = dict
117        .get(&Name::id())
118        .and_then(|obj| store.deep_resolve(obj).ok())
119        .and_then(|obj| obj.as_string().map(|s| s.to_string_lossy()));
120
121    let page_ref = dict.get(&Name::pg()).and_then(|obj| obj.as_reference());
122
123    // Parse /A attributes
124    let attributes = parse_attributes(dict, store);
125
126    StructElement {
127        struct_type,
128        obj_type,
129        alt_text,
130        actual_text,
131        lang,
132        title,
133        id,
134        page_ref,
135        mcids: Vec::new(),
136        children: Vec::new(),
137        attributes,
138        parent_index: None,
139    }
140}
141
142/// Parse `/A` attributes from a structure element dictionary.
143///
144/// The `/A` key can be a single attribute dictionary or an array of them.
145pub(crate) fn parse_attributes<S: PdfSource>(
146    dict: &HashMap<Name, Object>,
147    store: &ObjectStore<S>,
148) -> Vec<StructAttribute> {
149    let a_obj = match dict.get(&Name::a()) {
150        Some(obj) => obj,
151        None => return Vec::new(),
152    };
153    let resolved = match store.deep_resolve(a_obj) {
154        Ok(r) => r,
155        Err(_) => return Vec::new(),
156    };
157
158    match resolved {
159        Object::Dictionary(attr_dict) => {
160            if let Some(attr) = parse_single_attribute(attr_dict, store) {
161                vec![attr]
162            } else {
163                Vec::new()
164            }
165        }
166        Object::Array(arr) => {
167            let mut attrs = Vec::new();
168            for item in arr {
169                let item_resolved = match store.deep_resolve(item) {
170                    Ok(r) => r,
171                    Err(_) => continue,
172                };
173                if let Some(attr_dict) = item_resolved.as_dict() {
174                    if let Some(attr) = parse_single_attribute(attr_dict, store) {
175                        attrs.push(attr);
176                    }
177                }
178            }
179            attrs
180        }
181        _ => Vec::new(),
182    }
183}
184
185/// Parse a single attribute dictionary into a `StructAttribute`.
186fn parse_single_attribute<S: PdfSource>(
187    dict: &HashMap<Name, Object>,
188    store: &ObjectStore<S>,
189) -> Option<StructAttribute> {
190    let owner = dict
191        .get(&Name::o())
192        .and_then(|obj| obj.as_name().map(|n| n.as_str().into_owned()))
193        .unwrap_or_default();
194
195    let o_name = Name::o();
196    let entries: Vec<(String, AttributeValue)> = dict
197        .iter()
198        .filter(|(k, _)| **k != o_name)
199        .filter_map(|(k, v)| {
200            let key = k.as_str().into_owned();
201            let resolved = store.deep_resolve(v).ok()?;
202            let val = convert_attribute_value(resolved)?;
203            Some((key, val))
204        })
205        .collect();
206
207    Some(StructAttribute { owner, entries })
208}
209
210/// Convert a PDF object to an `AttributeValue`.
211pub(crate) fn convert_attribute_value(obj: &Object) -> Option<AttributeValue> {
212    if let Some(n) = obj.as_f64() {
213        return Some(AttributeValue::Number(n));
214    }
215    if let Some(s) = obj.as_string() {
216        return Some(AttributeValue::Text(s.to_string_lossy()));
217    }
218    if let Some(n) = obj.as_name() {
219        return Some(AttributeValue::Name(n.as_str().into_owned()));
220    }
221    if let Some(arr) = obj.as_array() {
222        let values: Vec<f64> = arr.iter().filter_map(|o| o.as_f64()).collect();
223        if !values.is_empty() {
224            return Some(AttributeValue::Array(values));
225        }
226    }
227    None
228}
229
230// ---------------------------------------------------------------------------
231// StructElement methods — FPDF_StructElement_Get* equivalents
232// ---------------------------------------------------------------------------
233
234impl StructElement {
235    // --- Type and Object Type ---
236
237    /// Returns the structure type tag (e.g., "Document", "P", "H1", "Table", "Figure").
238    ///
239    /// Corresponds to upstream `FPDF_StructElement_GetType`.
240    pub fn struct_type(&self) -> &str {
241        &self.struct_type
242    }
243
244    /// ADR-019 Tier 2 alias for [`struct_type()`](StructElement::struct_type).
245    ///
246    /// Corresponds to upstream `FPDF_StructElement_GetType`.
247    #[inline]
248    pub fn struct_element_get_type(&self) -> &str {
249        self.struct_type()
250    }
251
252    /// Deprecated short alias — use [`struct_element_get_type()`](StructElement::struct_element_get_type)
253    /// or [`struct_type()`](StructElement::struct_type) instead.
254    #[deprecated(
255        since = "0.1.0",
256        note = "use `struct_element_get_type()` — matches upstream `FPDF_StructElement_GetType`"
257    )]
258    #[inline]
259    pub fn get_type(&self) -> &str {
260        self.struct_type()
261    }
262
263    /// Returns the object type (`/ObjType`), if present.
264    ///
265    /// Typical values: `"Elem"`, `"MCR"`, `"OBJR"`.
266    /// Corresponds to upstream `FPDF_StructElement_GetObjType`.
267    pub fn obj_type(&self) -> Option<&str> {
268        self.obj_type.as_deref()
269    }
270
271    /// ADR-019 Tier 2 alias for [`obj_type()`](StructElement::obj_type).
272    ///
273    /// Corresponds to upstream `FPDF_StructElement_GetObjType`.
274    #[inline]
275    pub fn struct_element_get_obj_type(&self) -> Option<&str> {
276        self.obj_type()
277    }
278
279    /// Deprecated short alias — use [`struct_element_get_obj_type()`](StructElement::struct_element_get_obj_type)
280    /// or [`obj_type()`](StructElement::obj_type) instead.
281    #[deprecated(
282        since = "0.1.0",
283        note = "use `struct_element_get_obj_type()` — matches upstream `FPDF_StructElement_GetObjType`"
284    )]
285    #[inline]
286    pub fn get_obj_type(&self) -> Option<&str> {
287        self.obj_type()
288    }
289
290    // --- Text attributes ---
291
292    /// Returns the alternative text (`/Alt`), if present.
293    ///
294    /// Corresponds to upstream `FPDF_StructElement_GetAltText`.
295    pub fn alt_text(&self) -> Option<&str> {
296        self.alt_text.as_deref()
297    }
298
299    /// ADR-019 Tier 2 alias for [`alt_text()`](StructElement::alt_text).
300    ///
301    /// Corresponds to upstream `FPDF_StructElement_GetAltText`.
302    #[inline]
303    pub fn struct_element_get_alt_text(&self) -> Option<&str> {
304        self.alt_text()
305    }
306
307    /// Deprecated short alias — use [`struct_element_get_alt_text()`](StructElement::struct_element_get_alt_text)
308    /// or [`alt_text()`](StructElement::alt_text) instead.
309    #[deprecated(
310        since = "0.1.0",
311        note = "use `struct_element_get_alt_text()` — matches upstream `FPDF_StructElement_GetAltText`"
312    )]
313    #[inline]
314    pub fn get_alt_text(&self) -> Option<&str> {
315        self.alt_text()
316    }
317
318    /// Returns the actual text (`/ActualText`), if present.
319    ///
320    /// Corresponds to upstream `FPDF_StructElement_GetActualText`.
321    pub fn actual_text(&self) -> Option<&str> {
322        self.actual_text.as_deref()
323    }
324
325    /// ADR-019 Tier 2 alias for [`actual_text()`](StructElement::actual_text).
326    ///
327    /// Corresponds to upstream `FPDF_StructElement_GetActualText`.
328    #[inline]
329    pub fn struct_element_get_actual_text(&self) -> Option<&str> {
330        self.actual_text()
331    }
332
333    /// Deprecated short alias — use [`struct_element_get_actual_text()`](StructElement::struct_element_get_actual_text)
334    /// or [`actual_text()`](StructElement::actual_text) instead.
335    #[deprecated(
336        since = "0.1.0",
337        note = "use `struct_element_get_actual_text()` — matches upstream `FPDF_StructElement_GetActualText`"
338    )]
339    #[inline]
340    pub fn get_actual_text(&self) -> Option<&str> {
341        self.actual_text()
342    }
343
344    /// Returns the title (`/T`), if present.
345    ///
346    /// Corresponds to upstream `FPDF_StructElement_GetTitle`.
347    pub fn title(&self) -> Option<&str> {
348        self.title.as_deref()
349    }
350
351    /// ADR-019 Tier 2 alias for [`title()`](StructElement::title).
352    ///
353    /// Corresponds to upstream `FPDF_StructElement_GetTitle`.
354    #[inline]
355    pub fn struct_element_get_title(&self) -> Option<&str> {
356        self.title()
357    }
358
359    /// Deprecated short alias — use [`struct_element_get_title()`](StructElement::struct_element_get_title)
360    /// or [`title()`](StructElement::title) instead.
361    #[deprecated(
362        since = "0.1.0",
363        note = "use `struct_element_get_title()` — matches upstream `FPDF_StructElement_GetTitle`"
364    )]
365    #[inline]
366    pub fn get_title(&self) -> Option<&str> {
367        self.title()
368    }
369
370    /// Returns the element ID (`/ID`), if present.
371    ///
372    /// Corresponds to upstream `FPDF_StructElement_GetID`.
373    pub fn id(&self) -> Option<&str> {
374        self.id.as_deref()
375    }
376
377    /// ADR-019 Tier 2 alias for [`id()`](StructElement::id).
378    ///
379    /// Corresponds to upstream `FPDF_StructElement_GetID`.
380    #[inline]
381    pub fn struct_element_get_id(&self) -> Option<&str> {
382        self.id()
383    }
384
385    /// Deprecated short alias — use [`struct_element_get_id()`](StructElement::struct_element_get_id)
386    /// or [`id()`](StructElement::id) instead.
387    #[deprecated(
388        since = "0.1.0",
389        note = "use `struct_element_get_id()` — matches upstream `FPDF_StructElement_GetID`"
390    )]
391    #[inline]
392    pub fn get_id(&self) -> Option<&str> {
393        self.id()
394    }
395
396    /// Returns the language tag (`/Lang`), if present (e.g., `"en-US"`).
397    ///
398    /// Corresponds to upstream `FPDF_StructElement_GetLang`.
399    pub fn lang(&self) -> Option<&str> {
400        self.lang.as_deref()
401    }
402
403    /// ADR-019 Tier 2 alias for [`lang()`](StructElement::lang).
404    ///
405    /// Corresponds to upstream `FPDF_StructElement_GetLang`.
406    #[inline]
407    pub fn struct_element_get_lang(&self) -> Option<&str> {
408        self.lang()
409    }
410
411    /// Deprecated short alias — use [`struct_element_get_lang()`](StructElement::struct_element_get_lang)
412    /// or [`lang()`](StructElement::lang) instead.
413    #[deprecated(
414        since = "0.1.0",
415        note = "use `struct_element_get_lang()` — matches upstream `FPDF_StructElement_GetLang`"
416    )]
417    #[inline]
418    pub fn get_lang(&self) -> Option<&str> {
419        self.lang()
420    }
421
422    // --- String attribute lookup ---
423
424    /// Look up a named string/name attribute in all attribute dictionaries.
425    ///
426    /// Searches all [`StructAttribute`] entries associated with this element for an
427    /// entry with `attr_name` key, and returns the value if it is a
428    /// [`AttributeValue::Text`] or [`AttributeValue::Name`] variant.
429    ///
430    /// Corresponds to upstream `FPDF_StructElement_GetStringAttribute`.
431    pub fn string_attribute(&self, attr_name: &str) -> Option<&str> {
432        for attr in &self.attributes {
433            for (k, v) in &attr.entries {
434                if k == attr_name {
435                    return match v {
436                        AttributeValue::Text(s) => Some(s.as_str()),
437                        AttributeValue::Name(n) => Some(n.as_str()),
438                        _ => None,
439                    };
440                }
441            }
442        }
443        None
444    }
445
446    /// ADR-019 Tier 2 alias for [`string_attribute()`](StructElement::string_attribute).
447    ///
448    /// Corresponds to upstream `FPDF_StructElement_GetStringAttribute`.
449    #[inline]
450    pub fn struct_element_get_string_attribute(&self, attr_name: &str) -> Option<&str> {
451        self.string_attribute(attr_name)
452    }
453
454    /// Deprecated short alias — use [`struct_element_get_string_attribute()`](StructElement::struct_element_get_string_attribute)
455    /// or [`string_attribute()`](StructElement::string_attribute) instead.
456    #[deprecated(
457        since = "0.1.0",
458        note = "use `struct_element_get_string_attribute()` — matches upstream `FPDF_StructElement_GetStringAttribute`"
459    )]
460    #[inline]
461    pub fn get_string_attribute(&self, attr_name: &str) -> Option<&str> {
462        self.string_attribute(attr_name)
463    }
464
465    // --- Marked content IDs ---
466
467    /// Returns the first marked content ID associated with this element, or `-1` if none.
468    ///
469    /// For elements with multiple MCIDs, use [`marked_content_id_count()`](StructElement::marked_content_id_count)
470    /// and [`marked_content_id_at_index()`](StructElement::marked_content_id_at_index) to retrieve all of them.
471    ///
472    /// Corresponds to upstream `FPDF_StructElement_GetMarkedContentID`.
473    pub fn marked_content_id(&self) -> i64 {
474        self.mcids.first().copied().unwrap_or(-1)
475    }
476
477    /// ADR-019 Tier 2 alias for [`marked_content_id()`](StructElement::marked_content_id).
478    ///
479    /// Corresponds to upstream `FPDF_StructElement_GetMarkedContentID`.
480    #[inline]
481    pub fn struct_element_get_marked_content_id(&self) -> i64 {
482        self.marked_content_id()
483    }
484
485    /// Deprecated short alias — use [`struct_element_get_marked_content_id()`](StructElement::struct_element_get_marked_content_id)
486    /// or [`marked_content_id()`](StructElement::marked_content_id) instead.
487    #[deprecated(
488        since = "0.1.0",
489        note = "use `struct_element_get_marked_content_id()` — matches upstream `FPDF_StructElement_GetMarkedContentID`"
490    )]
491    #[inline]
492    pub fn get_marked_content_id(&self) -> i64 {
493        self.marked_content_id()
494    }
495
496    /// Returns the number of marked content IDs associated with this element.
497    ///
498    /// Returns `-1` if there are no MCIDs (matching upstream sentinel).
499    ///
500    /// Corresponds to `FPDF_StructElement_GetMarkedContentIdCount` in PDFium.
501    pub fn marked_content_id_count(&self) -> i64 {
502        if self.mcids.is_empty() {
503            -1
504        } else {
505            self.mcids.len() as i64
506        }
507    }
508
509    /// Deprecated: use [`struct_element_get_marked_content_id_count()`](StructElement::struct_element_get_marked_content_id_count)
510    /// (upstream alias) or [`marked_content_id_count()`](StructElement::marked_content_id_count) (primary) instead.
511    #[deprecated(
512        since = "0.1.0",
513        note = "use struct_element_get_marked_content_id_count() or marked_content_id_count() instead"
514    )]
515    #[inline]
516    pub fn mcid_count(&self) -> i64 {
517        self.marked_content_id_count()
518    }
519
520    /// ADR-019 Tier 2 alias for [`marked_content_id_count()`](StructElement::marked_content_id_count).
521    ///
522    /// Corresponds to `FPDF_StructElement_GetMarkedContentIdCount` in PDFium.
523    #[inline]
524    pub fn struct_element_get_marked_content_id_count(&self) -> i64 {
525        self.marked_content_id_count()
526    }
527
528    /// Deprecated short alias — use [`struct_element_get_marked_content_id_count()`](StructElement::struct_element_get_marked_content_id_count)
529    /// or [`marked_content_id_count()`](StructElement::marked_content_id_count) instead.
530    #[deprecated(
531        since = "0.1.0",
532        note = "use `struct_element_get_marked_content_id_count()` — matches upstream `FPDF_StructElement_GetMarkedContentIdCount`"
533    )]
534    #[inline]
535    pub fn get_marked_content_id_count(&self) -> i64 {
536        self.marked_content_id_count()
537    }
538
539    /// Returns the marked content ID at a given zero-based index, or `-1` if the index
540    /// is out of range.
541    ///
542    /// Corresponds to `FPDF_StructElement_GetMarkedContentIdAtIndex` in PDFium.
543    pub fn marked_content_id_at_index(&self, index: usize) -> i64 {
544        self.mcids.get(index).copied().unwrap_or(-1)
545    }
546
547    /// Deprecated: use [`struct_element_get_marked_content_id_at_index()`](StructElement::struct_element_get_marked_content_id_at_index)
548    /// (upstream alias) or [`marked_content_id_at_index()`](StructElement::marked_content_id_at_index) (primary) instead.
549    #[deprecated(
550        since = "0.1.0",
551        note = "use struct_element_get_marked_content_id_at_index() or marked_content_id_at_index() instead"
552    )]
553    #[inline]
554    pub fn mcid_at_index(&self, index: usize) -> i64 {
555        self.marked_content_id_at_index(index)
556    }
557
558    /// ADR-019 Tier 2 alias for [`marked_content_id_at_index()`](StructElement::marked_content_id_at_index).
559    ///
560    /// Corresponds to `FPDF_StructElement_GetMarkedContentIdAtIndex` in PDFium.
561    #[inline]
562    pub fn struct_element_get_marked_content_id_at_index(&self, index: usize) -> i64 {
563        self.marked_content_id_at_index(index)
564    }
565
566    /// Deprecated short alias — use [`struct_element_get_marked_content_id_at_index()`](StructElement::struct_element_get_marked_content_id_at_index)
567    /// or [`marked_content_id_at_index()`](StructElement::marked_content_id_at_index) instead.
568    #[deprecated(
569        since = "0.1.0",
570        note = "use `struct_element_get_marked_content_id_at_index()` — matches upstream `FPDF_StructElement_GetMarkedContentIdAtIndex`"
571    )]
572    #[inline]
573    pub fn get_marked_content_id_at_index(&self, index: usize) -> i64 {
574        self.marked_content_id_at_index(index)
575    }
576
577    // --- Children ---
578
579    /// Returns the number of direct children of this element.
580    ///
581    /// Corresponds to upstream `FPDF_StructElement_CountChildren`.
582    pub fn child_count(&self) -> usize {
583        self.children.len()
584    }
585
586    /// ADR-019 Tier 2 alias for [`child_count()`](StructElement::child_count).
587    ///
588    /// Corresponds to upstream `FPDF_StructElement_CountChildren`.
589    #[inline]
590    pub fn struct_element_count_children(&self) -> usize {
591        self.child_count()
592    }
593
594    /// Deprecated short alias — use [`struct_element_count_children()`](StructElement::struct_element_count_children)
595    /// or [`child_count()`](StructElement::child_count) instead.
596    #[deprecated(
597        since = "0.1.0",
598        note = "use `struct_element_count_children()` — matches upstream `FPDF_StructElement_CountChildren`"
599    )]
600    #[inline]
601    pub fn count_children(&self) -> usize {
602        self.child_count()
603    }
604
605    /// Returns the child element at a given zero-based index, or `None` if the index
606    /// is out of range.
607    ///
608    /// Corresponds to upstream `FPDF_StructElement_GetChildAtIndex`.
609    pub fn child_at_index(&self, index: usize) -> Option<&StructElement> {
610        self.children.get(index)
611    }
612
613    /// ADR-019 Tier 2 alias for [`child_at_index()`](StructElement::child_at_index).
614    ///
615    /// Corresponds to upstream `FPDF_StructElement_GetChildAtIndex`.
616    #[inline]
617    pub fn struct_element_get_child_at_index(&self, index: usize) -> Option<&StructElement> {
618        self.child_at_index(index)
619    }
620
621    /// Deprecated short alias — use [`struct_element_get_child_at_index()`](StructElement::struct_element_get_child_at_index)
622    /// or [`child_at_index()`](StructElement::child_at_index) instead.
623    #[deprecated(
624        since = "0.1.0",
625        note = "use `struct_element_get_child_at_index()` — matches upstream `FPDF_StructElement_GetChildAtIndex`"
626    )]
627    #[inline]
628    pub fn get_child_at_index(&self, index: usize) -> Option<&StructElement> {
629        self.child_at_index(index)
630    }
631
632    /// Returns the marked content ID of a child element at a given zero-based index,
633    /// or `-1` if the child does not exist or has no MCID.
634    ///
635    /// Corresponds to upstream `FPDF_StructElement_GetChildMarkedContentID`.
636    pub fn child_marked_content_id(&self, index: usize) -> i64 {
637        self.children
638            .get(index)
639            .and_then(|c| c.mcids.first().copied())
640            .unwrap_or(-1)
641    }
642
643    /// ADR-019 Tier 2 alias for
644    /// [`child_marked_content_id()`](StructElement::child_marked_content_id).
645    ///
646    /// Corresponds to upstream `FPDF_StructElement_GetChildMarkedContentID`.
647    #[inline]
648    pub fn struct_element_get_child_marked_content_id(&self, index: usize) -> i64 {
649        self.child_marked_content_id(index)
650    }
651
652    /// Deprecated short alias — use [`struct_element_get_child_marked_content_id()`](StructElement::struct_element_get_child_marked_content_id)
653    /// or [`child_marked_content_id()`](StructElement::child_marked_content_id) instead.
654    #[deprecated(
655        since = "0.1.0",
656        note = "use `struct_element_get_child_marked_content_id()` — matches upstream `FPDF_StructElement_GetChildMarkedContentID`"
657    )]
658    #[inline]
659    pub fn get_child_marked_content_id(&self, index: usize) -> i64 {
660        self.child_marked_content_id(index)
661    }
662
663    // --- Attributes ---
664
665    /// Returns the number of attribute dictionaries associated with this element.
666    ///
667    /// Corresponds to upstream `FPDF_StructElement_GetAttributeCount`.
668    pub fn attribute_count(&self) -> usize {
669        self.attributes.len()
670    }
671
672    /// ADR-019 Tier 2 alias for [`attribute_count()`](StructElement::attribute_count).
673    ///
674    /// Corresponds to upstream `FPDF_StructElement_GetAttributeCount`.
675    #[inline]
676    pub fn struct_element_get_attribute_count(&self) -> usize {
677        self.attribute_count()
678    }
679
680    /// Deprecated short alias — use [`struct_element_get_attribute_count()`](StructElement::struct_element_get_attribute_count)
681    /// or [`attribute_count()`](StructElement::attribute_count) instead.
682    #[deprecated(
683        since = "0.1.0",
684        note = "use `struct_element_get_attribute_count()` — matches upstream `FPDF_StructElement_GetAttributeCount`"
685    )]
686    #[inline]
687    pub fn get_attribute_count(&self) -> usize {
688        self.attribute_count()
689    }
690
691    /// Returns the attribute dictionary at a given zero-based index, or `None`.
692    ///
693    /// Corresponds to upstream `FPDF_StructElement_GetAttributeAtIndex`.
694    pub fn attribute_at_index(&self, index: usize) -> Option<&StructAttribute> {
695        self.attributes.get(index)
696    }
697
698    /// ADR-019 Tier 2 alias for
699    /// [`attribute_at_index()`](StructElement::attribute_at_index).
700    ///
701    /// Corresponds to upstream `FPDF_StructElement_GetAttributeAtIndex`.
702    #[inline]
703    pub fn struct_element_get_attribute_at_index(&self, index: usize) -> Option<&StructAttribute> {
704        self.attribute_at_index(index)
705    }
706
707    /// Deprecated short alias — use [`struct_element_get_attribute_at_index()`](StructElement::struct_element_get_attribute_at_index)
708    /// or [`attribute_at_index()`](StructElement::attribute_at_index) instead.
709    #[deprecated(
710        since = "0.1.0",
711        note = "use `struct_element_get_attribute_at_index()` — matches upstream `FPDF_StructElement_GetAttributeAtIndex`"
712    )]
713    #[inline]
714    pub fn get_attribute_at_index(&self, index: usize) -> Option<&StructAttribute> {
715        self.attribute_at_index(index)
716    }
717
718    /// Returns the page object ID reference, if present.
719    ///
720    /// Corresponds to the `/Pg` entry (used internally by structure tree page filtering).
721    pub fn page_ref(&self) -> Option<ObjectId> {
722        self.page_ref
723    }
724
725    // --- Parent back-reference ---
726
727    /// Returns the zero-based index of this element within its parent's `children` list,
728    /// or `None` if this element is a root element (a direct child of `StructTree`).
729    ///
730    /// Use this together with the parent `StructElement` to navigate upward in the tree.
731    /// Root elements return `None` — they have no parent.
732    ///
733    /// Corresponds to upstream `FPDF_StructElement_GetParent`.
734    pub fn parent_index(&self) -> Option<usize> {
735        self.parent_index
736    }
737
738    /// ADR-019 Tier 2 alias for [`parent_index()`](StructElement::parent_index).
739    ///
740    /// Returns the zero-based index of this element within its parent's `children`
741    /// list, or `None` for root elements.
742    ///
743    /// Note: the rpdfium tree model stores elements by value, so this returns an
744    /// index rather than a handle.  Use this index together with the parent node
745    /// to navigate upward in the tree.
746    ///
747    /// Corresponds to upstream `FPDF_StructElement_GetParent`.
748    #[inline]
749    pub fn struct_element_get_parent(&self) -> Option<usize> {
750        self.parent_index()
751    }
752
753    /// Deprecated short alias — use [`struct_element_get_parent()`](StructElement::struct_element_get_parent)
754    /// or [`parent_index()`](StructElement::parent_index) instead.
755    #[deprecated(
756        since = "0.1.0",
757        note = "use `struct_element_get_parent()` — matches upstream `FPDF_StructElement_GetParent`"
758    )]
759    #[inline]
760    pub fn get_parent(&self) -> Option<usize> {
761        self.parent_index()
762    }
763
764    /// Deprecated: use [`struct_element_get_parent()`](StructElement::struct_element_get_parent) (upstream alias) or
765    /// [`parent_index()`](StructElement::parent_index) (primary) instead.
766    #[deprecated(
767        since = "0.1.0",
768        note = "use struct_element_get_parent() or parent_index() instead"
769    )]
770    #[inline]
771    pub fn get_parent_index(&self) -> Option<usize> {
772        self.parent_index()
773    }
774}
775
776// ---------------------------------------------------------------------------
777// StructAttribute methods — FPDF_StructElement_Attr_* equivalents
778// ---------------------------------------------------------------------------
779
780impl StructAttribute {
781    /// Returns the attribute owner name (from `/O` key, e.g. `"Layout"`, `"Table"`).
782    pub fn owner(&self) -> &str {
783        &self.owner
784    }
785
786    /// Returns the number of key-value entries in this attribute dictionary.
787    ///
788    /// Corresponds to upstream `FPDF_StructElement_Attr_GetCount`.
789    pub fn entry_count(&self) -> usize {
790        self.entries.len()
791    }
792
793    /// ADR-019 Tier 2 alias for [`entry_count()`](StructAttribute::entry_count).
794    ///
795    /// Corresponds to upstream `FPDF_StructElement_Attr_GetCount`.
796    #[inline]
797    pub fn struct_element_attr_get_count(&self) -> usize {
798        self.entry_count()
799    }
800
801    /// Deprecated short alias — use [`struct_element_attr_get_count()`](StructAttribute::struct_element_attr_get_count)
802    /// or [`entry_count()`](StructAttribute::entry_count) instead.
803    #[deprecated(
804        since = "0.1.0",
805        note = "use `struct_element_attr_get_count()` — matches upstream `FPDF_StructElement_Attr_GetCount`"
806    )]
807    #[inline]
808    pub fn get_count(&self) -> usize {
809        self.entry_count()
810    }
811
812    /// Returns the name (key) of the entry at a given zero-based index, or `None`.
813    ///
814    /// Corresponds to upstream `FPDF_StructElement_Attr_GetName`.
815    pub fn entry_name_at_index(&self, index: usize) -> Option<&str> {
816        self.entries.get(index).map(|(k, _)| k.as_str())
817    }
818
819    /// ADR-019 Tier 2 alias for
820    /// [`entry_name_at_index()`](StructAttribute::entry_name_at_index).
821    ///
822    /// Corresponds to upstream `FPDF_StructElement_Attr_GetName`.
823    #[inline]
824    pub fn struct_element_attr_get_name(&self, index: usize) -> Option<&str> {
825        self.entry_name_at_index(index)
826    }
827
828    /// Deprecated short alias — use [`struct_element_attr_get_name()`](StructAttribute::struct_element_attr_get_name)
829    /// or [`entry_name_at_index()`](StructAttribute::entry_name_at_index) instead.
830    #[deprecated(
831        since = "0.1.0",
832        note = "use `struct_element_attr_get_name()` — matches upstream `FPDF_StructElement_Attr_GetName`"
833    )]
834    #[inline]
835    pub fn get_name(&self, index: usize) -> Option<&str> {
836        self.entry_name_at_index(index)
837    }
838
839    /// Returns a reference to the value for the entry with the given key name, or `None`.
840    ///
841    /// Corresponds to upstream `FPDF_StructElement_Attr_GetValue`.
842    pub fn value_for_key(&self, name: &str) -> Option<&AttributeValue> {
843        self.entries.iter().find(|(k, _)| k == name).map(|(_, v)| v)
844    }
845
846    /// ADR-019 Tier 2 alias for
847    /// [`value_for_key()`](StructAttribute::value_for_key).
848    ///
849    /// Corresponds to upstream `FPDF_StructElement_Attr_GetValue`.
850    #[inline]
851    pub fn struct_element_attr_get_value(&self, name: &str) -> Option<&AttributeValue> {
852        self.value_for_key(name)
853    }
854
855    /// Deprecated short alias — use [`struct_element_attr_get_value()`](StructAttribute::struct_element_attr_get_value)
856    /// or [`value_for_key()`](StructAttribute::value_for_key) instead.
857    #[deprecated(
858        since = "0.1.0",
859        note = "use `struct_element_attr_get_value()` — matches upstream `FPDF_StructElement_Attr_GetValue`"
860    )]
861    #[inline]
862    pub fn get_value(&self, name: &str) -> Option<&AttributeValue> {
863        self.value_for_key(name)
864    }
865
866    /// Returns a reference to the value at the given zero-based index, or `None`.
867    pub fn value_at_index(&self, index: usize) -> Option<&AttributeValue> {
868        self.entries.get(index).map(|(_, v)| v)
869    }
870}
871
872// ---------------------------------------------------------------------------
873// AttributeValue methods — FPDF_StructElement_Attr_Get*Value equivalents
874// ---------------------------------------------------------------------------
875
876impl AttributeValue {
877    /// Returns the numeric value if this is a `Number` variant, else `None`.
878    ///
879    /// Corresponds to upstream `FPDF_StructElement_Attr_GetNumberValue`.
880    pub fn as_number(&self) -> Option<f64> {
881        match self {
882            AttributeValue::Number(n) => Some(*n),
883            _ => None,
884        }
885    }
886
887    /// ADR-019 Tier 2 alias for [`as_number()`](AttributeValue::as_number).
888    ///
889    /// Corresponds to upstream `FPDF_StructElement_Attr_GetNumberValue`.
890    #[inline]
891    pub fn struct_element_attr_get_number_value(&self) -> Option<f64> {
892        self.as_number()
893    }
894
895    /// Deprecated short alias — use [`struct_element_attr_get_number_value()`](AttributeValue::struct_element_attr_get_number_value)
896    /// or [`as_number()`](AttributeValue::as_number) instead.
897    #[deprecated(
898        since = "0.1.0",
899        note = "use `struct_element_attr_get_number_value()` — matches upstream `FPDF_StructElement_Attr_GetNumberValue`"
900    )]
901    #[inline]
902    pub fn get_number_value(&self) -> Option<f64> {
903        self.as_number()
904    }
905
906    /// Returns the string value if this is a `Text` variant, else `None`.
907    ///
908    /// Corresponds to upstream `FPDF_StructElement_Attr_GetStringValue`.
909    pub fn as_text(&self) -> Option<&str> {
910        match self {
911            AttributeValue::Text(s) => Some(s.as_str()),
912            _ => None,
913        }
914    }
915
916    /// ADR-019 Tier 2 alias for [`as_text()`](AttributeValue::as_text).
917    ///
918    /// Corresponds to upstream `FPDF_StructElement_Attr_GetStringValue`.
919    #[inline]
920    pub fn struct_element_attr_get_string_value(&self) -> Option<&str> {
921        self.as_text()
922    }
923
924    /// Deprecated short alias — use [`struct_element_attr_get_string_value()`](AttributeValue::struct_element_attr_get_string_value)
925    /// or [`as_text()`](AttributeValue::as_text) instead.
926    #[deprecated(
927        since = "0.1.0",
928        note = "use `struct_element_attr_get_string_value()` — matches upstream `FPDF_StructElement_Attr_GetStringValue`"
929    )]
930    #[inline]
931    pub fn get_string_value(&self) -> Option<&str> {
932        self.as_text()
933    }
934
935    /// Returns the name string if this is a `Name` variant, else `None`.
936    ///
937    /// Corresponds to upstream `FPDF_StructElement_Attr_GetType` (distinguishes name from string).
938    pub fn as_name_str(&self) -> Option<&str> {
939        match self {
940            AttributeValue::Name(n) => Some(n.as_str()),
941            _ => None,
942        }
943    }
944
945    /// Non-upstream alias — use [`as_name_str()`](AttributeValue::as_name_str).
946    #[deprecated(
947        note = "use `as_name_str()` — no public `FPDF_StructElement_Attr_GetNameValue` API"
948    )]
949    #[inline]
950    pub fn get_name_value(&self) -> Option<&str> {
951        self.as_name_str()
952    }
953
954    /// Returns the array of numbers if this is an `Array` variant, else `None`.
955    ///
956    /// Corresponds to upstream `FPDF_StructElement_Attr_CountChildren` /
957    /// `FPDF_StructElement_Attr_GetChildAtIndex` for array-valued attributes.
958    pub fn as_array(&self) -> Option<&[f64]> {
959        match self {
960            AttributeValue::Array(arr) => Some(arr.as_slice()),
961            _ => None,
962        }
963    }
964
965    /// Non-upstream alias — use [`as_array()`](AttributeValue::as_array).
966    #[deprecated(note = "use `as_array()` — no public `FPDF_StructElement_Attr_GetArrayValue` API")]
967    #[inline]
968    pub fn get_array_value(&self) -> Option<&[f64]> {
969        self.as_array()
970    }
971
972    /// Returns the number of children (array elements) for an `Array` value, or `-1`.
973    ///
974    /// Corresponds to upstream `FPDF_StructElement_Attr_CountChildren`.
975    pub fn child_count(&self) -> i64 {
976        match self {
977            AttributeValue::Array(arr) => arr.len() as i64,
978            _ => -1,
979        }
980    }
981
982    /// ADR-019 Tier 2 alias for [`child_count()`](AttributeValue::child_count).
983    ///
984    /// Corresponds to upstream `FPDF_StructElement_Attr_CountChildren`.
985    #[inline]
986    pub fn struct_element_attr_count_children(&self) -> i64 {
987        self.child_count()
988    }
989
990    /// Deprecated short alias — use [`struct_element_attr_count_children()`](AttributeValue::struct_element_attr_count_children)
991    /// or [`child_count()`](AttributeValue::child_count) instead.
992    #[deprecated(
993        since = "0.1.0",
994        note = "use `struct_element_attr_count_children()` — matches upstream `FPDF_StructElement_Attr_CountChildren`"
995    )]
996    #[inline]
997    pub fn count_children(&self) -> i64 {
998        self.child_count()
999    }
1000
1001    /// Returns the child value at the given zero-based index for an `Array` value.
1002    ///
1003    /// Returns `None` if the value is not an `Array` or the index is out of range.
1004    /// Corresponds to upstream `FPDF_StructElement_Attr_GetChildAtIndex`.
1005    pub fn child_at_index(&self, index: usize) -> Option<f64> {
1006        match self {
1007            AttributeValue::Array(arr) => arr.get(index).copied(),
1008            _ => None,
1009        }
1010    }
1011
1012    /// ADR-019 Tier 2 alias for [`child_at_index()`](AttributeValue::child_at_index).
1013    ///
1014    /// Corresponds to upstream `FPDF_StructElement_Attr_GetChildAtIndex`.
1015    #[inline]
1016    pub fn struct_element_attr_get_child_at_index(&self, index: usize) -> Option<f64> {
1017        self.child_at_index(index)
1018    }
1019
1020    /// Deprecated short alias — use [`struct_element_attr_get_child_at_index()`](AttributeValue::struct_element_attr_get_child_at_index)
1021    /// or [`child_at_index()`](AttributeValue::child_at_index) instead.
1022    #[deprecated(
1023        since = "0.1.0",
1024        note = "use `struct_element_attr_get_child_at_index()` — matches upstream `FPDF_StructElement_Attr_GetChildAtIndex`"
1025    )]
1026    #[inline]
1027    pub fn get_child_at_index(&self, index: usize) -> Option<f64> {
1028        self.child_at_index(index)
1029    }
1030
1031    /// Returns a string description of the type of this value.
1032    ///
1033    /// - `"Number"` for numeric values (PDF real/integer)
1034    /// - `"String"` for text string values
1035    /// - `"Name"` for PDF name values
1036    /// - `"Array"` for number arrays
1037    ///
1038    /// Corresponds to upstream `FPDF_StructElement_Attr_GetType`.
1039    pub fn type_name(&self) -> &'static str {
1040        match self {
1041            AttributeValue::Number(_) => "Number",
1042            AttributeValue::Text(_) => "String",
1043            AttributeValue::Name(_) => "Name",
1044            AttributeValue::Array(_) => "Array",
1045        }
1046    }
1047
1048    /// ADR-019 Tier 2 alias for [`type_name()`](AttributeValue::type_name).
1049    ///
1050    /// Corresponds to upstream `FPDF_StructElement_Attr_GetType`.
1051    #[inline]
1052    pub fn struct_element_attr_get_type(&self) -> &'static str {
1053        self.type_name()
1054    }
1055
1056    /// Deprecated short alias — use [`struct_element_attr_get_type()`](AttributeValue::struct_element_attr_get_type)
1057    /// or [`type_name()`](AttributeValue::type_name) instead.
1058    #[deprecated(
1059        since = "0.1.0",
1060        note = "use `struct_element_attr_get_type()` — matches upstream `FPDF_StructElement_Attr_GetType`"
1061    )]
1062    #[inline]
1063    pub fn get_type(&self) -> &'static str {
1064        self.type_name()
1065    }
1066
1067    /// Returns `true` if this is a `Name` or `Text` variant (string-like types),
1068    /// `false` otherwise.
1069    ///
1070    /// Corresponds to upstream `FPDF_StructElement_Attr_GetBlobValue` — blob values
1071    /// in upstream are raw string bytes; in rpdfium strings are always decoded to UTF-8.
1072    pub fn is_string_like(&self) -> bool {
1073        matches!(self, AttributeValue::Text(_) | AttributeValue::Name(_))
1074    }
1075
1076    /// Returns a boolean interpretation of this attribute value, or `None`
1077    /// if the value cannot be interpreted as a boolean.
1078    ///
1079    /// - `Name("true")` / `Text("true")` → `Some(true)` (case-insensitive)
1080    /// - `Name("false")` / `Text("false")` → `Some(false)` (case-insensitive)
1081    /// - `Number(0.0)` → `Some(false)`
1082    /// - `Number(non-zero)` → `Some(true)`
1083    /// - `Array(_)` → `None`
1084    ///
1085    /// Corresponds to upstream `FPDF_StructElement_Attr_GetBooleanValue`.
1086    pub fn as_bool(&self) -> Option<bool> {
1087        match self {
1088            AttributeValue::Name(n) | AttributeValue::Text(n) => {
1089                if n.eq_ignore_ascii_case("true") {
1090                    Some(true)
1091                } else if n.eq_ignore_ascii_case("false") {
1092                    Some(false)
1093                } else {
1094                    None
1095                }
1096            }
1097            AttributeValue::Number(n) => Some(*n != 0.0),
1098            AttributeValue::Array(_) => None,
1099        }
1100    }
1101
1102    /// ADR-019 Tier 2 alias for [`as_bool()`](AttributeValue::as_bool).
1103    ///
1104    /// Corresponds to upstream `FPDF_StructElement_Attr_GetBooleanValue`.
1105    #[inline]
1106    pub fn struct_element_attr_get_boolean_value(&self) -> Option<bool> {
1107        self.as_bool()
1108    }
1109
1110    /// Deprecated short alias — use [`struct_element_attr_get_boolean_value()`](AttributeValue::struct_element_attr_get_boolean_value)
1111    /// or [`as_bool()`](AttributeValue::as_bool) instead.
1112    #[deprecated(
1113        since = "0.1.0",
1114        note = "use `struct_element_attr_get_boolean_value()` — matches upstream `FPDF_StructElement_Attr_GetBooleanValue`"
1115    )]
1116    #[inline]
1117    pub fn get_boolean_value(&self) -> Option<bool> {
1118        self.as_bool()
1119    }
1120
1121    /// Returns the raw bytes of the string value for blob-compatible types.
1122    ///
1123    /// Returns `Some(bytes)` for `Text` and `Name` variants (as UTF-8 encoded bytes),
1124    /// `None` otherwise. Corresponds to upstream `FPDF_StructElement_Attr_GetBlobValue`.
1125    pub fn as_blob(&self) -> Option<&[u8]> {
1126        match self {
1127            AttributeValue::Text(s) => Some(s.as_bytes()),
1128            AttributeValue::Name(n) => Some(n.as_bytes()),
1129            _ => None,
1130        }
1131    }
1132
1133    /// ADR-019 Tier 2 alias for [`as_blob()`](AttributeValue::as_blob).
1134    ///
1135    /// Corresponds to upstream `FPDF_StructElement_Attr_GetBlobValue`.
1136    #[inline]
1137    pub fn struct_element_attr_get_blob_value(&self) -> Option<&[u8]> {
1138        self.as_blob()
1139    }
1140
1141    /// Deprecated short alias — use [`struct_element_attr_get_blob_value()`](AttributeValue::struct_element_attr_get_blob_value)
1142    /// or [`as_blob()`](AttributeValue::as_blob) instead.
1143    #[deprecated(
1144        since = "0.1.0",
1145        note = "use `struct_element_attr_get_blob_value()` — matches upstream `FPDF_StructElement_Attr_GetBlobValue`"
1146    )]
1147    #[inline]
1148    pub fn get_blob_value(&self) -> Option<&[u8]> {
1149        self.as_blob()
1150    }
1151}
1152
1153#[cfg(test)]
1154mod tests {
1155    use super::*;
1156
1157    fn build_store() -> ObjectStore<Vec<u8>> {
1158        let pdf = build_minimal_pdf();
1159        ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
1160    }
1161
1162    fn build_minimal_pdf() -> Vec<u8> {
1163        let mut pdf = Vec::new();
1164        pdf.extend_from_slice(b"%PDF-1.4\n");
1165        let obj1_offset = pdf.len();
1166        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1167        let obj2_offset = pdf.len();
1168        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
1169        let xref_offset = pdf.len();
1170        pdf.extend_from_slice(b"xref\n0 3\n");
1171        pdf.extend_from_slice(b"0000000000 65535 f \r\n");
1172        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
1173        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
1174        pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
1175        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
1176        pdf
1177    }
1178
1179    fn name_obj(s: &str) -> Object {
1180        Object::Name(Name::from(s))
1181    }
1182
1183    fn struct_elem_dict(tag: &str) -> HashMap<Name, Object> {
1184        let mut d = HashMap::new();
1185        d.insert(Name::s(), name_obj(tag));
1186        d
1187    }
1188
1189    #[test]
1190    fn test_parse_struct_element_basic() {
1191        let store = build_store();
1192        let dict = struct_elem_dict("P");
1193        let elem = parse_struct_element(&dict, &store);
1194        assert_eq!(elem.struct_type, "P");
1195        assert!(elem.obj_type.is_none());
1196        assert!(elem.alt_text.is_none());
1197        assert!(elem.attributes.is_empty());
1198        assert!(elem.mcids.is_empty());
1199        assert!(elem.children.is_empty());
1200    }
1201
1202    #[test]
1203    fn test_parse_struct_element_obj_type() {
1204        let store = build_store();
1205        let mut dict = struct_elem_dict("Span");
1206        dict.insert(Name::obj_type(), name_obj("Elem"));
1207        let elem = parse_struct_element(&dict, &store);
1208        assert_eq!(elem.obj_type.as_deref(), Some("Elem"));
1209    }
1210
1211    #[test]
1212    fn test_parse_struct_element_obj_type_none() {
1213        let store = build_store();
1214        let dict = struct_elem_dict("P");
1215        let elem = parse_struct_element(&dict, &store);
1216        assert!(elem.obj_type.is_none());
1217    }
1218
1219    #[test]
1220    fn test_parse_attributes_single_dict() {
1221        let store = build_store();
1222        let mut attr_dict = HashMap::new();
1223        attr_dict.insert(Name::o(), name_obj("Layout"));
1224        attr_dict.insert(Name::from("WritingMode"), name_obj("LrTb"));
1225        attr_dict.insert(Name::from("SpaceBefore"), Object::Real(12.0));
1226
1227        let mut elem_dict = struct_elem_dict("TD");
1228        elem_dict.insert(Name::a(), Object::Dictionary(attr_dict));
1229
1230        let attrs = parse_attributes(&elem_dict, &store);
1231        assert_eq!(attrs.len(), 1);
1232        assert_eq!(attrs[0].owner, "Layout");
1233        assert!(attrs[0].entries.len() >= 2);
1234
1235        let wm = attrs[0].entries.iter().find(|(k, _)| k == "WritingMode");
1236        assert!(wm.is_some());
1237        match &wm.unwrap().1 {
1238            AttributeValue::Name(n) => assert_eq!(n, "LrTb"),
1239            _ => panic!("expected Name"),
1240        }
1241    }
1242
1243    #[test]
1244    fn test_parse_attributes_array() {
1245        let store = build_store();
1246        let mut attr1 = HashMap::new();
1247        attr1.insert(Name::o(), name_obj("Layout"));
1248        attr1.insert(Name::from("TextAlign"), name_obj("Center"));
1249
1250        let mut attr2 = HashMap::new();
1251        attr2.insert(Name::o(), name_obj("Table"));
1252        attr2.insert(Name::from("RowSpan"), Object::Integer(2));
1253
1254        let mut elem_dict = struct_elem_dict("TD");
1255        elem_dict.insert(
1256            Name::a(),
1257            Object::Array(vec![Object::Dictionary(attr1), Object::Dictionary(attr2)]),
1258        );
1259
1260        let attrs = parse_attributes(&elem_dict, &store);
1261        assert_eq!(attrs.len(), 2);
1262        assert_eq!(attrs[0].owner, "Layout");
1263        assert_eq!(attrs[1].owner, "Table");
1264    }
1265
1266    #[test]
1267    fn test_parse_attributes_none() {
1268        let store = build_store();
1269        let dict = struct_elem_dict("P");
1270        let attrs = parse_attributes(&dict, &store);
1271        assert!(attrs.is_empty());
1272    }
1273
1274    #[test]
1275    fn test_convert_attribute_value_number() {
1276        let obj = Object::Real(3.14);
1277        match convert_attribute_value(&obj) {
1278            Some(AttributeValue::Number(n)) => assert!((n - 3.14).abs() < 0.001),
1279            _ => panic!("expected Number"),
1280        }
1281    }
1282
1283    #[test]
1284    fn test_convert_attribute_value_name() {
1285        let obj = name_obj("LrTb");
1286        match convert_attribute_value(&obj) {
1287            Some(AttributeValue::Name(n)) => assert_eq!(n, "LrTb"),
1288            _ => panic!("expected Name"),
1289        }
1290    }
1291
1292    #[test]
1293    fn test_convert_attribute_value_array() {
1294        let obj = Object::Array(vec![
1295            Object::Real(1.0),
1296            Object::Real(2.0),
1297            Object::Real(3.0),
1298        ]);
1299        match convert_attribute_value(&obj) {
1300            Some(AttributeValue::Array(arr)) => {
1301                assert_eq!(arr.len(), 3);
1302                assert!((arr[0] - 1.0).abs() < 0.001);
1303            }
1304            _ => panic!("expected Array"),
1305        }
1306    }
1307
1308    // --- StructElement method tests ---
1309
1310    fn make_full_element() -> StructElement {
1311        StructElement {
1312            struct_type: "Figure".to_string(),
1313            obj_type: Some("Elem".to_string()),
1314            alt_text: Some("A cat photo".to_string()),
1315            actual_text: Some("Cat".to_string()),
1316            title: Some("My Figure".to_string()),
1317            id: Some("fig-001".to_string()),
1318            lang: Some("en-US".to_string()),
1319            page_ref: Some(ObjectId::new(5, 0)),
1320            mcids: vec![10, 20, 30],
1321            parent_index: None,
1322            children: vec![StructElement {
1323                struct_type: "Span".to_string(),
1324                obj_type: None,
1325                alt_text: None,
1326                actual_text: None,
1327                title: None,
1328                id: None,
1329                lang: None,
1330                page_ref: None,
1331                mcids: vec![42],
1332                children: Vec::new(),
1333                attributes: Vec::new(),
1334                parent_index: None,
1335            }],
1336            attributes: vec![StructAttribute {
1337                owner: "Layout".to_string(),
1338                entries: vec![
1339                    (
1340                        "WritingMode".to_string(),
1341                        AttributeValue::Name("LrTb".to_string()),
1342                    ),
1343                    ("SpaceBefore".to_string(), AttributeValue::Number(12.0)),
1344                    (
1345                        "TextLabel".to_string(),
1346                        AttributeValue::Text("hello".to_string()),
1347                    ),
1348                    (
1349                        "Padding".to_string(),
1350                        AttributeValue::Array(vec![1.0, 2.0, 3.0, 4.0]),
1351                    ),
1352                ],
1353            }],
1354        }
1355    }
1356
1357    #[test]
1358    fn test_struct_element_struct_type_getter() {
1359        let elem = make_full_element();
1360        assert_eq!(elem.struct_type(), "Figure");
1361        assert_eq!(elem.struct_element_get_type(), "Figure");
1362    }
1363
1364    #[test]
1365    fn test_struct_element_obj_type_getter() {
1366        let elem = make_full_element();
1367        assert_eq!(elem.obj_type(), Some("Elem"));
1368        assert_eq!(elem.struct_element_get_obj_type(), Some("Elem"));
1369
1370        let empty = StructElement {
1371            struct_type: "P".to_string(),
1372            obj_type: None,
1373            alt_text: None,
1374            actual_text: None,
1375            title: None,
1376            id: None,
1377            lang: None,
1378            page_ref: None,
1379            mcids: Vec::new(),
1380            children: Vec::new(),
1381            attributes: Vec::new(),
1382            parent_index: None,
1383        };
1384        assert!(empty.obj_type().is_none());
1385    }
1386
1387    #[test]
1388    fn test_struct_element_alt_text_getter() {
1389        let elem = make_full_element();
1390        assert_eq!(elem.alt_text(), Some("A cat photo"));
1391        assert_eq!(elem.struct_element_get_alt_text(), Some("A cat photo"));
1392    }
1393
1394    #[test]
1395    fn test_struct_element_actual_text_getter() {
1396        let elem = make_full_element();
1397        assert_eq!(elem.actual_text(), Some("Cat"));
1398        assert_eq!(elem.struct_element_get_actual_text(), Some("Cat"));
1399    }
1400
1401    #[test]
1402    fn test_struct_element_title_getter() {
1403        let elem = make_full_element();
1404        assert_eq!(elem.title(), Some("My Figure"));
1405        assert_eq!(elem.struct_element_get_title(), Some("My Figure"));
1406    }
1407
1408    #[test]
1409    fn test_struct_element_id_getter() {
1410        let elem = make_full_element();
1411        assert_eq!(elem.id(), Some("fig-001"));
1412        assert_eq!(elem.struct_element_get_id(), Some("fig-001"));
1413    }
1414
1415    #[test]
1416    fn test_struct_element_lang_getter() {
1417        let elem = make_full_element();
1418        assert_eq!(elem.lang(), Some("en-US"));
1419        assert_eq!(elem.struct_element_get_lang(), Some("en-US"));
1420    }
1421
1422    #[test]
1423    fn test_struct_element_string_attribute_found() {
1424        let elem = make_full_element();
1425        // Name value
1426        assert_eq!(elem.string_attribute("WritingMode"), Some("LrTb"));
1427        assert_eq!(
1428            elem.struct_element_get_string_attribute("WritingMode"),
1429            Some("LrTb")
1430        );
1431        // Text value
1432        assert_eq!(elem.string_attribute("TextLabel"), Some("hello"));
1433    }
1434
1435    #[test]
1436    fn test_struct_element_string_attribute_not_found() {
1437        let elem = make_full_element();
1438        assert!(elem.string_attribute("NoSuchAttr").is_none());
1439        // Number attribute — not string-like
1440        assert!(elem.string_attribute("SpaceBefore").is_none());
1441    }
1442
1443    #[test]
1444    fn test_struct_element_marked_content_id() {
1445        let elem = make_full_element();
1446        assert_eq!(elem.marked_content_id(), 10);
1447        assert_eq!(elem.struct_element_get_marked_content_id(), 10);
1448
1449        // Empty mcids → -1
1450        let mut empty = make_full_element();
1451        empty.mcids.clear();
1452        assert_eq!(empty.marked_content_id(), -1);
1453    }
1454
1455    #[test]
1456    fn test_struct_element_mcid_count() {
1457        let elem = make_full_element();
1458        assert_eq!(elem.struct_element_get_marked_content_id_count(), 3);
1459        assert_eq!(elem.struct_element_get_marked_content_id_count(), 3);
1460
1461        let mut empty = make_full_element();
1462        empty.mcids.clear();
1463        assert_eq!(empty.struct_element_get_marked_content_id_count(), -1);
1464    }
1465
1466    #[test]
1467    fn test_struct_element_mcid_at_index() {
1468        let elem = make_full_element();
1469        assert_eq!(elem.struct_element_get_marked_content_id_at_index(0), 10);
1470        assert_eq!(elem.struct_element_get_marked_content_id_at_index(1), 20);
1471        assert_eq!(elem.struct_element_get_marked_content_id_at_index(2), 30);
1472        assert_eq!(elem.struct_element_get_marked_content_id_at_index(99), -1);
1473        assert_eq!(elem.struct_element_get_marked_content_id_at_index(0), 10);
1474    }
1475
1476    #[test]
1477    fn test_struct_element_child_count() {
1478        let elem = make_full_element();
1479        assert_eq!(elem.child_count(), 1);
1480        assert_eq!(elem.struct_element_count_children(), 1);
1481    }
1482
1483    #[test]
1484    fn test_struct_element_child_at_index() {
1485        let elem = make_full_element();
1486        let child = elem.child_at_index(0).unwrap();
1487        assert_eq!(child.struct_type(), "Span");
1488        assert!(elem.child_at_index(99).is_none());
1489        assert_eq!(
1490            elem.struct_element_get_child_at_index(0)
1491                .unwrap()
1492                .struct_type(),
1493            "Span"
1494        );
1495    }
1496
1497    #[test]
1498    fn test_struct_element_child_marked_content_id() {
1499        let elem = make_full_element();
1500        assert_eq!(elem.child_marked_content_id(0), 42);
1501        assert_eq!(elem.child_marked_content_id(99), -1);
1502        assert_eq!(elem.struct_element_get_child_marked_content_id(0), 42);
1503    }
1504
1505    #[test]
1506    fn test_struct_element_attribute_count() {
1507        let elem = make_full_element();
1508        assert_eq!(elem.attribute_count(), 1);
1509        assert_eq!(elem.struct_element_get_attribute_count(), 1);
1510    }
1511
1512    #[test]
1513    fn test_struct_element_attribute_at_index() {
1514        let elem = make_full_element();
1515        let attr = elem.attribute_at_index(0).unwrap();
1516        assert_eq!(attr.owner(), "Layout");
1517        assert!(elem.attribute_at_index(99).is_none());
1518        assert_eq!(
1519            elem.struct_element_get_attribute_at_index(0)
1520                .unwrap()
1521                .owner(),
1522            "Layout"
1523        );
1524    }
1525
1526    #[test]
1527    fn test_struct_element_page_ref() {
1528        let elem = make_full_element();
1529        assert_eq!(elem.page_ref(), Some(ObjectId::new(5, 0)));
1530    }
1531
1532    // --- parent_index tests (FPDF_StructElement_GetParent) ---
1533
1534    /// Root elements (those without a parent) always return `None`.
1535    ///
1536    /// Corresponds to upstream `FPDF_StructElement_GetParent` returning `NULL`
1537    /// for root-level elements.
1538    #[test]
1539    fn test_struct_element_parent_index_root_is_none() {
1540        // make_full_element() is a root element: parent_index is explicitly None.
1541        let elem = make_full_element();
1542        assert!(elem.parent_index().is_none());
1543        assert!(elem.struct_element_get_parent().is_none());
1544    }
1545
1546    /// A child element that is manually placed in a parent's `children` vec
1547    /// with `parent_index: Some(idx)` reports the correct index.
1548    ///
1549    /// Corresponds to upstream `FPDF_StructElement_GetParent` returning the
1550    /// parent handle for non-root elements.
1551    #[test]
1552    fn test_struct_element_parent_index_child_has_parent() {
1553        // Build a parent with two children; the second child is at index 1.
1554        let child0 = StructElement {
1555            struct_type: "Span".to_string(),
1556            obj_type: None,
1557            alt_text: None,
1558            actual_text: None,
1559            title: None,
1560            id: None,
1561            lang: None,
1562            page_ref: None,
1563            mcids: Vec::new(),
1564            children: Vec::new(),
1565            attributes: Vec::new(),
1566            parent_index: Some(0),
1567        };
1568        let child1 = StructElement {
1569            struct_type: "Link".to_string(),
1570            obj_type: None,
1571            alt_text: None,
1572            actual_text: None,
1573            title: None,
1574            id: None,
1575            lang: None,
1576            page_ref: None,
1577            mcids: Vec::new(),
1578            children: Vec::new(),
1579            attributes: Vec::new(),
1580            parent_index: Some(1),
1581        };
1582
1583        // child0 is at index 0 in the parent's children, child1 at index 1.
1584        assert_eq!(child0.parent_index(), Some(0));
1585        assert_eq!(child0.struct_element_get_parent(), Some(0));
1586        assert_eq!(child1.parent_index(), Some(1));
1587        assert_eq!(child1.struct_element_get_parent(), Some(1));
1588    }
1589
1590    // --- StructAttribute method tests ---
1591
1592    fn make_attr() -> StructAttribute {
1593        StructAttribute {
1594            owner: "Table".to_string(),
1595            entries: vec![
1596                ("RowSpan".to_string(), AttributeValue::Number(2.0)),
1597                ("ColSpan".to_string(), AttributeValue::Number(1.0)),
1598                ("Scope".to_string(), AttributeValue::Name("Row".to_string())),
1599            ],
1600        }
1601    }
1602
1603    #[test]
1604    fn test_struct_attribute_owner() {
1605        let attr = make_attr();
1606        assert_eq!(attr.owner(), "Table");
1607    }
1608
1609    #[test]
1610    fn test_struct_attribute_entry_count() {
1611        let attr = make_attr();
1612        assert_eq!(attr.entry_count(), 3);
1613        assert_eq!(attr.struct_element_attr_get_count(), 3);
1614    }
1615
1616    #[test]
1617    fn test_struct_attribute_entry_name_at_index() {
1618        let attr = make_attr();
1619        assert_eq!(attr.entry_name_at_index(0), Some("RowSpan"));
1620        assert_eq!(attr.entry_name_at_index(2), Some("Scope"));
1621        assert!(attr.entry_name_at_index(99).is_none());
1622        assert_eq!(attr.struct_element_attr_get_name(0), Some("RowSpan"));
1623    }
1624
1625    #[test]
1626    fn test_struct_attribute_value_for_key() {
1627        let attr = make_attr();
1628        assert!(attr.value_for_key("RowSpan").is_some());
1629        assert!(attr.value_for_key("NoSuch").is_none());
1630        assert!(attr.struct_element_attr_get_value("Scope").is_some());
1631    }
1632
1633    #[test]
1634    fn test_struct_attribute_value_at_index() {
1635        let attr = make_attr();
1636        assert!(attr.value_at_index(0).is_some());
1637        assert!(attr.value_at_index(99).is_none());
1638    }
1639
1640    // --- AttributeValue method tests ---
1641
1642    #[test]
1643    fn test_attribute_value_as_number() {
1644        let v = AttributeValue::Number(3.14);
1645        assert!((v.as_number().unwrap() - 3.14).abs() < 0.001);
1646        assert_eq!(v.type_name(), "Number");
1647        assert_eq!(v.struct_element_attr_get_type(), "Number");
1648        assert!(v.struct_element_attr_get_string_value().is_none());
1649        assert!(v.as_name_str().is_none());
1650        assert!(v.as_array().is_none());
1651        assert!(v.as_blob().is_none());
1652        assert_eq!(v.child_count(), -1);
1653    }
1654
1655    #[test]
1656    fn test_attribute_value_as_text() {
1657        let v = AttributeValue::Text("hello".to_string());
1658        assert_eq!(v.as_text(), Some("hello"));
1659        assert_eq!(v.struct_element_attr_get_string_value(), Some("hello"));
1660        assert_eq!(v.type_name(), "String");
1661        assert!(v.as_number().is_none());
1662        assert!(v.is_string_like());
1663        assert_eq!(v.as_blob(), Some(b"hello".as_slice()));
1664        assert_eq!(
1665            v.struct_element_attr_get_blob_value(),
1666            Some(b"hello".as_slice())
1667        );
1668    }
1669
1670    #[test]
1671    fn test_attribute_value_as_name() {
1672        let v = AttributeValue::Name("LrTb".to_string());
1673        assert_eq!(v.as_name_str(), Some("LrTb"));
1674        assert_eq!(v.as_name_str(), Some("LrTb"));
1675        assert_eq!(v.type_name(), "Name");
1676        assert!(v.is_string_like());
1677        assert!(v.as_number().is_none());
1678        assert_eq!(v.as_blob(), Some(b"LrTb".as_slice()));
1679    }
1680
1681    #[test]
1682    fn test_attribute_value_as_array() {
1683        let v = AttributeValue::Array(vec![1.0, 2.0, 3.0]);
1684        assert_eq!(v.as_array(), Some([1.0_f64, 2.0, 3.0].as_slice()));
1685        assert_eq!(v.as_array(), Some([1.0_f64, 2.0, 3.0].as_slice()));
1686        assert_eq!(v.type_name(), "Array");
1687        assert_eq!(v.child_count(), 3);
1688        assert_eq!(v.struct_element_attr_count_children(), 3);
1689        assert_eq!(v.child_at_index(1), Some(2.0));
1690        assert_eq!(v.struct_element_attr_get_child_at_index(2), Some(3.0));
1691        assert!(v.child_at_index(99).is_none());
1692        assert!(!v.is_string_like());
1693        assert!(v.as_blob().is_none());
1694        assert!(v.as_number().is_none());
1695    }
1696
1697    // --- as_bool tests ---
1698
1699    #[test]
1700    fn test_attribute_value_as_bool_name_true() {
1701        let v = AttributeValue::Name("true".to_string());
1702        assert_eq!(v.as_bool(), Some(true));
1703        assert_eq!(v.struct_element_attr_get_boolean_value(), Some(true));
1704
1705        // Case-insensitive
1706        let v2 = AttributeValue::Name("True".to_string());
1707        assert_eq!(v2.as_bool(), Some(true));
1708
1709        let v3 = AttributeValue::Name("TRUE".to_string());
1710        assert_eq!(v3.as_bool(), Some(true));
1711    }
1712
1713    #[test]
1714    fn test_attribute_value_as_bool_name_false() {
1715        let v = AttributeValue::Name("false".to_string());
1716        assert_eq!(v.as_bool(), Some(false));
1717
1718        let v2 = AttributeValue::Name("False".to_string());
1719        assert_eq!(v2.as_bool(), Some(false));
1720    }
1721
1722    #[test]
1723    fn test_attribute_value_as_bool_text() {
1724        let v = AttributeValue::Text("true".to_string());
1725        assert_eq!(v.as_bool(), Some(true));
1726
1727        let v2 = AttributeValue::Text("false".to_string());
1728        assert_eq!(v2.as_bool(), Some(false));
1729
1730        // Non-boolean string
1731        let v3 = AttributeValue::Text("maybe".to_string());
1732        assert_eq!(v3.as_bool(), None);
1733    }
1734
1735    #[test]
1736    fn test_attribute_value_as_bool_number() {
1737        assert_eq!(AttributeValue::Number(0.0).as_bool(), Some(false));
1738        assert_eq!(AttributeValue::Number(1.0).as_bool(), Some(true));
1739        assert_eq!(AttributeValue::Number(-1.0).as_bool(), Some(true));
1740        assert_eq!(AttributeValue::Number(42.0).as_bool(), Some(true));
1741    }
1742
1743    #[test]
1744    fn test_attribute_value_as_bool_array_returns_none() {
1745        let v = AttributeValue::Array(vec![1.0, 0.0]);
1746        assert_eq!(v.as_bool(), None);
1747        assert_eq!(v.struct_element_attr_get_boolean_value(), None);
1748    }
1749
1750    #[test]
1751    fn test_attribute_value_as_bool_non_boolean_name_returns_none() {
1752        let v = AttributeValue::Name("LrTb".to_string());
1753        assert_eq!(v.as_bool(), None);
1754    }
1755}