litchi 0.0.1

High-performance parser for Microsoft Office, OpenDocument, and Apple iWork file formats with unified API
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
//! Base element types for ODF XML elements.
//!
//! This module provides the `ElementBase` trait that ODF element types
//! implement, together with a concrete, namespace-aware `Element`, offering
//! common functionality for XML manipulation.

use crate::common::{Error, Result};
use crate::odf::elements::namespace::{QualifiedName, NamespaceContext};
use quick_xml::events::Event;
use std::collections::HashMap;

/// Property definition for element attributes.
///
/// A plain record of three owned strings; the constructors copy their
/// arguments verbatim and perform no validation.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct PropDef {
    /// Name of the property.
    pub name: String,
    /// Attribute string paired with the property.
    pub attr: String,
    /// Family string; empty when the definition was built with [`PropDef::new`].
    pub family: String,
}

#[allow(dead_code)]
impl PropDef {
    /// Builds a definition whose `family` is the empty string.
    pub fn new(name: &str, attr: &str) -> Self {
        Self::with_family(name, attr, "")
    }

    /// Builds a definition carrying an explicit `family`.
    pub fn with_family(name: &str, attr: &str, family: &str) -> Self {
        let (name, attr, family) = (name.to_owned(), attr.to_owned(), family.to_owned());
        Self { name, attr, family }
    }
}

/// Base trait for all ODF elements.
///
/// Implementors provide the seven raw accessors (tag name, attributes, text
/// and children, plus their mutable counterparts); every other method has a
/// default implementation built purely on top of those accessors.
#[allow(dead_code)]
pub trait ElementBase {
    /// Returns the tag name of this element.
    fn tag_name(&self) -> &str;

    /// Returns the attribute map (attribute name to value).
    fn attributes(&self) -> &HashMap<String, String>;

    /// Returns the attribute map for in-place modification.
    fn attributes_mut(&mut self) -> &mut HashMap<String, String>;

    /// Returns the text content of this element.
    fn text(&self) -> &str;

    /// Replaces the text content of this element.
    fn set_text(&mut self, text: &str);

    /// Returns the child elements as a slice.
    fn children(&self) -> &[Box<dyn ElementBase>];

    /// Returns the child element list for in-place modification.
    fn children_mut(&mut self) -> &mut Vec<Box<dyn ElementBase>>;

    /// Looks up an attribute by its exact name.
    fn get_attribute(&self, name: &str) -> Option<&str> {
        let value = self.attributes().get(name)?;
        Some(value.as_str())
    }

    /// Stores `value` under `name`, replacing any previous value.
    fn set_attribute(&mut self, name: &str, value: &str) {
        let (key, stored) = (name.to_owned(), value.to_owned());
        self.attributes_mut().insert(key, stored);
    }

    /// Deletes the attribute `name`; a missing attribute is not an error.
    fn remove_attribute(&mut self, name: &str) {
        let _previous = self.attributes_mut().remove(name);
    }

    /// Reports whether an attribute called `name` is present.
    fn has_attribute(&self, name: &str) -> bool {
        self.get_attribute(name).is_some()
    }

    /// Interprets an attribute as a boolean.
    ///
    /// `"true"`/`"1"` map to `true`, `"false"`/`"0"` map to `false`; a missing
    /// attribute or any other spelling yields `None`.
    fn get_bool_attribute(&self, name: &str) -> Option<bool> {
        match self.get_attribute(name)? {
            "true" | "1" => Some(true),
            "false" | "0" => Some(false),
            _ => None,
        }
    }

    /// Parses an attribute as `f64`; `None` when missing or unparsable.
    fn get_numeric_attribute(&self, name: &str) -> Option<f64> {
        let raw = self.get_attribute(name)?;
        raw.parse::<f64>().ok()
    }

    /// Parses an attribute as `i64`; `None` when missing or unparsable.
    fn get_int_attribute(&self, name: &str) -> Option<i64> {
        let raw = self.get_attribute(name)?;
        raw.parse::<i64>().ok()
    }

    /// Collects every direct child whose tag name equals `tag_name`,
    /// preserving child order.
    fn get_elements_by_tag(&self, tag_name: &str) -> Vec<&dyn ElementBase> {
        let mut matching: Vec<&dyn ElementBase> = Vec::new();
        for child in self.children() {
            if child.tag_name() == tag_name {
                matching.push(&**child);
            }
        }
        matching
    }

    /// Returns the first direct child whose tag name equals `tag_name`.
    fn get_element_by_tag(&self, tag_name: &str) -> Option<&dyn ElementBase> {
        for child in self.children() {
            if child.tag_name() == tag_name {
                return Some(&**child);
            }
        }
        None
    }

    /// Appends `child` after the existing children.
    fn add_child(&mut self, child: Box<dyn ElementBase>) {
        let siblings = self.children_mut();
        siblings.push(child);
    }

    /// Detaches and returns the child at `index`, or `None` when `index` is
    /// out of range.
    fn remove_child(&mut self, index: usize) -> Option<Box<dyn ElementBase>> {
        let child_count = self.children().len();
        if index >= child_count {
            return None;
        }
        Some(self.children_mut().remove(index))
    }

    /// Concatenates this element's own text followed by the recursive text of
    /// each child, in child order.
    fn get_text_recursive(&self) -> String {
        self.children()
            .iter()
            .fold(self.text().to_string(), |mut collected, child| {
                collected.push_str(&child.get_text_recursive());
                collected
            })
    }
}

/// Concrete Element implementation with namespace support
///
/// Holds one XML element: its tag name (raw and parsed), its attributes, the
/// namespace context used to resolve prefixed names, its text, and its child
/// elements. All fields are private; they are read and modified through the
/// inherent methods and the `ElementBase` implementation.
#[derive(Debug, Clone)]
pub struct Element {
    /// Tag name exactly as supplied to the constructor (e.g. `text:p`).
    tag_name: String,
    /// Parsed form of `tag_name`; re-parsed whenever the namespace context
    /// is replaced or extended.
    qualified_name: QualifiedName,
    /// Attribute name to attribute value.
    attributes: HashMap<String, String>,
    /// Namespace context used to parse qualified names for this element.
    namespace_context: NamespaceContext,
    /// Text content of the element.
    text_content: String,
    /// Child elements, in the order they were attached.
    children: Vec<Element>,
}

impl Element {
    /// Create a new element
    pub fn new(tag_name: &str) -> Self {
        let qualified_name = QualifiedName::from_string(tag_name);
        Self {
            tag_name: tag_name.to_string(),
            qualified_name,
            attributes: HashMap::new(),
            namespace_context: NamespaceContext::new(),
            text_content: String::new(),
            children: Vec::new(),
        }
    }

    /// Create a new element with namespace context
    pub fn new_with_context(tag_name: &str, namespace_context: NamespaceContext) -> Self {
        let qualified_name = namespace_context.parse_qualified_name(tag_name);
        Self {
            tag_name: tag_name.to_string(),
            qualified_name,
            attributes: HashMap::new(),
            namespace_context,
            text_content: String::new(),
            children: Vec::new(),
        }
    }

    /// Get the qualified name
    pub fn qualified_name(&self) -> &QualifiedName {
        &self.qualified_name
    }

    /// Get the namespace URI
    pub fn namespace_uri(&self) -> Option<&str> {
        self.qualified_name.namespace_uri.as_deref()
    }

    /// Get the local name (without namespace prefix)
    pub fn local_name(&self) -> &str {
        &self.qualified_name.local_name
    }

    /// Get the namespace context
    pub fn namespace_context(&self) -> &NamespaceContext {
        &self.namespace_context
    }

    /// Set namespace context
    pub fn set_namespace_context(&mut self, context: NamespaceContext) {
        self.namespace_context = context;
        // Re-parse qualified name with new context
        self.qualified_name = self.namespace_context.parse_qualified_name(&self.tag_name);
    }

    /// Add a namespace declaration
    pub fn add_namespace(&mut self, prefix: &str, uri: &str) {
        self.namespace_context.add_namespace(prefix, uri);
        // Re-parse qualified name with updated context
        self.qualified_name = self.namespace_context.parse_qualified_name(&self.tag_name);
    }

    /// Check if element name matches (namespace-aware)
    pub fn name_matches(&self, name: &str) -> bool {
        self.qualified_name.matches_str(name, Some(&self.namespace_context))
    }

    /// Get attribute with namespace-aware lookup
    pub fn get_qualified_attribute(&self, name: &str) -> Option<&str> {
        // First try exact match
        if let Some(value) = self.attributes.get(name) {
            return Some(value);
        }

        // Try namespace-aware match
        let qualified_name = self.namespace_context.parse_qualified_name(name);
        for (key, value) in &self.attributes {
            let key_qualified = self.namespace_context.parse_qualified_name(key);
            if key_qualified.matches(&qualified_name) {
                return Some(value);
            }
        }

        None
    }

    /// Create element from XML bytes
    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
        let mut reader = quick_xml::Reader::from_reader(bytes);
        let mut buf = Vec::new();
        let mut stack = Vec::new();

        loop {
            match reader.read_event_into(&mut buf) {
                Ok(Event::Start(ref e)) => {
                    let tag_name = String::from_utf8(e.name().as_ref().to_vec())
                        .map_err(|_| Error::InvalidFormat("Invalid UTF-8 in tag name".to_string()))?;

                    let mut namespace_context = NamespaceContext::new();

                    // First pass: collect namespace declarations
                    for attr_result in e.attributes() {
                        let attr = attr_result.map_err(|_| Error::InvalidFormat("Invalid attribute".to_string()))?;
                        let key = String::from_utf8(attr.key.as_ref().to_vec())
                            .map_err(|_| Error::InvalidFormat("Invalid UTF-8 in attribute key".to_string()))?;
                        let value = String::from_utf8(attr.value.to_vec())
                            .map_err(|_| Error::InvalidFormat("Invalid UTF-8 in attribute value".to_string()))?;

                        // Check for namespace declarations
                        if key == "xmlns" || key.starts_with("xmlns:") {
                            namespace_context.add_namespace(&key, &value);
                        }
                    }

                    let mut element = Element::new_with_context(&tag_name, namespace_context);

                    // Second pass: set regular attributes
                    for attr_result in e.attributes() {
                        let attr = attr_result.map_err(|_| Error::InvalidFormat("Invalid attribute".to_string()))?;
                        let key = String::from_utf8(attr.key.as_ref().to_vec())
                            .map_err(|_| Error::InvalidFormat("Invalid UTF-8 in attribute key".to_string()))?;
                        let value = String::from_utf8(attr.value.to_vec())
                            .map_err(|_| Error::InvalidFormat("Invalid UTF-8 in attribute value".to_string()))?;

                        // Skip namespace declarations - they're already handled
                        if !(key == "xmlns" || key.starts_with("xmlns:")) {
                            element.set_attribute(&key, &value);
                        }
                    }

                    stack.push(element);
                }
                Ok(Event::Text(ref t)) => {
                    if let Some(current) = stack.last_mut() {
                        let text = String::from_utf8(t.to_vec())
                            .map_err(|_| Error::InvalidFormat("Invalid UTF-8 in text content".to_string()))?;
                        current.text_content.push_str(&text);
                    }
                }
                Ok(Event::End(ref e)) => {
                    let _tag_name = String::from_utf8(e.name().as_ref().to_vec()) // Tag name for debugging - kept for future use
                        .map_err(|_| Error::InvalidFormat("Invalid UTF-8 in tag name".to_string()))?;

                    if let Some(element) = stack.pop() {
                        if let Some(parent) = stack.last_mut() {
                            parent.children.push(element);
                        } else {
                            // This is the root element
                            return Ok(element);
                        }
                    }
                }
                Ok(Event::Eof) => break,
                Err(e) => return Err(Error::InvalidFormat(format!("XML parsing error: {}", e))),
                _ => {}
            }
            buf.clear();
        }

        Err(Error::InvalidFormat("No root element found".to_string()))
    }

    /// Serialize element to XML string
    pub fn to_xml_string(&self) -> String {
        let mut xml = String::new();
        self.write_xml(&mut xml, 0);
        xml
    }

    fn write_xml(&self, output: &mut String, indent: usize) {
        let indent_str = "  ".repeat(indent);

        // Opening tag
        output.push_str(&indent_str);
        output.push('<');
        output.push_str(&self.tag_name);

        // Attributes
        for (key, value) in &self.attributes {
            output.push(' ');
            output.push_str(key);
            output.push_str("=\"");
            // Escape quotes in attribute values
            for ch in value.chars() {
                match ch {
                    '"' => output.push_str("&quot;"),
                    '&' => output.push_str("&amp;"),
                    '<' => output.push_str("&lt;"),
                    '>' => output.push_str("&gt;"),
                    _ => output.push(ch),
                }
            }
            output.push('"');
        }

        if self.children.is_empty() && self.text_content.is_empty() {
            // Self-closing tag
            output.push_str(" />");
        } else {
            output.push('>');

            // Text content
            if !self.text_content.is_empty() {
                // Escape text content
                for ch in self.text_content.chars() {
                    match ch {
                        '&' => output.push_str("&amp;"),
                        '<' => output.push_str("&lt;"),
                        '>' => output.push_str("&gt;"),
                        _ => output.push(ch),
                    }
                }
            }

            // Child elements
            for child in &self.children {
                output.push('\n');
                child.write_xml(output, indent + 1);
            }

            // Closing tag
            if !self.children.is_empty() {
                output.push('\n');
                output.push_str(&indent_str);
            }
            output.push_str("</");
            output.push_str(&self.tag_name);
            output.push('>');
        }
    }
}

/// `ElementBase` implementation for the concrete `Element`.
///
/// `Element` stores its children as `Vec<Element>`, whereas the trait's raw
/// child accessors are typed as collections of `Box<dyn ElementBase>`. Those
/// two element types have different sizes, so the raw accessors cannot expose
/// the stored children soundly. Every provided trait method that can be
/// answered from the typed children is therefore overridden below so that it
/// never goes through the raw accessors.
///
/// FIXME(unsound): `children()` (for a non-empty element), `children_mut()`
/// and the default `add_child` still rely on a layout-incompatible transmute.
/// A real fix requires changing the child storage or the trait signatures.
impl ElementBase for Element {
    fn tag_name(&self) -> &str {
        &self.tag_name
    }

    fn attributes(&self) -> &HashMap<String, String> {
        &self.attributes
    }

    fn attributes_mut(&mut self) -> &mut HashMap<String, String> {
        &mut self.attributes
    }

    fn text(&self) -> &str {
        &self.text_content
    }

    fn set_text(&mut self, text: &str) {
        self.text_content = text.to_string();
    }

    fn children(&self) -> &[Box<dyn ElementBase>] {
        // An element without children can be answered with a genuine empty slice.
        if self.children.is_empty() {
            return &[];
        }
        // FIXME(unsound): `&[Element]` is reinterpreted as
        // `&[Box<dyn ElementBase>]`. The length is carried over but the element
        // layout is not, so reading any item of the returned slice is
        // undefined behaviour. Kept only because the trait signature cannot be
        // satisfied from `Vec<Element>`; use the overridden helpers instead.
        unsafe { std::mem::transmute(&self.children[..]) }
    }

    fn children_mut(&mut self) -> &mut Vec<Box<dyn ElementBase>> {
        // FIXME(unsound): `&mut Vec<Element>` is reinterpreted as
        // `&mut Vec<Box<dyn ElementBase>>`. Pushing, removing or reading
        // elements through the returned reference corrupts memory.
        unsafe { std::mem::transmute(&mut self.children) }
    }

    /// Collects the direct children called `tag_name`, in child order, by
    /// walking the typed child list.
    fn get_elements_by_tag(&self, tag_name: &str) -> Vec<&dyn ElementBase> {
        self.children
            .iter()
            .filter(|child| child.tag_name.as_str() == tag_name)
            .map(|child| child as &dyn ElementBase)
            .collect()
    }

    /// Returns the first direct child called `tag_name`, if any.
    fn get_element_by_tag(&self, tag_name: &str) -> Option<&dyn ElementBase> {
        self.children
            .iter()
            .find(|child| child.tag_name.as_str() == tag_name)
            .map(|child| child as &dyn ElementBase)
    }

    /// Detaches the child at `index` and returns it boxed, or `None` when
    /// `index` is out of range.
    fn remove_child(&mut self, index: usize) -> Option<Box<dyn ElementBase>> {
        if index < self.children.len() {
            let removed: Box<dyn ElementBase> = Box::new(self.children.remove(index));
            Some(removed)
        } else {
            None
        }
    }

    /// Concatenates this element's text with the recursive text of every
    /// child, in child order.
    fn get_text_recursive(&self) -> String {
        let mut text = self.text_content.clone();
        for child in &self.children {
            text.push_str(&child.get_text_recursive());
        }
        text
    }
}

/// Helper for creating elements with specific tag names.
///
/// A unit struct used purely as a namespace for the constructors below.
#[allow(dead_code)]
pub struct ElementFactory;

#[allow(dead_code)]
impl ElementFactory {
    /// Returns a new `text:p` (paragraph) element.
    pub fn paragraph() -> Element {
        Self::with_tag("text:p")
    }

    /// Returns a new `text:span` element.
    pub fn span() -> Element {
        Self::with_tag("text:span")
    }

    /// Returns a new `text:h` (heading) element whose `text:outline-level`
    /// attribute holds `level` in decimal.
    pub fn heading(level: u8) -> Element {
        let outline_level = level.to_string();
        let mut heading = Self::with_tag("text:h");
        heading.set_attribute("text:outline-level", outline_level.as_str());
        heading
    }

    /// Returns a new `table:table` element.
    pub fn table() -> Element {
        Self::with_tag("table:table")
    }

    /// Returns a new `table:table-row` element.
    pub fn table_row() -> Element {
        Self::with_tag("table:table-row")
    }

    /// Returns a new `table:table-cell` element.
    pub fn table_cell() -> Element {
        Self::with_tag("table:table-cell")
    }

    /// Shared constructor: an empty element carrying `tag`.
    fn with_tag(tag: &str) -> Element {
        Element::new(tag)
    }
}