cml_rs/
parser.rs

1//! CML v0.2 XML Parser
2//!
3//! Parses CML v0.2 XML documents into strongly-typed structures
4
5use crate::types::*;
6use crate::{CmlError, Result};
7use quick_xml::events::{BytesStart, Event};
8use quick_xml::Reader;
9use std::io::BufRead;
10
11/// Parser for CML v0.2 documents
12pub struct CmlParser;
13
14impl CmlParser {
15    /// Parse CML v0.2 XML from string
16    pub fn parse_str(xml: &str) -> Result<CmlDocument> {
17        let reader = Reader::from_str(xml);
18        Self::parse(reader)
19    }
20
21    /// Parse CML v0.2 XML from reader
22    pub fn parse<R: BufRead>(mut reader: Reader<R>) -> Result<CmlDocument> {
23        let mut buf = Vec::new();
24
25        loop {
26            match reader.read_event_into(&mut buf) {
27                Ok(Event::Start(e)) if e.name().as_ref() == b"cml" => {
28                    return Self::parse_cml(&mut reader, e);
29                }
30                Ok(Event::Eof) => {
31                    return Err(CmlError::InvalidStructure(
32                        "No <cml> root element found".to_string(),
33                    ))
34                }
35                Ok(_) => {}
36                Err(e) => return Err(e.into()),
37            }
38            buf.clear();
39        }
40    }
41
42    /// Parse <cml> root element
43    fn parse_cml<R: BufRead>(
44        reader: &mut Reader<R>,
45        start: BytesStart,
46    ) -> Result<CmlDocument> {
47        let mut version = None;
48        let mut encoding = None;
49        let mut profile = None;
50        let mut id = None;
51
52        // Parse attributes
53        for attr in start.attributes() {
54            let attr = attr?;
55            let key = attr.key.as_ref();
56            let value = String::from_utf8_lossy(&attr.value).to_string();
57
58            match key {
59                b"version" => version = Some(value),
60                b"encoding" => encoding = Some(value),
61                b"profile" => profile = Some(value),
62                b"id" => id = Some(value),
63                _ => {}
64            }
65        }
66
67        let version = version.ok_or_else(|| {
68            CmlError::MissingAttribute("version required on <cml>".to_string())
69        })?;
70
71        let encoding = encoding.ok_or_else(|| {
72            CmlError::MissingAttribute("encoding required on <cml>".to_string())
73        })?;
74
75        let profile = profile.ok_or_else(|| {
76            CmlError::MissingAttribute("profile required on <cml>".to_string())
77        })?;
78
79        let mut header = None;
80        let mut body = None;
81        let mut footer = None;
82
83        let mut buf = Vec::new();
84
85        loop {
86            match reader.read_event_into(&mut buf) {
87                Ok(Event::Start(e)) => match e.name().as_ref() {
88                    b"header" => {
89                        header = Some(Self::parse_header(reader)?);
90                    }
91                    b"body" => {
92                        body = Some(Self::parse_body(reader)?);
93                    }
94                    b"footer" => {
95                        footer = Some(Self::parse_footer(reader)?);
96                    }
97                    _ => {}
98                },
99                Ok(Event::End(e)) if e.name().as_ref() == b"cml" => {
100                    break;
101                }
102                Ok(Event::Eof) => {
103                    return Err(CmlError::InvalidStructure("Unexpected EOF in <cml>".to_string()))
104                }
105                Ok(_) => {}
106                Err(e) => return Err(e.into()),
107            }
108            buf.clear();
109        }
110
111        let header =
112            header.ok_or_else(|| CmlError::InvalidStructure("<header> required".to_string()))?;
113
114        let body =
115            body.ok_or_else(|| CmlError::InvalidStructure("<body> required".to_string()))?;
116
117        let footer =
118            footer.ok_or_else(|| CmlError::InvalidStructure("<footer> required".to_string()))?;
119
120        Ok(CmlDocument {
121            version,
122            encoding,
123            profile,
124            id,
125            header,
126            body,
127            footer,
128        })
129    }
130
131    /// Parse <header> element
132    fn parse_header<R: BufRead>(reader: &mut Reader<R>) -> Result<Header> {
133        let mut title = None;
134        let mut authors = Vec::new();
135        let mut dates = Vec::new();
136        let mut identifiers = Vec::new();
137        let mut version = None;
138        let mut description = None;
139        let mut provenance = None;
140        let mut source = None;
141        let mut meta = Vec::new();
142
143        let mut buf = Vec::new();
144
145        loop {
146            match reader.read_event_into(&mut buf) {
147                Ok(Event::Start(e)) => match e.name().as_ref() {
148                    b"title" => {
149                        title = Some(Self::read_text(reader, "title")?);
150                    }
151                    b"author" => {
152                        authors.push(Self::parse_author(reader, e)?);
153                    }
154                    b"identifier" => {
155                        identifiers.push(Self::parse_identifier(reader, e)?);
156                    }
157                    b"version" => {
158                        version = Some(Self::read_text(reader, "version")?);
159                    }
160                    b"description" => {
161                        description = Some(Self::read_text(reader, "description")?);
162                    }
163                    b"provenance" => {
164                        provenance = Some(Self::read_text(reader, "provenance")?);
165                    }
166                    b"source" => {
167                        source = Some(Self::read_text(reader, "source")?);
168                    }
169                    _ => {}
170                },
171                Ok(Event::Empty(e)) => match e.name().as_ref() {
172                    b"date" => {
173                        dates.push(Self::parse_date_entry(e)?);
174                    }
175                    b"meta" => {
176                        meta.push(Self::parse_meta_entry(e)?);
177                    }
178                    _ => {}
179                },
180                Ok(Event::End(e)) if e.name().as_ref() == b"header" => {
181                    break;
182                }
183                Ok(Event::Eof) => {
184                    return Err(CmlError::InvalidStructure(
185                        "Unexpected EOF in <header>".to_string(),
186                    ))
187                }
188                Ok(_) => {}
189                Err(e) => return Err(e.into()),
190            }
191            buf.clear();
192        }
193
194        let title =
195            title.ok_or_else(|| CmlError::InvalidStructure("<title> required in header".to_string()))?;
196
197        Ok(Header {
198            title,
199            authors,
200            dates,
201            identifiers,
202            version,
203            description,
204            provenance,
205            source,
206            meta,
207        })
208    }
209
210    /// Parse <author> element
211    fn parse_author<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Author> {
212        let mut role = None;
213        let mut reference = None;
214
215        // Parse attributes
216        for attr in start.attributes() {
217            let attr = attr?;
218            let key = attr.key.as_ref();
219            let value = String::from_utf8_lossy(&attr.value).to_string();
220
221            match key {
222                b"role" => role = Some(value),
223                b"reference" => reference = Some(value),
224                _ => {}
225            }
226        }
227
228        let name = Self::read_text(reader, "author")?;
229
230        Ok(Author {
231            name,
232            role,
233            reference,
234        })
235    }
236
237    /// Parse <date> element (self-closing)
238    fn parse_date_entry(start: BytesStart) -> Result<DateEntry> {
239        let mut date_type = None;
240        let mut when = None;
241
242        for attr in start.attributes() {
243            let attr = attr?;
244            let key = attr.key.as_ref();
245            let value = String::from_utf8_lossy(&attr.value).to_string();
246
247            match key {
248                b"type" => date_type = Some(value),
249                b"when" => when = Some(value),
250                _ => {}
251            }
252        }
253
254        let date_type = date_type
255            .ok_or_else(|| CmlError::MissingAttribute("type required on <date>".to_string()))?;
256
257        let when =
258            when.ok_or_else(|| CmlError::MissingAttribute("when required on <date>".to_string()))?;
259
260        Ok(DateEntry { date_type, when })
261    }
262
263    /// Parse <identifier> element
264    fn parse_identifier<R: BufRead>(
265        reader: &mut Reader<R>,
266        start: BytesStart,
267    ) -> Result<Identifier> {
268        let mut scheme = None;
269
270        for attr in start.attributes() {
271            let attr = attr?;
272            let key = attr.key.as_ref();
273            let value = String::from_utf8_lossy(&attr.value).to_string();
274
275            if key == b"scheme" {
276                scheme = Some(value);
277            }
278        }
279
280        let scheme = scheme.ok_or_else(|| {
281            CmlError::MissingAttribute("scheme required on <identifier>".to_string())
282        })?;
283
284        let value = Self::read_text(reader, "identifier")?;
285
286        Ok(Identifier { scheme, value })
287    }
288
289    /// Parse <meta> element (self-closing)
290    fn parse_meta_entry(start: BytesStart) -> Result<MetaEntry> {
291        let mut name = None;
292        let mut value = None;
293
294        for attr in start.attributes() {
295            let attr = attr?;
296            let key = attr.key.as_ref();
297            let attr_value = String::from_utf8_lossy(&attr.value).to_string();
298
299            match key {
300                b"name" => name = Some(attr_value),
301                b"value" => value = Some(attr_value),
302                _ => {}
303            }
304        }
305
306        let name =
307            name.ok_or_else(|| CmlError::MissingAttribute("name required on <meta>".to_string()))?;
308
309        let value = value
310            .ok_or_else(|| CmlError::MissingAttribute("value required on <meta>".to_string()))?;
311
312        Ok(MetaEntry { name, value })
313    }
314
315    /// Parse <body> element
316    fn parse_body<R: BufRead>(reader: &mut Reader<R>) -> Result<Body> {
317        let blocks = Self::parse_blocks_until(reader, b"body")?;
318
319        if blocks.is_empty() {
320            return Err(CmlError::InvalidStructure(
321                "<body> must contain at least one block element".to_string(),
322            ));
323        }
324
325        Ok(Body { blocks })
326    }
327
328    /// Parse block elements until end tag
329    fn parse_blocks_until<R: BufRead>(
330        reader: &mut Reader<R>,
331        end_tag: &[u8],
332    ) -> Result<Vec<BlockElement>> {
333        let mut blocks = Vec::new();
334        let mut buf = Vec::new();
335
336        loop {
337            match reader.read_event_into(&mut buf) {
338                Ok(Event::Start(e)) => {
339                    if let Some(block) = Self::parse_block_element(reader, e)? {
340                        blocks.push(block);
341                    }
342                }
343                Ok(Event::Empty(e)) => {
344                    // Handle self-closing block elements like <break/>
345                    if e.name().as_ref() == b"break" {
346                        blocks.push(BlockElement::Break(Self::parse_break_empty(e)?));
347                    }
348                }
349                Ok(Event::End(e)) if e.name().as_ref() == end_tag => {
350                    break;
351                }
352                Ok(Event::Eof) => {
353                    return Err(CmlError::InvalidStructure(format!(
354                        "Unexpected EOF waiting for end tag: {}",
355                        String::from_utf8_lossy(end_tag)
356                    )))
357                }
358                Ok(_) => {}
359                Err(e) => return Err(e.into()),
360            }
361            buf.clear();
362        }
363
364        Ok(blocks)
365    }
366
367    /// Parse a block element
368    fn parse_block_element<R: BufRead>(
369        reader: &mut Reader<R>,
370        start: BytesStart,
371    ) -> Result<Option<BlockElement>> {
372        let element = match start.name().as_ref() {
373            b"section" => BlockElement::Section(Self::parse_section(reader, start)?),
374            b"paragraph" => BlockElement::Paragraph(Self::parse_paragraph(reader, start)?),
375            b"heading" => BlockElement::Heading(Self::parse_heading(reader, start)?),
376            b"aside" => BlockElement::Aside(Self::parse_aside(reader, start)?),
377            b"quote" => BlockElement::Quote(Self::parse_quote(reader, start)?),
378            b"list" => BlockElement::List(Self::parse_list(reader, start)?),
379            b"table" => BlockElement::Table(Self::parse_table(reader, start)?),
380            b"code" => BlockElement::Code(Self::parse_code(reader, start)?),
381            b"break" => BlockElement::Break(Self::parse_break(reader, start)?),
382            b"figure" => BlockElement::Figure(Self::parse_figure(reader, start)?),
383            _ => return Ok(None),
384        };
385
386        Ok(Some(element))
387    }
388
389    /// Parse <section> element
390    fn parse_section<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Section> {
391        let mut id = None;
392        let mut section_type = None;
393        let mut reference = None;
394
395        for attr in start.attributes() {
396            let attr = attr?;
397            let key = attr.key.as_ref();
398            let value = String::from_utf8_lossy(&attr.value).to_string();
399
400            match key {
401                b"id" => id = Some(value),
402                b"type" => section_type = Some(value),
403                b"ref" => reference = Some(value),
404                _ => {}
405            }
406        }
407
408        let content = Self::parse_blocks_until(reader, b"section")?;
409
410        Ok(Section {
411            id,
412            section_type,
413            reference,
414            content,
415        })
416    }
417
418    /// Parse <paragraph> element
419    fn parse_paragraph<R: BufRead>(
420        reader: &mut Reader<R>,
421        start: BytesStart,
422    ) -> Result<Paragraph> {
423        let mut id = None;
424        let mut paragraph_type = None;
425
426        for attr in start.attributes() {
427            let attr = attr?;
428            let key = attr.key.as_ref();
429            let value = String::from_utf8_lossy(&attr.value).to_string();
430
431            match key {
432                b"id" => id = Some(value),
433                b"type" => paragraph_type = Some(value),
434                _ => {}
435            }
436        }
437
438        let content = Self::parse_inline_content(reader, b"paragraph")?;
439
440        Ok(Paragraph {
441            id,
442            paragraph_type,
443            content,
444        })
445    }
446
447    /// Parse <heading> element
448    fn parse_heading<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Heading> {
449        let mut id = None;
450        let mut heading_type = None;
451        let mut size = None;
452
453        for attr in start.attributes() {
454            let attr = attr?;
455            let key = attr.key.as_ref();
456            let value = String::from_utf8_lossy(&attr.value).to_string();
457
458            match key {
459                b"id" => id = Some(value),
460                b"type" => heading_type = Some(value),
461                b"size" => size = Some(value.parse().map_err(|_| {
462                    CmlError::InvalidAttribute("size must be a number".to_string())
463                })?),
464                _ => {}
465            }
466        }
467
468        let size = size
469            .ok_or_else(|| CmlError::MissingAttribute("size required on <heading>".to_string()))?;
470
471        let content = Self::parse_inline_content(reader, b"heading")?;
472
473        Ok(Heading {
474            id,
475            heading_type,
476            size,
477            content,
478        })
479    }
480
481    /// Parse <aside> element
482    fn parse_aside<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Aside> {
483        let mut id = None;
484        let mut aside_type = None;
485        let mut side = None;
486
487        for attr in start.attributes() {
488            let attr = attr?;
489            let key = attr.key.as_ref();
490            let value = String::from_utf8_lossy(&attr.value).to_string();
491
492            match key {
493                b"id" => id = Some(value),
494                b"type" => aside_type = Some(value),
495                b"side" => {
496                    side = Some(match value.as_str() {
497                        "left" => Side::Left,
498                        "right" => Side::Right,
499                        _ => {
500                            return Err(CmlError::InvalidAttribute(
501                                "side must be 'left' or 'right'".to_string(),
502                            ))
503                        }
504                    })
505                }
506                _ => {}
507            }
508        }
509
510        let side =
511            side.ok_or_else(|| CmlError::MissingAttribute("side required on <aside>".to_string()))?;
512
513        let content = Self::parse_blocks_until(reader, b"aside")?;
514
515        Ok(Aside {
516            id,
517            aside_type,
518            side,
519            content,
520        })
521    }
522
523    /// Parse <quote> element
524    fn parse_quote<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Quote> {
525        let mut id = None;
526        let mut reference = None;
527        let mut source = None;
528
529        for attr in start.attributes() {
530            let attr = attr?;
531            let key = attr.key.as_ref();
532            let value = String::from_utf8_lossy(&attr.value).to_string();
533
534            match key {
535                b"id" => id = Some(value),
536                b"ref" => reference = Some(value),
537                b"source" => source = Some(value),
538                _ => {}
539            }
540        }
541
542        let content = Self::parse_blocks_until(reader, b"quote")?;
543
544        Ok(Quote {
545            id,
546            reference,
547            source,
548            content,
549        })
550    }
551
552    /// Parse <list> element
553    fn parse_list<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<List> {
554        let mut id = None;
555        let mut list_type = None;
556        let mut style = None;
557
558        for attr in start.attributes() {
559            let attr = attr?;
560            let key = attr.key.as_ref();
561            let value = String::from_utf8_lossy(&attr.value).to_string();
562
563            match key {
564                b"id" => id = Some(value),
565                b"type" => {
566                    list_type = Some(match value.as_str() {
567                        "ordered" => ListType::Ordered,
568                        "unordered" => ListType::Unordered,
569                        _ => {
570                            return Err(CmlError::InvalidAttribute(
571                                "list type must be 'ordered' or 'unordered'".to_string(),
572                            ))
573                        }
574                    })
575                }
576                b"style" => {
577                    style = Some(match value.as_str() {
578                        "numeric" => ListStyle::Numeric,
579                        "roman" => ListStyle::Roman,
580                        "alpha" => ListStyle::Alpha,
581                        "symbolic" => ListStyle::Symbolic,
582                        _ => {
583                            return Err(CmlError::InvalidAttribute(
584                                "invalid list style".to_string(),
585                            ))
586                        }
587                    })
588                }
589                _ => {}
590            }
591        }
592
593        let mut items = Vec::new();
594        let mut buf = Vec::new();
595
596        loop {
597            match reader.read_event_into(&mut buf) {
598                Ok(Event::Start(e)) if e.name().as_ref() == b"item" => {
599                    items.push(Self::parse_list_item(reader, e)?);
600                }
601                Ok(Event::End(e)) if e.name().as_ref() == b"list" => {
602                    break;
603                }
604                Ok(Event::Eof) => {
605                    return Err(CmlError::InvalidStructure(
606                        "Unexpected EOF in <list>".to_string(),
607                    ))
608                }
609                Ok(_) => {}
610                Err(e) => return Err(e.into()),
611            }
612            buf.clear();
613        }
614
615        if items.is_empty() {
616            return Err(CmlError::InvalidStructure(
617                "<list> must contain at least one <item>".to_string(),
618            ));
619        }
620
621        Ok(List {
622            id,
623            list_type,
624            style,
625            items,
626        })
627    }
628
629    /// Parse <item> element
630    fn parse_list_item<R: BufRead>(
631        reader: &mut Reader<R>,
632        start: BytesStart,
633    ) -> Result<ListItem> {
634        let mut id = None;
635
636        for attr in start.attributes() {
637            let attr = attr?;
638            if attr.key.as_ref() == b"id" {
639                id = Some(String::from_utf8_lossy(&attr.value).to_string());
640            }
641        }
642
643        // Try to parse as inline first, if we find a block element, switch to block mode
644        let content = Self::parse_list_item_content(reader)?;
645
646        Ok(ListItem { id, content })
647    }
648
649    /// Parse list item content (inline or block)
650    fn parse_list_item_content<R: BufRead>(reader: &mut Reader<R>) -> Result<ListItemContent> {
651        let mut blocks = Vec::new();
652        let mut inlines = Vec::new();
653        let mut has_blocks = false;
654        let mut buf = Vec::new();
655
656        loop {
657            match reader.read_event_into(&mut buf) {
658                Ok(Event::Start(e)) => {
659                    let name = e.name();
660                    // Check if this is a block element
661                    if Self::is_block_element(name.as_ref()) {
662                        has_blocks = true;
663                        if let Some(block) = Self::parse_block_element(reader, e)? {
664                            blocks.push(block);
665                        }
666                    } else if let Some(inline) = Self::parse_inline_element(reader, e)? {
667                        inlines.push(inline);
668                    }
669                }
670                Ok(Event::Text(e)) => {
671                    let text = e.unescape().unwrap().to_string().trim().to_string();
672                    if !text.is_empty() {
673                        inlines.push(InlineElement::Text(text));
674                    }
675                }
676                Ok(Event::End(e)) if e.name().as_ref() == b"item" => {
677                    break;
678                }
679                Ok(Event::Eof) => {
680                    return Err(CmlError::InvalidStructure(
681                        "Unexpected EOF in <item>".to_string(),
682                    ))
683                }
684                Ok(_) => {}
685                Err(e) => return Err(e.into()),
686            }
687            buf.clear();
688        }
689
690        if has_blocks {
691            Ok(ListItemContent::Block(blocks))
692        } else {
693            Ok(ListItemContent::Inline(inlines))
694        }
695    }
696
697    /// Check if element name is a block element
698    fn is_block_element(name: &[u8]) -> bool {
699        matches!(
700            name,
701            b"section"
702                | b"paragraph"
703                | b"heading"
704                | b"aside"
705                | b"quote"
706                | b"list"
707                | b"table"
708                | b"code"
709                | b"break"
710                | b"figure"
711        )
712    }
713
714    /// Parse <table> element
715    fn parse_table<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Table> {
716        let mut id = None;
717        let mut table_type = None;
718
719        for attr in start.attributes() {
720            let attr = attr?;
721            let key = attr.key.as_ref();
722            let value = String::from_utf8_lossy(&attr.value).to_string();
723
724            match key {
725                b"id" => id = Some(value),
726                b"type" => table_type = Some(value),
727                _ => {}
728            }
729        }
730
731        let mut header = None;
732        let mut body = None;
733        let mut footer = None;
734        let mut buf = Vec::new();
735
736        loop {
737            match reader.read_event_into(&mut buf) {
738                Ok(Event::Start(e)) => match e.name().as_ref() {
739                    b"header" => {
740                        header = Some(Self::parse_table_header(reader)?);
741                    }
742                    b"body" => {
743                        body = Some(Self::parse_table_body(reader)?);
744                    }
745                    b"footer" => {
746                        footer = Some(Self::parse_table_footer(reader)?);
747                    }
748                    _ => {}
749                },
750                Ok(Event::End(e)) if e.name().as_ref() == b"table" => {
751                    break;
752                }
753                Ok(Event::Eof) => {
754                    return Err(CmlError::InvalidStructure(
755                        "Unexpected EOF in <table>".to_string(),
756                    ))
757                }
758                Ok(_) => {}
759                Err(e) => return Err(e.into()),
760            }
761            buf.clear();
762        }
763
764        let body = body
765            .ok_or_else(|| CmlError::InvalidStructure("<body> required in table".to_string()))?;
766
767        Ok(Table {
768            id,
769            table_type,
770            header,
771            body,
772            footer,
773        })
774    }
775
776    /// Parse table <header>
777    fn parse_table_header<R: BufRead>(reader: &mut Reader<R>) -> Result<TableHeader> {
778        let mut rows = Vec::new();
779        let mut buf = Vec::new();
780
781        loop {
782            match reader.read_event_into(&mut buf) {
783                Ok(Event::Start(e)) if e.name().as_ref() == b"row" => {
784                    rows.push(Self::parse_table_row(reader, true)?);
785                }
786                Ok(Event::End(e)) if e.name().as_ref() == b"header" => {
787                    break;
788                }
789                Ok(Event::Eof) => {
790                    return Err(CmlError::InvalidStructure(
791                        "Unexpected EOF in table <header>".to_string(),
792                    ))
793                }
794                Ok(_) => {}
795                Err(e) => return Err(e.into()),
796            }
797            buf.clear();
798        }
799
800        Ok(TableHeader { rows })
801    }
802
803    /// Parse table <body>
804    fn parse_table_body<R: BufRead>(reader: &mut Reader<R>) -> Result<TableBody> {
805        let mut rows = Vec::new();
806        let mut buf = Vec::new();
807
808        loop {
809            match reader.read_event_into(&mut buf) {
810                Ok(Event::Start(e)) if e.name().as_ref() == b"row" => {
811                    rows.push(Self::parse_table_row(reader, false)?);
812                }
813                Ok(Event::End(e)) if e.name().as_ref() == b"body" => {
814                    break;
815                }
816                Ok(Event::Eof) => {
817                    return Err(CmlError::InvalidStructure(
818                        "Unexpected EOF in table <body>".to_string(),
819                    ))
820                }
821                Ok(_) => {}
822                Err(e) => return Err(e.into()),
823            }
824            buf.clear();
825        }
826
827        Ok(TableBody { rows })
828    }
829
830    /// Parse table <footer>
831    fn parse_table_footer<R: BufRead>(reader: &mut Reader<R>) -> Result<TableFooter> {
832        let mut caption = None;
833        let mut buf = Vec::new();
834
835        loop {
836            match reader.read_event_into(&mut buf) {
837                Ok(Event::Start(e)) if e.name().as_ref() == b"caption" => {
838                    let content = Self::parse_inline_content(reader, b"caption")?;
839                    caption = Some(Caption { content });
840                }
841                Ok(Event::End(e)) if e.name().as_ref() == b"footer" => {
842                    break;
843                }
844                Ok(Event::Eof) => {
845                    return Err(CmlError::InvalidStructure(
846                        "Unexpected EOF in table <footer>".to_string(),
847                    ))
848                }
849                Ok(_) => {}
850                Err(e) => return Err(e.into()),
851            }
852            buf.clear();
853        }
854
855        let caption = caption.ok_or_else(|| {
856            CmlError::InvalidStructure("<caption> required in table footer".to_string())
857        })?;
858
859        Ok(TableFooter { caption })
860    }
861
862    /// Parse table <row>
863    fn parse_table_row<R: BufRead>(reader: &mut Reader<R>, is_header: bool) -> Result<TableRow> {
864        let mut columns = Vec::new();
865        let mut buf = Vec::new();
866
867        loop {
868            match reader.read_event_into(&mut buf) {
869                Ok(Event::Start(e)) if e.name().as_ref() == b"column" => {
870                    columns.push(Self::parse_table_column(reader, e, is_header)?);
871                }
872                Ok(Event::End(e)) if e.name().as_ref() == b"row" => {
873                    break;
874                }
875                Ok(Event::Eof) => {
876                    return Err(CmlError::InvalidStructure(
877                        "Unexpected EOF in table <row>".to_string(),
878                    ))
879                }
880                Ok(_) => {}
881                Err(e) => return Err(e.into()),
882            }
883            buf.clear();
884        }
885
886        Ok(TableRow { columns })
887    }
888
889    /// Parse table <column>
890    fn parse_table_column<R: BufRead>(
891        reader: &mut Reader<R>,
892        start: BytesStart,
893        is_header: bool,
894    ) -> Result<TableColumn> {
895        let mut sort = None;
896
897        if is_header {
898            for attr in start.attributes() {
899                let attr = attr?;
900                if attr.key.as_ref() == b"sort" {
901                    let value = String::from_utf8_lossy(&attr.value).to_string();
902                    sort = Some(match value.as_str() {
903                        "asc" => SortOrder::Asc,
904                        "desc" => SortOrder::Desc,
905                        _ => {
906                            return Err(CmlError::InvalidAttribute(
907                                "sort must be 'asc' or 'desc'".to_string(),
908                            ))
909                        }
910                    });
911                }
912            }
913        }
914
915        // Look for <cell> element
916        let cell = Self::parse_table_cell(reader)?;
917
918        Ok(TableColumn { sort, cell })
919    }
920
921    /// Parse table <cell>
922    fn parse_table_cell<R: BufRead>(reader: &mut Reader<R>) -> Result<TableCell> {
923        let mut buf = Vec::new();
924        let mut colspan = None;
925        let mut rowspan = None;
926        let mut content = Vec::new();
927
928        loop {
929            match reader.read_event_into(&mut buf) {
930                Ok(Event::Start(e)) if e.name().as_ref() == b"cell" => {
931                    // Parse attributes
932                    for attr in e.attributes() {
933                        let attr = attr?;
934                        let key = attr.key.as_ref();
935                        let value = String::from_utf8_lossy(&attr.value).to_string();
936
937                        match key {
938                            b"colspan" => {
939                                colspan = Some(value.parse().map_err(|_| {
940                                    CmlError::InvalidAttribute(
941                                        "colspan must be a number".to_string(),
942                                    )
943                                })?)
944                            }
945                            b"rowspan" => {
946                                rowspan = Some(value.parse().map_err(|_| {
947                                    CmlError::InvalidAttribute(
948                                        "rowspan must be a number".to_string(),
949                                    )
950                                })?)
951                            }
952                            _ => {}
953                        }
954                    }
955
956                    // Parse inline content
957                    content = Self::parse_inline_content(reader, b"cell")?;
958                    break;
959                }
960                Ok(Event::Eof) => {
961                    return Err(CmlError::InvalidStructure(
962                        "Expected <cell> in column".to_string(),
963                    ))
964                }
965                Ok(_) => {}
966                Err(e) => return Err(e.into()),
967            }
968            buf.clear();
969        }
970
971        Ok(TableCell {
972            colspan,
973            rowspan,
974            content,
975        })
976    }
977
978    /// Parse <code> element
979    fn parse_code<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Code> {
980        let mut id = None;
981        let mut lang = None;
982        let mut copyable = None;
983
984        for attr in start.attributes() {
985            let attr = attr?;
986            let key = attr.key.as_ref();
987            let value = String::from_utf8_lossy(&attr.value).to_string();
988
989            match key {
990                b"id" => id = Some(value),
991                b"lang" => lang = Some(value),
992                b"copyable" => {
993                    copyable = Some(match value.as_str() {
994                        "true" => true,
995                        "false" => false,
996                        _ => {
997                            return Err(CmlError::InvalidAttribute(
998                                "copyable must be 'true' or 'false'".to_string(),
999                            ))
1000                        }
1001                    })
1002                }
1003                _ => {}
1004            }
1005        }
1006
1007        let content = Self::read_text(reader, "code")?;
1008
1009        Ok(Code {
1010            id,
1011            lang,
1012            copyable,
1013            content,
1014        })
1015    }
1016
1017    /// Parse <break> element
1018    fn parse_break<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Break> {
1019        let mut break_type = None;
1020
1021        for attr in start.attributes() {
1022            let attr = attr?;
1023            if attr.key.as_ref() == b"type" {
1024                break_type = Some(String::from_utf8_lossy(&attr.value).to_string());
1025            }
1026        }
1027
1028        // Read until end tag (should be immediate)
1029        let mut buf = Vec::new();
1030        loop {
1031            match reader.read_event_into(&mut buf) {
1032                Ok(Event::End(e)) if e.name().as_ref() == b"break" => {
1033                    break;
1034                }
1035                Ok(Event::Eof) => {
1036                    return Err(CmlError::InvalidStructure(
1037                        "Unexpected EOF in <break>".to_string(),
1038                    ))
1039                }
1040                Ok(_) => {}
1041                Err(e) => return Err(e.into()),
1042            }
1043            buf.clear();
1044        }
1045
1046        Ok(Break { break_type })
1047    }
1048
1049    /// Parse self-closing <break/> element
1050    fn parse_break_empty(start: BytesStart) -> Result<Break> {
1051        let mut break_type = None;
1052
1053        for attr in start.attributes() {
1054            let attr = attr?;
1055            if attr.key.as_ref() == b"type" {
1056                break_type = Some(String::from_utf8_lossy(&attr.value).to_string());
1057            }
1058        }
1059
1060        Ok(Break { break_type })
1061    }
1062
1063    /// Parse <figure> element (reserved for v0.3)
1064    fn parse_figure<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Figure> {
1065        let mut id = None;
1066        let mut figure_type = None;
1067        let mut reference = None;
1068
1069        for attr in start.attributes() {
1070            let attr = attr?;
1071            let key = attr.key.as_ref();
1072            let value = String::from_utf8_lossy(&attr.value).to_string();
1073
1074            match key {
1075                b"id" => id = Some(value),
1076                b"type" => figure_type = Some(value),
1077                b"ref" => reference = Some(value),
1078                _ => {}
1079            }
1080        }
1081
1082        // Skip content (reserved for v0.3)
1083        let mut buf = Vec::new();
1084        loop {
1085            match reader.read_event_into(&mut buf) {
1086                Ok(Event::End(e)) if e.name().as_ref() == b"figure" => {
1087                    break;
1088                }
1089                Ok(Event::Eof) => {
1090                    return Err(CmlError::InvalidStructure(
1091                        "Unexpected EOF in <figure>".to_string(),
1092                    ))
1093                }
1094                Ok(_) => {}
1095                Err(e) => return Err(e.into()),
1096            }
1097            buf.clear();
1098        }
1099
1100        // TODO: Emit warning that <figure> is reserved for v0.3
1101
1102        Ok(Figure {
1103            id,
1104            figure_type,
1105            reference,
1106        })
1107    }
1108
1109    /// Parse inline content until end tag
1110    fn parse_inline_content<R: BufRead>(
1111        reader: &mut Reader<R>,
1112        end_tag: &[u8],
1113    ) -> Result<Vec<InlineElement>> {
1114        let mut elements = Vec::new();
1115        let mut buf = Vec::new();
1116
1117        loop {
1118            match reader.read_event_into(&mut buf) {
1119                Ok(Event::Start(e)) => {
1120                    if let Some(inline) = Self::parse_inline_element(reader, e)? {
1121                        elements.push(inline);
1122                    }
1123                }
1124                Ok(Event::Empty(e)) => {
1125                    // Handle self-closing inline elements like <end/>
1126                    if e.name().as_ref() == b"end" {
1127                        elements.push(InlineElement::End(Self::parse_end_empty(e)?));
1128                    }
1129                }
1130                Ok(Event::Text(e)) => {
1131                    let text = e.unescape().unwrap().to_string();
1132                    if !text.trim().is_empty() {
1133                        elements.push(InlineElement::Text(text));
1134                    }
1135                }
1136                Ok(Event::End(e)) if e.name().as_ref() == end_tag => {
1137                    break;
1138                }
1139                Ok(Event::Eof) => {
1140                    return Err(CmlError::InvalidStructure(format!(
1141                        "Unexpected EOF waiting for end tag: {}",
1142                        String::from_utf8_lossy(end_tag)
1143                    )))
1144                }
1145                Ok(_) => {}
1146                Err(e) => return Err(e.into()),
1147            }
1148            buf.clear();
1149        }
1150
1151        Ok(elements)
1152    }
1153
1154    /// Parse an inline element
1155    fn parse_inline_element<R: BufRead>(
1156        reader: &mut Reader<R>,
1157        start: BytesStart,
1158    ) -> Result<Option<InlineElement>> {
1159        let element = match start.name().as_ref() {
1160            b"em" => InlineElement::Em(Self::parse_em(reader, start)?),
1161            b"bo" => InlineElement::Bo(Self::parse_bo(reader)?),
1162            b"un" => InlineElement::Un(Self::parse_un(reader)?),
1163            b"st" => InlineElement::St(Self::parse_st(reader)?),
1164            b"snip" => InlineElement::Snip(Self::parse_snip(reader, start)?),
1165            b"key" => InlineElement::Key(Self::parse_key(reader)?),
1166            b"rf" => InlineElement::Rf(Self::parse_rf(reader, start)?),
1167            b"tg" => InlineElement::Tg(Self::parse_tg(reader, start)?),
1168            b"lk" => InlineElement::Lk(Self::parse_lk(reader, start)?),
1169            b"curr" => InlineElement::Curr(Self::parse_curr(reader, start)?),
1170            b"end" => InlineElement::End(Self::parse_end(reader, start)?),
1171            _ => return Ok(None),
1172        };
1173
1174        Ok(Some(element))
1175    }
1176
1177    /// Parse <em> element
1178    fn parse_em<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Em> {
1179        let mut em_type = None;
1180
1181        for attr in start.attributes() {
1182            let attr = attr?;
1183            if attr.key.as_ref() == b"type" {
1184                let value = String::from_utf8_lossy(&attr.value).to_string();
1185                em_type = Some(match value.as_str() {
1186                    "stress" => EmphasisType::Stress,
1187                    "contrast" => EmphasisType::Contrast,
1188                    _ => {
1189                        return Err(CmlError::InvalidAttribute(
1190                            "em type must be 'stress' or 'contrast'".to_string(),
1191                        ))
1192                    }
1193                });
1194            }
1195        }
1196
1197        let content = Self::parse_inline_content(reader, b"em")?;
1198
1199        Ok(Em { em_type, content })
1200    }
1201
1202    /// Parse <bo> element
1203    fn parse_bo<R: BufRead>(reader: &mut Reader<R>) -> Result<Bo> {
1204        let content = Self::parse_inline_content(reader, b"bo")?;
1205        Ok(Bo { content })
1206    }
1207
1208    /// Parse <un> element
1209    fn parse_un<R: BufRead>(reader: &mut Reader<R>) -> Result<Un> {
1210        let content = Self::parse_inline_content(reader, b"un")?;
1211        Ok(Un { content })
1212    }
1213
1214    /// Parse <st> element
1215    fn parse_st<R: BufRead>(reader: &mut Reader<R>) -> Result<St> {
1216        let content = Self::parse_inline_content(reader, b"st")?;
1217        Ok(St { content })
1218    }
1219
1220    /// Parse <snip> element (text only, no nesting)
1221    fn parse_snip<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Snip> {
1222        let mut char = None;
1223
1224        for attr in start.attributes() {
1225            let attr = attr?;
1226            if attr.key.as_ref() == b"char" {
1227                char = Some(String::from_utf8_lossy(&attr.value).to_string());
1228            }
1229        }
1230
1231        let content = Self::read_text(reader, "snip")?;
1232
1233        Ok(Snip { char, content })
1234    }
1235
1236    /// Parse <key> element (text only, no nesting)
1237    fn parse_key<R: BufRead>(reader: &mut Reader<R>) -> Result<Key> {
1238        let content = Self::read_text(reader, "key")?;
1239        Ok(Key { content })
1240    }
1241
1242    /// Parse <rf> element (internal reference)
1243    fn parse_rf<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Rf> {
1244        let mut reference = None;
1245        let mut role = None;
1246        let mut title = None;
1247
1248        for attr in start.attributes() {
1249            let attr = attr?;
1250            let key = attr.key.as_ref();
1251            let value = String::from_utf8_lossy(&attr.value).to_string();
1252
1253            match key {
1254                b"ref" => reference = Some(value),
1255                b"role" => role = Some(value),
1256                b"title" => title = Some(value),
1257                _ => {}
1258            }
1259        }
1260
1261        let reference =
1262            reference.ok_or_else(|| CmlError::MissingAttribute("ref required on <rf>".to_string()))?;
1263
1264        let content = Self::read_text(reader, "rf")?;
1265
1266        Ok(Rf {
1267            reference,
1268            role,
1269            title,
1270            content,
1271        })
1272    }
1273
1274    /// Parse <tg> element (topic tag)
1275    fn parse_tg<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Tg> {
1276        let mut reference = None;
1277        let mut role = None;
1278        let mut title = None;
1279
1280        for attr in start.attributes() {
1281            let attr = attr?;
1282            let key = attr.key.as_ref();
1283            let value = String::from_utf8_lossy(&attr.value).to_string();
1284
1285            match key {
1286                b"ref" => reference = Some(value),
1287                b"role" => role = Some(value),
1288                b"title" => title = Some(value),
1289                _ => {}
1290            }
1291        }
1292
1293        let reference =
1294            reference.ok_or_else(|| CmlError::MissingAttribute("ref required on <tg>".to_string()))?;
1295
1296        let content = Self::read_text(reader, "tg")?;
1297
1298        Ok(Tg {
1299            reference,
1300            role,
1301            title,
1302            content,
1303        })
1304    }
1305
1306    /// Parse <lk> element (external link)
1307    fn parse_lk<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Lk> {
1308        let mut reference = None;
1309        let mut role = None;
1310        let mut title = None;
1311
1312        for attr in start.attributes() {
1313            let attr = attr?;
1314            let key = attr.key.as_ref();
1315            let value = String::from_utf8_lossy(&attr.value).to_string();
1316
1317            match key {
1318                b"ref" => reference = Some(value),
1319                b"role" => role = Some(value),
1320                b"title" => title = Some(value),
1321                _ => {}
1322            }
1323        }
1324
1325        let reference =
1326            reference.ok_or_else(|| CmlError::MissingAttribute("ref required on <lk>".to_string()))?;
1327
1328        let content = Self::read_text(reader, "lk")?;
1329
1330        Ok(Lk {
1331            reference,
1332            role,
1333            title,
1334            content,
1335        })
1336    }
1337
1338    /// Parse <curr> element (currency)
1339    fn parse_curr<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Curr> {
1340        let mut currency_type = None;
1341        let mut format = None;
1342
1343        for attr in start.attributes() {
1344            let attr = attr?;
1345            let key = attr.key.as_ref();
1346            let value = String::from_utf8_lossy(&attr.value).to_string();
1347
1348            match key {
1349                b"type" => currency_type = Some(value),
1350                b"format" => {
1351                    format = Some(match value.as_str() {
1352                        "symbol" => CurrencyFormat::Symbol,
1353                        "code" => CurrencyFormat::Code,
1354                        "name" => CurrencyFormat::Name,
1355                        _ => {
1356                            return Err(CmlError::InvalidAttribute(
1357                                "currency format must be 'symbol', 'code', or 'name'".to_string(),
1358                            ))
1359                        }
1360                    })
1361                }
1362                _ => {}
1363            }
1364        }
1365
1366        let currency_type = currency_type
1367            .ok_or_else(|| CmlError::MissingAttribute("type required on <curr>".to_string()))?;
1368
1369        let value = Self::read_text(reader, "curr")?;
1370
1371        Ok(Curr {
1372            currency_type,
1373            format,
1374            value,
1375        })
1376    }
1377
1378    /// Parse <end> element
1379    fn parse_end<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<End> {
1380        let mut kind = None;
1381
1382        for attr in start.attributes() {
1383            let attr = attr?;
1384            if attr.key.as_ref() == b"kind" {
1385                let value = String::from_utf8_lossy(&attr.value).to_string();
1386                kind = Some(match value.as_str() {
1387                    "line" => EndKind::Line,
1388                    "verse" => EndKind::Verse,
1389                    "item" => EndKind::Item,
1390                    _ => {
1391                        return Err(CmlError::InvalidAttribute(
1392                            "end kind must be 'line', 'verse', or 'item'".to_string(),
1393                        ))
1394                    }
1395                });
1396            }
1397        }
1398
1399        // Read until end tag (should be immediate for self-closing)
1400        let mut buf = Vec::new();
1401        loop {
1402            match reader.read_event_into(&mut buf) {
1403                Ok(Event::End(e)) if e.name().as_ref() == b"end" => {
1404                    break;
1405                }
1406                Ok(Event::Eof) => {
1407                    return Err(CmlError::InvalidStructure(
1408                        "Unexpected EOF in <end>".to_string(),
1409                    ))
1410                }
1411                Ok(_) => {}
1412                Err(e) => return Err(e.into()),
1413            }
1414            buf.clear();
1415        }
1416
1417        Ok(End { kind })
1418    }
1419
1420    /// Parse self-closing <end/> element
1421    fn parse_end_empty(start: BytesStart) -> Result<End> {
1422        let mut kind = None;
1423
1424        for attr in start.attributes() {
1425            let attr = attr?;
1426            if attr.key.as_ref() == b"kind" {
1427                let value = String::from_utf8_lossy(&attr.value).to_string();
1428                kind = Some(match value.as_str() {
1429                    "line" => EndKind::Line,
1430                    "verse" => EndKind::Verse,
1431                    "item" => EndKind::Item,
1432                    _ => {
1433                        return Err(CmlError::InvalidAttribute(
1434                            "end kind must be 'line', 'verse', or 'item'".to_string(),
1435                        ))
1436                    }
1437                });
1438            }
1439        }
1440
1441        Ok(End { kind })
1442    }
1443
1444    /// Parse <footer> element
1445    fn parse_footer<R: BufRead>(reader: &mut Reader<R>) -> Result<Footer> {
1446        let mut signatures = None;
1447        let mut citations = None;
1448        let mut annotations = None;
1449
1450        let mut buf = Vec::new();
1451
1452        loop {
1453            match reader.read_event_into(&mut buf) {
1454                Ok(Event::Start(e)) => match e.name().as_ref() {
1455                    b"signatures" => {
1456                        signatures = Some(Self::parse_signatures(reader)?);
1457                    }
1458                    b"citations" => {
1459                        citations = Some(Self::parse_citations(reader)?);
1460                    }
1461                    b"annotations" => {
1462                        annotations = Some(Self::parse_annotations(reader)?);
1463                    }
1464                    _ => {}
1465                },
1466                Ok(Event::End(e)) if e.name().as_ref() == b"footer" => {
1467                    break;
1468                }
1469                Ok(Event::Eof) => {
1470                    return Err(CmlError::InvalidStructure(
1471                        "Unexpected EOF in <footer>".to_string(),
1472                    ))
1473                }
1474                Ok(_) => {}
1475                Err(e) => return Err(e.into()),
1476            }
1477            buf.clear();
1478        }
1479
1480        Ok(Footer {
1481            signatures,
1482            citations,
1483            annotations,
1484        })
1485    }
1486
1487    /// Parse <signatures> container
1488    fn parse_signatures<R: BufRead>(reader: &mut Reader<R>) -> Result<Signatures> {
1489        let mut signatures = Vec::new();
1490        let mut buf = Vec::new();
1491
1492        loop {
1493            match reader.read_event_into(&mut buf) {
1494                Ok(Event::Start(e)) if e.name().as_ref() == b"signature" => {
1495                    signatures.push(Self::parse_signature(reader, e)?);
1496                }
1497                Ok(Event::End(e)) if e.name().as_ref() == b"signatures" => {
1498                    break;
1499                }
1500                Ok(Event::Eof) => {
1501                    return Err(CmlError::InvalidStructure(
1502                        "Unexpected EOF in <signatures>".to_string(),
1503                    ))
1504                }
1505                Ok(_) => {}
1506                Err(e) => return Err(e.into()),
1507            }
1508            buf.clear();
1509        }
1510
1511        Ok(Signatures { signatures })
1512    }
1513
1514    /// Parse <signature> element
1515    fn parse_signature<R: BufRead>(
1516        reader: &mut Reader<R>,
1517        start: BytesStart,
1518    ) -> Result<Signature> {
1519        let mut when = None;
1520        let mut role = None;
1521        let mut reference = None;
1522
1523        for attr in start.attributes() {
1524            let attr = attr?;
1525            let key = attr.key.as_ref();
1526            let value = String::from_utf8_lossy(&attr.value).to_string();
1527
1528            match key {
1529                b"when" => when = Some(value),
1530                b"role" => role = Some(value),
1531                b"ref" => reference = Some(value),
1532                _ => {}
1533            }
1534        }
1535
1536        let when = when.ok_or_else(|| {
1537            CmlError::MissingAttribute("when required on <signature>".to_string())
1538        })?;
1539
1540        let content = Self::read_text(reader, "signature")?;
1541
1542        Ok(Signature {
1543            when,
1544            role,
1545            reference,
1546            content,
1547        })
1548    }
1549
1550    /// Parse <citations> container
1551    fn parse_citations<R: BufRead>(reader: &mut Reader<R>) -> Result<Citations> {
1552        let mut citations = Vec::new();
1553        let mut buf = Vec::new();
1554
1555        loop {
1556            match reader.read_event_into(&mut buf) {
1557                Ok(Event::Start(e)) if e.name().as_ref() == b"citation" => {
1558                    citations.push(Self::parse_citation(reader, e)?);
1559                }
1560                Ok(Event::End(e)) if e.name().as_ref() == b"citations" => {
1561                    break;
1562                }
1563                Ok(Event::Eof) => {
1564                    return Err(CmlError::InvalidStructure(
1565                        "Unexpected EOF in <citations>".to_string(),
1566                    ))
1567                }
1568                Ok(_) => {}
1569                Err(e) => return Err(e.into()),
1570            }
1571            buf.clear();
1572        }
1573
1574        Ok(Citations { citations })
1575    }
1576
1577    /// Parse <citation> element
1578    fn parse_citation<R: BufRead>(
1579        reader: &mut Reader<R>,
1580        start: BytesStart,
1581    ) -> Result<Citation> {
1582        let mut reference = None;
1583        let mut citation_type = None;
1584
1585        for attr in start.attributes() {
1586            let attr = attr?;
1587            let key = attr.key.as_ref();
1588            let value = String::from_utf8_lossy(&attr.value).to_string();
1589
1590            match key {
1591                b"ref" => reference = Some(value),
1592                b"type" => citation_type = Some(value),
1593                _ => {}
1594            }
1595        }
1596
1597        let reference = reference.ok_or_else(|| {
1598            CmlError::MissingAttribute("ref required on <citation>".to_string())
1599        })?;
1600
1601        let content = Self::parse_inline_content(reader, b"citation")?;
1602
1603        Ok(Citation {
1604            reference,
1605            citation_type,
1606            content,
1607        })
1608    }
1609
1610    /// Parse <annotations> container
1611    fn parse_annotations<R: BufRead>(reader: &mut Reader<R>) -> Result<Annotations> {
1612        let mut notes = Vec::new();
1613        let mut buf = Vec::new();
1614
1615        loop {
1616            match reader.read_event_into(&mut buf) {
1617                Ok(Event::Start(e)) if e.name().as_ref() == b"note" => {
1618                    notes.push(Self::parse_note(reader, e)?);
1619                }
1620                Ok(Event::End(e)) if e.name().as_ref() == b"annotations" => {
1621                    break;
1622                }
1623                Ok(Event::Eof) => {
1624                    return Err(CmlError::InvalidStructure(
1625                        "Unexpected EOF in <annotations>".to_string(),
1626                    ))
1627                }
1628                Ok(_) => {}
1629                Err(e) => return Err(e.into()),
1630            }
1631            buf.clear();
1632        }
1633
1634        Ok(Annotations { notes })
1635    }
1636
1637    /// Parse <note> element
1638    fn parse_note<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Note> {
1639        let mut id = None;
1640        let mut note_type = None;
1641        let mut reference = None;
1642
1643        for attr in start.attributes() {
1644            let attr = attr?;
1645            let key = attr.key.as_ref();
1646            let value = String::from_utf8_lossy(&attr.value).to_string();
1647
1648            match key {
1649                b"id" => id = Some(value),
1650                b"type" => note_type = Some(value),
1651                b"ref" => reference = Some(value),
1652                _ => {}
1653            }
1654        }
1655
1656        // Try parsing as inline first, if we find block elements, switch to block mode
1657        let content = Self::parse_note_content(reader)?;
1658
1659        Ok(Note {
1660            id,
1661            note_type,
1662            reference,
1663            content,
1664        })
1665    }
1666
1667    /// Parse note content (inline or block)
1668    fn parse_note_content<R: BufRead>(reader: &mut Reader<R>) -> Result<NoteContent> {
1669        // Similar to list item content parsing
1670        let mut blocks = Vec::new();
1671        let mut inlines = Vec::new();
1672        let mut has_blocks = false;
1673        let mut buf = Vec::new();
1674
1675        loop {
1676            match reader.read_event_into(&mut buf) {
1677                Ok(Event::Start(e)) => {
1678                    let name = e.name();
1679                    if Self::is_block_element(name.as_ref()) {
1680                        has_blocks = true;
1681                        if let Some(block) = Self::parse_block_element(reader, e)? {
1682                            blocks.push(block);
1683                        }
1684                    } else if let Some(inline) = Self::parse_inline_element(reader, e)? {
1685                        inlines.push(inline);
1686                    }
1687                }
1688                Ok(Event::Text(e)) => {
1689                    let text = e.unescape().unwrap().to_string().trim().to_string();
1690                    if !text.is_empty() {
1691                        inlines.push(InlineElement::Text(text));
1692                    }
1693                }
1694                Ok(Event::End(e)) if e.name().as_ref() == b"note" => {
1695                    break;
1696                }
1697                Ok(Event::Eof) => {
1698                    return Err(CmlError::InvalidStructure(
1699                        "Unexpected EOF in <note>".to_string(),
1700                    ))
1701                }
1702                Ok(_) => {}
1703                Err(e) => return Err(e.into()),
1704            }
1705            buf.clear();
1706        }
1707
1708        if has_blocks {
1709            Ok(NoteContent::Block(blocks))
1710        } else {
1711            Ok(NoteContent::Inline(inlines))
1712        }
1713    }
1714
1715    /// Read plain text content from an element
1716    fn read_text<R: BufRead>(reader: &mut Reader<R>, tag_name: &str) -> Result<String> {
1717        let mut buf = Vec::new();
1718        let mut text = String::new();
1719
1720        loop {
1721            match reader.read_event_into(&mut buf) {
1722                Ok(Event::Text(e)) => {
1723                    text.push_str(&e.unescape().unwrap());
1724                }
1725                Ok(Event::End(e)) if e.name().as_ref() == tag_name.as_bytes() => {
1726                    break;
1727                }
1728                Ok(Event::Eof) => {
1729                    return Err(CmlError::InvalidStructure(format!(
1730                        "Unexpected EOF in <{}>",
1731                        tag_name
1732                    )))
1733                }
1734                Ok(_) => {}
1735                Err(e) => return Err(e.into()),
1736            }
1737            buf.clear();
1738        }
1739
1740        Ok(text)
1741    }
1742}
1743
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest fixture used here: a title, one paragraph and an empty footer.
    const MINIMAL_DOC: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test Document</title>
  </header>
  <body>
    <paragraph>Hello, world!</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

    /// Fixture whose header carries one author, date, identifier and meta entry.
    const HEADER_METADATA_DOC: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
    <author role="editor">John Doe</author>
    <date type="created" when="2025-12-22"/>
    <identifier scheme="doi">10.1234/test</identifier>
    <meta name="status" value="draft"/>
  </header>
  <body>
    <paragraph>Content</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

    /// Fixture with a paragraph that mixes plain text with `<em>` and `<bo>` children.
    const INLINE_ELEMENTS_DOC: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <paragraph>This is <em>emphasized</em> and <bo>bold</bo> text.</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

    /// Fixture whose body is a single ordered list with two items.
    const LIST_DOC: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <list type="ordered" style="numeric">
      <item>First</item>
      <item>Second</item>
    </list>
  </body>
  <footer>
  </footer>
</cml>"#;

    /// Fixture whose footer holds a `<signatures>` block with one signature.
    const FOOTER_SIGNATURE_DOC: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <paragraph>Content</paragraph>
  </body>
  <footer>
    <signatures>
      <signature when="2025-12-22T10:30:00Z" role="author">Jane Doe</signature>
    </signatures>
  </footer>
</cml>"#;

    /// Parses a fixture, panicking at the calling test's location if parsing fails.
    #[track_caller]
    fn parse_fixture(xml: &str) -> CmlDocument {
        CmlParser::parse_str(xml).unwrap()
    }

    /// Root attributes, the title and the single body block are all picked up.
    #[test]
    fn test_parse_minimal_document() {
        let doc = parse_fixture(MINIMAL_DOC);

        assert_eq!(doc.version, "0.2");
        assert_eq!(doc.profile, "core");
        assert_eq!(doc.header.title, "Test Document");
        assert_eq!(doc.body.blocks.len(), 1);
    }

    /// Every kind of header metadata child ends up in its own collection.
    #[test]
    fn test_parse_header_with_metadata() {
        let doc = parse_fixture(HEADER_METADATA_DOC);
        let header = &doc.header;

        assert_eq!(header.authors.len(), 1);
        assert_eq!(header.authors[0].name, "John Doe");
        assert_eq!(header.dates.len(), 1);
        assert_eq!(header.identifiers.len(), 1);
        assert_eq!(header.meta.len(), 1);
    }

    /// Mixed paragraph content is split into alternating text and element nodes.
    #[test]
    fn test_parse_inline_elements() {
        let doc = parse_fixture(INLINE_ELEMENTS_DOC);

        let BlockElement::Paragraph(para) = &doc.body.blocks[0] else {
            panic!("Expected paragraph");
        };

        // Expected sequence: text, em, text, bo, text.
        assert_eq!(para.content.len(), 5);
    }

    /// A `<list type="ordered">` yields a list block with both items and the ordered type.
    #[test]
    fn test_parse_list() {
        let doc = parse_fixture(LIST_DOC);

        let BlockElement::List(list) = &doc.body.blocks[0] else {
            panic!("Expected list");
        };

        assert_eq!(list.items.len(), 2);
        assert!(matches!(list.list_type, Some(ListType::Ordered)));
    }

    /// A footer `<signatures>` block is parsed along with the signature's text content.
    #[test]
    fn test_parse_footer_with_signature() {
        let doc = parse_fixture(FOOTER_SIGNATURE_DOC);

        assert!(doc.footer.signatures.is_some());
        let signature_block = doc.footer.signatures.unwrap();

        assert_eq!(signature_block.signatures.len(), 1);
        assert_eq!(signature_block.signatures[0].content, "Jane Doe");
    }
}