Skip to main content

cml_rs/
parser.rs

//! CML v0.2 XML Parser
//!
//! Parses CML v0.2 XML documents into strongly-typed structures.
4
5use crate::types::*;
6use crate::{CmlError, Result};
7use quick_xml::events::{BytesStart, Event};
8use quick_xml::Reader;
9use std::io::BufRead;
10
/// Parser for CML v0.2 documents.
///
/// This is a stateless unit type: the parsing entry points (`parse_str`,
/// `parse`) are associated functions, so no instance has to be constructed.
/// The derives make the type printable and trivially copyable for callers
/// that do want to hold a value of it.
#[derive(Debug, Clone, Copy, Default)]
pub struct CmlParser;
13
14impl CmlParser {
15    /// Parse CML v0.2 XML from string
16    pub fn parse_str(xml: &str) -> Result<CmlDocument> {
17        let reader = Reader::from_str(xml);
18        Self::parse(reader)
19    }
20
21    /// Parse CML v0.2 XML from reader
22    pub fn parse<R: BufRead>(mut reader: Reader<R>) -> Result<CmlDocument> {
23        let mut buf = Vec::new();
24
25        loop {
26            match reader.read_event_into(&mut buf) {
27                Ok(Event::Start(e)) if e.name().as_ref() == b"cml" => {
28                    return Self::parse_cml(&mut reader, e);
29                }
30                Ok(Event::Eof) => {
31                    return Err(CmlError::InvalidStructure(
32                        "No <cml> root element found".to_string(),
33                    ))
34                }
35                Ok(_) => {}
36                Err(e) => return Err(e.into()),
37            }
38            buf.clear();
39        }
40    }
41
42    /// Parse <cml> root element
43    fn parse_cml<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<CmlDocument> {
44        let mut version = None;
45        let mut encoding = None;
46        let mut profile = None;
47        let mut id = None;
48
49        // Parse attributes
50        for attr in start.attributes() {
51            let attr = attr?;
52            let key = attr.key.as_ref();
53            let value = String::from_utf8_lossy(&attr.value).to_string();
54
55            match key {
56                b"version" => version = Some(value),
57                b"encoding" => encoding = Some(value),
58                b"profile" => profile = Some(value),
59                b"id" => id = Some(value),
60                _ => {}
61            }
62        }
63
64        let version = version
65            .ok_or_else(|| CmlError::MissingAttribute("version required on <cml>".to_string()))?;
66
67        let encoding = encoding
68            .ok_or_else(|| CmlError::MissingAttribute("encoding required on <cml>".to_string()))?;
69
70        let profile = profile
71            .ok_or_else(|| CmlError::MissingAttribute("profile required on <cml>".to_string()))?;
72
73        let mut header = None;
74        let mut body = None;
75        let mut footer = None;
76
77        let mut buf = Vec::new();
78
79        loop {
80            match reader.read_event_into(&mut buf) {
81                Ok(Event::Start(e)) => match e.name().as_ref() {
82                    b"header" => {
83                        header = Some(Self::parse_header(reader)?);
84                    }
85                    b"body" => {
86                        body = Some(Self::parse_body(reader)?);
87                    }
88                    b"footer" => {
89                        footer = Some(Self::parse_footer(reader)?);
90                    }
91                    _ => {}
92                },
93                Ok(Event::End(e)) if e.name().as_ref() == b"cml" => {
94                    break;
95                }
96                Ok(Event::Eof) => {
97                    return Err(CmlError::InvalidStructure(
98                        "Unexpected EOF in <cml>".to_string(),
99                    ))
100                }
101                Ok(_) => {}
102                Err(e) => return Err(e.into()),
103            }
104            buf.clear();
105        }
106
107        let header =
108            header.ok_or_else(|| CmlError::InvalidStructure("<header> required".to_string()))?;
109
110        let body = body.ok_or_else(|| CmlError::InvalidStructure("<body> required".to_string()))?;
111
112        let footer =
113            footer.ok_or_else(|| CmlError::InvalidStructure("<footer> required".to_string()))?;
114
115        Ok(CmlDocument {
116            version,
117            encoding,
118            profile,
119            id,
120            header,
121            body,
122            footer,
123        })
124    }
125
126    /// Parse <header> element
127    fn parse_header<R: BufRead>(reader: &mut Reader<R>) -> Result<Header> {
128        let mut title = None;
129        let mut authors = Vec::new();
130        let mut dates = Vec::new();
131        let mut identifiers = Vec::new();
132        let mut version = None;
133        let mut description = None;
134        let mut provenance = None;
135        let mut source = None;
136        let mut meta = Vec::new();
137
138        let mut buf = Vec::new();
139
140        loop {
141            match reader.read_event_into(&mut buf) {
142                Ok(Event::Start(e)) => match e.name().as_ref() {
143                    b"title" => {
144                        title = Some(Self::read_text(reader, "title")?);
145                    }
146                    b"author" => {
147                        authors.push(Self::parse_author(reader, e)?);
148                    }
149                    b"identifier" => {
150                        identifiers.push(Self::parse_identifier(reader, e)?);
151                    }
152                    b"version" => {
153                        version = Some(Self::read_text(reader, "version")?);
154                    }
155                    b"description" => {
156                        description = Some(Self::read_text(reader, "description")?);
157                    }
158                    b"provenance" => {
159                        provenance = Some(Self::read_text(reader, "provenance")?);
160                    }
161                    b"source" => {
162                        source = Some(Self::read_text(reader, "source")?);
163                    }
164                    _ => {}
165                },
166                Ok(Event::Empty(e)) => match e.name().as_ref() {
167                    b"date" => {
168                        dates.push(Self::parse_date_entry(e)?);
169                    }
170                    b"meta" => {
171                        meta.push(Self::parse_meta_entry(e)?);
172                    }
173                    _ => {}
174                },
175                Ok(Event::End(e)) if e.name().as_ref() == b"header" => {
176                    break;
177                }
178                Ok(Event::Eof) => {
179                    return Err(CmlError::InvalidStructure(
180                        "Unexpected EOF in <header>".to_string(),
181                    ))
182                }
183                Ok(_) => {}
184                Err(e) => return Err(e.into()),
185            }
186            buf.clear();
187        }
188
189        let title = title
190            .ok_or_else(|| CmlError::InvalidStructure("<title> required in header".to_string()))?;
191
192        Ok(Header {
193            title,
194            authors,
195            dates,
196            identifiers,
197            version,
198            description,
199            provenance,
200            source,
201            meta,
202        })
203    }
204
205    /// Parse <author> element
206    fn parse_author<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Author> {
207        let mut role = None;
208        let mut reference = None;
209
210        // Parse attributes
211        for attr in start.attributes() {
212            let attr = attr?;
213            let key = attr.key.as_ref();
214            let value = String::from_utf8_lossy(&attr.value).to_string();
215
216            match key {
217                b"role" => role = Some(value),
218                b"reference" => reference = Some(value),
219                _ => {}
220            }
221        }
222
223        let name = Self::read_text(reader, "author")?;
224
225        Ok(Author {
226            name,
227            role,
228            reference,
229        })
230    }
231
232    /// Parse <date> element (self-closing)
233    fn parse_date_entry(start: BytesStart) -> Result<DateEntry> {
234        let mut date_type = None;
235        let mut when = None;
236
237        for attr in start.attributes() {
238            let attr = attr?;
239            let key = attr.key.as_ref();
240            let value = String::from_utf8_lossy(&attr.value).to_string();
241
242            match key {
243                b"type" => date_type = Some(value),
244                b"when" => when = Some(value),
245                _ => {}
246            }
247        }
248
249        let date_type = date_type
250            .ok_or_else(|| CmlError::MissingAttribute("type required on <date>".to_string()))?;
251
252        let when =
253            when.ok_or_else(|| CmlError::MissingAttribute("when required on <date>".to_string()))?;
254
255        Ok(DateEntry { date_type, when })
256    }
257
258    /// Parse <identifier> element
259    fn parse_identifier<R: BufRead>(
260        reader: &mut Reader<R>,
261        start: BytesStart,
262    ) -> Result<Identifier> {
263        let mut scheme = None;
264
265        for attr in start.attributes() {
266            let attr = attr?;
267            let key = attr.key.as_ref();
268            let value = String::from_utf8_lossy(&attr.value).to_string();
269
270            if key == b"scheme" {
271                scheme = Some(value);
272            }
273        }
274
275        let scheme = scheme.ok_or_else(|| {
276            CmlError::MissingAttribute("scheme required on <identifier>".to_string())
277        })?;
278
279        let value = Self::read_text(reader, "identifier")?;
280
281        Ok(Identifier { scheme, value })
282    }
283
284    /// Parse <meta> element (self-closing)
285    fn parse_meta_entry(start: BytesStart) -> Result<MetaEntry> {
286        let mut name = None;
287        let mut value = None;
288
289        for attr in start.attributes() {
290            let attr = attr?;
291            let key = attr.key.as_ref();
292            let attr_value = String::from_utf8_lossy(&attr.value).to_string();
293
294            match key {
295                b"name" => name = Some(attr_value),
296                b"value" => value = Some(attr_value),
297                _ => {}
298            }
299        }
300
301        let name =
302            name.ok_or_else(|| CmlError::MissingAttribute("name required on <meta>".to_string()))?;
303
304        let value = value
305            .ok_or_else(|| CmlError::MissingAttribute("value required on <meta>".to_string()))?;
306
307        Ok(MetaEntry { name, value })
308    }
309
310    /// Parse <body> element
311    fn parse_body<R: BufRead>(reader: &mut Reader<R>) -> Result<Body> {
312        let blocks = Self::parse_blocks_until(reader, b"body")?;
313
314        if blocks.is_empty() {
315            return Err(CmlError::InvalidStructure(
316                "<body> must contain at least one block element".to_string(),
317            ));
318        }
319
320        Ok(Body { blocks })
321    }
322
323    /// Parse block elements until end tag
324    fn parse_blocks_until<R: BufRead>(
325        reader: &mut Reader<R>,
326        end_tag: &[u8],
327    ) -> Result<Vec<BlockElement>> {
328        let mut blocks = Vec::new();
329        let mut buf = Vec::new();
330
331        loop {
332            match reader.read_event_into(&mut buf) {
333                Ok(Event::Start(e)) => {
334                    if let Some(block) = Self::parse_block_element(reader, e)? {
335                        blocks.push(block);
336                    }
337                }
338                Ok(Event::Empty(e)) => {
339                    // Handle self-closing block elements like <break/>
340                    if e.name().as_ref() == b"break" {
341                        blocks.push(BlockElement::Break(Self::parse_break_empty(e)?));
342                    }
343                }
344                Ok(Event::End(e)) if e.name().as_ref() == end_tag => {
345                    break;
346                }
347                Ok(Event::Eof) => {
348                    return Err(CmlError::InvalidStructure(format!(
349                        "Unexpected EOF waiting for end tag: {}",
350                        String::from_utf8_lossy(end_tag)
351                    )))
352                }
353                Ok(_) => {}
354                Err(e) => return Err(e.into()),
355            }
356            buf.clear();
357        }
358
359        Ok(blocks)
360    }
361
362    /// Parse a block element
363    fn parse_block_element<R: BufRead>(
364        reader: &mut Reader<R>,
365        start: BytesStart,
366    ) -> Result<Option<BlockElement>> {
367        let element = match start.name().as_ref() {
368            b"section" => BlockElement::Section(Self::parse_section(reader, start)?),
369            b"paragraph" => BlockElement::Paragraph(Self::parse_paragraph(reader, start)?),
370            b"heading" => BlockElement::Heading(Self::parse_heading(reader, start)?),
371            b"aside" => BlockElement::Aside(Self::parse_aside(reader, start)?),
372            b"quote" => BlockElement::Quote(Self::parse_quote(reader, start)?),
373            b"list" => BlockElement::List(Self::parse_list(reader, start)?),
374            b"table" => BlockElement::Table(Self::parse_table(reader, start)?),
375            b"code" => BlockElement::Code(Self::parse_code(reader, start)?),
376            b"break" => BlockElement::Break(Self::parse_break(reader, start)?),
377            b"figure" => BlockElement::Figure(Self::parse_figure(reader, start)?),
378            _ => return Ok(None),
379        };
380
381        Ok(Some(element))
382    }
383
384    /// Parse <section> element
385    fn parse_section<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Section> {
386        let mut id = None;
387        let mut section_type = None;
388        let mut reference = None;
389
390        for attr in start.attributes() {
391            let attr = attr?;
392            let key = attr.key.as_ref();
393            let value = String::from_utf8_lossy(&attr.value).to_string();
394
395            match key {
396                b"id" => id = Some(value),
397                b"type" => section_type = Some(value),
398                b"ref" => reference = Some(value),
399                _ => {}
400            }
401        }
402
403        let content = Self::parse_blocks_until(reader, b"section")?;
404
405        Ok(Section {
406            id,
407            section_type,
408            reference,
409            content,
410        })
411    }
412
413    /// Parse <paragraph> element
414    fn parse_paragraph<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Paragraph> {
415        let mut id = None;
416        let mut paragraph_type = None;
417
418        for attr in start.attributes() {
419            let attr = attr?;
420            let key = attr.key.as_ref();
421            let value = String::from_utf8_lossy(&attr.value).to_string();
422
423            match key {
424                b"id" => id = Some(value),
425                b"type" => paragraph_type = Some(value),
426                _ => {}
427            }
428        }
429
430        let content = Self::parse_inline_content(reader, b"paragraph")?;
431
432        Ok(Paragraph {
433            id,
434            paragraph_type,
435            content,
436        })
437    }
438
439    /// Parse <heading> element
440    fn parse_heading<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Heading> {
441        let mut id = None;
442        let mut heading_type = None;
443        let mut size = None;
444
445        for attr in start.attributes() {
446            let attr = attr?;
447            let key = attr.key.as_ref();
448            let value = String::from_utf8_lossy(&attr.value).to_string();
449
450            match key {
451                b"id" => id = Some(value),
452                b"type" => heading_type = Some(value),
453                b"size" => {
454                    size = Some(value.parse().map_err(|_| {
455                        CmlError::InvalidAttribute("size must be a number".to_string())
456                    })?)
457                }
458                _ => {}
459            }
460        }
461
462        let size = size
463            .ok_or_else(|| CmlError::MissingAttribute("size required on <heading>".to_string()))?;
464
465        let content = Self::parse_inline_content(reader, b"heading")?;
466
467        Ok(Heading {
468            id,
469            heading_type,
470            size,
471            content,
472        })
473    }
474
475    /// Parse <aside> element
476    fn parse_aside<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Aside> {
477        let mut id = None;
478        let mut aside_type = None;
479        let mut side = None;
480
481        for attr in start.attributes() {
482            let attr = attr?;
483            let key = attr.key.as_ref();
484            let value = String::from_utf8_lossy(&attr.value).to_string();
485
486            match key {
487                b"id" => id = Some(value),
488                b"type" => aside_type = Some(value),
489                b"side" => {
490                    side = Some(match value.as_str() {
491                        "left" => Side::Left,
492                        "right" => Side::Right,
493                        _ => {
494                            return Err(CmlError::InvalidAttribute(
495                                "side must be 'left' or 'right'".to_string(),
496                            ))
497                        }
498                    })
499                }
500                _ => {}
501            }
502        }
503
504        let side =
505            side.ok_or_else(|| CmlError::MissingAttribute("side required on <aside>".to_string()))?;
506
507        let content = Self::parse_blocks_until(reader, b"aside")?;
508
509        Ok(Aside {
510            id,
511            aside_type,
512            side,
513            content,
514        })
515    }
516
517    /// Parse <quote> element
518    fn parse_quote<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Quote> {
519        let mut id = None;
520        let mut reference = None;
521        let mut source = None;
522
523        for attr in start.attributes() {
524            let attr = attr?;
525            let key = attr.key.as_ref();
526            let value = String::from_utf8_lossy(&attr.value).to_string();
527
528            match key {
529                b"id" => id = Some(value),
530                b"ref" => reference = Some(value),
531                b"source" => source = Some(value),
532                _ => {}
533            }
534        }
535
536        let content = Self::parse_blocks_until(reader, b"quote")?;
537
538        Ok(Quote {
539            id,
540            reference,
541            source,
542            content,
543        })
544    }
545
546    /// Parse <list> element
547    fn parse_list<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<List> {
548        let mut id = None;
549        let mut list_type = None;
550        let mut style = None;
551
552        for attr in start.attributes() {
553            let attr = attr?;
554            let key = attr.key.as_ref();
555            let value = String::from_utf8_lossy(&attr.value).to_string();
556
557            match key {
558                b"id" => id = Some(value),
559                b"type" => {
560                    list_type = Some(match value.as_str() {
561                        "ordered" => ListType::Ordered,
562                        "unordered" => ListType::Unordered,
563                        _ => {
564                            return Err(CmlError::InvalidAttribute(
565                                "list type must be 'ordered' or 'unordered'".to_string(),
566                            ))
567                        }
568                    })
569                }
570                b"style" => {
571                    style = Some(match value.as_str() {
572                        "numeric" => ListStyle::Numeric,
573                        "roman" => ListStyle::Roman,
574                        "alpha" => ListStyle::Alpha,
575                        "symbolic" => ListStyle::Symbolic,
576                        _ => {
577                            return Err(CmlError::InvalidAttribute(
578                                "invalid list style".to_string(),
579                            ))
580                        }
581                    })
582                }
583                _ => {}
584            }
585        }
586
587        let mut items = Vec::new();
588        let mut buf = Vec::new();
589
590        loop {
591            match reader.read_event_into(&mut buf) {
592                Ok(Event::Start(e)) if e.name().as_ref() == b"item" => {
593                    items.push(Self::parse_list_item(reader, e)?);
594                }
595                Ok(Event::End(e)) if e.name().as_ref() == b"list" => {
596                    break;
597                }
598                Ok(Event::Eof) => {
599                    return Err(CmlError::InvalidStructure(
600                        "Unexpected EOF in <list>".to_string(),
601                    ))
602                }
603                Ok(_) => {}
604                Err(e) => return Err(e.into()),
605            }
606            buf.clear();
607        }
608
609        if items.is_empty() {
610            return Err(CmlError::InvalidStructure(
611                "<list> must contain at least one <item>".to_string(),
612            ));
613        }
614
615        Ok(List {
616            id,
617            list_type,
618            style,
619            items,
620        })
621    }
622
623    /// Parse <item> element
624    fn parse_list_item<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<ListItem> {
625        let mut id = None;
626
627        for attr in start.attributes() {
628            let attr = attr?;
629            if attr.key.as_ref() == b"id" {
630                id = Some(String::from_utf8_lossy(&attr.value).to_string());
631            }
632        }
633
634        // Try to parse as inline first, if we find a block element, switch to block mode
635        let content = Self::parse_list_item_content(reader)?;
636
637        Ok(ListItem { id, content })
638    }
639
640    /// Parse list item content (inline or block)
641    fn parse_list_item_content<R: BufRead>(reader: &mut Reader<R>) -> Result<ListItemContent> {
642        let mut blocks = Vec::new();
643        let mut inlines = Vec::new();
644        let mut has_blocks = false;
645        let mut buf = Vec::new();
646
647        loop {
648            match reader.read_event_into(&mut buf) {
649                Ok(Event::Start(e)) => {
650                    let name = e.name();
651                    // Check if this is a block element
652                    if Self::is_block_element(name.as_ref()) {
653                        has_blocks = true;
654                        if let Some(block) = Self::parse_block_element(reader, e)? {
655                            blocks.push(block);
656                        }
657                    } else if let Some(inline) = Self::parse_inline_element(reader, e)? {
658                        inlines.push(inline);
659                    }
660                }
661                Ok(Event::Text(e)) => {
662                    let text = e.unescape().unwrap().to_string().trim().to_string();
663                    if !text.is_empty() {
664                        inlines.push(InlineElement::Text(text));
665                    }
666                }
667                Ok(Event::End(e)) if e.name().as_ref() == b"item" => {
668                    break;
669                }
670                Ok(Event::Eof) => {
671                    return Err(CmlError::InvalidStructure(
672                        "Unexpected EOF in <item>".to_string(),
673                    ))
674                }
675                Ok(_) => {}
676                Err(e) => return Err(e.into()),
677            }
678            buf.clear();
679        }
680
681        if has_blocks {
682            Ok(ListItemContent::Block(blocks))
683        } else {
684            Ok(ListItemContent::Inline(inlines))
685        }
686    }
687
688    /// Check if element name is a block element
689    fn is_block_element(name: &[u8]) -> bool {
690        matches!(
691            name,
692            b"section"
693                | b"paragraph"
694                | b"heading"
695                | b"aside"
696                | b"quote"
697                | b"list"
698                | b"table"
699                | b"code"
700                | b"break"
701                | b"figure"
702        )
703    }
704
705    /// Parse <table> element
706    fn parse_table<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Table> {
707        let mut id = None;
708        let mut table_type = None;
709
710        for attr in start.attributes() {
711            let attr = attr?;
712            let key = attr.key.as_ref();
713            let value = String::from_utf8_lossy(&attr.value).to_string();
714
715            match key {
716                b"id" => id = Some(value),
717                b"type" => table_type = Some(value),
718                _ => {}
719            }
720        }
721
722        let mut header = None;
723        let mut body = None;
724        let mut footer = None;
725        let mut buf = Vec::new();
726
727        loop {
728            match reader.read_event_into(&mut buf) {
729                Ok(Event::Start(e)) => match e.name().as_ref() {
730                    b"header" => {
731                        header = Some(Self::parse_table_header(reader)?);
732                    }
733                    b"body" => {
734                        body = Some(Self::parse_table_body(reader)?);
735                    }
736                    b"footer" => {
737                        footer = Some(Self::parse_table_footer(reader)?);
738                    }
739                    _ => {}
740                },
741                Ok(Event::End(e)) if e.name().as_ref() == b"table" => {
742                    break;
743                }
744                Ok(Event::Eof) => {
745                    return Err(CmlError::InvalidStructure(
746                        "Unexpected EOF in <table>".to_string(),
747                    ))
748                }
749                Ok(_) => {}
750                Err(e) => return Err(e.into()),
751            }
752            buf.clear();
753        }
754
755        let body =
756            body.ok_or_else(|| CmlError::InvalidStructure("<body> required in table".to_string()))?;
757
758        Ok(Table {
759            id,
760            table_type,
761            header,
762            body,
763            footer,
764        })
765    }
766
767    /// Parse table <header>
768    fn parse_table_header<R: BufRead>(reader: &mut Reader<R>) -> Result<TableHeader> {
769        let mut rows = Vec::new();
770        let mut buf = Vec::new();
771
772        loop {
773            match reader.read_event_into(&mut buf) {
774                Ok(Event::Start(e)) if e.name().as_ref() == b"row" => {
775                    rows.push(Self::parse_table_row(reader, true)?);
776                }
777                Ok(Event::End(e)) if e.name().as_ref() == b"header" => {
778                    break;
779                }
780                Ok(Event::Eof) => {
781                    return Err(CmlError::InvalidStructure(
782                        "Unexpected EOF in table <header>".to_string(),
783                    ))
784                }
785                Ok(_) => {}
786                Err(e) => return Err(e.into()),
787            }
788            buf.clear();
789        }
790
791        Ok(TableHeader { rows })
792    }
793
794    /// Parse table <body>
795    fn parse_table_body<R: BufRead>(reader: &mut Reader<R>) -> Result<TableBody> {
796        let mut rows = Vec::new();
797        let mut buf = Vec::new();
798
799        loop {
800            match reader.read_event_into(&mut buf) {
801                Ok(Event::Start(e)) if e.name().as_ref() == b"row" => {
802                    rows.push(Self::parse_table_row(reader, false)?);
803                }
804                Ok(Event::End(e)) if e.name().as_ref() == b"body" => {
805                    break;
806                }
807                Ok(Event::Eof) => {
808                    return Err(CmlError::InvalidStructure(
809                        "Unexpected EOF in table <body>".to_string(),
810                    ))
811                }
812                Ok(_) => {}
813                Err(e) => return Err(e.into()),
814            }
815            buf.clear();
816        }
817
818        Ok(TableBody { rows })
819    }
820
821    /// Parse table <footer>
822    fn parse_table_footer<R: BufRead>(reader: &mut Reader<R>) -> Result<TableFooter> {
823        let mut caption = None;
824        let mut buf = Vec::new();
825
826        loop {
827            match reader.read_event_into(&mut buf) {
828                Ok(Event::Start(e)) if e.name().as_ref() == b"caption" => {
829                    let content = Self::parse_inline_content(reader, b"caption")?;
830                    caption = Some(Caption { content });
831                }
832                Ok(Event::End(e)) if e.name().as_ref() == b"footer" => {
833                    break;
834                }
835                Ok(Event::Eof) => {
836                    return Err(CmlError::InvalidStructure(
837                        "Unexpected EOF in table <footer>".to_string(),
838                    ))
839                }
840                Ok(_) => {}
841                Err(e) => return Err(e.into()),
842            }
843            buf.clear();
844        }
845
846        let caption = caption.ok_or_else(|| {
847            CmlError::InvalidStructure("<caption> required in table footer".to_string())
848        })?;
849
850        Ok(TableFooter { caption })
851    }
852
853    /// Parse table <row>
854    fn parse_table_row<R: BufRead>(reader: &mut Reader<R>, is_header: bool) -> Result<TableRow> {
855        let mut columns = Vec::new();
856        let mut buf = Vec::new();
857
858        loop {
859            match reader.read_event_into(&mut buf) {
860                Ok(Event::Start(e)) if e.name().as_ref() == b"column" => {
861                    columns.push(Self::parse_table_column(reader, e, is_header)?);
862                }
863                Ok(Event::End(e)) if e.name().as_ref() == b"row" => {
864                    break;
865                }
866                Ok(Event::Eof) => {
867                    return Err(CmlError::InvalidStructure(
868                        "Unexpected EOF in table <row>".to_string(),
869                    ))
870                }
871                Ok(_) => {}
872                Err(e) => return Err(e.into()),
873            }
874            buf.clear();
875        }
876
877        Ok(TableRow { columns })
878    }
879
880    /// Parse table <column>
881    fn parse_table_column<R: BufRead>(
882        reader: &mut Reader<R>,
883        start: BytesStart,
884        is_header: bool,
885    ) -> Result<TableColumn> {
886        let mut sort = None;
887
888        if is_header {
889            for attr in start.attributes() {
890                let attr = attr?;
891                if attr.key.as_ref() == b"sort" {
892                    let value = String::from_utf8_lossy(&attr.value).to_string();
893                    sort = Some(match value.as_str() {
894                        "asc" => SortOrder::Asc,
895                        "desc" => SortOrder::Desc,
896                        _ => {
897                            return Err(CmlError::InvalidAttribute(
898                                "sort must be 'asc' or 'desc'".to_string(),
899                            ))
900                        }
901                    });
902                }
903            }
904        }
905
906        // Look for <cell> element
907        let cell = Self::parse_table_cell(reader)?;
908
909        Ok(TableColumn { sort, cell })
910    }
911
912    /// Parse table <cell>
913    fn parse_table_cell<R: BufRead>(reader: &mut Reader<R>) -> Result<TableCell> {
914        let mut buf = Vec::new();
915        let mut colspan = None;
916        let mut rowspan = None;
917        let mut content = Vec::new();
918
919        loop {
920            match reader.read_event_into(&mut buf) {
921                Ok(Event::Start(e)) if e.name().as_ref() == b"cell" => {
922                    // Parse attributes
923                    for attr in e.attributes() {
924                        let attr = attr?;
925                        let key = attr.key.as_ref();
926                        let value = String::from_utf8_lossy(&attr.value).to_string();
927
928                        match key {
929                            b"colspan" => {
930                                colspan = Some(value.parse().map_err(|_| {
931                                    CmlError::InvalidAttribute(
932                                        "colspan must be a number".to_string(),
933                                    )
934                                })?)
935                            }
936                            b"rowspan" => {
937                                rowspan = Some(value.parse().map_err(|_| {
938                                    CmlError::InvalidAttribute(
939                                        "rowspan must be a number".to_string(),
940                                    )
941                                })?)
942                            }
943                            _ => {}
944                        }
945                    }
946
947                    // Parse inline content
948                    content = Self::parse_inline_content(reader, b"cell")?;
949                    break;
950                }
951                Ok(Event::Eof) => {
952                    return Err(CmlError::InvalidStructure(
953                        "Expected <cell> in column".to_string(),
954                    ))
955                }
956                Ok(_) => {}
957                Err(e) => return Err(e.into()),
958            }
959            buf.clear();
960        }
961
962        Ok(TableCell {
963            colspan,
964            rowspan,
965            content,
966        })
967    }
968
969    /// Parse <code> element
970    fn parse_code<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Code> {
971        let mut id = None;
972        let mut lang = None;
973        let mut copyable = None;
974
975        for attr in start.attributes() {
976            let attr = attr?;
977            let key = attr.key.as_ref();
978            let value = String::from_utf8_lossy(&attr.value).to_string();
979
980            match key {
981                b"id" => id = Some(value),
982                b"lang" => lang = Some(value),
983                b"copyable" => {
984                    copyable = Some(match value.as_str() {
985                        "true" => true,
986                        "false" => false,
987                        _ => {
988                            return Err(CmlError::InvalidAttribute(
989                                "copyable must be 'true' or 'false'".to_string(),
990                            ))
991                        }
992                    })
993                }
994                _ => {}
995            }
996        }
997
998        let content = Self::read_text(reader, "code")?;
999
1000        Ok(Code {
1001            id,
1002            lang,
1003            copyable,
1004            content,
1005        })
1006    }
1007
1008    /// Parse <break> element
1009    fn parse_break<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Break> {
1010        let mut break_type = None;
1011
1012        for attr in start.attributes() {
1013            let attr = attr?;
1014            if attr.key.as_ref() == b"type" {
1015                break_type = Some(String::from_utf8_lossy(&attr.value).to_string());
1016            }
1017        }
1018
1019        // Read until end tag (should be immediate)
1020        let mut buf = Vec::new();
1021        loop {
1022            match reader.read_event_into(&mut buf) {
1023                Ok(Event::End(e)) if e.name().as_ref() == b"break" => {
1024                    break;
1025                }
1026                Ok(Event::Eof) => {
1027                    return Err(CmlError::InvalidStructure(
1028                        "Unexpected EOF in <break>".to_string(),
1029                    ))
1030                }
1031                Ok(_) => {}
1032                Err(e) => return Err(e.into()),
1033            }
1034            buf.clear();
1035        }
1036
1037        Ok(Break { break_type })
1038    }
1039
1040    /// Parse self-closing <break/> element
1041    fn parse_break_empty(start: BytesStart) -> Result<Break> {
1042        let mut break_type = None;
1043
1044        for attr in start.attributes() {
1045            let attr = attr?;
1046            if attr.key.as_ref() == b"type" {
1047                break_type = Some(String::from_utf8_lossy(&attr.value).to_string());
1048            }
1049        }
1050
1051        Ok(Break { break_type })
1052    }
1053
1054    /// Parse <figure> element (reserved for v0.3)
1055    fn parse_figure<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Figure> {
1056        let mut id = None;
1057        let mut figure_type = None;
1058        let mut reference = None;
1059
1060        for attr in start.attributes() {
1061            let attr = attr?;
1062            let key = attr.key.as_ref();
1063            let value = String::from_utf8_lossy(&attr.value).to_string();
1064
1065            match key {
1066                b"id" => id = Some(value),
1067                b"type" => figure_type = Some(value),
1068                b"ref" => reference = Some(value),
1069                _ => {}
1070            }
1071        }
1072
1073        // Skip content (reserved for v0.3)
1074        let mut buf = Vec::new();
1075        loop {
1076            match reader.read_event_into(&mut buf) {
1077                Ok(Event::End(e)) if e.name().as_ref() == b"figure" => {
1078                    break;
1079                }
1080                Ok(Event::Eof) => {
1081                    return Err(CmlError::InvalidStructure(
1082                        "Unexpected EOF in <figure>".to_string(),
1083                    ))
1084                }
1085                Ok(_) => {}
1086                Err(e) => return Err(e.into()),
1087            }
1088            buf.clear();
1089        }
1090
1091        // TODO: Emit warning that <figure> is reserved for v0.3
1092
1093        Ok(Figure {
1094            id,
1095            figure_type,
1096            reference,
1097        })
1098    }
1099
1100    /// Parse inline content until end tag
1101    fn parse_inline_content<R: BufRead>(
1102        reader: &mut Reader<R>,
1103        end_tag: &[u8],
1104    ) -> Result<Vec<InlineElement>> {
1105        let mut elements = Vec::new();
1106        let mut buf = Vec::new();
1107
1108        loop {
1109            match reader.read_event_into(&mut buf) {
1110                Ok(Event::Start(e)) => {
1111                    if let Some(inline) = Self::parse_inline_element(reader, e)? {
1112                        elements.push(inline);
1113                    }
1114                }
1115                Ok(Event::Empty(e)) => {
1116                    // Handle self-closing inline elements like <end/>
1117                    if e.name().as_ref() == b"end" {
1118                        elements.push(InlineElement::End(Self::parse_end_empty(e)?));
1119                    }
1120                }
1121                Ok(Event::Text(e)) => {
1122                    let text = e.unescape().unwrap().to_string();
1123                    if !text.trim().is_empty() {
1124                        elements.push(InlineElement::Text(text));
1125                    }
1126                }
1127                Ok(Event::End(e)) if e.name().as_ref() == end_tag => {
1128                    break;
1129                }
1130                Ok(Event::Eof) => {
1131                    return Err(CmlError::InvalidStructure(format!(
1132                        "Unexpected EOF waiting for end tag: {}",
1133                        String::from_utf8_lossy(end_tag)
1134                    )))
1135                }
1136                Ok(_) => {}
1137                Err(e) => return Err(e.into()),
1138            }
1139            buf.clear();
1140        }
1141
1142        Ok(elements)
1143    }
1144
1145    /// Parse an inline element
1146    fn parse_inline_element<R: BufRead>(
1147        reader: &mut Reader<R>,
1148        start: BytesStart,
1149    ) -> Result<Option<InlineElement>> {
1150        let element = match start.name().as_ref() {
1151            b"em" => InlineElement::Em(Self::parse_em(reader, start)?),
1152            b"bo" => InlineElement::Bo(Self::parse_bo(reader)?),
1153            b"un" => InlineElement::Un(Self::parse_un(reader)?),
1154            b"st" => InlineElement::St(Self::parse_st(reader)?),
1155            b"snip" => InlineElement::Snip(Self::parse_snip(reader, start)?),
1156            b"key" => InlineElement::Key(Self::parse_key(reader)?),
1157            b"rf" => InlineElement::Rf(Self::parse_rf(reader, start)?),
1158            b"tg" => InlineElement::Tg(Self::parse_tg(reader, start)?),
1159            b"lk" => InlineElement::Lk(Self::parse_lk(reader, start)?),
1160            b"curr" => InlineElement::Curr(Self::parse_curr(reader, start)?),
1161            b"end" => InlineElement::End(Self::parse_end(reader, start)?),
1162            _ => return Ok(None),
1163        };
1164
1165        Ok(Some(element))
1166    }
1167
1168    /// Parse <em> element
1169    fn parse_em<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Em> {
1170        let mut em_type = None;
1171
1172        for attr in start.attributes() {
1173            let attr = attr?;
1174            if attr.key.as_ref() == b"type" {
1175                let value = String::from_utf8_lossy(&attr.value).to_string();
1176                em_type = Some(match value.as_str() {
1177                    "stress" => EmphasisType::Stress,
1178                    "contrast" => EmphasisType::Contrast,
1179                    _ => {
1180                        return Err(CmlError::InvalidAttribute(
1181                            "em type must be 'stress' or 'contrast'".to_string(),
1182                        ))
1183                    }
1184                });
1185            }
1186        }
1187
1188        let content = Self::parse_inline_content(reader, b"em")?;
1189
1190        Ok(Em { em_type, content })
1191    }
1192
1193    /// Parse <bo> element
1194    fn parse_bo<R: BufRead>(reader: &mut Reader<R>) -> Result<Bo> {
1195        let content = Self::parse_inline_content(reader, b"bo")?;
1196        Ok(Bo { content })
1197    }
1198
1199    /// Parse <un> element
1200    fn parse_un<R: BufRead>(reader: &mut Reader<R>) -> Result<Un> {
1201        let content = Self::parse_inline_content(reader, b"un")?;
1202        Ok(Un { content })
1203    }
1204
1205    /// Parse <st> element
1206    fn parse_st<R: BufRead>(reader: &mut Reader<R>) -> Result<St> {
1207        let content = Self::parse_inline_content(reader, b"st")?;
1208        Ok(St { content })
1209    }
1210
1211    /// Parse <snip> element (text only, no nesting)
1212    fn parse_snip<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Snip> {
1213        let mut char = None;
1214
1215        for attr in start.attributes() {
1216            let attr = attr?;
1217            if attr.key.as_ref() == b"char" {
1218                char = Some(String::from_utf8_lossy(&attr.value).to_string());
1219            }
1220        }
1221
1222        let content = Self::read_text(reader, "snip")?;
1223
1224        Ok(Snip { char, content })
1225    }
1226
1227    /// Parse <key> element (text only, no nesting)
1228    fn parse_key<R: BufRead>(reader: &mut Reader<R>) -> Result<Key> {
1229        let content = Self::read_text(reader, "key")?;
1230        Ok(Key { content })
1231    }
1232
1233    /// Parse <rf> element (internal reference)
1234    fn parse_rf<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Rf> {
1235        let mut reference = None;
1236        let mut role = None;
1237        let mut title = None;
1238
1239        for attr in start.attributes() {
1240            let attr = attr?;
1241            let key = attr.key.as_ref();
1242            let value = String::from_utf8_lossy(&attr.value).to_string();
1243
1244            match key {
1245                b"ref" => reference = Some(value),
1246                b"role" => role = Some(value),
1247                b"title" => title = Some(value),
1248                _ => {}
1249            }
1250        }
1251
1252        let reference = reference
1253            .ok_or_else(|| CmlError::MissingAttribute("ref required on <rf>".to_string()))?;
1254
1255        let content = Self::read_text(reader, "rf")?;
1256
1257        Ok(Rf {
1258            reference,
1259            role,
1260            title,
1261            content,
1262        })
1263    }
1264
1265    /// Parse <tg> element (topic tag)
1266    fn parse_tg<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Tg> {
1267        let mut reference = None;
1268        let mut role = None;
1269        let mut title = None;
1270
1271        for attr in start.attributes() {
1272            let attr = attr?;
1273            let key = attr.key.as_ref();
1274            let value = String::from_utf8_lossy(&attr.value).to_string();
1275
1276            match key {
1277                b"ref" => reference = Some(value),
1278                b"role" => role = Some(value),
1279                b"title" => title = Some(value),
1280                _ => {}
1281            }
1282        }
1283
1284        let reference = reference
1285            .ok_or_else(|| CmlError::MissingAttribute("ref required on <tg>".to_string()))?;
1286
1287        let content = Self::read_text(reader, "tg")?;
1288
1289        Ok(Tg {
1290            reference,
1291            role,
1292            title,
1293            content,
1294        })
1295    }
1296
1297    /// Parse <lk> element (external link)
1298    fn parse_lk<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Lk> {
1299        let mut reference = None;
1300        let mut role = None;
1301        let mut title = None;
1302
1303        for attr in start.attributes() {
1304            let attr = attr?;
1305            let key = attr.key.as_ref();
1306            let value = String::from_utf8_lossy(&attr.value).to_string();
1307
1308            match key {
1309                b"ref" => reference = Some(value),
1310                b"role" => role = Some(value),
1311                b"title" => title = Some(value),
1312                _ => {}
1313            }
1314        }
1315
1316        let reference = reference
1317            .ok_or_else(|| CmlError::MissingAttribute("ref required on <lk>".to_string()))?;
1318
1319        let content = Self::read_text(reader, "lk")?;
1320
1321        Ok(Lk {
1322            reference,
1323            role,
1324            title,
1325            content,
1326        })
1327    }
1328
1329    /// Parse <curr> element (currency)
1330    fn parse_curr<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Curr> {
1331        let mut currency_type = None;
1332        let mut format = None;
1333
1334        for attr in start.attributes() {
1335            let attr = attr?;
1336            let key = attr.key.as_ref();
1337            let value = String::from_utf8_lossy(&attr.value).to_string();
1338
1339            match key {
1340                b"type" => currency_type = Some(value),
1341                b"format" => {
1342                    format = Some(match value.as_str() {
1343                        "symbol" => CurrencyFormat::Symbol,
1344                        "code" => CurrencyFormat::Code,
1345                        "name" => CurrencyFormat::Name,
1346                        _ => {
1347                            return Err(CmlError::InvalidAttribute(
1348                                "currency format must be 'symbol', 'code', or 'name'".to_string(),
1349                            ))
1350                        }
1351                    })
1352                }
1353                _ => {}
1354            }
1355        }
1356
1357        let currency_type = currency_type
1358            .ok_or_else(|| CmlError::MissingAttribute("type required on <curr>".to_string()))?;
1359
1360        let value = Self::read_text(reader, "curr")?;
1361
1362        Ok(Curr {
1363            currency_type,
1364            format,
1365            value,
1366        })
1367    }
1368
1369    /// Parse <end> element
1370    fn parse_end<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<End> {
1371        let mut kind = None;
1372
1373        for attr in start.attributes() {
1374            let attr = attr?;
1375            if attr.key.as_ref() == b"kind" {
1376                let value = String::from_utf8_lossy(&attr.value).to_string();
1377                kind = Some(match value.as_str() {
1378                    "line" => EndKind::Line,
1379                    "verse" => EndKind::Verse,
1380                    "item" => EndKind::Item,
1381                    _ => {
1382                        return Err(CmlError::InvalidAttribute(
1383                            "end kind must be 'line', 'verse', or 'item'".to_string(),
1384                        ))
1385                    }
1386                });
1387            }
1388        }
1389
1390        // Read until end tag (should be immediate for self-closing)
1391        let mut buf = Vec::new();
1392        loop {
1393            match reader.read_event_into(&mut buf) {
1394                Ok(Event::End(e)) if e.name().as_ref() == b"end" => {
1395                    break;
1396                }
1397                Ok(Event::Eof) => {
1398                    return Err(CmlError::InvalidStructure(
1399                        "Unexpected EOF in <end>".to_string(),
1400                    ))
1401                }
1402                Ok(_) => {}
1403                Err(e) => return Err(e.into()),
1404            }
1405            buf.clear();
1406        }
1407
1408        Ok(End { kind })
1409    }
1410
1411    /// Parse self-closing <end/> element
1412    fn parse_end_empty(start: BytesStart) -> Result<End> {
1413        let mut kind = None;
1414
1415        for attr in start.attributes() {
1416            let attr = attr?;
1417            if attr.key.as_ref() == b"kind" {
1418                let value = String::from_utf8_lossy(&attr.value).to_string();
1419                kind = Some(match value.as_str() {
1420                    "line" => EndKind::Line,
1421                    "verse" => EndKind::Verse,
1422                    "item" => EndKind::Item,
1423                    _ => {
1424                        return Err(CmlError::InvalidAttribute(
1425                            "end kind must be 'line', 'verse', or 'item'".to_string(),
1426                        ))
1427                    }
1428                });
1429            }
1430        }
1431
1432        Ok(End { kind })
1433    }
1434
1435    /// Parse <footer> element
1436    fn parse_footer<R: BufRead>(reader: &mut Reader<R>) -> Result<Footer> {
1437        let mut signatures = None;
1438        let mut citations = None;
1439        let mut annotations = None;
1440
1441        let mut buf = Vec::new();
1442
1443        loop {
1444            match reader.read_event_into(&mut buf) {
1445                Ok(Event::Start(e)) => match e.name().as_ref() {
1446                    b"signatures" => {
1447                        signatures = Some(Self::parse_signatures(reader)?);
1448                    }
1449                    b"citations" => {
1450                        citations = Some(Self::parse_citations(reader)?);
1451                    }
1452                    b"annotations" => {
1453                        annotations = Some(Self::parse_annotations(reader)?);
1454                    }
1455                    _ => {}
1456                },
1457                Ok(Event::End(e)) if e.name().as_ref() == b"footer" => {
1458                    break;
1459                }
1460                Ok(Event::Eof) => {
1461                    return Err(CmlError::InvalidStructure(
1462                        "Unexpected EOF in <footer>".to_string(),
1463                    ))
1464                }
1465                Ok(_) => {}
1466                Err(e) => return Err(e.into()),
1467            }
1468            buf.clear();
1469        }
1470
1471        Ok(Footer {
1472            signatures,
1473            citations,
1474            annotations,
1475        })
1476    }
1477
1478    /// Parse <signatures> container
1479    fn parse_signatures<R: BufRead>(reader: &mut Reader<R>) -> Result<Signatures> {
1480        let mut signatures = Vec::new();
1481        let mut buf = Vec::new();
1482
1483        loop {
1484            match reader.read_event_into(&mut buf) {
1485                Ok(Event::Start(e)) if e.name().as_ref() == b"signature" => {
1486                    signatures.push(Self::parse_signature(reader, e)?);
1487                }
1488                Ok(Event::End(e)) if e.name().as_ref() == b"signatures" => {
1489                    break;
1490                }
1491                Ok(Event::Eof) => {
1492                    return Err(CmlError::InvalidStructure(
1493                        "Unexpected EOF in <signatures>".to_string(),
1494                    ))
1495                }
1496                Ok(_) => {}
1497                Err(e) => return Err(e.into()),
1498            }
1499            buf.clear();
1500        }
1501
1502        Ok(Signatures { signatures })
1503    }
1504
1505    /// Parse <signature> element
1506    fn parse_signature<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Signature> {
1507        let mut when = None;
1508        let mut role = None;
1509        let mut reference = None;
1510
1511        for attr in start.attributes() {
1512            let attr = attr?;
1513            let key = attr.key.as_ref();
1514            let value = String::from_utf8_lossy(&attr.value).to_string();
1515
1516            match key {
1517                b"when" => when = Some(value),
1518                b"role" => role = Some(value),
1519                b"ref" => reference = Some(value),
1520                _ => {}
1521            }
1522        }
1523
1524        let when = when.ok_or_else(|| {
1525            CmlError::MissingAttribute("when required on <signature>".to_string())
1526        })?;
1527
1528        let content = Self::read_text(reader, "signature")?;
1529
1530        Ok(Signature {
1531            when,
1532            role,
1533            reference,
1534            content,
1535        })
1536    }
1537
1538    /// Parse <citations> container
1539    fn parse_citations<R: BufRead>(reader: &mut Reader<R>) -> Result<Citations> {
1540        let mut citations = Vec::new();
1541        let mut buf = Vec::new();
1542
1543        loop {
1544            match reader.read_event_into(&mut buf) {
1545                Ok(Event::Start(e)) if e.name().as_ref() == b"citation" => {
1546                    citations.push(Self::parse_citation(reader, e)?);
1547                }
1548                Ok(Event::End(e)) if e.name().as_ref() == b"citations" => {
1549                    break;
1550                }
1551                Ok(Event::Eof) => {
1552                    return Err(CmlError::InvalidStructure(
1553                        "Unexpected EOF in <citations>".to_string(),
1554                    ))
1555                }
1556                Ok(_) => {}
1557                Err(e) => return Err(e.into()),
1558            }
1559            buf.clear();
1560        }
1561
1562        Ok(Citations { citations })
1563    }
1564
1565    /// Parse <citation> element
1566    fn parse_citation<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Citation> {
1567        let mut reference = None;
1568        let mut citation_type = None;
1569
1570        for attr in start.attributes() {
1571            let attr = attr?;
1572            let key = attr.key.as_ref();
1573            let value = String::from_utf8_lossy(&attr.value).to_string();
1574
1575            match key {
1576                b"ref" => reference = Some(value),
1577                b"type" => citation_type = Some(value),
1578                _ => {}
1579            }
1580        }
1581
1582        let reference = reference
1583            .ok_or_else(|| CmlError::MissingAttribute("ref required on <citation>".to_string()))?;
1584
1585        let content = Self::parse_inline_content(reader, b"citation")?;
1586
1587        Ok(Citation {
1588            reference,
1589            citation_type,
1590            content,
1591        })
1592    }
1593
1594    /// Parse <annotations> container
1595    fn parse_annotations<R: BufRead>(reader: &mut Reader<R>) -> Result<Annotations> {
1596        let mut notes = Vec::new();
1597        let mut buf = Vec::new();
1598
1599        loop {
1600            match reader.read_event_into(&mut buf) {
1601                Ok(Event::Start(e)) if e.name().as_ref() == b"note" => {
1602                    notes.push(Self::parse_note(reader, e)?);
1603                }
1604                Ok(Event::End(e)) if e.name().as_ref() == b"annotations" => {
1605                    break;
1606                }
1607                Ok(Event::Eof) => {
1608                    return Err(CmlError::InvalidStructure(
1609                        "Unexpected EOF in <annotations>".to_string(),
1610                    ))
1611                }
1612                Ok(_) => {}
1613                Err(e) => return Err(e.into()),
1614            }
1615            buf.clear();
1616        }
1617
1618        Ok(Annotations { notes })
1619    }
1620
1621    /// Parse <note> element
1622    fn parse_note<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Note> {
1623        let mut id = None;
1624        let mut note_type = None;
1625        let mut reference = None;
1626
1627        for attr in start.attributes() {
1628            let attr = attr?;
1629            let key = attr.key.as_ref();
1630            let value = String::from_utf8_lossy(&attr.value).to_string();
1631
1632            match key {
1633                b"id" => id = Some(value),
1634                b"type" => note_type = Some(value),
1635                b"ref" => reference = Some(value),
1636                _ => {}
1637            }
1638        }
1639
1640        // Try parsing as inline first, if we find block elements, switch to block mode
1641        let content = Self::parse_note_content(reader)?;
1642
1643        Ok(Note {
1644            id,
1645            note_type,
1646            reference,
1647            content,
1648        })
1649    }
1650
1651    /// Parse note content (inline or block)
1652    fn parse_note_content<R: BufRead>(reader: &mut Reader<R>) -> Result<NoteContent> {
1653        // Similar to list item content parsing
1654        let mut blocks = Vec::new();
1655        let mut inlines = Vec::new();
1656        let mut has_blocks = false;
1657        let mut buf = Vec::new();
1658
1659        loop {
1660            match reader.read_event_into(&mut buf) {
1661                Ok(Event::Start(e)) => {
1662                    let name = e.name();
1663                    if Self::is_block_element(name.as_ref()) {
1664                        has_blocks = true;
1665                        if let Some(block) = Self::parse_block_element(reader, e)? {
1666                            blocks.push(block);
1667                        }
1668                    } else if let Some(inline) = Self::parse_inline_element(reader, e)? {
1669                        inlines.push(inline);
1670                    }
1671                }
1672                Ok(Event::Text(e)) => {
1673                    let text = e.unescape().unwrap().to_string().trim().to_string();
1674                    if !text.is_empty() {
1675                        inlines.push(InlineElement::Text(text));
1676                    }
1677                }
1678                Ok(Event::End(e)) if e.name().as_ref() == b"note" => {
1679                    break;
1680                }
1681                Ok(Event::Eof) => {
1682                    return Err(CmlError::InvalidStructure(
1683                        "Unexpected EOF in <note>".to_string(),
1684                    ))
1685                }
1686                Ok(_) => {}
1687                Err(e) => return Err(e.into()),
1688            }
1689            buf.clear();
1690        }
1691
1692        if has_blocks {
1693            Ok(NoteContent::Block(blocks))
1694        } else {
1695            Ok(NoteContent::Inline(inlines))
1696        }
1697    }
1698
1699    /// Read plain text content from an element
1700    fn read_text<R: BufRead>(reader: &mut Reader<R>, tag_name: &str) -> Result<String> {
1701        let mut buf = Vec::new();
1702        let mut text = String::new();
1703
1704        loop {
1705            match reader.read_event_into(&mut buf) {
1706                Ok(Event::Text(e)) => {
1707                    text.push_str(&e.unescape().unwrap());
1708                }
1709                Ok(Event::End(e)) if e.name().as_ref() == tag_name.as_bytes() => {
1710                    break;
1711                }
1712                Ok(Event::Eof) => {
1713                    return Err(CmlError::InvalidStructure(format!(
1714                        "Unexpected EOF in <{}>",
1715                        tag_name
1716                    )))
1717                }
1718                Ok(_) => {}
1719                Err(e) => return Err(e.into()),
1720            }
1721            buf.clear();
1722        }
1723
1724        Ok(text)
1725    }
1726}
1727
#[cfg(test)]
mod tests {
    //! Unit tests for `CmlParser`.
    //!
    //! Each test feeds a small CML v0.2 document through `CmlParser::parse_str`
    //! and checks the parsed structure (or the reported error).

    use super::*;

    /// A minimal document (title, one paragraph, empty footer) parses and
    /// exposes the root attributes, the title and a single paragraph block.
    #[test]
    fn test_parse_minimal_document() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test Document</title>
  </header>
  <body>
    <paragraph>Hello, world!</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

        let doc = CmlParser::parse_str(xml).unwrap();

        assert_eq!(doc.version, "0.2");
        assert_eq!(doc.profile, "core");
        assert_eq!(doc.header.title, "Test Document");
        assert_eq!(doc.body.blocks.len(), 1);
        // The single body block must be the paragraph, not merely "some block".
        assert!(matches!(doc.body.blocks[0], BlockElement::Paragraph(_)));
    }

    /// Input without a `<cml>` root element is rejected with
    /// `CmlError::InvalidStructure` instead of producing a document.
    #[test]
    fn test_parse_missing_root_element() {
        // Both inputs reach end-of-input without ever opening a <cml> element.
        for xml in ["", "<document></document>"] {
            match CmlParser::parse_str(xml) {
                Err(CmlError::InvalidStructure(msg)) => {
                    assert_eq!(msg, "No <cml> root element found");
                }
                Err(other) => panic!("Expected InvalidStructure, got {:?}", other),
                Ok(_) => panic!("Expected an error for input without <cml>: {:?}", xml),
            }
        }
    }

    /// Header metadata elements (`author`, `date`, `identifier`, `meta`) are
    /// each collected into their respective header lists.
    #[test]
    fn test_parse_header_with_metadata() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
    <author role="editor">John Doe</author>
    <date type="created" when="2025-12-22"/>
    <identifier scheme="doi">10.1234/test</identifier>
    <meta name="status" value="draft"/>
  </header>
  <body>
    <paragraph>Content</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

        let doc = CmlParser::parse_str(xml).unwrap();

        assert_eq!(doc.header.authors.len(), 1);
        assert_eq!(doc.header.authors[0].name, "John Doe");
        assert_eq!(doc.header.dates.len(), 1);
        assert_eq!(doc.header.identifiers.len(), 1);
        assert_eq!(doc.header.meta.len(), 1);
    }

    /// Mixed paragraph content is split into alternating text and inline
    /// element nodes, preserving document order.
    #[test]
    fn test_parse_inline_elements() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <paragraph>This is <em>emphasized</em> and <bo>bold</bo> text.</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

        let doc = CmlParser::parse_str(xml).unwrap();

        if let BlockElement::Paragraph(para) = &doc.body.blocks[0] {
            assert_eq!(para.content.len(), 5); // text, em, text, bo, text
        } else {
            panic!("Expected paragraph");
        }
    }

    /// An ordered list yields a `BlockElement::List` with one entry per
    /// `<item>` and the list type taken from the `type` attribute.
    #[test]
    fn test_parse_list() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <list type="ordered" style="numeric">
      <item>First</item>
      <item>Second</item>
    </list>
  </body>
  <footer>
  </footer>
</cml>"#;

        let doc = CmlParser::parse_str(xml).unwrap();

        if let BlockElement::List(list) = &doc.body.blocks[0] {
            assert_eq!(list.items.len(), 2);
            assert!(matches!(list.list_type, Some(ListType::Ordered)));
        } else {
            panic!("Expected list");
        }
    }

    /// A `<signatures>` section in the footer is parsed and each
    /// `<signature>` keeps its text content.
    #[test]
    fn test_parse_footer_with_signature() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <paragraph>Content</paragraph>
  </body>
  <footer>
    <signatures>
      <signature when="2025-12-22T10:30:00Z" role="author">Jane Doe</signature>
    </signatures>
  </footer>
</cml>"#;

        let doc = CmlParser::parse_str(xml).unwrap();

        // `expect` replaces the former `assert!(is_some())` + `unwrap()` pair
        // and reports which invariant was violated if the section is missing.
        let sigs = doc
            .footer
            .signatures
            .expect("footer should contain a <signatures> section");
        assert_eq!(sigs.signatures.len(), 1);
        assert_eq!(sigs.signatures[0].content, "Jane Doe");
    }
}