1use crate::types::*;
6use crate::{CmlError, Result};
7use quick_xml::events::{BytesStart, Event};
8use quick_xml::Reader;
9use std::io::BufRead;
10
/// Unit struct that namespaces the CML parsing functions; it carries no state.
pub struct CmlParser;
13
14impl CmlParser {
15 pub fn parse_str(xml: &str) -> Result<CmlDocument> {
17 let reader = Reader::from_str(xml);
18 Self::parse(reader)
19 }
20
21 pub fn parse<R: BufRead>(mut reader: Reader<R>) -> Result<CmlDocument> {
23 let mut buf = Vec::new();
24
25 loop {
26 match reader.read_event_into(&mut buf) {
27 Ok(Event::Start(e)) if e.name().as_ref() == b"cml" => {
28 return Self::parse_cml(&mut reader, e);
29 }
30 Ok(Event::Eof) => {
31 return Err(CmlError::InvalidStructure(
32 "No <cml> root element found".to_string(),
33 ))
34 }
35 Ok(_) => {}
36 Err(e) => return Err(e.into()),
37 }
38 buf.clear();
39 }
40 }
41
42 fn parse_cml<R: BufRead>(
44 reader: &mut Reader<R>,
45 start: BytesStart,
46 ) -> Result<CmlDocument> {
47 let mut version = None;
48 let mut encoding = None;
49 let mut profile = None;
50 let mut id = None;
51
52 for attr in start.attributes() {
54 let attr = attr?;
55 let key = attr.key.as_ref();
56 let value = String::from_utf8_lossy(&attr.value).to_string();
57
58 match key {
59 b"version" => version = Some(value),
60 b"encoding" => encoding = Some(value),
61 b"profile" => profile = Some(value),
62 b"id" => id = Some(value),
63 _ => {}
64 }
65 }
66
67 let version = version.ok_or_else(|| {
68 CmlError::MissingAttribute("version required on <cml>".to_string())
69 })?;
70
71 let encoding = encoding.ok_or_else(|| {
72 CmlError::MissingAttribute("encoding required on <cml>".to_string())
73 })?;
74
75 let profile = profile.ok_or_else(|| {
76 CmlError::MissingAttribute("profile required on <cml>".to_string())
77 })?;
78
79 let mut header = None;
80 let mut body = None;
81 let mut footer = None;
82
83 let mut buf = Vec::new();
84
85 loop {
86 match reader.read_event_into(&mut buf) {
87 Ok(Event::Start(e)) => match e.name().as_ref() {
88 b"header" => {
89 header = Some(Self::parse_header(reader)?);
90 }
91 b"body" => {
92 body = Some(Self::parse_body(reader)?);
93 }
94 b"footer" => {
95 footer = Some(Self::parse_footer(reader)?);
96 }
97 _ => {}
98 },
99 Ok(Event::End(e)) if e.name().as_ref() == b"cml" => {
100 break;
101 }
102 Ok(Event::Eof) => {
103 return Err(CmlError::InvalidStructure("Unexpected EOF in <cml>".to_string()))
104 }
105 Ok(_) => {}
106 Err(e) => return Err(e.into()),
107 }
108 buf.clear();
109 }
110
111 let header =
112 header.ok_or_else(|| CmlError::InvalidStructure("<header> required".to_string()))?;
113
114 let body =
115 body.ok_or_else(|| CmlError::InvalidStructure("<body> required".to_string()))?;
116
117 let footer =
118 footer.ok_or_else(|| CmlError::InvalidStructure("<footer> required".to_string()))?;
119
120 Ok(CmlDocument {
121 version,
122 encoding,
123 profile,
124 id,
125 header,
126 body,
127 footer,
128 })
129 }
130
131 fn parse_header<R: BufRead>(reader: &mut Reader<R>) -> Result<Header> {
133 let mut title = None;
134 let mut authors = Vec::new();
135 let mut dates = Vec::new();
136 let mut identifiers = Vec::new();
137 let mut version = None;
138 let mut description = None;
139 let mut provenance = None;
140 let mut source = None;
141 let mut meta = Vec::new();
142
143 let mut buf = Vec::new();
144
145 loop {
146 match reader.read_event_into(&mut buf) {
147 Ok(Event::Start(e)) => match e.name().as_ref() {
148 b"title" => {
149 title = Some(Self::read_text(reader, "title")?);
150 }
151 b"author" => {
152 authors.push(Self::parse_author(reader, e)?);
153 }
154 b"identifier" => {
155 identifiers.push(Self::parse_identifier(reader, e)?);
156 }
157 b"version" => {
158 version = Some(Self::read_text(reader, "version")?);
159 }
160 b"description" => {
161 description = Some(Self::read_text(reader, "description")?);
162 }
163 b"provenance" => {
164 provenance = Some(Self::read_text(reader, "provenance")?);
165 }
166 b"source" => {
167 source = Some(Self::read_text(reader, "source")?);
168 }
169 _ => {}
170 },
171 Ok(Event::Empty(e)) => match e.name().as_ref() {
172 b"date" => {
173 dates.push(Self::parse_date_entry(e)?);
174 }
175 b"meta" => {
176 meta.push(Self::parse_meta_entry(e)?);
177 }
178 _ => {}
179 },
180 Ok(Event::End(e)) if e.name().as_ref() == b"header" => {
181 break;
182 }
183 Ok(Event::Eof) => {
184 return Err(CmlError::InvalidStructure(
185 "Unexpected EOF in <header>".to_string(),
186 ))
187 }
188 Ok(_) => {}
189 Err(e) => return Err(e.into()),
190 }
191 buf.clear();
192 }
193
194 let title =
195 title.ok_or_else(|| CmlError::InvalidStructure("<title> required in header".to_string()))?;
196
197 Ok(Header {
198 title,
199 authors,
200 dates,
201 identifiers,
202 version,
203 description,
204 provenance,
205 source,
206 meta,
207 })
208 }
209
210 fn parse_author<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Author> {
212 let mut role = None;
213 let mut reference = None;
214
215 for attr in start.attributes() {
217 let attr = attr?;
218 let key = attr.key.as_ref();
219 let value = String::from_utf8_lossy(&attr.value).to_string();
220
221 match key {
222 b"role" => role = Some(value),
223 b"reference" => reference = Some(value),
224 _ => {}
225 }
226 }
227
228 let name = Self::read_text(reader, "author")?;
229
230 Ok(Author {
231 name,
232 role,
233 reference,
234 })
235 }
236
237 fn parse_date_entry(start: BytesStart) -> Result<DateEntry> {
239 let mut date_type = None;
240 let mut when = None;
241
242 for attr in start.attributes() {
243 let attr = attr?;
244 let key = attr.key.as_ref();
245 let value = String::from_utf8_lossy(&attr.value).to_string();
246
247 match key {
248 b"type" => date_type = Some(value),
249 b"when" => when = Some(value),
250 _ => {}
251 }
252 }
253
254 let date_type = date_type
255 .ok_or_else(|| CmlError::MissingAttribute("type required on <date>".to_string()))?;
256
257 let when =
258 when.ok_or_else(|| CmlError::MissingAttribute("when required on <date>".to_string()))?;
259
260 Ok(DateEntry { date_type, when })
261 }
262
263 fn parse_identifier<R: BufRead>(
265 reader: &mut Reader<R>,
266 start: BytesStart,
267 ) -> Result<Identifier> {
268 let mut scheme = None;
269
270 for attr in start.attributes() {
271 let attr = attr?;
272 let key = attr.key.as_ref();
273 let value = String::from_utf8_lossy(&attr.value).to_string();
274
275 if key == b"scheme" {
276 scheme = Some(value);
277 }
278 }
279
280 let scheme = scheme.ok_or_else(|| {
281 CmlError::MissingAttribute("scheme required on <identifier>".to_string())
282 })?;
283
284 let value = Self::read_text(reader, "identifier")?;
285
286 Ok(Identifier { scheme, value })
287 }
288
289 fn parse_meta_entry(start: BytesStart) -> Result<MetaEntry> {
291 let mut name = None;
292 let mut value = None;
293
294 for attr in start.attributes() {
295 let attr = attr?;
296 let key = attr.key.as_ref();
297 let attr_value = String::from_utf8_lossy(&attr.value).to_string();
298
299 match key {
300 b"name" => name = Some(attr_value),
301 b"value" => value = Some(attr_value),
302 _ => {}
303 }
304 }
305
306 let name =
307 name.ok_or_else(|| CmlError::MissingAttribute("name required on <meta>".to_string()))?;
308
309 let value = value
310 .ok_or_else(|| CmlError::MissingAttribute("value required on <meta>".to_string()))?;
311
312 Ok(MetaEntry { name, value })
313 }
314
315 fn parse_body<R: BufRead>(reader: &mut Reader<R>) -> Result<Body> {
317 let blocks = Self::parse_blocks_until(reader, b"body")?;
318
319 if blocks.is_empty() {
320 return Err(CmlError::InvalidStructure(
321 "<body> must contain at least one block element".to_string(),
322 ));
323 }
324
325 Ok(Body { blocks })
326 }
327
328 fn parse_blocks_until<R: BufRead>(
330 reader: &mut Reader<R>,
331 end_tag: &[u8],
332 ) -> Result<Vec<BlockElement>> {
333 let mut blocks = Vec::new();
334 let mut buf = Vec::new();
335
336 loop {
337 match reader.read_event_into(&mut buf) {
338 Ok(Event::Start(e)) => {
339 if let Some(block) = Self::parse_block_element(reader, e)? {
340 blocks.push(block);
341 }
342 }
343 Ok(Event::Empty(e)) => {
344 if e.name().as_ref() == b"break" {
346 blocks.push(BlockElement::Break(Self::parse_break_empty(e)?));
347 }
348 }
349 Ok(Event::End(e)) if e.name().as_ref() == end_tag => {
350 break;
351 }
352 Ok(Event::Eof) => {
353 return Err(CmlError::InvalidStructure(format!(
354 "Unexpected EOF waiting for end tag: {}",
355 String::from_utf8_lossy(end_tag)
356 )))
357 }
358 Ok(_) => {}
359 Err(e) => return Err(e.into()),
360 }
361 buf.clear();
362 }
363
364 Ok(blocks)
365 }
366
367 fn parse_block_element<R: BufRead>(
369 reader: &mut Reader<R>,
370 start: BytesStart,
371 ) -> Result<Option<BlockElement>> {
372 let element = match start.name().as_ref() {
373 b"section" => BlockElement::Section(Self::parse_section(reader, start)?),
374 b"paragraph" => BlockElement::Paragraph(Self::parse_paragraph(reader, start)?),
375 b"heading" => BlockElement::Heading(Self::parse_heading(reader, start)?),
376 b"aside" => BlockElement::Aside(Self::parse_aside(reader, start)?),
377 b"quote" => BlockElement::Quote(Self::parse_quote(reader, start)?),
378 b"list" => BlockElement::List(Self::parse_list(reader, start)?),
379 b"table" => BlockElement::Table(Self::parse_table(reader, start)?),
380 b"code" => BlockElement::Code(Self::parse_code(reader, start)?),
381 b"break" => BlockElement::Break(Self::parse_break(reader, start)?),
382 b"figure" => BlockElement::Figure(Self::parse_figure(reader, start)?),
383 _ => return Ok(None),
384 };
385
386 Ok(Some(element))
387 }
388
389 fn parse_section<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Section> {
391 let mut id = None;
392 let mut section_type = None;
393 let mut reference = None;
394
395 for attr in start.attributes() {
396 let attr = attr?;
397 let key = attr.key.as_ref();
398 let value = String::from_utf8_lossy(&attr.value).to_string();
399
400 match key {
401 b"id" => id = Some(value),
402 b"type" => section_type = Some(value),
403 b"ref" => reference = Some(value),
404 _ => {}
405 }
406 }
407
408 let content = Self::parse_blocks_until(reader, b"section")?;
409
410 Ok(Section {
411 id,
412 section_type,
413 reference,
414 content,
415 })
416 }
417
418 fn parse_paragraph<R: BufRead>(
420 reader: &mut Reader<R>,
421 start: BytesStart,
422 ) -> Result<Paragraph> {
423 let mut id = None;
424 let mut paragraph_type = None;
425
426 for attr in start.attributes() {
427 let attr = attr?;
428 let key = attr.key.as_ref();
429 let value = String::from_utf8_lossy(&attr.value).to_string();
430
431 match key {
432 b"id" => id = Some(value),
433 b"type" => paragraph_type = Some(value),
434 _ => {}
435 }
436 }
437
438 let content = Self::parse_inline_content(reader, b"paragraph")?;
439
440 Ok(Paragraph {
441 id,
442 paragraph_type,
443 content,
444 })
445 }
446
447 fn parse_heading<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Heading> {
449 let mut id = None;
450 let mut heading_type = None;
451 let mut size = None;
452
453 for attr in start.attributes() {
454 let attr = attr?;
455 let key = attr.key.as_ref();
456 let value = String::from_utf8_lossy(&attr.value).to_string();
457
458 match key {
459 b"id" => id = Some(value),
460 b"type" => heading_type = Some(value),
461 b"size" => size = Some(value.parse().map_err(|_| {
462 CmlError::InvalidAttribute("size must be a number".to_string())
463 })?),
464 _ => {}
465 }
466 }
467
468 let size = size
469 .ok_or_else(|| CmlError::MissingAttribute("size required on <heading>".to_string()))?;
470
471 let content = Self::parse_inline_content(reader, b"heading")?;
472
473 Ok(Heading {
474 id,
475 heading_type,
476 size,
477 content,
478 })
479 }
480
481 fn parse_aside<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Aside> {
483 let mut id = None;
484 let mut aside_type = None;
485 let mut side = None;
486
487 for attr in start.attributes() {
488 let attr = attr?;
489 let key = attr.key.as_ref();
490 let value = String::from_utf8_lossy(&attr.value).to_string();
491
492 match key {
493 b"id" => id = Some(value),
494 b"type" => aside_type = Some(value),
495 b"side" => {
496 side = Some(match value.as_str() {
497 "left" => Side::Left,
498 "right" => Side::Right,
499 _ => {
500 return Err(CmlError::InvalidAttribute(
501 "side must be 'left' or 'right'".to_string(),
502 ))
503 }
504 })
505 }
506 _ => {}
507 }
508 }
509
510 let side =
511 side.ok_or_else(|| CmlError::MissingAttribute("side required on <aside>".to_string()))?;
512
513 let content = Self::parse_blocks_until(reader, b"aside")?;
514
515 Ok(Aside {
516 id,
517 aside_type,
518 side,
519 content,
520 })
521 }
522
523 fn parse_quote<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Quote> {
525 let mut id = None;
526 let mut reference = None;
527 let mut source = None;
528
529 for attr in start.attributes() {
530 let attr = attr?;
531 let key = attr.key.as_ref();
532 let value = String::from_utf8_lossy(&attr.value).to_string();
533
534 match key {
535 b"id" => id = Some(value),
536 b"ref" => reference = Some(value),
537 b"source" => source = Some(value),
538 _ => {}
539 }
540 }
541
542 let content = Self::parse_blocks_until(reader, b"quote")?;
543
544 Ok(Quote {
545 id,
546 reference,
547 source,
548 content,
549 })
550 }
551
552 fn parse_list<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<List> {
554 let mut id = None;
555 let mut list_type = None;
556 let mut style = None;
557
558 for attr in start.attributes() {
559 let attr = attr?;
560 let key = attr.key.as_ref();
561 let value = String::from_utf8_lossy(&attr.value).to_string();
562
563 match key {
564 b"id" => id = Some(value),
565 b"type" => {
566 list_type = Some(match value.as_str() {
567 "ordered" => ListType::Ordered,
568 "unordered" => ListType::Unordered,
569 _ => {
570 return Err(CmlError::InvalidAttribute(
571 "list type must be 'ordered' or 'unordered'".to_string(),
572 ))
573 }
574 })
575 }
576 b"style" => {
577 style = Some(match value.as_str() {
578 "numeric" => ListStyle::Numeric,
579 "roman" => ListStyle::Roman,
580 "alpha" => ListStyle::Alpha,
581 "symbolic" => ListStyle::Symbolic,
582 _ => {
583 return Err(CmlError::InvalidAttribute(
584 "invalid list style".to_string(),
585 ))
586 }
587 })
588 }
589 _ => {}
590 }
591 }
592
593 let mut items = Vec::new();
594 let mut buf = Vec::new();
595
596 loop {
597 match reader.read_event_into(&mut buf) {
598 Ok(Event::Start(e)) if e.name().as_ref() == b"item" => {
599 items.push(Self::parse_list_item(reader, e)?);
600 }
601 Ok(Event::End(e)) if e.name().as_ref() == b"list" => {
602 break;
603 }
604 Ok(Event::Eof) => {
605 return Err(CmlError::InvalidStructure(
606 "Unexpected EOF in <list>".to_string(),
607 ))
608 }
609 Ok(_) => {}
610 Err(e) => return Err(e.into()),
611 }
612 buf.clear();
613 }
614
615 if items.is_empty() {
616 return Err(CmlError::InvalidStructure(
617 "<list> must contain at least one <item>".to_string(),
618 ));
619 }
620
621 Ok(List {
622 id,
623 list_type,
624 style,
625 items,
626 })
627 }
628
629 fn parse_list_item<R: BufRead>(
631 reader: &mut Reader<R>,
632 start: BytesStart,
633 ) -> Result<ListItem> {
634 let mut id = None;
635
636 for attr in start.attributes() {
637 let attr = attr?;
638 if attr.key.as_ref() == b"id" {
639 id = Some(String::from_utf8_lossy(&attr.value).to_string());
640 }
641 }
642
643 let content = Self::parse_list_item_content(reader)?;
645
646 Ok(ListItem { id, content })
647 }
648
649 fn parse_list_item_content<R: BufRead>(reader: &mut Reader<R>) -> Result<ListItemContent> {
651 let mut blocks = Vec::new();
652 let mut inlines = Vec::new();
653 let mut has_blocks = false;
654 let mut buf = Vec::new();
655
656 loop {
657 match reader.read_event_into(&mut buf) {
658 Ok(Event::Start(e)) => {
659 let name = e.name();
660 if Self::is_block_element(name.as_ref()) {
662 has_blocks = true;
663 if let Some(block) = Self::parse_block_element(reader, e)? {
664 blocks.push(block);
665 }
666 } else if let Some(inline) = Self::parse_inline_element(reader, e)? {
667 inlines.push(inline);
668 }
669 }
670 Ok(Event::Text(e)) => {
671 let text = e.unescape().unwrap().to_string().trim().to_string();
672 if !text.is_empty() {
673 inlines.push(InlineElement::Text(text));
674 }
675 }
676 Ok(Event::End(e)) if e.name().as_ref() == b"item" => {
677 break;
678 }
679 Ok(Event::Eof) => {
680 return Err(CmlError::InvalidStructure(
681 "Unexpected EOF in <item>".to_string(),
682 ))
683 }
684 Ok(_) => {}
685 Err(e) => return Err(e.into()),
686 }
687 buf.clear();
688 }
689
690 if has_blocks {
691 Ok(ListItemContent::Block(blocks))
692 } else {
693 Ok(ListItemContent::Inline(inlines))
694 }
695 }
696
697 fn is_block_element(name: &[u8]) -> bool {
699 matches!(
700 name,
701 b"section"
702 | b"paragraph"
703 | b"heading"
704 | b"aside"
705 | b"quote"
706 | b"list"
707 | b"table"
708 | b"code"
709 | b"break"
710 | b"figure"
711 )
712 }
713
714 fn parse_table<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Table> {
716 let mut id = None;
717 let mut table_type = None;
718
719 for attr in start.attributes() {
720 let attr = attr?;
721 let key = attr.key.as_ref();
722 let value = String::from_utf8_lossy(&attr.value).to_string();
723
724 match key {
725 b"id" => id = Some(value),
726 b"type" => table_type = Some(value),
727 _ => {}
728 }
729 }
730
731 let mut header = None;
732 let mut body = None;
733 let mut footer = None;
734 let mut buf = Vec::new();
735
736 loop {
737 match reader.read_event_into(&mut buf) {
738 Ok(Event::Start(e)) => match e.name().as_ref() {
739 b"header" => {
740 header = Some(Self::parse_table_header(reader)?);
741 }
742 b"body" => {
743 body = Some(Self::parse_table_body(reader)?);
744 }
745 b"footer" => {
746 footer = Some(Self::parse_table_footer(reader)?);
747 }
748 _ => {}
749 },
750 Ok(Event::End(e)) if e.name().as_ref() == b"table" => {
751 break;
752 }
753 Ok(Event::Eof) => {
754 return Err(CmlError::InvalidStructure(
755 "Unexpected EOF in <table>".to_string(),
756 ))
757 }
758 Ok(_) => {}
759 Err(e) => return Err(e.into()),
760 }
761 buf.clear();
762 }
763
764 let body = body
765 .ok_or_else(|| CmlError::InvalidStructure("<body> required in table".to_string()))?;
766
767 Ok(Table {
768 id,
769 table_type,
770 header,
771 body,
772 footer,
773 })
774 }
775
776 fn parse_table_header<R: BufRead>(reader: &mut Reader<R>) -> Result<TableHeader> {
778 let mut rows = Vec::new();
779 let mut buf = Vec::new();
780
781 loop {
782 match reader.read_event_into(&mut buf) {
783 Ok(Event::Start(e)) if e.name().as_ref() == b"row" => {
784 rows.push(Self::parse_table_row(reader, true)?);
785 }
786 Ok(Event::End(e)) if e.name().as_ref() == b"header" => {
787 break;
788 }
789 Ok(Event::Eof) => {
790 return Err(CmlError::InvalidStructure(
791 "Unexpected EOF in table <header>".to_string(),
792 ))
793 }
794 Ok(_) => {}
795 Err(e) => return Err(e.into()),
796 }
797 buf.clear();
798 }
799
800 Ok(TableHeader { rows })
801 }
802
803 fn parse_table_body<R: BufRead>(reader: &mut Reader<R>) -> Result<TableBody> {
805 let mut rows = Vec::new();
806 let mut buf = Vec::new();
807
808 loop {
809 match reader.read_event_into(&mut buf) {
810 Ok(Event::Start(e)) if e.name().as_ref() == b"row" => {
811 rows.push(Self::parse_table_row(reader, false)?);
812 }
813 Ok(Event::End(e)) if e.name().as_ref() == b"body" => {
814 break;
815 }
816 Ok(Event::Eof) => {
817 return Err(CmlError::InvalidStructure(
818 "Unexpected EOF in table <body>".to_string(),
819 ))
820 }
821 Ok(_) => {}
822 Err(e) => return Err(e.into()),
823 }
824 buf.clear();
825 }
826
827 Ok(TableBody { rows })
828 }
829
830 fn parse_table_footer<R: BufRead>(reader: &mut Reader<R>) -> Result<TableFooter> {
832 let mut caption = None;
833 let mut buf = Vec::new();
834
835 loop {
836 match reader.read_event_into(&mut buf) {
837 Ok(Event::Start(e)) if e.name().as_ref() == b"caption" => {
838 let content = Self::parse_inline_content(reader, b"caption")?;
839 caption = Some(Caption { content });
840 }
841 Ok(Event::End(e)) if e.name().as_ref() == b"footer" => {
842 break;
843 }
844 Ok(Event::Eof) => {
845 return Err(CmlError::InvalidStructure(
846 "Unexpected EOF in table <footer>".to_string(),
847 ))
848 }
849 Ok(_) => {}
850 Err(e) => return Err(e.into()),
851 }
852 buf.clear();
853 }
854
855 let caption = caption.ok_or_else(|| {
856 CmlError::InvalidStructure("<caption> required in table footer".to_string())
857 })?;
858
859 Ok(TableFooter { caption })
860 }
861
862 fn parse_table_row<R: BufRead>(reader: &mut Reader<R>, is_header: bool) -> Result<TableRow> {
864 let mut columns = Vec::new();
865 let mut buf = Vec::new();
866
867 loop {
868 match reader.read_event_into(&mut buf) {
869 Ok(Event::Start(e)) if e.name().as_ref() == b"column" => {
870 columns.push(Self::parse_table_column(reader, e, is_header)?);
871 }
872 Ok(Event::End(e)) if e.name().as_ref() == b"row" => {
873 break;
874 }
875 Ok(Event::Eof) => {
876 return Err(CmlError::InvalidStructure(
877 "Unexpected EOF in table <row>".to_string(),
878 ))
879 }
880 Ok(_) => {}
881 Err(e) => return Err(e.into()),
882 }
883 buf.clear();
884 }
885
886 Ok(TableRow { columns })
887 }
888
889 fn parse_table_column<R: BufRead>(
891 reader: &mut Reader<R>,
892 start: BytesStart,
893 is_header: bool,
894 ) -> Result<TableColumn> {
895 let mut sort = None;
896
897 if is_header {
898 for attr in start.attributes() {
899 let attr = attr?;
900 if attr.key.as_ref() == b"sort" {
901 let value = String::from_utf8_lossy(&attr.value).to_string();
902 sort = Some(match value.as_str() {
903 "asc" => SortOrder::Asc,
904 "desc" => SortOrder::Desc,
905 _ => {
906 return Err(CmlError::InvalidAttribute(
907 "sort must be 'asc' or 'desc'".to_string(),
908 ))
909 }
910 });
911 }
912 }
913 }
914
915 let cell = Self::parse_table_cell(reader)?;
917
918 Ok(TableColumn { sort, cell })
919 }
920
921 fn parse_table_cell<R: BufRead>(reader: &mut Reader<R>) -> Result<TableCell> {
923 let mut buf = Vec::new();
924 let mut colspan = None;
925 let mut rowspan = None;
926 let mut content = Vec::new();
927
928 loop {
929 match reader.read_event_into(&mut buf) {
930 Ok(Event::Start(e)) if e.name().as_ref() == b"cell" => {
931 for attr in e.attributes() {
933 let attr = attr?;
934 let key = attr.key.as_ref();
935 let value = String::from_utf8_lossy(&attr.value).to_string();
936
937 match key {
938 b"colspan" => {
939 colspan = Some(value.parse().map_err(|_| {
940 CmlError::InvalidAttribute(
941 "colspan must be a number".to_string(),
942 )
943 })?)
944 }
945 b"rowspan" => {
946 rowspan = Some(value.parse().map_err(|_| {
947 CmlError::InvalidAttribute(
948 "rowspan must be a number".to_string(),
949 )
950 })?)
951 }
952 _ => {}
953 }
954 }
955
956 content = Self::parse_inline_content(reader, b"cell")?;
958 break;
959 }
960 Ok(Event::Eof) => {
961 return Err(CmlError::InvalidStructure(
962 "Expected <cell> in column".to_string(),
963 ))
964 }
965 Ok(_) => {}
966 Err(e) => return Err(e.into()),
967 }
968 buf.clear();
969 }
970
971 Ok(TableCell {
972 colspan,
973 rowspan,
974 content,
975 })
976 }
977
978 fn parse_code<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Code> {
980 let mut id = None;
981 let mut lang = None;
982 let mut copyable = None;
983
984 for attr in start.attributes() {
985 let attr = attr?;
986 let key = attr.key.as_ref();
987 let value = String::from_utf8_lossy(&attr.value).to_string();
988
989 match key {
990 b"id" => id = Some(value),
991 b"lang" => lang = Some(value),
992 b"copyable" => {
993 copyable = Some(match value.as_str() {
994 "true" => true,
995 "false" => false,
996 _ => {
997 return Err(CmlError::InvalidAttribute(
998 "copyable must be 'true' or 'false'".to_string(),
999 ))
1000 }
1001 })
1002 }
1003 _ => {}
1004 }
1005 }
1006
1007 let content = Self::read_text(reader, "code")?;
1008
1009 Ok(Code {
1010 id,
1011 lang,
1012 copyable,
1013 content,
1014 })
1015 }
1016
1017 fn parse_break<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Break> {
1019 let mut break_type = None;
1020
1021 for attr in start.attributes() {
1022 let attr = attr?;
1023 if attr.key.as_ref() == b"type" {
1024 break_type = Some(String::from_utf8_lossy(&attr.value).to_string());
1025 }
1026 }
1027
1028 let mut buf = Vec::new();
1030 loop {
1031 match reader.read_event_into(&mut buf) {
1032 Ok(Event::End(e)) if e.name().as_ref() == b"break" => {
1033 break;
1034 }
1035 Ok(Event::Eof) => {
1036 return Err(CmlError::InvalidStructure(
1037 "Unexpected EOF in <break>".to_string(),
1038 ))
1039 }
1040 Ok(_) => {}
1041 Err(e) => return Err(e.into()),
1042 }
1043 buf.clear();
1044 }
1045
1046 Ok(Break { break_type })
1047 }
1048
1049 fn parse_break_empty(start: BytesStart) -> Result<Break> {
1051 let mut break_type = None;
1052
1053 for attr in start.attributes() {
1054 let attr = attr?;
1055 if attr.key.as_ref() == b"type" {
1056 break_type = Some(String::from_utf8_lossy(&attr.value).to_string());
1057 }
1058 }
1059
1060 Ok(Break { break_type })
1061 }
1062
1063 fn parse_figure<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Figure> {
1065 let mut id = None;
1066 let mut figure_type = None;
1067 let mut reference = None;
1068
1069 for attr in start.attributes() {
1070 let attr = attr?;
1071 let key = attr.key.as_ref();
1072 let value = String::from_utf8_lossy(&attr.value).to_string();
1073
1074 match key {
1075 b"id" => id = Some(value),
1076 b"type" => figure_type = Some(value),
1077 b"ref" => reference = Some(value),
1078 _ => {}
1079 }
1080 }
1081
1082 let mut buf = Vec::new();
1084 loop {
1085 match reader.read_event_into(&mut buf) {
1086 Ok(Event::End(e)) if e.name().as_ref() == b"figure" => {
1087 break;
1088 }
1089 Ok(Event::Eof) => {
1090 return Err(CmlError::InvalidStructure(
1091 "Unexpected EOF in <figure>".to_string(),
1092 ))
1093 }
1094 Ok(_) => {}
1095 Err(e) => return Err(e.into()),
1096 }
1097 buf.clear();
1098 }
1099
1100 Ok(Figure {
1103 id,
1104 figure_type,
1105 reference,
1106 })
1107 }
1108
1109 fn parse_inline_content<R: BufRead>(
1111 reader: &mut Reader<R>,
1112 end_tag: &[u8],
1113 ) -> Result<Vec<InlineElement>> {
1114 let mut elements = Vec::new();
1115 let mut buf = Vec::new();
1116
1117 loop {
1118 match reader.read_event_into(&mut buf) {
1119 Ok(Event::Start(e)) => {
1120 if let Some(inline) = Self::parse_inline_element(reader, e)? {
1121 elements.push(inline);
1122 }
1123 }
1124 Ok(Event::Empty(e)) => {
1125 if e.name().as_ref() == b"end" {
1127 elements.push(InlineElement::End(Self::parse_end_empty(e)?));
1128 }
1129 }
1130 Ok(Event::Text(e)) => {
1131 let text = e.unescape().unwrap().to_string();
1132 if !text.trim().is_empty() {
1133 elements.push(InlineElement::Text(text));
1134 }
1135 }
1136 Ok(Event::End(e)) if e.name().as_ref() == end_tag => {
1137 break;
1138 }
1139 Ok(Event::Eof) => {
1140 return Err(CmlError::InvalidStructure(format!(
1141 "Unexpected EOF waiting for end tag: {}",
1142 String::from_utf8_lossy(end_tag)
1143 )))
1144 }
1145 Ok(_) => {}
1146 Err(e) => return Err(e.into()),
1147 }
1148 buf.clear();
1149 }
1150
1151 Ok(elements)
1152 }
1153
1154 fn parse_inline_element<R: BufRead>(
1156 reader: &mut Reader<R>,
1157 start: BytesStart,
1158 ) -> Result<Option<InlineElement>> {
1159 let element = match start.name().as_ref() {
1160 b"em" => InlineElement::Em(Self::parse_em(reader, start)?),
1161 b"bo" => InlineElement::Bo(Self::parse_bo(reader)?),
1162 b"un" => InlineElement::Un(Self::parse_un(reader)?),
1163 b"st" => InlineElement::St(Self::parse_st(reader)?),
1164 b"snip" => InlineElement::Snip(Self::parse_snip(reader, start)?),
1165 b"key" => InlineElement::Key(Self::parse_key(reader)?),
1166 b"rf" => InlineElement::Rf(Self::parse_rf(reader, start)?),
1167 b"tg" => InlineElement::Tg(Self::parse_tg(reader, start)?),
1168 b"lk" => InlineElement::Lk(Self::parse_lk(reader, start)?),
1169 b"curr" => InlineElement::Curr(Self::parse_curr(reader, start)?),
1170 b"end" => InlineElement::End(Self::parse_end(reader, start)?),
1171 _ => return Ok(None),
1172 };
1173
1174 Ok(Some(element))
1175 }
1176
1177 fn parse_em<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Em> {
1179 let mut em_type = None;
1180
1181 for attr in start.attributes() {
1182 let attr = attr?;
1183 if attr.key.as_ref() == b"type" {
1184 let value = String::from_utf8_lossy(&attr.value).to_string();
1185 em_type = Some(match value.as_str() {
1186 "stress" => EmphasisType::Stress,
1187 "contrast" => EmphasisType::Contrast,
1188 _ => {
1189 return Err(CmlError::InvalidAttribute(
1190 "em type must be 'stress' or 'contrast'".to_string(),
1191 ))
1192 }
1193 });
1194 }
1195 }
1196
1197 let content = Self::parse_inline_content(reader, b"em")?;
1198
1199 Ok(Em { em_type, content })
1200 }
1201
1202 fn parse_bo<R: BufRead>(reader: &mut Reader<R>) -> Result<Bo> {
1204 let content = Self::parse_inline_content(reader, b"bo")?;
1205 Ok(Bo { content })
1206 }
1207
1208 fn parse_un<R: BufRead>(reader: &mut Reader<R>) -> Result<Un> {
1210 let content = Self::parse_inline_content(reader, b"un")?;
1211 Ok(Un { content })
1212 }
1213
1214 fn parse_st<R: BufRead>(reader: &mut Reader<R>) -> Result<St> {
1216 let content = Self::parse_inline_content(reader, b"st")?;
1217 Ok(St { content })
1218 }
1219
1220 fn parse_snip<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Snip> {
1222 let mut char = None;
1223
1224 for attr in start.attributes() {
1225 let attr = attr?;
1226 if attr.key.as_ref() == b"char" {
1227 char = Some(String::from_utf8_lossy(&attr.value).to_string());
1228 }
1229 }
1230
1231 let content = Self::read_text(reader, "snip")?;
1232
1233 Ok(Snip { char, content })
1234 }
1235
1236 fn parse_key<R: BufRead>(reader: &mut Reader<R>) -> Result<Key> {
1238 let content = Self::read_text(reader, "key")?;
1239 Ok(Key { content })
1240 }
1241
1242 fn parse_rf<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Rf> {
1244 let mut reference = None;
1245 let mut role = None;
1246 let mut title = None;
1247
1248 for attr in start.attributes() {
1249 let attr = attr?;
1250 let key = attr.key.as_ref();
1251 let value = String::from_utf8_lossy(&attr.value).to_string();
1252
1253 match key {
1254 b"ref" => reference = Some(value),
1255 b"role" => role = Some(value),
1256 b"title" => title = Some(value),
1257 _ => {}
1258 }
1259 }
1260
1261 let reference =
1262 reference.ok_or_else(|| CmlError::MissingAttribute("ref required on <rf>".to_string()))?;
1263
1264 let content = Self::read_text(reader, "rf")?;
1265
1266 Ok(Rf {
1267 reference,
1268 role,
1269 title,
1270 content,
1271 })
1272 }
1273
1274 fn parse_tg<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Tg> {
1276 let mut reference = None;
1277 let mut role = None;
1278 let mut title = None;
1279
1280 for attr in start.attributes() {
1281 let attr = attr?;
1282 let key = attr.key.as_ref();
1283 let value = String::from_utf8_lossy(&attr.value).to_string();
1284
1285 match key {
1286 b"ref" => reference = Some(value),
1287 b"role" => role = Some(value),
1288 b"title" => title = Some(value),
1289 _ => {}
1290 }
1291 }
1292
1293 let reference =
1294 reference.ok_or_else(|| CmlError::MissingAttribute("ref required on <tg>".to_string()))?;
1295
1296 let content = Self::read_text(reader, "tg")?;
1297
1298 Ok(Tg {
1299 reference,
1300 role,
1301 title,
1302 content,
1303 })
1304 }
1305
1306 fn parse_lk<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Lk> {
1308 let mut reference = None;
1309 let mut role = None;
1310 let mut title = None;
1311
1312 for attr in start.attributes() {
1313 let attr = attr?;
1314 let key = attr.key.as_ref();
1315 let value = String::from_utf8_lossy(&attr.value).to_string();
1316
1317 match key {
1318 b"ref" => reference = Some(value),
1319 b"role" => role = Some(value),
1320 b"title" => title = Some(value),
1321 _ => {}
1322 }
1323 }
1324
1325 let reference =
1326 reference.ok_or_else(|| CmlError::MissingAttribute("ref required on <lk>".to_string()))?;
1327
1328 let content = Self::read_text(reader, "lk")?;
1329
1330 Ok(Lk {
1331 reference,
1332 role,
1333 title,
1334 content,
1335 })
1336 }
1337
1338 fn parse_curr<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Curr> {
1340 let mut currency_type = None;
1341 let mut format = None;
1342
1343 for attr in start.attributes() {
1344 let attr = attr?;
1345 let key = attr.key.as_ref();
1346 let value = String::from_utf8_lossy(&attr.value).to_string();
1347
1348 match key {
1349 b"type" => currency_type = Some(value),
1350 b"format" => {
1351 format = Some(match value.as_str() {
1352 "symbol" => CurrencyFormat::Symbol,
1353 "code" => CurrencyFormat::Code,
1354 "name" => CurrencyFormat::Name,
1355 _ => {
1356 return Err(CmlError::InvalidAttribute(
1357 "currency format must be 'symbol', 'code', or 'name'".to_string(),
1358 ))
1359 }
1360 })
1361 }
1362 _ => {}
1363 }
1364 }
1365
1366 let currency_type = currency_type
1367 .ok_or_else(|| CmlError::MissingAttribute("type required on <curr>".to_string()))?;
1368
1369 let value = Self::read_text(reader, "curr")?;
1370
1371 Ok(Curr {
1372 currency_type,
1373 format,
1374 value,
1375 })
1376 }
1377
1378 fn parse_end<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<End> {
1380 let mut kind = None;
1381
1382 for attr in start.attributes() {
1383 let attr = attr?;
1384 if attr.key.as_ref() == b"kind" {
1385 let value = String::from_utf8_lossy(&attr.value).to_string();
1386 kind = Some(match value.as_str() {
1387 "line" => EndKind::Line,
1388 "verse" => EndKind::Verse,
1389 "item" => EndKind::Item,
1390 _ => {
1391 return Err(CmlError::InvalidAttribute(
1392 "end kind must be 'line', 'verse', or 'item'".to_string(),
1393 ))
1394 }
1395 });
1396 }
1397 }
1398
1399 let mut buf = Vec::new();
1401 loop {
1402 match reader.read_event_into(&mut buf) {
1403 Ok(Event::End(e)) if e.name().as_ref() == b"end" => {
1404 break;
1405 }
1406 Ok(Event::Eof) => {
1407 return Err(CmlError::InvalidStructure(
1408 "Unexpected EOF in <end>".to_string(),
1409 ))
1410 }
1411 Ok(_) => {}
1412 Err(e) => return Err(e.into()),
1413 }
1414 buf.clear();
1415 }
1416
1417 Ok(End { kind })
1418 }
1419
1420 fn parse_end_empty(start: BytesStart) -> Result<End> {
1422 let mut kind = None;
1423
1424 for attr in start.attributes() {
1425 let attr = attr?;
1426 if attr.key.as_ref() == b"kind" {
1427 let value = String::from_utf8_lossy(&attr.value).to_string();
1428 kind = Some(match value.as_str() {
1429 "line" => EndKind::Line,
1430 "verse" => EndKind::Verse,
1431 "item" => EndKind::Item,
1432 _ => {
1433 return Err(CmlError::InvalidAttribute(
1434 "end kind must be 'line', 'verse', or 'item'".to_string(),
1435 ))
1436 }
1437 });
1438 }
1439 }
1440
1441 Ok(End { kind })
1442 }
1443
1444 fn parse_footer<R: BufRead>(reader: &mut Reader<R>) -> Result<Footer> {
1446 let mut signatures = None;
1447 let mut citations = None;
1448 let mut annotations = None;
1449
1450 let mut buf = Vec::new();
1451
1452 loop {
1453 match reader.read_event_into(&mut buf) {
1454 Ok(Event::Start(e)) => match e.name().as_ref() {
1455 b"signatures" => {
1456 signatures = Some(Self::parse_signatures(reader)?);
1457 }
1458 b"citations" => {
1459 citations = Some(Self::parse_citations(reader)?);
1460 }
1461 b"annotations" => {
1462 annotations = Some(Self::parse_annotations(reader)?);
1463 }
1464 _ => {}
1465 },
1466 Ok(Event::End(e)) if e.name().as_ref() == b"footer" => {
1467 break;
1468 }
1469 Ok(Event::Eof) => {
1470 return Err(CmlError::InvalidStructure(
1471 "Unexpected EOF in <footer>".to_string(),
1472 ))
1473 }
1474 Ok(_) => {}
1475 Err(e) => return Err(e.into()),
1476 }
1477 buf.clear();
1478 }
1479
1480 Ok(Footer {
1481 signatures,
1482 citations,
1483 annotations,
1484 })
1485 }
1486
1487 fn parse_signatures<R: BufRead>(reader: &mut Reader<R>) -> Result<Signatures> {
1489 let mut signatures = Vec::new();
1490 let mut buf = Vec::new();
1491
1492 loop {
1493 match reader.read_event_into(&mut buf) {
1494 Ok(Event::Start(e)) if e.name().as_ref() == b"signature" => {
1495 signatures.push(Self::parse_signature(reader, e)?);
1496 }
1497 Ok(Event::End(e)) if e.name().as_ref() == b"signatures" => {
1498 break;
1499 }
1500 Ok(Event::Eof) => {
1501 return Err(CmlError::InvalidStructure(
1502 "Unexpected EOF in <signatures>".to_string(),
1503 ))
1504 }
1505 Ok(_) => {}
1506 Err(e) => return Err(e.into()),
1507 }
1508 buf.clear();
1509 }
1510
1511 Ok(Signatures { signatures })
1512 }
1513
1514 fn parse_signature<R: BufRead>(
1516 reader: &mut Reader<R>,
1517 start: BytesStart,
1518 ) -> Result<Signature> {
1519 let mut when = None;
1520 let mut role = None;
1521 let mut reference = None;
1522
1523 for attr in start.attributes() {
1524 let attr = attr?;
1525 let key = attr.key.as_ref();
1526 let value = String::from_utf8_lossy(&attr.value).to_string();
1527
1528 match key {
1529 b"when" => when = Some(value),
1530 b"role" => role = Some(value),
1531 b"ref" => reference = Some(value),
1532 _ => {}
1533 }
1534 }
1535
1536 let when = when.ok_or_else(|| {
1537 CmlError::MissingAttribute("when required on <signature>".to_string())
1538 })?;
1539
1540 let content = Self::read_text(reader, "signature")?;
1541
1542 Ok(Signature {
1543 when,
1544 role,
1545 reference,
1546 content,
1547 })
1548 }
1549
1550 fn parse_citations<R: BufRead>(reader: &mut Reader<R>) -> Result<Citations> {
1552 let mut citations = Vec::new();
1553 let mut buf = Vec::new();
1554
1555 loop {
1556 match reader.read_event_into(&mut buf) {
1557 Ok(Event::Start(e)) if e.name().as_ref() == b"citation" => {
1558 citations.push(Self::parse_citation(reader, e)?);
1559 }
1560 Ok(Event::End(e)) if e.name().as_ref() == b"citations" => {
1561 break;
1562 }
1563 Ok(Event::Eof) => {
1564 return Err(CmlError::InvalidStructure(
1565 "Unexpected EOF in <citations>".to_string(),
1566 ))
1567 }
1568 Ok(_) => {}
1569 Err(e) => return Err(e.into()),
1570 }
1571 buf.clear();
1572 }
1573
1574 Ok(Citations { citations })
1575 }
1576
1577 fn parse_citation<R: BufRead>(
1579 reader: &mut Reader<R>,
1580 start: BytesStart,
1581 ) -> Result<Citation> {
1582 let mut reference = None;
1583 let mut citation_type = None;
1584
1585 for attr in start.attributes() {
1586 let attr = attr?;
1587 let key = attr.key.as_ref();
1588 let value = String::from_utf8_lossy(&attr.value).to_string();
1589
1590 match key {
1591 b"ref" => reference = Some(value),
1592 b"type" => citation_type = Some(value),
1593 _ => {}
1594 }
1595 }
1596
1597 let reference = reference.ok_or_else(|| {
1598 CmlError::MissingAttribute("ref required on <citation>".to_string())
1599 })?;
1600
1601 let content = Self::parse_inline_content(reader, b"citation")?;
1602
1603 Ok(Citation {
1604 reference,
1605 citation_type,
1606 content,
1607 })
1608 }
1609
1610 fn parse_annotations<R: BufRead>(reader: &mut Reader<R>) -> Result<Annotations> {
1612 let mut notes = Vec::new();
1613 let mut buf = Vec::new();
1614
1615 loop {
1616 match reader.read_event_into(&mut buf) {
1617 Ok(Event::Start(e)) if e.name().as_ref() == b"note" => {
1618 notes.push(Self::parse_note(reader, e)?);
1619 }
1620 Ok(Event::End(e)) if e.name().as_ref() == b"annotations" => {
1621 break;
1622 }
1623 Ok(Event::Eof) => {
1624 return Err(CmlError::InvalidStructure(
1625 "Unexpected EOF in <annotations>".to_string(),
1626 ))
1627 }
1628 Ok(_) => {}
1629 Err(e) => return Err(e.into()),
1630 }
1631 buf.clear();
1632 }
1633
1634 Ok(Annotations { notes })
1635 }
1636
1637 fn parse_note<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Note> {
1639 let mut id = None;
1640 let mut note_type = None;
1641 let mut reference = None;
1642
1643 for attr in start.attributes() {
1644 let attr = attr?;
1645 let key = attr.key.as_ref();
1646 let value = String::from_utf8_lossy(&attr.value).to_string();
1647
1648 match key {
1649 b"id" => id = Some(value),
1650 b"type" => note_type = Some(value),
1651 b"ref" => reference = Some(value),
1652 _ => {}
1653 }
1654 }
1655
1656 let content = Self::parse_note_content(reader)?;
1658
1659 Ok(Note {
1660 id,
1661 note_type,
1662 reference,
1663 content,
1664 })
1665 }
1666
1667 fn parse_note_content<R: BufRead>(reader: &mut Reader<R>) -> Result<NoteContent> {
1669 let mut blocks = Vec::new();
1671 let mut inlines = Vec::new();
1672 let mut has_blocks = false;
1673 let mut buf = Vec::new();
1674
1675 loop {
1676 match reader.read_event_into(&mut buf) {
1677 Ok(Event::Start(e)) => {
1678 let name = e.name();
1679 if Self::is_block_element(name.as_ref()) {
1680 has_blocks = true;
1681 if let Some(block) = Self::parse_block_element(reader, e)? {
1682 blocks.push(block);
1683 }
1684 } else if let Some(inline) = Self::parse_inline_element(reader, e)? {
1685 inlines.push(inline);
1686 }
1687 }
1688 Ok(Event::Text(e)) => {
1689 let text = e.unescape().unwrap().to_string().trim().to_string();
1690 if !text.is_empty() {
1691 inlines.push(InlineElement::Text(text));
1692 }
1693 }
1694 Ok(Event::End(e)) if e.name().as_ref() == b"note" => {
1695 break;
1696 }
1697 Ok(Event::Eof) => {
1698 return Err(CmlError::InvalidStructure(
1699 "Unexpected EOF in <note>".to_string(),
1700 ))
1701 }
1702 Ok(_) => {}
1703 Err(e) => return Err(e.into()),
1704 }
1705 buf.clear();
1706 }
1707
1708 if has_blocks {
1709 Ok(NoteContent::Block(blocks))
1710 } else {
1711 Ok(NoteContent::Inline(inlines))
1712 }
1713 }
1714
1715 fn read_text<R: BufRead>(reader: &mut Reader<R>, tag_name: &str) -> Result<String> {
1717 let mut buf = Vec::new();
1718 let mut text = String::new();
1719
1720 loop {
1721 match reader.read_event_into(&mut buf) {
1722 Ok(Event::Text(e)) => {
1723 text.push_str(&e.unescape().unwrap());
1724 }
1725 Ok(Event::End(e)) if e.name().as_ref() == tag_name.as_bytes() => {
1726 break;
1727 }
1728 Ok(Event::Eof) => {
1729 return Err(CmlError::InvalidStructure(format!(
1730 "Unexpected EOF in <{}>",
1731 tag_name
1732 )))
1733 }
1734 Ok(_) => {}
1735 Err(e) => return Err(e.into()),
1736 }
1737 buf.clear();
1738 }
1739
1740 Ok(text)
1741 }
1742}
1743
#[cfg(test)]
mod tests {
    use super::*;

    /// A document with only the mandatory parts parses and exposes its root
    /// attributes, title and single body block.
    #[test]
    fn test_parse_minimal_document() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
 <header>
 <title>Test Document</title>
 </header>
 <body>
 <paragraph>Hello, world!</paragraph>
 </body>
 <footer>
 </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();

        assert_eq!(parsed.version, "0.2");
        assert_eq!(parsed.profile, "core");
        assert_eq!(parsed.header.title, "Test Document");
        assert_eq!(parsed.body.blocks.len(), 1);
    }

    /// Header metadata (author, date, identifier, meta) is collected into the
    /// matching header collections.
    #[test]
    fn test_parse_header_with_metadata() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
 <header>
 <title>Test</title>
 <author role="editor">John Doe</author>
 <date type="created" when="2025-12-22"/>
 <identifier scheme="doi">10.1234/test</identifier>
 <meta name="status" value="draft"/>
 </header>
 <body>
 <paragraph>Content</paragraph>
 </body>
 <footer>
 </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();
        let header = &parsed.header;

        assert_eq!(header.authors.len(), 1);
        assert_eq!(header.authors[0].name, "John Doe");
        assert_eq!(header.dates.len(), 1);
        assert_eq!(header.identifiers.len(), 1);
        assert_eq!(header.meta.len(), 1);
    }

    /// Mixed text and inline markup inside a paragraph yields five inline
    /// items (presumably three text runs plus `<em>` and `<bo>`).
    #[test]
    fn test_parse_inline_elements() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
 <header>
 <title>Test</title>
 </header>
 <body>
 <paragraph>This is <em>emphasized</em> and <bo>bold</bo> text.</paragraph>
 </body>
 <footer>
 </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();

        match &parsed.body.blocks[0] {
            BlockElement::Paragraph(para) => assert_eq!(para.content.len(), 5),
            _ => panic!("Expected paragraph"),
        }
    }

    /// An ordered list keeps both of its items and reports its list type.
    #[test]
    fn test_parse_list() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
 <header>
 <title>Test</title>
 </header>
 <body>
 <list type="ordered" style="numeric">
 <item>First</item>
 <item>Second</item>
 </list>
 </body>
 <footer>
 </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();

        match &parsed.body.blocks[0] {
            BlockElement::List(list) => {
                assert_eq!(list.items.len(), 2);
                assert!(matches!(list.list_type, Some(ListType::Ordered)));
            }
            _ => panic!("Expected list"),
        }
    }

    /// A footer signature is parsed and its text content is preserved.
    #[test]
    fn test_parse_footer_with_signature() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
 <header>
 <title>Test</title>
 </header>
 <body>
 <paragraph>Content</paragraph>
 </body>
 <footer>
 <signatures>
 <signature when="2025-12-22T10:30:00Z" role="author">Jane Doe</signature>
 </signatures>
 </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();
        let section = parsed.footer.signatures;

        assert!(section.is_some());
        let sigs = section.unwrap();
        assert_eq!(sigs.signatures.len(), 1);
        assert_eq!(sigs.signatures[0].content, "Jane Doe");
    }
}