1use crate::types::*;
6use crate::{CmlError, Result};
7use quick_xml::events::{BytesStart, Event};
8use quick_xml::Reader;
9use std::io::BufRead;
10
/// Stateless parser type for CML documents.
///
/// The struct carries no data; parsing is exposed through associated
/// functions such as `parse_str` and `parse`.
pub struct CmlParser;
13
14impl CmlParser {
15 pub fn parse_str(xml: &str) -> Result<CmlDocument> {
17 let reader = Reader::from_str(xml);
18 Self::parse(reader)
19 }
20
21 pub fn parse<R: BufRead>(mut reader: Reader<R>) -> Result<CmlDocument> {
23 let mut buf = Vec::new();
24
25 loop {
26 match reader.read_event_into(&mut buf) {
27 Ok(Event::Start(e)) if e.name().as_ref() == b"cml" => {
28 return Self::parse_cml(&mut reader, e);
29 }
30 Ok(Event::Eof) => {
31 return Err(CmlError::InvalidStructure(
32 "No <cml> root element found".to_string(),
33 ))
34 }
35 Ok(_) => {}
36 Err(e) => return Err(e.into()),
37 }
38 buf.clear();
39 }
40 }
41
42 fn parse_cml<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<CmlDocument> {
44 let mut version = None;
45 let mut encoding = None;
46 let mut profile = None;
47 let mut id = None;
48
49 for attr in start.attributes() {
51 let attr = attr?;
52 let key = attr.key.as_ref();
53 let value = String::from_utf8_lossy(&attr.value).to_string();
54
55 match key {
56 b"version" => version = Some(value),
57 b"encoding" => encoding = Some(value),
58 b"profile" => profile = Some(value),
59 b"id" => id = Some(value),
60 _ => {}
61 }
62 }
63
64 let version = version
65 .ok_or_else(|| CmlError::MissingAttribute("version required on <cml>".to_string()))?;
66
67 let encoding = encoding
68 .ok_or_else(|| CmlError::MissingAttribute("encoding required on <cml>".to_string()))?;
69
70 let profile = profile
71 .ok_or_else(|| CmlError::MissingAttribute("profile required on <cml>".to_string()))?;
72
73 let mut header = None;
74 let mut body = None;
75 let mut footer = None;
76
77 let mut buf = Vec::new();
78
79 loop {
80 match reader.read_event_into(&mut buf) {
81 Ok(Event::Start(e)) => match e.name().as_ref() {
82 b"header" => {
83 header = Some(Self::parse_header(reader)?);
84 }
85 b"body" => {
86 body = Some(Self::parse_body(reader)?);
87 }
88 b"footer" => {
89 footer = Some(Self::parse_footer(reader)?);
90 }
91 _ => {}
92 },
93 Ok(Event::End(e)) if e.name().as_ref() == b"cml" => {
94 break;
95 }
96 Ok(Event::Eof) => {
97 return Err(CmlError::InvalidStructure(
98 "Unexpected EOF in <cml>".to_string(),
99 ))
100 }
101 Ok(_) => {}
102 Err(e) => return Err(e.into()),
103 }
104 buf.clear();
105 }
106
107 let header =
108 header.ok_or_else(|| CmlError::InvalidStructure("<header> required".to_string()))?;
109
110 let body = body.ok_or_else(|| CmlError::InvalidStructure("<body> required".to_string()))?;
111
112 let footer =
113 footer.ok_or_else(|| CmlError::InvalidStructure("<footer> required".to_string()))?;
114
115 Ok(CmlDocument {
116 version,
117 encoding,
118 profile,
119 id,
120 header,
121 body,
122 footer,
123 })
124 }
125
126 fn parse_header<R: BufRead>(reader: &mut Reader<R>) -> Result<Header> {
128 let mut title = None;
129 let mut authors = Vec::new();
130 let mut dates = Vec::new();
131 let mut identifiers = Vec::new();
132 let mut version = None;
133 let mut description = None;
134 let mut provenance = None;
135 let mut source = None;
136 let mut meta = Vec::new();
137
138 let mut buf = Vec::new();
139
140 loop {
141 match reader.read_event_into(&mut buf) {
142 Ok(Event::Start(e)) => match e.name().as_ref() {
143 b"title" => {
144 title = Some(Self::read_text(reader, "title")?);
145 }
146 b"author" => {
147 authors.push(Self::parse_author(reader, e)?);
148 }
149 b"identifier" => {
150 identifiers.push(Self::parse_identifier(reader, e)?);
151 }
152 b"version" => {
153 version = Some(Self::read_text(reader, "version")?);
154 }
155 b"description" => {
156 description = Some(Self::read_text(reader, "description")?);
157 }
158 b"provenance" => {
159 provenance = Some(Self::read_text(reader, "provenance")?);
160 }
161 b"source" => {
162 source = Some(Self::read_text(reader, "source")?);
163 }
164 _ => {}
165 },
166 Ok(Event::Empty(e)) => match e.name().as_ref() {
167 b"date" => {
168 dates.push(Self::parse_date_entry(e)?);
169 }
170 b"meta" => {
171 meta.push(Self::parse_meta_entry(e)?);
172 }
173 _ => {}
174 },
175 Ok(Event::End(e)) if e.name().as_ref() == b"header" => {
176 break;
177 }
178 Ok(Event::Eof) => {
179 return Err(CmlError::InvalidStructure(
180 "Unexpected EOF in <header>".to_string(),
181 ))
182 }
183 Ok(_) => {}
184 Err(e) => return Err(e.into()),
185 }
186 buf.clear();
187 }
188
189 let title = title
190 .ok_or_else(|| CmlError::InvalidStructure("<title> required in header".to_string()))?;
191
192 Ok(Header {
193 title,
194 authors,
195 dates,
196 identifiers,
197 version,
198 description,
199 provenance,
200 source,
201 meta,
202 })
203 }
204
205 fn parse_author<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Author> {
207 let mut role = None;
208 let mut reference = None;
209
210 for attr in start.attributes() {
212 let attr = attr?;
213 let key = attr.key.as_ref();
214 let value = String::from_utf8_lossy(&attr.value).to_string();
215
216 match key {
217 b"role" => role = Some(value),
218 b"reference" => reference = Some(value),
219 _ => {}
220 }
221 }
222
223 let name = Self::read_text(reader, "author")?;
224
225 Ok(Author {
226 name,
227 role,
228 reference,
229 })
230 }
231
232 fn parse_date_entry(start: BytesStart) -> Result<DateEntry> {
234 let mut date_type = None;
235 let mut when = None;
236
237 for attr in start.attributes() {
238 let attr = attr?;
239 let key = attr.key.as_ref();
240 let value = String::from_utf8_lossy(&attr.value).to_string();
241
242 match key {
243 b"type" => date_type = Some(value),
244 b"when" => when = Some(value),
245 _ => {}
246 }
247 }
248
249 let date_type = date_type
250 .ok_or_else(|| CmlError::MissingAttribute("type required on <date>".to_string()))?;
251
252 let when =
253 when.ok_or_else(|| CmlError::MissingAttribute("when required on <date>".to_string()))?;
254
255 Ok(DateEntry { date_type, when })
256 }
257
258 fn parse_identifier<R: BufRead>(
260 reader: &mut Reader<R>,
261 start: BytesStart,
262 ) -> Result<Identifier> {
263 let mut scheme = None;
264
265 for attr in start.attributes() {
266 let attr = attr?;
267 let key = attr.key.as_ref();
268 let value = String::from_utf8_lossy(&attr.value).to_string();
269
270 if key == b"scheme" {
271 scheme = Some(value);
272 }
273 }
274
275 let scheme = scheme.ok_or_else(|| {
276 CmlError::MissingAttribute("scheme required on <identifier>".to_string())
277 })?;
278
279 let value = Self::read_text(reader, "identifier")?;
280
281 Ok(Identifier { scheme, value })
282 }
283
284 fn parse_meta_entry(start: BytesStart) -> Result<MetaEntry> {
286 let mut name = None;
287 let mut value = None;
288
289 for attr in start.attributes() {
290 let attr = attr?;
291 let key = attr.key.as_ref();
292 let attr_value = String::from_utf8_lossy(&attr.value).to_string();
293
294 match key {
295 b"name" => name = Some(attr_value),
296 b"value" => value = Some(attr_value),
297 _ => {}
298 }
299 }
300
301 let name =
302 name.ok_or_else(|| CmlError::MissingAttribute("name required on <meta>".to_string()))?;
303
304 let value = value
305 .ok_or_else(|| CmlError::MissingAttribute("value required on <meta>".to_string()))?;
306
307 Ok(MetaEntry { name, value })
308 }
309
310 fn parse_body<R: BufRead>(reader: &mut Reader<R>) -> Result<Body> {
312 let blocks = Self::parse_blocks_until(reader, b"body")?;
313
314 if blocks.is_empty() {
315 return Err(CmlError::InvalidStructure(
316 "<body> must contain at least one block element".to_string(),
317 ));
318 }
319
320 Ok(Body { blocks })
321 }
322
323 fn parse_blocks_until<R: BufRead>(
325 reader: &mut Reader<R>,
326 end_tag: &[u8],
327 ) -> Result<Vec<BlockElement>> {
328 let mut blocks = Vec::new();
329 let mut buf = Vec::new();
330
331 loop {
332 match reader.read_event_into(&mut buf) {
333 Ok(Event::Start(e)) => {
334 if let Some(block) = Self::parse_block_element(reader, e)? {
335 blocks.push(block);
336 }
337 }
338 Ok(Event::Empty(e)) => {
339 if e.name().as_ref() == b"break" {
341 blocks.push(BlockElement::Break(Self::parse_break_empty(e)?));
342 }
343 }
344 Ok(Event::End(e)) if e.name().as_ref() == end_tag => {
345 break;
346 }
347 Ok(Event::Eof) => {
348 return Err(CmlError::InvalidStructure(format!(
349 "Unexpected EOF waiting for end tag: {}",
350 String::from_utf8_lossy(end_tag)
351 )))
352 }
353 Ok(_) => {}
354 Err(e) => return Err(e.into()),
355 }
356 buf.clear();
357 }
358
359 Ok(blocks)
360 }
361
362 fn parse_block_element<R: BufRead>(
364 reader: &mut Reader<R>,
365 start: BytesStart,
366 ) -> Result<Option<BlockElement>> {
367 let element = match start.name().as_ref() {
368 b"section" => BlockElement::Section(Self::parse_section(reader, start)?),
369 b"paragraph" => BlockElement::Paragraph(Self::parse_paragraph(reader, start)?),
370 b"heading" => BlockElement::Heading(Self::parse_heading(reader, start)?),
371 b"aside" => BlockElement::Aside(Self::parse_aside(reader, start)?),
372 b"quote" => BlockElement::Quote(Self::parse_quote(reader, start)?),
373 b"list" => BlockElement::List(Self::parse_list(reader, start)?),
374 b"table" => BlockElement::Table(Self::parse_table(reader, start)?),
375 b"code" => BlockElement::Code(Self::parse_code(reader, start)?),
376 b"break" => BlockElement::Break(Self::parse_break(reader, start)?),
377 b"figure" => BlockElement::Figure(Self::parse_figure(reader, start)?),
378 _ => return Ok(None),
379 };
380
381 Ok(Some(element))
382 }
383
384 fn parse_section<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Section> {
386 let mut id = None;
387 let mut section_type = None;
388 let mut reference = None;
389
390 for attr in start.attributes() {
391 let attr = attr?;
392 let key = attr.key.as_ref();
393 let value = String::from_utf8_lossy(&attr.value).to_string();
394
395 match key {
396 b"id" => id = Some(value),
397 b"type" => section_type = Some(value),
398 b"ref" => reference = Some(value),
399 _ => {}
400 }
401 }
402
403 let content = Self::parse_blocks_until(reader, b"section")?;
404
405 Ok(Section {
406 id,
407 section_type,
408 reference,
409 content,
410 })
411 }
412
413 fn parse_paragraph<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Paragraph> {
415 let mut id = None;
416 let mut paragraph_type = None;
417
418 for attr in start.attributes() {
419 let attr = attr?;
420 let key = attr.key.as_ref();
421 let value = String::from_utf8_lossy(&attr.value).to_string();
422
423 match key {
424 b"id" => id = Some(value),
425 b"type" => paragraph_type = Some(value),
426 _ => {}
427 }
428 }
429
430 let content = Self::parse_inline_content(reader, b"paragraph")?;
431
432 Ok(Paragraph {
433 id,
434 paragraph_type,
435 content,
436 })
437 }
438
439 fn parse_heading<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Heading> {
441 let mut id = None;
442 let mut heading_type = None;
443 let mut size = None;
444
445 for attr in start.attributes() {
446 let attr = attr?;
447 let key = attr.key.as_ref();
448 let value = String::from_utf8_lossy(&attr.value).to_string();
449
450 match key {
451 b"id" => id = Some(value),
452 b"type" => heading_type = Some(value),
453 b"size" => {
454 size = Some(value.parse().map_err(|_| {
455 CmlError::InvalidAttribute("size must be a number".to_string())
456 })?)
457 }
458 _ => {}
459 }
460 }
461
462 let size = size
463 .ok_or_else(|| CmlError::MissingAttribute("size required on <heading>".to_string()))?;
464
465 let content = Self::parse_inline_content(reader, b"heading")?;
466
467 Ok(Heading {
468 id,
469 heading_type,
470 size,
471 content,
472 })
473 }
474
475 fn parse_aside<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Aside> {
477 let mut id = None;
478 let mut aside_type = None;
479 let mut side = None;
480
481 for attr in start.attributes() {
482 let attr = attr?;
483 let key = attr.key.as_ref();
484 let value = String::from_utf8_lossy(&attr.value).to_string();
485
486 match key {
487 b"id" => id = Some(value),
488 b"type" => aside_type = Some(value),
489 b"side" => {
490 side = Some(match value.as_str() {
491 "left" => Side::Left,
492 "right" => Side::Right,
493 _ => {
494 return Err(CmlError::InvalidAttribute(
495 "side must be 'left' or 'right'".to_string(),
496 ))
497 }
498 })
499 }
500 _ => {}
501 }
502 }
503
504 let side =
505 side.ok_or_else(|| CmlError::MissingAttribute("side required on <aside>".to_string()))?;
506
507 let content = Self::parse_blocks_until(reader, b"aside")?;
508
509 Ok(Aside {
510 id,
511 aside_type,
512 side,
513 content,
514 })
515 }
516
517 fn parse_quote<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Quote> {
519 let mut id = None;
520 let mut reference = None;
521 let mut source = None;
522
523 for attr in start.attributes() {
524 let attr = attr?;
525 let key = attr.key.as_ref();
526 let value = String::from_utf8_lossy(&attr.value).to_string();
527
528 match key {
529 b"id" => id = Some(value),
530 b"ref" => reference = Some(value),
531 b"source" => source = Some(value),
532 _ => {}
533 }
534 }
535
536 let content = Self::parse_blocks_until(reader, b"quote")?;
537
538 Ok(Quote {
539 id,
540 reference,
541 source,
542 content,
543 })
544 }
545
546 fn parse_list<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<List> {
548 let mut id = None;
549 let mut list_type = None;
550 let mut style = None;
551
552 for attr in start.attributes() {
553 let attr = attr?;
554 let key = attr.key.as_ref();
555 let value = String::from_utf8_lossy(&attr.value).to_string();
556
557 match key {
558 b"id" => id = Some(value),
559 b"type" => {
560 list_type = Some(match value.as_str() {
561 "ordered" => ListType::Ordered,
562 "unordered" => ListType::Unordered,
563 _ => {
564 return Err(CmlError::InvalidAttribute(
565 "list type must be 'ordered' or 'unordered'".to_string(),
566 ))
567 }
568 })
569 }
570 b"style" => {
571 style = Some(match value.as_str() {
572 "numeric" => ListStyle::Numeric,
573 "roman" => ListStyle::Roman,
574 "alpha" => ListStyle::Alpha,
575 "symbolic" => ListStyle::Symbolic,
576 _ => {
577 return Err(CmlError::InvalidAttribute(
578 "invalid list style".to_string(),
579 ))
580 }
581 })
582 }
583 _ => {}
584 }
585 }
586
587 let mut items = Vec::new();
588 let mut buf = Vec::new();
589
590 loop {
591 match reader.read_event_into(&mut buf) {
592 Ok(Event::Start(e)) if e.name().as_ref() == b"item" => {
593 items.push(Self::parse_list_item(reader, e)?);
594 }
595 Ok(Event::End(e)) if e.name().as_ref() == b"list" => {
596 break;
597 }
598 Ok(Event::Eof) => {
599 return Err(CmlError::InvalidStructure(
600 "Unexpected EOF in <list>".to_string(),
601 ))
602 }
603 Ok(_) => {}
604 Err(e) => return Err(e.into()),
605 }
606 buf.clear();
607 }
608
609 if items.is_empty() {
610 return Err(CmlError::InvalidStructure(
611 "<list> must contain at least one <item>".to_string(),
612 ));
613 }
614
615 Ok(List {
616 id,
617 list_type,
618 style,
619 items,
620 })
621 }
622
623 fn parse_list_item<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<ListItem> {
625 let mut id = None;
626
627 for attr in start.attributes() {
628 let attr = attr?;
629 if attr.key.as_ref() == b"id" {
630 id = Some(String::from_utf8_lossy(&attr.value).to_string());
631 }
632 }
633
634 let content = Self::parse_list_item_content(reader)?;
636
637 Ok(ListItem { id, content })
638 }
639
640 fn parse_list_item_content<R: BufRead>(reader: &mut Reader<R>) -> Result<ListItemContent> {
642 let mut blocks = Vec::new();
643 let mut inlines = Vec::new();
644 let mut has_blocks = false;
645 let mut buf = Vec::new();
646
647 loop {
648 match reader.read_event_into(&mut buf) {
649 Ok(Event::Start(e)) => {
650 let name = e.name();
651 if Self::is_block_element(name.as_ref()) {
653 has_blocks = true;
654 if let Some(block) = Self::parse_block_element(reader, e)? {
655 blocks.push(block);
656 }
657 } else if let Some(inline) = Self::parse_inline_element(reader, e)? {
658 inlines.push(inline);
659 }
660 }
661 Ok(Event::Text(e)) => {
662 let text = e.unescape().unwrap().to_string().trim().to_string();
663 if !text.is_empty() {
664 inlines.push(InlineElement::Text(text));
665 }
666 }
667 Ok(Event::End(e)) if e.name().as_ref() == b"item" => {
668 break;
669 }
670 Ok(Event::Eof) => {
671 return Err(CmlError::InvalidStructure(
672 "Unexpected EOF in <item>".to_string(),
673 ))
674 }
675 Ok(_) => {}
676 Err(e) => return Err(e.into()),
677 }
678 buf.clear();
679 }
680
681 if has_blocks {
682 Ok(ListItemContent::Block(blocks))
683 } else {
684 Ok(ListItemContent::Inline(inlines))
685 }
686 }
687
688 fn is_block_element(name: &[u8]) -> bool {
690 matches!(
691 name,
692 b"section"
693 | b"paragraph"
694 | b"heading"
695 | b"aside"
696 | b"quote"
697 | b"list"
698 | b"table"
699 | b"code"
700 | b"break"
701 | b"figure"
702 )
703 }
704
705 fn parse_table<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Table> {
707 let mut id = None;
708 let mut table_type = None;
709
710 for attr in start.attributes() {
711 let attr = attr?;
712 let key = attr.key.as_ref();
713 let value = String::from_utf8_lossy(&attr.value).to_string();
714
715 match key {
716 b"id" => id = Some(value),
717 b"type" => table_type = Some(value),
718 _ => {}
719 }
720 }
721
722 let mut header = None;
723 let mut body = None;
724 let mut footer = None;
725 let mut buf = Vec::new();
726
727 loop {
728 match reader.read_event_into(&mut buf) {
729 Ok(Event::Start(e)) => match e.name().as_ref() {
730 b"header" => {
731 header = Some(Self::parse_table_header(reader)?);
732 }
733 b"body" => {
734 body = Some(Self::parse_table_body(reader)?);
735 }
736 b"footer" => {
737 footer = Some(Self::parse_table_footer(reader)?);
738 }
739 _ => {}
740 },
741 Ok(Event::End(e)) if e.name().as_ref() == b"table" => {
742 break;
743 }
744 Ok(Event::Eof) => {
745 return Err(CmlError::InvalidStructure(
746 "Unexpected EOF in <table>".to_string(),
747 ))
748 }
749 Ok(_) => {}
750 Err(e) => return Err(e.into()),
751 }
752 buf.clear();
753 }
754
755 let body =
756 body.ok_or_else(|| CmlError::InvalidStructure("<body> required in table".to_string()))?;
757
758 Ok(Table {
759 id,
760 table_type,
761 header,
762 body,
763 footer,
764 })
765 }
766
767 fn parse_table_header<R: BufRead>(reader: &mut Reader<R>) -> Result<TableHeader> {
769 let mut rows = Vec::new();
770 let mut buf = Vec::new();
771
772 loop {
773 match reader.read_event_into(&mut buf) {
774 Ok(Event::Start(e)) if e.name().as_ref() == b"row" => {
775 rows.push(Self::parse_table_row(reader, true)?);
776 }
777 Ok(Event::End(e)) if e.name().as_ref() == b"header" => {
778 break;
779 }
780 Ok(Event::Eof) => {
781 return Err(CmlError::InvalidStructure(
782 "Unexpected EOF in table <header>".to_string(),
783 ))
784 }
785 Ok(_) => {}
786 Err(e) => return Err(e.into()),
787 }
788 buf.clear();
789 }
790
791 Ok(TableHeader { rows })
792 }
793
794 fn parse_table_body<R: BufRead>(reader: &mut Reader<R>) -> Result<TableBody> {
796 let mut rows = Vec::new();
797 let mut buf = Vec::new();
798
799 loop {
800 match reader.read_event_into(&mut buf) {
801 Ok(Event::Start(e)) if e.name().as_ref() == b"row" => {
802 rows.push(Self::parse_table_row(reader, false)?);
803 }
804 Ok(Event::End(e)) if e.name().as_ref() == b"body" => {
805 break;
806 }
807 Ok(Event::Eof) => {
808 return Err(CmlError::InvalidStructure(
809 "Unexpected EOF in table <body>".to_string(),
810 ))
811 }
812 Ok(_) => {}
813 Err(e) => return Err(e.into()),
814 }
815 buf.clear();
816 }
817
818 Ok(TableBody { rows })
819 }
820
821 fn parse_table_footer<R: BufRead>(reader: &mut Reader<R>) -> Result<TableFooter> {
823 let mut caption = None;
824 let mut buf = Vec::new();
825
826 loop {
827 match reader.read_event_into(&mut buf) {
828 Ok(Event::Start(e)) if e.name().as_ref() == b"caption" => {
829 let content = Self::parse_inline_content(reader, b"caption")?;
830 caption = Some(Caption { content });
831 }
832 Ok(Event::End(e)) if e.name().as_ref() == b"footer" => {
833 break;
834 }
835 Ok(Event::Eof) => {
836 return Err(CmlError::InvalidStructure(
837 "Unexpected EOF in table <footer>".to_string(),
838 ))
839 }
840 Ok(_) => {}
841 Err(e) => return Err(e.into()),
842 }
843 buf.clear();
844 }
845
846 let caption = caption.ok_or_else(|| {
847 CmlError::InvalidStructure("<caption> required in table footer".to_string())
848 })?;
849
850 Ok(TableFooter { caption })
851 }
852
853 fn parse_table_row<R: BufRead>(reader: &mut Reader<R>, is_header: bool) -> Result<TableRow> {
855 let mut columns = Vec::new();
856 let mut buf = Vec::new();
857
858 loop {
859 match reader.read_event_into(&mut buf) {
860 Ok(Event::Start(e)) if e.name().as_ref() == b"column" => {
861 columns.push(Self::parse_table_column(reader, e, is_header)?);
862 }
863 Ok(Event::End(e)) if e.name().as_ref() == b"row" => {
864 break;
865 }
866 Ok(Event::Eof) => {
867 return Err(CmlError::InvalidStructure(
868 "Unexpected EOF in table <row>".to_string(),
869 ))
870 }
871 Ok(_) => {}
872 Err(e) => return Err(e.into()),
873 }
874 buf.clear();
875 }
876
877 Ok(TableRow { columns })
878 }
879
880 fn parse_table_column<R: BufRead>(
882 reader: &mut Reader<R>,
883 start: BytesStart,
884 is_header: bool,
885 ) -> Result<TableColumn> {
886 let mut sort = None;
887
888 if is_header {
889 for attr in start.attributes() {
890 let attr = attr?;
891 if attr.key.as_ref() == b"sort" {
892 let value = String::from_utf8_lossy(&attr.value).to_string();
893 sort = Some(match value.as_str() {
894 "asc" => SortOrder::Asc,
895 "desc" => SortOrder::Desc,
896 _ => {
897 return Err(CmlError::InvalidAttribute(
898 "sort must be 'asc' or 'desc'".to_string(),
899 ))
900 }
901 });
902 }
903 }
904 }
905
906 let cell = Self::parse_table_cell(reader)?;
908
909 Ok(TableColumn { sort, cell })
910 }
911
912 fn parse_table_cell<R: BufRead>(reader: &mut Reader<R>) -> Result<TableCell> {
914 let mut buf = Vec::new();
915 let mut colspan = None;
916 let mut rowspan = None;
917 let mut content = Vec::new();
918
919 loop {
920 match reader.read_event_into(&mut buf) {
921 Ok(Event::Start(e)) if e.name().as_ref() == b"cell" => {
922 for attr in e.attributes() {
924 let attr = attr?;
925 let key = attr.key.as_ref();
926 let value = String::from_utf8_lossy(&attr.value).to_string();
927
928 match key {
929 b"colspan" => {
930 colspan = Some(value.parse().map_err(|_| {
931 CmlError::InvalidAttribute(
932 "colspan must be a number".to_string(),
933 )
934 })?)
935 }
936 b"rowspan" => {
937 rowspan = Some(value.parse().map_err(|_| {
938 CmlError::InvalidAttribute(
939 "rowspan must be a number".to_string(),
940 )
941 })?)
942 }
943 _ => {}
944 }
945 }
946
947 content = Self::parse_inline_content(reader, b"cell")?;
949 break;
950 }
951 Ok(Event::Eof) => {
952 return Err(CmlError::InvalidStructure(
953 "Expected <cell> in column".to_string(),
954 ))
955 }
956 Ok(_) => {}
957 Err(e) => return Err(e.into()),
958 }
959 buf.clear();
960 }
961
962 Ok(TableCell {
963 colspan,
964 rowspan,
965 content,
966 })
967 }
968
969 fn parse_code<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Code> {
971 let mut id = None;
972 let mut lang = None;
973 let mut copyable = None;
974
975 for attr in start.attributes() {
976 let attr = attr?;
977 let key = attr.key.as_ref();
978 let value = String::from_utf8_lossy(&attr.value).to_string();
979
980 match key {
981 b"id" => id = Some(value),
982 b"lang" => lang = Some(value),
983 b"copyable" => {
984 copyable = Some(match value.as_str() {
985 "true" => true,
986 "false" => false,
987 _ => {
988 return Err(CmlError::InvalidAttribute(
989 "copyable must be 'true' or 'false'".to_string(),
990 ))
991 }
992 })
993 }
994 _ => {}
995 }
996 }
997
998 let content = Self::read_text(reader, "code")?;
999
1000 Ok(Code {
1001 id,
1002 lang,
1003 copyable,
1004 content,
1005 })
1006 }
1007
1008 fn parse_break<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Break> {
1010 let mut break_type = None;
1011
1012 for attr in start.attributes() {
1013 let attr = attr?;
1014 if attr.key.as_ref() == b"type" {
1015 break_type = Some(String::from_utf8_lossy(&attr.value).to_string());
1016 }
1017 }
1018
1019 let mut buf = Vec::new();
1021 loop {
1022 match reader.read_event_into(&mut buf) {
1023 Ok(Event::End(e)) if e.name().as_ref() == b"break" => {
1024 break;
1025 }
1026 Ok(Event::Eof) => {
1027 return Err(CmlError::InvalidStructure(
1028 "Unexpected EOF in <break>".to_string(),
1029 ))
1030 }
1031 Ok(_) => {}
1032 Err(e) => return Err(e.into()),
1033 }
1034 buf.clear();
1035 }
1036
1037 Ok(Break { break_type })
1038 }
1039
1040 fn parse_break_empty(start: BytesStart) -> Result<Break> {
1042 let mut break_type = None;
1043
1044 for attr in start.attributes() {
1045 let attr = attr?;
1046 if attr.key.as_ref() == b"type" {
1047 break_type = Some(String::from_utf8_lossy(&attr.value).to_string());
1048 }
1049 }
1050
1051 Ok(Break { break_type })
1052 }
1053
1054 fn parse_figure<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Figure> {
1056 let mut id = None;
1057 let mut figure_type = None;
1058 let mut reference = None;
1059
1060 for attr in start.attributes() {
1061 let attr = attr?;
1062 let key = attr.key.as_ref();
1063 let value = String::from_utf8_lossy(&attr.value).to_string();
1064
1065 match key {
1066 b"id" => id = Some(value),
1067 b"type" => figure_type = Some(value),
1068 b"ref" => reference = Some(value),
1069 _ => {}
1070 }
1071 }
1072
1073 let mut buf = Vec::new();
1075 loop {
1076 match reader.read_event_into(&mut buf) {
1077 Ok(Event::End(e)) if e.name().as_ref() == b"figure" => {
1078 break;
1079 }
1080 Ok(Event::Eof) => {
1081 return Err(CmlError::InvalidStructure(
1082 "Unexpected EOF in <figure>".to_string(),
1083 ))
1084 }
1085 Ok(_) => {}
1086 Err(e) => return Err(e.into()),
1087 }
1088 buf.clear();
1089 }
1090
1091 Ok(Figure {
1094 id,
1095 figure_type,
1096 reference,
1097 })
1098 }
1099
1100 fn parse_inline_content<R: BufRead>(
1102 reader: &mut Reader<R>,
1103 end_tag: &[u8],
1104 ) -> Result<Vec<InlineElement>> {
1105 let mut elements = Vec::new();
1106 let mut buf = Vec::new();
1107
1108 loop {
1109 match reader.read_event_into(&mut buf) {
1110 Ok(Event::Start(e)) => {
1111 if let Some(inline) = Self::parse_inline_element(reader, e)? {
1112 elements.push(inline);
1113 }
1114 }
1115 Ok(Event::Empty(e)) => {
1116 if e.name().as_ref() == b"end" {
1118 elements.push(InlineElement::End(Self::parse_end_empty(e)?));
1119 }
1120 }
1121 Ok(Event::Text(e)) => {
1122 let text = e.unescape().unwrap().to_string();
1123 if !text.trim().is_empty() {
1124 elements.push(InlineElement::Text(text));
1125 }
1126 }
1127 Ok(Event::End(e)) if e.name().as_ref() == end_tag => {
1128 break;
1129 }
1130 Ok(Event::Eof) => {
1131 return Err(CmlError::InvalidStructure(format!(
1132 "Unexpected EOF waiting for end tag: {}",
1133 String::from_utf8_lossy(end_tag)
1134 )))
1135 }
1136 Ok(_) => {}
1137 Err(e) => return Err(e.into()),
1138 }
1139 buf.clear();
1140 }
1141
1142 Ok(elements)
1143 }
1144
1145 fn parse_inline_element<R: BufRead>(
1147 reader: &mut Reader<R>,
1148 start: BytesStart,
1149 ) -> Result<Option<InlineElement>> {
1150 let element = match start.name().as_ref() {
1151 b"em" => InlineElement::Em(Self::parse_em(reader, start)?),
1152 b"bo" => InlineElement::Bo(Self::parse_bo(reader)?),
1153 b"un" => InlineElement::Un(Self::parse_un(reader)?),
1154 b"st" => InlineElement::St(Self::parse_st(reader)?),
1155 b"snip" => InlineElement::Snip(Self::parse_snip(reader, start)?),
1156 b"key" => InlineElement::Key(Self::parse_key(reader)?),
1157 b"rf" => InlineElement::Rf(Self::parse_rf(reader, start)?),
1158 b"tg" => InlineElement::Tg(Self::parse_tg(reader, start)?),
1159 b"lk" => InlineElement::Lk(Self::parse_lk(reader, start)?),
1160 b"curr" => InlineElement::Curr(Self::parse_curr(reader, start)?),
1161 b"end" => InlineElement::End(Self::parse_end(reader, start)?),
1162 _ => return Ok(None),
1163 };
1164
1165 Ok(Some(element))
1166 }
1167
1168 fn parse_em<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Em> {
1170 let mut em_type = None;
1171
1172 for attr in start.attributes() {
1173 let attr = attr?;
1174 if attr.key.as_ref() == b"type" {
1175 let value = String::from_utf8_lossy(&attr.value).to_string();
1176 em_type = Some(match value.as_str() {
1177 "stress" => EmphasisType::Stress,
1178 "contrast" => EmphasisType::Contrast,
1179 _ => {
1180 return Err(CmlError::InvalidAttribute(
1181 "em type must be 'stress' or 'contrast'".to_string(),
1182 ))
1183 }
1184 });
1185 }
1186 }
1187
1188 let content = Self::parse_inline_content(reader, b"em")?;
1189
1190 Ok(Em { em_type, content })
1191 }
1192
1193 fn parse_bo<R: BufRead>(reader: &mut Reader<R>) -> Result<Bo> {
1195 let content = Self::parse_inline_content(reader, b"bo")?;
1196 Ok(Bo { content })
1197 }
1198
1199 fn parse_un<R: BufRead>(reader: &mut Reader<R>) -> Result<Un> {
1201 let content = Self::parse_inline_content(reader, b"un")?;
1202 Ok(Un { content })
1203 }
1204
1205 fn parse_st<R: BufRead>(reader: &mut Reader<R>) -> Result<St> {
1207 let content = Self::parse_inline_content(reader, b"st")?;
1208 Ok(St { content })
1209 }
1210
1211 fn parse_snip<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Snip> {
1213 let mut char = None;
1214
1215 for attr in start.attributes() {
1216 let attr = attr?;
1217 if attr.key.as_ref() == b"char" {
1218 char = Some(String::from_utf8_lossy(&attr.value).to_string());
1219 }
1220 }
1221
1222 let content = Self::read_text(reader, "snip")?;
1223
1224 Ok(Snip { char, content })
1225 }
1226
1227 fn parse_key<R: BufRead>(reader: &mut Reader<R>) -> Result<Key> {
1229 let content = Self::read_text(reader, "key")?;
1230 Ok(Key { content })
1231 }
1232
1233 fn parse_rf<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Rf> {
1235 let mut reference = None;
1236 let mut role = None;
1237 let mut title = None;
1238
1239 for attr in start.attributes() {
1240 let attr = attr?;
1241 let key = attr.key.as_ref();
1242 let value = String::from_utf8_lossy(&attr.value).to_string();
1243
1244 match key {
1245 b"ref" => reference = Some(value),
1246 b"role" => role = Some(value),
1247 b"title" => title = Some(value),
1248 _ => {}
1249 }
1250 }
1251
1252 let reference = reference
1253 .ok_or_else(|| CmlError::MissingAttribute("ref required on <rf>".to_string()))?;
1254
1255 let content = Self::read_text(reader, "rf")?;
1256
1257 Ok(Rf {
1258 reference,
1259 role,
1260 title,
1261 content,
1262 })
1263 }
1264
1265 fn parse_tg<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Tg> {
1267 let mut reference = None;
1268 let mut role = None;
1269 let mut title = None;
1270
1271 for attr in start.attributes() {
1272 let attr = attr?;
1273 let key = attr.key.as_ref();
1274 let value = String::from_utf8_lossy(&attr.value).to_string();
1275
1276 match key {
1277 b"ref" => reference = Some(value),
1278 b"role" => role = Some(value),
1279 b"title" => title = Some(value),
1280 _ => {}
1281 }
1282 }
1283
1284 let reference = reference
1285 .ok_or_else(|| CmlError::MissingAttribute("ref required on <tg>".to_string()))?;
1286
1287 let content = Self::read_text(reader, "tg")?;
1288
1289 Ok(Tg {
1290 reference,
1291 role,
1292 title,
1293 content,
1294 })
1295 }
1296
1297 fn parse_lk<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Lk> {
1299 let mut reference = None;
1300 let mut role = None;
1301 let mut title = None;
1302
1303 for attr in start.attributes() {
1304 let attr = attr?;
1305 let key = attr.key.as_ref();
1306 let value = String::from_utf8_lossy(&attr.value).to_string();
1307
1308 match key {
1309 b"ref" => reference = Some(value),
1310 b"role" => role = Some(value),
1311 b"title" => title = Some(value),
1312 _ => {}
1313 }
1314 }
1315
1316 let reference = reference
1317 .ok_or_else(|| CmlError::MissingAttribute("ref required on <lk>".to_string()))?;
1318
1319 let content = Self::read_text(reader, "lk")?;
1320
1321 Ok(Lk {
1322 reference,
1323 role,
1324 title,
1325 content,
1326 })
1327 }
1328
1329 fn parse_curr<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Curr> {
1331 let mut currency_type = None;
1332 let mut format = None;
1333
1334 for attr in start.attributes() {
1335 let attr = attr?;
1336 let key = attr.key.as_ref();
1337 let value = String::from_utf8_lossy(&attr.value).to_string();
1338
1339 match key {
1340 b"type" => currency_type = Some(value),
1341 b"format" => {
1342 format = Some(match value.as_str() {
1343 "symbol" => CurrencyFormat::Symbol,
1344 "code" => CurrencyFormat::Code,
1345 "name" => CurrencyFormat::Name,
1346 _ => {
1347 return Err(CmlError::InvalidAttribute(
1348 "currency format must be 'symbol', 'code', or 'name'".to_string(),
1349 ))
1350 }
1351 })
1352 }
1353 _ => {}
1354 }
1355 }
1356
1357 let currency_type = currency_type
1358 .ok_or_else(|| CmlError::MissingAttribute("type required on <curr>".to_string()))?;
1359
1360 let value = Self::read_text(reader, "curr")?;
1361
1362 Ok(Curr {
1363 currency_type,
1364 format,
1365 value,
1366 })
1367 }
1368
1369 fn parse_end<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<End> {
1371 let mut kind = None;
1372
1373 for attr in start.attributes() {
1374 let attr = attr?;
1375 if attr.key.as_ref() == b"kind" {
1376 let value = String::from_utf8_lossy(&attr.value).to_string();
1377 kind = Some(match value.as_str() {
1378 "line" => EndKind::Line,
1379 "verse" => EndKind::Verse,
1380 "item" => EndKind::Item,
1381 _ => {
1382 return Err(CmlError::InvalidAttribute(
1383 "end kind must be 'line', 'verse', or 'item'".to_string(),
1384 ))
1385 }
1386 });
1387 }
1388 }
1389
1390 let mut buf = Vec::new();
1392 loop {
1393 match reader.read_event_into(&mut buf) {
1394 Ok(Event::End(e)) if e.name().as_ref() == b"end" => {
1395 break;
1396 }
1397 Ok(Event::Eof) => {
1398 return Err(CmlError::InvalidStructure(
1399 "Unexpected EOF in <end>".to_string(),
1400 ))
1401 }
1402 Ok(_) => {}
1403 Err(e) => return Err(e.into()),
1404 }
1405 buf.clear();
1406 }
1407
1408 Ok(End { kind })
1409 }
1410
1411 fn parse_end_empty(start: BytesStart) -> Result<End> {
1413 let mut kind = None;
1414
1415 for attr in start.attributes() {
1416 let attr = attr?;
1417 if attr.key.as_ref() == b"kind" {
1418 let value = String::from_utf8_lossy(&attr.value).to_string();
1419 kind = Some(match value.as_str() {
1420 "line" => EndKind::Line,
1421 "verse" => EndKind::Verse,
1422 "item" => EndKind::Item,
1423 _ => {
1424 return Err(CmlError::InvalidAttribute(
1425 "end kind must be 'line', 'verse', or 'item'".to_string(),
1426 ))
1427 }
1428 });
1429 }
1430 }
1431
1432 Ok(End { kind })
1433 }
1434
1435 fn parse_footer<R: BufRead>(reader: &mut Reader<R>) -> Result<Footer> {
1437 let mut signatures = None;
1438 let mut citations = None;
1439 let mut annotations = None;
1440
1441 let mut buf = Vec::new();
1442
1443 loop {
1444 match reader.read_event_into(&mut buf) {
1445 Ok(Event::Start(e)) => match e.name().as_ref() {
1446 b"signatures" => {
1447 signatures = Some(Self::parse_signatures(reader)?);
1448 }
1449 b"citations" => {
1450 citations = Some(Self::parse_citations(reader)?);
1451 }
1452 b"annotations" => {
1453 annotations = Some(Self::parse_annotations(reader)?);
1454 }
1455 _ => {}
1456 },
1457 Ok(Event::End(e)) if e.name().as_ref() == b"footer" => {
1458 break;
1459 }
1460 Ok(Event::Eof) => {
1461 return Err(CmlError::InvalidStructure(
1462 "Unexpected EOF in <footer>".to_string(),
1463 ))
1464 }
1465 Ok(_) => {}
1466 Err(e) => return Err(e.into()),
1467 }
1468 buf.clear();
1469 }
1470
1471 Ok(Footer {
1472 signatures,
1473 citations,
1474 annotations,
1475 })
1476 }
1477
1478 fn parse_signatures<R: BufRead>(reader: &mut Reader<R>) -> Result<Signatures> {
1480 let mut signatures = Vec::new();
1481 let mut buf = Vec::new();
1482
1483 loop {
1484 match reader.read_event_into(&mut buf) {
1485 Ok(Event::Start(e)) if e.name().as_ref() == b"signature" => {
1486 signatures.push(Self::parse_signature(reader, e)?);
1487 }
1488 Ok(Event::End(e)) if e.name().as_ref() == b"signatures" => {
1489 break;
1490 }
1491 Ok(Event::Eof) => {
1492 return Err(CmlError::InvalidStructure(
1493 "Unexpected EOF in <signatures>".to_string(),
1494 ))
1495 }
1496 Ok(_) => {}
1497 Err(e) => return Err(e.into()),
1498 }
1499 buf.clear();
1500 }
1501
1502 Ok(Signatures { signatures })
1503 }
1504
1505 fn parse_signature<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Signature> {
1507 let mut when = None;
1508 let mut role = None;
1509 let mut reference = None;
1510
1511 for attr in start.attributes() {
1512 let attr = attr?;
1513 let key = attr.key.as_ref();
1514 let value = String::from_utf8_lossy(&attr.value).to_string();
1515
1516 match key {
1517 b"when" => when = Some(value),
1518 b"role" => role = Some(value),
1519 b"ref" => reference = Some(value),
1520 _ => {}
1521 }
1522 }
1523
1524 let when = when.ok_or_else(|| {
1525 CmlError::MissingAttribute("when required on <signature>".to_string())
1526 })?;
1527
1528 let content = Self::read_text(reader, "signature")?;
1529
1530 Ok(Signature {
1531 when,
1532 role,
1533 reference,
1534 content,
1535 })
1536 }
1537
1538 fn parse_citations<R: BufRead>(reader: &mut Reader<R>) -> Result<Citations> {
1540 let mut citations = Vec::new();
1541 let mut buf = Vec::new();
1542
1543 loop {
1544 match reader.read_event_into(&mut buf) {
1545 Ok(Event::Start(e)) if e.name().as_ref() == b"citation" => {
1546 citations.push(Self::parse_citation(reader, e)?);
1547 }
1548 Ok(Event::End(e)) if e.name().as_ref() == b"citations" => {
1549 break;
1550 }
1551 Ok(Event::Eof) => {
1552 return Err(CmlError::InvalidStructure(
1553 "Unexpected EOF in <citations>".to_string(),
1554 ))
1555 }
1556 Ok(_) => {}
1557 Err(e) => return Err(e.into()),
1558 }
1559 buf.clear();
1560 }
1561
1562 Ok(Citations { citations })
1563 }
1564
1565 fn parse_citation<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Citation> {
1567 let mut reference = None;
1568 let mut citation_type = None;
1569
1570 for attr in start.attributes() {
1571 let attr = attr?;
1572 let key = attr.key.as_ref();
1573 let value = String::from_utf8_lossy(&attr.value).to_string();
1574
1575 match key {
1576 b"ref" => reference = Some(value),
1577 b"type" => citation_type = Some(value),
1578 _ => {}
1579 }
1580 }
1581
1582 let reference = reference
1583 .ok_or_else(|| CmlError::MissingAttribute("ref required on <citation>".to_string()))?;
1584
1585 let content = Self::parse_inline_content(reader, b"citation")?;
1586
1587 Ok(Citation {
1588 reference,
1589 citation_type,
1590 content,
1591 })
1592 }
1593
1594 fn parse_annotations<R: BufRead>(reader: &mut Reader<R>) -> Result<Annotations> {
1596 let mut notes = Vec::new();
1597 let mut buf = Vec::new();
1598
1599 loop {
1600 match reader.read_event_into(&mut buf) {
1601 Ok(Event::Start(e)) if e.name().as_ref() == b"note" => {
1602 notes.push(Self::parse_note(reader, e)?);
1603 }
1604 Ok(Event::End(e)) if e.name().as_ref() == b"annotations" => {
1605 break;
1606 }
1607 Ok(Event::Eof) => {
1608 return Err(CmlError::InvalidStructure(
1609 "Unexpected EOF in <annotations>".to_string(),
1610 ))
1611 }
1612 Ok(_) => {}
1613 Err(e) => return Err(e.into()),
1614 }
1615 buf.clear();
1616 }
1617
1618 Ok(Annotations { notes })
1619 }
1620
1621 fn parse_note<R: BufRead>(reader: &mut Reader<R>, start: BytesStart) -> Result<Note> {
1623 let mut id = None;
1624 let mut note_type = None;
1625 let mut reference = None;
1626
1627 for attr in start.attributes() {
1628 let attr = attr?;
1629 let key = attr.key.as_ref();
1630 let value = String::from_utf8_lossy(&attr.value).to_string();
1631
1632 match key {
1633 b"id" => id = Some(value),
1634 b"type" => note_type = Some(value),
1635 b"ref" => reference = Some(value),
1636 _ => {}
1637 }
1638 }
1639
1640 let content = Self::parse_note_content(reader)?;
1642
1643 Ok(Note {
1644 id,
1645 note_type,
1646 reference,
1647 content,
1648 })
1649 }
1650
1651 fn parse_note_content<R: BufRead>(reader: &mut Reader<R>) -> Result<NoteContent> {
1653 let mut blocks = Vec::new();
1655 let mut inlines = Vec::new();
1656 let mut has_blocks = false;
1657 let mut buf = Vec::new();
1658
1659 loop {
1660 match reader.read_event_into(&mut buf) {
1661 Ok(Event::Start(e)) => {
1662 let name = e.name();
1663 if Self::is_block_element(name.as_ref()) {
1664 has_blocks = true;
1665 if let Some(block) = Self::parse_block_element(reader, e)? {
1666 blocks.push(block);
1667 }
1668 } else if let Some(inline) = Self::parse_inline_element(reader, e)? {
1669 inlines.push(inline);
1670 }
1671 }
1672 Ok(Event::Text(e)) => {
1673 let text = e.unescape().unwrap().to_string().trim().to_string();
1674 if !text.is_empty() {
1675 inlines.push(InlineElement::Text(text));
1676 }
1677 }
1678 Ok(Event::End(e)) if e.name().as_ref() == b"note" => {
1679 break;
1680 }
1681 Ok(Event::Eof) => {
1682 return Err(CmlError::InvalidStructure(
1683 "Unexpected EOF in <note>".to_string(),
1684 ))
1685 }
1686 Ok(_) => {}
1687 Err(e) => return Err(e.into()),
1688 }
1689 buf.clear();
1690 }
1691
1692 if has_blocks {
1693 Ok(NoteContent::Block(blocks))
1694 } else {
1695 Ok(NoteContent::Inline(inlines))
1696 }
1697 }
1698
1699 fn read_text<R: BufRead>(reader: &mut Reader<R>, tag_name: &str) -> Result<String> {
1701 let mut buf = Vec::new();
1702 let mut text = String::new();
1703
1704 loop {
1705 match reader.read_event_into(&mut buf) {
1706 Ok(Event::Text(e)) => {
1707 text.push_str(&e.unescape().unwrap());
1708 }
1709 Ok(Event::End(e)) if e.name().as_ref() == tag_name.as_bytes() => {
1710 break;
1711 }
1712 Ok(Event::Eof) => {
1713 return Err(CmlError::InvalidStructure(format!(
1714 "Unexpected EOF in <{}>",
1715 tag_name
1716 )))
1717 }
1718 Ok(_) => {}
1719 Err(e) => return Err(e.into()),
1720 }
1721 buf.clear();
1722 }
1723
1724 Ok(text)
1725 }
1726}
1727
#[cfg(test)]
mod tests {
    use super::*;

    /// A document with only a title and one paragraph parses into the expected shape.
    #[test]
    fn test_parse_minimal_document() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test Document</title>
  </header>
  <body>
    <paragraph>Hello, world!</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();

        assert_eq!(parsed.version, "0.2");
        assert_eq!(parsed.profile, "core");
        assert_eq!(parsed.header.title, "Test Document");
        assert_eq!(parsed.body.blocks.len(), 1);
    }

    /// Author, date, identifier and meta entries in the header are all captured.
    #[test]
    fn test_parse_header_with_metadata() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
    <author role="editor">John Doe</author>
    <date type="created" when="2025-12-22"/>
    <identifier scheme="doi">10.1234/test</identifier>
    <meta name="status" value="draft"/>
  </header>
  <body>
    <paragraph>Content</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();
        let header = &parsed.header;

        assert_eq!(header.authors.len(), 1);
        assert_eq!(header.authors[0].name, "John Doe");
        assert_eq!(header.dates.len(), 1);
        assert_eq!(header.identifiers.len(), 1);
        assert_eq!(header.meta.len(), 1);
    }

    /// Mixed text and inline markup inside a paragraph yields one item per piece.
    #[test]
    fn test_parse_inline_elements() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <paragraph>This is <em>emphasized</em> and <bo>bold</bo> text.</paragraph>
  </body>
  <footer>
  </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();

        match &parsed.body.blocks[0] {
            // Expected pieces: text, <em>, text, <bo>, text.
            BlockElement::Paragraph(para) => assert_eq!(para.content.len(), 5),
            _ => panic!("Expected paragraph"),
        }
    }

    /// An ordered list keeps its items and its list type.
    #[test]
    fn test_parse_list() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <list type="ordered" style="numeric">
      <item>First</item>
      <item>Second</item>
    </list>
  </body>
  <footer>
  </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();

        match &parsed.body.blocks[0] {
            BlockElement::List(list) => {
                assert_eq!(list.items.len(), 2);
                assert!(matches!(list.list_type, Some(ListType::Ordered)));
            }
            _ => panic!("Expected list"),
        }
    }

    /// A signature in the footer is parsed together with its text content.
    #[test]
    fn test_parse_footer_with_signature() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
<cml profile="core" version="0.2" encoding="utf-8">
  <header>
    <title>Test</title>
  </header>
  <body>
    <paragraph>Content</paragraph>
  </body>
  <footer>
    <signatures>
      <signature when="2025-12-22T10:30:00Z" role="author">Jane Doe</signature>
    </signatures>
  </footer>
</cml>"#;

        let parsed = CmlParser::parse_str(source).unwrap();

        let sigs = parsed
            .footer
            .signatures
            .expect("footer should contain a <signatures> section");
        assert_eq!(sigs.signatures.len(), 1);
        assert_eq!(sigs.signatures[0].content, "Jane Doe");
    }
}