1use crate::types::*;
10use crate::{CmlError, Result};
11use std::collections::HashSet;
12
/// Single-pass validator for a [`CmlDocument`].
///
/// While walking a document it records every element ID it encounters and
/// every reference that must later be resolved, so that references can be
/// checked once the whole tree has been visited.
#[derive(Debug)]
pub struct CmlValidator {
    /// IDs seen so far; used to detect duplicates and to resolve local references.
    ids: HashSet<String>,
    /// References collected during the walk, checked after all IDs are known.
    references: Vec<String>,
}
20
21impl CmlValidator {
22 pub fn new() -> Self {
24 Self {
25 ids: HashSet::new(),
26 references: Vec::new(),
27 }
28 }
29
30 pub fn validate(document: &CmlDocument) -> Result<()> {
32 let mut validator = Self::new();
33 validator.validate_document(document)?;
34 Ok(())
35 }
36
37 fn validate_document(&mut self, doc: &CmlDocument) -> Result<()> {
39 if doc.version != "0.2" {
41 return Err(CmlError::ValidationError(format!(
42 "Invalid version '{}', expected '0.2'",
43 doc.version
44 )));
45 }
46
47 if doc.encoding != "utf-8" {
49 return Err(CmlError::ValidationError(format!(
50 "Invalid encoding '{}', expected 'utf-8'",
51 doc.encoding
52 )));
53 }
54
55 if doc.profile.is_empty() {
57 return Err(CmlError::ValidationError(
58 "Profile cannot be empty".to_string(),
59 ));
60 }
61
62 if let Some(id) = &doc.id {
64 self.track_id(id)?;
65 }
66
67 self.validate_header(&doc.header)?;
69
70 self.validate_body(&doc.body)?;
72
73 self.validate_footer(&doc.footer)?;
75
76 self.validate_references()?;
78
79 Ok(())
80 }
81
82 fn validate_header(&mut self, header: &Header) -> Result<()> {
84 if header.title.trim().is_empty() {
86 return Err(CmlError::ValidationError(
87 "Header title cannot be empty".to_string(),
88 ));
89 }
90
91 Ok(())
92 }
93
94 fn validate_body(&mut self, body: &Body) -> Result<()> {
96 if body.blocks.is_empty() {
97 return Err(CmlError::ValidationError(
98 "Body cannot be empty".to_string(),
99 ));
100 }
101
102 for block in &body.blocks {
103 self.validate_block_element(block)?;
104 }
105
106 Ok(())
107 }
108
109 fn validate_block_element(&mut self, block: &BlockElement) -> Result<()> {
111 match block {
112 BlockElement::Section(section) => self.validate_section(section),
113 BlockElement::Paragraph(para) => self.validate_paragraph(para),
114 BlockElement::Heading(heading) => self.validate_heading(heading),
115 BlockElement::Aside(aside) => self.validate_aside(aside),
116 BlockElement::Quote(quote) => self.validate_quote(quote),
117 BlockElement::List(list) => self.validate_list(list),
118 BlockElement::Table(table) => self.validate_table(table),
119 BlockElement::Code(_code) => Ok(()),
120 BlockElement::Break(_br) => Ok(()),
121 BlockElement::Figure(_) => {
122 Err(CmlError::ValidationError(
123 "Figure element is reserved for v0.3".to_string(),
124 ))
125 }
126 }
127 }
128
129 fn validate_section(&mut self, section: &Section) -> Result<()> {
131 if let Some(id) = §ion.id {
133 self.track_id(id)?;
134 }
135
136 for block in §ion.content {
138 self.validate_block_element(block)?;
139 }
140
141 Ok(())
142 }
143
144 fn validate_paragraph(&mut self, para: &Paragraph) -> Result<()> {
146 if let Some(id) = ¶.id {
148 self.track_id(id)?;
149 }
150
151 if para.content.is_empty() {
153 return Err(CmlError::ValidationError(
154 "Paragraph cannot be empty".to_string(),
155 ));
156 }
157
158 for inline in ¶.content {
160 self.validate_inline_element(inline)?;
161 }
162
163 Ok(())
164 }
165
166 fn validate_heading(&mut self, heading: &Heading) -> Result<()> {
168 if let Some(id) = &heading.id {
170 self.track_id(id)?;
171 }
172
173 if heading.size < 1 || heading.size > 6 {
175 return Err(CmlError::ValidationError(format!(
176 "Invalid heading size '{}', must be between 1 and 6",
177 heading.size
178 )));
179 }
180
181 if heading.content.is_empty() {
183 return Err(CmlError::ValidationError(
184 "Heading cannot be empty".to_string(),
185 ));
186 }
187
188 for inline in &heading.content {
190 self.validate_inline_element(inline)?;
191 }
192
193 Ok(())
194 }
195
196 fn validate_aside(&mut self, aside: &Aside) -> Result<()> {
198 for block in &aside.content {
200 self.validate_block_element(block)?;
201 }
202
203 Ok(())
204 }
205
206 fn validate_quote(&mut self, quote: &Quote) -> Result<()> {
208 if let Some(ref r) = quote.reference {
210 self.references.push(r.clone());
211 }
212
213 for block in "e.content {
215 self.validate_block_element(block)?;
216 }
217
218 Ok(())
219 }
220
221 fn validate_list(&mut self, list: &List) -> Result<()> {
223 if let Some(id) = &list.id {
225 self.track_id(id)?;
226 }
227
228 if list.items.is_empty() {
230 return Err(CmlError::ValidationError(
231 "List cannot be empty".to_string(),
232 ));
233 }
234
235 for item in &list.items {
237 self.validate_list_item(item)?;
238 }
239
240 Ok(())
241 }
242
243 fn validate_list_item(&mut self, item: &ListItem) -> Result<()> {
245 if let Some(id) = &item.id {
247 self.track_id(id)?;
248 }
249
250 match &item.content {
252 ListItemContent::Inline(inlines) => {
253 if inlines.is_empty() {
254 return Err(CmlError::ValidationError(
255 "List item inline content cannot be empty".to_string(),
256 ));
257 }
258 for inline in inlines {
259 self.validate_inline_element(inline)?;
260 }
261 }
262 ListItemContent::Block(blocks) => {
263 if blocks.is_empty() {
264 return Err(CmlError::ValidationError(
265 "List item block content cannot be empty".to_string(),
266 ));
267 }
268 for block in blocks {
269 self.validate_block_element(block)?;
270 }
271 }
272 }
273
274 Ok(())
275 }
276
277 fn validate_table(&mut self, table: &Table) -> Result<()> {
279 if let Some(id) = &table.id {
281 self.track_id(id)?;
282 }
283
284 if table.body.rows.is_empty() {
286 return Err(CmlError::ValidationError(
287 "Table body cannot be empty".to_string(),
288 ));
289 }
290
291 if let Some(ref header) = table.header {
293 if header.rows.is_empty() {
294 return Err(CmlError::ValidationError(
295 "Table header cannot be empty".to_string(),
296 ));
297 }
298 for row in &header.rows {
299 self.validate_table_row(row)?;
300 }
301 }
302
303 for row in &table.body.rows {
305 self.validate_table_row(row)?;
306 }
307
308 if let Some(ref footer) = table.footer {
310 for inline in &footer.caption.content {
311 self.validate_inline_element(inline)?;
312 }
313 }
314
315 Ok(())
316 }
317
318 fn validate_table_row(&mut self, row: &TableRow) -> Result<()> {
320 if row.columns.is_empty() {
321 return Err(CmlError::ValidationError(
322 "Table row cannot be empty".to_string(),
323 ));
324 }
325
326 for column in &row.columns {
327 self.validate_table_column(column)?;
328 }
329
330 Ok(())
331 }
332
333 fn validate_table_column(&mut self, column: &TableColumn) -> Result<()> {
335 for inline in &column.cell.content {
337 self.validate_inline_element(inline)?;
338 }
339
340 Ok(())
341 }
342
343 fn validate_inline_element(&mut self, inline: &InlineElement) -> Result<()> {
345 match inline {
346 InlineElement::Text(_) => Ok(()),
347 InlineElement::Em(em) => {
348 for inner in &em.content {
349 self.validate_inline_element(inner)?;
350 }
351 Ok(())
352 }
353 InlineElement::Bo(bo) => {
354 for inner in &bo.content {
355 self.validate_inline_element(inner)?;
356 }
357 Ok(())
358 }
359 InlineElement::Un(un) => {
360 for inner in &un.content {
361 self.validate_inline_element(inner)?;
362 }
363 Ok(())
364 }
365 InlineElement::St(st) => {
366 for inner in &st.content {
367 self.validate_inline_element(inner)?;
368 }
369 Ok(())
370 }
371 InlineElement::Snip(snip) => {
372 if snip.content.trim().is_empty() {
373 return Err(CmlError::ValidationError(
374 "Snippet content cannot be empty".to_string(),
375 ));
376 }
377 Ok(())
378 }
379 InlineElement::Key(key) => {
380 if key.content.trim().is_empty() {
381 return Err(CmlError::ValidationError(
382 "Keyboard content cannot be empty".to_string(),
383 ));
384 }
385 Ok(())
386 }
387 InlineElement::Rf(rf) => {
388 if rf.reference.trim().is_empty() {
389 return Err(CmlError::ValidationError(
390 "Reference 'reference' attribute cannot be empty".to_string(),
391 ));
392 }
393 self.references.push(rf.reference.clone());
395 Ok(())
396 }
397 InlineElement::Tg(tg) => {
398 if tg.reference.trim().is_empty() {
399 return Err(CmlError::ValidationError(
400 "Topic tag 'reference' attribute cannot be empty".to_string(),
401 ));
402 }
403 Ok(())
404 }
405 InlineElement::Lk(lk) => {
406 if lk.reference.trim().is_empty() {
407 return Err(CmlError::ValidationError(
408 "Link 'reference' attribute cannot be empty".to_string(),
409 ));
410 }
411 Ok(())
412 }
413 InlineElement::Curr(curr) => {
414 if curr.value.trim().is_empty() {
415 return Err(CmlError::ValidationError(
416 "Currency value cannot be empty".to_string(),
417 ));
418 }
419 Ok(())
420 }
421 InlineElement::End(_) => Ok(()),
422 }
423 }
424
425 fn validate_footer(&mut self, footer: &Footer) -> Result<()> {
427 if let Some(ref signatures) = footer.signatures {
429 self.validate_signatures(signatures)?;
430 }
431
432 if let Some(ref citations) = footer.citations {
434 self.validate_citations(citations)?;
435 }
436
437 if let Some(ref annotations) = footer.annotations {
439 self.validate_annotations(annotations)?;
440 }
441
442 Ok(())
443 }
444
445 fn validate_signatures(&mut self, signatures: &Signatures) -> Result<()> {
447 if signatures.signatures.is_empty() {
448 return Err(CmlError::ValidationError(
449 "Signatures section cannot be empty".to_string(),
450 ));
451 }
452
453 for signature in &signatures.signatures {
454 self.validate_signature(signature)?;
455 }
456
457 Ok(())
458 }
459
460 fn validate_signature(&mut self, signature: &Signature) -> Result<()> {
462 if signature.when.is_empty() {
464 return Err(CmlError::ValidationError(
465 "Signature 'when' attribute cannot be empty".to_string(),
466 ));
467 }
468
469 if let Some(ref r) = signature.reference {
471 self.references.push(r.clone());
472 }
473
474 if signature.content.trim().is_empty() {
476 return Err(CmlError::ValidationError(
477 "Signature content cannot be empty".to_string(),
478 ));
479 }
480
481 Ok(())
482 }
483
484 fn validate_citations(&mut self, citations: &Citations) -> Result<()> {
486 if citations.citations.is_empty() {
487 return Err(CmlError::ValidationError(
488 "Citations section cannot be empty".to_string(),
489 ));
490 }
491
492 for citation in &citations.citations {
493 self.validate_citation(citation)?;
494 }
495
496 Ok(())
497 }
498
499 fn validate_citation(&mut self, citation: &Citation) -> Result<()> {
501 if !citation.reference.is_empty() {
503 self.track_id(&citation.reference)?;
504 }
505
506 for inline in &citation.content {
508 self.validate_inline_element(inline)?;
509 }
510
511 Ok(())
512 }
513
514 fn validate_annotations(&mut self, annotations: &Annotations) -> Result<()> {
516 if annotations.notes.is_empty() {
517 return Err(CmlError::ValidationError(
518 "Annotations section cannot be empty".to_string(),
519 ));
520 }
521
522 for note in &annotations.notes {
523 self.validate_note(note)?;
524 }
525
526 Ok(())
527 }
528
529 fn validate_note(&mut self, note: &Note) -> Result<()> {
531 if let Some(id) = ¬e.id {
533 self.track_id(id)?;
534 }
535
536 if let Some(ref r) = note.reference {
538 self.references.push(r.clone());
539 }
540
541 match ¬e.content {
543 NoteContent::Inline(inlines) => {
544 for inline in inlines {
545 self.validate_inline_element(inline)?;
546 }
547 }
548 NoteContent::Block(blocks) => {
549 for block in blocks {
550 self.validate_block_element(block)?;
551 }
552 }
553 }
554
555 Ok(())
556 }
557
558 fn track_id(&mut self, id: &str) -> Result<()> {
560 if id.trim().is_empty() {
561 return Err(CmlError::ValidationError(
562 "ID cannot be empty".to_string(),
563 ));
564 }
565
566 if !self.ids.insert(id.to_string()) {
567 return Err(CmlError::DuplicateId(id.to_string()));
568 }
569
570 Ok(())
571 }
572
573 fn validate_references(&self) -> Result<()> {
575 for reference in &self.references {
576 if reference.starts_with("http://") || reference.starts_with("https://") {
583 continue;
584 }
585
586 if reference.contains(':') {
588 let parts: Vec<&str> = reference.split(':').collect();
590 if parts.len() != 2 {
591 return Err(CmlError::ValidationError(format!(
592 "Invalid pathless reference format '{}', should be 'namespace:identifier'",
593 reference
594 )));
595 }
596 continue;
597 }
598
599 if !self.ids.contains(reference) {
601 return Err(CmlError::ReferenceNotFound(reference.clone()));
602 }
603 }
604
605 Ok(())
606 }
607}
608
609impl Default for CmlValidator {
610 fn default() -> Self {
611 Self::new()
612 }
613}
614
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an otherwise valid v0.2 document (title "Test", empty footer)
    /// around the given body blocks. Tests mutate the result to introduce the
    /// single defect they want to exercise.
    fn document_with(blocks: Vec<BlockElement>) -> CmlDocument {
        CmlDocument {
            version: "0.2".to_string(),
            encoding: "utf-8".to_string(),
            profile: "core".to_string(),
            id: None,
            header: Header {
                title: "Test".to_string(),
                authors: vec![],
                dates: vec![],
                identifiers: vec![],
                version: None,
                description: None,
                provenance: None,
                source: None,
                meta: vec![],
            },
            body: Body { blocks },
            footer: Footer {
                signatures: None,
                citations: None,
                annotations: None,
            },
        }
    }

    /// Wraps inline content in a paragraph block with an optional ID.
    fn paragraph(id: Option<&str>, content: Vec<InlineElement>) -> BlockElement {
        BlockElement::Paragraph(Paragraph {
            id: id.map(String::from),
            paragraph_type: None,
            content,
        })
    }

    /// Plain text inline element.
    fn text(value: &str) -> InlineElement {
        InlineElement::Text(value.to_string())
    }

    /// `Rf` inline element with no title.
    fn rf(reference: &str, role: Option<&str>, content: &str) -> InlineElement {
        InlineElement::Rf(Rf {
            reference: reference.to_string(),
            role: role.map(String::from),
            title: None,
            content: content.to_string(),
        })
    }

    #[test]
    fn test_validate_minimal_document() {
        let mut doc = document_with(vec![paragraph(None, vec![text("Hello, world!")])]);
        doc.header.title = "Test Document".to_string();

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_invalid_version() {
        let mut doc = document_with(vec![paragraph(None, vec![text("Test")])]);
        doc.version = "1.0".to_string();

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_empty_title() {
        let mut doc = document_with(vec![paragraph(None, vec![text("Test")])]);
        doc.header.title = "".to_string();

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_duplicate_ids() {
        let doc = document_with(vec![
            paragraph(Some("para-1"), vec![text("First")]),
            paragraph(Some("para-1"), vec![text("Second")]),
        ]);

        assert!(matches!(
            CmlValidator::validate(&doc),
            Err(CmlError::DuplicateId(_))
        ));
    }

    #[test]
    fn test_validate_invalid_reference() {
        let doc = document_with(vec![paragraph(
            None,
            vec![rf("nonexistent-id", None, "Link")],
        )]);

        assert!(matches!(
            CmlValidator::validate(&doc),
            Err(CmlError::ReferenceNotFound(_))
        ));
    }

    #[test]
    fn test_validate_pathless_reference() {
        let doc = document_with(vec![paragraph(
            None,
            vec![rf("president:47", Some("person"), "President 47")],
        )]);

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_url_reference() {
        let doc = document_with(vec![paragraph(
            None,
            vec![rf("https://example.com/page", None, "Example")],
        )]);

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_malformed_pathless_reference() {
        // More than one colon is not a valid 'namespace:identifier'.
        let doc = document_with(vec![paragraph(
            None,
            vec![rf("isbn:978:0", None, "Book")],
        )]);

        assert!(matches!(
            CmlValidator::validate(&doc),
            Err(CmlError::ValidationError(_))
        ));
    }

    #[test]
    fn test_validate_invalid_heading_size() {
        let doc = document_with(vec![BlockElement::Heading(Heading {
            id: None,
            heading_type: None,
            size: 7,
            content: vec![text("Invalid")],
        })]);

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_empty_body() {
        let doc = document_with(vec![]);

        assert!(CmlValidator::validate(&doc).is_err());
    }
}