cml_rs/
validator.rs

1//! CML v0.2 Document Validator
2//!
3//! Validates CML documents for:
4//! - Structural correctness (header, body, footer)
5//! - ID uniqueness
6//! - Reference integrity
7//! - Required fields
8
9use crate::types::*;
10use crate::{CmlError, Result};
11use std::collections::HashSet;
12
/// Validator for CML v0.2 documents.
///
/// A validator accumulates state while walking a single document: every ID it
/// encounters (to detect duplicates) and every reference it sees (to resolve
/// once the whole document has been visited).
#[derive(Debug)]
pub struct CmlValidator {
    /// All IDs encountered during validation; used for uniqueness checks and
    /// for resolving internal references.
    ids: HashSet<String>,
    /// References collected during the walk that still need to be resolved.
    references: Vec<String>,
}
20
21impl CmlValidator {
22    /// Create a new validator
23    pub fn new() -> Self {
24        Self {
25            ids: HashSet::new(),
26            references: Vec::new(),
27        }
28    }
29
30    /// Validate a complete CML document
31    pub fn validate(document: &CmlDocument) -> Result<()> {
32        let mut validator = Self::new();
33        validator.validate_document(document)?;
34        Ok(())
35    }
36
37    /// Validate the document structure
38    fn validate_document(&mut self, doc: &CmlDocument) -> Result<()> {
39        // Validate version
40        if doc.version != "0.2" {
41            return Err(CmlError::ValidationError(format!(
42                "Invalid version '{}', expected '0.2'",
43                doc.version
44            )));
45        }
46
47        // Validate encoding
48        if doc.encoding != "utf-8" {
49            return Err(CmlError::ValidationError(format!(
50                "Invalid encoding '{}', expected 'utf-8'",
51                doc.encoding
52            )));
53        }
54
55        // Validate profile format
56        if doc.profile.is_empty() {
57            return Err(CmlError::ValidationError(
58                "Profile cannot be empty".to_string(),
59            ));
60        }
61
62        // Track document ID if present
63        if let Some(id) = &doc.id {
64            self.track_id(id)?;
65        }
66
67        // Validate header (required)
68        self.validate_header(&doc.header)?;
69
70        // Validate body (required)
71        self.validate_body(&doc.body)?;
72
73        // Validate footer (required)
74        self.validate_footer(&doc.footer)?;
75
76        // Check all references are valid
77        self.validate_references()?;
78
79        Ok(())
80    }
81
82    /// Validate header structure
83    fn validate_header(&mut self, header: &Header) -> Result<()> {
84        // Title is required and cannot be empty
85        if header.title.trim().is_empty() {
86            return Err(CmlError::ValidationError(
87                "Header title cannot be empty".to_string(),
88            ));
89        }
90
91        Ok(())
92    }
93
94    /// Validate body structure
95    fn validate_body(&mut self, body: &Body) -> Result<()> {
96        if body.blocks.is_empty() {
97            return Err(CmlError::ValidationError(
98                "Body cannot be empty".to_string(),
99            ));
100        }
101
102        for block in &body.blocks {
103            self.validate_block_element(block)?;
104        }
105
106        Ok(())
107    }
108
109    /// Validate block element
110    fn validate_block_element(&mut self, block: &BlockElement) -> Result<()> {
111        match block {
112            BlockElement::Section(section) => self.validate_section(section),
113            BlockElement::Paragraph(para) => self.validate_paragraph(para),
114            BlockElement::Heading(heading) => self.validate_heading(heading),
115            BlockElement::Aside(aside) => self.validate_aside(aside),
116            BlockElement::Quote(quote) => self.validate_quote(quote),
117            BlockElement::List(list) => self.validate_list(list),
118            BlockElement::Table(table) => self.validate_table(table),
119            BlockElement::Code(_code) => Ok(()),
120            BlockElement::Break(_br) => Ok(()),
121            BlockElement::Figure(_) => {
122                Err(CmlError::ValidationError(
123                    "Figure element is reserved for v0.3".to_string(),
124                ))
125            }
126        }
127    }
128
129    /// Validate section
130    fn validate_section(&mut self, section: &Section) -> Result<()> {
131        // Track ID if present
132        if let Some(id) = &section.id {
133            self.track_id(id)?;
134        }
135
136        // Validate nested blocks
137        for block in &section.content {
138            self.validate_block_element(block)?;
139        }
140
141        Ok(())
142    }
143
144    /// Validate paragraph
145    fn validate_paragraph(&mut self, para: &Paragraph) -> Result<()> {
146        // Track ID if present
147        if let Some(id) = &para.id {
148            self.track_id(id)?;
149        }
150
151        // Paragraph must have content
152        if para.content.is_empty() {
153            return Err(CmlError::ValidationError(
154                "Paragraph cannot be empty".to_string(),
155            ));
156        }
157
158        // Validate inline content
159        for inline in &para.content {
160            self.validate_inline_element(inline)?;
161        }
162
163        Ok(())
164    }
165
166    /// Validate heading
167    fn validate_heading(&mut self, heading: &Heading) -> Result<()> {
168        // Track ID if present
169        if let Some(id) = &heading.id {
170            self.track_id(id)?;
171        }
172
173        // Validate size (1-6)
174        if heading.size < 1 || heading.size > 6 {
175            return Err(CmlError::ValidationError(format!(
176                "Invalid heading size '{}', must be between 1 and 6",
177                heading.size
178            )));
179        }
180
181        // Heading must have content
182        if heading.content.is_empty() {
183            return Err(CmlError::ValidationError(
184                "Heading cannot be empty".to_string(),
185            ));
186        }
187
188        // Validate inline content
189        for inline in &heading.content {
190            self.validate_inline_element(inline)?;
191        }
192
193        Ok(())
194    }
195
196    /// Validate aside
197    fn validate_aside(&mut self, aside: &Aside) -> Result<()> {
198        // Validate nested blocks
199        for block in &aside.content {
200            self.validate_block_element(block)?;
201        }
202
203        Ok(())
204    }
205
206    /// Validate quote
207    fn validate_quote(&mut self, quote: &Quote) -> Result<()> {
208        // Track ref if present
209        if let Some(ref r) = quote.reference {
210            self.references.push(r.clone());
211        }
212
213        // Validate nested blocks
214        for block in &quote.content {
215            self.validate_block_element(block)?;
216        }
217
218        Ok(())
219    }
220
221    /// Validate list
222    fn validate_list(&mut self, list: &List) -> Result<()> {
223        // Track ID if present
224        if let Some(id) = &list.id {
225            self.track_id(id)?;
226        }
227
228        // List must have items
229        if list.items.is_empty() {
230            return Err(CmlError::ValidationError(
231                "List cannot be empty".to_string(),
232            ));
233        }
234
235        // Validate items
236        for item in &list.items {
237            self.validate_list_item(item)?;
238        }
239
240        Ok(())
241    }
242
243    /// Validate list item
244    fn validate_list_item(&mut self, item: &ListItem) -> Result<()> {
245        // Track ID if present
246        if let Some(id) = &item.id {
247            self.track_id(id)?;
248        }
249
250        // Validate content
251        match &item.content {
252            ListItemContent::Inline(inlines) => {
253                if inlines.is_empty() {
254                    return Err(CmlError::ValidationError(
255                        "List item inline content cannot be empty".to_string(),
256                    ));
257                }
258                for inline in inlines {
259                    self.validate_inline_element(inline)?;
260                }
261            }
262            ListItemContent::Block(blocks) => {
263                if blocks.is_empty() {
264                    return Err(CmlError::ValidationError(
265                        "List item block content cannot be empty".to_string(),
266                    ));
267                }
268                for block in blocks {
269                    self.validate_block_element(block)?;
270                }
271            }
272        }
273
274        Ok(())
275    }
276
277    /// Validate table
278    fn validate_table(&mut self, table: &Table) -> Result<()> {
279        // Track ID if present
280        if let Some(id) = &table.id {
281            self.track_id(id)?;
282        }
283
284        // Table must have body
285        if table.body.rows.is_empty() {
286            return Err(CmlError::ValidationError(
287                "Table body cannot be empty".to_string(),
288            ));
289        }
290
291        // Validate header if present
292        if let Some(ref header) = table.header {
293            if header.rows.is_empty() {
294                return Err(CmlError::ValidationError(
295                    "Table header cannot be empty".to_string(),
296                ));
297            }
298            for row in &header.rows {
299                self.validate_table_row(row)?;
300            }
301        }
302
303        // Validate body
304        for row in &table.body.rows {
305            self.validate_table_row(row)?;
306        }
307
308        // Validate footer if present
309        if let Some(ref footer) = table.footer {
310            for inline in &footer.caption.content {
311                self.validate_inline_element(inline)?;
312            }
313        }
314
315        Ok(())
316    }
317
318    /// Validate table row
319    fn validate_table_row(&mut self, row: &TableRow) -> Result<()> {
320        if row.columns.is_empty() {
321            return Err(CmlError::ValidationError(
322                "Table row cannot be empty".to_string(),
323            ));
324        }
325
326        for column in &row.columns {
327            self.validate_table_column(column)?;
328        }
329
330        Ok(())
331    }
332
333    /// Validate table column
334    fn validate_table_column(&mut self, column: &TableColumn) -> Result<()> {
335        // Validate cell
336        for inline in &column.cell.content {
337            self.validate_inline_element(inline)?;
338        }
339
340        Ok(())
341    }
342
343    /// Validate inline element
344    fn validate_inline_element(&mut self, inline: &InlineElement) -> Result<()> {
345        match inline {
346            InlineElement::Text(_) => Ok(()),
347            InlineElement::Em(em) => {
348                for inner in &em.content {
349                    self.validate_inline_element(inner)?;
350                }
351                Ok(())
352            }
353            InlineElement::Bo(bo) => {
354                for inner in &bo.content {
355                    self.validate_inline_element(inner)?;
356                }
357                Ok(())
358            }
359            InlineElement::Un(un) => {
360                for inner in &un.content {
361                    self.validate_inline_element(inner)?;
362                }
363                Ok(())
364            }
365            InlineElement::St(st) => {
366                for inner in &st.content {
367                    self.validate_inline_element(inner)?;
368                }
369                Ok(())
370            }
371            InlineElement::Snip(snip) => {
372                if snip.content.trim().is_empty() {
373                    return Err(CmlError::ValidationError(
374                        "Snippet content cannot be empty".to_string(),
375                    ));
376                }
377                Ok(())
378            }
379            InlineElement::Key(key) => {
380                if key.content.trim().is_empty() {
381                    return Err(CmlError::ValidationError(
382                        "Keyboard content cannot be empty".to_string(),
383                    ));
384                }
385                Ok(())
386            }
387            InlineElement::Rf(rf) => {
388                if rf.reference.trim().is_empty() {
389                    return Err(CmlError::ValidationError(
390                        "Reference 'reference' attribute cannot be empty".to_string(),
391                    ));
392                }
393                // Track reference for later validation
394                self.references.push(rf.reference.clone());
395                Ok(())
396            }
397            InlineElement::Tg(tg) => {
398                if tg.reference.trim().is_empty() {
399                    return Err(CmlError::ValidationError(
400                        "Topic tag 'reference' attribute cannot be empty".to_string(),
401                    ));
402                }
403                Ok(())
404            }
405            InlineElement::Lk(lk) => {
406                if lk.reference.trim().is_empty() {
407                    return Err(CmlError::ValidationError(
408                        "Link 'reference' attribute cannot be empty".to_string(),
409                    ));
410                }
411                Ok(())
412            }
413            InlineElement::Curr(curr) => {
414                if curr.value.trim().is_empty() {
415                    return Err(CmlError::ValidationError(
416                        "Currency value cannot be empty".to_string(),
417                    ));
418                }
419                Ok(())
420            }
421            InlineElement::End(_) => Ok(()),
422        }
423    }
424
425    /// Validate footer
426    fn validate_footer(&mut self, footer: &Footer) -> Result<()> {
427        // Validate signatures if present
428        if let Some(ref signatures) = footer.signatures {
429            self.validate_signatures(signatures)?;
430        }
431
432        // Validate citations if present
433        if let Some(ref citations) = footer.citations {
434            self.validate_citations(citations)?;
435        }
436
437        // Validate annotations if present
438        if let Some(ref annotations) = footer.annotations {
439            self.validate_annotations(annotations)?;
440        }
441
442        Ok(())
443    }
444
445    /// Validate signatures
446    fn validate_signatures(&mut self, signatures: &Signatures) -> Result<()> {
447        if signatures.signatures.is_empty() {
448            return Err(CmlError::ValidationError(
449                "Signatures section cannot be empty".to_string(),
450            ));
451        }
452
453        for signature in &signatures.signatures {
454            self.validate_signature(signature)?;
455        }
456
457        Ok(())
458    }
459
460    /// Validate signature
461    fn validate_signature(&mut self, signature: &Signature) -> Result<()> {
462        // Validate when (ISO 8601 datetime)
463        if signature.when.is_empty() {
464            return Err(CmlError::ValidationError(
465                "Signature 'when' attribute cannot be empty".to_string(),
466            ));
467        }
468
469        // Track ref if present
470        if let Some(ref r) = signature.reference {
471            self.references.push(r.clone());
472        }
473
474        // Content cannot be empty
475        if signature.content.trim().is_empty() {
476            return Err(CmlError::ValidationError(
477                "Signature content cannot be empty".to_string(),
478            ));
479        }
480
481        Ok(())
482    }
483
484    /// Validate citations
485    fn validate_citations(&mut self, citations: &Citations) -> Result<()> {
486        if citations.citations.is_empty() {
487            return Err(CmlError::ValidationError(
488                "Citations section cannot be empty".to_string(),
489            ));
490        }
491
492        for citation in &citations.citations {
493            self.validate_citation(citation)?;
494        }
495
496        Ok(())
497    }
498
499    /// Validate citation
500    fn validate_citation(&mut self, citation: &Citation) -> Result<()> {
501        // Track ref (citation reference is ID)
502        if !citation.reference.is_empty() {
503            self.track_id(&citation.reference)?;
504        }
505
506        // Validate inline content
507        for inline in &citation.content {
508            self.validate_inline_element(inline)?;
509        }
510
511        Ok(())
512    }
513
514    /// Validate annotations
515    fn validate_annotations(&mut self, annotations: &Annotations) -> Result<()> {
516        if annotations.notes.is_empty() {
517            return Err(CmlError::ValidationError(
518                "Annotations section cannot be empty".to_string(),
519            ));
520        }
521
522        for note in &annotations.notes {
523            self.validate_note(note)?;
524        }
525
526        Ok(())
527    }
528
529    /// Validate note
530    fn validate_note(&mut self, note: &Note) -> Result<()> {
531        // Track ID if present
532        if let Some(id) = &note.id {
533            self.track_id(id)?;
534        }
535
536        // Track ref if present
537        if let Some(ref r) = note.reference {
538            self.references.push(r.clone());
539        }
540
541        // Validate content
542        match &note.content {
543            NoteContent::Inline(inlines) => {
544                for inline in inlines {
545                    self.validate_inline_element(inline)?;
546                }
547            }
548            NoteContent::Block(blocks) => {
549                for block in blocks {
550                    self.validate_block_element(block)?;
551                }
552            }
553        }
554
555        Ok(())
556    }
557
558    /// Track an ID and ensure it's unique
559    fn track_id(&mut self, id: &str) -> Result<()> {
560        if id.trim().is_empty() {
561            return Err(CmlError::ValidationError(
562                "ID cannot be empty".to_string(),
563            ));
564        }
565
566        if !self.ids.insert(id.to_string()) {
567            return Err(CmlError::DuplicateId(id.to_string()));
568        }
569
570        Ok(())
571    }
572
573    /// Validate all references point to valid IDs
574    fn validate_references(&self) -> Result<()> {
575        for reference in &self.references {
576            // References can be:
577            // 1. Internal IDs (must exist in document)
578            // 2. Pathless references (namespace:identifier format)
579            // 3. External URLs
580
581            // Skip URLs
582            if reference.starts_with("http://") || reference.starts_with("https://") {
583                continue;
584            }
585
586            // Pathless references have a colon
587            if reference.contains(':') {
588                // Validate format: should be namespace:identifier
589                let parts: Vec<&str> = reference.split(':').collect();
590                if parts.len() != 2 {
591                    return Err(CmlError::ValidationError(format!(
592                        "Invalid pathless reference format '{}', should be 'namespace:identifier'",
593                        reference
594                    )));
595                }
596                continue;
597            }
598
599            // Internal ID reference - must exist
600            if !self.ids.contains(reference) {
601                return Err(CmlError::ReferenceNotFound(reference.clone()));
602            }
603        }
604
605        Ok(())
606    }
607}
608
609impl Default for CmlValidator {
610    fn default() -> Self {
611        Self::new()
612    }
613}
614
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap a string slice in a plain-text inline element.
    fn text(value: &str) -> InlineElement {
        InlineElement::Text(value.to_string())
    }

    /// Build a paragraph block with an optional ID and the given inline content.
    fn paragraph(id: Option<&str>, content: Vec<InlineElement>) -> BlockElement {
        BlockElement::Paragraph(Paragraph {
            id: id.map(str::to_string),
            paragraph_type: None,
            content,
        })
    }

    /// Build an `Rf` inline element pointing at `target`.
    fn reference(target: &str, role: Option<&str>, label: &str) -> InlineElement {
        InlineElement::Rf(Rf {
            reference: target.to_string(),
            role: role.map(str::to_string),
            title: None,
            content: label.to_string(),
        })
    }

    /// Build an otherwise valid v0.2 document titled "Test" around `blocks`.
    ///
    /// Individual tests mutate the returned value to introduce the one defect
    /// they want to exercise.
    fn document(blocks: Vec<BlockElement>) -> CmlDocument {
        CmlDocument {
            version: "0.2".to_string(),
            encoding: "utf-8".to_string(),
            profile: "core".to_string(),
            id: None,
            header: Header {
                title: "Test".to_string(),
                authors: vec![],
                dates: vec![],
                identifiers: vec![],
                version: None,
                description: None,
                provenance: None,
                source: None,
                meta: vec![],
            },
            body: Body { blocks },
            footer: Footer {
                signatures: None,
                citations: None,
                annotations: None,
            },
        }
    }

    /// Shorthand for a document holding a single anonymous paragraph of text.
    fn single_paragraph_document(body_text: &str) -> CmlDocument {
        document(vec![paragraph(None, vec![text(body_text)])])
    }

    #[test]
    fn test_validate_minimal_document() {
        let mut doc = single_paragraph_document("Hello, world!");
        doc.header.title = "Test Document".to_string();

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_invalid_version() {
        let mut doc = single_paragraph_document("Test");
        doc.version = "1.0".to_string();

        assert!(matches!(
            CmlValidator::validate(&doc),
            Err(CmlError::ValidationError(_))
        ));
    }

    #[test]
    fn test_validate_empty_title() {
        let mut doc = single_paragraph_document("Test");
        doc.header.title = String::new();

        assert!(matches!(
            CmlValidator::validate(&doc),
            Err(CmlError::ValidationError(_))
        ));
    }

    #[test]
    fn test_validate_duplicate_ids() {
        let doc = document(vec![
            paragraph(Some("para-1"), vec![text("First")]),
            paragraph(Some("para-1"), vec![text("Second")]),
        ]);

        let result = CmlValidator::validate(&doc);
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), CmlError::DuplicateId(_)));
    }

    #[test]
    fn test_validate_invalid_reference() {
        let doc = document(vec![paragraph(
            None,
            vec![reference("nonexistent-id", None, "Link")],
        )]);

        let result = CmlValidator::validate(&doc);
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), CmlError::ReferenceNotFound(_)));
    }

    #[test]
    fn test_validate_pathless_reference() {
        let doc = document(vec![paragraph(
            None,
            vec![reference("president:47", Some("person"), "President 47")],
        )]);

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_invalid_heading_size() {
        let doc = document(vec![BlockElement::Heading(Heading {
            id: None,
            heading_type: None,
            size: 7,
            content: vec![text("Invalid")],
        })]);

        assert!(matches!(
            CmlValidator::validate(&doc),
            Err(CmlError::ValidationError(_))
        ));
    }

    #[test]
    fn test_validate_empty_body() {
        let doc = document(vec![]);

        assert!(matches!(
            CmlValidator::validate(&doc),
            Err(CmlError::ValidationError(_))
        ));
    }
}