Skip to main content

cml_rs/
validator.rs

1//! CML v0.2 Document Validator
2//!
3//! Validates CML documents for:
4//! - Structural correctness (header, body, footer)
5//! - ID uniqueness
6//! - Reference integrity
7//! - Required fields
8//! - Profile-specific constraints
9
10use crate::profile::{ConstraintRegistry, ResolvedConstraints};
11use crate::types::{ListType, *};
12use crate::{CmlError, Result};
13use std::collections::{HashMap, HashSet};
14
15/// Validates a CML v0.2 document
16pub struct CmlValidator {
17    /// All IDs encountered during validation
18    ids: HashSet<String>,
19    /// References that need to be resolved
20    references: Vec<String>,
21    /// Element occurrence counts
22    element_counts: HashMap<String, u32>,
23    /// Current profile name
24    profile: String,
25    /// Resolved constraints for the profile
26    constraints: Option<ResolvedConstraints>,
27}
28
29impl CmlValidator {
30    /// Create a new validator
31    pub fn new() -> Self {
32        Self {
33            ids: HashSet::new(),
34            references: Vec::new(),
35            element_counts: HashMap::new(),
36            profile: String::new(),
37            constraints: None,
38        }
39    }
40
41    /// Create a validator with constraints for a specific profile
42    pub fn with_constraints(constraints: ResolvedConstraints) -> Self {
43        Self {
44            ids: HashSet::new(),
45            references: Vec::new(),
46            element_counts: HashMap::new(),
47            profile: constraints.profile.clone(),
48            constraints: Some(constraints),
49        }
50    }
51
52    /// Validate a complete CML document (basic validation without profile constraints)
53    pub fn validate(document: &CmlDocument) -> Result<()> {
54        let mut validator = Self::new();
55        validator.validate_document(document)?;
56        Ok(())
57    }
58
59    /// Validate a document with profile-specific constraints
60    pub fn validate_with_profile(document: &CmlDocument) -> Result<()> {
61        // Extract profile name from document (e.g., "legal" from "legal:contract")
62        let profile_name = document
63            .profile
64            .split(':')
65            .next()
66            .unwrap_or(&document.profile);
67
68        // Try to load constraints for this profile
69        let mut registry = ConstraintRegistry::with_builtins()?;
70        let constraints = registry.get(profile_name)?.clone();
71
72        let mut validator = Self::with_constraints(constraints);
73        validator.validate_document(document)?;
74        validator.validate_constraints(document)?;
75        Ok(())
76    }
77
78    /// Validate the document structure
79    fn validate_document(&mut self, doc: &CmlDocument) -> Result<()> {
80        // Validate version
81        if doc.version != "0.2" {
82            return Err(CmlError::ValidationError(format!(
83                "Invalid version '{}', expected '0.2'",
84                doc.version
85            )));
86        }
87
88        // Validate encoding
89        if doc.encoding != "utf-8" {
90            return Err(CmlError::ValidationError(format!(
91                "Invalid encoding '{}', expected 'utf-8'",
92                doc.encoding
93            )));
94        }
95
96        // Validate profile format
97        if doc.profile.is_empty() {
98            return Err(CmlError::ValidationError(
99                "Profile cannot be empty".to_string(),
100            ));
101        }
102
103        // Track document ID if present
104        if let Some(id) = &doc.id {
105            self.track_id(id)?;
106        }
107
108        // Count structural elements
109        self.count_element("cml");
110        self.count_element("header");
111        self.count_element("title");
112        self.count_element("body");
113        self.count_element("footer");
114
115        // Validate header (required)
116        self.validate_header(&doc.header)?;
117
118        // Validate body (required)
119        self.validate_body(&doc.body)?;
120
121        // Validate footer (required)
122        self.validate_footer(&doc.footer)?;
123
124        // Check all references are valid
125        self.validate_references()?;
126
127        Ok(())
128    }
129
130    /// Validate header structure
131    fn validate_header(&mut self, header: &Header) -> Result<()> {
132        // Title is required and cannot be empty
133        if header.title.trim().is_empty() {
134            return Err(CmlError::ValidationError(
135                "Header title cannot be empty".to_string(),
136            ));
137        }
138
139        Ok(())
140    }
141
142    /// Validate body structure
143    fn validate_body(&mut self, body: &Body) -> Result<()> {
144        if body.blocks.is_empty() {
145            return Err(CmlError::ValidationError(
146                "Body cannot be empty".to_string(),
147            ));
148        }
149
150        for block in &body.blocks {
151            self.validate_block_element(block)?;
152        }
153
154        Ok(())
155    }
156
157    /// Validate block element
158    fn validate_block_element(&mut self, block: &BlockElement) -> Result<()> {
159        // Count the element
160        self.count_element(Self::get_block_element_name(block));
161
162        match block {
163            BlockElement::Section(section) => self.validate_section(section),
164            BlockElement::Paragraph(para) => self.validate_paragraph(para),
165            BlockElement::Heading(heading) => self.validate_heading(heading),
166            BlockElement::Aside(aside) => self.validate_aside(aside),
167            BlockElement::Quote(quote) => self.validate_quote(quote),
168            BlockElement::List(list) => self.validate_list(list),
169            BlockElement::Table(table) => self.validate_table(table),
170            BlockElement::Code(_code) => Ok(()),
171            BlockElement::Break(_br) => Ok(()),
172            BlockElement::Figure(_) => Err(CmlError::ValidationError(
173                "Figure element is reserved for v0.3".to_string(),
174            )),
175        }
176    }
177
178    /// Validate section
179    fn validate_section(&mut self, section: &Section) -> Result<()> {
180        // Track ID if present
181        if let Some(id) = &section.id {
182            self.track_id(id)?;
183        }
184
185        // Validate nested blocks
186        for block in &section.content {
187            self.validate_block_element(block)?;
188        }
189
190        Ok(())
191    }
192
193    /// Validate paragraph
194    fn validate_paragraph(&mut self, para: &Paragraph) -> Result<()> {
195        // Track ID if present
196        if let Some(id) = &para.id {
197            self.track_id(id)?;
198        }
199
200        // Paragraph must have content
201        if para.content.is_empty() {
202            return Err(CmlError::ValidationError(
203                "Paragraph cannot be empty".to_string(),
204            ));
205        }
206
207        // Validate inline content
208        for inline in &para.content {
209            self.validate_inline_element(inline)?;
210        }
211
212        Ok(())
213    }
214
215    /// Validate heading
216    fn validate_heading(&mut self, heading: &Heading) -> Result<()> {
217        // Track ID if present
218        if let Some(id) = &heading.id {
219            self.track_id(id)?;
220        }
221
222        // Validate size (1-6)
223        if heading.size < 1 || heading.size > 6 {
224            return Err(CmlError::ValidationError(format!(
225                "Invalid heading size '{}', must be between 1 and 6",
226                heading.size
227            )));
228        }
229
230        // Heading must have content
231        if heading.content.is_empty() {
232            return Err(CmlError::ValidationError(
233                "Heading cannot be empty".to_string(),
234            ));
235        }
236
237        // Validate inline content
238        for inline in &heading.content {
239            self.validate_inline_element(inline)?;
240        }
241
242        Ok(())
243    }
244
245    /// Validate aside
246    fn validate_aside(&mut self, aside: &Aside) -> Result<()> {
247        // Validate nested blocks
248        for block in &aside.content {
249            self.validate_block_element(block)?;
250        }
251
252        Ok(())
253    }
254
255    /// Validate quote
256    fn validate_quote(&mut self, quote: &Quote) -> Result<()> {
257        // Track ref if present
258        if let Some(ref r) = quote.reference {
259            self.references.push(r.clone());
260        }
261
262        // Validate nested blocks
263        for block in &quote.content {
264            self.validate_block_element(block)?;
265        }
266
267        Ok(())
268    }
269
270    /// Validate list
271    fn validate_list(&mut self, list: &List) -> Result<()> {
272        // Track ID if present
273        if let Some(id) = &list.id {
274            self.track_id(id)?;
275        }
276
277        // List must have items
278        if list.items.is_empty() {
279            return Err(CmlError::ValidationError(
280                "List cannot be empty".to_string(),
281            ));
282        }
283
284        // Validate items
285        for item in &list.items {
286            self.validate_list_item(item)?;
287        }
288
289        Ok(())
290    }
291
292    /// Validate list item
293    fn validate_list_item(&mut self, item: &ListItem) -> Result<()> {
294        // Track ID if present
295        if let Some(id) = &item.id {
296            self.track_id(id)?;
297        }
298
299        // Validate content
300        match &item.content {
301            ListItemContent::Inline(inlines) => {
302                if inlines.is_empty() {
303                    return Err(CmlError::ValidationError(
304                        "List item inline content cannot be empty".to_string(),
305                    ));
306                }
307                for inline in inlines {
308                    self.validate_inline_element(inline)?;
309                }
310            }
311            ListItemContent::Block(blocks) => {
312                if blocks.is_empty() {
313                    return Err(CmlError::ValidationError(
314                        "List item block content cannot be empty".to_string(),
315                    ));
316                }
317                for block in blocks {
318                    self.validate_block_element(block)?;
319                }
320            }
321        }
322
323        Ok(())
324    }
325
326    /// Validate table
327    fn validate_table(&mut self, table: &Table) -> Result<()> {
328        // Track ID if present
329        if let Some(id) = &table.id {
330            self.track_id(id)?;
331        }
332
333        // Table must have body
334        if table.body.rows.is_empty() {
335            return Err(CmlError::ValidationError(
336                "Table body cannot be empty".to_string(),
337            ));
338        }
339
340        // Validate header if present
341        if let Some(ref header) = table.header {
342            if header.rows.is_empty() {
343                return Err(CmlError::ValidationError(
344                    "Table header cannot be empty".to_string(),
345                ));
346            }
347            for row in &header.rows {
348                self.validate_table_row(row)?;
349            }
350        }
351
352        // Validate body
353        for row in &table.body.rows {
354            self.validate_table_row(row)?;
355        }
356
357        // Validate footer if present
358        if let Some(ref footer) = table.footer {
359            for inline in &footer.caption.content {
360                self.validate_inline_element(inline)?;
361            }
362        }
363
364        Ok(())
365    }
366
367    /// Validate table row
368    fn validate_table_row(&mut self, row: &TableRow) -> Result<()> {
369        if row.columns.is_empty() {
370            return Err(CmlError::ValidationError(
371                "Table row cannot be empty".to_string(),
372            ));
373        }
374
375        for column in &row.columns {
376            self.validate_table_column(column)?;
377        }
378
379        Ok(())
380    }
381
382    /// Validate table column
383    fn validate_table_column(&mut self, column: &TableColumn) -> Result<()> {
384        // Validate cell
385        for inline in &column.cell.content {
386            self.validate_inline_element(inline)?;
387        }
388
389        Ok(())
390    }
391
392    /// Validate inline element
393    fn validate_inline_element(&mut self, inline: &InlineElement) -> Result<()> {
394        match inline {
395            InlineElement::Text(_) => Ok(()),
396            InlineElement::Em(em) => {
397                for inner in &em.content {
398                    self.validate_inline_element(inner)?;
399                }
400                Ok(())
401            }
402            InlineElement::Bo(bo) => {
403                for inner in &bo.content {
404                    self.validate_inline_element(inner)?;
405                }
406                Ok(())
407            }
408            InlineElement::Un(un) => {
409                for inner in &un.content {
410                    self.validate_inline_element(inner)?;
411                }
412                Ok(())
413            }
414            InlineElement::St(st) => {
415                for inner in &st.content {
416                    self.validate_inline_element(inner)?;
417                }
418                Ok(())
419            }
420            InlineElement::Snip(snip) => {
421                if snip.content.trim().is_empty() {
422                    return Err(CmlError::ValidationError(
423                        "Snippet content cannot be empty".to_string(),
424                    ));
425                }
426                Ok(())
427            }
428            InlineElement::Key(key) => {
429                if key.content.trim().is_empty() {
430                    return Err(CmlError::ValidationError(
431                        "Keyboard content cannot be empty".to_string(),
432                    ));
433                }
434                Ok(())
435            }
436            InlineElement::Rf(rf) => {
437                if rf.reference.trim().is_empty() {
438                    return Err(CmlError::ValidationError(
439                        "Reference 'reference' attribute cannot be empty".to_string(),
440                    ));
441                }
442                // Track reference for later validation
443                self.references.push(rf.reference.clone());
444                Ok(())
445            }
446            InlineElement::Tg(tg) => {
447                if tg.reference.trim().is_empty() {
448                    return Err(CmlError::ValidationError(
449                        "Topic tag 'reference' attribute cannot be empty".to_string(),
450                    ));
451                }
452                Ok(())
453            }
454            InlineElement::Lk(lk) => {
455                if lk.reference.trim().is_empty() {
456                    return Err(CmlError::ValidationError(
457                        "Link 'reference' attribute cannot be empty".to_string(),
458                    ));
459                }
460                Ok(())
461            }
462            InlineElement::Curr(curr) => {
463                if curr.value.trim().is_empty() {
464                    return Err(CmlError::ValidationError(
465                        "Currency value cannot be empty".to_string(),
466                    ));
467                }
468                Ok(())
469            }
470            InlineElement::End(_) => Ok(()),
471        }
472    }
473
474    /// Validate footer
475    fn validate_footer(&mut self, footer: &Footer) -> Result<()> {
476        // Validate signatures if present
477        if let Some(ref signatures) = footer.signatures {
478            self.validate_signatures(signatures)?;
479        }
480
481        // Validate citations if present
482        if let Some(ref citations) = footer.citations {
483            self.validate_citations(citations)?;
484        }
485
486        // Validate annotations if present
487        if let Some(ref annotations) = footer.annotations {
488            self.validate_annotations(annotations)?;
489        }
490
491        Ok(())
492    }
493
494    /// Validate signatures
495    fn validate_signatures(&mut self, signatures: &Signatures) -> Result<()> {
496        if signatures.signatures.is_empty() {
497            return Err(CmlError::ValidationError(
498                "Signatures section cannot be empty".to_string(),
499            ));
500        }
501
502        for signature in &signatures.signatures {
503            self.validate_signature(signature)?;
504        }
505
506        Ok(())
507    }
508
509    /// Validate signature
510    fn validate_signature(&mut self, signature: &Signature) -> Result<()> {
511        // Validate when (ISO 8601 datetime)
512        if signature.when.is_empty() {
513            return Err(CmlError::ValidationError(
514                "Signature 'when' attribute cannot be empty".to_string(),
515            ));
516        }
517
518        // Track ref if present
519        if let Some(ref r) = signature.reference {
520            self.references.push(r.clone());
521        }
522
523        // Content cannot be empty
524        if signature.content.trim().is_empty() {
525            return Err(CmlError::ValidationError(
526                "Signature content cannot be empty".to_string(),
527            ));
528        }
529
530        Ok(())
531    }
532
533    /// Validate citations
534    fn validate_citations(&mut self, citations: &Citations) -> Result<()> {
535        if citations.citations.is_empty() {
536            return Err(CmlError::ValidationError(
537                "Citations section cannot be empty".to_string(),
538            ));
539        }
540
541        for citation in &citations.citations {
542            self.validate_citation(citation)?;
543        }
544
545        Ok(())
546    }
547
548    /// Validate citation
549    fn validate_citation(&mut self, citation: &Citation) -> Result<()> {
550        // Track ref (citation reference is ID)
551        if !citation.reference.is_empty() {
552            self.track_id(&citation.reference)?;
553        }
554
555        // Validate inline content
556        for inline in &citation.content {
557            self.validate_inline_element(inline)?;
558        }
559
560        Ok(())
561    }
562
563    /// Validate annotations
564    fn validate_annotations(&mut self, annotations: &Annotations) -> Result<()> {
565        if annotations.notes.is_empty() {
566            return Err(CmlError::ValidationError(
567                "Annotations section cannot be empty".to_string(),
568            ));
569        }
570
571        for note in &annotations.notes {
572            self.validate_note(note)?;
573        }
574
575        Ok(())
576    }
577
578    /// Validate note
579    fn validate_note(&mut self, note: &Note) -> Result<()> {
580        // Track ID if present
581        if let Some(id) = &note.id {
582            self.track_id(id)?;
583        }
584
585        // Track ref if present
586        if let Some(ref r) = note.reference {
587            self.references.push(r.clone());
588        }
589
590        // Validate content
591        match &note.content {
592            NoteContent::Inline(inlines) => {
593                for inline in inlines {
594                    self.validate_inline_element(inline)?;
595                }
596            }
597            NoteContent::Block(blocks) => {
598                for block in blocks {
599                    self.validate_block_element(block)?;
600                }
601            }
602        }
603
604        Ok(())
605    }
606
607    /// Track an ID and ensure it's unique
608    fn track_id(&mut self, id: &str) -> Result<()> {
609        if id.trim().is_empty() {
610            return Err(CmlError::ValidationError("ID cannot be empty".to_string()));
611        }
612
613        if !self.ids.insert(id.to_string()) {
614            return Err(CmlError::DuplicateId(id.to_string()));
615        }
616
617        Ok(())
618    }
619
620    /// Validate all references point to valid IDs
621    fn validate_references(&self) -> Result<()> {
622        for reference in &self.references {
623            // References can be:
624            // 1. Internal IDs (must exist in document)
625            // 2. Pathless references (namespace:identifier format)
626            // 3. External URLs
627
628            // Skip URLs
629            if reference.starts_with("http://") || reference.starts_with("https://") {
630                continue;
631            }
632
633            // Pathless references have a colon
634            if reference.contains(':') {
635                // Validate format: should be namespace:identifier
636                let parts: Vec<&str> = reference.split(':').collect();
637                if parts.len() != 2 {
638                    return Err(CmlError::ValidationError(format!(
639                        "Invalid pathless reference format '{}', should be 'namespace:identifier'",
640                        reference
641                    )));
642                }
643                continue;
644            }
645
646            // Internal ID reference - must exist
647            if !self.ids.contains(reference) {
648                return Err(CmlError::ReferenceNotFound(reference.clone()));
649            }
650        }
651
652        Ok(())
653    }
654
655    // =========================================================================
656    // Constraint Validation
657    // =========================================================================
658
659    /// Validate profile-specific constraints
660    fn validate_constraints(&self, doc: &CmlDocument) -> Result<()> {
661        let Some(constraints) = &self.constraints else {
662            return Ok(());
663        };
664
665        // Validate hierarchy constraints (must_be_first, etc.)
666        self.validate_hierarchy_constraints(doc, constraints)?;
667
668        // Validate list constraints
669        self.validate_list_constraints(doc, constraints)?;
670
671        // Validate element occurrence constraints
672        self.validate_occurrence_constraints(constraints)?;
673
674        Ok(())
675    }
676
677    /// Validate hierarchy constraints like must_be_first
678    fn validate_hierarchy_constraints(
679        &self,
680        doc: &CmlDocument,
681        constraints: &ResolvedConstraints,
682    ) -> Result<()> {
683        // Check must_be_first constraints on body children
684        for (element_name, hierarchy) in &constraints.hierarchy {
685            if hierarchy.must_be_first {
686                self.validate_must_be_first(element_name, &doc.body)?;
687            }
688        }
689
690        Ok(())
691    }
692
693    /// Validate that an element is first if it appears
694    fn validate_must_be_first(&self, element_name: &str, body: &Body) -> Result<()> {
695        let mut found_element = false;
696        let mut found_at_position = None;
697
698        for (i, block) in body.blocks.iter().enumerate() {
699            let block_name = Self::get_block_element_name(block);
700            if block_name == element_name {
701                found_element = true;
702                found_at_position = Some(i);
703                break;
704            }
705        }
706
707        if found_element {
708            if let Some(pos) = found_at_position {
709                if pos != 0 {
710                    return Err(CmlError::ValidationError(format!(
711                        "Element '{}' must be the first element in body when present (found at position {})",
712                        element_name, pos + 1
713                    )));
714                }
715            }
716        }
717
718        Ok(())
719    }
720
721    /// Get the element name for a block element
722    fn get_block_element_name(block: &BlockElement) -> &'static str {
723        match block {
724            BlockElement::Section(_) => "section",
725            BlockElement::Paragraph(_) => "paragraph",
726            BlockElement::Heading(_) => "heading",
727            BlockElement::Aside(_) => "aside",
728            BlockElement::Quote(_) => "quote",
729            BlockElement::List(_) => "list",
730            BlockElement::Table(_) => "table",
731            BlockElement::Code(_) => "code",
732            BlockElement::Break(_) => "break",
733            BlockElement::Figure(_) => "figure",
734        }
735    }
736
737    /// Validate list constraints
738    fn validate_list_constraints(
739        &self,
740        doc: &CmlDocument,
741        constraints: &ResolvedConstraints,
742    ) -> Result<()> {
743        if let Some(list_constraints) = &constraints.list_constraints {
744            if let Some(ordered) = &list_constraints.ordered {
745                if let Some(enforce_order) = &ordered.enforce_order {
746                    if enforce_order == "alphanumeric" || enforce_order == "numeric" {
747                        self.validate_ordered_lists(&doc.body, enforce_order)?;
748                    }
749                }
750            }
751        }
752
753        Ok(())
754    }
755
756    /// Validate ordered lists have items in correct order
757    fn validate_ordered_lists(&self, body: &Body, order_type: &str) -> Result<()> {
758        for block in &body.blocks {
759            self.validate_block_list_order(block, order_type)?;
760        }
761        Ok(())
762    }
763
764    /// Recursively validate list ordering in blocks
765    fn validate_block_list_order(&self, block: &BlockElement, order_type: &str) -> Result<()> {
766        match block {
767            BlockElement::List(list) => {
768                if list.list_type == Some(ListType::Ordered) {
769                    self.validate_list_item_order(list, order_type)?;
770                }
771                // Check nested lists
772                for item in &list.items {
773                    if let ListItemContent::Block(blocks) = &item.content {
774                        for b in blocks {
775                            self.validate_block_list_order(b, order_type)?;
776                        }
777                    }
778                }
779            }
780            BlockElement::Section(section) => {
781                for b in &section.content {
782                    self.validate_block_list_order(b, order_type)?;
783                }
784            }
785            BlockElement::Aside(aside) => {
786                for b in &aside.content {
787                    self.validate_block_list_order(b, order_type)?;
788                }
789            }
790            BlockElement::Quote(quote) => {
791                for b in &quote.content {
792                    self.validate_block_list_order(b, order_type)?;
793                }
794            }
795            _ => {}
796        }
797        Ok(())
798    }
799
800    /// Validate that list items are in alphanumeric order
801    fn validate_list_item_order(&self, list: &List, order_type: &str) -> Result<()> {
802        let mut texts: Vec<String> = Vec::new();
803
804        for item in &list.items {
805            let text = self.extract_list_item_text(item);
806            texts.push(text);
807        }
808
809        // Check if items are in order
810        for i in 1..texts.len() {
811            let is_ordered = match order_type {
812                "numeric" => {
813                    // Try to parse as numbers first
814                    let prev = texts[i - 1].trim().parse::<f64>();
815                    let curr = texts[i].trim().parse::<f64>();
816                    match (prev, curr) {
817                        (Ok(p), Ok(c)) => p <= c,
818                        _ => texts[i - 1].to_lowercase() <= texts[i].to_lowercase(),
819                    }
820                }
821                _ => {
822                    // Alphanumeric: case-insensitive string comparison
823                    texts[i - 1].to_lowercase() <= texts[i].to_lowercase()
824                }
825            };
826
827            if !is_ordered {
828                return Err(CmlError::ValidationError(format!(
829                    "Ordered list items must be in {} order: '{}' should come after '{}'",
830                    order_type,
831                    texts[i],
832                    texts[i - 1]
833                )));
834            }
835        }
836
837        Ok(())
838    }
839
840    /// Extract text content from a list item for ordering comparison
841    fn extract_list_item_text(&self, item: &ListItem) -> String {
842        match &item.content {
843            ListItemContent::Inline(inlines) => self.extract_inline_text(inlines),
844            ListItemContent::Block(blocks) => {
845                // Get text from first paragraph or heading
846                for block in blocks {
847                    match block {
848                        BlockElement::Paragraph(p) => {
849                            return self.extract_inline_text(&p.content);
850                        }
851                        BlockElement::Heading(h) => {
852                            return self.extract_inline_text(&h.content);
853                        }
854                        _ => {}
855                    }
856                }
857                String::new()
858            }
859        }
860    }
861
862    /// Extract plain text from inline elements
863    fn extract_inline_text(&self, inlines: &[InlineElement]) -> String {
864        let mut text = String::new();
865        for inline in inlines {
866            match inline {
867                InlineElement::Text(t) => text.push_str(t),
868                InlineElement::Em(em) => text.push_str(&self.extract_inline_text(&em.content)),
869                InlineElement::Bo(bo) => text.push_str(&self.extract_inline_text(&bo.content)),
870                InlineElement::Un(un) => text.push_str(&self.extract_inline_text(&un.content)),
871                InlineElement::St(st) => text.push_str(&self.extract_inline_text(&st.content)),
872                InlineElement::Snip(snip) => text.push_str(&snip.content),
873                InlineElement::Key(key) => text.push_str(&key.content),
874                InlineElement::Lk(lk) => text.push_str(&lk.content),
875                InlineElement::Rf(rf) => text.push_str(&rf.content),
876                InlineElement::Tg(tg) => text.push_str(&tg.content),
877                InlineElement::Curr(curr) => text.push_str(&curr.value),
878                InlineElement::End(_) => {}
879            }
880        }
881        text
882    }
883
884    /// Validate element occurrence constraints
885    fn validate_occurrence_constraints(&self, constraints: &ResolvedConstraints) -> Result<()> {
886        for (element_name, constraint) in &constraints.elements {
887            let count = self.element_counts.get(element_name).unwrap_or(&0);
888
889            if let Some(min) = constraint.min_occurs {
890                if *count < min {
891                    return Err(CmlError::ValidationError(format!(
892                        "Element '{}' must occur at least {} times (found {})",
893                        element_name, min, count
894                    )));
895                }
896            }
897
898            if let Some(max) = constraint.max_occurs {
899                if *count > max {
900                    return Err(CmlError::ValidationError(format!(
901                        "Element '{}' can occur at most {} times (found {})",
902                        element_name, max, count
903                    )));
904                }
905            }
906        }
907
908        // Also check hierarchy max_occurs
909        for (element_name, hierarchy) in &constraints.hierarchy {
910            if let Some(max) = hierarchy.max_occurs {
911                let count = self.element_counts.get(element_name).unwrap_or(&0);
912                if *count > max {
913                    return Err(CmlError::ValidationError(format!(
914                        "Element '{}' can occur at most {} times (found {})",
915                        element_name, max, count
916                    )));
917                }
918            }
919        }
920
921        Ok(())
922    }
923
924    /// Increment element count
925    fn count_element(&mut self, name: &str) {
926        *self.element_counts.entry(name.to_string()).or_insert(0) += 1;
927    }
928}
929
930impl Default for CmlValidator {
931    fn default() -> Self {
932        Self::new()
933    }
934}
935
#[cfg(test)]
mod tests {
    //! Unit tests for `CmlValidator`.
    //!
    //! Every test needs a complete `CmlDocument`; the fixture builders below
    //! fill in the boilerplate so each test only spells out the fields it
    //! actually varies (version, title, body blocks).

    use super::*;

    /// Build a `core`-profile, UTF-8 document with the given version, title
    /// and body blocks. All optional header and footer fields are left empty.
    fn document(version: &str, title: &str, blocks: Vec<BlockElement>) -> CmlDocument {
        CmlDocument {
            version: version.to_string(),
            encoding: "utf-8".to_string(),
            profile: "core".to_string(),
            id: None,
            header: Header {
                title: title.to_string(),
                authors: vec![],
                dates: vec![],
                identifiers: vec![],
                version: None,
                description: None,
                provenance: None,
                source: None,
                meta: vec![],
            },
            body: Body { blocks },
            footer: Footer {
                signatures: None,
                citations: None,
                annotations: None,
            },
        }
    }

    /// Plain inline text.
    fn text(value: &str) -> InlineElement {
        InlineElement::Text(value.to_string())
    }

    /// Untyped paragraph block with an optional ID.
    fn paragraph(id: Option<&str>, content: Vec<InlineElement>) -> BlockElement {
        BlockElement::Paragraph(Paragraph {
            id: id.map(String::from),
            paragraph_type: None,
            content,
        })
    }

    /// Inline reference (`Rf`) without a title.
    fn rf(reference: &str, role: Option<&str>, content: &str) -> InlineElement {
        InlineElement::Rf(Rf {
            reference: reference.to_string(),
            role: role.map(String::from),
            title: None,
            content: content.to_string(),
        })
    }

    /// List block of the given type whose items are plain inline text.
    fn list(list_type: ListType, items: &[&str]) -> BlockElement {
        BlockElement::List(List {
            id: None,
            list_type: Some(list_type),
            style: None,
            items: items
                .iter()
                .map(|item| ListItem {
                    id: None,
                    content: ListItemContent::Inline(vec![text(item)]),
                })
                .collect(),
        })
    }

    #[test]
    fn test_validate_minimal_document() {
        let doc = document(
            "0.2",
            "Test Document",
            vec![paragraph(None, vec![text("Hello, world!")])],
        );

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_invalid_version() {
        let doc = document("1.0", "Test", vec![paragraph(None, vec![text("Test")])]);

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_empty_title() {
        let doc = document("0.2", "", vec![paragraph(None, vec![text("Test")])]);

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_duplicate_ids() {
        let doc = document(
            "0.2",
            "Test",
            vec![
                paragraph(Some("para-1"), vec![text("First")]),
                paragraph(Some("para-1"), vec![text("Second")]),
            ],
        );

        let result = CmlValidator::validate(&doc);
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), CmlError::DuplicateId(_)));
    }

    #[test]
    fn test_validate_invalid_reference() {
        let doc = document(
            "0.2",
            "Test",
            vec![paragraph(None, vec![rf("nonexistent-id", None, "Link")])],
        );

        let result = CmlValidator::validate(&doc);
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            CmlError::ReferenceNotFound(_)
        ));
    }

    #[test]
    fn test_validate_pathless_reference() {
        let doc = document(
            "0.2",
            "Test",
            vec![paragraph(
                None,
                vec![rf("president:47", Some("person"), "President 47")],
            )],
        );

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_invalid_heading_size() {
        let doc = document(
            "0.2",
            "Test",
            vec![BlockElement::Heading(Heading {
                id: None,
                heading_type: None,
                size: 7,
                content: vec![text("Invalid")],
            })],
        );

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_empty_body() {
        let doc = document("0.2", "Test", vec![]);

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_with_profile_core() {
        let doc = document(
            "0.2",
            "Test Document",
            vec![paragraph(None, vec![text("Hello, world!")])],
        );

        assert!(CmlValidator::validate_with_profile(&doc).is_ok());
    }

    #[test]
    fn test_ordered_list_alphanumeric_valid() {
        // Ordered list whose items are already in alphabetical order.
        let doc = document(
            "0.2",
            "Test",
            vec![list(ListType::Ordered, &["Apple", "Banana", "Cherry"])],
        );

        // Basic validation should pass
        assert!(CmlValidator::validate(&doc).is_ok());

        // Profile validation should also pass (items are in alphabetical order)
        assert!(CmlValidator::validate_with_profile(&doc).is_ok());
    }

    #[test]
    fn test_ordered_list_alphanumeric_invalid() {
        // Ordered list whose items are out of alphabetical order.
        let doc = document(
            "0.2",
            "Test",
            vec![list(ListType::Ordered, &["Cherry", "Apple", "Banana"])],
        );

        // Basic validation should pass (doesn't check order)
        assert!(CmlValidator::validate(&doc).is_ok());

        // Profile validation should fail (items are not in alphabetical order)
        let result = CmlValidator::validate_with_profile(&doc);
        assert!(result.is_err());
        let err_msg = format!("{:?}", result.unwrap_err());
        assert!(err_msg.contains("order"));
    }

    #[test]
    fn test_unordered_list_no_order_enforcement() {
        // Unordered list: item order is irrelevant.
        let doc = document(
            "0.2",
            "Test",
            vec![list(ListType::Unordered, &["Zebra", "Apple"])],
        );

        // Both validations should pass for unordered lists
        assert!(CmlValidator::validate(&doc).is_ok());
        assert!(CmlValidator::validate_with_profile(&doc).is_ok());
    }
}