cml_rs/
validator.rs

1//! CML v0.2 Document Validator
2//!
3//! Validates CML documents for:
4//! - Structural correctness (header, body, footer)
5//! - ID uniqueness
6//! - Reference integrity
7//! - Required fields
8//! - Profile-specific constraints
9
10use crate::profile::{ConstraintRegistry, ResolvedConstraints};
11use crate::types::{*, ListType};
12use crate::{CmlError, Result};
13use std::collections::{HashMap, HashSet};
14
15/// Validates a CML v0.2 document
16pub struct CmlValidator {
17    /// All IDs encountered during validation
18    ids: HashSet<String>,
19    /// References that need to be resolved
20    references: Vec<String>,
21    /// Element occurrence counts
22    element_counts: HashMap<String, u32>,
23    /// Current profile name
24    profile: String,
25    /// Resolved constraints for the profile
26    constraints: Option<ResolvedConstraints>,
27}
28
29impl CmlValidator {
30    /// Create a new validator
31    pub fn new() -> Self {
32        Self {
33            ids: HashSet::new(),
34            references: Vec::new(),
35            element_counts: HashMap::new(),
36            profile: String::new(),
37            constraints: None,
38        }
39    }
40
41    /// Create a validator with constraints for a specific profile
42    pub fn with_constraints(constraints: ResolvedConstraints) -> Self {
43        Self {
44            ids: HashSet::new(),
45            references: Vec::new(),
46            element_counts: HashMap::new(),
47            profile: constraints.profile.clone(),
48            constraints: Some(constraints),
49        }
50    }
51
52    /// Validate a complete CML document (basic validation without profile constraints)
53    pub fn validate(document: &CmlDocument) -> Result<()> {
54        let mut validator = Self::new();
55        validator.validate_document(document)?;
56        Ok(())
57    }
58
59    /// Validate a document with profile-specific constraints
60    pub fn validate_with_profile(document: &CmlDocument) -> Result<()> {
61        // Extract profile name from document (e.g., "legal" from "legal:contract")
62        let profile_name = document
63            .profile
64            .split(':')
65            .next()
66            .unwrap_or(&document.profile);
67
68        // Try to load constraints for this profile
69        let mut registry = ConstraintRegistry::with_builtins()?;
70        let constraints = registry.get(profile_name)?.clone();
71
72        let mut validator = Self::with_constraints(constraints);
73        validator.validate_document(document)?;
74        validator.validate_constraints(document)?;
75        Ok(())
76    }
77
78    /// Validate the document structure
79    fn validate_document(&mut self, doc: &CmlDocument) -> Result<()> {
80        // Validate version
81        if doc.version != "0.2" {
82            return Err(CmlError::ValidationError(format!(
83                "Invalid version '{}', expected '0.2'",
84                doc.version
85            )));
86        }
87
88        // Validate encoding
89        if doc.encoding != "utf-8" {
90            return Err(CmlError::ValidationError(format!(
91                "Invalid encoding '{}', expected 'utf-8'",
92                doc.encoding
93            )));
94        }
95
96        // Validate profile format
97        if doc.profile.is_empty() {
98            return Err(CmlError::ValidationError(
99                "Profile cannot be empty".to_string(),
100            ));
101        }
102
103        // Track document ID if present
104        if let Some(id) = &doc.id {
105            self.track_id(id)?;
106        }
107
108        // Count structural elements
109        self.count_element("cml");
110        self.count_element("header");
111        self.count_element("title");
112        self.count_element("body");
113        self.count_element("footer");
114
115        // Validate header (required)
116        self.validate_header(&doc.header)?;
117
118        // Validate body (required)
119        self.validate_body(&doc.body)?;
120
121        // Validate footer (required)
122        self.validate_footer(&doc.footer)?;
123
124        // Check all references are valid
125        self.validate_references()?;
126
127        Ok(())
128    }
129
130    /// Validate header structure
131    fn validate_header(&mut self, header: &Header) -> Result<()> {
132        // Title is required and cannot be empty
133        if header.title.trim().is_empty() {
134            return Err(CmlError::ValidationError(
135                "Header title cannot be empty".to_string(),
136            ));
137        }
138
139        Ok(())
140    }
141
142    /// Validate body structure
143    fn validate_body(&mut self, body: &Body) -> Result<()> {
144        if body.blocks.is_empty() {
145            return Err(CmlError::ValidationError(
146                "Body cannot be empty".to_string(),
147            ));
148        }
149
150        for block in &body.blocks {
151            self.validate_block_element(block)?;
152        }
153
154        Ok(())
155    }
156
157    /// Validate block element
158    fn validate_block_element(&mut self, block: &BlockElement) -> Result<()> {
159        // Count the element
160        self.count_element(Self::get_block_element_name(block));
161
162        match block {
163            BlockElement::Section(section) => self.validate_section(section),
164            BlockElement::Paragraph(para) => self.validate_paragraph(para),
165            BlockElement::Heading(heading) => self.validate_heading(heading),
166            BlockElement::Aside(aside) => self.validate_aside(aside),
167            BlockElement::Quote(quote) => self.validate_quote(quote),
168            BlockElement::List(list) => self.validate_list(list),
169            BlockElement::Table(table) => self.validate_table(table),
170            BlockElement::Code(_code) => Ok(()),
171            BlockElement::Break(_br) => Ok(()),
172            BlockElement::Figure(_) => {
173                Err(CmlError::ValidationError(
174                    "Figure element is reserved for v0.3".to_string(),
175                ))
176            }
177        }
178    }
179
180    /// Validate section
181    fn validate_section(&mut self, section: &Section) -> Result<()> {
182        // Track ID if present
183        if let Some(id) = &section.id {
184            self.track_id(id)?;
185        }
186
187        // Validate nested blocks
188        for block in &section.content {
189            self.validate_block_element(block)?;
190        }
191
192        Ok(())
193    }
194
195    /// Validate paragraph
196    fn validate_paragraph(&mut self, para: &Paragraph) -> Result<()> {
197        // Track ID if present
198        if let Some(id) = &para.id {
199            self.track_id(id)?;
200        }
201
202        // Paragraph must have content
203        if para.content.is_empty() {
204            return Err(CmlError::ValidationError(
205                "Paragraph cannot be empty".to_string(),
206            ));
207        }
208
209        // Validate inline content
210        for inline in &para.content {
211            self.validate_inline_element(inline)?;
212        }
213
214        Ok(())
215    }
216
217    /// Validate heading
218    fn validate_heading(&mut self, heading: &Heading) -> Result<()> {
219        // Track ID if present
220        if let Some(id) = &heading.id {
221            self.track_id(id)?;
222        }
223
224        // Validate size (1-6)
225        if heading.size < 1 || heading.size > 6 {
226            return Err(CmlError::ValidationError(format!(
227                "Invalid heading size '{}', must be between 1 and 6",
228                heading.size
229            )));
230        }
231
232        // Heading must have content
233        if heading.content.is_empty() {
234            return Err(CmlError::ValidationError(
235                "Heading cannot be empty".to_string(),
236            ));
237        }
238
239        // Validate inline content
240        for inline in &heading.content {
241            self.validate_inline_element(inline)?;
242        }
243
244        Ok(())
245    }
246
247    /// Validate aside
248    fn validate_aside(&mut self, aside: &Aside) -> Result<()> {
249        // Validate nested blocks
250        for block in &aside.content {
251            self.validate_block_element(block)?;
252        }
253
254        Ok(())
255    }
256
257    /// Validate quote
258    fn validate_quote(&mut self, quote: &Quote) -> Result<()> {
259        // Track ref if present
260        if let Some(ref r) = quote.reference {
261            self.references.push(r.clone());
262        }
263
264        // Validate nested blocks
265        for block in &quote.content {
266            self.validate_block_element(block)?;
267        }
268
269        Ok(())
270    }
271
272    /// Validate list
273    fn validate_list(&mut self, list: &List) -> Result<()> {
274        // Track ID if present
275        if let Some(id) = &list.id {
276            self.track_id(id)?;
277        }
278
279        // List must have items
280        if list.items.is_empty() {
281            return Err(CmlError::ValidationError(
282                "List cannot be empty".to_string(),
283            ));
284        }
285
286        // Validate items
287        for item in &list.items {
288            self.validate_list_item(item)?;
289        }
290
291        Ok(())
292    }
293
294    /// Validate list item
295    fn validate_list_item(&mut self, item: &ListItem) -> Result<()> {
296        // Track ID if present
297        if let Some(id) = &item.id {
298            self.track_id(id)?;
299        }
300
301        // Validate content
302        match &item.content {
303            ListItemContent::Inline(inlines) => {
304                if inlines.is_empty() {
305                    return Err(CmlError::ValidationError(
306                        "List item inline content cannot be empty".to_string(),
307                    ));
308                }
309                for inline in inlines {
310                    self.validate_inline_element(inline)?;
311                }
312            }
313            ListItemContent::Block(blocks) => {
314                if blocks.is_empty() {
315                    return Err(CmlError::ValidationError(
316                        "List item block content cannot be empty".to_string(),
317                    ));
318                }
319                for block in blocks {
320                    self.validate_block_element(block)?;
321                }
322            }
323        }
324
325        Ok(())
326    }
327
328    /// Validate table
329    fn validate_table(&mut self, table: &Table) -> Result<()> {
330        // Track ID if present
331        if let Some(id) = &table.id {
332            self.track_id(id)?;
333        }
334
335        // Table must have body
336        if table.body.rows.is_empty() {
337            return Err(CmlError::ValidationError(
338                "Table body cannot be empty".to_string(),
339            ));
340        }
341
342        // Validate header if present
343        if let Some(ref header) = table.header {
344            if header.rows.is_empty() {
345                return Err(CmlError::ValidationError(
346                    "Table header cannot be empty".to_string(),
347                ));
348            }
349            for row in &header.rows {
350                self.validate_table_row(row)?;
351            }
352        }
353
354        // Validate body
355        for row in &table.body.rows {
356            self.validate_table_row(row)?;
357        }
358
359        // Validate footer if present
360        if let Some(ref footer) = table.footer {
361            for inline in &footer.caption.content {
362                self.validate_inline_element(inline)?;
363            }
364        }
365
366        Ok(())
367    }
368
369    /// Validate table row
370    fn validate_table_row(&mut self, row: &TableRow) -> Result<()> {
371        if row.columns.is_empty() {
372            return Err(CmlError::ValidationError(
373                "Table row cannot be empty".to_string(),
374            ));
375        }
376
377        for column in &row.columns {
378            self.validate_table_column(column)?;
379        }
380
381        Ok(())
382    }
383
384    /// Validate table column
385    fn validate_table_column(&mut self, column: &TableColumn) -> Result<()> {
386        // Validate cell
387        for inline in &column.cell.content {
388            self.validate_inline_element(inline)?;
389        }
390
391        Ok(())
392    }
393
394    /// Validate inline element
395    fn validate_inline_element(&mut self, inline: &InlineElement) -> Result<()> {
396        match inline {
397            InlineElement::Text(_) => Ok(()),
398            InlineElement::Em(em) => {
399                for inner in &em.content {
400                    self.validate_inline_element(inner)?;
401                }
402                Ok(())
403            }
404            InlineElement::Bo(bo) => {
405                for inner in &bo.content {
406                    self.validate_inline_element(inner)?;
407                }
408                Ok(())
409            }
410            InlineElement::Un(un) => {
411                for inner in &un.content {
412                    self.validate_inline_element(inner)?;
413                }
414                Ok(())
415            }
416            InlineElement::St(st) => {
417                for inner in &st.content {
418                    self.validate_inline_element(inner)?;
419                }
420                Ok(())
421            }
422            InlineElement::Snip(snip) => {
423                if snip.content.trim().is_empty() {
424                    return Err(CmlError::ValidationError(
425                        "Snippet content cannot be empty".to_string(),
426                    ));
427                }
428                Ok(())
429            }
430            InlineElement::Key(key) => {
431                if key.content.trim().is_empty() {
432                    return Err(CmlError::ValidationError(
433                        "Keyboard content cannot be empty".to_string(),
434                    ));
435                }
436                Ok(())
437            }
438            InlineElement::Rf(rf) => {
439                if rf.reference.trim().is_empty() {
440                    return Err(CmlError::ValidationError(
441                        "Reference 'reference' attribute cannot be empty".to_string(),
442                    ));
443                }
444                // Track reference for later validation
445                self.references.push(rf.reference.clone());
446                Ok(())
447            }
448            InlineElement::Tg(tg) => {
449                if tg.reference.trim().is_empty() {
450                    return Err(CmlError::ValidationError(
451                        "Topic tag 'reference' attribute cannot be empty".to_string(),
452                    ));
453                }
454                Ok(())
455            }
456            InlineElement::Lk(lk) => {
457                if lk.reference.trim().is_empty() {
458                    return Err(CmlError::ValidationError(
459                        "Link 'reference' attribute cannot be empty".to_string(),
460                    ));
461                }
462                Ok(())
463            }
464            InlineElement::Curr(curr) => {
465                if curr.value.trim().is_empty() {
466                    return Err(CmlError::ValidationError(
467                        "Currency value cannot be empty".to_string(),
468                    ));
469                }
470                Ok(())
471            }
472            InlineElement::End(_) => Ok(()),
473        }
474    }
475
476    /// Validate footer
477    fn validate_footer(&mut self, footer: &Footer) -> Result<()> {
478        // Validate signatures if present
479        if let Some(ref signatures) = footer.signatures {
480            self.validate_signatures(signatures)?;
481        }
482
483        // Validate citations if present
484        if let Some(ref citations) = footer.citations {
485            self.validate_citations(citations)?;
486        }
487
488        // Validate annotations if present
489        if let Some(ref annotations) = footer.annotations {
490            self.validate_annotations(annotations)?;
491        }
492
493        Ok(())
494    }
495
496    /// Validate signatures
497    fn validate_signatures(&mut self, signatures: &Signatures) -> Result<()> {
498        if signatures.signatures.is_empty() {
499            return Err(CmlError::ValidationError(
500                "Signatures section cannot be empty".to_string(),
501            ));
502        }
503
504        for signature in &signatures.signatures {
505            self.validate_signature(signature)?;
506        }
507
508        Ok(())
509    }
510
511    /// Validate signature
512    fn validate_signature(&mut self, signature: &Signature) -> Result<()> {
513        // Validate when (ISO 8601 datetime)
514        if signature.when.is_empty() {
515            return Err(CmlError::ValidationError(
516                "Signature 'when' attribute cannot be empty".to_string(),
517            ));
518        }
519
520        // Track ref if present
521        if let Some(ref r) = signature.reference {
522            self.references.push(r.clone());
523        }
524
525        // Content cannot be empty
526        if signature.content.trim().is_empty() {
527            return Err(CmlError::ValidationError(
528                "Signature content cannot be empty".to_string(),
529            ));
530        }
531
532        Ok(())
533    }
534
535    /// Validate citations
536    fn validate_citations(&mut self, citations: &Citations) -> Result<()> {
537        if citations.citations.is_empty() {
538            return Err(CmlError::ValidationError(
539                "Citations section cannot be empty".to_string(),
540            ));
541        }
542
543        for citation in &citations.citations {
544            self.validate_citation(citation)?;
545        }
546
547        Ok(())
548    }
549
550    /// Validate citation
551    fn validate_citation(&mut self, citation: &Citation) -> Result<()> {
552        // Track ref (citation reference is ID)
553        if !citation.reference.is_empty() {
554            self.track_id(&citation.reference)?;
555        }
556
557        // Validate inline content
558        for inline in &citation.content {
559            self.validate_inline_element(inline)?;
560        }
561
562        Ok(())
563    }
564
565    /// Validate annotations
566    fn validate_annotations(&mut self, annotations: &Annotations) -> Result<()> {
567        if annotations.notes.is_empty() {
568            return Err(CmlError::ValidationError(
569                "Annotations section cannot be empty".to_string(),
570            ));
571        }
572
573        for note in &annotations.notes {
574            self.validate_note(note)?;
575        }
576
577        Ok(())
578    }
579
580    /// Validate note
581    fn validate_note(&mut self, note: &Note) -> Result<()> {
582        // Track ID if present
583        if let Some(id) = &note.id {
584            self.track_id(id)?;
585        }
586
587        // Track ref if present
588        if let Some(ref r) = note.reference {
589            self.references.push(r.clone());
590        }
591
592        // Validate content
593        match &note.content {
594            NoteContent::Inline(inlines) => {
595                for inline in inlines {
596                    self.validate_inline_element(inline)?;
597                }
598            }
599            NoteContent::Block(blocks) => {
600                for block in blocks {
601                    self.validate_block_element(block)?;
602                }
603            }
604        }
605
606        Ok(())
607    }
608
609    /// Track an ID and ensure it's unique
610    fn track_id(&mut self, id: &str) -> Result<()> {
611        if id.trim().is_empty() {
612            return Err(CmlError::ValidationError(
613                "ID cannot be empty".to_string(),
614            ));
615        }
616
617        if !self.ids.insert(id.to_string()) {
618            return Err(CmlError::DuplicateId(id.to_string()));
619        }
620
621        Ok(())
622    }
623
624    /// Validate all references point to valid IDs
625    fn validate_references(&self) -> Result<()> {
626        for reference in &self.references {
627            // References can be:
628            // 1. Internal IDs (must exist in document)
629            // 2. Pathless references (namespace:identifier format)
630            // 3. External URLs
631
632            // Skip URLs
633            if reference.starts_with("http://") || reference.starts_with("https://") {
634                continue;
635            }
636
637            // Pathless references have a colon
638            if reference.contains(':') {
639                // Validate format: should be namespace:identifier
640                let parts: Vec<&str> = reference.split(':').collect();
641                if parts.len() != 2 {
642                    return Err(CmlError::ValidationError(format!(
643                        "Invalid pathless reference format '{}', should be 'namespace:identifier'",
644                        reference
645                    )));
646                }
647                continue;
648            }
649
650            // Internal ID reference - must exist
651            if !self.ids.contains(reference) {
652                return Err(CmlError::ReferenceNotFound(reference.clone()));
653            }
654        }
655
656        Ok(())
657    }
658
659    // =========================================================================
660    // Constraint Validation
661    // =========================================================================
662
663    /// Validate profile-specific constraints
664    fn validate_constraints(&self, doc: &CmlDocument) -> Result<()> {
665        let Some(constraints) = &self.constraints else {
666            return Ok(());
667        };
668
669        // Validate hierarchy constraints (must_be_first, etc.)
670        self.validate_hierarchy_constraints(doc, constraints)?;
671
672        // Validate list constraints
673        self.validate_list_constraints(doc, constraints)?;
674
675        // Validate element occurrence constraints
676        self.validate_occurrence_constraints(constraints)?;
677
678        Ok(())
679    }
680
681    /// Validate hierarchy constraints like must_be_first
682    fn validate_hierarchy_constraints(
683        &self,
684        doc: &CmlDocument,
685        constraints: &ResolvedConstraints,
686    ) -> Result<()> {
687        // Check must_be_first constraints on body children
688        for (element_name, hierarchy) in &constraints.hierarchy {
689            if hierarchy.must_be_first {
690                self.validate_must_be_first(element_name, &doc.body)?;
691            }
692        }
693
694        Ok(())
695    }
696
697    /// Validate that an element is first if it appears
698    fn validate_must_be_first(&self, element_name: &str, body: &Body) -> Result<()> {
699        let mut found_element = false;
700        let mut found_at_position = None;
701
702        for (i, block) in body.blocks.iter().enumerate() {
703            let block_name = Self::get_block_element_name(block);
704            if block_name == element_name {
705                found_element = true;
706                found_at_position = Some(i);
707                break;
708            }
709        }
710
711        if found_element {
712            if let Some(pos) = found_at_position {
713                if pos != 0 {
714                    return Err(CmlError::ValidationError(format!(
715                        "Element '{}' must be the first element in body when present (found at position {})",
716                        element_name, pos + 1
717                    )));
718                }
719            }
720        }
721
722        Ok(())
723    }
724
725    /// Get the element name for a block element
726    fn get_block_element_name(block: &BlockElement) -> &'static str {
727        match block {
728            BlockElement::Section(_) => "section",
729            BlockElement::Paragraph(_) => "paragraph",
730            BlockElement::Heading(_) => "heading",
731            BlockElement::Aside(_) => "aside",
732            BlockElement::Quote(_) => "quote",
733            BlockElement::List(_) => "list",
734            BlockElement::Table(_) => "table",
735            BlockElement::Code(_) => "code",
736            BlockElement::Break(_) => "break",
737            BlockElement::Figure(_) => "figure",
738        }
739    }
740
741    /// Validate list constraints
742    fn validate_list_constraints(
743        &self,
744        doc: &CmlDocument,
745        constraints: &ResolvedConstraints,
746    ) -> Result<()> {
747        if let Some(list_constraints) = &constraints.list_constraints {
748            if let Some(ordered) = &list_constraints.ordered {
749                if let Some(enforce_order) = &ordered.enforce_order {
750                    if enforce_order == "alphanumeric" || enforce_order == "numeric" {
751                        self.validate_ordered_lists(&doc.body, enforce_order)?;
752                    }
753                }
754            }
755        }
756
757        Ok(())
758    }
759
760    /// Validate ordered lists have items in correct order
761    fn validate_ordered_lists(&self, body: &Body, order_type: &str) -> Result<()> {
762        for block in &body.blocks {
763            self.validate_block_list_order(block, order_type)?;
764        }
765        Ok(())
766    }
767
768    /// Recursively validate list ordering in blocks
769    fn validate_block_list_order(&self, block: &BlockElement, order_type: &str) -> Result<()> {
770        match block {
771            BlockElement::List(list) => {
772                if list.list_type == Some(ListType::Ordered) {
773                    self.validate_list_item_order(list, order_type)?;
774                }
775                // Check nested lists
776                for item in &list.items {
777                    if let ListItemContent::Block(blocks) = &item.content {
778                        for b in blocks {
779                            self.validate_block_list_order(b, order_type)?;
780                        }
781                    }
782                }
783            }
784            BlockElement::Section(section) => {
785                for b in &section.content {
786                    self.validate_block_list_order(b, order_type)?;
787                }
788            }
789            BlockElement::Aside(aside) => {
790                for b in &aside.content {
791                    self.validate_block_list_order(b, order_type)?;
792                }
793            }
794            BlockElement::Quote(quote) => {
795                for b in &quote.content {
796                    self.validate_block_list_order(b, order_type)?;
797                }
798            }
799            _ => {}
800        }
801        Ok(())
802    }
803
804    /// Validate that list items are in alphanumeric order
805    fn validate_list_item_order(&self, list: &List, order_type: &str) -> Result<()> {
806        let mut texts: Vec<String> = Vec::new();
807
808        for item in &list.items {
809            let text = self.extract_list_item_text(item);
810            texts.push(text);
811        }
812
813        // Check if items are in order
814        for i in 1..texts.len() {
815            let is_ordered = match order_type {
816                "numeric" => {
817                    // Try to parse as numbers first
818                    let prev = texts[i - 1].trim().parse::<f64>();
819                    let curr = texts[i].trim().parse::<f64>();
820                    match (prev, curr) {
821                        (Ok(p), Ok(c)) => p <= c,
822                        _ => texts[i - 1].to_lowercase() <= texts[i].to_lowercase(),
823                    }
824                }
825                _ => {
826                    // Alphanumeric: case-insensitive string comparison
827                    texts[i - 1].to_lowercase() <= texts[i].to_lowercase()
828                }
829            };
830
831            if !is_ordered {
832                return Err(CmlError::ValidationError(format!(
833                    "Ordered list items must be in {} order: '{}' should come after '{}'",
834                    order_type,
835                    texts[i],
836                    texts[i - 1]
837                )));
838            }
839        }
840
841        Ok(())
842    }
843
844    /// Extract text content from a list item for ordering comparison
845    fn extract_list_item_text(&self, item: &ListItem) -> String {
846        match &item.content {
847            ListItemContent::Inline(inlines) => self.extract_inline_text(inlines),
848            ListItemContent::Block(blocks) => {
849                // Get text from first paragraph or heading
850                for block in blocks {
851                    match block {
852                        BlockElement::Paragraph(p) => {
853                            return self.extract_inline_text(&p.content);
854                        }
855                        BlockElement::Heading(h) => {
856                            return self.extract_inline_text(&h.content);
857                        }
858                        _ => {}
859                    }
860                }
861                String::new()
862            }
863        }
864    }
865
866    /// Extract plain text from inline elements
867    fn extract_inline_text(&self, inlines: &[InlineElement]) -> String {
868        let mut text = String::new();
869        for inline in inlines {
870            match inline {
871                InlineElement::Text(t) => text.push_str(t),
872                InlineElement::Em(em) => text.push_str(&self.extract_inline_text(&em.content)),
873                InlineElement::Bo(bo) => text.push_str(&self.extract_inline_text(&bo.content)),
874                InlineElement::Un(un) => text.push_str(&self.extract_inline_text(&un.content)),
875                InlineElement::St(st) => text.push_str(&self.extract_inline_text(&st.content)),
876                InlineElement::Snip(snip) => text.push_str(&snip.content),
877                InlineElement::Key(key) => text.push_str(&key.content),
878                InlineElement::Lk(lk) => text.push_str(&lk.content),
879                InlineElement::Rf(rf) => text.push_str(&rf.content),
880                InlineElement::Tg(tg) => text.push_str(&tg.content),
881                InlineElement::Curr(curr) => text.push_str(&curr.value),
882                InlineElement::End(_) => {}
883            }
884        }
885        text
886    }
887
888    /// Validate element occurrence constraints
889    fn validate_occurrence_constraints(&self, constraints: &ResolvedConstraints) -> Result<()> {
890        for (element_name, constraint) in &constraints.elements {
891            let count = self.element_counts.get(element_name).unwrap_or(&0);
892
893            if let Some(min) = constraint.min_occurs {
894                if *count < min {
895                    return Err(CmlError::ValidationError(format!(
896                        "Element '{}' must occur at least {} times (found {})",
897                        element_name, min, count
898                    )));
899                }
900            }
901
902            if let Some(max) = constraint.max_occurs {
903                if *count > max {
904                    return Err(CmlError::ValidationError(format!(
905                        "Element '{}' can occur at most {} times (found {})",
906                        element_name, max, count
907                    )));
908                }
909            }
910        }
911
912        // Also check hierarchy max_occurs
913        for (element_name, hierarchy) in &constraints.hierarchy {
914            if let Some(max) = hierarchy.max_occurs {
915                let count = self.element_counts.get(element_name).unwrap_or(&0);
916                if *count > max {
917                    return Err(CmlError::ValidationError(format!(
918                        "Element '{}' can occur at most {} times (found {})",
919                        element_name, max, count
920                    )));
921                }
922            }
923        }
924
925        Ok(())
926    }
927
928    /// Increment element count
929    fn count_element(&mut self, name: &str) {
930        *self.element_counts.entry(name.to_string()).or_insert(0) += 1;
931    }
932}
933
934impl Default for CmlValidator {
935    fn default() -> Self {
936        Self::new()
937    }
938}
939
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a UTF-8, `core`-profile document around `blocks`.
    ///
    /// Only the document `version`, the header `title`, and the body differ
    /// between tests; every optional header and footer field is left empty.
    fn make_document(version: &str, title: &str, blocks: Vec<BlockElement>) -> CmlDocument {
        CmlDocument {
            version: version.to_string(),
            encoding: "utf-8".to_string(),
            profile: "core".to_string(),
            id: None,
            header: Header {
                title: title.to_string(),
                authors: vec![],
                dates: vec![],
                identifiers: vec![],
                version: None,
                description: None,
                provenance: None,
                source: None,
                meta: vec![],
            },
            body: Body { blocks },
            footer: Footer {
                signatures: None,
                citations: None,
                annotations: None,
            },
        }
    }

    /// Build a version "0.2" document titled "Test" around `blocks`.
    fn core_document(blocks: Vec<BlockElement>) -> CmlDocument {
        make_document("0.2", "Test", blocks)
    }

    /// Build a plain inline text element.
    fn text(value: &str) -> InlineElement {
        InlineElement::Text(value.to_string())
    }

    /// Build an untyped paragraph block with an optional id.
    fn paragraph(id: Option<&str>, content: Vec<InlineElement>) -> BlockElement {
        BlockElement::Paragraph(Paragraph {
            id: id.map(String::from),
            paragraph_type: None,
            content,
        })
    }

    /// Build an id-less paragraph holding a single text run.
    fn text_paragraph(value: &str) -> BlockElement {
        paragraph(None, vec![text(value)])
    }

    /// Build an untitled inline reference element.
    fn rf(reference: &str, role: Option<&str>, label: &str) -> InlineElement {
        InlineElement::Rf(Rf {
            reference: reference.to_string(),
            role: role.map(String::from),
            title: None,
            content: label.to_string(),
        })
    }

    /// Build an unstyled list block whose items are single text runs.
    fn text_list(list_type: ListType, items: &[&str]) -> BlockElement {
        BlockElement::List(List {
            id: None,
            list_type: Some(list_type),
            style: None,
            items: items
                .iter()
                .map(|&item| ListItem {
                    id: None,
                    content: ListItemContent::Inline(vec![text(item)]),
                })
                .collect(),
        })
    }

    #[test]
    fn test_validate_minimal_document() {
        let doc = make_document("0.2", "Test Document", vec![text_paragraph("Hello, world!")]);

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_invalid_version() {
        let doc = make_document("1.0", "Test", vec![text_paragraph("Test")]);

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_empty_title() {
        let doc = make_document("0.2", "", vec![text_paragraph("Test")]);

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_duplicate_ids() {
        // Two paragraphs deliberately share the same id.
        let doc = core_document(vec![
            paragraph(Some("para-1"), vec![text("First")]),
            paragraph(Some("para-1"), vec![text("Second")]),
        ]);

        let result = CmlValidator::validate(&doc);
        assert!(matches!(result, Err(CmlError::DuplicateId(_))));
    }

    #[test]
    fn test_validate_invalid_reference() {
        // The reference target matches no id declared in the document.
        let doc = core_document(vec![paragraph(
            None,
            vec![rf("nonexistent-id", None, "Link")],
        )]);

        let result = CmlValidator::validate(&doc);
        assert!(matches!(result, Err(CmlError::ReferenceNotFound(_))));
    }

    #[test]
    fn test_validate_pathless_reference() {
        // A `president:47` style target with a role is expected to validate
        // even though no element in the document declares that id.
        let doc = core_document(vec![paragraph(
            None,
            vec![rf("president:47", Some("person"), "President 47")],
        )]);

        assert!(CmlValidator::validate(&doc).is_ok());
    }

    #[test]
    fn test_validate_invalid_heading_size() {
        let doc = core_document(vec![BlockElement::Heading(Heading {
            id: None,
            heading_type: None,
            size: 7,
            content: vec![text("Invalid")],
        })]);

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_empty_body() {
        let doc = core_document(vec![]);

        assert!(CmlValidator::validate(&doc).is_err());
    }

    #[test]
    fn test_validate_with_profile_core() {
        let doc = make_document("0.2", "Test Document", vec![text_paragraph("Hello, world!")]);

        assert!(CmlValidator::validate_with_profile(&doc).is_ok());
    }

    #[test]
    fn test_ordered_list_alphanumeric_valid() {
        // Ordered list whose items are already in alphabetical order.
        let doc = core_document(vec![text_list(
            ListType::Ordered,
            &["Apple", "Banana", "Cherry"],
        )]);

        // Basic validation should pass
        assert!(CmlValidator::validate(&doc).is_ok());

        // Profile validation should also pass (items are in alphabetical order)
        assert!(CmlValidator::validate_with_profile(&doc).is_ok());
    }

    #[test]
    fn test_ordered_list_alphanumeric_invalid() {
        // Ordered list whose items are out of alphabetical order.
        let doc = core_document(vec![text_list(
            ListType::Ordered,
            &["Cherry", "Apple", "Banana"],
        )]);

        // Basic validation should pass (doesn't check order)
        assert!(CmlValidator::validate(&doc).is_ok());

        // Profile validation should fail (items are not in alphabetical order)
        let result = CmlValidator::validate_with_profile(&doc);
        assert!(result.is_err());
        let err_msg = format!("{:?}", result.unwrap_err());
        assert!(err_msg.contains("order"));
    }

    #[test]
    fn test_unordered_list_no_order_enforcement() {
        // Unordered list: item order is not expected to matter.
        let doc = core_document(vec![text_list(ListType::Unordered, &["Zebra", "Apple"])]);

        // Both validations should pass for unordered lists
        assert!(CmlValidator::validate(&doc).is_ok());
        assert!(CmlValidator::validate_with_profile(&doc).is_ok());
    }
}