Skip to main content

cdx_core/content/
validation.rs

1//! Content validation.
2
3use std::collections::HashSet;
4use std::fmt;
5
6use super::{Block, Content, Text};
7use crate::extensions::ExtensionBlock;
8
/// A structural problem found while validating document content.
///
/// Covers violations such as a broken block hierarchy, duplicate block IDs,
/// out-of-range heading levels, and children placed under the wrong parent.
///
/// For JSON schema validation of manifest and metadata files, see
/// [`crate::validation::SchemaValidationError`] instead.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ValidationError {
    /// Location of the offending element (e.g., `blocks[0].children[1]`).
    pub path: String,

    /// Human-readable explanation of what is wrong.
    pub message: String,
}
25
26impl fmt::Display for ValidationError {
27    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
28        if self.path.is_empty() {
29            write!(f, "{}", self.message)
30        } else {
31            write!(f, "{}: {}", self.path, self.message)
32        }
33    }
34}
35
36impl std::error::Error for ValidationError {}
37
38/// Validate content structure and rules.
39///
40/// This validates:
41/// - Block structure (correct children types)
42/// - Unique block IDs
43/// - Required fields
44/// - Heading levels (1-6)
45/// - List items only in lists
46/// - Table rows only in tables
47/// - Table cells only in rows
48///
49/// # Errors
50///
51/// Returns a vector of validation errors if any are found.
52#[must_use]
53pub fn validate_content(content: &Content) -> Vec<ValidationError> {
54    let mut errors = Vec::new();
55    let mut seen_ids = HashSet::new();
56
57    for (i, block) in content.blocks.iter().enumerate() {
58        let path = format!("blocks[{i}]");
59        validate_block(block, &path, &mut errors, &mut seen_ids, None);
60    }
61
62    errors
63}
64
/// Identifies the kind of block whose direct children are being validated.
///
/// Passed down one level so that child-only block types can verify they
/// sit under the right parent.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ParentContext {
    /// The block is a direct child of a list.
    List,
    /// The block is a direct child of a table.
    Table,
    /// The block is a direct child of a table row.
    TableRow,
    /// The block is a direct child of a definition list.
    DefinitionList,
    /// The block is a direct child of a figure.
    Figure,
}
74
75/// Context passed through validation.
76struct ValidationContext<'a> {
77    errors: &'a mut Vec<ValidationError>,
78    seen_ids: &'a mut HashSet<String>,
79}
80
81impl ValidationContext<'_> {
82    fn add_error(&mut self, path: &str, message: impl Into<String>) {
83        self.errors.push(ValidationError {
84            path: path.to_string(),
85            message: message.into(),
86        });
87    }
88}
89
90fn validate_block(
91    block: &Block,
92    path: &str,
93    errors: &mut Vec<ValidationError>,
94    seen_ids: &mut HashSet<String>,
95    parent: Option<ParentContext>,
96) {
97    let mut ctx = ValidationContext { errors, seen_ids };
98
99    // Check ID uniqueness
100    if let Some(id) = block.id() {
101        if !ctx.seen_ids.insert(id.to_string()) {
102            ctx.add_error(path, format!("duplicate block ID: {id}"));
103        }
104    }
105
106    match block {
107        Block::Paragraph { children, .. } => validate_text_children(children, path, ctx.errors),
108        Block::Heading {
109            level, children, ..
110        } => {
111            validate_heading(*level, children, path, ctx.errors);
112        }
113        Block::List { children, .. } => validate_list(children, path, &mut ctx),
114        Block::ListItem { children, .. } => validate_list_item(children, path, parent, &mut ctx),
115        Block::Blockquote { children, .. } => validate_container(children, path, &mut ctx),
116        Block::CodeBlock { children, .. } => validate_code_block(children, path, ctx.errors),
117        Block::HorizontalRule { .. } | Block::Break { .. } | Block::Signature(_) => {}
118        Block::Image(img) => validate_image(img, path, ctx.errors),
119        Block::Table { children, .. } => validate_table(children, path, &mut ctx),
120        Block::TableRow { children, .. } => validate_table_row(children, path, parent, &mut ctx),
121        Block::TableCell(cell) => validate_table_cell(cell, path, parent, ctx.errors),
122        Block::Math(math) => validate_math(math, path, ctx.errors),
123        Block::Extension(ext) => validate_extension(ext, path, &mut ctx),
124        // New block types
125        Block::DefinitionList(dl) => validate_definition_list(&dl.children, path, &mut ctx),
126        Block::DefinitionItem { children, .. } => {
127            validate_definition_item(children, path, parent, &mut ctx);
128        }
129        Block::DefinitionTerm { children, .. } => {
130            validate_text_children(children, path, ctx.errors);
131        }
132        Block::DefinitionDescription { children, .. } => {
133            validate_container(children, path, &mut ctx);
134        }
135        Block::Measurement(m) => validate_measurement(m, path, ctx.errors),
136        Block::Svg(svg) => validate_svg(svg, path, ctx.errors),
137        Block::Barcode(bc) => validate_barcode(bc, path, ctx.errors),
138        Block::Figure(fig) => validate_figure(&fig.children, path, &mut ctx),
139        Block::FigCaption(fc) => validate_figcaption(&fc.children, path, parent, ctx.errors),
140        Block::Admonition(adm) => validate_container(&adm.children, path, &mut ctx),
141    }
142}
143
144fn validate_heading(level: u8, children: &[Text], path: &str, errors: &mut Vec<ValidationError>) {
145    if !(1..=6).contains(&level) {
146        errors.push(ValidationError {
147            path: path.to_string(),
148            message: format!("heading level must be 1-6, got {level}"),
149        });
150    }
151    validate_text_children(children, path, errors);
152}
153
154fn validate_list(children: &[Block], path: &str, ctx: &mut ValidationContext<'_>) {
155    for (i, child) in children.iter().enumerate() {
156        let child_path = format!("{path}.children[{i}]");
157        if !matches!(child, Block::ListItem { .. }) {
158            ctx.add_error(
159                &child_path,
160                format!("list children must be listItem, got {}", child.block_type()),
161            );
162        }
163        validate_block(
164            child,
165            &child_path,
166            ctx.errors,
167            ctx.seen_ids,
168            Some(ParentContext::List),
169        );
170    }
171}
172
173fn validate_list_item(
174    children: &[Block],
175    path: &str,
176    parent: Option<ParentContext>,
177    ctx: &mut ValidationContext<'_>,
178) {
179    if parent != Some(ParentContext::List) {
180        ctx.add_error(path, "listItem must be a child of list");
181    }
182    for (i, child) in children.iter().enumerate() {
183        let child_path = format!("{path}.children[{i}]");
184        validate_block(child, &child_path, ctx.errors, ctx.seen_ids, None);
185    }
186}
187
188fn validate_container(children: &[Block], path: &str, ctx: &mut ValidationContext<'_>) {
189    for (i, child) in children.iter().enumerate() {
190        let child_path = format!("{path}.children[{i}]");
191        validate_block(child, &child_path, ctx.errors, ctx.seen_ids, None);
192    }
193}
194
195fn validate_code_block(children: &[Text], path: &str, errors: &mut Vec<ValidationError>) {
196    if children.len() != 1 {
197        errors.push(ValidationError {
198            path: path.to_string(),
199            message: format!(
200                "codeBlock should have exactly 1 text node, got {}",
201                children.len()
202            ),
203        });
204    }
205    for child in children {
206        if !child.marks.is_empty() {
207            errors.push(ValidationError {
208                path: path.to_string(),
209                message: "codeBlock text should not have marks".to_string(),
210            });
211        }
212    }
213}
214
215fn validate_image(img: &super::block::ImageBlock, path: &str, errors: &mut Vec<ValidationError>) {
216    if img.src.is_empty() {
217        errors.push(ValidationError {
218            path: path.to_string(),
219            message: "image src is required".to_string(),
220        });
221    }
222    if img.alt.is_empty() {
223        errors.push(ValidationError {
224            path: path.to_string(),
225            message: "image alt is required".to_string(),
226        });
227    }
228}
229
230fn validate_table(children: &[Block], path: &str, ctx: &mut ValidationContext<'_>) {
231    for (i, child) in children.iter().enumerate() {
232        let child_path = format!("{path}.children[{i}]");
233        if !matches!(child, Block::TableRow { .. }) {
234            ctx.add_error(
235                &child_path,
236                format!(
237                    "table children must be tableRow, got {}",
238                    child.block_type()
239                ),
240            );
241        }
242        validate_block(
243            child,
244            &child_path,
245            ctx.errors,
246            ctx.seen_ids,
247            Some(ParentContext::Table),
248        );
249    }
250}
251
252fn validate_table_row(
253    children: &[Block],
254    path: &str,
255    parent: Option<ParentContext>,
256    ctx: &mut ValidationContext<'_>,
257) {
258    if parent != Some(ParentContext::Table) {
259        ctx.add_error(path, "tableRow must be a child of table");
260    }
261    for (i, child) in children.iter().enumerate() {
262        let child_path = format!("{path}.children[{i}]");
263        if !matches!(child, Block::TableCell(_)) {
264            ctx.add_error(
265                &child_path,
266                format!(
267                    "tableRow children must be tableCell, got {}",
268                    child.block_type()
269                ),
270            );
271        }
272        validate_block(
273            child,
274            &child_path,
275            ctx.errors,
276            ctx.seen_ids,
277            Some(ParentContext::TableRow),
278        );
279    }
280}
281
282fn validate_table_cell(
283    cell: &super::block::TableCellBlock,
284    path: &str,
285    parent: Option<ParentContext>,
286    errors: &mut Vec<ValidationError>,
287) {
288    if parent != Some(ParentContext::TableRow) {
289        errors.push(ValidationError {
290            path: path.to_string(),
291            message: "tableCell must be a child of tableRow".to_string(),
292        });
293    }
294    if cell.colspan == 0 {
295        errors.push(ValidationError {
296            path: path.to_string(),
297            message: "tableCell colspan must be at least 1".to_string(),
298        });
299    }
300    if cell.rowspan == 0 {
301        errors.push(ValidationError {
302            path: path.to_string(),
303            message: "tableCell rowspan must be at least 1".to_string(),
304        });
305    }
306    validate_text_children(&cell.children, path, errors);
307}
308
309fn validate_math(math: &super::block::MathBlock, path: &str, errors: &mut Vec<ValidationError>) {
310    if math.value.is_empty() {
311        errors.push(ValidationError {
312            path: path.to_string(),
313            message: "math value is required".to_string(),
314        });
315    }
316}
317
318fn validate_extension(ext: &ExtensionBlock, path: &str, ctx: &mut ValidationContext<'_>) {
319    // Validate extension namespace and type
320    if ext.namespace.is_empty() {
321        ctx.add_error(path, "extension namespace is required");
322    }
323    if ext.block_type.is_empty() {
324        ctx.add_error(path, "extension block type is required");
325    }
326
327    // Validate children recursively
328    for (i, child) in ext.children.iter().enumerate() {
329        let child_path = format!("{path}.children[{i}]");
330        validate_block(child, &child_path, ctx.errors, ctx.seen_ids, None);
331    }
332
333    // Validate fallback content if present
334    if let Some(fallback) = &ext.fallback {
335        let fallback_path = format!("{path}.fallback");
336        validate_block(fallback, &fallback_path, ctx.errors, ctx.seen_ids, None);
337    }
338}
339
340fn validate_text_children(children: &[Text], path: &str, errors: &mut Vec<ValidationError>) {
341    for (i, text) in children.iter().enumerate() {
342        if text.value.is_empty() {
343            errors.push(ValidationError {
344                path: format!("{path}.children[{i}]"),
345                message: "text value cannot be empty".to_string(),
346            });
347        }
348    }
349}
350
351fn validate_definition_list(children: &[Block], path: &str, ctx: &mut ValidationContext<'_>) {
352    for (i, child) in children.iter().enumerate() {
353        let child_path = format!("{path}.children[{i}]");
354        if !matches!(child, Block::DefinitionItem { .. }) {
355            ctx.add_error(
356                &child_path,
357                format!(
358                    "definitionList children must be definitionItem, got {}",
359                    child.block_type()
360                ),
361            );
362        }
363        validate_block(
364            child,
365            &child_path,
366            ctx.errors,
367            ctx.seen_ids,
368            Some(ParentContext::DefinitionList),
369        );
370    }
371}
372
373fn validate_definition_item(
374    children: &[Block],
375    path: &str,
376    parent: Option<ParentContext>,
377    ctx: &mut ValidationContext<'_>,
378) {
379    if parent != Some(ParentContext::DefinitionList) {
380        ctx.add_error(path, "definitionItem must be a child of definitionList");
381    }
382    for (i, child) in children.iter().enumerate() {
383        let child_path = format!("{path}.children[{i}]");
384        validate_block(child, &child_path, ctx.errors, ctx.seen_ids, None);
385    }
386}
387
388fn validate_measurement(
389    m: &super::block::MeasurementBlock,
390    path: &str,
391    errors: &mut Vec<ValidationError>,
392) {
393    if m.display.is_empty() {
394        errors.push(ValidationError {
395            path: path.to_string(),
396            message: "measurement display is required".to_string(),
397        });
398    }
399}
400
401fn validate_svg(svg: &super::block::SvgBlock, path: &str, errors: &mut Vec<ValidationError>) {
402    // SVG must have exactly one of src or content
403    match (&svg.src, &svg.content) {
404        (Some(_), Some(_)) => {
405            errors.push(ValidationError {
406                path: path.to_string(),
407                message: "svg must have either src or content, not both".to_string(),
408            });
409        }
410        (None, None) => {
411            errors.push(ValidationError {
412                path: path.to_string(),
413                message: "svg must have either src or content".to_string(),
414            });
415        }
416        _ => {}
417    }
418}
419
420fn validate_barcode(
421    bc: &super::block::BarcodeBlock,
422    path: &str,
423    errors: &mut Vec<ValidationError>,
424) {
425    if bc.data.is_empty() {
426        errors.push(ValidationError {
427            path: path.to_string(),
428            message: "barcode data is required".to_string(),
429        });
430    }
431    // Check for generic/placeholder alt text
432    let alt_lower = bc.alt.to_lowercase();
433    if bc.alt.is_empty() || alt_lower == "barcode" || alt_lower == "qr code" || alt_lower == "image"
434    {
435        errors.push(ValidationError {
436            path: path.to_string(),
437            message: "barcode alt must be meaningful (not just 'barcode' or 'image')".to_string(),
438        });
439    }
440}
441
442fn validate_figure(children: &[Block], path: &str, ctx: &mut ValidationContext<'_>) {
443    for (i, child) in children.iter().enumerate() {
444        let child_path = format!("{path}.children[{i}]");
445        validate_block(
446            child,
447            &child_path,
448            ctx.errors,
449            ctx.seen_ids,
450            Some(ParentContext::Figure),
451        );
452    }
453}
454
455fn validate_figcaption(
456    children: &[Text],
457    path: &str,
458    parent: Option<ParentContext>,
459    errors: &mut Vec<ValidationError>,
460) {
461    if parent != Some(ParentContext::Figure) {
462        errors.push(ValidationError {
463            path: path.to_string(),
464            message: "figcaption should be a child of figure".to_string(),
465        });
466    }
467    validate_text_children(children, path, errors);
468}
469
#[cfg(test)]
mod tests {
    use super::*;
    use crate::content::{BlockAttributes, Mark, Text};

    /// Returns `true` when some reported error carries exactly `expected`
    /// as its message.
    ///
    /// Exact matching matters because several messages are prefixes of one
    /// another (e.g. "child of table" vs. "child of tableRow"), so substring
    /// checks could pass on the wrong error.
    fn has_message(errors: &[ValidationError], expected: &str) -> bool {
        errors.iter().any(|e| e.message == expected)
    }

    #[test]
    fn test_valid_content() {
        let content = Content::new(vec![
            Block::heading(1, vec![Text::plain("Title")]),
            Block::paragraph(vec![Text::plain("Body")]),
        ]);
        let errors = validate_content(&content);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_error_display_includes_path_when_present() {
        let located = ValidationError {
            path: "blocks[0]".to_string(),
            message: "bad".to_string(),
        };
        assert_eq!(located.to_string(), "blocks[0]: bad");

        let global = ValidationError {
            path: String::new(),
            message: "bad".to_string(),
        };
        assert_eq!(global.to_string(), "bad");
    }

    #[test]
    fn test_duplicate_ids() {
        let content = Content::new(vec![
            Block::Paragraph {
                id: Some("dup".to_string()),
                children: vec![Text::plain("First")],
                attributes: BlockAttributes::default(),
            },
            Block::Paragraph {
                id: Some("dup".to_string()),
                children: vec![Text::plain("Second")],
                attributes: BlockAttributes::default(),
            },
        ]);
        let errors = validate_content(&content);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("duplicate"));
        // The second occurrence is the one reported.
        assert_eq!(errors[0].path, "blocks[1]");
    }

    #[test]
    fn test_invalid_heading_level() {
        let content = Content::new(vec![Block::Heading {
            id: None,
            level: 7,
            children: vec![Text::plain("Too deep")],
            attributes: BlockAttributes::default(),
        }]);
        let errors = validate_content(&content);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("level"));
    }

    #[test]
    fn test_empty_text_reports_child_path() {
        let content = Content::new(vec![Block::paragraph(vec![Text::plain("")])]);
        let errors = validate_content(&content);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "blocks[0].children[0]");
        assert_eq!(errors[0].message, "text value cannot be empty");
    }

    #[test]
    fn test_list_item_outside_list() {
        let content = Content::new(vec![Block::list_item(vec![Block::paragraph(vec![
            Text::plain("Orphan"),
        ])])]);
        let errors = validate_content(&content);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("child of list"));
    }

    #[test]
    fn test_list_with_wrong_children() {
        let content = Content::new(vec![Block::List {
            id: None,
            ordered: false,
            start: None,
            children: vec![Block::paragraph(vec![Text::plain("Wrong")])],
            attributes: BlockAttributes::default(),
        }]);
        let errors = validate_content(&content);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("listItem"));
    }

    #[test]
    fn test_code_block_with_marks() {
        let content = Content::new(vec![Block::CodeBlock {
            id: None,
            language: Some("rust".to_string()),
            highlighting: None,
            tokens: None,
            children: vec![Text::with_marks("code", vec![Mark::Bold])],
            attributes: BlockAttributes::default(),
        }]);
        let errors = validate_content(&content);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("marks"));
    }

    #[test]
    fn test_empty_image() {
        let content = Content::new(vec![Block::Image(super::super::block::ImageBlock {
            id: None,
            src: String::new(),
            alt: String::new(),
            title: None,
            width: None,
            height: None,
        })]);
        let errors = validate_content(&content);
        assert_eq!(errors.len(), 2);
    }

    #[test]
    fn test_valid_table() {
        let content = Content::new(vec![Block::table(vec![Block::table_row(
            vec![Block::table_cell(vec![Text::plain("Cell")])],
            false,
        )])]);
        let errors = validate_content(&content);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_table_row_outside_table() {
        let content = Content::new(vec![Block::table_row(
            vec![Block::table_cell(vec![Text::plain("Orphan")])],
            false,
        )]);
        let errors = validate_content(&content);
        // Exact match: "child of table" is also a prefix of the tableCell
        // message ("... child of tableRow").
        assert!(has_message(&errors, "tableRow must be a child of table"));
    }

    #[test]
    fn test_table_cell_outside_row() {
        let content = Content::new(vec![Block::table_cell(vec![Text::plain("Orphan")])]);
        let errors = validate_content(&content);
        assert!(has_message(&errors, "tableCell must be a child of tableRow"));
    }

    // Tests for new block type validation

    #[test]
    fn test_valid_definition_list() {
        let content = Content::new(vec![Block::definition_list(vec![Block::definition_item(
            vec![
                Block::definition_term(vec![Text::plain("Term")]),
                Block::definition_description(vec![Block::paragraph(vec![Text::plain("Desc")])]),
            ],
        )])]);
        let errors = validate_content(&content);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_definition_item_outside_list() {
        let content = Content::new(vec![Block::definition_item(vec![Block::definition_term(
            vec![Text::plain("Orphan term")],
        )])]);
        let errors = validate_content(&content);
        assert!(has_message(
            &errors,
            "definitionItem must be a child of definitionList"
        ));
    }

    #[test]
    fn test_definition_list_with_wrong_children() {
        let content = Content::new(vec![Block::DefinitionList(
            super::super::block::DefinitionListBlock::new(vec![Block::paragraph(vec![
                Text::plain("Wrong"),
            ])]),
        )]);
        let errors = validate_content(&content);
        assert!(errors.iter().any(|e| e.message.contains("definitionItem")));
    }

    #[test]
    fn test_svg_with_both_src_and_content() {
        let content = Content::new(vec![Block::Svg(super::super::block::SvgBlock {
            id: None,
            src: Some("file.svg".to_string()),
            content: Some("<svg>...</svg>".to_string()),
            width: None,
            height: None,
            alt: None,
        })]);
        let errors = validate_content(&content);
        assert!(has_message(
            &errors,
            "svg must have either src or content, not both"
        ));
    }

    #[test]
    fn test_svg_with_neither_src_nor_content() {
        let content = Content::new(vec![Block::Svg(super::super::block::SvgBlock {
            id: None,
            src: None,
            content: None,
            width: None,
            height: None,
            alt: None,
        })]);
        let errors = validate_content(&content);
        // Exact match: a substring check would also accept the "not both"
        // message emitted for the opposite failure.
        assert!(has_message(&errors, "svg must have either src or content"));
        assert!(!errors.iter().any(|e| e.message.contains("not both")));
    }

    #[test]
    fn test_barcode_with_generic_alt() {
        use super::super::block::{BarcodeBlock, BarcodeFormat};

        let content = Content::new(vec![Block::Barcode(BarcodeBlock::new(
            BarcodeFormat::Qr,
            "https://example.com",
            "barcode", // Generic alt text should fail
        ))]);
        let errors = validate_content(&content);
        assert!(errors.iter().any(|e| e.message.contains("meaningful")));
    }

    #[test]
    fn test_barcode_with_generic_alt_ignores_case() {
        use super::super::block::{BarcodeBlock, BarcodeFormat};

        let content = Content::new(vec![Block::Barcode(BarcodeBlock::new(
            BarcodeFormat::Qr,
            "https://example.com",
            "QR Code", // Placeholder check is case-insensitive
        ))]);
        let errors = validate_content(&content);
        assert!(errors.iter().any(|e| e.message.contains("meaningful")));
    }

    #[test]
    fn test_barcode_with_good_alt() {
        use super::super::block::{BarcodeBlock, BarcodeFormat};

        let content = Content::new(vec![Block::Barcode(BarcodeBlock::new(
            BarcodeFormat::Qr,
            "https://example.com",
            "Link to example.com homepage",
        ))]);
        let errors = validate_content(&content);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_valid_figure() {
        let content = Content::new(vec![Block::figure(vec![
            Block::image("photo.png", "A photo"),
            Block::figcaption(vec![Text::plain("Figure 1")]),
        ])]);
        let errors = validate_content(&content);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_figcaption_outside_figure() {
        let content = Content::new(vec![Block::figcaption(vec![Text::plain("Orphan caption")])]);
        let errors = validate_content(&content);
        assert!(has_message(&errors, "figcaption should be a child of figure"));
    }

    #[test]
    fn test_measurement_empty_display() {
        let content = Content::new(vec![Block::Measurement(
            super::super::block::MeasurementBlock {
                id: None,
                value: 42.0,
                uncertainty: None,
                uncertainty_notation: None,
                exponent: None,
                display: String::new(), // Empty display should fail
                unit: None,
            },
        )]);
        let errors = validate_content(&content);
        assert!(has_message(&errors, "measurement display is required"));
    }
}