schemaorg-validate 0.3.0

Parse and validate Schema.org structured data (JSON-LD, Microdata, RDFa) against the official vocabulary and Google Rich Results profiles.
Documentation
//! SARIF 2.1.0 output support for structured data validation.
//!
//! Produces [SARIF](https://sarifweb.azurewebsites.net/) 2.1.0 JSON output
//! compatible with GitHub Code Scanning and other static analysis tools.
//!
//! # Design
//!
//! Hand-rolled using `serde_json::json!()` to avoid additional external
//! dependencies. We use a small subset of the SARIF spec (~15 fields), which
//! is sufficient for all our diagnostic types.

use serde_json::Value;

use crate::profiles::ProfileResult;
use crate::validation::diagnostics::{DiagnosticCode, Severity, ValidationDiagnostic};
use crate::validation::ValidationResult;

/// URI of the SARIF 2.1.0 JSON schema, emitted as the `$schema` property of
/// the generated document.
///
/// Split into host, repository/branch and in-repo path purely to keep the
/// source lines short; the pieces are joined at compile time.
const SARIF_SCHEMA: &str = concat!(
    "https://raw.githubusercontent.com",
    "/oasis-tcs/sarif-spec/main",
    "/sarif-2.1/schema/sarif-schema-2.1.0.json",
);

/// Returns the stable SARIF rule identifier for a [`DiagnosticCode`].
///
/// Identifiers fall into two families:
///
/// * `PROFILE0xx` — rich-result profile checks
/// * `SCHEMA0xx` — Schema.org vocabulary checks
///
/// The match is deliberately exhaustive (no wildcard arm) so that adding a
/// new [`DiagnosticCode`] variant fails to compile until it is given an ID.
#[must_use]
pub fn rule_id(code: DiagnosticCode) -> &'static str {
    match code {
        // --- Profile family -------------------------------------------------
        DiagnosticCode::RequiredFieldMissing => "PROFILE001",
        DiagnosticCode::RecommendedFieldMissing => "PROFILE002",
        DiagnosticCode::NestedRequiredFieldMissing => "PROFILE003",
        DiagnosticCode::InvalidFieldValue => "PROFILE004",
        DiagnosticCode::EligibilityRestricted => "PROFILE005",

        // --- Vocabulary family ----------------------------------------------
        DiagnosticCode::UnknownType => "SCHEMA001",
        DiagnosticCode::UnknownProperty => "SCHEMA002",
        DiagnosticCode::InvalidValueType => "SCHEMA003",
        DiagnosticCode::DeprecatedType => "SCHEMA004",
        DiagnosticCode::DeprecatedProperty => "SCHEMA005",
        DiagnosticCode::PropertyNotForType => "SCHEMA006",
        DiagnosticCode::PendingType => "SCHEMA007",
        DiagnosticCode::PendingProperty => "SCHEMA008",
        DiagnosticCode::ExpectedUrlGotText => "SCHEMA009",
        DiagnosticCode::ExpectedTextGotNode => "SCHEMA010",
        DiagnosticCode::InvalidEnumValue => "SCHEMA011",
        DiagnosticCode::InvalidBoolean => "SCHEMA012",
        DiagnosticCode::InvalidNumber => "SCHEMA013",
    }
}

/// Translates a [`Severity`] into the value used for a SARIF `level` property.
///
/// Listed from least to most severe. `Info` becomes `"note"`; the other two
/// severities keep their lowercase names.
#[must_use]
pub fn sarif_level(severity: Severity) -> &'static str {
    match severity {
        Severity::Info => "note",
        Severity::Warning => "warning",
        Severity::Error => "error",
    }
}

/// Builds the SARIF rule definition for a [`DiagnosticCode`].
fn build_rule(code: DiagnosticCode) -> Value {
    let id = rule_id(code);
    let (name, desc, level) = match code {
        DiagnosticCode::UnknownType => ("UnknownType", "Unknown Schema.org type", "error"),
        DiagnosticCode::UnknownProperty => {
            ("UnknownProperty", "Unknown Schema.org property", "error")
        }
        DiagnosticCode::InvalidValueType => ("InvalidValueType", "Value type mismatch", "error"),
        DiagnosticCode::DeprecatedType => {
            ("DeprecatedType", "Type retired from Schema.org", "warning")
        }
        DiagnosticCode::DeprecatedProperty => {
            ("DeprecatedProperty", "Property superseded", "warning")
        }
        DiagnosticCode::PropertyNotForType => (
            "PropertyNotForType",
            "Property not valid for this type",
            "warning",
        ),
        DiagnosticCode::PendingType => ("PendingType", "Type in pending vocabulary", "note"),
        DiagnosticCode::PendingProperty => {
            ("PendingProperty", "Property in pending vocabulary", "note")
        }
        DiagnosticCode::ExpectedUrlGotText => {
            ("ExpectedUrlGotText", "Expected URL value", "warning")
        }
        DiagnosticCode::ExpectedTextGotNode => (
            "ExpectedTextGotNode",
            "Expected text, got nested object",
            "error",
        ),
        DiagnosticCode::InvalidEnumValue => {
            ("InvalidEnumValue", "Invalid enumeration member", "error")
        }
        DiagnosticCode::InvalidBoolean => ("InvalidBoolean", "Expected boolean value", "warning"),
        DiagnosticCode::InvalidNumber => ("InvalidNumber", "Expected numeric value", "warning"),
        DiagnosticCode::RequiredFieldMissing => (
            "RequiredFieldMissing",
            "Required field for rich results",
            "error",
        ),
        DiagnosticCode::RecommendedFieldMissing => (
            "RecommendedFieldMissing",
            "Recommended field for rich results",
            "warning",
        ),
        DiagnosticCode::NestedRequiredFieldMissing => (
            "NestedRequiredFieldMissing",
            "Missing field in nested requirement",
            "error",
        ),
        DiagnosticCode::InvalidFieldValue => (
            "InvalidFieldValue",
            "Invalid field value for profile",
            "error",
        ),
        DiagnosticCode::EligibilityRestricted => (
            "EligibilityRestricted",
            "Rich result eligibility restricted",
            "note",
        ),
    };

    serde_json::json!({
        "id": id,
        "name": name,
        "shortDescription": { "text": desc },
        "defaultConfiguration": { "level": level }
    })
}

/// Renders one [`ValidationDiagnostic`] as a SARIF `result` object.
///
/// The result always has a single location whose `artifactLocation.uri` is
/// `source_uri`. Two parts are optional:
///
/// * `physicalLocation.region` — only when the diagnostic has a source
///   location; carries `startLine` / `startColumn`.
/// * `logicalLocations` — only when the diagnostic's `path` is non-empty;
///   the path is reported as a `fullyQualifiedName` of kind `"object"`.
fn diagnostic_to_result(diag: &ValidationDiagnostic, source_uri: &str) -> Value {
    // Physical part: the file, plus a region when we know where in the file.
    let mut physical = serde_json::json!({
        "artifactLocation": { "uri": source_uri }
    });
    if let Some(pos) = diag.source_location.as_ref() {
        // NOTE(review): SARIF regions are 1-based; this assumes the stored
        // line/column are 1-based as well — confirm against the parser.
        physical["region"] = serde_json::json!({
            "startLine": pos.line,
            "startColumn": pos.column
        });
    }

    // Logical part: the property path inside the structured data, if any.
    let mut location = serde_json::json!({ "physicalLocation": physical });
    if !diag.path.is_empty() {
        location["logicalLocations"] = serde_json::json!([{
            "fullyQualifiedName": &diag.path,
            "kind": "object"
        }]);
    }

    serde_json::json!({
        "ruleId": rule_id(diag.code),
        "level": sarif_level(diag.severity),
        "message": { "text": &diag.message },
        "locations": [location]
    })
}

/// Assembles a complete SARIF 2.1.0 log as a JSON [`Value`].
///
/// Vocabulary diagnostics are reported first, followed by the profile
/// diagnostics when `profile_result` is `Some`. All of them go into a single
/// run:
///
/// * `results` holds one entry per diagnostic, in that order;
/// * `tool.driver.rules` holds one descriptor per distinct diagnostic code,
///   in order of first appearance.
///
/// `source_uri` is used as the artifact URI of every result.
#[must_use]
pub fn build_sarif(
    vocab_result: &ValidationResult,
    profile_result: Option<&ProfileResult>,
    source_uri: &str,
) -> Value {
    // One stream over both sources; an absent profile contributes nothing.
    let all_diagnostics = vocab_result
        .diagnostics
        .iter()
        .chain(profile_result.into_iter().flat_map(|pr| pr.diagnostics.iter()));

    let mut emitted_codes = std::collections::HashSet::new();
    let mut rule_defs = Vec::new();
    let mut findings = Vec::new();

    for diag in all_diagnostics {
        // `insert` returns true only the first time a code is seen, so each
        // rule descriptor is emitted exactly once.
        if emitted_codes.insert(diag.code) {
            rule_defs.push(build_rule(diag.code));
        }
        findings.push(diagnostic_to_result(diag, source_uri));
    }

    serde_json::json!({
        "$schema": SARIF_SCHEMA,
        "version": "2.1.0",
        "runs": [{
            "tool": {
                "driver": {
                    "name": "schemaorg-validate",
                    "version": env!("CARGO_PKG_VERSION"),
                    "informationUri": "https://github.com/mitrovicsinisaa/schemaorg-rs",
                    "rules": rule_defs
                }
            },
            "results": findings,
            "invocations": [{
                "executionSuccessful": true,
                "toolConfigurationNotifications": []
            }]
        }]
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::validation::diagnostics::{DiagnosticCode, Severity, ValidationDiagnostic};
    use crate::validation::ValidationResult;

    /// Every diagnostic code this module knows how to report.
    const ALL_CODES: [DiagnosticCode; 18] = [
        DiagnosticCode::UnknownType,
        DiagnosticCode::UnknownProperty,
        DiagnosticCode::InvalidValueType,
        DiagnosticCode::DeprecatedType,
        DiagnosticCode::DeprecatedProperty,
        DiagnosticCode::PropertyNotForType,
        DiagnosticCode::PendingType,
        DiagnosticCode::PendingProperty,
        DiagnosticCode::ExpectedUrlGotText,
        DiagnosticCode::ExpectedTextGotNode,
        DiagnosticCode::InvalidEnumValue,
        DiagnosticCode::InvalidBoolean,
        DiagnosticCode::InvalidNumber,
        DiagnosticCode::RequiredFieldMissing,
        DiagnosticCode::RecommendedFieldMissing,
        DiagnosticCode::NestedRequiredFieldMissing,
        DiagnosticCode::InvalidFieldValue,
        DiagnosticCode::EligibilityRestricted,
    ];

    /// Builds a diagnostic without a source location for use in tests.
    fn diagnostic(
        code: DiagnosticCode,
        severity: Severity,
        path: &'static str,
    ) -> ValidationDiagnostic {
        ValidationDiagnostic {
            path: path.into(),
            severity,
            code,
            message: "test diagnostic".into(),
            source_location: None,
        }
    }

    #[test]
    fn rule_id_mapping_completeness() {
        // `rule_id` is an exhaustive match, so "has an ID" is guaranteed by
        // the compiler. What can still go wrong is two codes sharing an ID or
        // an ID not following the `SCHEMAnnn` / `PROFILEnnn` convention.
        let mut seen = std::collections::HashSet::new();
        for code in ALL_CODES {
            let id = rule_id(code);
            assert!(seen.insert(id), "duplicate rule ID {id} for {code:?}");

            let digits = id
                .strip_prefix("SCHEMA")
                .or_else(|| id.strip_prefix("PROFILE"));
            assert!(
                matches!(digits, Some(d) if d.len() == 3 && d.bytes().all(|b| b.is_ascii_digit())),
                "malformed rule ID {id} for {code:?}"
            );
        }
    }

    #[test]
    fn build_rule_is_consistent_with_rule_id() {
        for code in ALL_CODES {
            let rule = build_rule(code);
            assert_eq!(rule["id"], rule_id(code), "rule ID mismatch for {code:?}");

            let level = rule["defaultConfiguration"]["level"].as_str().unwrap();
            assert!(
                matches!(level, "error" | "warning" | "note"),
                "unexpected default level {level} for {code:?}"
            );

            let desc = rule["shortDescription"]["text"].as_str().unwrap();
            assert!(!desc.is_empty(), "empty description for {code:?}");
        }
    }

    #[test]
    fn sarif_level_mapping() {
        assert_eq!(sarif_level(Severity::Error), "error");
        assert_eq!(sarif_level(Severity::Warning), "warning");
        assert_eq!(sarif_level(Severity::Info), "note");
    }

    #[test]
    fn build_sarif_empty_result() {
        let vocab = ValidationResult::default();
        let sarif = build_sarif(&vocab, None, "test.html");

        assert_eq!(sarif["$schema"], SARIF_SCHEMA);
        assert_eq!(sarif["version"], "2.1.0");
        assert_eq!(sarif["runs"][0]["results"].as_array().unwrap().len(), 0);
        assert_eq!(
            sarif["runs"][0]["tool"]["driver"]["rules"]
                .as_array()
                .unwrap()
                .len(),
            0
        );
    }

    #[test]
    fn build_sarif_with_diagnostics() {
        let vocab = ValidationResult {
            diagnostics: vec![ValidationDiagnostic {
                path: "Product.name".into(),
                severity: Severity::Error,
                code: DiagnosticCode::UnknownProperty,
                message: "Unknown property 'namee'".into(),
                source_location: None,
            }],
        };

        let sarif = build_sarif(&vocab, None, "index.html");

        let results = sarif["runs"][0]["results"].as_array().unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0]["ruleId"], "SCHEMA002");
        assert_eq!(results[0]["level"], "error");
        assert_eq!(results[0]["message"]["text"], "Unknown property 'namee'");
        assert_eq!(
            results[0]["locations"][0]["physicalLocation"]["artifactLocation"]["uri"],
            "index.html"
        );
        // A non-empty path is surfaced as a logical location.
        assert_eq!(
            results[0]["locations"][0]["logicalLocations"][0]["fullyQualifiedName"],
            "Product.name"
        );
    }

    #[test]
    fn build_sarif_with_source_location() {
        let vocab = ValidationResult {
            diagnostics: vec![ValidationDiagnostic {
                path: "Product".into(),
                severity: Severity::Warning,
                code: DiagnosticCode::DeprecatedType,
                message: "Deprecated type".into(),
                source_location: Some(crate::types::SourceLocation {
                    line: 15,
                    column: 3,
                    byte_offset: 200,
                }),
            }],
        };

        let sarif = build_sarif(&vocab, None, "page.html");
        let region = &sarif["runs"][0]["results"][0]["locations"][0]["physicalLocation"]["region"];
        assert_eq!(region["startLine"], 15);
        assert_eq!(region["startColumn"], 3);
    }

    #[test]
    fn build_sarif_omits_optional_location_parts() {
        // No source location and an empty path: neither `region` nor
        // `logicalLocations` should appear in the output.
        let vocab = ValidationResult {
            diagnostics: vec![diagnostic(DiagnosticCode::UnknownType, Severity::Error, "")],
        };

        let sarif = build_sarif(&vocab, None, "page.html");
        let location = &sarif["runs"][0]["results"][0]["locations"][0];

        assert!(location.get("logicalLocations").is_none());
        assert!(location["physicalLocation"].get("region").is_none());
    }

    #[test]
    fn build_sarif_deduplicates_rules() {
        let vocab = ValidationResult {
            diagnostics: vec![
                diagnostic(
                    DiagnosticCode::UnknownProperty,
                    Severity::Error,
                    "Product.namee",
                ),
                diagnostic(DiagnosticCode::DeprecatedType, Severity::Warning, "Product"),
                diagnostic(
                    DiagnosticCode::UnknownProperty,
                    Severity::Error,
                    "Product.pricee",
                ),
            ],
        };

        let sarif = build_sarif(&vocab, None, "index.html");
        let run = &sarif["runs"][0];

        // Every diagnostic yields a result...
        assert_eq!(run["results"].as_array().unwrap().len(), 3);

        // ...but each rule is described once, in order of first appearance.
        let rule_ids: Vec<&str> = run["tool"]["driver"]["rules"]
            .as_array()
            .unwrap()
            .iter()
            .map(|rule| rule["id"].as_str().unwrap())
            .collect();
        assert_eq!(rule_ids, ["SCHEMA002", "SCHEMA004"]);
    }
}