data-protocol-validator 0.1.0

Rust validator for Data Protocol schemas: validates versioned bioinformatics analysis output against JSON Schema-based protocol definitions.
Documentation
use regex::Regex;

/// Validate a string value against a named format.
///
/// Returns `true` if the value satisfies the format, or if the format is
/// unknown (unknown formats are treated as always valid, matching JSON Schema
/// behaviour).
pub fn validate_format(value: &str, format: &str) -> bool {
    match format {
        "date" => validate_date(value),
        "date-time" => validate_datetime(value),
        "email" => validate_email(value),
        "uri" => validate_uri(value),
        "uuid" => validate_uuid(value),
        _ => true, // unknown formats pass
    }
}

fn validate_date(value: &str) -> bool {
    let re = Regex::new(r"^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])$").unwrap();
    if !re.is_match(value) {
        return false;
    }
    // Also check that the date is actually parseable
    chrono::NaiveDate::parse_from_str(value, "%Y-%m-%d").is_ok()
}

fn validate_datetime(value: &str) -> bool {
    let re = Regex::new(
        r"^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])T(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:\.\d+)?(?:Z|[+-](?:[01]\d|2[0-3]):[0-5]\d)$"
    ).unwrap();
    if !re.is_match(value) {
        return false;
    }
    // Verify the date-time is parseable
    use chrono::{DateTime, FixedOffset};
    DateTime::<FixedOffset>::parse_from_rfc3339(value).is_ok()
        || chrono::NaiveDateTime::parse_from_str(value, "%Y-%m-%dT%H:%M:%S%.fZ").is_ok()
}

fn validate_email(value: &str) -> bool {
    let re = Regex::new(r"^[^\s@]+@[^\s@]+\.[^\s@]+$").unwrap();
    re.is_match(value)
}

fn validate_uri(value: &str) -> bool {
    let re = Regex::new(r"^[a-zA-Z][a-zA-Z0-9+\-.]*://[^\s]+$").unwrap();
    re.is_match(value)
}

/// Check that `value` is a UUID in the textual 8-4-4-4-12 layout.
///
/// The string must be exactly 36 bytes: hyphens at byte offsets 8, 13, 18
/// and 23, and ASCII hexadecimal digits (either case) everywhere else. No
/// version or variant bits are inspected.
///
/// The layout is fixed-width, so a single byte scan is enough; this avoids
/// compiling a regular expression on every call.
fn validate_uuid(value: &str) -> bool {
    let bytes = value.as_bytes();
    bytes.len() == 36
        && bytes.iter().enumerate().all(|(idx, &byte)| match idx {
            // Group separators.
            8 | 13 | 18 | 23 => byte == b'-',
            // Everything else must be 0-9, a-f or A-F; any non-ASCII byte fails here.
            _ => byte.is_ascii_hexdigit(),
        })
}

/// Unit tests for the format validators, all driven through the public
/// `validate_format` entry point.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_date_valid() {
        assert!(validate_format("2024-01-15", "date"));
    }

    #[test]
    fn test_date_invalid_month() {
        assert!(!validate_format("2024-13-01", "date"));
    }

    // Dates that fit the YYYY-MM-DD layout but do not exist in the calendar.
    #[test]
    fn test_date_nonexistent_day() {
        assert!(!validate_format("2024-02-30", "date"));
        assert!(!validate_format("2023-02-29", "date"));
        assert!(validate_format("2024-02-29", "date"));
    }

    #[test]
    fn test_date_requires_zero_padding() {
        assert!(!validate_format("2024-1-5", "date"));
    }

    #[test]
    fn test_datetime_valid() {
        assert!(validate_format("2024-01-15T10:30:00Z", "date-time"));
    }

    #[test]
    fn test_datetime_with_offset() {
        assert!(validate_format("2024-01-15T10:30:00+09:00", "date-time"));
    }

    #[test]
    fn test_datetime_fractional_seconds() {
        assert!(validate_format("2024-01-15T10:30:00.123Z", "date-time"));
    }

    #[test]
    fn test_datetime_missing_offset() {
        assert!(!validate_format("2024-01-15T10:30:00", "date-time"));
    }

    #[test]
    fn test_datetime_nonexistent_day() {
        assert!(!validate_format("2024-02-30T10:30:00Z", "date-time"));
    }

    #[test]
    fn test_email_valid() {
        assert!(validate_format("user@example.com", "email"));
    }

    #[test]
    fn test_email_invalid() {
        assert!(!validate_format("not-an-email", "email"));
    }

    #[test]
    fn test_email_invalid_shapes() {
        assert!(!validate_format("user@example", "email"));
        assert!(!validate_format("user name@example.com", "email"));
        assert!(!validate_format("user@@example.com", "email"));
    }

    #[test]
    fn test_uri_valid() {
        assert!(validate_format("https://example.com/path", "uri"));
    }

    #[test]
    fn test_uri_invalid() {
        assert!(!validate_format("example.com/path", "uri"));
        assert!(!validate_format("https://", "uri"));
        assert!(!validate_format("https://example.com/a b", "uri"));
    }

    #[test]
    fn test_uuid_valid() {
        assert!(validate_format(
            "550e8400-e29b-41d4-a716-446655440000",
            "uuid"
        ));
    }

    #[test]
    fn test_uuid_uppercase() {
        assert!(validate_format(
            "550E8400-E29B-41D4-A716-446655440000",
            "uuid"
        ));
    }

    #[test]
    fn test_uuid_invalid() {
        assert!(!validate_format("not-a-uuid", "uuid"));
        assert!(!validate_format("550e8400e29b41d4a716446655440000", "uuid"));
        assert!(!validate_format(
            "550e8400-e29b-41d4-a716-44665544000g",
            "uuid"
        ));
    }

    #[test]
    fn test_unknown_format_passes() {
        assert!(validate_format("anything", "unknown-format"));
    }
}