use std::sync::OnceLock;
use regex::Regex;
use crate::schema::types::{CompiledSchema, SimpleType, TypeDef};
/// The XSD built-in simple types that receive dedicated lexical validation.
///
/// Each variant corresponds to exactly one built-in type name recognised by
/// `PrimitiveKind::from_type_name` (with or without the `xs:` prefix).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrimitiveKind {
    /// `xs:boolean`
    Boolean,
    /// `xs:decimal`
    Decimal,
    /// `xs:float`
    Float,
    /// `xs:double`
    Double,
    /// `xs:integer`
    Integer,
    /// `xs:long`
    Long,
    /// `xs:int`
    Int,
    /// `xs:short`
    Short,
    /// `xs:byte`
    Byte,
    /// `xs:nonNegativeInteger`
    NonNegativeInteger,
    /// `xs:positiveInteger`
    PositiveInteger,
    /// `xs:nonPositiveInteger`
    NonPositiveInteger,
    /// `xs:negativeInteger`
    NegativeInteger,
    /// `xs:unsignedLong`
    UnsignedLong,
    /// `xs:unsignedInt`
    UnsignedInt,
    /// `xs:unsignedShort`
    UnsignedShort,
    /// `xs:unsignedByte`
    UnsignedByte,
    /// `xs:date`
    Date,
    /// `xs:dateTime`
    DateTime,
    /// `xs:time`
    Time,
    /// `xs:gYear`
    GYear,
    /// `xs:gYearMonth`
    GYearMonth,
    /// `xs:gMonth`
    GMonth,
    /// `xs:gMonthDay`
    GMonthDay,
    /// `xs:gDay`
    GDay,
    /// `xs:duration`
    Duration,
    /// `xs:hexBinary`
    HexBinary,
    /// `xs:base64Binary`
    Base64Binary,
    /// `xs:anyURI`
    AnyUri,
    /// `xs:QName`
    QName,
}
impl PrimitiveKind {
    /// Maps a built-in type name to its [`PrimitiveKind`].
    ///
    /// A leading `xs:` prefix is stripped if present; the remaining local name
    /// is matched case-sensitively. Returns `None` for any name that is not in
    /// the table below (for example `string` or `anyType`).
    pub fn from_type_name(name: &str) -> Option<Self> {
        let local = name.strip_prefix("xs:").unwrap_or(name);
        Some(match local {
            "boolean" => Self::Boolean,
            "decimal" => Self::Decimal,
            "float" => Self::Float,
            "double" => Self::Double,
            "integer" => Self::Integer,
            "long" => Self::Long,
            "int" => Self::Int,
            "short" => Self::Short,
            "byte" => Self::Byte,
            "nonNegativeInteger" => Self::NonNegativeInteger,
            "positiveInteger" => Self::PositiveInteger,
            "nonPositiveInteger" => Self::NonPositiveInteger,
            "negativeInteger" => Self::NegativeInteger,
            "unsignedLong" => Self::UnsignedLong,
            "unsignedInt" => Self::UnsignedInt,
            "unsignedShort" => Self::UnsignedShort,
            "unsignedByte" => Self::UnsignedByte,
            "date" => Self::Date,
            "dateTime" => Self::DateTime,
            "time" => Self::Time,
            "gYear" => Self::GYear,
            "gYearMonth" => Self::GYearMonth,
            "gMonth" => Self::GMonth,
            "gMonthDay" => Self::GMonthDay,
            "gDay" => Self::GDay,
            "duration" => Self::Duration,
            "hexBinary" => Self::HexBinary,
            "base64Binary" => Self::Base64Binary,
            "anyURI" => Self::AnyUri,
            "QName" => Self::QName,
            _ => return None,
        })
    }

    /// Finds the primitive kind a simple type ultimately derives from.
    ///
    /// The type's own name is tried first. Otherwise the `base_type` chain is
    /// followed through `schema`, checking both the referenced name and the
    /// name of the type it resolves to at every step.
    ///
    /// Returns `None` when a type in the chain has no `base_type`, when a name
    /// cannot be found in `schema`, when the chain reaches a complex type, or
    /// when no primitive is found within 16 steps.
    pub fn resolve(schema: &CompiledSchema, simple: &SimpleType) -> Option<Self> {
        if let Some(kind) = Self::from_type_name(&simple.name) {
            return Some(kind);
        }
        let mut current = simple.base_type.clone()?;
        // Bounded walk so that a cyclic base-type chain cannot loop forever.
        for _ in 0..16 {
            if let Some(kind) = Self::from_type_name(&current) {
                return Some(kind);
            }
            let next = match schema.get_type(&current)? {
                TypeDef::Simple(s) => s,
                TypeDef::Complex(_) => return None,
            };
            if let Some(kind) = Self::from_type_name(&next.name) {
                return Some(kind);
            }
            current = next.base_type.clone()?;
        }
        None
    }

    /// Checks that `raw` is a valid lexical form for this kind.
    ///
    /// The value is whitespace-collapsed before it is checked. `Float` shares
    /// the `Double` pattern. The kinds `Time`, `GYearMonth`, `GMonth`,
    /// `GMonthDay`, `GDay`, `Duration`, `HexBinary`, `Base64Binary`, `AnyUri`
    /// and `QName` have no checker here and always return `Ok(())`.
    ///
    /// # Errors
    ///
    /// Returns [`PrimitiveError::InvalidLexical`] when the text does not have
    /// the required shape and [`PrimitiveError::OutOfRange`] when it is
    /// well-formed but violates a range or sign constraint.
    pub fn validate(&self, raw: &str) -> Result<(), PrimitiveError> {
        let normalized = collapse(raw);
        let v = normalized.as_str();
        match self {
            Self::Boolean => validate_with_regex(v, boolean_regex(), "boolean"),
            Self::Decimal => validate_with_regex(v, decimal_regex(), "decimal"),
            Self::Float => validate_with_regex(v, double_regex(), "float"),
            Self::Double => validate_with_regex(v, double_regex(), "double"),
            Self::Integer => validate_integer_lexical(v, "integer"),
            Self::Long => validate_bounded_integer(v, i64::MIN as i128, i64::MAX as i128, "long"),
            Self::Int => validate_bounded_integer(v, i32::MIN as i128, i32::MAX as i128, "int"),
            Self::Short => validate_bounded_integer(v, i16::MIN as i128, i16::MAX as i128, "short"),
            Self::Byte => validate_bounded_integer(v, i8::MIN as i128, i8::MAX as i128, "byte"),
            Self::NonNegativeInteger => {
                validate_signed_integer(v, "nonNegativeInteger", SignReq::NonNegative)
            }
            Self::PositiveInteger => {
                validate_signed_integer(v, "positiveInteger", SignReq::Positive)
            }
            Self::NonPositiveInteger => {
                validate_signed_integer(v, "nonPositiveInteger", SignReq::NonPositive)
            }
            Self::NegativeInteger => {
                validate_signed_integer(v, "negativeInteger", SignReq::Negative)
            }
            // Unsigned kinds: sign check first (for its specific message),
            // then the upper bound.
            Self::UnsignedLong => {
                validate_signed_integer(v, "unsignedLong", SignReq::NonNegative)?;
                validate_unsigned_range(v, u64::MAX as u128, "unsignedLong")
            }
            Self::UnsignedInt => {
                validate_signed_integer(v, "unsignedInt", SignReq::NonNegative)?;
                validate_unsigned_range(v, u32::MAX as u128, "unsignedInt")
            }
            Self::UnsignedShort => {
                validate_signed_integer(v, "unsignedShort", SignReq::NonNegative)?;
                validate_unsigned_range(v, u16::MAX as u128, "unsignedShort")
            }
            Self::UnsignedByte => {
                validate_signed_integer(v, "unsignedByte", SignReq::NonNegative)?;
                validate_unsigned_range(v, u8::MAX as u128, "unsignedByte")
            }
            Self::Date => validate_date(v),
            Self::DateTime => validate_datetime(v),
            Self::GYear => validate_gyear(v),
            // No lexical checker for these kinds: every value is accepted.
            Self::Time
            | Self::GYearMonth
            | Self::GMonth
            | Self::GMonthDay
            | Self::GDay
            | Self::Duration
            | Self::HexBinary
            | Self::Base64Binary
            | Self::AnyUri
            | Self::QName => Ok(()),
        }
    }
}
/// Reason a value was rejected by one of the primitive validators in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PrimitiveError {
/// The text does not have the lexical shape required by the type.
InvalidLexical {
/// Type name without the `xs:` prefix (the `Display` impl adds the prefix).
kind: &'static str,
/// The offending text.
value: String,
},
/// The text is well-formed but violates a range, sign or calendar constraint.
OutOfRange {
/// The offending text.
value: String,
/// Phrase describing the violated constraint; `Display` prints it right
/// after the quoted value.
constraint: &'static str,
},
}
/// Human-readable rendering: the offending value in single quotes followed by
/// either the expected `xs:` type or the violated constraint.
impl std::fmt::Display for PrimitiveError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::OutOfRange { value, constraint } => write!(f, "'{value}' {constraint}"),
            Self::InvalidLexical { kind, value } => {
                write!(f, "'{value}' is not a valid xs:{kind}")
            }
        }
    }
}
// Empty impl: the trait's default methods are used, on top of the `Debug`
// derive and the `Display` impl for `PrimitiveError`.
impl std::error::Error for PrimitiveError {}
/// Lazily compiled pattern for `boolean`: exactly `true`, `false`, `1` or `0`.
fn boolean_regex() -> &'static Regex {
    const PATTERN: &str = r"^(?:true|false|1|0)$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Lazily compiled pattern for integers: an optional sign followed by one or
/// more ASCII digits.
fn integer_regex() -> &'static Regex {
    const PATTERN: &str = r"^[+-]?[0-9]+$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Lazily compiled pattern for `decimal`: an optional sign, then either digits
/// with an optional fractional part (`1`, `1.`, `1.5`) or a bare fraction (`.5`).
fn decimal_regex() -> &'static Regex {
    const PATTERN: &str = r"^[+-]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Lazily compiled pattern for `double` (also used for `float`): a decimal
/// mantissa with an optional `e`/`E` exponent, or one of `INF` (optionally
/// signed) and `NaN`.
fn double_regex() -> &'static Regex {
    const PATTERN: &str =
        r"^(?:[+-]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[eE][+-]?[0-9]+)?|[+-]?INF|NaN)$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Lazily compiled pattern for `date`.
///
/// Capture groups: 1 = optional `-` sign, 2 = year (four or more digits),
/// 3 = month, 4 = day, 5 = optional timezone (`Z` or `±hh:mm`).
fn date_regex() -> &'static Regex {
    const PATTERN: &str =
        r"^(-?)([0-9]{4,})-([0-9]{2})-([0-9]{2})(Z|[+-][0-9]{2}:[0-9]{2})?$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Lazily compiled pattern for `gYear`.
///
/// Capture groups: 1 = optional `-` sign, 2 = year (four or more digits),
/// 3 = optional timezone (`Z` or `±hh:mm`).
fn gyear_regex() -> &'static Regex {
    const PATTERN: &str = r"^(-?)([0-9]{4,})(Z|[+-][0-9]{2}:[0-9]{2})?$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Lazily compiled pattern for `dateTime`.
///
/// Capture groups: 1 = optional `-` sign, 2 = year, 3 = month, 4 = day,
/// 5 = hour, 6 = minute, 7 = whole seconds, 8 = optional timezone. The
/// fractional-seconds part is matched but not captured.
fn datetime_regex() -> &'static Regex {
    const PATTERN: &str = r"^(-?)([0-9]{4,})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(?:\.[0-9]+)?(Z|[+-][0-9]{2}:[0-9]{2})?$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Accepts `v` if `regex` matches it; otherwise reports it as an invalid
/// lexical form of `kind`.
fn validate_with_regex(v: &str, regex: &Regex, kind: &'static str) -> Result<(), PrimitiveError> {
    if !regex.is_match(v) {
        return Err(PrimitiveError::InvalidLexical {
            kind,
            value: v.to_owned(),
        });
    }
    Ok(())
}
fn validate_integer_lexical(v: &str, kind: &'static str) -> Result<(), PrimitiveError> {
validate_with_regex(v, integer_regex(), kind)
}
/// Checks that `v` is a well-formed integer whose value lies in `min..=max`.
///
/// A literal too large to fit in an `i128` is reported as out of range, the
/// same as one that parses but falls outside the bounds.
fn validate_bounded_integer(
    v: &str,
    min: i128,
    max: i128,
    kind: &'static str,
) -> Result<(), PrimitiveError> {
    validate_integer_lexical(v, kind)?;
    let unsigned_plus = v.strip_prefix('+').unwrap_or(v);
    match unsigned_plus.parse::<i128>() {
        Ok(n) if (min..=max).contains(&n) => Ok(()),
        _ => Err(PrimitiveError::OutOfRange {
            value: v.to_string(),
            constraint: "is outside the type's representable range",
        }),
    }
}
/// Checks that the integer literal `v` does not exceed `max`.
///
/// `v` may carry a leading sign. A `-` followed only by zeros (for example
/// `-0`) denotes zero and is accepted, consistent with how
/// `validate_signed_integer` treats it; any other negative literal, and any
/// literal that does not fit in a `u128`, is out of range.
///
/// `_kind` is unused and kept for signature compatibility with callers.
fn validate_unsigned_range(v: &str, max: u128, _kind: &'static str) -> Result<(), PrimitiveError> {
    let out_of_range = || PrimitiveError::OutOfRange {
        value: v.to_string(),
        constraint: "is outside the type's representable range",
    };

    // `u128::from_str` rejects any leading '-', so negative zero is handled here.
    if let Some(digits) = v.strip_prefix('-') {
        let is_zero = !digits.is_empty() && digits.bytes().all(|b| b == b'0');
        return if is_zero { Ok(()) } else { Err(out_of_range()) };
    }

    let digits = v.strip_prefix('+').unwrap_or(v);
    match digits.parse::<u128>() {
        Ok(n) if n <= max => Ok(()),
        _ => Err(out_of_range()),
    }
}
/// Sign constraint enforced by `validate_signed_integer`.
#[derive(Debug, Clone, Copy)]
enum SignReq {
/// Value must be `>= 0`.
NonNegative,
/// Value must be `>= 1`.
Positive,
/// Value must be `<= 0`.
NonPositive,
/// Value must be `<= -1`.
Negative,
}
/// Checks that `v` is a well-formed integer whose sign satisfies `requirement`.
///
/// The sign is decided from the text alone (leading `-` plus at least one
/// non-zero digit), so literals of any length are handled and `-0`/`+0` count
/// as zero.
fn validate_signed_integer(
    v: &str,
    kind: &'static str,
    requirement: SignReq,
) -> Result<(), PrimitiveError> {
    validate_integer_lexical(v, kind)?;

    let (has_minus, magnitude) = match v.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, v.strip_prefix('+').unwrap_or(v)),
    };
    let nonzero = magnitude.bytes().any(|b| b != b'0');
    let strictly_negative = has_minus && nonzero;
    let strictly_positive = !has_minus && nonzero;

    let (satisfied, constraint) = match requirement {
        SignReq::NonNegative => (!strictly_negative, "must be greater than or equal to 0"),
        SignReq::Positive => (strictly_positive, "must be greater than or equal to 1"),
        SignReq::NonPositive => (!strictly_positive, "must be less than or equal to 0"),
        SignReq::Negative => (strictly_negative, "must be less than or equal to -1"),
    };

    if satisfied {
        return Ok(());
    }
    Err(PrimitiveError::OutOfRange {
        value: v.to_string(),
        constraint,
    })
}
/// Validates a `date` literal: lexical shape, then calendar validity of the
/// year/month/day, then the timezone offset if one is present.
fn validate_date(v: &str) -> Result<(), PrimitiveError> {
    let lexical_error = || PrimitiveError::InvalidLexical {
        kind: "date",
        value: v.to_string(),
    };
    let caps = date_regex().captures(v).ok_or_else(lexical_error)?;

    // A year too long for i64 is reported as a lexical error.
    let magnitude: i64 = caps[2].parse().map_err(|_| lexical_error())?;
    let year = if &caps[1] == "-" { -magnitude } else { magnitude };
    // The pattern guarantees exactly two ASCII digits for month and day.
    let month: u32 = caps[3].parse().unwrap();
    let day: u32 = caps[4].parse().unwrap();
    validate_year_month_day(year, month, day, v)?;

    match caps.get(5) {
        Some(tz) => validate_timezone(tz.as_str(), v),
        None => Ok(()),
    }
}
/// Validates a `gYear` literal: lexical shape, a year that fits in an `i64`,
/// and the timezone offset if one is present.
fn validate_gyear(v: &str) -> Result<(), PrimitiveError> {
    let lexical_error = || PrimitiveError::InvalidLexical {
        kind: "gYear",
        value: v.to_string(),
    };
    let caps = gyear_regex().captures(v).ok_or_else(lexical_error)?;

    // The parsed value is not needed; only that the year fits in an i64.
    caps[2].parse::<i64>().map_err(|_| lexical_error())?;

    match caps.get(3) {
        Some(tz) => validate_timezone(tz.as_str(), v),
        None => Ok(()),
    }
}
/// Validates a `dateTime` literal.
///
/// Checks, in order: lexical shape, calendar validity of the date part, hour
/// (`00`-`23`, or the end-of-day form `24:00:00`), minute (`00`-`59`), second
/// (`00`-`60`), and the timezone offset if present.
///
/// The end-of-day form is accepted only when the fractional seconds, if any,
/// are all zeros: `24:00:00.000` is valid, `24:00:00.5` is not.
fn validate_datetime(v: &str) -> Result<(), PrimitiveError> {
    let lexical_error = || PrimitiveError::InvalidLexical {
        kind: "dateTime",
        value: v.to_string(),
    };
    let out_of_range = |constraint: &'static str| PrimitiveError::OutOfRange {
        value: v.to_string(),
        constraint,
    };

    let caps = datetime_regex().captures(v).ok_or_else(lexical_error)?;
    let two_digits = |group: usize| -> u32 {
        caps[group]
            .parse()
            .expect("pattern guarantees exactly two ASCII digits")
    };

    // A year too long for i64 is reported as a lexical error.
    let magnitude: i64 = caps[2].parse().map_err(|_| lexical_error())?;
    let year = if &caps[1] == "-" { -magnitude } else { magnitude };
    let (month, day) = (two_digits(3), two_digits(4));
    let (hour, minute, second) = (two_digits(5), two_digits(6), two_digits(7));
    validate_year_month_day(year, month, day, v)?;

    // The fraction is not a capture group, so read it from the text that
    // follows the whole-seconds group.
    let seconds_end = caps
        .get(7)
        .expect("seconds group always participates in a match")
        .end();
    let has_nonzero_fraction = v[seconds_end..].strip_prefix('.').is_some_and(|rest| {
        rest.bytes()
            .take_while(|b| b.is_ascii_digit())
            .any(|b| b != b'0')
    });

    let end_of_day = hour == 24 && minute == 0 && second == 0 && !has_nonzero_fraction;
    if hour > 23 && !end_of_day {
        return Err(out_of_range("hour-of-day must be 00-23 (or 24:00:00)"));
    }
    if minute > 59 {
        return Err(out_of_range("minute must be 00-59"));
    }
    if second > 60 {
        return Err(out_of_range("second must be 00-60"));
    }

    match caps.get(8) {
        Some(tz) => validate_timezone(tz.as_str(), v),
        None => Ok(()),
    }
}
/// Checks that `month` is 1-12 and that `day` exists in that month of `year`
/// (February has 29 days in leap years). `raw` is the original text, used only
/// for the error value.
fn validate_year_month_day(
    year: i64,
    month: u32,
    day: u32,
    raw: &str,
) -> Result<(), PrimitiveError> {
    let reject = |constraint: &'static str| -> Result<(), PrimitiveError> {
        Err(PrimitiveError::OutOfRange {
            value: raw.to_string(),
            constraint,
        })
    };

    let days_in_month = match month {
        2 if is_leap_year(year) => 29,
        2 => 28,
        4 | 6 | 9 | 11 => 30,
        1..=12 => 31,
        _ => return reject("month must be between 01 and 12"),
    };

    if day == 0 || day > days_in_month {
        return reject("day is not valid for the given month");
    }
    Ok(())
}
/// Gregorian leap-year rule: divisible by 4, except century years, which must
/// also be divisible by 400.
fn is_leap_year(year: i64) -> bool {
    year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)
}
/// Validates a timezone suffix that is either `Z` or of the form `±hh:mm`.
///
/// Minutes must be `00`-`59` and the offset may not exceed 14:00 in either
/// direction. `raw` is the complete literal, used for the value in range errors.
fn validate_timezone(tz: &str, raw: &str) -> Result<(), PrimitiveError> {
    if tz == "Z" {
        return Ok(());
    }

    let parse_field = |field: &str| -> Result<u32, PrimitiveError> {
        field.parse().map_err(|_| PrimitiveError::InvalidLexical {
            kind: "timezone",
            value: tz.to_string(),
        })
    };
    // Layout is sign, two hour digits, ':', two minute digits.
    let hours = parse_field(&tz[1..3])?;
    let minutes = parse_field(&tz[4..6])?;

    let violated = if minutes > 59 {
        Some("timezone minute offset must be 00-59")
    } else if hours > 14 || (hours == 14 && minutes > 0) {
        Some("timezone offset out of range (allowed: -14:00..+14:00)")
    } else {
        None
    };

    match violated {
        Some(constraint) => Err(PrimitiveError::OutOfRange {
            value: raw.to_string(),
            constraint,
        }),
        None => Ok(()),
    }
}
/// Applies XSD `whiteSpace="collapse"` normalisation to `input`.
///
/// Leading and trailing whitespace is removed and every interior run of
/// whitespace becomes a single space. Only the four XML whitespace characters
/// (space, tab, line feed, carriage return) count; other Unicode spaces such
/// as U+00A0 are kept as ordinary content, so a value padded with them is
/// still rejected by the lexical checks.
fn collapse(input: &str) -> String {
    let is_xml_space = |c: char| matches!(c, ' ' | '\t' | '\n' | '\r');

    let mut out = String::with_capacity(input.len());
    for token in input.split(is_xml_space).filter(|t| !t.is_empty()) {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(token);
    }
    out
}
// Unit tests for name lookup, base-type resolution and the per-kind lexical
// validators.
#[cfg(test)]
mod tests {
use super::*;
use crate::schema::xsd::builtin::register_builtin_types;
// Builds a schema with the built-in types registered.
fn schema() -> CompiledSchema {
let mut s = CompiledSchema::new();
register_builtin_types(&mut s);
s
}
// Builds a simple type with an empty name whose base type is `base`.
fn simple_with_base(base: &str) -> SimpleType {
let mut st = SimpleType::new("");
st.base_type = Some(base.to_string());
st
}
#[test]
fn from_type_name_accepts_prefixed_and_unprefixed() {
assert_eq!(
PrimitiveKind::from_type_name("xs:boolean"),
Some(PrimitiveKind::Boolean)
);
assert_eq!(
PrimitiveKind::from_type_name("boolean"),
Some(PrimitiveKind::Boolean)
);
assert_eq!(
PrimitiveKind::from_type_name("xs:nonNegativeInteger"),
Some(PrimitiveKind::NonNegativeInteger)
);
assert_eq!(PrimitiveKind::from_type_name("string"), None);
assert_eq!(PrimitiveKind::from_type_name("anyType"), None);
}
#[test]
fn resolve_via_built_in_chain() {
let s = schema();
let int_def = match s.get_type("xs:int").unwrap() {
TypeDef::Simple(simple) => simple.clone(),
_ => panic!("xs:int should be SimpleType"),
};
assert_eq!(
PrimitiveKind::resolve(&s, &int_def),
Some(PrimitiveKind::Int)
);
}
// A type with no recognisable name of its own resolves through its base type.
#[test]
fn resolve_anonymous_restriction_of_boolean() {
let s = schema();
let anon = simple_with_base("xs:boolean");
assert_eq!(
PrimitiveKind::resolve(&s, &anon),
Some(PrimitiveKind::Boolean)
);
}
#[test]
fn resolve_returns_none_for_string_derivatives() {
let s = schema();
let ncname = match s.get_type("xs:NCName").unwrap() {
TypeDef::Simple(simple) => simple.clone(),
_ => panic!("xs:NCName should be SimpleType"),
};
assert_eq!(PrimitiveKind::resolve(&s, &ncname), None);
}
#[test]
fn boolean_accepts_canonical_literals() {
for v in ["true", "false", "1", "0"] {
assert!(PrimitiveKind::Boolean.validate(v).is_ok(), "{v}");
}
}
// Matching is case-sensitive; empty and whitespace-only input is rejected.
#[test]
fn boolean_rejects_other_forms() {
for v in ["True", "FALSE", "yes", "2", "", " "] {
assert!(PrimitiveKind::Boolean.validate(v).is_err(), "{v}");
}
}
#[test]
fn boolean_collapses_whitespace_before_match() {
assert!(PrimitiveKind::Boolean.validate(" true ").is_ok());
assert!(PrimitiveKind::Boolean.validate("\ntrue\n").is_ok());
}
#[test]
fn integer_lexical_pass() {
for v in ["0", "42", "-42", "+42"] {
assert!(PrimitiveKind::Integer.validate(v).is_ok(), "{v}");
}
}
#[test]
fn integer_lexical_fail() {
for v in ["1.5", "abc", "", "1e2", "+"] {
assert!(PrimitiveKind::Integer.validate(v).is_err(), "{v}");
}
}
// Exercises both i32 boundaries and the first value past each.
#[test]
fn int_enforces_32bit_range() {
assert!(PrimitiveKind::Int.validate("-2147483648").is_ok());
assert!(PrimitiveKind::Int.validate("2147483647").is_ok());
assert!(PrimitiveKind::Int.validate("2147483648").is_err());
assert!(PrimitiveKind::Int.validate("-2147483649").is_err());
}
// `-0` counts as zero and is therefore non-negative.
#[test]
fn non_negative_integer_sign_check() {
assert!(PrimitiveKind::NonNegativeInteger.validate("0").is_ok());
assert!(PrimitiveKind::NonNegativeInteger.validate("100").is_ok());
assert!(PrimitiveKind::NonNegativeInteger.validate("+0").is_ok());
assert!(PrimitiveKind::NonNegativeInteger.validate("-0").is_ok());
assert!(PrimitiveKind::NonNegativeInteger.validate("-1").is_err());
}
#[test]
fn positive_integer_sign_check() {
assert!(PrimitiveKind::PositiveInteger.validate("1").is_ok());
assert!(PrimitiveKind::PositiveInteger.validate("+1").is_ok());
assert!(PrimitiveKind::PositiveInteger.validate("0").is_err());
assert!(PrimitiveKind::PositiveInteger.validate("-0").is_err());
assert!(PrimitiveKind::PositiveInteger.validate("-1").is_err());
}
#[test]
fn decimal_lexical_pass() {
for v in ["0", "1.5", "-1.5", ".5", "1.", "+1.5"] {
assert!(PrimitiveKind::Decimal.validate(v).is_ok(), "{v}");
}
}
#[test]
fn decimal_lexical_fail() {
for v in ["1e2", "abc", "1.5.6", "", "+", ".", "1.2.3"] {
assert!(PrimitiveKind::Decimal.validate(v).is_err(), "{v}");
}
}
#[test]
fn double_lexical_pass() {
for v in ["0", "1.5", "-1.5e-3", "1.2E10", "INF", "-INF", "NaN", ".5"] {
assert!(PrimitiveKind::Double.validate(v).is_ok(), "{v}");
}
}
// The special values are case-sensitive: `inf` and `nan` are rejected.
#[test]
fn double_lexical_fail() {
for v in ["abc", "1.5.6", "inf", "nan", ""] {
assert!(PrimitiveKind::Double.validate(v).is_err(), "{v}");
}
}
#[test]
fn float_shares_double_lexical_space() {
assert!(PrimitiveKind::Float.validate("3.14").is_ok());
assert!(PrimitiveKind::Float.validate("abc").is_err());
}
#[test]
fn date_canonical_forms() {
assert!(PrimitiveKind::Date.validate("2026-05-28").is_ok());
assert!(PrimitiveKind::Date.validate("2026-05-28Z").is_ok());
assert!(PrimitiveKind::Date.validate("2026-05-28+09:00").is_ok());
assert!(PrimitiveKind::Date.validate("2026-05-28-05:00").is_ok());
}
#[test]
fn date_invalid_forms() {
for v in [
"2026-13-01",
"2026-02-30",
"26-05-28",
"2026/05/28",
"abc",
"",
] {
assert!(PrimitiveKind::Date.validate(v).is_err(), "{v}");
}
}
// In order: divisible by 4 (leap), not divisible by 4, divisible by 400
// (leap), divisible by 100 but not by 400 (not leap).
#[test]
fn date_leap_year_handling() {
assert!(PrimitiveKind::Date.validate("2024-02-29").is_ok()); assert!(PrimitiveKind::Date.validate("2023-02-29").is_err()); assert!(PrimitiveKind::Date.validate("2000-02-29").is_ok()); assert!(PrimitiveKind::Date.validate("1900-02-29").is_err()); }
#[test]
fn gyear_canonical_forms() {
assert!(PrimitiveKind::GYear.validate("2026").is_ok());
assert!(PrimitiveKind::GYear.validate("2026Z").is_ok());
assert!(PrimitiveKind::GYear.validate("2026+09:00").is_ok());
}
#[test]
fn gyear_invalid_forms() {
for v in ["26", "2026-05", "abc", ""] {
assert!(PrimitiveKind::GYear.validate(v).is_err(), "{v}");
}
}
#[test]
fn datetime_canonical_forms() {
assert!(
PrimitiveKind::DateTime
.validate("2026-05-28T10:30:00")
.is_ok()
);
assert!(
PrimitiveKind::DateTime
.validate("2026-05-28T10:30:00.123")
.is_ok()
);
assert!(
PrimitiveKind::DateTime
.validate("2026-05-28T10:30:00Z")
.is_ok()
);
assert!(
PrimitiveKind::DateTime
.validate("2026-05-28T10:30:00+09:00")
.is_ok()
);
}
// Rejected inputs, in order: space instead of `T`, missing seconds,
// month 13, non-date text, empty string.
#[test]
fn datetime_invalid_forms() {
for v in [
"2026-05-28 10:30:00", "2026-05-28T10:30", "2026-13-01T10:30:00", "abc",
"",
] {
assert!(PrimitiveKind::DateTime.validate(v).is_err(), "{v}");
}
}
}