use std::collections::HashSet;
use std::sync::Arc;
use regex::Regex;
use crate::error::Result;
/// How whitespace in a value is normalized before the other facets are checked.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WhitespaceHandling {
/// Leave the value exactly as given.
Preserve,
/// Replace every tab, line feed, and carriage return with a single space.
Replace,
/// Turn whitespace into spaces, squeeze runs of spaces down to one, and drop
/// leading and trailing spaces. This is the default mode.
#[default]
Collapse,
}
/// Reason a value was rejected by a facet check.
///
/// Lengths are measured in characters; range bounds are carried in their
/// original string form so they can be echoed back in messages.
#[derive(Debug, Clone)]
pub enum FacetError {
/// The value has fewer characters than the configured minimum length.
TooShort {
value_len: usize,
min_len: usize,
},
/// The value has more characters than the configured maximum length.
TooLong {
value_len: usize,
max_len: usize,
},
/// The value's character count differs from the exact required length.
WrongLength {
value_len: usize,
required_len: usize,
},
/// The numeric value is strictly less than the inclusive lower bound.
BelowMinInclusive {
value: String,
min: String,
},
/// The numeric value is strictly greater than the inclusive upper bound.
AboveMaxInclusive {
value: String,
max: String,
},
/// The numeric value is less than or equal to the exclusive lower bound.
BelowMinExclusive {
value: String,
min: String,
},
/// The numeric value is greater than or equal to the exclusive upper bound.
AboveMaxExclusive {
value: String,
max: String,
},
/// The value failed to match a pattern (patterns are matched fully anchored).
PatternMismatch {
value: String,
pattern: String,
},
/// The value is not a member of the non-empty enumeration set.
NotInEnumeration {
value: String,
allowed: Vec<String>,
},
/// The value has more significant digits than the total-digits limit.
TooManyDigits {
found: usize,
max: usize,
},
/// The value has more digits after the decimal point than allowed.
TooManyFractionDigits {
found: usize,
max: usize,
},
/// A pattern could not be compiled as a regular expression at validation time.
InvalidPattern {
pattern: String,
error: String,
},
}
impl std::fmt::Display for FacetError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
FacetError::TooShort { value_len, min_len } => {
write!(
f,
"value length {} is less than minimum {}",
value_len, min_len
)
}
FacetError::TooLong { value_len, max_len } => {
write!(f, "value length {} exceeds maximum {}", value_len, max_len)
}
FacetError::WrongLength {
value_len,
required_len,
} => {
write!(
f,
"value length {} does not match required {}",
value_len, required_len
)
}
FacetError::BelowMinInclusive { value, min } => {
write!(f, "value '{}' is below minimum '{}'", value, min)
}
FacetError::AboveMaxInclusive { value, max } => {
write!(f, "value '{}' exceeds maximum '{}'", value, max)
}
FacetError::BelowMinExclusive { value, min } => {
write!(f, "value '{}' must be greater than '{}'", value, min)
}
FacetError::AboveMaxExclusive { value, max } => {
write!(f, "value '{}' must be less than '{}'", value, max)
}
FacetError::PatternMismatch { value, pattern } => {
write!(f, "value '{}' does not match pattern '{}'", value, pattern)
}
FacetError::NotInEnumeration { value, allowed } => {
write!(f, "value '{}' not in allowed values: {:?}", value, allowed)
}
FacetError::TooManyDigits { found, max } => {
write!(f, "value has {} digits, maximum is {}", found, max)
}
FacetError::TooManyFractionDigits { found, max } => {
write!(f, "value has {} fraction digits, maximum is {}", found, max)
}
FacetError::InvalidPattern { pattern, error } => {
write!(f, "invalid pattern '{}': {}", pattern, error)
}
}
}
}
// The default `std::error::Error` methods are used as-is: no variant wraps
// another error value, so there is no `source()` to expose.
impl std::error::Error for FacetError {}
/// The set of facets a value is checked against.
///
/// Every field is optional (or empty) by default, meaning "no restriction".
#[derive(Debug, Clone, Default)]
pub struct FacetConstraints {
/// Exact required length, counted in characters.
pub length: Option<usize>,
/// Minimum length, counted in characters.
pub min_length: Option<usize>,
/// Maximum length, counted in characters.
pub max_length: Option<usize>,
/// Inclusive lower bound, kept as text. It is compared numerically (as `f64`)
/// and skipped when either the value or the bound does not parse.
pub min_inclusive: Option<String>,
/// Inclusive upper bound; same parsing rules as `min_inclusive`.
pub max_inclusive: Option<String>,
/// Exclusive lower bound; same parsing rules as `min_inclusive`.
pub min_exclusive: Option<String>,
/// Exclusive upper bound; same parsing rules as `min_inclusive`.
pub max_exclusive: Option<String>,
/// Regular-expression sources. Each one is wrapped as `^(?:…)$` before it is
/// compiled, and a value has to match all of them.
pub patterns: Vec<String>,
/// Compiled patterns filled in by `compile_patterns`. When this is non-empty,
/// validation uses it instead of compiling `patterns` on the fly.
pub compiled_patterns: Vec<Arc<Regex>>,
/// Allowed values; an empty set disables the enumeration check.
pub enumeration: HashSet<String>,
/// Maximum number of significant digits.
pub total_digits: Option<usize>,
/// Maximum number of digits after the decimal point.
pub fraction_digits: Option<usize>,
/// Whitespace normalization applied to the value before any other check.
pub whitespace: WhitespaceHandling,
}
impl FacetConstraints {
    /// Creates a constraint set with no facets configured.
    pub fn new() -> Self {
        Self::default()
    }

    /// Requires values to be exactly `len` characters long.
    pub fn with_length(mut self, len: usize) -> Self {
        self.length = Some(len);
        self
    }

    /// Requires values to be at least `len` characters long.
    pub fn with_min_length(mut self, len: usize) -> Self {
        self.min_length = Some(len);
        self
    }

    /// Requires values to be at most `len` characters long.
    pub fn with_max_length(mut self, len: usize) -> Self {
        self.max_length = Some(len);
        self
    }

    /// Sets the inclusive lower bound (kept as text, parsed at validation time).
    pub fn with_min_inclusive(mut self, value: impl Into<String>) -> Self {
        self.min_inclusive = Some(value.into());
        self
    }

    /// Sets the inclusive upper bound (kept as text, parsed at validation time).
    pub fn with_max_inclusive(mut self, value: impl Into<String>) -> Self {
        self.max_inclusive = Some(value.into());
        self
    }

    /// Sets the exclusive lower bound (kept as text, parsed at validation time).
    pub fn with_min_exclusive(mut self, value: impl Into<String>) -> Self {
        self.min_exclusive = Some(value.into());
        self
    }

    /// Sets the exclusive upper bound (kept as text, parsed at validation time).
    pub fn with_max_exclusive(mut self, value: impl Into<String>) -> Self {
        self.max_exclusive = Some(value.into());
        self
    }

    /// Limits the number of significant digits a value may have.
    pub fn with_total_digits(mut self, digits: usize) -> Self {
        self.total_digits = Some(digits);
        self
    }

    /// Limits the number of digits a value may have after the decimal point.
    pub fn with_fraction_digits(mut self, digits: usize) -> Self {
        self.fraction_digits = Some(digits);
        self
    }

    /// Adds a pattern that every value must match in full.
    ///
    /// Any previously compiled patterns are discarded: they no longer cover
    /// the full pattern list, and a stale cache would cause the new pattern
    /// to be silently skipped. Call [`Self::compile_patterns`] again to
    /// rebuild the cache; until then patterns are compiled during validation.
    pub fn with_pattern(mut self, pattern: impl Into<String>) -> Self {
        self.patterns.push(pattern.into());
        self.compiled_patterns.clear();
        self
    }

    /// Adds `values` to the set of allowed values.
    pub fn with_enumeration(mut self, values: impl IntoIterator<Item = impl Into<String>>) -> Self {
        self.enumeration.extend(values.into_iter().map(Into::into));
        self
    }

    /// Selects how whitespace is normalized before validation.
    pub fn with_whitespace(mut self, mode: WhitespaceHandling) -> Self {
        self.whitespace = mode;
        self
    }

    /// Rebuilds `compiled_patterns` from `patterns`.
    ///
    /// Each pattern is wrapped as `^(?:…)$` so it has to match the whole
    /// value. Compilation is best-effort: a pattern that fails to compile is
    /// logged as a warning and left out of the cache, and this method still
    /// returns `Ok(())`.
    pub fn compile_patterns(&mut self) -> Result<()> {
        self.compiled_patterns.clear();
        self.compiled_patterns.reserve(self.patterns.len());
        for pattern in &self.patterns {
            let anchored = format!("^(?:{})$", pattern);
            match Regex::new(&anchored) {
                Ok(regex) => self.compiled_patterns.push(Arc::new(regex)),
                Err(e) => {
                    tracing::warn!("Invalid XSD pattern '{}': {}", pattern, e);
                }
            }
        }
        Ok(())
    }

    /// Returns `true` when there is nothing to compile or a compiled cache exists.
    ///
    /// This only checks that at least one compiled pattern is present; it does
    /// not prove that every entry in `patterns` compiled successfully.
    pub fn patterns_compiled(&self) -> bool {
        self.patterns.is_empty() || !self.compiled_patterns.is_empty()
    }
}
/// Validates string values against a borrowed [`FacetConstraints`].
pub struct FacetValidator<'a> {
/// Constraint set consulted by every check; held by shared reference.
constraints: &'a FacetConstraints,
}
impl<'a> FacetValidator<'a> {
pub fn new(constraints: &'a FacetConstraints) -> Self {
Self { constraints }
}
pub fn validate(&self, value: &str) -> std::result::Result<(), FacetError> {
let processed = self.apply_whitespace(value);
let value = processed.as_deref().unwrap_or(value);
self.validate_length(value)?;
self.validate_patterns(value)?;
self.validate_enumeration(value)?;
self.validate_numeric_constraints(value)?;
Ok(())
}
fn apply_whitespace(&self, value: &str) -> Option<String> {
match self.constraints.whitespace {
WhitespaceHandling::Preserve => None,
WhitespaceHandling::Replace => {
Some(
value
.chars()
.map(|c| {
if c == '\t' || c == '\n' || c == '\r' {
' '
} else {
c
}
})
.collect(),
)
}
WhitespaceHandling::Collapse => {
let replaced: String = value
.chars()
.map(|c| if c.is_whitespace() { ' ' } else { c })
.collect();
let mut result = String::new();
let mut prev_space = true; for c in replaced.chars() {
if c == ' ' {
if !prev_space {
result.push(c);
}
prev_space = true;
} else {
result.push(c);
prev_space = false;
}
}
if result.ends_with(' ') {
result.pop();
}
Some(result)
}
}
}
fn validate_length(&self, value: &str) -> std::result::Result<(), FacetError> {
let len = value.chars().count();
if let Some(exact) = self.constraints.length {
if len != exact {
return Err(FacetError::WrongLength {
value_len: len,
required_len: exact,
});
}
}
if let Some(min) = self.constraints.min_length {
if len < min {
return Err(FacetError::TooShort {
value_len: len,
min_len: min,
});
}
}
if let Some(max) = self.constraints.max_length {
if len > max {
return Err(FacetError::TooLong {
value_len: len,
max_len: max,
});
}
}
Ok(())
}
fn validate_patterns(&self, value: &str) -> std::result::Result<(), FacetError> {
if self.constraints.patterns.is_empty() {
return Ok(());
}
if !self.constraints.compiled_patterns.is_empty() {
for (i, regex) in self.constraints.compiled_patterns.iter().enumerate() {
if !regex.is_match(value) {
return Err(FacetError::PatternMismatch {
value: value.to_string(),
pattern: self
.constraints
.patterns
.get(i)
.cloned()
.unwrap_or_default(),
});
}
}
} else {
for pattern in &self.constraints.patterns {
let anchored = format!("^(?:{})$", pattern);
match Regex::new(&anchored) {
Ok(regex) => {
if !regex.is_match(value) {
return Err(FacetError::PatternMismatch {
value: value.to_string(),
pattern: pattern.clone(),
});
}
}
Err(e) => {
return Err(FacetError::InvalidPattern {
pattern: pattern.clone(),
error: e.to_string(),
});
}
}
}
}
Ok(())
}
fn validate_enumeration(&self, value: &str) -> std::result::Result<(), FacetError> {
if !self.constraints.enumeration.is_empty() && !self.constraints.enumeration.contains(value)
{
return Err(FacetError::NotInEnumeration {
value: value.to_string(),
allowed: self.constraints.enumeration.iter().cloned().collect(),
});
}
Ok(())
}
fn validate_numeric_constraints(&self, value: &str) -> std::result::Result<(), FacetError> {
if let Ok(num) = value.parse::<f64>() {
if let Some(ref min) = self.constraints.min_inclusive {
if let Ok(min_val) = min.parse::<f64>() {
if num < min_val {
return Err(FacetError::BelowMinInclusive {
value: value.to_string(),
min: min.clone(),
});
}
}
}
if let Some(ref max) = self.constraints.max_inclusive {
if let Ok(max_val) = max.parse::<f64>() {
if num > max_val {
return Err(FacetError::AboveMaxInclusive {
value: value.to_string(),
max: max.clone(),
});
}
}
}
if let Some(ref min) = self.constraints.min_exclusive {
if let Ok(min_val) = min.parse::<f64>() {
if num <= min_val {
return Err(FacetError::BelowMinExclusive {
value: value.to_string(),
min: min.clone(),
});
}
}
}
if let Some(ref max) = self.constraints.max_exclusive {
if let Ok(max_val) = max.parse::<f64>() {
if num >= max_val {
return Err(FacetError::AboveMaxExclusive {
value: value.to_string(),
max: max.clone(),
});
}
}
}
}
if let Some(max_digits) = self.constraints.total_digits {
let digit_count = count_significant_digits(value);
if digit_count > max_digits {
return Err(FacetError::TooManyDigits {
found: digit_count,
max: max_digits,
});
}
}
if let Some(max_fraction) = self.constraints.fraction_digits {
let fraction_count = count_fraction_digits(value);
if fraction_count > max_fraction {
return Err(FacetError::TooManyFractionDigits {
found: fraction_count,
max: max_fraction,
});
}
}
Ok(())
}
}
/// Counts the digits of a decimal literal that count toward a total-digits limit.
///
/// Leading signs, leading zeros of the integer part, and trailing zeros of
/// the fractional part are ignored. Trailing zeros of the integer part are
/// significant (`"100"` has three digits), so zeros are only trimmed from the
/// end of the part after the decimal point.
fn count_significant_digits(value: &str) -> usize {
    let unsigned = value.trim_start_matches('-').trim_start_matches('+');
    let (integer, fraction) = unsigned.split_once('.').unwrap_or((unsigned, ""));
    let integer = integer.trim_start_matches('0');
    let fraction = fraction.trim_end_matches('0');
    integer
        .chars()
        .chain(fraction.chars())
        .filter(char::is_ascii_digit)
        .count()
}
/// Counts the ASCII digits that follow the first `.` in `value`.
///
/// Returns `0` when `value` contains no `.`. Trailing zeros are counted, so
/// `"1.0"` has one fraction digit.
fn count_fraction_digits(value: &str) -> usize {
    match value.split_once('.') {
        // ASCII digits are single bytes in UTF-8, so a byte scan sees the
        // same digits a character scan would.
        Some((_, fraction)) => fraction.bytes().filter(u8::is_ascii_digit).count(),
        None => 0,
    }
}
// Unit tests for the facet builder, the validator, and the digit-counting helpers.
#[cfg(test)]
mod tests {
use super::*;
// Both length bounds are inclusive: 2 and 5 characters pass, 1 and 6 fail.
#[test]
fn test_length_validation() {
let constraints = FacetConstraints::new()
.with_min_length(2)
.with_max_length(5);
let validator = FacetValidator::new(&constraints);
assert!(validator.validate("ab").is_ok());
assert!(validator.validate("abcde").is_ok());
assert!(validator.validate("a").is_err());
assert!(validator.validate("abcdef").is_err());
}
// An exact length rejects both shorter and longer values.
#[test]
fn test_exact_length() {
let constraints = FacetConstraints::new().with_length(3);
let validator = FacetValidator::new(&constraints);
assert!(validator.validate("abc").is_ok());
assert!(validator.validate("ab").is_err());
assert!(validator.validate("abcd").is_err());
}
// Only values present in the enumeration set are accepted.
#[test]
fn test_enumeration() {
let constraints = FacetConstraints::new().with_enumeration(["red", "green", "blue"]);
let validator = FacetValidator::new(&constraints);
assert!(validator.validate("red").is_ok());
assert!(validator.validate("green").is_ok());
assert!(validator.validate("yellow").is_err());
}
// Patterns are anchored, so a partial match ("hello123") and the empty
// string both fail against `[a-z]+`. Compiling keeps one regex per pattern.
#[test]
fn test_pattern() {
let mut constraints = FacetConstraints::new().with_pattern(r"[a-z]+");
constraints.compile_patterns().unwrap();
let validator = FacetValidator::new(&constraints);
assert!(validator.validate("hello").is_ok());
assert!(validator.validate("world").is_ok());
assert!(validator.validate("Hello").is_err());
assert!(validator.validate("hello123").is_err());
assert!(validator.validate("").is_err());
assert_eq!(constraints.patterns.len(), 1);
assert_eq!(constraints.compiled_patterns.len(), 1);
}
// With several patterns a value has to satisfy all of them: "hi" is too
// short for `.{3,}` and "Hello" fails `[a-z]+`.
#[test]
fn test_pattern_multiple() {
let mut constraints = FacetConstraints::new()
.with_pattern(r"[a-z]+")
.with_pattern(r".{3,}"); constraints.compile_patterns().unwrap();
let validator = FacetValidator::new(&constraints);
assert!(validator.validate("hello").is_ok());
assert!(validator.validate("hi").is_err());
assert!(validator.validate("Hello").is_err());
}
// Inclusive bounds accept the end points 0 and 100 and reject -1 and 101.
#[test]
fn test_numeric_range() {
let constraints = FacetConstraints::new()
.with_min_inclusive("0")
.with_max_inclusive("100");
let validator = FacetValidator::new(&constraints);
assert!(validator.validate("0").is_ok());
assert!(validator.validate("50").is_ok());
assert!(validator.validate("100").is_ok());
assert!(validator.validate("-1").is_err());
assert!(validator.validate("101").is_err());
}
// Collapse mode strips surrounding spaces before the enumeration lookup.
#[test]
fn test_whitespace_collapse() {
let constraints = FacetConstraints::new()
.with_whitespace(WhitespaceHandling::Collapse)
.with_enumeration(["hello world"]);
let validator = FacetValidator::new(&constraints);
assert!(validator.validate("hello world").is_ok());
assert!(validator.validate(" hello world ").is_ok());
}
// Up to two digits after the decimal point pass; a third one fails.
#[test]
fn test_fraction_digits() {
let constraints = FacetConstraints {
fraction_digits: Some(2),
..Default::default()
};
let validator = FacetValidator::new(&constraints);
assert!(validator.validate("1.23").is_ok());
assert!(validator.validate("1.2").is_ok());
assert!(validator.validate("1").is_ok());
assert!(validator.validate("1.234").is_err());
}
// Sign, decimal point, and leading zeros do not count as significant digits.
#[test]
fn test_count_significant_digits() {
assert_eq!(count_significant_digits("123"), 3);
assert_eq!(count_significant_digits("1.23"), 3);
assert_eq!(count_significant_digits("0.123"), 3);
assert_eq!(count_significant_digits("-123"), 3);
assert_eq!(count_significant_digits("00123"), 3);
}
// Fraction digits are counted lexically, so "1.0" has one.
#[test]
fn test_count_fraction_digits() {
assert_eq!(count_fraction_digits("1.23"), 2);
assert_eq!(count_fraction_digits("1"), 0);
assert_eq!(count_fraction_digits("1.0"), 1);
assert_eq!(count_fraction_digits("1.234"), 3);
}
}