1use parking_lot::RwLock;
114use roxmltree::{Document as XmlDocument, Node, ParsingOptions};
115use std::collections::HashMap;
116use std::fmt;
117use std::fs;
118use std::path::{Path, PathBuf};
119use std::sync::Arc;
120
/// Errors produced while loading a schema or validating a document against it.
///
/// Variants that relate to a location in the validated document carry an
/// optional `line`; it is `None` when no position is available.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationError {
    /// The schema text could not be parsed or is structurally unusable.
    SchemaParseError {
        /// Human-readable description of the problem.
        message: String,
    },

    /// The document under validation is not well-formed XML.
    DocumentParseError {
        /// Human-readable description of the problem.
        message: String,
        /// Line of the parse failure, if known.
        line: Option<usize>,
        /// Column of the parse failure, if known.
        column: Option<usize>,
    },

    /// An element did not match what the schema expects at that position.
    ElementValidationError {
        /// Name of the offending element.
        element: String,
        /// Description of what was expected.
        expected: String,
        /// Description of what was found instead.
        found: String,
        /// Line of the element, if known.
        line: Option<usize>,
    },

    /// An attribute value failed validation.
    AttributeValidationError {
        /// Name of the element carrying the attribute.
        element: String,
        /// Name of the offending attribute.
        attribute: String,
        /// Human-readable description of the problem.
        message: String,
        /// Line of the element, if known.
        line: Option<usize>,
    },

    /// A value does not conform to its declared simple type.
    TypeValidationError {
        /// Name of the element whose content was checked.
        name: String,
        /// Name of the type the value was checked against.
        expected_type: String,
        /// The value that failed the check.
        value: String,
        /// Line of the element, if known.
        line: Option<usize>,
    },

    /// An element occurs fewer or more times than the schema allows.
    CardinalityError {
        /// Name of the element whose occurrence count is out of range.
        element: String,
        /// Minimum number of occurrences allowed.
        min: usize,
        /// Maximum number of occurrences allowed; `None` means unbounded.
        max: Option<usize>,
        /// Number of occurrences actually found.
        actual: usize,
        /// Line of the parent element, if known.
        line: Option<usize>,
    },

    /// An attribute the schema marks as required is absent.
    RequiredAttributeMissing {
        /// Name of the element that should carry the attribute.
        element: String,
        /// Name of the missing attribute.
        attribute: String,
        /// Line of the element, if known.
        line: Option<usize>,
    },

    /// An element appears in the document but is not declared in the schema.
    UnknownElement {
        /// Name of the undeclared element.
        element: String,
        /// Line of the element, if known.
        line: Option<usize>,
    },

    /// The schema file does not exist.
    SchemaNotFound {
        /// Path that was looked up.
        path: PathBuf,
    },

    /// Reading the schema file failed for a reason other than it being absent.
    IoError {
        /// Human-readable description of the underlying I/O failure.
        message: String,
    },
}
220
221impl fmt::Display for ValidationError {
222 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
223 match self {
224 ValidationError::SchemaParseError { message } => {
225 write!(f, "Schema parse error: {}", message)
226 }
227 ValidationError::DocumentParseError {
228 message,
229 line,
230 column,
231 } => {
232 write!(f, "Document parse error: {}", message)?;
233 if let Some(l) = line {
234 write!(f, " at line {}", l)?;
235 if let Some(c) = column {
236 write!(f, ", column {}", c)?;
237 }
238 }
239 Ok(())
240 }
241 ValidationError::ElementValidationError {
242 element,
243 expected,
244 found,
245 line,
246 } => {
247 write!(
248 f,
249 "Element validation failed for '{}': expected {}, found '{}'",
250 element, expected, found
251 )?;
252 if let Some(l) = line {
253 write!(f, " at line {}", l)?;
254 }
255 Ok(())
256 }
257 ValidationError::AttributeValidationError {
258 element,
259 attribute,
260 message,
261 line,
262 } => {
263 write!(
264 f,
265 "Attribute validation failed for '{}.{}': {}",
266 element, attribute, message
267 )?;
268 if let Some(l) = line {
269 write!(f, " at line {}", l)?;
270 }
271 Ok(())
272 }
273 ValidationError::TypeValidationError {
274 name,
275 expected_type,
276 value,
277 line,
278 } => {
279 write!(
280 f,
281 "Type validation failed for '{}': expected {}, found '{}'",
282 name, expected_type, value
283 )?;
284 if let Some(l) = line {
285 write!(f, " at line {}", l)?;
286 }
287 Ok(())
288 }
289 ValidationError::CardinalityError {
290 element,
291 min,
292 max,
293 actual,
294 line,
295 } => {
296 write!(
297 f,
298 "Cardinality error for '{}': expected {}..{}, found {}",
299 element,
300 min,
301 max.map_or("unbounded".to_string(), |m| m.to_string()),
302 actual
303 )?;
304 if let Some(l) = line {
305 write!(f, " at line {}", l)?;
306 }
307 Ok(())
308 }
309 ValidationError::RequiredAttributeMissing {
310 element,
311 attribute,
312 line,
313 } => {
314 write!(
315 f,
316 "Required attribute '{}' missing from element '{}'",
317 attribute, element
318 )?;
319 if let Some(l) = line {
320 write!(f, " at line {}", l)?;
321 }
322 Ok(())
323 }
324 ValidationError::UnknownElement { element, line } => {
325 write!(f, "Unknown element '{}' not defined in schema", element)?;
326 if let Some(l) = line {
327 write!(f, " at line {}", l)?;
328 }
329 Ok(())
330 }
331 ValidationError::SchemaNotFound { path } => {
332 write!(f, "Schema file not found: {}", path.display())
333 }
334 ValidationError::IoError { message } => {
335 write!(f, "I/O error: {}", message)
336 }
337 }
338 }
339}
340
// Uses the trait's default methods only, so no underlying source error is exposed.
impl std::error::Error for ValidationError {}
342
/// In-memory form of a parsed schema.
#[derive(Debug, Clone)]
struct Schema {
    /// Top-level element declarations, keyed by element name.
    elements: HashMap<String, ElementDef>,
}
348
/// A single element declaration read from the schema.
#[derive(Debug, Clone)]
struct ElementDef {
    /// Value of the declaration's `name` attribute.
    name: String,
    /// Value of the declaration's `type` attribute, if present.
    type_name: Option<String>,
    /// Inline `complexType` definition nested in the declaration, if any.
    complex_type: Option<ComplexType>,
    /// Minimum number of occurrences inside the parent element.
    min_occurs: usize,
    /// Maximum number of occurrences; `None` means no upper limit is enforced.
    max_occurs: Option<usize>,
}
358
/// The parts of a `complexType` definition that the validator understands.
#[derive(Debug, Clone)]
struct ComplexType {
    /// Child element declarations collected from `sequence` children.
    sequence: Vec<ElementDef>,
    /// Attribute declarations found directly under the `complexType`.
    attributes: Vec<AttributeDef>,
}
365
/// A single attribute declaration read from the schema.
#[derive(Debug, Clone)]
struct AttributeDef {
    /// Value of the declaration's `name` attribute.
    name: String,
    /// Declared type name of the attribute value.
    type_name: String,
    /// Whether the attribute must be present on the element.
    required: bool,
}
373
/// Validates XML documents against a schema parsed from XSD text.
#[derive(Debug, Clone)]
pub struct SchemaValidator {
    /// The parsed schema that documents are checked against.
    schema: Schema,
}
381
382impl SchemaValidator {
383 pub fn from_xsd(xsd: &str) -> Result<Self, ValidationError> {
407 let schema = Self::parse_xsd(xsd)?;
408 Ok(Self { schema })
409 }
410
411 fn parse_xsd(xsd: &str) -> Result<Schema, ValidationError> {
413 let options = ParsingOptions {
415 allow_dtd: false, ..Default::default()
417 };
418
419 let doc = XmlDocument::parse_with_options(xsd, options).map_err(|e| {
420 ValidationError::SchemaParseError {
421 message: e.to_string(),
422 }
423 })?;
424
425 let root = doc.root_element();
426
427 if root.tag_name().name() != "schema" {
429 return Err(ValidationError::SchemaParseError {
430 message: "Root element must be <xs:schema>".to_string(),
431 });
432 }
433
434 let mut elements = HashMap::new();
435
436 for child in root.children().filter(|n| n.is_element()) {
438 if child.tag_name().name() == "element" {
439 let elem_def = Self::parse_element(&child)?;
440 elements.insert(elem_def.name.clone(), elem_def);
441 }
442 }
443
444 Ok(Schema { elements })
445 }
446
447 fn parse_element(node: &Node<'_, '_>) -> Result<ElementDef, ValidationError> {
449 let name = node
450 .attribute("name")
451 .ok_or_else(|| ValidationError::SchemaParseError {
452 message: "Element must have 'name' attribute".to_string(),
453 })?
454 .to_string();
455
456 let type_name = node.attribute("type").map(|s| s.to_string());
457 let min_occurs = node
458 .attribute("minOccurs")
459 .and_then(|s| s.parse::<usize>().ok())
460 .unwrap_or(1);
461 let max_occurs = node.attribute("maxOccurs").and_then(|s| {
462 if s == "unbounded" {
463 None
464 } else {
465 s.parse::<usize>().ok()
466 }
467 });
468
469 let mut complex_type = None;
471 for child in node.children().filter(|n| n.is_element()) {
472 if child.tag_name().name() == "complexType" {
473 complex_type = Some(Self::parse_complex_type(&child)?);
474 break;
475 }
476 }
477
478 Ok(ElementDef {
479 name,
480 type_name,
481 complex_type,
482 min_occurs,
483 max_occurs,
484 })
485 }
486
487 fn parse_complex_type(node: &Node<'_, '_>) -> Result<ComplexType, ValidationError> {
489 let mut sequence = Vec::new();
490 let mut attributes = Vec::new();
491
492 for child in node.children().filter(|n| n.is_element()) {
493 match child.tag_name().name() {
494 "sequence" => {
495 for elem_node in child.children().filter(|n| n.is_element()) {
496 if elem_node.tag_name().name() == "element" {
497 sequence.push(Self::parse_element(&elem_node)?);
498 }
499 }
500 }
501 "attribute" => {
502 attributes.push(Self::parse_attribute(&child)?);
503 }
504 _ => {}
505 }
506 }
507
508 Ok(ComplexType {
509 sequence,
510 attributes,
511 })
512 }
513
514 fn parse_attribute(node: &Node<'_, '_>) -> Result<AttributeDef, ValidationError> {
516 let name = node
517 .attribute("name")
518 .ok_or_else(|| ValidationError::SchemaParseError {
519 message: "Attribute must have 'name' attribute".to_string(),
520 })?
521 .to_string();
522
523 let type_name = node.attribute("type").unwrap_or("xs:string").to_string();
524
525 let required = node.attribute("use") == Some("required");
526
527 Ok(AttributeDef {
528 name,
529 type_name,
530 required,
531 })
532 }
533
534 pub fn from_file(path: &Path) -> Result<Self, ValidationError> {
556 if !path.exists() {
557 return Err(ValidationError::SchemaNotFound {
558 path: path.to_path_buf(),
559 });
560 }
561
562 let content = fs::read_to_string(path).map_err(|e| ValidationError::IoError {
563 message: e.to_string(),
564 })?;
565
566 Self::from_xsd(&content)
567 }
568
569 pub fn validate(&self, xml: &str) -> Result<(), ValidationError> {
596 let options = ParsingOptions {
598 allow_dtd: false, ..Default::default()
600 };
601
602 let doc = XmlDocument::parse_with_options(xml, options).map_err(|e| {
603 ValidationError::DocumentParseError {
604 message: e.to_string(),
605 line: None,
606 column: None,
607 }
608 })?;
609
610 let root = doc.root_element();
611 let root_name = root.tag_name().name();
612
613 let schema_elem =
615 self.schema
616 .elements
617 .get(root_name)
618 .ok_or_else(|| ValidationError::UnknownElement {
619 element: root_name.to_string(),
620 line: Some(doc.text_pos_at(root.range().start).row as usize),
621 })?;
622
623 self.validate_element(&root, schema_elem)?;
624
625 Ok(())
626 }
627
628 fn validate_element(
630 &self,
631 node: &Node<'_, '_>,
632 schema_elem: &ElementDef,
633 ) -> Result<(), ValidationError> {
634 let line = node.document().text_pos_at(node.range().start).row as usize;
635
636 if let Some(ref type_name) = schema_elem.type_name {
638 self.validate_type(node, type_name, line)?;
639 }
640
641 if let Some(ref complex_type) = schema_elem.complex_type {
643 self.validate_attributes_complex(node, complex_type, line)?;
645
646 self.validate_children_complex(node, complex_type, line)?;
648 }
649
650 Ok(())
651 }
652
653 fn validate_type(
655 &self,
656 node: &Node<'_, '_>,
657 type_ref: &str,
658 line: usize,
659 ) -> Result<(), ValidationError> {
660 let text = node.text().unwrap_or("");
661
662 match type_ref {
664 "xs:string" | "string" => {
665 }
667 "xs:integer" | "integer" => {
668 if text.parse::<i64>().is_err() {
669 return Err(ValidationError::TypeValidationError {
670 name: node.tag_name().name().to_string(),
671 expected_type: "xs:integer".to_string(),
672 value: text.to_string(),
673 line: Some(line),
674 });
675 }
676 }
677 "xs:decimal" | "decimal" => {
678 if text.parse::<f64>().is_err() {
679 return Err(ValidationError::TypeValidationError {
680 name: node.tag_name().name().to_string(),
681 expected_type: "xs:decimal".to_string(),
682 value: text.to_string(),
683 line: Some(line),
684 });
685 }
686 }
687 "xs:boolean" | "boolean" => {
688 if !["true", "false", "1", "0"].contains(&text) {
689 return Err(ValidationError::TypeValidationError {
690 name: node.tag_name().name().to_string(),
691 expected_type: "xs:boolean".to_string(),
692 value: text.to_string(),
693 line: Some(line),
694 });
695 }
696 }
697 _ => {
698 }
700 }
701
702 Ok(())
703 }
704
705 fn validate_attributes_complex(
707 &self,
708 node: &Node<'_, '_>,
709 complex_type: &ComplexType,
710 line: usize,
711 ) -> Result<(), ValidationError> {
712 let element_name = node.tag_name().name();
713
714 for attr_def in &complex_type.attributes {
716 if attr_def.required && node.attribute(attr_def.name.as_str()).is_none() {
717 return Err(ValidationError::RequiredAttributeMissing {
718 element: element_name.to_string(),
719 attribute: attr_def.name.clone(),
720 line: Some(line),
721 });
722 }
723
724 if let Some(value) = node.attribute(attr_def.name.as_str()) {
726 self.validate_simple_type(value, &attr_def.type_name)
727 .map_err(|_| ValidationError::AttributeValidationError {
728 element: element_name.to_string(),
729 attribute: attr_def.name.clone(),
730 message: format!("Expected type {}, found '{}'", attr_def.type_name, value),
731 line: Some(line),
732 })?;
733 }
734 }
735
736 Ok(())
737 }
738
739 fn validate_children_complex(
741 &self,
742 node: &Node<'_, '_>,
743 complex_type: &ComplexType,
744 line: usize,
745 ) -> Result<(), ValidationError> {
746 let children: Vec<_> = node.children().filter(|n| n.is_element()).collect();
747
748 for child in &children {
750 let child_name = child.tag_name().name();
751
752 let schema_elem = complex_type
754 .sequence
755 .iter()
756 .find(|e| e.name == child_name)
757 .ok_or_else(|| ValidationError::UnknownElement {
758 element: child_name.to_string(),
759 line: Some(child.document().text_pos_at(child.range().start).row as usize),
760 })?;
761
762 self.validate_element(child, schema_elem)?;
763 }
764
765 for elem_def in &complex_type.sequence {
767 let count = children
768 .iter()
769 .filter(|n| n.tag_name().name() == elem_def.name)
770 .count();
771
772 if count < elem_def.min_occurs {
773 return Err(ValidationError::CardinalityError {
774 element: elem_def.name.clone(),
775 min: elem_def.min_occurs,
776 max: elem_def.max_occurs,
777 actual: count,
778 line: Some(line),
779 });
780 }
781
782 if let Some(max) = elem_def.max_occurs {
783 if count > max {
784 return Err(ValidationError::CardinalityError {
785 element: elem_def.name.clone(),
786 min: elem_def.min_occurs,
787 max: elem_def.max_occurs,
788 actual: count,
789 line: Some(line),
790 });
791 }
792 }
793 }
794
795 Ok(())
796 }
797
798 fn validate_simple_type(&self, value: &str, type_name: &str) -> Result<(), ()> {
800 match type_name {
801 "xs:string" | "string" => Ok(()),
802 "xs:integer" | "integer" => value.parse::<i64>().map(|_| ()).map_err(|_| ()),
803 "xs:decimal" | "decimal" => value.parse::<f64>().map(|_| ()).map_err(|_| ()),
804 "xs:boolean" | "boolean" => {
805 if ["true", "false", "1", "0"].contains(&value) {
806 Ok(())
807 } else {
808 Err(())
809 }
810 }
811 _ => Ok(()), }
813 }
814}
815
/// Cache of loaded [`SchemaValidator`]s keyed by schema file path.
pub struct SchemaCache {
    /// Loaded validators by path, behind a read-write lock.
    cache: Arc<RwLock<HashMap<PathBuf, Arc<SchemaValidator>>>>,
    /// Entry count at which `get_or_load` evicts an entry before inserting a new one.
    max_size: usize,
}
839
840impl SchemaCache {
841 pub fn new(max_size: usize) -> Self {
855 Self {
856 cache: Arc::new(RwLock::new(HashMap::new())),
857 max_size,
858 }
859 }
860
861 pub fn get_or_load(&self, path: &Path) -> Result<Arc<SchemaValidator>, ValidationError> {
885 {
887 let cache = self.cache.read();
888 if let Some(validator) = cache.get(path) {
889 return Ok(Arc::clone(validator));
890 }
891 }
892
893 let mut cache = self.cache.write();
895
896 if let Some(validator) = cache.get(path) {
898 return Ok(Arc::clone(validator));
899 }
900
901 let validator = Arc::new(SchemaValidator::from_file(path)?);
903
904 if cache.len() >= self.max_size {
906 if let Some(oldest_key) = cache.keys().next().cloned() {
907 cache.remove(&oldest_key);
908 }
909 }
910
911 cache.insert(path.to_path_buf(), Arc::clone(&validator));
912
913 Ok(validator)
914 }
915
916 pub fn clear(&self) {
927 self.cache.write().clear();
928 }
929
930 pub fn size(&self) -> usize {
941 self.cache.read().len()
942 }
943}
944
945impl Default for SchemaCache {
946 fn default() -> Self {
948 Self::new(100)
949 }
950}
951
#[cfg(test)]
mod tests {
    use super::*;

    /// Schema with a `person` root holding a string `name` and an integer `age`.
    const SIMPLE_SCHEMA: &str = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="person">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="name" type="xs:string"/>
        <xs:element name="age" type="xs:integer"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>"#;

    #[test]
    fn test_schema_validator_creation() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA);
        assert!(validator.is_ok());
    }

    #[test]
    fn test_valid_document() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();

        let xml = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>30</age>
</person>"#;

        assert!(validator.validate(xml).is_ok());
    }

    #[test]
    fn test_invalid_type() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();

        let xml = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>thirty</age>
</person>"#;

        let result = validator.validate(xml);
        assert!(result.is_err());

        if let Err(ValidationError::TypeValidationError {
            name,
            expected_type,
            value,
            ..
        }) = result
        {
            assert_eq!(name, "age");
            assert_eq!(expected_type, "xs:integer");
            assert_eq!(value, "thirty");
        } else {
            panic!("Expected TypeValidationError");
        }
    }

    #[test]
    fn test_unknown_element() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();

        let xml = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>30</age>
  <email>alice@example.com</email>
</person>"#;

        let result = validator.validate(xml);
        assert!(result.is_err());

        if let Err(ValidationError::UnknownElement { element, .. }) = result {
            assert_eq!(element, "email");
        } else {
            panic!("Expected UnknownElement error");
        }
    }

    #[test]
    fn test_malformed_xml() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();

        // `<name>` is never closed.
        let xml = r#"<?xml version="1.0"?>
<person>
  <name>Alice
  <age>30</age>
</person>"#;

        let result = validator.validate(xml);
        assert!(result.is_err());
        assert!(matches!(
            result,
            Err(ValidationError::DocumentParseError { .. })
        ));
    }

    #[test]
    fn test_schema_cache() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        let cache = SchemaCache::new(5);
        assert_eq!(cache.size(), 0);

        let mut temp_file = NamedTempFile::new().unwrap();
        temp_file.write_all(SIMPLE_SCHEMA.as_bytes()).unwrap();
        let path = temp_file.path();

        // First lookup loads from disk and populates the cache.
        let validator1 = cache.get_or_load(path).unwrap();
        assert_eq!(cache.size(), 1);

        // Second lookup is served from the cache.
        let validator2 = cache.get_or_load(path).unwrap();
        assert_eq!(cache.size(), 1);

        assert!(Arc::ptr_eq(&validator1, &validator2));

        cache.clear();
        assert_eq!(cache.size(), 0);
    }

    #[test]
    fn test_cache_eviction() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        let cache = SchemaCache::new(2);

        let mut files = vec![];
        for _ in 0..3 {
            let mut temp_file = NamedTempFile::new().unwrap();
            temp_file.write_all(SIMPLE_SCHEMA.as_bytes()).unwrap();
            files.push(temp_file);
        }

        cache.get_or_load(files[0].path()).unwrap();
        cache.get_or_load(files[1].path()).unwrap();
        assert_eq!(cache.size(), 2);

        // A third distinct schema must evict one entry rather than grow the cache.
        cache.get_or_load(files[2].path()).unwrap();
        assert_eq!(cache.size(), 2);
    }

    #[test]
    fn test_error_display() {
        let err = ValidationError::TypeValidationError {
            name: "age".to_string(),
            expected_type: "xs:integer".to_string(),
            value: "thirty".to_string(),
            line: Some(5),
        };

        let display = err.to_string();
        assert!(display.contains("age"));
        assert!(display.contains("xs:integer"));
        assert!(display.contains("thirty"));
        assert!(display.contains("line 5"));
    }

    #[test]
    fn test_schema_not_found() {
        let result = SchemaValidator::from_file(Path::new("/nonexistent/schema.xsd"));
        assert!(result.is_err());
        assert!(matches!(
            result,
            Err(ValidationError::SchemaNotFound { .. })
        ));
    }

    #[test]
    fn test_invalid_schema() {
        let invalid_schema = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="broken" type="nonexistent:type"/>
</xs:schema>"#;

        // Type references are not resolved when the schema is loaded, so an
        // unknown type name is accepted and its content is left unchecked.
        let validator = SchemaValidator::from_xsd(invalid_schema)
            .expect("unknown type names are tolerated at load time");

        let xml = r#"<?xml version="1.0"?><broken>anything</broken>"#;
        assert!(validator.validate(xml).is_ok());
    }

    #[test]
    fn test_boolean_type_validation() {
        let schema = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="flag" type="xs:boolean"/>
</xs:schema>"#;

        let validator = SchemaValidator::from_xsd(schema).unwrap();

        for val in &["true", "false", "1", "0"] {
            let xml = format!(r#"<?xml version="1.0"?><flag>{}</flag>"#, val);
            assert!(validator.validate(&xml).is_ok());
        }

        let xml = r#"<?xml version="1.0"?><flag>yes</flag>"#;
        assert!(validator.validate(xml).is_err());
    }

    #[test]
    fn test_decimal_type_validation() {
        let schema = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="price" type="xs:decimal"/>
</xs:schema>"#;

        let validator = SchemaValidator::from_xsd(schema).unwrap();

        let xml = r#"<?xml version="1.0"?><price>19.99</price>"#;
        assert!(validator.validate(xml).is_ok());

        let xml = r#"<?xml version="1.0"?><price>not a number</price>"#;
        assert!(validator.validate(xml).is_err());
    }

    #[test]
    fn test_concurrent_cache_access() {
        use std::io::Write;
        use std::sync::Arc;
        use std::thread;
        use tempfile::NamedTempFile;

        let cache = Arc::new(SchemaCache::new(10));

        let mut temp_file = NamedTempFile::new().unwrap();
        temp_file.write_all(SIMPLE_SCHEMA.as_bytes()).unwrap();
        let path = temp_file.path().to_path_buf();

        let mut handles = vec![];
        for _ in 0..10 {
            let cache_clone = Arc::clone(&cache);
            let path_clone = path.clone();
            let handle = thread::spawn(move || {
                for _ in 0..100 {
                    let _validator = cache_clone.get_or_load(&path_clone).unwrap();
                }
            });
            handles.push(handle);
        }

        for handle in handles {
            handle.join().unwrap();
        }

        // All threads asked for the same path, so exactly one entry is cached.
        assert_eq!(cache.size(), 1);
    }
}