raxb_validate/
lib.rs

1#![doc = include_str!("../README.md")]
2/*
3#![warn(
4    missing_docs,
5    missing_debug_implementations,
6    missing_copy_implementations,
7    trivial_casts,
8    trivial_numeric_casts,
9    unused_extern_crates,
10    unused_import_braces,
11    unused_qualifications,
12    variant_size_differences
13)]
14*/
15use std::cmp::Ordering;
16use std::collections::HashMap;
17use std::path::PathBuf;
18use std::ptr::null_mut;
19use std::{ffi::CStr, sync::Mutex};
20
21use raxb::quick_xml::events::Event;
22use raxb::quick_xml::NsReader;
23use raxb_libxml2_sys::{
24    _xmlError, xmlCharEncoding_XML_CHAR_ENCODING_UTF8, xmlInitParser,
25    xmlParserInputBufferCreateMem, xmlRegisterInputCallbacks, xmlSAXHandler,
26    xmlSchemaFreeParserCtxt, xmlSchemaFreeValidCtxt, xmlSchemaNewMemParserCtxt,
27    xmlSchemaNewValidCtxt, xmlSchemaParse, xmlSchemaParserCtxtPtr, xmlSchemaPtr,
28    xmlSchemaSetValidStructuredErrors, xmlSchemaValidCtxtPtr, xmlSchemaValidateStream,
29};
30
31use libc::{c_char, c_int, c_void, memcpy, size_t};
32use once_cell::sync::Lazy;
33use raxb_xmlschema::reader::{ReaderError, SchemaBundle, XmlSchemaResolver};
34use thiserror::Error;
35
36#[derive(Clone)]
37pub struct XmlSchemaPtr(pub xmlSchemaPtr);
38unsafe impl Send for XmlSchemaPtr {}
39unsafe impl Sync for XmlSchemaPtr {}
40
/// Severity of a validation message reported through the libxml2 error callback.
///
/// The discriminants are spelled out because `error_cb` maps the raw level
/// values `1`, `2` and `3` onto `Warning`, `Error` and `Fatal`, and every
/// other value onto `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum ErrorLevel {
    /// No severity, or a level value this crate does not recognise.
    None = 0,
    /// Raw level `1`.
    Warning = 1,
    /// Raw level `2`.
    Error = 2,
    /// Raw level `3`.
    Fatal = 3,
}
49
50#[derive(Debug, Clone)]
51pub struct XmlValidationErrorEntry {
52    pub message: String,
53    pub line: i32,
54    pub level: ErrorLevel,
55}
56
57impl std::fmt::Display for XmlValidationErrorEntry {
58    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
59        writeln!(
60            f,
61            "{:?} at line {}: {}",
62            self.level, self.line, self.message
63        )?;
64        Ok(())
65    }
66}
67
68#[derive(Debug, Default)]
69pub struct XmlValidationError {
70    pub errors: Vec<XmlValidationErrorEntry>,
71}
72
73impl std::fmt::Display for XmlValidationError {
74    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
75        writeln!(f, "Xml Validation errors:")?;
76        for err in self.errors.iter() {
77            write!(f, "- {err}")?;
78        }
79        Ok(())
80    }
81}
82impl std::error::Error for XmlValidationError {}
83
84#[derive(Error, Debug)]
85pub enum ValidationError {
86    #[error("unable to lock reader")]
87    Lock,
88    #[error("unable to find schema location")]
89    NoSchemaLocation,
90    #[error("unable to find schema with location '{0}'")]
91    SchemaNotFound(String),
92    #[error("libxml2 internal error")]
93    Internal,
94    #[error(transparent)]
95    Validation(#[from] XmlValidationError),
96    #[error(transparent)]
97    Reader(#[from] ReaderError),
98}
99
100pub type ValidationResult<T> = Result<T, ValidationError>;
101
/// Marker value produced once the lazy libxml2 initialisation has run.
pub struct InitState;

impl InitState {
    /// Always reports `true`.
    ///
    /// Callers reach this through the `INIT_STATE` lazy static, so calling it
    /// is what forces the one-time initialisation to run.
    fn get(&self) -> bool {
        true
    }
}
109static ACTIVE_READER: Lazy<Mutex<()>> = Lazy::new(Mutex::default);
110static INIT_STATE: Lazy<InitState> = Lazy::new(|| {
111    unsafe {
112        xmlInitParser();
113        xmlRegisterInputCallbacks(
114            Some(match_runtime_fn),
115            Some(open_runtime_fn),
116            Some(read_runtime_fn),
117            Some(close_runtime_fn),
118        );
119    }
120    // init_with_runtime_io(match_runtime_fn, open_runtime_fn);
121    InitState
122});
123struct ActiveSchemaResolver(Box<dyn XmlSchemaResolver>);
124static mut ACTIVE_BUNDLE_PTR: *mut ActiveSchemaResolver = null_mut();
125
/// Read cursor handed to libxml2 as the opaque context of an opened resource.
///
/// Created by `open_runtime_fn`, advanced by `read_runtime_fn` and freed by
/// `close_runtime_fn`.
#[derive(Debug)]
struct ReadCtx {
    /// Number of bytes not yet handed out.
    remaining_length: c_int,
    /// Offset from `root` of the next byte to hand out.
    offset: isize,
    /// Start of the resource bytes.
    root: *const c_char,
}
132
133extern "C" fn error_cb(ctx: *mut c_void, error: *const _xmlError) {
134    unsafe {
135        let m = CStr::from_ptr((*error).message);
136        (*(ctx as *mut XmlValidationError))
137            .errors
138            .push(XmlValidationErrorEntry {
139                line: (*error).line,
140                level: match (*error).level {
141                    1 => ErrorLevel::Warning,
142                    2 => ErrorLevel::Error,
143                    3 => ErrorLevel::Fatal,
144                    _ => ErrorLevel::None,
145                },
146                message: m.to_string_lossy().to_string(),
147            });
148    }
149}
150
151#[no_mangle]
152unsafe extern "C" fn match_runtime_fn(filename: *const c_char) -> c_int {
153    let filename_cstr = unsafe { CStr::from_ptr(filename) };
154    let filename = filename_cstr.to_str().unwrap();
155    unsafe {
156        if (*ACTIVE_BUNDLE_PTR).0.resolve(filename).is_some() {
157            return 1;
158        }
159    }
160    0
161}
162
163#[no_mangle]
164extern "C" fn open_runtime_fn(filename: *const c_char) -> *mut c_void {
165    let filename_cstr = unsafe { CStr::from_ptr(filename) };
166    let filename = filename_cstr.to_str().unwrap();
167    unsafe {
168        if let Some(b) = (*ACTIVE_BUNDLE_PTR).0.resolve(filename) {
169            let result = Box::<ReadCtx>::into_raw(Box::new(ReadCtx {
170                root: b.as_ptr() as *const c_char,
171                offset: 0,
172                remaining_length: b.len() as c_int,
173            }));
174            return result as *mut c_void;
175        }
176    }
177    null_mut()
178}
179
180#[no_mangle]
181extern "C" fn read_runtime_fn(context: *mut c_void, buffer: *mut c_char, len: c_int) -> c_int {
182    let mut l = len;
183    unsafe {
184        let ctx = context as *mut ReadCtx;
185        let ptr = (*ctx).root.offset((*ctx).offset) as *mut c_char;
186        if l > (*ctx).remaining_length {
187            l = (*ctx).remaining_length;
188        }
189        memcpy(buffer as *mut c_void, ptr as *mut c_void, l as size_t);
190        (*ctx).remaining_length -= l;
191        (*ctx).offset += l as isize;
192        l
193    }
194}
195
196#[no_mangle]
197extern "C" fn close_runtime_fn(context: *mut c_void) -> c_int {
198    unsafe {
199        let _ = Box::from_raw(context as *mut ReadCtx);
200    }
201    0
202}
203
204pub fn read_schema_bundle<T>(bundle: T) -> ValidationResult<XmlSchemaPtr>
205where
206    T: XmlSchemaResolver + Send + Sync + 'static,
207{
208    if INIT_STATE.get() {
209        let mut schema_resolver = ActiveSchemaResolver(Box::new(bundle));
210        let _active_reader_lock = ACTIVE_READER.lock().map_err(|_| ValidationError::Lock)?;
211        unsafe {
212            ACTIVE_BUNDLE_PTR = &mut schema_resolver as *mut ActiveSchemaResolver;
213            let buffer = (*ACTIVE_BUNDLE_PTR).0.entrypoint();
214            let l = buffer.len() - 1;
215            let parser: xmlSchemaParserCtxtPtr =
216                xmlSchemaNewMemParserCtxt(buffer.as_ptr() as *const c_char, l as c_int);
217            let ptr: xmlSchemaPtr = xmlSchemaParse(parser);
218            xmlSchemaFreeParserCtxt(parser);
219            if ptr.is_null() {
220                return Err(ValidationError::Internal);
221            }
222            Ok(XmlSchemaPtr(ptr))
223        }
224    } else {
225        Err(ValidationError::Internal)
226    }
227}
228
229pub fn validate_xml(xml: &[u8], schema: &XmlSchemaPtr) -> ValidationResult<()> {
230    if INIT_STATE.get() {
231        let mut error_ctx = XmlValidationError::default();
232        let result = unsafe {
233            let buffer = xml.as_ptr() as *const c_char;
234            let len = xml.len() as c_int;
235            let input =
236                xmlParserInputBufferCreateMem(buffer, len, xmlCharEncoding_XML_CHAR_ENCODING_UTF8);
237            let ctx: xmlSchemaValidCtxtPtr = xmlSchemaNewValidCtxt(schema.0);
238            xmlSchemaSetValidStructuredErrors(
239                ctx,
240                Some(error_cb),
241                &mut error_ctx as *mut XmlValidationError as *mut c_void,
242            );
243            let result = xmlSchemaValidateStream(
244                ctx,
245                input,
246                xmlCharEncoding_XML_CHAR_ENCODING_UTF8,
247                null_mut::<xmlSAXHandler>(),
248                null_mut(),
249            );
250            xmlSchemaFreeValidCtxt(ctx);
251            result
252        };
253        match result.cmp(&0) {
254            Ordering::Equal => Ok(()),
255            Ordering::Less => Err(ValidationError::Internal),
256            Ordering::Greater => Err(ValidationError::Validation(error_ctx)),
257        }
258    } else {
259        Err(ValidationError::Internal)
260    }
261}
262
263pub fn find_root_xsi_schema_location(xml: &[u8]) -> Result<String, ValidationError> {
264    let mut reader = NsReader::from_reader(xml);
265    reader.config_mut().trim_text(true);
266    let mut buf = Vec::new();
267    let mut schema_location: Option<String> = None;
268    loop {
269        match reader.read_resolved_event_into(&mut buf) {
270            Ok((_, Event::Start(ref e))) => {
271                schema_location = e.attributes().find_map(|a| {
272                    if let Ok(attr) = a {
273                        if attr.key.local_name().as_ref() == b"schemaLocation"
274                            || attr.key.local_name().as_ref() == b"noNamespaceSchemaLocation"
275                        {
276                            return String::from_utf8(attr.value.to_vec()).ok();
277                        }
278                    }
279                    None
280                });
281                break;
282            }
283            Ok((_, Event::Eof)) => break,
284            _ => (),
285        }
286        buf.clear();
287    }
288    schema_location
289        .ok_or(ValidationError::NoSchemaLocation)
290        .map(|s| {
291            if let Some((a, b)) = s.split_once(' ') {
292                if let Ok(b) = b.parse::<PathBuf>() {
293                    return format!("{} {}", a.trim(), b.file_name().unwrap().to_str().unwrap());
294                }
295            }
296            s
297        })
298}
299
300#[derive(Default)]
301pub struct ValidationMap {
302    inner: HashMap<String, XmlSchemaPtr>,
303}
304
305impl std::fmt::Debug for ValidationMap {
306    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
307        write!(f, "[")?;
308        for key in self.inner.keys() {
309            writeln!(f, "  '{key}',")?
310        }
311        write!(f, "]")
312    }
313}
314
315impl ValidationMap {
316    pub fn try_from_iter(
317        sources: impl Iterator<Item = impl AsRef<[u8]>>,
318    ) -> Result<Self, ValidationError> {
319        let mut inner = HashMap::default();
320        for source in sources {
321            let schema_bundle = SchemaBundle::from_slice(source.as_ref())?;
322            let schema_location = format!("{} {}", schema_bundle.target_ns(), schema_bundle.name());
323            let xml_schema_ptr = read_schema_bundle(schema_bundle)?;
324            inner.insert(schema_location, xml_schema_ptr);
325        }
326        Ok(Self { inner })
327    }
328
329    pub fn validate(&self, xml: &[u8]) -> Result<(), ValidationError> {
330        let xml_root_schema_location = find_root_xsi_schema_location(xml)?;
331        let xml_schema_ptr = self
332            .inner
333            .get(&xml_root_schema_location)
334            .ok_or(ValidationError::SchemaNotFound(xml_root_schema_location))?;
335        validate_xml(xml, xml_schema_ptr)
336    }
337}