//! Validation of XML `Name`, `NCName` and character data strings.
//!
//! The free functions [`validate_name`], [`validate_ncname`] and
//! [`validate_cdata`] check a `&str` and report the first problem they find as
//! an [`Error`]. The character classes they test against are defined in the
//! [`selectors`] module.
//!
//! When the `std` feature is disabled the crate is built as `no_std`; the
//! `CompactString`, `Name` and `NcName` re-exports are only available with the
//! `std` feature enabled.
#![deny(missing_docs)]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
use core::fmt;
// Private module; its public types are re-exported below.
mod strings;
pub mod selectors;
// The trait must be in scope so that `.select(..)` can be called on the
// `selectors::CLASS_XML_*` values used by the validators.
use selectors::CharSelector;
// Re-exports that exist only when the `std` feature is enabled.
#[doc(inline)]
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub use strings::{CompactString, Name, NcName};
// Re-exports that are available regardless of the `std` feature.
#[doc(inline)]
pub use strings::{NameStr, NcNameStr};
/// Error conditions reported when validating XML names and character data.
///
/// The [`core::fmt::Display`] implementation renders a short English
/// description of each condition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
/// The string to validate was empty.
///
/// Returned by [`validate_name`] and [`validate_ncname`] when the input
/// contains no characters at all.
EmptyName,
/// The string contains a character which is not allowed at its position.
///
/// The payload is the offending character. Returned by [`validate_name`],
/// [`validate_ncname`] and [`validate_cdata`].
InvalidChar(char),
/// One side of the colon is an empty string.
///
/// NOTE(review): not produced by the validators in this file; presumably
/// raised when a name is split at its colon elsewhere in the crate -- confirm.
EmptyNamePart,
/// The name contains more than one colon.
///
/// NOTE(review): not produced by the validators in this file; presumably
/// raised when a name is split at its colon elsewhere in the crate -- confirm.
MultiColonName,
/// The local name is invalid.
///
/// NOTE(review): not produced by the validators in this file; presumably
/// refers to the part after the colon of a prefixed name -- confirm.
InvalidLocalName,
}
impl fmt::Display for Error {
    /// Writes a short, human-readable English description of the error.
    ///
    /// For [`Error::InvalidChar`] the code point is rendered as lowercase
    /// hexadecimal, zero-padded to at least four digits.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match *self {
            // The only variant with a payload needs real formatting, so it
            // is handled on its own and returns early.
            Self::InvalidChar(c) => {
                return write!(f, "character U+{:04x} is not allowed", u32::from(c));
            }
            Self::EmptyName => "Name and NCName must not be empty",
            Self::EmptyNamePart => "empty string on one side of the colon",
            Self::MultiColonName => "more than one colon",
            Self::InvalidLocalName => "local name is invalid",
        };
        f.write_str(message)
    }
}
// Gated on the `std` feature because this impl names `std::error::Error`,
// which is not reachable when the crate is built as `no_std`. The empty body
// relies on the trait's default methods together with the `Debug` derive and
// the `Display` impl of `Error`.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
pub fn validate_name(s: &str) -> Result<(), Error> {
let mut chars = s.chars();
match chars.next() {
None => return Err(Error::EmptyName),
Some(c) => {
if !selectors::CLASS_XML_NAMESTART.select(c) {
return Err(Error::InvalidChar(c));
}
}
}
for ch in chars {
if !selectors::CLASS_XML_NAME.select(ch) {
return Err(Error::InvalidChar(ch));
}
}
Ok(())
}
pub fn validate_ncname(s: &str) -> Result<(), Error> {
let mut chars = s.chars();
match chars.next() {
None => return Err(Error::EmptyName),
Some(c) => {
if !selectors::CLASS_XML_NAMESTART.select(c) || c == ':' {
return Err(Error::InvalidChar(c));
}
}
}
for ch in chars {
if !selectors::CLASS_XML_NAME.select(ch) || ch == ':' {
return Err(Error::InvalidChar(ch));
}
}
Ok(())
}
pub fn validate_cdata(s: &str) -> Result<(), Error> {
let s = s.as_bytes();
for i in 0..s.len() {
let b = s[i];
if b < 0x09 || b == 0x0b || b == 0x0c || (b >= 0x0e && b <= 0x1f) {
return Err(Error::InvalidChar(b.into()));
}
if b == 0xbe || b == 0xbf {
if i >= 2 && s[i - 2] == 0xef && s[i - 1] == 0xbf {
let bit = (b & 0x01) as u32;
let ch = unsafe { char::from_u32_unchecked(0xfffe | bit) };
return Err(Error::InvalidChar(ch));
}
}
}
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Ordinary text passes `validate_cdata`, U+FFFF does not.
    #[test]
    fn test_cdata_smoketest() {
        let plain_text = validate_cdata("foo bar baz http://<xyz>");
        assert!(plain_text.is_ok());

        let nonchar = validate_cdata("\u{ffff}");
        assert!(nonchar.is_err());
    }

    /// `validate_name` accepts simple and colon-containing names and rejects
    /// empty input, text with spaces/markup, and U+FFFF.
    #[test]
    fn test_name_smoketest() {
        for &accepted in &["foobar", "foo:bar"] {
            assert!(validate_name(accepted).is_ok());
        }
        for &rejected in &["", "foo bar baz http://<xyz>", "\u{ffff}"] {
            assert!(validate_name(rejected).is_err());
        }
    }

    /// `validate_ncname` behaves like `validate_name` except that a name
    /// containing a colon is rejected as well.
    #[test]
    fn test_ncname_smoketest() {
        assert!(validate_ncname("foobar").is_ok());
        for &rejected in &["foo:bar", "", "foo bar baz http://<xyz>", "\u{ffff}"] {
            assert!(validate_ncname(rejected).is_err());
        }
    }

    /// For every `char`, `validate_cdata` on the one-character string must
    /// agree with `CLASS_XML_NONCHAR`: selected characters are reported via
    /// `Error::InvalidChar` with that exact character, all others pass.
    #[test]
    fn test_validate_cdata_is_equivalent_to_nonchar_class() {
        // One buffer reused for all iterations; a `char` needs at most four
        // bytes in UTF-8.
        let mut encoded = String::with_capacity(4);
        // `from_u32` filters out the code points which are not valid `char`s.
        for ch in (0x0..=0x10ffffu32).filter_map(std::char::from_u32) {
            encoded.clear();
            encoded.push(ch);
            let is_nonchar = selectors::CLASS_XML_NONCHAR.select(ch);
            let outcome = validate_cdata(&encoded);
            if is_nonchar {
                match outcome {
                    Err(Error::InvalidChar(reported)) => assert_eq!(reported, ch),
                    other => panic!("validate_cdata accepts {:?} (ch={:?}) which is rejected by CLASS_XML_NONCHAR: {:?}", encoded, ch, other),
                }
            } else {
                match outcome {
                    Ok(()) => {}
                    other => panic!("validate_cdata rejects {:?} (ch={:?}) which is accepted by CLASS_XML_NONCHAR: {:?}", encoded, ch, other),
                }
            }
        }
    }
}