#[cfg(test)]
use rxml_validation::selectors::{CodepointRange, CodepointRanges};
/// A predicate over single bytes.
///
/// In this file it is implemented for a literal `u8` (matches that exact
/// byte) and for any `Fn(u8) -> bool` (matches whatever the callable
/// accepts).
pub trait ByteSelect {
/// Returns `true` if the byte `b` is selected by this selector.
fn select(&self, b: u8) -> bool;
}
/// Test-only selector type; its `ByteSelect` implementation accepts every
/// byte.
#[cfg(test)]
pub(crate) struct AnyByte();
/// `AnyByte` selects unconditionally: the byte value is never inspected.
#[cfg(test)]
impl ByteSelect for AnyByte {
    fn select(&self, _: u8) -> bool {
        true
    }
}
/// A bare `u8` acts as a selector for exactly the byte equal to itself.
impl ByteSelect for u8 {
    fn select(&self, b: u8) -> bool {
        b == *self
    }
}
/// Every `Fn(u8) -> bool` callable (function or closure) is a selector;
/// selecting a byte simply forwards to the callable.
impl<T> ByteSelect for T
where
    T: Fn(u8) -> bool,
{
    fn select(&self, b: u8) -> bool {
        let predicate = self;
        predicate(b)
    }
}
/// Returns `true` if `b` is one of the four whitespace bytes: space
/// (0x20), line feed (0x0a), horizontal tab (0x09) or carriage return
/// (0x0d).
pub fn is_space(b: u8) -> bool {
    matches!(b, b' ' | b'\n' | b'\t' | b'\r')
}
/// Returns `true` if `b` is `&`, `<`, `]`, or a control byte below 0x20
/// other than horizontal tab (0x09) and line feed (0x0a).
fn is_text_delimiter(b: u8) -> bool {
    // Carriage return (0x0d) lies inside 0x0b..=0x1f, so it needs no
    // separate alternative.
    matches!(b, 0x00..=0x08 | 0x0b..=0x1f | b'&' | b'<' | b']')
}
/// Returns `true` unless `is_text_delimiter` flags `b`.
///
/// This is the exact negation of `is_text_delimiter`.
pub fn maybe_text(b: u8) -> bool {
    let is_delimiter = is_text_delimiter(b);
    !is_delimiter
}
/// Returns `true` if `b` is `]` or a control byte below 0x20 other than
/// horizontal tab (0x09) and line feed (0x0a).
fn is_cdata_content_delimiter(b: u8) -> bool {
    // Carriage return (0x0d) is part of 0x0b..=0x1f.
    matches!(b, 0x00..=0x08 | 0x0b..=0x1f | b']')
}
/// Returns `true` unless `is_cdata_content_delimiter` flags `b`.
///
/// This is the exact negation of `is_cdata_content_delimiter`.
pub fn maybe_cdata_content(b: u8) -> bool {
    let is_delimiter = is_cdata_content_delimiter(b);
    !is_delimiter
}
/// Returns `true` if `b` is `-` or a control byte below 0x20 other than
/// horizontal tab (0x09) and line feed (0x0a).
fn is_comment_content_delimiter(b: u8) -> bool {
    // Carriage return (0x0d) is part of 0x0b..=0x1f.
    matches!(b, 0x00..=0x08 | 0x0b..=0x1f | b'-')
}
/// Returns `true` unless `is_comment_content_delimiter` flags `b`.
///
/// This is the exact negation of `is_comment_content_delimiter`.
pub fn maybe_comment_content(b: u8) -> bool {
    let is_delimiter = is_comment_content_delimiter(b);
    !is_delimiter
}
/// Returns `true` if `b` is none of: `:`, `-`, `.`, `_`, an ASCII letter,
/// an ASCII digit, or a byte with the high bit set (0x80 and above).
fn is_name_delimiter(b: u8) -> bool {
    // The pattern lists every byte that is *not* a delimiter; everything
    // else terminates a name.
    !matches!(
        b,
        b':' | b'-' | b'.' | b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | 0x80..=0xff
    )
}
/// Returns `true` unless `is_name_delimiter` flags `b`.
///
/// This is the exact negation of `is_name_delimiter`.
pub fn maybe_name(b: u8) -> bool {
    let is_delimiter = is_name_delimiter(b);
    !is_delimiter
}
/// Returns `true` if `b` is any byte below 0x20 (tab, line feed and
/// carriage return included), `&`, `'` or `<`.
fn is_attval_apos_delimiter(b: u8) -> bool {
    matches!(b, 0x00..=0x1f | b'&' | b'\'' | b'<')
}
/// Returns `true` unless `is_attval_apos_delimiter` flags `b`.
///
/// This is the exact negation of `is_attval_apos_delimiter`.
pub fn maybe_attval_apos(b: u8) -> bool {
    let is_delimiter = is_attval_apos_delimiter(b);
    !is_delimiter
}
/// Returns `true` if `b` is any byte below 0x20 (tab, line feed and
/// carriage return included), `&`, `"` or `<`.
fn is_attval_quot_delimiter(b: u8) -> bool {
    matches!(b, 0x00..=0x1f | b'&' | b'"' | b'<')
}
/// Returns `true` unless `is_attval_quot_delimiter` flags `b`.
///
/// This is the exact negation of `is_attval_quot_delimiter`.
pub fn maybe_attval_quot(b: u8) -> bool {
    let is_delimiter = is_attval_quot_delimiter(b);
    !is_delimiter
}
/// Returns `true` if `b` is a control byte below 0x20 other than
/// horizontal tab (0x09), line feed (0x0a) and carriage return (0x0d).
pub fn is_nonchar_byte(b: u8) -> bool {
    matches!(b, 0x00..=0x08 | 0x0b | 0x0c | 0x0e..=0x1f)
}
/// Returns `true` if `b` is an ASCII decimal digit (`0` through `9`).
pub fn is_decimal_digit(b: u8) -> bool {
    // The standard library check covers exactly b'0'..=b'9'.
    b.is_ascii_digit()
}
/// Returns `true` if `b` is an ASCII hexadecimal digit: `0` through `9`,
/// `a` through `f`, or `A` through `F`.
pub fn is_hexadecimal_digit(b: u8) -> bool {
    // The standard library check covers exactly the three ranges above.
    b.is_ascii_hexdigit()
}
/// Test-only codepoint table: U+0009..=U+000A and U+0020..=U+10FFFF,
/// leaving out `&` (U+0026), `<` (U+003C), `]` (U+005D), the surrogate
/// block U+D800..=U+DFFF, and U+FFFE/U+FFFF.
#[cfg(test)]
const VALID_XML_CDATA_RANGES_TEXT_DELIMITED: &[CodepointRange] = &[
    CodepointRange('\u{0009}', '\u{000a}'),
    CodepointRange('\u{0020}', '\u{0025}'),
    CodepointRange('\u{0027}', '\u{003b}'),
    CodepointRange('\u{003d}', '\u{005c}'),
    CodepointRange('\u{005e}', '\u{d7ff}'),
    CodepointRange('\u{e000}', '\u{fffd}'),
    CodepointRange('\u{10000}', '\u{10ffff}'),
];
/// Test-only codepoint table: U+0020..=U+10FFFF, leaving out `&`
/// (U+0026), `'` (U+0027), `<` (U+003C), the surrogate block
/// U+D800..=U+DFFF, and U+FFFE/U+FFFF.
#[cfg(test)]
const VALID_XML_CDATA_RANGES_ATT_APOS_DELIMITED: &[CodepointRange] = &[
    CodepointRange('\u{0020}', '\u{0025}'),
    CodepointRange('\u{0028}', '\u{003b}'),
    CodepointRange('\u{003d}', '\u{d7ff}'),
    CodepointRange('\u{e000}', '\u{fffd}'),
    CodepointRange('\u{10000}', '\u{10ffff}'),
];
/// Test-only codepoint table: U+0020..=U+10FFFF, leaving out `"`
/// (U+0022), `&` (U+0026), `<` (U+003C), the surrogate block
/// U+D800..=U+DFFF, and U+FFFE/U+FFFF.
#[cfg(test)]
const VALID_XML_CDATA_RANGES_ATT_QUOT_DELIMITED: &[CodepointRange] = &[
    CodepointRange('\u{0020}', '\u{0021}'),
    CodepointRange('\u{0023}', '\u{0025}'),
    CodepointRange('\u{0027}', '\u{003b}'),
    CodepointRange('\u{003d}', '\u{d7ff}'),
    CodepointRange('\u{e000}', '\u{fffd}'),
    CodepointRange('\u{10000}', '\u{10ffff}'),
];
/// Test-only codepoint table: U+0009..=U+000A and U+0020..=U+10FFFF,
/// leaving out `]` (U+005D), the surrogate block U+D800..=U+DFFF, and
/// U+FFFE/U+FFFF.
#[cfg(test)]
const VALID_XML_CDATA_RANGES_CDATASECTION_DELIMITED: &[CodepointRange] = &[
    CodepointRange('\u{0009}', '\u{000a}'),
    CodepointRange('\u{0020}', '\u{005c}'),
    CodepointRange('\u{005e}', '\u{d7ff}'),
    CodepointRange('\u{e000}', '\u{fffd}'),
    CodepointRange('\u{10000}', '\u{10ffff}'),
];
/// Test-only selector wrapping `VALID_XML_CDATA_RANGES_CDATASECTION_DELIMITED`;
/// the tests compare it against `is_cdata_content_delimiter`.
#[cfg(test)]
static CLASS_XML_CDATA_SECTION_CONTENTS_DELIMITED: CodepointRanges =
CodepointRanges(VALID_XML_CDATA_RANGES_CDATASECTION_DELIMITED);
#[cfg(test)]
mod tests {
    use super::*;
    use rxml_validation::selectors::*;

    /// Calls `visit` once for every Unicode scalar value, handing it the
    /// numeric codepoint, the `char`, and the char's UTF-8 encoding.
    fn for_each_scalar_value(mut visit: impl FnMut(u32, char, &[u8])) {
        let mut utf8 = [0u8; 4];
        for cp in 0x0..=0x10ffffu32 {
            // `char::from_u32` yields `None` for surrogates; skip those.
            if let Some(ch) = char::from_u32(cp) {
                let encoded = ch.encode_utf8(&mut utf8[..]);
                visit(cp, ch, encoded.as_bytes());
            }
        }
    }

    /// Panics if any UTF-8 byte of a char accepted by `is_selected` is
    /// flagged by `is_delimiter`.
    fn assert_no_delimiter_bytes(
        is_selected: impl Fn(char) -> bool,
        is_delimiter: impl Fn(u8) -> bool,
    ) {
        for_each_scalar_value(|cp, ch, bytes| {
            for b in bytes {
                if is_selected(ch) && is_delimiter(*b) {
                    panic!(
                        "byte selector rejects byte 0x{:02x}, which is a utf-8 byte of U+{:04x}",
                        *b, cp
                    );
                }
            }
        });
    }

    #[test]
    fn test_nonchar_byte_range_is_superset_of_nonchar_codepoint_range() {
        for_each_scalar_value(|cp, ch, bytes| {
            // Only chars outside the non-char class are checked.
            if CLASS_XML_NONCHAR.select(ch) {
                return;
            }
            if bytes.iter().any(|b| is_nonchar_byte(*b)) {
                panic!("byte selector rejects any byte of U+{:04x}", cp);
            }
        });
    }

    #[test]
    fn test_text_delimited_byte_range_is_superset_of_text_delimited_codepoint_range() {
        let class = &CodepointRanges(VALID_XML_CDATA_RANGES_TEXT_DELIMITED);
        assert_no_delimiter_bytes(|ch: char| class.select(ch), is_text_delimiter);
    }

    #[test]
    fn test_cdata_delimited_byte_range_is_superset_of_codepoint_range() {
        assert_no_delimiter_bytes(
            |ch: char| CLASS_XML_CDATA_SECTION_CONTENTS_DELIMITED.select(ch),
            is_cdata_content_delimiter,
        );
    }

    #[test]
    fn test_name_byte_range_is_superset_of_codepoint_range() {
        assert_no_delimiter_bytes(|ch: char| CLASS_XML_NAME.select(ch), is_name_delimiter);
    }

    #[test]
    fn test_att_apos_delimited_range_is_superset_of_codepoint_range() {
        let class = CodepointRanges(VALID_XML_CDATA_RANGES_ATT_APOS_DELIMITED);
        assert_no_delimiter_bytes(|ch: char| class.select(ch), is_attval_apos_delimiter);
    }

    #[test]
    fn test_att_quot_delimited_range_is_superset_of_codepoint_range() {
        let class = CodepointRanges(VALID_XML_CDATA_RANGES_ATT_QUOT_DELIMITED);
        assert_no_delimiter_bytes(|ch: char| class.select(ch), is_attval_quot_delimiter);
    }

    #[test]
    fn test_comment_delimiter() {
        // Bytes that must end a run of comment content.
        assert!(is_comment_content_delimiter(b'\0'));
        assert!(is_comment_content_delimiter(b'-'));
        assert!(is_comment_content_delimiter(b'\r'));

        // Whitespace other than carriage return stays inside the run.
        assert!(!is_comment_content_delimiter(b'\n'));
        assert!(!is_comment_content_delimiter(b'\t'));
        assert!(!is_comment_content_delimiter(b' '));
    }
}