use std::str::from_utf8;
use super::error::description::*;
use super::*;
/// Test driver that feeds a document to a `SaxParser` and compares the
/// elements it produces against a fixed, ordered list of expected elements.
struct Tester<'a> {
/// Elements the parser is expected to produce, in order.
expected: &'a [SaxElement<'a>],
/// Index into `expected` of the next element still to be matched.
current: usize,
/// Character data collected so far for the expected `CData` at `current`;
/// the parser's `CData` pieces are appended here until the expected length
/// is reached.
cdata_buf: String,
}
impl<'a> Tester<'a> {
    /// Creates a tester that expects the parser to produce exactly
    /// `expected`, in order.
    fn new(expected: &'a [SaxElement<'a>]) -> Self {
        Tester {
            expected,
            current: 0,
            cdata_buf: String::new(),
        }
    }

    /// Compares one parsed `element` with the next expected element.
    ///
    /// `CData` elements are accumulated in `cdata_buf` and only compared once
    /// at least as many bytes as the expected text have been collected, so the
    /// parser may deliver one expected `CData` as several pieces.
    ///
    /// # Panics
    ///
    /// Panics when the parser produced more elements than expected or when
    /// the element does not match the expectation.
    fn check_element(&mut self, element: &SaxElement) {
        assert!(
            self.current < self.expected.len(),
            "parser produced an element beyond the expected list: {:?}",
            element
        );
        // Copy the slice reference out so the expectation can be inspected
        // while `self.cdata_buf` is being modified.
        let expected_elements = self.expected;
        let wanted = &expected_elements[self.current];
        match (element, wanted) {
            (SaxElement::CData(piece), SaxElement::CData(full_text)) => {
                // Re-validate the bytes of the piece as UTF-8.
                from_utf8(piece.as_bytes()).unwrap();
                self.cdata_buf.push_str(piece);
                if self.cdata_buf.len() >= full_text.len() {
                    assert_eq!(self.cdata_buf, *full_text);
                    self.current += 1;
                    self.cdata_buf.clear();
                }
            }
            _ => {
                // Either a non-CData element, or a CData where something else
                // was expected (in which case this comparison fails).
                assert_eq!(element, wanted);
                self.current += 1;
            }
        }
    }

    /// Parses `s` twice with the same parser — first as one buffer, then one
    /// byte at a time after a `reset()` — and verifies after each pass that
    /// all expected elements were seen, that `parse_finish()` succeeds and
    /// that the reported location matches the end of `s`.
    ///
    /// # Panics
    ///
    /// Panics if `s` is empty or if any of the checks above fails.
    fn check(&mut self, s: &str) {
        let nr_lines = s.matches('\n').count();
        let nr_column = s
            .lines()
            .next_back()
            .expect("Tester::check requires a non-empty document")
            .len();
        let input = s.as_bytes();

        let mut parser = SaxParser::new();
        // `input` is non-empty here (checked above), so both chunk sizes are
        // valid arguments for `chunks`.
        for (pass, &chunk_len) in [input.len(), 1].iter().enumerate() {
            if pass > 0 {
                parser.reset();
            }
            self.current = 0;
            self.cdata_buf.clear();

            for chunk in input.chunks(chunk_len) {
                let mut elements = parser.elements(chunk);
                // NOTE(review): kept as `while let`; presumably the returned
                // element borrows from `elements` — confirm before turning
                // this into a `for` loop.
                while let Some(element) = elements.next() {
                    // Build the failure message only when parsing failed.
                    let element = element
                        .unwrap_or_else(|err| panic!("Failed to parse {}: {:?}", s, err));
                    self.check_element(&element);
                }
            }

            assert_eq!(parser.parse_finish(), Ok(()));
            assert_eq!(self.current, self.expected.len());
            let location = parser.location();
            assert_eq!(location.lines, nr_lines);
            assert_eq!(location.column, nr_column);
            assert_eq!(location.bytes, s.len());
        }
    }
}
/// Test driver for malformed input: holds the error a document must produce
/// and the byte position at which it is expected.
struct BadTester {
/// Value compared against `parser.location().bytes` when parsing an element
/// fails.
bad_byte: usize,
/// Error the parser is expected to report.
err: ParseError,
}
impl BadTester {
    /// Creates a tester expecting `ParseError::BadXml(msg)`, reported with
    /// `location().bytes == bad_byte`.
    fn new(bad_byte: usize, msg: &'static str) -> BadTester {
        BadTester {
            bad_byte,
            err: ParseError::BadXml(msg),
        }
    }

    /// Checks that parsing the text `s` fails with the expected error.
    ///
    /// Shares its implementation with [`BadTester::check_bytes`].
    fn check(&mut self, s: &str) {
        self.check_bytes(s.as_bytes());
    }

    /// Checks that parsing `bytes` fails with the expected error.
    ///
    /// If an element fails to parse, both the error and the parser's byte
    /// location are compared. If every element parses, `parse_finish()` must
    /// return the expected error instead.
    ///
    /// NOTE(review): on the `parse_finish()` path `bad_byte` is not compared
    /// against the parser location — confirm whether that is intended.
    fn check_bytes(&mut self, bytes: &[u8]) {
        let mut parser = SaxParser::new();
        let mut elements = parser.elements(bytes);
        while let Some(element) = elements.next() {
            if let Err(err) = element {
                assert_eq!(err, self.err);
                assert_eq!(parser.location().bytes, self.bad_byte);
                return;
            }
        }
        assert_eq!(parser.parse_finish(), Err(self.err));
    }
}
/// Well-formed documents exercising start/end tags, empty tags, attributes
/// and whitespace inside tags.
#[test]
fn tags() {
    Tester::new(&[SaxElement::StartTag("lonely"), SaxElement::StartTagEmpty]).check("<lonely/>");
    Tester::new(&[SaxElement::StartTag("lonely"), SaxElement::StartTagEmpty])
        .check(" <lonely/> ");
    Tester::new(&[
        SaxElement::StartTag("parent"),
        SaxElement::StartTagContent,
        SaxElement::StartTag("child"),
        SaxElement::StartTagEmpty,
        SaxElement::StartTag("child"),
        SaxElement::StartTagEmpty,
        SaxElement::CData("child"),
        SaxElement::EndTag("parent"),
    ])
    .check("<?xml version='1.0'?><parent><child/><child/>child</parent>");
    Tester::new(&[
        SaxElement::StartTag("parent"),
        SaxElement::StartTagContent,
        SaxElement::StartTag("empty"),
        SaxElement::StartTagEmpty,
        SaxElement::StartTag("b"),
        SaxElement::StartTagContent,
        SaxElement::CData("lala"),
        SaxElement::EndTag("b"),
        SaxElement::EndTag("parent"),
    ])
    .check("<parent ><empty \t /><b>lala</b \n></parent>");
    Tester::new(&[
        SaxElement::StartTag("mytag"),
        SaxElement::Attribute("abc", "123"),
        SaxElement::Attribute("id", "XC72"),
        SaxElement::StartTagContent,
        SaxElement::EndTag("mytag"),
    ])
    .check("<mytag abc='123' id=\"XC72\"></mytag>");
    Tester::new(&[
        SaxElement::StartTag("a"),
        SaxElement::StartTagContent,
        SaxElement::StartTag("b"),
        SaxElement::Attribute("x1", "lala"),
        SaxElement::StartTagEmpty,
        SaxElement::StartTag("c"),
        SaxElement::Attribute("x2", "bibi"),
        SaxElement::StartTagEmpty,
        SaxElement::EndTag("a"),
    ])
    .check("<a><b x1 ='lala'/><c x2\t= \t'bibi'/></a>");
    Tester::new(&[
        SaxElement::StartTag("tag"),
        SaxElement::Attribute("a", "1"),
        SaxElement::Attribute("b", "2"),
        SaxElement::Attribute("c", "3"),
        SaxElement::Attribute("d", "4"),
        SaxElement::Attribute("e", "5"),
        SaxElement::Attribute("f", "6"),
        SaxElement::Attribute("g", "7"),
        SaxElement::Attribute("id", "xyz9"),
        SaxElement::StartTagContent,
        SaxElement::StartTag("sub"),
        SaxElement::StartTagContent,
        SaxElement::EndTag("sub"),
        SaxElement::EndTag("tag"),
    ])
    .check("<tag a = '1' b ='2' c= '3' d='4' e='5' f='6' g='7' id='xyz9'><sub></sub></tag>");
    Tester::new(&[
        SaxElement::StartTag("tag"),
        SaxElement::Attribute("a", "12\"34"),
        SaxElement::Attribute("b", "123'456"),
        SaxElement::StartTagEmpty,
    ])
    .check("<tag a='12\"34' b=\"123'456\" />");
    // The ampersand in the content must be written as `&amp;`: a bare `&`
    // starts a reference and is not well-formed character data.
    Tester::new(&[
        SaxElement::StartTag("a"),
        SaxElement::StartTagContent,
        SaxElement::StartTag("b"),
        SaxElement::StartTagContent,
        SaxElement::CData("john&mary"),
        SaxElement::StartTag("c"),
        SaxElement::StartTagContent,
        SaxElement::StartTag("d"),
        SaxElement::Attribute("e", "f"),
        SaxElement::Attribute("g", "123456"),
        SaxElement::Attribute("h", "madcat"),
        SaxElement::Attribute("klm", "nop"),
        SaxElement::StartTagEmpty,
        SaxElement::EndTag("c"),
        SaxElement::EndTag("b"),
        SaxElement::EndTag("a"),
    ])
    .check("<a><b>john&amp;mary<c><d e='f' g='123456' h='madcat' klm='nop'/></c></b></a>");
}
/// Comments inside and around the root element; the expected lists contain
/// no element for any of the comments.
#[test]
fn comments() {
    // Comment in front of character data.
    let comment_before_text = [
        SaxElement::StartTag("item"),
        SaxElement::Attribute("url", "http://jabber.org"),
        SaxElement::StartTagContent,
        SaxElement::CData("Jabber Site"),
        SaxElement::EndTag("item"),
    ];
    Tester::new(&comment_before_text)
        .check("<item url='http://jabber.org'><!-- little comment -->Jabber Site</item>");

    // Comment whose text itself contains tag-like markup.
    let comment_with_markup = [
        SaxElement::StartTag("index"),
        SaxElement::StartTagContent,
        SaxElement::StartTag("item"),
        SaxElement::Attribute("name", "lala"),
        SaxElement::Attribute("page", "42"),
        SaxElement::StartTagEmpty,
        SaxElement::EndTag("index"),
    ];
    Tester::new(&comment_with_markup)
        .check("<index><!-- <item> - tag has no childs --><item name='lala' page='42'/></index>");

    // Comments before and after the root element.
    let comments_outside_root = [SaxElement::StartTag("empty"), SaxElement::StartTagEmpty];
    Tester::new(&comments_outside_root).check("<!-- comment --> <empty/> <!-- lala -->");
}
/// Character data, including CDATA sections and multi-byte text.
#[test]
fn cdatas() {
    // Plain text around a CDATA section is expected as one combined CData.
    let mixed = [
        SaxElement::StartTag("ka"),
        SaxElement::StartTagContent,
        SaxElement::CData("1234 <ka> lala ] ]] ]]] 4321"),
        SaxElement::EndTag("ka"),
    ];
    Tester::new(&mixed).check("<ka>1234<![CDATA[ <ka> lala ] ]] ]]] ]]>4321</ka>");

    // Section content ending in `]` directly before the terminator.
    let one_bracket = [
        SaxElement::StartTag("data"),
        SaxElement::StartTagContent,
        SaxElement::CData("[TEST]"),
        SaxElement::EndTag("data"),
    ];
    Tester::new(&one_bracket).check("<data><![CDATA[[TEST]]]></data>");

    // Section content ending in `]]` directly before the terminator.
    let two_brackets = [
        SaxElement::StartTag("data"),
        SaxElement::StartTagContent,
        SaxElement::CData("[TEST]]"),
        SaxElement::EndTag("data"),
    ];
    Tester::new(&two_brackets).check("<data><![CDATA[[TEST]]]]></data>");

    // Non-ASCII text containing `]]` outside of any CDATA section.
    let text = "[[bg:Чингис хан]][[bn:চেঙ্গিজ খান]]";
    let document = format!("<a>{}</a>", text);
    let multibyte = [
        SaxElement::StartTag("a"),
        SaxElement::StartTagContent,
        SaxElement::CData(text),
        SaxElement::EndTag("a"),
    ];
    Tester::new(&multibyte).check(&document);
}
/// Feeds each slice of `parts`, in order, to a single parser and panics if
/// any produced element is an error. `parse_finish()` is not called.
fn check_parts(parts: &[&[u8]]) {
    let mut parser = SaxParser::new();
    for &chunk in parts.iter() {
        let mut produced = parser.elements(chunk);
        while let Some(outcome) = produced.next() {
            outcome.unwrap();
        }
    }
}
/// Multi-byte UTF-8 sequences split across input buffers at various points
/// must parse without error.
#[test]
fn utf8_boundaries() {
    let cases: &[&[&[u8]]] = &[
        // Two-byte sequence \xc2\xa3.
        &[b"<a>", b"\xc2", b"\xa3", b"</a>"],
        &[b"<a>\xc2", b"\xa3", b"</a>"],
        &[b"<a>\xc2", b"\xa3</a>"],
        // Three-byte sequence \xe0\xa0\x80.
        &[b"<a>b", b"\xe0", b"\xa0", b"\x80", b"\xd0\xb8</a>"],
        &[b"<a>b", b"\xe0\xa0", b"\x80", b"</a>"],
        &[b"<a>b", b"\xe0", b"\xa0\x80", b"</a>"],
        &[b"<a>b", b"\xe0", b"\xa0\x80</a>"],
        &[b"<a>\xe0", b"\xa0", b"\x80", b"</a>"],
        &[b"<a>\xe0", b"\xa0", b"\x80</a>"],
        &[b"<a>\xe0", b"\xa0\x80", b"</a>"],
        &[b"<a>\xe0\xa0", b"\x80", b"</a>"],
        &[b"<a>\xe0\xa0", b"\x80</a>"],
        &[b"<a>\xe0\xa0\x80", b"\xd0\xb8</a>"],
        // Four-byte sequence \xf0\x96\xbf\xa2.
        &[b"<a>", b"\xf0", b"\x96", b"\xbf", b"\xa2", b"</a>"],
        &[b"<a>\xf0", b"\x96", b"\xbf", b"\xa2", b"</a>"],
        &[b"<a>\xf0", b"\x96", b"\xbf", b"\xa2</a>"],
        &[b"<a>\xf0", b"\x96\xbf", b"\xa2", b"</a>"],
        &[b"<a>\xf0\x96", b"\xbf", b"\xa2", b"</a>"],
        &[b"<a>\xf0", b"\x96", b"\xbf\xa2</a>"],
        &[b"<a>\xf0", b"\x96\xbf\xa2", b"</a>"],
        &[b"<a>\xf0\x96\xbf", b"\xa2", b"</a>"],
        &[b"<a>\xf0", b"\x96\xbf\xa2</a>"],
        &[b"<a>\xf0\x96\xbf", b"\xa2</a>"],
        // Runs of two-byte sequences split at every position.
        &[b"<a>e\xd0", b"\xbd\xd1\x81\xd0\xba</a>"],
        &[b"<a>e\xd0\xbd", b"\xd1\x81\xd0\xba\xd0\xb8aa</a>"],
        &[b"<a>e\xd0\xbd\xd1", b"\x81\xd0\xba\xd0\xb8aa</a>"],
        &[b"<a>e\xd0\xbd\xd1\x81", b"\xd0\xba\xd0\xb8aa</a>"],
        &[b"<a>e\xd0\xbd\xd1\x81\xd0", b"\xba\xd0\xb8aa</a>"],
        &[b"<a>e\xd0\xbd\xd1\x81\xd0\xba", b"\xd0\xb8aa</a>"],
        &[b"<a>e\xd0\xbd\xd1\x81\xd0\xba\xd0", b"\xb8aa</a>"],
    ];
    for parts in cases.iter() {
        check_parts(parts);
    }
}
/// A DOCTYPE declaration with an internal subset in front of the root
/// element; the expected list contains no element for it.
#[test]
fn dtds() {
    let after_doctype = [
        SaxElement::StartTag("x"),
        SaxElement::StartTagContent,
        SaxElement::CData("foo"),
        SaxElement::EndTag("x"),
    ];
    let mut tester = Tester::new(&after_doctype);
    tester.check(" <!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]> <x>foo</x>");
}
/// A processing instruction inside element content; the expected list
/// contains no element for it.
#[test]
fn pi() {
    let around_pi = [
        SaxElement::StartTag("a"),
        SaxElement::StartTagContent,
        SaxElement::CData("bibi"),
        SaxElement::EndTag("a"),
    ];
    let mut tester = Tester::new(&around_pi);
    tester.check("<a><?xml lala?>bibi</a>");
}
#[test]
fn entities() {
Tester::new(&[
SaxElement::StartTag("body"),
SaxElement::StartTagContent,
SaxElement::CData("I'm fixing parser&tester for \"<\" and \">\" chars."),
SaxElement::EndTag("body"),
])
.check("<body>I'm fixing parser&tester for "<" and ">" chars.</body>");
Tester::new(&[
SaxElement::StartTag("test"),
SaxElement::StartTagContent,
SaxElement::StartTag("standalone"),
SaxElement::Attribute("be", "happy"),
SaxElement::StartTagEmpty,
SaxElement::CData("abcd"),
SaxElement::StartTag("br"),
SaxElement::StartTagEmpty,
SaxElement::CData("<escape>"),
SaxElement::EndTag("test"),
])
.check("<test><standalone be='happy'/>abcd<br/><escape></test>");
Tester::new(&[
SaxElement::StartTag("a"),
SaxElement::StartTagContent,
SaxElement::CData(";AB;"),
SaxElement::EndTag("a"),
])
.check("<a>;AB;</a>");
Tester::new(&[
SaxElement::StartTag("a"),
SaxElement::StartTagContent,
SaxElement::CData(" \u{90} \u{900} \u{10abc} "),
SaxElement::EndTag("a"),
])
.check("<a>  ऀ 𐪼 </a>");
Tester::new(&[
SaxElement::StartTag("a"),
SaxElement::StartTagContent,
SaxElement::CData(" \u{d7ff} \u{fffd} \u{10ffff} "),
SaxElement::EndTag("a"),
])
.check("<a> ퟿ �  </a>");
}
/// Entity references inside an attribute value.
#[test]
fn attribute_entities() {
    // The ampersand in the value must be written as `&amp;`; a bare `&`
    // starts a reference and is not a well-formed attribute value.
    // NOTE(review): " BA" may originally have been written as character
    // references as well — confirm against the upstream test.
    Tester::new(&[
        SaxElement::StartTag("a"),
        SaxElement::Attribute("b", "a&b BA"),
        SaxElement::StartTagContent,
        SaxElement::EndTag("a"),
    ])
    .check("<a b='a&amp;b BA'></a>");
}
/// A tag whose name is 1500 bytes long ("abc" repeated 500 times).
#[test]
fn long_tag() {
    let name = "abc".repeat(500);
    let document = format!("<{0}></{0}>", name);
    let expected = [
        SaxElement::StartTag(name.as_str()),
        SaxElement::StartTagContent,
        SaxElement::EndTag(name.as_str()),
    ];
    Tester::new(&expected).check(&document);
}
/// Content containing newlines, so that `Tester::check` compares a non-zero
/// line count against the parser's reported location.
#[test]
fn location() {
    let multiline = [
        SaxElement::StartTag("a"),
        SaxElement::StartTagContent,
        SaxElement::CData("\n\n "),
        SaxElement::EndTag("a"),
    ];
    let mut tester = Tester::new(&multiline);
    tester.check("<a>\n\n </a>");
}
/// Malformed tags. Each case is
/// `(expected location().bytes, expected message, document)`.
#[test]
fn bad_tags() {
    let cases: &[(usize, &'static str, &str)] = &[
        (4, TAG_WHITESPACE_START, "<a>< b/></a>"),
        (6, TAG_EMPTY_TAG_MISSING_END, "<a><b/ ></a>"),
        (8, TAG_DOUBLE_END, "<a></ccc/></a>"),
        (13, TAG_DOUBLE_END, "<a><b/><c></c/></a>"),
        (1, TAG_CLOSE_WITHOUT_OPEN, "</a>"),
        (9, TAG_END_TAG_ATTRIBUTES, "<a> </a b>"),
        (8, TAG_OUTSIDE_ROOT, "<a></a><b/>"),
        (10, TAG_ATTRIBUTE_BAD_NAME, "<a a='1' b></a>"),
        (11, TAG_ATTRIBUTE_WITHOUT_QUOTE, "<a a='1' b=></a>"),
        (12, TAG_ATTRIBUTE_WITHOUT_EQUAL, "<a a='12' b '2'></a>"),
        (13, TAG_ATTRIBUTE_WITHOUT_EQUAL, "<a a='123' b c='5'></a>"),
        (14, TAG_END_TAG_ATTRIBUTES, "<a a='12'></a b='1'>"),
        (17, TAG_EMPTY_TAG_MISSING_END, "<g><test a='123'/ b='lala'></g>"),
        (13, TAG_ATTRIBUTE_BAD_VALUE, "<a a='1' b='></a>"),
        (13, TAG_ATTRIBUTE_BAD_VALUE, "<a a='1' b=\"></a>"),
        (4, TAG_WHITESPACE_START, "<a>< abc/></a>"),
        (5, TAG_EMPTY_NAME, "<a> <> </a>"),
        (6, TAG_EMPTY_NAME, "<a> </> </a>"),
    ];
    for &(offset, message, document) in cases {
        BadTester::new(offset, message).check(document);
    }
}
/// Malformed comments and text outside the root element next to comments.
/// Each case is `(expected location().bytes, expected message, document)`.
#[test]
fn bad_comments() {
    let cases: &[(usize, &'static str, &str)] = &[
        (10, COMMENT_MISSING_END, "<e><!-- -- --></e>"),
        (22, COMMENT_MISSING_DASH, "<ha><!-- <lala> --><!- comment -></ha>"),
        (12, DOC_CDATA_WITHOUT_PARENT, "<!-- c1 --> lala <ha/>"),
        (31, DOC_CDATA_WITHOUT_PARENT, "<!-- c1 --> <ha/> <!-- pika -->c"),
        (9, COMMENT_MISSING_END, "<!-- c ---> <ha/>"),
    ];
    for &(offset, message, document) in cases {
        BadTester::new(offset, message).check(document);
    }
}
/// Malformed or unterminated processing instructions after the root element.
#[test]
fn bad_pi() {
    // Local shorthand so every case reads as one line.
    fn expect_error(offset: usize, message: &'static str, document: &str) {
        BadTester::new(offset, message).check(document);
    }

    expect_error(12, PI_MISSING_END, "<e/> <?xml ? >");
    expect_error(12, DOC_OPEN_MARKUP, "<e/> <?xml >");
    expect_error(13, DOC_CDATA_WITHOUT_PARENT, "<e/> <?xml ?>lala");
}
/// Character data outside the root element and malformed CDATA sections.
/// Each case is `(expected location().bytes, expected message, document)`.
#[test]
fn bad_cdatas() {
    let cases: &[(usize, &'static str, &str)] = &[
        (2, DOC_CDATA_WITHOUT_PARENT, " lala <a></a>"),
        (10, DOC_CDATA_WITHOUT_PARENT, " <a></a> lala"),
        (11, DOC_CDATA_WITHOUT_PARENT, " <a></a > lala"),
        (2, MARKUP_CDATA_SECTION_OUTSIDE_ROOT, "<![CDATA[lala]> <a/>"),
        (8, MARKUP_CDATA_SECTION_OUTSIDE_ROOT, " <a/> <![CDATA[lala]>"),
        (7, MARKUP_CDATA_SECTION_BAD_START, "<a> <![DATA[lala]> </a>"),
        (9, MARKUP_CDATA_SECTION_BAD_START, "<a> <![CDaTA[lala]> </a>"),
        (12, MARKUP_CDATA_SECTION_BAD_START, "<a> <![CDATAlala]> </a>"),
    ];
    for &(offset, message, document) in cases {
        BadTester::new(offset, message).check(document);
    }
}
/// Invalid entity and character references; each case gives the expected
/// `location().bytes` value, the expected message and the document.
///
/// NOTE(review): several documents below look as if their character
/// references were decoded or stripped at some point: a few are shorter than
/// the byte offset they are expected to fail at, and some are textually
/// identical while expecting different offsets. Restore them from the
/// upstream test before relying on this test.
#[test]
fn bad_entities() {
// Named references other than the predefined ones.
BadTester::new(8, REFERENCE_CUSTOM_ENTITY).check("<a>&lala;</a>");
BadTester::new(12, REFERENCE_CUSTOM_ENTITY).check("<a>&lala </a>");
BadTester::new(16, TAG_EMPTY_TAG_MISSING_END).check("<lol><<></lol>");
// Malformed numeric references.
BadTester::new(6, REFERENCE_INVALID_DECIMAL).check("<a>a;</a>");
BadTester::new(6, REFERENCE_INVALID_DECIMAL).check("<a>ª</a>");
BadTester::new(8, REFERENCE_INVALID_HEX).check("<a>¥g;</a>");
// Cases expected to be rejected with CHAR_INVALID.
BadTester::new(6, CHAR_INVALID).check("<a></a>");
BadTester::new(7, CHAR_INVALID).check("<a></a>");
BadTester::new(7, CHAR_INVALID).check("<a></a>");
BadTester::new(10, CHAR_INVALID).check("<a>�</a>");
BadTester::new(10, CHAR_INVALID).check("<a>�</a>");
BadTester::new(10, CHAR_INVALID).check("<a></a>");
BadTester::new(10, CHAR_INVALID).check("<a></a>");
BadTester::new(12, CHAR_INVALID).check("<a>�</a>");
}
#[test]
fn bad_chars() {
BadTester::new(6, UTF8_INVALID_PREFIX_BYTE).check_bytes(b"<test>\xFF</test>");
BadTester::new(6, UTF8_INVALID_PREFIX_BYTE).check_bytes(b"<test>\xFE</test>");
BadTester::new(2, CHAR_INVALID).check_bytes(b"<t\x00></t>");
BadTester::new(2, CHAR_INVALID).check_bytes(b"<t\x19></t>");
BadTester::new(8, UTF8_INVALID_CONT_BYTE).check_bytes(b"<test>\xe3\x8fa</test>");
BadTester::new(7, UTF8_OVERLONG_SEQUENCE).check_bytes(b"<test>\xC0\x80</test>");
BadTester::new(7, UTF8_OVERLONG_SEQUENCE).check_bytes(b"<test>\xC0\xaf</test>");
BadTester::new(8, UTF8_OVERLONG_SEQUENCE).check_bytes(b"<test>\xe0\x80\xaf</test>");
BadTester::new(9, UTF8_OVERLONG_SEQUENCE).check_bytes(b"<test>\xf0\x80\x80\xaf</test>");
BadTester::new(7, UTF8_OVERLONG_SEQUENCE).check_bytes(b"<test>\xc1\xbf</test>");
BadTester::new(8, UTF8_OVERLONG_SEQUENCE).check_bytes(b"<test>\xe0\x9f\xbf</test>");
BadTester::new(9, UTF8_OVERLONG_SEQUENCE).check_bytes(b"<test>\xf0\x8f\xbf\xbf</test>");
BadTester::new(1, UTF8_INVALID_PREFIX_BYTE).check_bytes(b"<\x8f\x85></\x8f\x85>");
BadTester::new(7, UTF8_OVERLONG_SEQUENCE).check_bytes(
b"<utf8>\xC1\x80<br/>\xED\x95\x9C\xEA\xB5\xAD\xEC\x96\xB4<err>\xC1\x65</err></utf8>",
);
}
/// Documents that end while something is still open or before any root
/// element was seen. Each case is `(offset, expected message, document)`.
#[test]
fn bad_unfinished() {
    let cases: &[(usize, &'static str, &str)] = &[
        (5, DOC_OPEN_TAGS, " <a> "),
        (20, DOC_NO_CONTENT, " <!-- lala --> "),
        (27, DOC_OPEN_MARKUP, " <a></a> <!-- open comment "),
        (23, DOC_OPEN_MARKUP, " <a></a> <?app open pi "),
    ];
    for &(offset, message, document) in cases {
        BadTester::new(offset, message).check(document);
    }
}
/// The parser location and both `ParseError` variants used here must format
/// to non-empty text with `{:?}` and with `{}`.
#[test]
fn prints() {
    // Formats `value` with Debug, then Display, and requires non-empty output.
    fn assert_formats<T: std::fmt::Debug + std::fmt::Display>(value: &T) {
        let debug_text = format!("{:?}", value);
        assert!(!debug_text.is_empty());
        let display_text = format!("{}", value);
        assert!(!display_text.is_empty());
    }

    let parser = SaxParser::new();
    assert_formats(&parser.location());
    assert_formats(&ParseError::NoMemory);
    assert_formats(&ParseError::BadXml("test"));
}