use std::os::raw::{c_char, c_int, c_void};
use std::ptr;
use libxml::bindings::{
xmlChar, xmlCreatePushParserCtxt, xmlFreeParserCtxt, xmlParseChunk, xmlSAXHandlerPtr,
};
use crate::parser::{ParseError, ParseErrorKind, ParseResult};
use crate::reader::Reader;
/// Parses an XML body from `reader` and returns its source text.
///
/// The next character of `reader` must be `<`. Characters are then consumed one at a time and
/// fed, UTF-8 encoded, to a libxml2 push parser whose SAX callbacks track the element depth.
/// Reading stops right after the chunk that produces an end-element event bringing the depth
/// back to zero, so any input following that point is left unread.
///
/// # Errors
///
/// - A recoverable [`ParseErrorKind::Xml`] error at the current position, without consuming
///   anything, when the next character is not `<` (or the input is empty).
/// - A non-recoverable [`ParseErrorKind::Xml`] error when the libxml2 parser context cannot
///   be created, or when libxml2 reports an error for a chunk. In the latter case the error
///   position is the reader position just before the offending character was read.
pub fn parse(reader: &mut Reader) -> ParseResult<String> {
    // Cheap pre-check: anything that does not start with `<` cannot be an XML body.
    if !matches!(reader.peek(), Some('<')) {
        return Err(ParseError::new(
            reader.cursor().pos,
            true,
            ParseErrorKind::Xml,
        ));
    }

    let mut buf = String::new();
    let mut parser = new_sax_parser();
    let mut parser_context = ParserContext::new();
    // Every later access to the callback state goes through this single raw pointer, so the
    // Rust side never touches `parser_context` directly while libxml2 holds a pointer to it.
    let state_ptr: *mut ParserContext = &mut parser_context;

    // SAFETY: `parser` and `parser_context` are locals that outlive the parser context, which
    // is freed before this function returns. No initial chunk and no filename are supplied.
    let context = unsafe {
        xmlCreatePushParserCtxt(
            &mut parser as xmlSAXHandlerPtr,
            state_ptr as *mut c_void,
            ptr::null(),
            0,
            ptr::null(),
        )
    };
    if context.is_null() {
        // Context creation failed: report it explicitly instead of relying on how libxml2
        // reacts to a null context in later calls.
        return Err(ParseError::new(
            reader.cursor().pos,
            false,
            ParseErrorKind::Xml,
        ));
    }

    // Position before the character currently being fed, used to locate an eventual error.
    let mut prev_pos = reader.cursor().pos;
    let mut failed = false;

    while let Some(c) = reader.read() {
        buf.push(c);

        // A four-byte buffer is large enough to UTF-8 encode any `char`.
        let mut utf8 = [0u8; 4];
        // Tell libxml2 whether this chunk is the last one of the input.
        let terminate = reader.is_eof() as c_int;
        let encoded = c.encode_utf8(&mut utf8);
        let len = encoded.len() as c_int;

        // SAFETY: `context` is non-null and not yet freed; the chunk pointer is valid for
        // `len` bytes for the duration of the call.
        let ret =
            unsafe { xmlParseChunk(context, encoded.as_ptr() as *const c_char, len, terminate) };
        if ret != 0 {
            failed = true;
            break;
        }

        // SAFETY: `state_ptr` points to `parser_context`, which is still alive, and no
        // callback is running outside of `xmlParseChunk`.
        let root_closed = unsafe {
            matches!((*state_ptr).state, ParserState::EndElement) && (*state_ptr).depth == 0
        };
        if root_closed {
            break;
        }
        prev_pos = reader.cursor().pos;
    }

    // SAFETY: `context` was returned non-null by `xmlCreatePushParserCtxt` and is freed
    // exactly once, here, on every path that reaches this point.
    unsafe { xmlFreeParserCtxt(context) };

    if failed {
        Err(ParseError::new(prev_pos, false, ParseErrorKind::Xml))
    } else {
        Ok(buf)
    }
}
/// State shared between `parse` and the SAX callbacks.
///
/// `parse` passes a pointer to this struct as the user data of the libxml2 push parser, and
/// each callback casts its `ctx` argument back to it.
struct ParserContext {
/// Number of currently open elements: incremented by `on_start_element` and decremented
/// by `on_end_element`.
depth: usize,
/// Kind of the most recent SAX event recorded by a callback.
state: ParserState,
}
impl ParserContext {
    /// Builds the initial callback state: no open element, and the `Created` marker as state.
    fn new() -> ParserContext {
        let state = ParserState::Created;
        let depth = 0;
        ParserContext { depth, state }
    }
}
/// Most recent SAX event recorded in a `ParserContext`.
enum ParserState {
/// Initial value set by `ParserContext::new`.
Created,
/// Set by `on_start_document`.
StartDocument,
/// Set by `on_end_document`.
EndDocument,
/// Set by `on_start_element`.
StartElement,
/// Set by `on_end_element`; together with a depth of zero this is what `parse` uses to
/// stop reading.
EndElement,
}
/// Builds the SAX handler table used by `parse`.
///
/// Only four callbacks are registered: document start/end and the namespace-aware element
/// start/end. Every other handler slot is left empty, and the table is tagged with
/// `XML_SAX2_MAGIC`.
fn new_sax_parser() -> libxml::bindings::xmlSAXHandler {
    use libxml::bindings::{xmlSAXHandler, XML_SAX2_MAGIC};

    xmlSAXHandler {
        // Callbacks this module actually listens to.
        startDocument: Some(on_start_document),
        endDocument: Some(on_end_document),
        startElementNs: Some(on_start_element),
        endElementNs: Some(on_end_element),

        // Handler table marker and private slot.
        initialized: XML_SAX2_MAGIC,
        _private: ptr::null_mut(),

        // Everything else is left unset.
        internalSubset: None,
        isStandalone: None,
        hasInternalSubset: None,
        hasExternalSubset: None,
        resolveEntity: None,
        getEntity: None,
        entityDecl: None,
        notationDecl: None,
        attributeDecl: None,
        elementDecl: None,
        unparsedEntityDecl: None,
        setDocumentLocator: None,
        startElement: None,
        endElement: None,
        reference: None,
        characters: None,
        ignorableWhitespace: None,
        processingInstruction: None,
        comment: None,
        warning: None,
        error: None,
        fatalError: None,
        getParameterEntity: None,
        cdataBlock: None,
        externalSubset: None,
        serror: None,
    }
}
/// SAX callback for the start of the document: records `ParserState::StartDocument`.
///
/// # Safety
///
/// `ctx` must point to a live `ParserContext` that is not accessed elsewhere during the call.
unsafe extern "C" fn on_start_document(ctx: *mut c_void) {
    let shared = ctx.cast::<ParserContext>();
    // SAFETY: guaranteed by the caller, see the function-level contract.
    unsafe {
        (*shared).state = ParserState::StartDocument;
    }
}
/// SAX callback for the end of the document: records `ParserState::EndDocument`.
///
/// # Safety
///
/// `ctx` must point to a live `ParserContext` that is not accessed elsewhere during the call.
unsafe extern "C" fn on_end_document(ctx: *mut c_void) {
    let shared = ctx.cast::<ParserContext>();
    // SAFETY: guaranteed by the caller, see the function-level contract.
    unsafe {
        (*shared).state = ParserState::EndDocument;
    }
}
/// SAX callback for an element start tag: records `ParserState::StartElement` and increases
/// the element depth by one. Every argument other than `ctx` is ignored.
///
/// # Safety
///
/// `ctx` must point to a live `ParserContext` that is not accessed elsewhere during the call.
unsafe extern "C" fn on_start_element(
    ctx: *mut c_void,
    _local_name: *const xmlChar,
    _prefix: *const xmlChar,
    _uri: *const xmlChar,
    _nb_namespaces: c_int,
    _namespaces: *mut *const xmlChar,
    _nb_attributes: c_int,
    _nb_defaulted: c_int,
    _attributes: *mut *const xmlChar,
) {
    let shared = ctx.cast::<ParserContext>();
    // SAFETY: guaranteed by the caller, see the function-level contract.
    let tracker = unsafe { &mut *shared };
    tracker.state = ParserState::StartElement;
    tracker.depth += 1;
}
/// SAX callback for an element end tag: records `ParserState::EndElement` and decreases the
/// element depth by one. Every argument other than `ctx` is ignored.
///
/// The decrement saturates at zero: an unexpected end-element event with no open element
/// must not panic (or wrap around) inside an `extern "C"` callback invoked from C code.
///
/// # Safety
///
/// `ctx` must point to a live `ParserContext` that is not accessed elsewhere during the call.
unsafe extern "C" fn on_end_element(
    ctx: *mut c_void,
    _local_name: *const xmlChar,
    _prefix: *const xmlChar,
    _uri: *const xmlChar,
) {
    // SAFETY: guaranteed by the caller, see the function-level contract.
    let context: &mut ParserContext = unsafe { &mut *(ctx as *mut ParserContext) };
    context.state = ParserState::EndElement;
    context.depth = context.depth.saturating_sub(1);
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::reader::{CharPos, Pos};

    /// Inputs that do not start with `<` yield a recoverable error; malformed or truncated
    /// XML yields a non-recoverable error located at the offending character.
    #[test]
    fn parse_xml_brute_force_errors() {
        // Empty input: nothing to peek at.
        let mut reader = Reader::new("");
        let error = parse(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert_eq!(error.kind, ParseErrorKind::Xml);
        assert!(error.recoverable);

        // First character is not `<`.
        let mut reader = Reader::new("x");
        let error = parse(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 1 });
        assert_eq!(error.kind, ParseErrorKind::Xml);
        assert!(error.recoverable);

        // Starts like XML but the second character is invalid.
        let mut reader = Reader::new("<<");
        let error = parse(&mut reader).err().unwrap();
        assert_eq!(error.pos, Pos { line: 1, column: 2 });
        assert_eq!(error.kind, ParseErrorKind::Xml);
        assert!(!error.recoverable);

        // Truncated closing tag: the error is raised on the last character.
        let mut reader = Reader::new("<users><user /></users");
        let error = parse(&mut reader).err().unwrap();
        assert_eq!(
            error.pos,
            Pos {
                line: 1,
                column: 22
            }
        );
        assert_eq!(error.kind, ParseErrorKind::Xml);

        // Attribute without a value.
        let mut reader = Reader::new("<users aa><user /></users");
        let error = parse(&mut reader).err().unwrap();
        assert_eq!(
            error.pos,
            Pos {
                line: 1,
                column: 10
            }
        );
        assert_eq!(error.kind, ParseErrorKind::Xml);
    }

    /// Well-formed documents are returned verbatim and the reader stops right after the
    /// closing `>` of the root element.
    #[test]
    fn parse_xml_brute_force_ok() {
        let mut reader = Reader::new("<users><user /></users>");
        assert_eq!(
            parse(&mut reader).unwrap(),
            String::from("<users><user /></users>")
        );
        assert_eq!(reader.cursor().index, CharPos(23));

        // Trailing input is left unread.
        let mut reader = Reader::new("<users><user /></users>xx");
        assert_eq!(
            parse(&mut reader).unwrap(),
            String::from("<users><user /></users>")
        );
        assert_eq!(reader.cursor().index, CharPos(23));
        assert_eq!(reader.peek_n(2), String::from("xx"));

        // An XML declaration before the root element is part of the returned text.
        let mut reader = Reader::new("<?xml version=\"1.0\"?><users/>xxx");
        assert_eq!(
            parse(&mut reader).unwrap(),
            String::from("<?xml version=\"1.0\"?><users/>")
        );
        assert_eq!(reader.cursor().index, CharPos(29));
    }

    /// SOAP envelope with non-ASCII text and an entity-escaped XML payload.
    #[test]
    fn parse_xml_soap_utf8() {
        // The nested document is entity-escaped: raw `<?xml ...?>` markup inside element
        // content would not be well-formed XML.
        let xml = r#"<?xml version='1.0' encoding='UTF-8'?>
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
<soapenv:Body>
<ns31:UploadInboundResponseElement xmlns:ns31="http://www.example.com/schema/xyzWS">
<ns31:UploadInboundResult>&lt;?xml version="1.0" encoding="UTF-8" ?&gt;&lt;ATKCST&gt;&lt;Head&gt;&lt;FunCode&gt;9000&lt;/FunCode&gt;&lt;Remark&gt;接收数据成功&lt;/Remark&gt;&lt;/Head&gt;&lt;/ATKCST&gt;</ns31:UploadInboundResult>
</ns31:UploadInboundResponseElement>
</soapenv:Body>
</soapenv:Envelope>"#;
        // The expected cursor index is derived from the fixture itself (in chars, not bytes)
        // so it cannot drift from the literal above.
        let xml_len = xml.chars().count();

        // The document alone.
        let input = xml;
        let output = xml;
        let mut reader = Reader::new(input);
        assert_eq!(parse(&mut reader).unwrap(), String::from(output),);
        assert_eq!(reader.cursor().index, CharPos(xml_len));

        // The document followed by unrelated text.
        let input = format!("{xml} xx xx xx xx");
        let output = xml;
        let mut reader = Reader::new(&input);
        assert_eq!(parse(&mut reader).unwrap(), String::from(output),);
        assert_eq!(reader.cursor().index, CharPos(xml_len));

        // The document followed by a second copy of itself: only the first one is consumed.
        let input = format!("{xml}{xml}");
        let output = xml;
        let mut reader = Reader::new(&input);
        assert_eq!(parse(&mut reader).unwrap(), String::from(output),);
        assert_eq!(reader.cursor().index, CharPos(xml_len));

        // A fresh reader over the same input gives the same result.
        let mut reader = Reader::new(&input);
        assert_eq!(parse(&mut reader).unwrap(), String::from(output),);
        assert_eq!(reader.cursor().index, CharPos(xml_len));
    }

    /// A larger multi-line document followed by the start of another section (`HTTP 200`).
    #[test]
    fn parse_xml_books_with_entry_response_start() {
        let xml = r#"<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
<title>Oberon's Legacy</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-03-10</publish_date>
<description>In post-apocalypse England, the mysterious
agent known only as Oberon helps to create a new life
for the inhabitants of London. Sequel to Maeve
Ascendant.</description>
</book>
<book id="bk105">
<author>Corets, Eva</author>
<title>The Sundered Grail</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-09-10</publish_date>
<description>The two daughters of Maeve, half-sisters,
battle one another for control of England. Sequel to
Oberon's Legacy.</description>
</book>
<book id="bk106">
<author>Randall, Cynthia</author>
<title>Lover Birds</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-09-02</publish_date>
<description>When Carla meets Paul at an ornithology
conference, tempers fly as feathers get ruffled.</description>
</book>
<book id="bk107">
<author>Thurman, Paula</author>
<title>Splish Splash</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-11-02</publish_date>
<description>A deep sea diver finds true love twenty
thousand leagues beneath the sea.</description>
</book>
<book id="bk108">
<author>Knorr, Stefan</author>
<title>Creepy Crawlies</title>
<genre>Horror</genre>
<price>4.95</price>
<publish_date>2000-12-06</publish_date>
<description>An anthology of horror stories about roaches,
centipedes, scorpions and other insects.</description>
</book>
<book id="bk109">
<author>Kress, Peter</author>
<title>Paradox Lost</title>
<genre>Science Fiction</genre>
<price>6.95</price>
<publish_date>2000-11-02</publish_date>
<description>After an inadvertent trip through a Heisenberg
Uncertainty Device, James Salway discovers the problems
of being quantum.</description>
</book>
<book id="bk110">
<author>O'Brien, Tim</author>
<title>Microsoft .NET: The Programming Bible</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-09</publish_date>
<description>Microsoft's .NET initiative is explored in
detail in this deep programmer's reference.</description>
</book>
<book id="bk111">
<author>O'Brien, Tim</author>
<title>MSXML3: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-01</publish_date>
<description>The Microsoft MSXML3 parser is covered in
detail, with attention to XML DOM interfaces, XSLT processing,
SAX and more.</description>
</book>
<book id="bk112">
<author>Galos, Mike</author>
<title>Visual Studio 7: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>49.95</price>
<publish_date>2001-04-16</publish_date>
<description>Microsoft Visual Studio 7 is explored in depth,
looking at how Visual Basic, Visual C++, C#, and ASP+ are
integrated into a comprehensive development
environment.</description>
</book>
</catalog>"#;
        let chunk = format!("{xml}\nHTTP 200");
        let mut reader = Reader::new(&chunk);
        assert_eq!(parse(&mut reader).unwrap(), String::from(xml),);
        // Derived from the fixture (in chars) so it cannot drift from the literal above.
        assert_eq!(reader.cursor().index, CharPos(xml.chars().count()));
    }
}