use encoding_rs::WINDOWS_1252;
use nom::sequence::tuple;
use std::borrow::Cow;
use xml::name::OwnedName;
use nom::bytes::complete as nbc;
use nom::character::complete as ncc;
use nom::error::VerboseError;
use nom::{multi as nm, Parser};
use nom::{Finish, IResult};
use xml::common::XmlVersion;
use crate::parsing::{get_nom_input_linecol, get_nom_input_offset, nom_context_error};
pub use xml::reader::XmlEvent;
/// Builds an [`xml::common::TextPosition`] from a `(row, column)` pair.
///
/// The first tuple element becomes `row` and the second becomes `column`.
fn to_xml_pos(pos: (usize, usize)) -> xml::common::TextPosition {
    let (row, column) = pos;
    xml::common::TextPosition {
        row: row as u64,
        column: column as u64,
    }
}
/// Returns `true` when `c` is one of the four bytes treated as whitespace by
/// the header parser: space, horizontal tab, carriage return or line feed.
fn is_whitespace(c: u8) -> bool {
    matches!(c, b' ' | b'\t' | b'\r' | b'\n')
}
/// Replaces the five predefined XML entities (`&amp;`, `&apos;`, `&gt;`,
/// `&lt;`, `&quot;`) in `input` with the character they stand for.
///
/// Any other `&` is copied to the output unchanged, so unknown entities such
/// as `&err;` survive verbatim. When `input` contains no `&` at all the
/// original slice is returned borrowed and nothing is allocated.
fn unescape_xml_str(input: &str) -> Cow<str> {
    // Entity names without the leading '&', paired with their replacement.
    const ENTITIES: [(&str, char); 5] = [
        ("amp;", '&'),
        ("apos;", '\''),
        ("gt;", '>'),
        ("lt;", '<'),
        ("quot;", '"'),
    ];

    if !input.contains('&') {
        return Cow::Borrowed(input);
    }

    let mut unescaped = String::with_capacity(input.len() * 6 / 5);
    let mut rest = input;
    while let Some((head, tail)) = rest.split_once('&') {
        unescaped.push_str(head);
        let known = ENTITIES
            .iter()
            .find_map(|&(entity, ch)| tail.strip_prefix(entity).map(|after| (ch, after)));
        rest = match known {
            Some((ch, after)) => {
                unescaped.push(ch);
                after
            }
            None => {
                // Not a recognised entity: keep the ampersand as-is.
                unescaped.push('&');
                tail
            }
        };
    }
    unescaped.push_str(rest);
    Cow::Owned(unescaped)
}
/// Parses the `<?xml ...>` header at the start of `input` and decodes the
/// bytes that follow it into text.
///
/// Leading whitespace is skipped, the header tag is matched
/// case-insensitively, and the header may be closed with either `?>` or `>`.
/// Attribute values may be double-quoted, single-quoted or unquoted. Only the
/// `encoding` attribute is interpreted (name and value compared
/// case-insensitively); it defaults to `UTF-8`.
///
/// Supported encodings:
/// * `UTF-8` and `NWN2UI`: decoded as UTF-8. With `lossy == true` invalid
///   sequences are replaced, otherwise they produce an error.
/// * `WINDOWS-1252`: decoded with `encoding_rs`.
///
/// # Returns
/// On success, the undecoded body bytes (everything after the header)
/// together with the decoded body text.
///
/// # Errors
/// Fails when the header is missing or malformed, when the encoding is not
/// one of the above, or when `lossy` is `false` and the body is not valid
/// UTF-8.
pub fn decode_xml_str(input: &[u8], lossy: bool) -> IResult<&[u8], Cow<str>, VerboseError<&[u8]>> {
    // Attribute value: "double quoted", 'single quoted' or a bare token.
    fn parse_str(input: &[u8]) -> IResult<&[u8], String, VerboseError<&[u8]>> {
        fn parse_doublequoted(input: &[u8]) -> IResult<&[u8], String, VerboseError<&[u8]>> {
            let (input, _) = nbc::tag("\"")(input)?;
            let (input, str_input) = nbc::take_until("\"")(input)?;
            let (input, _) = nbc::tag("\"")(input)?;
            Ok((input, String::from_utf8_lossy(str_input).into_owned()))
        }
        fn parse_singlequoted(input: &[u8]) -> IResult<&[u8], String, VerboseError<&[u8]>> {
            let (input, _) = nbc::tag("'")(input)?;
            let (input, str_input) = nbc::take_until("'")(input)?;
            let (input, _) = nbc::tag("'")(input)?;
            Ok((input, String::from_utf8_lossy(str_input).into_owned()))
        }
        fn parse_unquoted(input: &[u8]) -> IResult<&[u8], String, VerboseError<&[u8]>> {
            // A bare value runs until whitespace or the start of the header
            // terminator (`?>` or `>`). The predicate used to be inverted and
            // only ever consumed whitespace, so bare values were never read.
            let (input, str_input) =
                nbc::take_till(|c| is_whitespace(c) || c == b'?' || c == b'>')(input)?;
            Ok((input, String::from_utf8_lossy(str_input).into_owned()))
        }
        nom::branch::alt((parse_doublequoted, parse_singlequoted, parse_unquoted))(input)
    }

    // `name = value`, with optional whitespace around the equals sign.
    fn parse_attribute(input: &[u8]) -> IResult<&[u8], (String, String), VerboseError<&[u8]>> {
        let (input, name) = nbc::take_till1(|c| is_whitespace(c) || c == b'=')(input)?;
        let name = String::from_utf8_lossy(name).into_owned();
        let (input, _) = nbc::take_till(|c| !is_whitespace(c))(input)?;
        let (input, _) = nbc::tag("=")(input)?;
        let (input, _) = nbc::take_till(|c| !is_whitespace(c))(input)?;
        let (input, value) = parse_str(input)?;
        Ok((input, (name, value)))
    }

    // Header: optional whitespace, `<?xml`, attributes, then `>` or `?>`.
    let (input, _) = nbc::take_till(|c| !is_whitespace(c))(input)?;
    let (input, _) = nbc::tag_no_case("<?xml")(input)?;
    let (input, _) = nbc::take_till(|c| !is_whitespace(c))(input)?;
    let (input, attributes) =
        nm::separated_list0(nbc::take_till1(|c| !is_whitespace(c)), parse_attribute)(input)?;
    let (input, _) = nbc::take_till(|c| !is_whitespace(c))(input)?;
    let (input, _) = nom::branch::alt((nbc::tag(">"), nbc::tag("?>")))(input)?;

    // The last `encoding` attribute wins; everything else is ignored.
    let mut encoding = "UTF-8".to_owned();
    for (name, value) in attributes.iter() {
        if name.eq_ignore_ascii_case("encoding") {
            encoding = value.to_uppercase();
        }
    }

    let xml_str: Cow<_> = match encoding.as_str() {
        "UTF-8" | "NWN2UI" => {
            if lossy {
                String::from_utf8_lossy(input)
            } else {
                std::str::from_utf8(input)
                    // Anchor the error on the first invalid byte rather than on
                    // the valid prefix, which starts at the body's first byte.
                    .map_err(|e| {
                        nom_context_error("bad UTF8 sequence", &input[e.valid_up_to()..])
                    })?
                    .into()
            }
        }
        "WINDOWS-1252" => {
            let (decoded, _encoding, _had_errors) = WINDOWS_1252.decode(input);
            decoded
        }
        _ => return Err(nom_context_error("unhandled XML encoding", input)),
    };
    Ok((input, xml_str))
}
fn parse_once<'a>(
input: &'a str,
stack: &[OwnedName],
) -> IResult<&'a str, (XmlEvent, bool), VerboseError<&'a str>> {
#[derive(Debug)]
enum TagType {
Open,
Close,
SelfClose,
}
#[derive(Debug)]
struct Tag<'a> {
name: &'a str,
tagtype: TagType,
attributes: Vec<(&'a str, Cow<'a, str>)>,
}
fn is_whitespace(c: char) -> bool {
match c {
' ' | '\t' | '\r' | '\n' => true,
_ => false,
}
}
fn parse_str(input: &str) -> IResult<&str, (Cow<str>, bool), VerboseError<&str>> {
use nom::bytes::complete::is_not;
use nom::character::complete::char;
use nom::sequence::delimited;
let is_quoted = match input.as_bytes().first() {
Some(b'"' | b'\'') => true,
_ => false,
};
let (input, s) = nom::branch::alt((
delimited(char('"'), is_not("\""), char('"')),
delimited(char('\''), is_not("'"), char('\'')),
nbc::take_till(|c| is_whitespace(c) || c == '>' || c == '/'),
))(input)?;
let s = unescape_xml_str(s);
Ok((input, (s, is_quoted)))
}
fn parse_tag(input: &str) -> IResult<&str, Tag, VerboseError<&str>> {
let (input, _) = nbc::tag("<")(input)?;
let (input, closing_tag) =
if let Ok((input, _)) = nbc::tag::<_, _, VerboseError<_>>("/")(input) {
(input, true)
} else {
(input, false)
};
let (input, _) = ncc::multispace0(input)?;
let (input, name) = ncc::alphanumeric1(input)?;
let (input, _) = ncc::multispace0(input)?;
fn parse_attribute(
input: &str,
) -> IResult<&str, (&str, Cow<str>, bool), VerboseError<&str>> {
let (input, (name, _, _, _, (value, is_quoted))) = tuple((
nbc::take_till1(|c| match c {
' ' | '\t' | '\r' | '\n' | '=' => true,
_ => false,
}),
ncc::multispace0,
nbc::tag("="),
ncc::multispace0,
parse_str,
))(input)?;
Ok((input, (name, value, is_quoted)))
}
let mut attributes = vec![];
let mut input = input;
loop {
let name;
let value;
let is_quoted;
(input, (name, value, is_quoted)) = match parse_attribute(input) {
Ok(res) => res,
_ => break,
};
attributes.push((name, value));
let sep = if is_quoted {
ncc::multispace0::<_, VerboseError<_>>(input)
} else {
ncc::multispace1(input)
};
(input, _) = match sep {
Ok(res) => res,
_ => break,
}
}
let (input, tagtype) = if closing_tag {
let (input, _) = nbc::tag(">")(input)?;
(input, TagType::Close)
} else {
let (input, txt) = nom::branch::alt((nbc::tag(">"), nbc::tag("/>")))(input)?;
match txt {
">" => (input, TagType::Open),
"/>" => (input, TagType::SelfClose),
_ => panic!(),
}
};
Ok((
input,
Tag {
name,
tagtype,
attributes,
},
))
}
fn parse_comment(input: &str) -> IResult<&str, &str, VerboseError<&str>> {
let (input, (_, comment, _)) = nom::sequence::tuple((
nbc::tag("<!--").or(nbc::tag("<--")),
nbc::take_until("-->"),
nbc::tag("-->"),
))(input)?;
Ok((input, comment))
}
fn parse_text(input: &str) -> IResult<&str, &str, VerboseError<&str>> {
nbc::take_until1("<")(input)
}
let mut input = input;
loop {
let parse_res: Option<Result<(XmlEvent, bool), _>>;
(input, parse_res) = if let Ok((input, comment)) = parse_comment(input) {
(
input,
Some(Ok((XmlEvent::Comment(comment.to_string()), false))),
)
} else if let Ok((input, tag)) = parse_tag(input) {
match tag.tagtype {
TagType::Open => {
let elmt = XmlEvent::StartElement {
name: xml::name::OwnedName::local(tag.name),
attributes: tag
.attributes
.into_iter()
.map(|(name, value)| {
xml::attribute::OwnedAttribute::new(
xml::name::OwnedName::local(name),
value.to_string(),
)
})
.collect(),
namespace: xml::namespace::Namespace::empty(),
};
(input, Some(Ok((elmt, false))))
}
TagType::Close => {
if let Some(expected_name) = stack.last() {
if tag.name == expected_name.local_name {
(
input,
Some(Ok((
XmlEvent::EndElement {
name: OwnedName::local(tag.name),
},
false,
))),
)
} else {
(input, None)
}
} else {
(input, None)
}
}
TagType::SelfClose => {
let elmt = XmlEvent::StartElement {
name: xml::name::OwnedName::local(tag.name),
attributes: tag
.attributes
.into_iter()
.map(|(name, value)| {
xml::attribute::OwnedAttribute::new(
xml::name::OwnedName::local(name),
value,
)
})
.collect(),
namespace: xml::namespace::Namespace::empty(),
};
(input, Some(Ok((elmt.into(), true))))
}
}
} else if let Ok((input, text)) = parse_text(input) {
if text.chars().all(|c| c.is_ascii_whitespace()) {
(input, None)
} else {
(
input,
Some(Ok((XmlEvent::Characters(text.trim().into()), false))),
)
}
} else {
let (consumed_input, _) = ncc::multispace0(input)?;
if consumed_input.len() > 0 {
return Err(nom::Err::Failure(VerboseError {
errors: vec![(
input,
nom::error::VerboseErrorKind::Nom(nom::error::ErrorKind::Fail),
)],
}));
} else {
(consumed_input, Some(Ok((XmlEvent::EndDocument {}, false))))
}
};
if let Some(parse_res) = parse_res {
return Ok((input, parse_res?));
} else {
continue;
}
}
}
/// Pull-style reader that produces `XmlEvent`s from a byte source using the
/// lenient parser defined in this file.
///
/// The whole source is read and decoded on the first call to `next`; later
/// calls parse one event at a time out of the decoded text.
pub struct GuiXmlEventReader<R: std::io::Read> {
/// Underlying byte source, read to its end by the first `next` call.
source: R,
/// Decoded document body (header removed); `None` until the first `next` call.
data: Option<String>,
/// Byte offset into `data` of the first character not yet parsed.
data_ptr: usize,
/// Set after a self-closing element was returned, so that the following
/// `next` call emits its `EndElement`.
pending_selfclose: bool,
/// Names of the elements currently open, innermost last.
stack: Vec<OwnedName>,
/// Forwarded to `decode_xml_str` as its `lossy` argument.
lossy: bool,
}
impl<R: std::io::Read> GuiXmlEventReader<R> {
pub fn new(source: R, lossy: bool) -> Self {
Self {
source,
data: None,
data_ptr: 0,
pending_selfclose: false,
stack: vec![],
lossy,
}
}
pub fn next(&mut self) -> xml::reader::Result<XmlEvent> {
match &self.data {
None => {
let mut data: Vec<u8> = vec![];
self.source.read_to_end(&mut data)?;
let (_, datastr) = decode_xml_str(data.as_slice(), self.lossy)
.finish()
.map_err(|e| -> xml::reader::Error {
xml::reader::Error::from((
&to_xml_pos((0, get_nom_input_offset(e.errors[0].0, data.as_slice()))),
std::borrow::Cow::from(format!("while parsing XML header: {:?}", e)),
))
})?;
self.data = Some(datastr.into_owned());
Ok(XmlEvent::StartDocument {
version: XmlVersion::Version10,
encoding: "UTF-8".to_string(),
standalone: None,
})
}
Some(data) => {
if self.pending_selfclose {
self.pending_selfclose = false;
return Ok(XmlEvent::EndElement {
name: self.stack.pop().expect("bad pending selfclose"),
});
}
let step_input = &data[self.data_ptr..];
let (input, (ev, self_closing)) = parse_once(step_input, &self.stack)
.finish()
.map_err(|e| -> xml::reader::Error {
use nom::error::convert_error;
xml::reader::Error::from((
&to_xml_pos(get_nom_input_linecol(e.errors[0].0, data)),
std::borrow::Cow::from(format!(
"while parsing XML data: {}",
convert_error(data.as_str(), e)
)),
))
})?;
match &ev {
XmlEvent::StartElement {
name,
attributes: _,
namespace: _,
} => {
self.stack.push(name.clone());
}
XmlEvent::EndElement { name } => {
let popped = self.stack.pop().ok_or(xml::reader::Error::from((
&to_xml_pos(get_nom_input_linecol(step_input, data.as_str())),
std::borrow::Cow::from(format!(
"unexpected closing element: {:?}",
name
)),
)))?;
debug_assert!(popped == *name);
}
XmlEvent::EndDocument => {
if let Some(elmt) = self.stack.last() {
return Err(xml::reader::Error::from((
&to_xml_pos(get_nom_input_linecol(input, data.as_str())),
std::borrow::Cow::from(format!(
"reached the end of the document while waiting for a closing \
</{}>",
elmt
)),
)));
}
}
_ => {}
}
self.data_ptr = data.len() - input.len();
self.pending_selfclose = self_closing;
Ok(ev)
}
}
}
pub fn skip(&mut self) -> xml::reader::Result<()> {
self.next()?;
Ok(())
}
pub fn source(&self) -> &R {
&self.source
}
pub fn source_mut(&mut self) -> &mut R {
&mut self.source
}
pub fn into_inner(self) -> R {
self.source
}
pub fn doctype(&self) -> Option<&str> {
None
}
}
#[cfg(test)]
mod tests {
    use core::str;
    use xml::attribute::OwnedAttribute;
    use super::*;

    /// Reads every event of `input` up to, but not including, `EndDocument`.
    fn read_all(input: &str) -> Result<Vec<XmlEvent>, Box<dyn std::error::Error>> {
        let mut reader = GuiXmlEventReader::new(std::io::Cursor::new(input), false);
        let mut res = vec![];
        loop {
            match reader.next()? {
                XmlEvent::EndDocument => break,
                ev => {
                    eprintln!("read event: {:?}", ev);
                    res.push(ev);
                }
            }
        }
        Ok(res)
    }

    /// The header may be closed with `?>` or `>`, with or without attributes.
    #[test]
    fn test_headers() {
        let accepted = [
            r##"<?xml version="1.0" encoding="utf-8"?><UIScene></UIScene>"##,
            r##"<?xml version="1.0" encoding="utf-8" ?><UIScene></UIScene>"##,
            r##"<?xml version="1.0" encoding="utf-8"><UIScene></UIScene>"##,
            r##"<?xml version="1.0" encoding="utf-8" ><UIScene></UIScene>"##,
            r##"<?xml?><UIScene></UIScene>"##,
            r##"<?xml ?><UIScene></UIScene>"##,
            r##"<?xml><UIScene></UIScene>"##,
            r##"<?xml ><UIScene></UIScene>"##,
        ];
        for doc in accepted.iter() {
            read_all(doc).unwrap();
        }

        // A missing or malformed header is rejected on the very first event.
        assert!(GuiXmlEventReader::new(
            std::io::Cursor::new(r##"<xml><UIScene></UIScene>"##),
            false
        )
        .next()
        .is_err());
        assert!(
            GuiXmlEventReader::new(std::io::Cursor::new(r##"<UIScene></UIScene>"##), false)
                .next()
                .is_err()
        );
    }

    /// Malformed documents are rejected, stray closing tags are tolerated and
    /// entities in attribute values are unescaped.
    #[test]
    fn test_errors() {
        read_all(r##"<?xml?><UIScene value/>"##).unwrap_err();
        read_all(r##"<?xml?><UIScene value=something with spaces/>"##).unwrap_err();
        read_all(r##"<?xml?><<UIScene/>"##).unwrap_err();
        read_all(r##"<?xml?><UIScene/><UIButton>"##).unwrap_err();
        read_all(r##"<?xml?><UIScene/><UIButton><UIFrame/>"##).unwrap_err();
        read_all(r##"<?xml?><UIScene/><UIButton></UIFrame>"##).unwrap_err();

        // Closing tags that match nothing are ignored.
        read_all(r##"<?xml?><UIScene/></UIButton>"##).unwrap();
        read_all(r##"<?xml?><UIScene/><UIButton></UIButton></UIButton>"##).unwrap();
        read_all(r##"<?xml?><UIScene/><UIButton></UIFrame></UIButton>"##).unwrap();

        // The attribute value must be entity-escaped: a raw `"` would end the
        // value early and the document would not parse at all.
        let res = read_all(
            r##"<?xml?><UIScene/><UIText text="Hello world &quot; &apos; &lt; &gt; &amp; &err;"></UIText>"##,
        )
        .unwrap();
        // Events: StartDocument, <UIScene>, </UIScene>, <UIText>, </UIText>.
        let (name, attributes, _) = if let XmlEvent::StartElement {
            name,
            attributes,
            namespace,
        } = &res[3]
        {
            (name, attributes, namespace)
        } else {
            panic!("{:?} is not XmlEvent::StartElement", &res[3])
        };
        assert_eq!(name.local_name, "UIText");
        assert_eq!(
            attributes
                .iter()
                .find(|v| v.name.local_name == "text")
                .unwrap()
                .value,
            r##"Hello world " ' < > & &err;"##
        );
    }

    /// Unquoted values, missing whitespace after a quoted value and nested
    /// elements, as found in real GUI files.
    #[test]
    fn test_misc() {
        let events = read_all(
r##"<?xml>
<UIPortrait name="PORTRAIT" texture="p_m_bg_dark.tga" x=14 y=85 width=128 height=128
update=true OnUpdate=UIPortrait_OnUpdate_UpdateCharacterPortrait()
OnRender=UIPortrait_OnRender_RenderCharacterPortrait()
ambground_intens=".4" ambgroundcolor_r=".7" ambgroundcolor_g=".55" ambgroundcolor_b=".4"
ambsky_intens=".8" ambskycolor_r=".3" ambskycolor_g=".4" ambskycolor_b=".78"
diffusecolor_r=.9 diffusecolor_g=.8 diffusecolor_b=.6
light_intens=.0 ></UIPortrait>
<UIListBox name="INFOPANE_LISTBOX" x="21" y="49" width="465" height="218" xPadding="5" yPadding="5" showpartialchild="true"
unequalcontrols="true" scrollsegmentsize="30" hidescrollbarwhennotneeded="true" >
<UIText name="HelpField" strref="183397" width="PARENT_WIDTH" height="DYNAMIC" fontfamily="Default" multiline="true" />
<UIScrollBar name="SB" style="STYLE_SB_THIN" />
</UIListBox>
<UIText name="Character"fontfamily="NWN2_Dialog" color=888888 update=true OnUpdate="UIText_OnUpdate_DisplaySelectedCharacter()" />
"##,
        ).unwrap();
        if let XmlEvent::StartElement {
            name,
            attributes,
            namespace: _,
        } = &events[1]
        {
            assert_eq!(name.local_name.as_str(), "UIPortrait");
            assert!(attributes.contains(&OwnedAttribute::new(OwnedName::local("name"), "PORTRAIT")));
            assert!(attributes.contains(&OwnedAttribute::new(
                OwnedName::local("OnUpdate"),
                "UIPortrait_OnUpdate_UpdateCharacterPortrait()"
            )));
        } else {
            panic!();
        }
    }

    /// Spot-checks selected events of the `campaign.xml` fixture. Comments are
    /// not counted; `i` indexes the remaining events.
    #[test]
    fn test_parsing_campaign() {
        /// Asserts that `ev` starts element `element` and carries every
        /// `(name, value)` pair in `expected`; returns the event's namespace.
        fn expect_start(
            ev: XmlEvent,
            element: &str,
            expected: &[(&str, &str)],
        ) -> xml::namespace::Namespace {
            match ev {
                XmlEvent::StartElement {
                    name,
                    attributes,
                    namespace,
                } => {
                    assert_eq!(name.local_name.as_str(), element);
                    for (attr_name, attr_value) in expected {
                        assert!(attributes.contains(&OwnedAttribute::new(
                            OwnedName::local(*attr_name),
                            *attr_value
                        )));
                    }
                    namespace
                }
                other => panic!("{:?}", other),
            }
        }

        /// Asserts that `ev` closes element `element`.
        fn expect_end(ev: XmlEvent, element: &str) {
            assert_eq!(
                ev,
                XmlEvent::EndElement {
                    name: OwnedName::local(element)
                }
            );
        }

        let input = include_bytes!("../unittest/campaign.xml");
        let mut reader = GuiXmlEventReader::new(std::io::Cursor::new(input), false);
        let mut i = 0;
        loop {
            let ev = reader.next().unwrap();
            match ev {
                XmlEvent::EndDocument => break,
                XmlEvent::Comment(_) => continue,
                _ => {}
            }
            match i {
                0 => match ev {
                    XmlEvent::StartDocument {
                        version, encoding, ..
                    } => {
                        assert_eq!(version, XmlVersion::Version10);
                        assert_eq!(encoding, "UTF-8");
                    }
                    other => panic!("{:?}", other),
                },
                1 => {
                    let namespace = expect_start(
                        ev,
                        "UIScene",
                        &[
                            ("name", "SCREEN_CAMPAIGNLIST"),
                            (
                                "OnAdd",
                                "UIScene_OnAdd_CreateCampaignList(\"CampaignListBox\")",
                            ),
                        ],
                    );
                    assert_eq!(namespace, xml::namespace::Namespace::empty());
                }
                2 => expect_end(ev, "UIScene"),
                3 => {
                    expect_start(ev, "UIPane", &[("name", "TitlePane"), ("width", "984")]);
                }
                4 => {
                    expect_start(ev, "UIText", &[("name", "TITLE_TEXT"), ("style", "4")]);
                }
                5 => expect_end(ev, "UIText"),
                6 => {
                    expect_start(
                        ev,
                        "UIIcon",
                        &[("img", "main_sub_titles.tga"), ("height", "PARENT_HEIGHT")],
                    );
                }
                7 => expect_end(ev, "UIIcon"),
                8 => expect_end(ev, "UIPane"),
                22 => {
                    expect_start(
                        ev,
                        "UIListBox",
                        &[("name", "CAMP_DESC_LISTBOX"), ("xPadding", "5")],
                    );
                }
                40 => {
                    expect_start(
                        ev,
                        "UIButton",
                        &[
                            ("name", "START_CAMPAIGN"),
                            (
                                "OnLeftClick",
                                "UIButton_Input_StartModule(\"SCREEN_CHARCHOICE\",Local:0)",
                            ),
                        ],
                    );
                }
                41 => expect_end(ev, "UIButton"),
                42 => expect_end(ev, "UIPane"),
                43 => {
                    expect_start(
                        ev,
                        "UIIcon",
                        &[("img", "main_sub_bg.tga"), ("scalewithscene", "true")],
                    );
                }
                44 => expect_end(ev, "UIIcon"),
                _ => {}
            }
            i += 1;
        }
        assert_eq!(i, 45);
    }

    /// Reads the `addbuddy.xml` fixture in lossy mode and feeds every event to
    /// an `xml` writer, checking that both sides accept the stream.
    #[test]
    fn test_addbuddy() {
        let input = include_bytes!("../unittest/addbuddy.xml");
        let mut reader = GuiXmlEventReader::new(std::io::Cursor::new(input), true);
        let output = std::io::Cursor::new(vec![]);
        let mut writer = xml::writer::EmitterConfig::new()
            .perform_indent(true)
            .create_writer(output);
        loop {
            let ev = reader.next().unwrap();
            if let XmlEvent::EndDocument = ev {
                break;
            }
            if let Some(ev) = ev.as_writer_event() {
                writer.write(ev).unwrap();
            }
        }
    }
}