use std::str::from_utf8_unchecked;
use crate::{
chvalid::XmlCharValid,
error::XmlParserErrors,
parser::{
XML_MAX_HUGE_LENGTH, XML_MAX_TEXT_LENGTH, XmlParserCtxt, XmlParserInputState,
XmlParserOption, xml_fatal_err, xml_fatal_err_msg, xml_fatal_err_msg_int,
xml_fatal_err_msg_str,
},
};
impl XmlParserCtxt<'_> {
#[doc(alias = "xmlParseCommentComplex")]
fn parse_comment_complex(&mut self, buf: &mut String) {
let max_length = if self.options & XmlParserOption::XmlParseHuge as i32 != 0 {
XML_MAX_HUGE_LENGTH
} else {
XML_MAX_TEXT_LENGTH
};
let inputid = self.input().unwrap().id;
macro_rules! not_terminated {
() => {
xml_fatal_err_msg_str!(
self,
XmlParserErrors::XmlErrCommentNotFinished,
"Comment not terminated\n"
);
return;
};
}
let Some(mut q) = self.current_char() else {
not_terminated!();
};
if !q.is_xml_char() {
xml_fatal_err_msg_int!(
self,
XmlParserErrors::XmlErrInvalidChar,
format!("xmlParseComment: invalid xmlChar value {}\n", q as i32).as_str(),
q as i32
);
return;
}
self.skip_char();
let Some(mut r) = self.current_char() else {
not_terminated!();
};
if !r.is_xml_char() {
xml_fatal_err_msg_int!(
self,
XmlParserErrors::XmlErrInvalidChar,
format!("xmlParseComment: invalid xmlChar value {}\n", r as i32).as_str(),
r as i32
);
return;
}
self.skip_char();
let mut cur = self.current_char();
if cur.is_none() {
not_terminated!();
};
while let Some(nc) =
cur.filter(|&cur| cur.is_xml_char() && (cur != '>' || r != '-' || q != '-'))
{
if r == '-' && q == '-' {
xml_fatal_err(self, XmlParserErrors::XmlErrHyphenInComment, None);
}
buf.push(q);
if buf.len() > max_length {
xml_fatal_err_msg_str!(
self,
XmlParserErrors::XmlErrCommentNotFinished,
"Comment too big found"
);
return;
}
q = r;
r = nc;
self.advance_with_line_handling(nc.len_utf8());
cur = self.current_char();
}
if matches!(self.instate, XmlParserInputState::XmlParserEOF) {
return;
}
if let Some(cur) = cur {
if !cur.is_xml_char() {
xml_fatal_err_msg_int!(
self,
XmlParserErrors::XmlErrInvalidChar,
format!("xmlParseComment: invalid xmlChar value {}\n", cur as i32).as_str(),
cur as i32
);
} else {
if inputid != self.input().unwrap().id {
xml_fatal_err_msg(
self,
XmlParserErrors::XmlErrEntityBoundary,
"Comment doesn't start and stop in the same entity\n",
);
}
self.skip_char();
if !self.disable_sax {
if let Some(comment) = self.sax.as_deref_mut().and_then(|sax| sax.comment) {
comment(self, buf);
}
}
}
} else {
xml_fatal_err_msg_str!(
self,
XmlParserErrors::XmlErrCommentNotFinished,
"Comment not terminated \n<!--{}\n",
buf
);
}
}
#[doc(alias = "xmlParseComment")]
pub(crate) fn parse_comment(&mut self) {
let max_length = if self.options & XmlParserOption::XmlParseHuge as i32 != 0 {
XML_MAX_HUGE_LENGTH
} else {
XML_MAX_TEXT_LENGTH
};
if !self.content_bytes().starts_with(b"<!") {
return;
}
self.advance(2);
if !self.content_bytes().starts_with(b"--") {
return;
}
let state = self.instate;
self.instate = XmlParserInputState::XmlParserComment;
let inputid = self.input().unwrap().id;
self.advance(2);
self.grow();
let mut buf = String::new();
let has_sax_comment = self.sax.as_deref().is_some_and(|sax| sax.comment.is_some());
let mut input = self.content_bytes();
while !input.is_empty() {
let mut len = input
.iter()
.take_while(|&&b| {
b != b'-'
&& b != b'\r'
&& (b == b'\t' || b == b'\n' || (0x20..0x80).contains(&b))
})
.count();
if has_sax_comment {
unsafe {
buf.push_str(from_utf8_unchecked(&input[..len]));
}
if buf.len() > max_length {
xml_fatal_err_msg_str!(
self,
XmlParserErrors::XmlErrCommentNotFinished,
"Comment too big found"
);
return;
}
}
if len < input.len() && input[len] == b'\r' {
if len + 1 < input.len() && input[len + 1] == b'\n' {
len += 1;
} else {
self.advance_with_line_handling(len);
break;
}
}
self.advance_with_line_handling(len);
self.shrink();
self.grow();
if self.instate == XmlParserInputState::XmlParserEOF {
return;
}
input = self.content_bytes();
if !input.is_empty() && input[0] == b'-' {
if input.len() >= 2 && input[1] == b'-' {
if input.len() >= 3 && input[2] == b'>' {
if self.input().unwrap().id != inputid {
xml_fatal_err_msg(
self,
XmlParserErrors::XmlErrEntityBoundary,
"comment doesn't start and stop input the same entity\n",
);
}
self.advance(3);
if !self.disable_sax {
if let Some(comment) =
self.sax.as_deref_mut().and_then(|sax| sax.comment)
{
comment(self, &buf);
}
}
if !matches!(self.instate, XmlParserInputState::XmlParserEOF) {
self.instate = state;
}
return;
}
if !buf.is_empty() {
xml_fatal_err_msg_str!(
self,
XmlParserErrors::XmlErrHyphenInComment,
"Double hyphen within comment: <!--{}\n",
buf
);
} else {
xml_fatal_err_msg_str!(
self,
XmlParserErrors::XmlErrHyphenInComment,
"Double hyphen within comment\n"
);
}
if matches!(self.instate, XmlParserInputState::XmlParserEOF) {
return;
}
buf.push_str("--");
self.advance(2);
} else {
buf.push('-');
self.advance(1);
}
} else {
break;
}
input = self.content_bytes();
}
self.parse_comment_complex(&mut buf);
self.instate = state;
}
}
#[cfg(test)]
mod tests {
use std::cell::RefCell;
use crate::parser::{XmlParserCtxt, XmlSAXHandler, xml_ctxt_read_memory};
thread_local! {
static RESULT: RefCell<String> = const {
RefCell::new(String::new())
};
}
fn make_sax_handler_only_comment() -> XmlSAXHandler {
fn test_comment(_context: &mut XmlParserCtxt, comment: &str) {
RESULT.with_borrow_mut(|result| {
result.push_str(comment);
});
}
XmlSAXHandler {
comment: Some(test_comment),
..Default::default()
}
}
fn make_parser_context<'a>() -> XmlParserCtxt<'a> {
XmlParserCtxt::new_sax_parser(Some(Box::new(make_sax_handler_only_comment())), None)
.unwrap()
}
fn do_test(docs: &[(&str, &str)]) {
for &(doc, res) in docs {
let mut ctxt = make_parser_context();
xml_ctxt_read_memory(&mut ctxt, doc.as_bytes(), None, None, 0);
let result = RESULT.with_borrow(|result| result.clone());
assert_eq!(result, res);
RESULT.with_borrow_mut(|result| result.clear());
}
}
#[test]
fn only_ascii_comment_test() {
const DOCUMENT_AND_RESULT: &[(&str, &str)] = &[
("<!--Hello, World!!!--><root/>", "Hello, World!!!"),
("<!--\n--><root/>", "\n"),
("<!--\r\r\n--><root/>", "\n\n"),
("<!--\r\r\r\r--><root/>", "\n\n\n\n"),
(
"<!--Hello\t,\rWorld\r\n!!! --><root/>",
"Hello\t,\nWorld\n!!! ",
),
];
do_test(DOCUMENT_AND_RESULT);
}
#[test]
fn complex_comment_test() {
const DOCUMENT_AND_RESULT: &[(&str, &str)] = &[
("<!--あいうえお--><root/>", "あいうえお"),
("<!--あいうえお\n--><root/>", "あいうえお\n"),
("<!--あい\r\r\nうえお--><root/>", "あい\n\nうえお"),
(
"<!--ai\r\nあい\r\nうえ\nueo\r\nお--><root/>",
"ai\nあい\nうえ\nueo\nお",
),
];
do_test(DOCUMENT_AND_RESULT);
}
#[test]
fn invalid_comment_test() {
const DOCUMENT_AND_RESULT: &[(&str, &str)] = &[
("<!--Hello,--World!!!--><root/>", ""),
("<!--あいう--えお--><root/>", ""),
("<!--Hello,World!!!---><root/>", ""),
("<!--あいうえお---><root/>", ""),
];
do_test(DOCUMENT_AND_RESULT);
}
}