use crate::{
ParserSpec,
error::XMLError,
sax::{error::fatal_error, handler::SAXHandler, parser::XMLReader, source::InputSource},
};
impl<'a, Spec: ParserSpec<Reader = InputSource<'a>>, H: SAXHandler> XMLReader<Spec, H> {
pub(crate) fn is_char(&self, c: char) -> bool {
self.version.is_char(c)
}
pub(crate) fn is_name_start_char(&self, c: char) -> bool {
self.version.is_name_start_char(c)
}
pub(crate) fn is_name_char(&self, c: char) -> bool {
self.version.is_name_char(c)
}
pub(crate) fn is_whitespace(&self, c: char) -> bool {
self.version.is_whitespace(c)
}
pub(crate) fn skip_whitespaces(&mut self) -> Result<usize, XMLError> {
let mut skipped = 0;
while let Some(w) = self.source.peek_char()? {
if !self.is_whitespace(w) {
break;
}
self.source.next_char()?;
match w {
'\x20' | '\t' => self.locator.update_column(|c| c + 1),
'\n' => {
self.locator.set_column(1);
self.locator.update_line(|l| l + 1);
}
'\r' => {
if self.source.peek_char()?.is_some_and(|c| c == '\n') {
self.source.next_char()?;
}
self.locator.set_column(1);
self.locator.update_line(|l| l + 1);
}
_ => unimplemented!(),
}
skipped += 1;
}
Ok(skipped)
}
pub(crate) fn parse_nmtoken(&mut self, buffer: &mut String) -> Result<(), XMLError> {
let orig = buffer.len();
while let Some(c) = self.source.next_char_if(|c| self.version.is_name_char(c))? {
buffer.push(c);
self.locator.update_column(|c| c + 1);
}
if buffer.len() == orig {
fatal_error!(self, ParserEmptyName, "Nmtoken is empty.");
return Err(XMLError::ParserEmptyName);
}
Ok(())
}
pub(crate) fn parse_name(&mut self, buffer: &mut String) -> Result<(), XMLError> {
let Some(c) = self
.source
.next_char_if(|c| self.version.is_name_start_char(c))?
else {
fatal_error!(self, ParserEmptyName, "Name is empty.");
return Err(XMLError::ParserEmptyName);
};
buffer.push(c);
self.locator.update_column(|c| c + 1);
while let Some(c) = self.source.next_char_if(|c| self.version.is_name_char(c))? {
buffer.push(c);
self.locator.update_column(|c| c + 1);
}
Ok(())
}
fn parse_ncname_allow_empty(&mut self, buffer: &mut String) -> Result<(), XMLError> {
if self.source.content_bytes().is_empty() {
self.source.grow()?;
}
let mut col = self.locator.column();
let content = self.source.content_bytes();
let rem = content
.iter()
.position(|b| !b.is_ascii_alphabetic() && *b != b'_')
.unwrap_or(content.len());
if rem > 0 {
buffer.push_str(unsafe {
std::str::from_utf8_unchecked(&content[..rem])
});
self.source.advance(rem);
col += rem;
} else {
let Some(c) = self
.source
.next_char_if(|c| self.version.is_name_start_char(c) && c != ':')?
else {
return Ok(());
};
buffer.push(c);
col += 1;
}
self.source.grow()?;
while let content = self.source.content_bytes()
&& !content.is_empty()
{
match content
.iter()
.position(|b| !b.is_ascii_alphanumeric() && !matches!(b, b'-' | b'.' | b'_'))
{
Some(rem) if rem > 0 => {
buffer.push_str(unsafe {
std::str::from_utf8_unchecked(&content[..rem])
});
let end = content[rem] < 0x80;
self.source.advance(rem);
col += rem;
if end {
break;
}
}
None => {
buffer.push_str(unsafe {
std::str::from_utf8_unchecked(content)
});
let len = content.len();
self.source.advance(len);
col += len;
}
_ => {}
}
if let Some(c) = self.source.peek_char()?
&& self.version.is_name_char(c)
&& c != ':'
{
buffer.push(c);
col += 1;
self.source.advance(c.len_utf8());
} else {
break;
}
self.source.grow()?;
}
self.locator.set_column(col);
Ok(())
}
pub(crate) fn parse_ncname(&mut self, buffer: &mut String) -> Result<(), XMLError> {
let orig = buffer.len();
self.parse_ncname_allow_empty(buffer)?;
if buffer.len() == orig {
fatal_error!(self, ParserEmptyName, "Name is empty.");
return Err(XMLError::ParserEmptyName);
}
Ok(())
}
pub(crate) fn parse_qname(&mut self, buffer: &mut String) -> Result<usize, XMLError> {
let orig = buffer.len();
self.parse_ncname_allow_empty(buffer)?;
if self.source.next_char_if(|c| c == ':')?.is_none() {
return if buffer.len() == orig {
fatal_error!(self, ParserEmptyQName, "QName is empty.");
Err(XMLError::ParserEmptyQName)
} else {
Ok(0)
};
};
if buffer.len() == orig {
fatal_error!(
self,
ParserEmptyQNamePrefix,
"':' is found in QName, but its prefix is empty."
);
}
let prefix = buffer.len() - orig;
buffer.push(':');
self.locator.update_column(|c| c + 1);
self.parse_ncname_allow_empty(buffer)?;
if buffer.len() == orig + prefix + 1 {
fatal_error!(
self,
ParserEmptyQNameLocalPart,
"':' is found in QName, but its local part is empty."
);
Err(XMLError::ParserEmptyQNameLocalPart)
} else if prefix == 0 {
Err(XMLError::ParserEmptyQNamePrefix)
} else {
Ok(prefix)
}
}
pub fn validate_nmtoken(&self, nmtoken: &str) -> Result<(), XMLError> {
if nmtoken.is_empty() {
return Err(XMLError::ParserEmptyNmtoken);
}
nmtoken
.chars()
.all(|c| self.is_name_char(c))
.then_some(())
.ok_or(XMLError::ParserInvalidNameChar)
}
pub fn validate_name(&self, name: &str) -> Result<(), XMLError> {
if name.is_empty() {
return Err(XMLError::ParserEmptyName);
}
name.strip_prefix(|c| self.is_name_start_char(c))
.ok_or(XMLError::ParserInvalidNameStartChar)?
.chars()
.all(|c| self.is_name_char(c))
.then_some(())
.ok_or(XMLError::ParserInvalidNameChar)
}
pub fn validate_ncname(&self, name: &str) -> Result<(), XMLError> {
if name.is_empty() {
return Err(XMLError::ParserEmptyNCName);
}
name.strip_prefix(|c| c != ':' && self.is_name_start_char(c))
.ok_or(XMLError::ParserInvalidNCNameStartChar)?
.chars()
.all(|c| c != ':' && self.is_name_char(c))
.then_some(())
.ok_or(XMLError::ParserInvalidNCNameChar)
}
pub fn validate_qname(&self, mut name: &str) -> Result<(), XMLError> {
if name.is_empty() {
return Err(XMLError::ParserEmptyQName);
}
if name.starts_with(':') {
return Err(XMLError::ParserEmptyQNamePrefix);
}
name = name
.strip_prefix(|c| self.is_name_start_char(c))
.ok_or(XMLError::ParserInvalidNCNameStartChar)?;
name = name.trim_start_matches(|c| c != ':' && self.is_name_char(c));
if name.is_empty() {
return Ok(());
}
name = name
.strip_prefix(|c| c == ':')
.ok_or(XMLError::ParserInvalidQNameSeparator)?;
if name.is_empty() {
return Err(XMLError::ParserEmptyQNameLocalPart);
}
self.validate_ncname(name)
}
pub fn validate_nmtokens(&self, nmtokens: &str) -> Result<(), XMLError> {
if nmtokens.is_empty() {
return Err(XMLError::ParserEmptyNmtokens);
}
self.validate_names(nmtokens, |nmtoken| self.validate_nmtoken(nmtoken))
}
pub fn validate_names(
&self,
names: &str,
name_validation: impl Fn(&str) -> Result<(), XMLError>,
) -> Result<(), XMLError> {
if names.is_empty() {
return Err(XMLError::ParserEmptyNames);
}
for name in names.split('\x20') {
name_validation(name)?;
}
Ok(())
}
}