use std::{borrow::Cow, collections::BTreeMap, io::Read, str::Chars, sync::Arc};
use crate::{
XML_NS_NAMESPACE, XML_XML_NAMESPACE, XMLVersion,
error::XMLError,
relaxng::{
RelaxNGParseHandler, RelaxNGSchema, XML_RELAX_NG_ANNOTATION_NAMESPACE,
XML_RELAX_NG_NAMESPACE,
},
sax::{
Attribute, Attributes, DOCUMENT_ENTITY_NAME, EntityResolver, InputSource, Locator,
NamespaceStack, SAXHandler,
},
uri::{URIStr, URIString},
xsdtypes::XML_SCHEMA_DATATYPES_NAMESPACE,
};
/// Errors reported while tokenizing or parsing a RELAX NG compact-syntax schema.
///
/// The `Display` text of each variant is its `Debug` name.
#[derive(Debug, Clone)]
pub enum RncParseError {
    /// A character that is not a legal XML 1.0 character was found in the input.
    /// (The spelling of the variant name is part of the public interface.)
    InvalidCharcter,
    /// The tokenizer met input that does not form a token (bad name, lone `>`,
    /// unterminated single-line literal, ...).
    InvalidToken,
    /// A declared prefix is not allowed to be bound the way the schema binds it.
    InvalidPrefix,
    /// A literal that must be a URI could not be parsed as one.
    InvalidURI,
    /// NOTE(review): not produced by the code visible in this part of the file.
    UnacceptablePrefix,
    /// An annotation element was placed in a namespace where that is not allowed.
    UnacceptableNamespaceName,
    /// The same prefix (or the default namespace) was declared twice.
    DuplicateDeclaration,
    /// NOTE(review): not produced by the code visible in this part of the file.
    UnqualifiedName,
    /// A top-level pattern did not reduce to exactly one root node.
    MultipleRootElement,
    /// A `(` has no matching `)`.
    UnclosedParentheses,
    /// A `{` has no matching `}`.
    UnclosedBlock,
    /// A `[` has no matching `]`.
    UnclosedAnnotation,
    /// A token appeared where the grammar does not allow it.
    UnexpectedToken,
    /// The input ended inside a construct (e.g. a triple-quoted literal).
    UnexpectedEOF,
}

impl std::fmt::Display for RncParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The user-facing text is simply the variant name.
        std::fmt::Debug::fmt(self, f)
    }
}

impl std::error::Error for RncParseError {}
/// Tokens produced by `tokenization` and consumed by the `parse_*` functions.
#[derive(Debug, PartialEq)]
enum TokenType {
// Text of one or more consecutive `##` documentation lines, joined with '\n'.
Documentation(String),
// Content of one quoted literal segment (quotes removed).
LiteralSegment(String),
// A name that is not a keyword, or any name written with a leading backslash.
Ident(String),
// One of the entries of `KEYWORD`.
Keyword(&'static str),
// `prefix:local` - stored as (prefix, local name).
CName(String, String),
// `prefix:*` - stored as the prefix.
NsName(String),
// `=`
Assign,
// `|`
Choice,
// `|=`
ChoiceAssign,
// `&`
Interleave,
// `&=`
InterleaveAssign,
// `,`
Group,
// `{`
OpenBlock,
// `}`
CloseBlock,
// `(`
OpenGroup,
// `)`
CloseGroup,
// `*`
ZeroOrMore,
// `+`
OneOrMore,
// `?`
Optional,
// `-`
Except,
// `>>`
FollowAnnot,
// `[`
OpenAnnot,
// `]`
CloseAnnot,
// `~`
LiteralSep,
}
/// Names that the tokenizer reports as `TokenType::Keyword` instead of `TokenType::Ident`.
///
/// The tokenizer looks names up with `binary_search`, so the entries must stay
/// sorted in byte order.
const KEYWORD: &[&str] = &[
"attribute",
"datatypes",
"default",
"div",
"element",
"empty",
"external",
"grammar",
"include",
"inherit",
"list",
"mixed",
"namespace",
"notAllowed",
"parent",
"start",
"string",
"text",
"token",
];
/// Sentinel stored as the namespace name of a prefix declared with `= inherit`.
/// Bindings carrying this value are not emitted as `xmlns` attributes by `parse`.
const URI_INHERIT: &str = "\0";
/// Reads the whole input and returns it with every line break replaced by a NUL marker.
///
/// `"\r\n"`, a lone `'\r'` and a lone `'\n'` each become a single `'\0'`.
///
/// # Errors
/// Propagates read errors from `source`, and returns
/// `RncParseError::InvalidCharcter` for any other character rejected by
/// `XMLVersion::XML10.is_char`.
fn newline_normalization(source: &mut InputSource) -> Result<String, XMLError> {
    let mut normalized = String::new();
    loop {
        let Some(ch) = source.next_char()? else {
            break;
        };
        if ch == '\r' {
            // Swallow the '\n' of a CRLF pair so the pair yields one marker.
            source.next_char_if(|c| c == '\n')?;
            normalized.push('\0');
        } else if ch == '\n' {
            normalized.push('\0');
        } else if XMLVersion::XML10.is_char(ch) {
            normalized.push(ch);
        } else {
            return Err(XMLError::RncParseError(RncParseError::InvalidCharcter));
        }
    }
    Ok(normalized)
}
/// Replaces every escape sequence `\x{H...}` (one or more `x`, one or more hex
/// digits) in `source` by the character it denotes.
///
/// A backslash that does not start a well-formed escape sequence is copied
/// through unchanged, together with the text after it.
///
/// # Errors
/// Returns `RncParseError::InvalidCharcter` when an escape denotes a character
/// rejected by `XMLVersion::XML10.is_char`.
fn escape_interpretation(source: &str) -> Result<String, XMLError> {
    let mut buf = String::with_capacity(source.len());
    let mut source = source.chars();
    while let Some(c) = source.next() {
        if c != '\\' {
            buf.push(c);
            continue;
        }
        match decode_hex_escape(source.as_str()) {
            Some((decoded, rest)) => {
                if !XMLVersion::XML10.is_char(decoded) {
                    return Err(XMLError::RncParseError(RncParseError::InvalidCharcter));
                }
                buf.push(decoded);
                source = rest.chars();
            }
            // Not an escape: keep the backslash and rescan what follows normally.
            None => buf.push(c),
        }
    }
    Ok(buf)
}

/// Parses `x+ '{' hexDigit+ '}'` at the start of `input` (the text just after a
/// backslash). Returns the decoded character and the text after the closing brace,
/// or `None` when `input` does not start with a complete, valid escape body.
fn decode_hex_escape(input: &str) -> Option<(char, &str)> {
    let after_x = input.trim_start_matches('x');
    if after_x.len() == input.len() {
        // At least one 'x' is required.
        return None;
    }
    let body = after_x.strip_prefix('{')?;
    // The closing brace is mandatory; an escape cut off by end of input is not one.
    let end = body.find('}')?;
    let digits = &body[..end];
    if digits.is_empty() {
        return None;
    }
    let mut code = 0u32;
    for digit in digits.chars() {
        // A non-hex character or an overflowing value disqualifies the escape.
        code = code.checked_mul(16)?.checked_add(digit.to_digit(16)?)?;
    }
    let decoded = char::from_u32(code)?;
    Some((decoded, &body[end + 1..]))
}
/// Splits newline-normalized RNC text into a flat list of tokens.
///
/// Whitespace and `#` comments are dropped; `##` documentation comments become
/// `TokenType::Documentation`.
///
/// # Errors
/// Returns `RncParseError::InvalidToken` for input that cannot start a token
/// (including a lone `>`), and propagates the errors of the literal and name
/// scanners (`string_no_*`, `ncname`).
fn tokenization(source: &str) -> Result<Vec<TokenType>, XMLError> {
let mut source = source.chars();
let mut tokens = vec![];
// `keep` is the input as it was before `c` was consumed; the fallback arm
// re-reads a name from there so that its first character is included.
while let keep = source.as_str()
&& let Some(c) = source.next()
{
match c {
// Whitespace and line-break markers separate tokens and are skipped.
'\t' | '\x20' | '\n' | '\0' => {
}
'#' => {
if source.as_str().starts_with("#") {
// `##` starts a documentation comment.
source.next();
let mut dest = String::new();
documentation_line_content(&mut source, &mut dest);
// Merge following lines that (after indentation) also start with `##`,
// separating the line contents with '\n'.
while let Some(mut s) = source
.as_str()
.strip_prefix(['\0', '\n'])
.map(|s| s.chars())
{
indent(&mut s);
if let Some(s) = s.as_str().strip_prefix("##") {
source = s.chars();
dest.push('\n');
documentation_line_content(&mut source, &mut dest);
} else {
break;
}
}
tokens.push(TokenType::Documentation(dest));
} else {
// Ordinary comment: discard the rest of the line.
rest_of_line(&mut source, &mut String::new());
}
}
'"' => {
let mut dest = String::new();
if let Some(s) = source.as_str().strip_prefix("\"\"") {
// Triple-quoted literal.
source = s.chars();
string_no_triple_quot(&mut source, &mut dest)?;
// Cannot fail: the scanner only returns Ok when positioned at `"""`.
source = source.as_str().strip_prefix("\"\"\"").unwrap().chars();
} else {
string_no_quot(&mut source, &mut dest)?;
// Consume the closing quote the scanner stopped at.
source.next();
}
tokens.push(TokenType::LiteralSegment(dest));
}
'\'' => {
let mut dest = String::new();
if let Some(s) = source.as_str().strip_prefix("''") {
// Triple-apostrophe literal.
source = s.chars();
string_no_triple_apos(&mut source, &mut dest)?;
// Cannot fail: the scanner only returns Ok when positioned at `'''`.
source = source.as_str().strip_prefix("'''").unwrap().chars();
} else {
string_no_apos(&mut source, &mut dest)?;
// Consume the closing apostrophe the scanner stopped at.
source.next();
}
tokens.push(TokenType::LiteralSegment(dest));
}
'\\' => {
// A backslash-prefixed name is always an identifier, never a keyword.
let mut dest = String::new();
ncname(&mut source, &mut dest)?;
tokens.push(TokenType::Ident(dest));
}
'=' => tokens.push(TokenType::Assign),
'|' => {
if source.as_str().starts_with('=') {
source.next();
tokens.push(TokenType::ChoiceAssign);
} else {
tokens.push(TokenType::Choice);
}
}
'&' => {
if source.as_str().starts_with('=') {
source.next();
tokens.push(TokenType::InterleaveAssign);
} else {
tokens.push(TokenType::Interleave);
}
}
',' => tokens.push(TokenType::Group),
'{' => tokens.push(TokenType::OpenBlock),
'}' => tokens.push(TokenType::CloseBlock),
'(' => tokens.push(TokenType::OpenGroup),
')' => tokens.push(TokenType::CloseGroup),
'*' => tokens.push(TokenType::ZeroOrMore),
'+' => tokens.push(TokenType::OneOrMore),
'?' => tokens.push(TokenType::Optional),
'-' => tokens.push(TokenType::Except),
'>' => {
// Only `>>` is a token; a single `>` is rejected.
if let Some(s) = source.as_str().strip_prefix('>') {
source = s.chars();
tokens.push(TokenType::FollowAnnot);
} else {
return Err(XMLError::RncParseError(RncParseError::InvalidToken));
}
}
'[' => tokens.push(TokenType::OpenAnnot),
']' => tokens.push(TokenType::CloseAnnot),
'~' => tokens.push(TokenType::LiteralSep),
_ => {
// Anything else must start a name; read it from `keep` (which still
// contains `c`).
let mut pre = String::new();
let mut keep = keep.chars();
ncname(&mut keep, &mut pre)?;
match keep.as_str().as_bytes() {
// `prefix:*`
[b':', b'*', ..] => {
source = keep.as_str()[2..].chars();
tokens.push(TokenType::NsName(pre));
}
// `prefix:local`
[b':', ..] => {
source = keep.as_str()[1..].chars();
let mut loc = String::new();
ncname(&mut source, &mut loc)?;
tokens.push(TokenType::CName(pre, loc));
}
// Plain name: keyword if listed in KEYWORD, identifier otherwise.
_ => {
source = keep;
if let Ok(keyword) =
KEYWORD.binary_search(&pre.as_str()).map(|pos| KEYWORD[pos])
{
tokens.push(TokenType::Keyword(keyword));
} else {
tokens.push(TokenType::Ident(pre));
}
}
}
}
}
}
Ok(tokens)
}
/// Copies the body of a single-line `"`-delimited literal into `dest`.
///
/// On success `source` is left positioned at the closing `"` (not consumed).
/// Iterative, so the length of the literal does not affect stack usage.
///
/// # Errors
/// Returns `RncParseError::InvalidToken` when a line-break marker (`'\0'`) or the
/// end of input is reached first; the characters read so far stay in `dest`.
fn string_no_quot(source: &mut Chars, dest: &mut String) -> Result<(), XMLError> {
    loop {
        // Peek without consuming so the terminator stays in `source`.
        match source.clone().next() {
            Some('"') => return Ok(()),
            Some('\0') | None => {
                return Err(XMLError::RncParseError(RncParseError::InvalidToken));
            }
            Some(c) => {
                dest.push(c);
                source.next();
            }
        }
    }
}
/// Copies the body of a single-line `'`-delimited literal into `dest`.
///
/// On success `source` is left positioned at the closing `'` (not consumed).
/// Iterative, so the length of the literal does not affect stack usage.
///
/// # Errors
/// Returns `RncParseError::InvalidToken` when a line-break marker (`'\0'`) or the
/// end of input is reached first; the characters read so far stay in `dest`.
fn string_no_apos(source: &mut Chars, dest: &mut String) -> Result<(), XMLError> {
    loop {
        // Peek without consuming so the terminator stays in `source`.
        match source.clone().next() {
            Some('\'') => return Ok(()),
            Some('\0') | None => {
                return Err(XMLError::RncParseError(RncParseError::InvalidToken));
            }
            Some(c) => {
                dest.push(c);
                source.next();
            }
        }
    }
}
/// Copies the body of a `"""`-delimited literal into `dest`.
///
/// On success `source` is left positioned at the first `"""` found (not
/// consumed). Implemented with a substring search, so the length of the literal
/// does not affect stack usage.
///
/// # Errors
/// Returns `RncParseError::UnexpectedEOF` when no closing `"""` exists; the
/// remaining input is then fully consumed into `dest`.
fn string_no_triple_quot(source: &mut Chars, dest: &mut String) -> Result<(), XMLError> {
    let rest = source.as_str();
    match rest.find("\"\"\"") {
        Some(end) => {
            dest.push_str(&rest[..end]);
            *source = rest[end..].chars();
            Ok(())
        }
        None => {
            dest.push_str(rest);
            *source = rest[rest.len()..].chars();
            Err(XMLError::RncParseError(RncParseError::UnexpectedEOF))
        }
    }
}
/// Copies the body of a `'''`-delimited literal into `dest`.
///
/// On success `source` is left positioned at the first `'''` found (not
/// consumed). Implemented with a substring search, so the length of the literal
/// does not affect stack usage.
///
/// # Errors
/// Returns `RncParseError::UnexpectedEOF` when no closing `'''` exists; the
/// remaining input is then fully consumed into `dest`.
fn string_no_triple_apos(source: &mut Chars, dest: &mut String) -> Result<(), XMLError> {
    let rest = source.as_str();
    match rest.find("'''") {
        Some(end) => {
            dest.push_str(&rest[..end]);
            *source = rest[end..].chars();
            Ok(())
        }
        None => {
            dest.push_str(rest);
            *source = rest[rest.len()..].chars();
            Err(XMLError::RncParseError(RncParseError::UnexpectedEOF))
        }
    }
}
/// Advances `source` past any leading tabs and spaces.
fn indent(source: &mut Chars) {
    let remaining = source.as_str();
    *source = remaining.trim_start_matches(['\t', ' ']).chars();
}
fn documentation_line_content(source: &mut Chars, dest: &mut String) {
match source.as_str().chars().next() {
Some('#') => {
source.next();
documentation_line_content(source, dest);
}
Some(' ') => {
source.next();
rest_of_line(source, dest);
}
Some('\0' | '\n') => {}
Some(c) => {
dest.push(c);
source.next();
rest_of_line(source, dest);
}
None => {}
}
}
/// Appends everything up to (but excluding) the next line break to `dest`.
///
/// A line break is either the `'\0'` marker or a literal `'\n'`. `source` is left
/// positioned at the line break, or at the end of input when there is none.
/// Implemented with a search instead of per-character recursion, so long lines
/// cannot exhaust the stack.
fn rest_of_line(source: &mut Chars, dest: &mut String) {
    let rest = source.as_str();
    let end = rest.find(['\0', '\n']).unwrap_or(rest.len());
    dest.push_str(&rest[..end]);
    *source = rest[end..].chars();
}
/// Reads one colon-free XML name from the front of `source` and appends it to `dest`.
///
/// The first character must satisfy `XMLVersion::XML10.is_name_start_char` and
/// must not be `':'`; the name then extends over following characters that
/// satisfy `is_name_char` and are not `':'`. `source` is advanced past exactly
/// the characters that were read, whatever `dest` already contained.
///
/// # Errors
/// Returns `RncParseError::InvalidToken` when no name starts at the current
/// position; `source` and `dest` are then left untouched.
fn ncname(source: &mut Chars, dest: &mut String) -> Result<(), XMLError> {
    let rest = source.as_str();
    match rest.chars().next() {
        Some(c) if c != ':' && XMLVersion::XML10.is_name_start_char(c) => {}
        _ => return Err(XMLError::RncParseError(RncParseError::InvalidToken)),
    }
    // Byte offset of the first character (after the start character) that cannot
    // continue the name.
    let end = rest
        .char_indices()
        .skip(1)
        .find(|&(_, c)| c == ':' || !XMLVersion::XML10.is_name_char(c))
        .map_or(rest.len(), |(pos, _)| pos);
    dest.push_str(&rest[..end]);
    *source = rest[end..].chars();
    Ok(())
}
/// Kind of a node in the intermediate tree built by the parser.
///
/// Apart from `Documentation`, `TextContent` and `Foreign`, each variant maps to
/// the element in the RELAX NG namespace whose local name `NodeType::local_name`
/// returns.
#[derive(Debug)]
enum NodeType {
AnyName,
Attribute,
Choice,
Data,
Define,
Div,
Element,
Empty,
Except,
ExternalRef,
Grammar,
Group,
Include,
Interleave,
List,
Mixed,
Name,
NotAllowed,
NsName,
OneOrMore,
Optional,
Param,
ParentRef,
Ref,
Start,
Text,
Value,
ZeroOrMore,
// `documentation` element in the RELAX NG annotation namespace.
Documentation,
// Character data; has neither a name nor a namespace.
TextContent(String),
// Annotation element outside the RELAX NG namespaces:
// (namespace name if any, local name).
Foreign(Option<String>, String),
}
impl NodeType {
    /// Local name of the element this node stands for, or `None` for text content.
    fn local_name(&self) -> Option<&str> {
        let name = match self {
            Self::TextContent(_) => return None,
            Self::Foreign(_, loc) => loc.as_str(),
            Self::Documentation => "documentation",
            Self::AnyName => "anyName",
            Self::Attribute => "attribute",
            Self::Choice => "choice",
            Self::Data => "data",
            Self::Define => "define",
            Self::Div => "div",
            Self::Element => "element",
            Self::Empty => "empty",
            Self::Except => "except",
            Self::ExternalRef => "externalRef",
            Self::Grammar => "grammar",
            Self::Group => "group",
            Self::Include => "include",
            Self::Interleave => "interleave",
            Self::List => "list",
            Self::Mixed => "mixed",
            Self::Name => "name",
            Self::NotAllowed => "notAllowed",
            Self::NsName => "nsName",
            Self::OneOrMore => "oneOrMore",
            Self::Optional => "optional",
            Self::Param => "param",
            Self::ParentRef => "parentRef",
            Self::Ref => "ref",
            Self::Start => "start",
            Self::Text => "text",
            Self::Value => "value",
            Self::ZeroOrMore => "zeroOrMore",
        };
        Some(name)
    }

    /// Qualified name of the element, using `rmap` to look up the prefix bound to
    /// the node's namespace name.
    ///
    /// Returns `None` for text content and when the node's namespace has no entry
    /// in `rmap`. A node without a namespace, or whose namespace maps to the empty
    /// prefix, yields the bare local name.
    fn qname(&self, rmap: &BTreeMap<Arc<str>, Arc<str>>) -> Option<Cow<'_, str>> {
        let local = self.local_name()?;
        let Some(namespace) = self.namespace_name() else {
            // Only a foreign element without namespace reaches this point.
            return Some(Cow::Borrowed(local));
        };
        let prefix = rmap.get(namespace)?;
        if prefix.is_empty() {
            Some(Cow::Borrowed(local))
        } else {
            Some(Cow::Owned(format!("{prefix}:{local}")))
        }
    }

    /// Namespace name of the element, or `None` for text content and for foreign
    /// elements that carry no namespace.
    fn namespace_name(&self) -> Option<&str> {
        match self {
            Self::TextContent(_) => None,
            Self::Foreign(ns, _) => ns.as_deref(),
            Self::Documentation => Some(XML_RELAX_NG_ANNOTATION_NAMESPACE),
            _ => Some(XML_RELAX_NG_NAMESPACE),
        }
    }
}
/// One node of the intermediate tree. Nodes live in a flat `Vec<Node>` and refer
/// to each other by index into that vector.
struct Node {
// What kind of element (or text) this node is.
r#type: NodeType,
// Attributes attached to the element.
atts: Attributes,
// Indices of the child nodes in the owning vector.
children: Vec<usize>,
}
impl Node {
    /// Adds `att` to this node, marking it as specified and, when it lives in the
    /// `xmlns` namespace, as a namespace declaration.
    ///
    /// # Errors
    /// Returns the error reported by `Attributes::push` when the attribute is
    /// rejected.
    fn push_attribute(&mut self, mut att: Attribute) -> Result<(), XMLError> {
        att.set_specified();
        let declares_namespace = att
            .namespace_name
            .as_deref()
            .is_some_and(|name| name == XML_NS_NAMESPACE);
        if declares_namespace {
            att.set_nsdecl();
        }
        self.atts.push(att).map_err(|err| err.1)?;
        Ok(())
    }
}
/// Parses a token stream into the intermediate tree.
///
/// Returns the index of the root node, the node storage and the namespace
/// bindings, including the ones added here for the RELAX NG namespaces.
///
/// After the body is parsed, the root element receives an `xmlns` declaration for
/// every declared namespace that has a non-empty, non-inherited name and is not
/// the `xml` prefix, plus bindings for the RELAX NG namespace (if no declared
/// prefix maps to it) and for the annotation namespace (if any documentation node
/// exists). Foreign elements without namespace get `xmlns=""` when a default
/// namespace is in effect.
///
/// # Errors
/// Propagates errors of the preamble/body parsers and returns
/// `RncParseError::UnexpectedToken` when tokens remain after the body.
fn parse(
    mut source: &[TokenType],
    base_uri: &URIStr,
) -> Result<(usize, Vec<Node>, NamespaceStack), XMLError> {
    let mut ns_stack = NamespaceStack::default();
    let mut dt_stack = NamespaceStack::default();
    parse_preamble(&mut source, &mut ns_stack, &mut dt_stack)?;
    let mut tree = vec![];
    let root = parse_top_level_body(&mut source, base_uri, &mut tree, &ns_stack, &dt_stack)?;
    if !source.is_empty() {
        return Err(XMLError::RncParseError(RncParseError::UnexpectedToken));
    }
    for ns in &ns_stack {
        let exportable = !ns.namespace_name.is_empty()
            && ns.namespace_name.as_ref() != URI_INHERIT
            && ns.prefix.as_ref() != "xml";
        if !exportable {
            // NOTE(review): the original collected these prefixes in a list that was
            // never read; confirm nothing was meant to be removed from `ns_stack`.
            continue;
        }
        let mut att = Attribute {
            namespace_name: Some(XML_NS_NAMESPACE.to_owned()),
            local_name: if ns.prefix.is_empty() {
                Some("xmlns".to_owned())
            } else {
                Some(ns.prefix.to_string())
            },
            qname: if ns.prefix.is_empty() {
                "xmlns".to_owned()
            } else {
                format!("xmlns:{}", ns.prefix)
            },
            value: ns.namespace_name.to_string(),
            flag: 0,
        };
        att.set_nsdecl();
        tree[root].push_attribute(att)?;
    }
    // Bind the RELAX NG namespace unless the schema already declared a prefix for it.
    if ns_stack
        .iter()
        .all(|ns| ns.namespace_name.as_ref() != XML_RELAX_NG_NAMESPACE)
    {
        set_relaxng_namespace(
            &mut tree[root],
            &mut ns_stack,
            "rng",
            XML_RELAX_NG_NAMESPACE,
        )?;
    }
    // Documentation nodes need the annotation namespace.
    if tree
        .iter()
        .any(|node| matches!(node.r#type, NodeType::Documentation))
    {
        set_relaxng_namespace(
            &mut tree[root],
            &mut ns_stack,
            "a",
            XML_RELAX_NG_ANNOTATION_NAMESPACE,
        )?;
    }
    // With a default namespace in scope, foreign elements without namespace must
    // undeclare it explicitly.
    if ns_stack.is_declared("") {
        for node in tree
            .iter_mut()
            .filter(|n| matches!(n.r#type, NodeType::Foreign(None, _)))
        {
            node.push_attribute(Attribute {
                namespace_name: Some(XML_NS_NAMESPACE.to_owned()),
                local_name: Some("xmlns".to_owned()),
                qname: "xmlns".to_owned(),
                value: "".to_owned(),
                flag: 0,
            })?;
        }
    }
    Ok((root, tree, ns_stack))
}
/// Binds the namespace `name` on `root` and records the binding in `ns`.
///
/// If no default namespace is declared yet, `name` becomes the default namespace.
/// Otherwise it is bound to `pre`, or to `pre` followed by the first counter value
/// (starting at 0) that yields an undeclared prefix.
///
/// # Errors
/// Propagates the error of `Node::push_attribute`.
fn set_relaxng_namespace(
    root: &mut Node,
    ns: &mut NamespaceStack,
    pre: &str,
    name: &str,
) -> Result<(), XMLError> {
    // The empty prefix stands for "use the default namespace".
    let prefix = if ns.is_declared("") {
        let mut candidate = pre.to_owned();
        let mut suffix = 0;
        while ns.is_declared(&candidate) {
            candidate = format!("{pre}{suffix}");
            suffix += 1;
        }
        candidate
    } else {
        String::new()
    };
    let (local_name, qname) = if prefix.is_empty() {
        ("xmlns".to_owned(), "xmlns".to_owned())
    } else {
        (prefix.clone(), format!("xmlns:{prefix}"))
    };
    let declaration = Attribute {
        namespace_name: Some(XML_NS_NAMESPACE.to_owned()),
        local_name: Some(local_name),
        qname,
        value: name.to_owned(),
        flag: 0,
    };
    ns.push(&prefix, name);
    root.push_attribute(declaration)?;
    Ok(())
}
/// Consumes the leading declarations of a schema.
///
/// Recognized forms are `namespace p = ...`, `default namespace [p] = ...` and
/// `datatypes p = ...`, where the prefix may be an identifier or a keyword and a
/// namespace value may be a literal or `inherit`. Namespace bindings go to `ns`
/// (the default namespace under the empty prefix), datatype library bindings to
/// `dt`. Parsing stops at the first token sequence that is not a declaration.
///
/// # Errors
/// `InvalidPrefix` / `InvalidURI` for bindings rejected by the checks below,
/// `DuplicateDeclaration` from `set_namespace`, and any error of `parse_literal`.
fn parse_preamble(
source: &mut &[TokenType],
ns: &mut NamespaceStack,
dt: &mut NamespaceStack,
) -> Result<(), XMLError> {
use TokenType::*;
// Rejects bindings that misuse the reserved `xml` / `xmlns` prefixes: `xml` and
// the XML namespace may only be bound to each other, `xmlns` never.
macro_rules! ns_prefix {
( $prefix:expr, $namespace_name:expr ) => {
if ($prefix == "xml" && $namespace_name != XML_XML_NAMESPACE)
|| ($namespace_name == XML_XML_NAMESPACE && $prefix != "xml")
{
return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
}
if $prefix == "xmlns" {
return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
}
};
}
// Rejects rebinding `xsd` to anything but the XML Schema datatypes namespace,
// and datatype library names that do not parse as a URI.
macro_rules! datatype_prefix {
( $prefix:expr, $namespace_name:expr ) => {
if $prefix == "xsd" && $namespace_name != XML_SCHEMA_DATATYPES_NAMESPACE {
return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
}
if URIString::parse($namespace_name).is_err() {
return Err(XMLError::RncParseError(RncParseError::InvalidURI));
}
};
}
loop {
match source {
// namespace <ident> = inherit
[
Keyword("namespace"),
Ident(pre),
Assign,
Keyword("inherit"),
rest @ ..,
] => {
ns_prefix!(pre, URI_INHERIT);
*source = rest;
set_namespace(ns, pre, URI_INHERIT)?;
}
// namespace <ident> = <literal>
[Keyword("namespace"), Ident(pre), Assign, rest @ ..] => {
*source = rest;
let mut value = String::new();
parse_literal(source, &mut value)?;
ns_prefix!(pre, value);
set_namespace(ns, pre, &value)?;
}
// namespace <keyword> = inherit
[
Keyword("namespace"),
Keyword(keyword),
Assign,
Keyword("inherit"),
rest @ ..,
] => {
ns_prefix!(*keyword, URI_INHERIT);
*source = rest;
set_namespace(ns, keyword, URI_INHERIT)?;
}
// namespace <keyword> = <literal>
[Keyword("namespace"), Keyword(keyword), Assign, rest @ ..] => {
*source = rest;
let mut value = String::new();
parse_literal(source, &mut value)?;
ns_prefix!(*keyword, value);
set_namespace(ns, keyword, &value)?;
}
// default namespace = inherit
[
Keyword("default"),
Keyword("namespace"),
Assign,
Keyword("inherit"),
rest @ ..,
] => {
*source = rest;
set_namespace(ns, "", URI_INHERIT)?;
}
// default namespace = <literal>
[Keyword("default"), Keyword("namespace"), Assign, rest @ ..] => {
*source = rest;
let mut value = String::new();
parse_literal(source, &mut value)?;
ns_prefix!("", value);
set_namespace(ns, "", &value)?;
}
// default namespace <ident> = inherit (binds both the prefix and the default)
[
Keyword("default"),
Keyword("namespace"),
Ident(pre),
Assign,
Keyword("inherit"),
rest @ ..,
] => {
ns_prefix!(pre, URI_INHERIT);
*source = rest;
set_namespace(ns, pre, URI_INHERIT)?;
set_namespace(ns, "", URI_INHERIT)?;
}
// default namespace <keyword> = inherit
[
Keyword("default"),
Keyword("namespace"),
Keyword(pre),
Assign,
Keyword("inherit"),
rest @ ..,
] => {
ns_prefix!(*pre, URI_INHERIT);
*source = rest;
set_namespace(ns, pre, URI_INHERIT)?;
set_namespace(ns, "", URI_INHERIT)?;
}
// default namespace <ident> = <literal>
[
Keyword("default"),
Keyword("namespace"),
Ident(pre),
Assign,
rest @ ..,
] => {
*source = rest;
let mut value = String::new();
parse_literal(source, &mut value)?;
ns_prefix!(pre, value);
ns_prefix!("", value);
set_namespace(ns, pre, &value)?;
set_namespace(ns, "", &value)?;
}
// default namespace <keyword> = <literal>
[
Keyword("default"),
Keyword("namespace"),
Keyword(pre),
Assign,
rest @ ..,
] => {
*source = rest;
let mut value = String::new();
parse_literal(source, &mut value)?;
ns_prefix!(*pre, value);
ns_prefix!("", value);
set_namespace(ns, pre, &value)?;
set_namespace(ns, "", &value)?;
}
// datatypes <ident> = <literal>
[Keyword("datatypes"), Ident(pre), Assign, rest @ ..] => {
*source = rest;
let mut value = String::new();
parse_literal(source, &mut value)?;
datatype_prefix!(pre, &value);
set_namespace(dt, pre, &value)?;
}
// datatypes <keyword> = <literal>
[Keyword("datatypes"), Keyword(pre), Assign, rest @ ..] => {
*source = rest;
let mut value = String::new();
parse_literal(source, &mut value)?;
datatype_prefix!(*pre, &value);
set_namespace(dt, pre, &value)?;
}
// Anything else ends the preamble.
_ => break Ok(()),
}
}
}
/// Records the binding `prefix` -> `namespace_name` in `ns`.
///
/// # Errors
/// Returns `RncParseError::DuplicateDeclaration` when `prefix` is already declared.
fn set_namespace(
    ns: &mut NamespaceStack,
    prefix: &str,
    namespace_name: &str,
) -> Result<(), XMLError> {
    if !ns.is_declared(prefix) {
        ns.push(prefix, namespace_name);
        Ok(())
    } else {
        Err(XMLError::RncParseError(RncParseError::DuplicateDeclaration))
    }
}
/// Parses what follows the preamble: either a list of grammar members (wrapped in
/// an implicit `Grammar` node) or a single pattern.
///
/// The decision is made by looking past any leading annotations: a `start`,
/// `include` or `div` keyword, an identifier followed by an assignment operator,
/// or an annotation element (`name [` / `prefix:name [`) selects the grammar form.
/// Returns the index of the root node in `tree`.
///
/// # Errors
/// Propagates sub-parser errors; returns `RncParseError::MultipleRootElement` when
/// the pattern form does not produce exactly one node.
fn parse_top_level_body(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    // Look ahead on a copy of the cursor; nodes created by this trial parse go to
    // a throw-away tree.
    let mut tmp = *source;
    parse_annotations(&mut tmp, &mut vec![], ns)?;
    let node = match tmp {
        // The last alternative detects a grammar that starts with an annotation
        // element, using the same shape `parse_member` dispatches on. (It used to
        // require an identifier *followed by* a prefixed name, which no valid
        // schema contains.)
        [Keyword("start" | "include" | "div"), ..]
        | [Ident(_), Assign | ChoiceAssign | InterleaveAssign, ..]
        | [Ident(_) | CName(_, _), OpenAnnot, ..] => {
            let children = parse_grammar(source, base_uri, tree, ns, dt)?;
            Node {
                r#type: NodeType::Grammar,
                atts: Attributes::default(),
                children,
            }
        }
        _ => {
            let ret = parse_pattern(source, base_uri, tree, ns, dt)?;
            return if ret.len() != 1 {
                Err(XMLError::RncParseError(RncParseError::MultipleRootElement))
            } else {
                Ok(ret[0])
            };
        }
    };
    let ret = tree.len();
    tree.push(node);
    Ok(ret)
}
fn parse_grammar(
source: &mut &[TokenType],
base_uri: &URIStr,
tree: &mut Vec<Node>,
ns: &NamespaceStack,
dt: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
use TokenType::*;
let mut ret = vec![];
while !matches!(source, [CloseBlock, ..] | []) {
let ch = parse_member(source, base_uri, tree, ns, dt)?;
ret.push(ch);
}
Ok(ret)
}
fn parse_member(
source: &mut &[TokenType],
base_uri: &URIStr,
tree: &mut Vec<Node>,
ns: &NamespaceStack,
dt: &NamespaceStack,
) -> Result<usize, XMLError> {
use TokenType::*;
match source {
[Ident(_) | CName(_, _), OpenAnnot, ..] => {
parse_annotation_element_not_keyword(source, tree, ns)
}
_ => parse_annotated_component(source, base_uri, tree, ns, dt),
}
}
/// Parses optional leading annotations followed by one grammar component and
/// attaches the annotations to that component. Returns the component's index.
fn parse_annotated_component(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    let annotations = parse_annotations(source, tree, ns)?;
    let component = parse_component(source, base_uri, tree, ns, dt)?;
    let mut targets = vec![component];
    apply_annotations(tree, &mut targets, annotations)?;
    Ok(component)
}
/// Parses an annotation element whose name is an identifier or a prefixed name,
/// and returns the index of the resulting `Foreign` node.
///
/// A prefix that is not declared in `ns` yields an element without namespace.
///
/// # Errors
/// `UnacceptableNamespaceName` when the prefix maps to the RELAX NG namespace,
/// `UnexpectedToken` when the first token is not a name, plus any error from
/// parsing the element's attributes and content.
fn parse_annotation_element_not_keyword(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    let r#type = match source.first() {
        Some(Ident(name)) => NodeType::Foreign(None, name.clone()),
        Some(CName(pre, loc)) => {
            let namespace_name = if let Some(namespace) = ns.get(pre) {
                if namespace.namespace_name.as_ref() == XML_RELAX_NG_NAMESPACE {
                    return Err(XMLError::RncParseError(
                        RncParseError::UnacceptableNamespaceName,
                    ));
                }
                Some(namespace.namespace_name.to_string())
            } else {
                None
            };
            NodeType::Foreign(namespace_name, loc.clone())
        }
        _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    };
    // Step over the element name.
    *source = &source[1..];
    let (atts, children) = parse_annotation_attributes_content(source, tree, ns)?;
    let index = tree.len();
    tree.push(Node {
        r#type,
        atts,
        children,
    });
    Ok(index)
}
/// Parses leading annotations: documentation comments optionally followed by a
/// bracketed `[ attributes elements ]` block.
///
/// Returns `None` when neither attributes nor child nodes were found; otherwise
/// the attributes and the node indices (documentation first, then the annotation
/// elements).
///
/// # Errors
/// `UnclosedAnnotation` when the block is not terminated by `]`, plus sub-parser
/// errors.
fn parse_annotations(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
) -> Result<Option<(Attributes, Vec<usize>)>, XMLError> {
    let mut children = parse_documentations(source, tree)?;
    let mut atts = Attributes::default();
    if matches!(source, [TokenType::OpenAnnot, ..]) {
        *source = &source[1..];
        atts = parse_annotation_attributes(source, ns)?;
        children.extend(parse_annotation_elements(source, tree, ns)?);
        match source {
            [TokenType::CloseAnnot, rest @ ..] => *source = rest,
            _ => return Err(XMLError::RncParseError(RncParseError::UnclosedAnnotation)),
        }
    }
    let nothing_found = atts.is_empty() && children.is_empty();
    Ok(if nothing_found {
        None
    } else {
        Some((atts, children))
    })
}
/// Dispatches on the first token to the parser of the matching grammar component
/// (`start`, `include`, `div` or a definition).
///
/// # Errors
/// `UnexpectedToken` when the first token starts none of them (or input ended).
fn parse_component(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    match source.first() {
        Some(Keyword("start")) => parse_start(source, base_uri, tree, ns, dt),
        Some(Keyword("include")) => parse_include(source, base_uri, tree, ns, dt),
        Some(Keyword("div")) => parse_div(source, base_uri, tree, ns, dt),
        Some(Ident(_)) => parse_define(source, base_uri, tree, ns, dt),
        _ => Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    }
}
/// Parses `start <assign-op> pattern` into a `Start` node and returns its index.
///
/// `|=` and `&=` add a `combine` attribute to the node.
///
/// # Panics
/// Panics if the first token is not the `start` keyword; callers dispatch on it.
fn parse_start(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    assert!(matches!(source, [Keyword("start"), ..]));
    *source = &source[1..];
    let combine = parse_assign_op(source)?;
    let mut start = Node {
        r#type: NodeType::Start,
        atts: Attributes::default(),
        children: parse_pattern(source, base_uri, tree, ns, dt)?,
    };
    if let Some(combine) = combine {
        start.push_attribute(combine)?;
    }
    let index = tree.len();
    tree.push(start);
    Ok(index)
}
fn parse_define(
source: &mut &[TokenType],
base_uri: &URIStr,
tree: &mut Vec<Node>,
ns: &NamespaceStack,
dt: &NamespaceStack,
) -> Result<usize, XMLError> {
let Some(TokenType::Ident(ident)) = source.first() else {
unreachable!("internal error")
};
let value = ident.clone();
*source = &source[1..];
let op = parse_assign_op(source)?;
let children = parse_pattern(source, base_uri, tree, ns, dt)?;
let mut node = Node {
r#type: NodeType::Define,
atts: Attributes::default(),
children,
};
node.push_attribute(Attribute {
namespace_name: None,
local_name: Some("name".to_owned()),
qname: "name".to_owned(),
value,
flag: 0,
})?;
if let Some(op) = op {
node.push_attribute(op)?;
}
let ret = tree.len();
tree.push(node);
Ok(ret)
}
/// Parses `include <uri-literal> [inherit = prefix] [{ ... }]` into an `Include`
/// node and returns its index.
///
/// The node's `href` attribute holds the literal resolved against `base_uri`; an
/// `ns` attribute is added when `parse_opt_inherit` yields one.
///
/// # Panics
/// Panics if the first token is not the `include` keyword; callers dispatch on it.
fn parse_include(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    assert!(matches!(source, [Keyword("include"), ..]));
    *source = &source[1..];
    let literal = parse_any_uri_literal(source)?;
    let href = base_uri.resolve(&literal);
    let inherited_ns = parse_opt_inherit(source, ns)?;
    let mut include = Node {
        r#type: NodeType::Include,
        atts: Attributes::default(),
        children: parse_opt_include_body(source, base_uri, tree, ns, dt)?,
    };
    include.push_attribute(Attribute {
        namespace_name: None,
        local_name: Some("href".to_owned()),
        qname: "href".to_owned(),
        value: href.to_string(),
        flag: 0,
    })?;
    if let Some(inherited_ns) = inherited_ns {
        include.push_attribute(inherited_ns)?;
    }
    let index = tree.len();
    tree.push(include);
    Ok(index)
}
/// Parses `div { member* }` into a `Div` node and returns its index.
///
/// # Errors
/// `UnexpectedToken` when `{` does not follow the keyword, `UnclosedBlock` when
/// the member list is not terminated by `}`, plus member-parser errors.
///
/// # Panics
/// Panics if the first token is not the `div` keyword; callers dispatch on it.
fn parse_div(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    assert!(matches!(source, [Keyword("div"), ..]));
    *source = &source[1..];
    if !matches!(source, [OpenBlock, ..]) {
        return Err(XMLError::RncParseError(RncParseError::UnexpectedToken));
    }
    *source = &source[1..];
    let children = parse_grammar(source, base_uri, tree, ns, dt)?;
    // `parse_grammar` stops in front of the closing brace; it must be present and
    // consumed here (as `parse_include_div` does), otherwise the enclosing
    // member loop ends early and the rest of the schema is reported as trailing
    // tokens.
    if !matches!(source, [CloseBlock, ..]) {
        return Err(XMLError::RncParseError(RncParseError::UnclosedBlock));
    }
    *source = &source[1..];
    let ret = tree.len();
    tree.push(Node {
        r#type: NodeType::Div,
        atts: Attributes::default(),
        children,
    });
    Ok(ret)
}
/// Consumes an assignment operator.
///
/// `=` yields `None`; `|=` and `&=` yield a `combine` attribute with the value
/// `choice` or `interleave` respectively.
///
/// # Errors
/// `UnexpectedToken` when the next token is not an assignment operator; nothing
/// is consumed in that case.
fn parse_assign_op(source: &mut &[TokenType]) -> Result<Option<Attribute>, XMLError> {
    use TokenType::*;
    let combine = match source.first() {
        Some(Assign) => None,
        Some(ChoiceAssign) => Some("choice"),
        Some(InterleaveAssign) => Some("interleave"),
        _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    };
    *source = &source[1..];
    Ok(combine.map(|method| Attribute {
        namespace_name: None,
        local_name: Some("combine".to_owned()),
        qname: "combine".to_owned(),
        value: method.to_owned(),
        flag: 0,
    }))
}
/// Parses a literal and interprets its text as a URI.
///
/// # Errors
/// Propagates `parse_literal` errors; returns `RncParseError::InvalidURI` when the
/// text is not accepted by `URIString::parse`.
fn parse_any_uri_literal(source: &mut &[TokenType]) -> Result<URIString, XMLError> {
    let mut text = String::new();
    parse_literal(source, &mut text)?;
    URIString::parse(text).map_err(|_| XMLError::RncParseError(RncParseError::InvalidURI))
}
/// Parses the optional `inherit = prefix` clause of an `include` and returns the
/// `ns` attribute it implies.
///
/// With the clause, the namespace bound to `prefix` is used and the three tokens
/// are consumed; without it, the default namespace is used and nothing is
/// consumed. No attribute is produced when the selected prefix is undeclared or
/// is itself bound to `inherit` (the `URI_INHERIT` sentinel is never emitted as a
/// value).
fn parse_opt_inherit(
    source: &mut &[TokenType],
    ns: &NamespaceStack,
) -> Result<Option<Attribute>, XMLError> {
    use TokenType::*;
    let namespace = match source {
        [Keyword("inherit"), Assign, Ident(prefix), rest @ ..] => {
            let found = ns.get(prefix);
            // Consume the clause so the caller continues after it.
            *source = rest;
            found
        }
        [Keyword("inherit"), Assign, Keyword(prefix), rest @ ..] => {
            let found = ns.get(prefix);
            *source = rest;
            found
        }
        _ => ns.get(""),
    };
    Ok(namespace
        .filter(|ns| URI_INHERIT != ns.namespace_name.as_ref())
        .map(|ns| Attribute {
            namespace_name: None,
            local_name: Some("ns".to_owned()),
            qname: "ns".to_owned(),
            value: ns.namespace_name.to_string(),
            flag: 0,
        }))
}
/// Parses the optional `{ ... }` override block of an `include` and returns the
/// indices of its members (empty when there is no block).
///
/// # Errors
/// `UnclosedBlock` when the block is not terminated by `}`, plus member-parser
/// errors.
fn parse_opt_include_body(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    match source {
        [TokenType::OpenBlock, ..] => {
            *source = &source[1..];
            let ret = parse_include_body(source, base_uri, tree, ns, dt)?;
            if !matches!(source, [TokenType::CloseBlock, ..]) {
                return Err(XMLError::RncParseError(RncParseError::UnclosedBlock));
            }
            // Consume the closing brace; leaving it behind would make the
            // enclosing member loop stop at it.
            *source = &source[1..];
            Ok(ret)
        }
        _ => Ok(vec![]),
    }
}
/// Parses a literal, one or more segments joined by `~`, and appends the
/// concatenated segment text to `dest`.
///
/// # Errors
/// `UnexpectedToken` when a segment is expected but another token (or the end of
/// input) is found; segments read before that point remain in `dest`.
fn parse_literal(source: &mut &[TokenType], dest: &mut String) -> Result<(), XMLError> {
    use TokenType::*;
    loop {
        match source {
            // A separator follows: another segment must come next.
            [LiteralSegment(segment), LiteralSep, rest @ ..] => {
                dest.push_str(segment);
                *source = rest;
            }
            [LiteralSegment(segment), rest @ ..] => {
                dest.push_str(segment);
                *source = rest;
                return Ok(());
            }
            _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
        }
    }
}
/// Parses include members until a `}` or the end of input is reached and returns
/// their node indices. The terminating `}` is not consumed.
fn parse_include_body(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    let mut members = Vec::new();
    loop {
        if matches!(source, [] | [TokenType::CloseBlock, ..]) {
            break Ok(members);
        }
        members.push(parse_include_member(source, base_uri, tree, ns, dt)?);
    }
}
fn parse_include_member(
source: &mut &[TokenType],
base_uri: &URIStr,
tree: &mut Vec<Node>,
ns: &NamespaceStack,
dt: &NamespaceStack,
) -> Result<usize, XMLError> {
use TokenType::*;
match source {
[Ident(_) | CName(_, _), OpenAnnot, ..] => {
parse_annotation_element_not_keyword(source, tree, ns)
}
_ => parse_annotated_include_component(source, base_uri, tree, ns, dt),
}
}
/// Parses optional leading annotations followed by one include component and
/// attaches the annotations to that component. Returns the component's index.
fn parse_annotated_include_component(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    let annotations = parse_annotations(source, tree, ns)?;
    let component = parse_include_component(source, base_uri, tree, ns, dt)?;
    let mut targets = vec![component];
    apply_annotations(tree, &mut targets, annotations)?;
    Ok(component)
}
/// Dispatches on the first token to the parser of the matching include component
/// (`start`, `div` or a definition).
///
/// # Errors
/// `UnexpectedToken` when the first token starts none of them. Previously every
/// other token was handed to `parse_define`, which aborted with a panic on
/// anything that is not an identifier.
fn parse_include_component(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    match source {
        [Keyword("start"), ..] => parse_start(source, base_uri, tree, ns, dt),
        [Keyword("div"), ..] => parse_include_div(source, base_uri, tree, ns, dt),
        [Ident(_), ..] => parse_define(source, base_uri, tree, ns, dt),
        _ => Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    }
}
/// Parses `div { include-member* }` inside an include body into a `Div` node and
/// returns its index.
///
/// # Errors
/// `UnexpectedToken` when `{` does not follow the keyword, `UnclosedBlock` when
/// the member list is not terminated by `}`, plus member-parser errors.
///
/// # Panics
/// Panics if the first token is not the `div` keyword; callers dispatch on it.
fn parse_include_div(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    assert!(matches!(source, [Keyword("div"), ..]));
    *source = &source[1..];
    match source {
        [OpenBlock, rest @ ..] => *source = rest,
        _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    }
    let children = parse_include_body(source, base_uri, tree, ns, dt)?;
    match source {
        [CloseBlock, rest @ ..] => *source = rest,
        _ => return Err(XMLError::RncParseError(RncParseError::UnclosedBlock)),
    }
    let index = tree.len();
    tree.push(Node {
        r#type: NodeType::Div,
        atts: Attributes::default(),
        children,
    });
    Ok(index)
}
/// Parses a pattern that is not enclosed in an annotated group; shorthand for
/// `parse_inner_pattern` without outer annotations.
fn parse_pattern(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    let outer_annotations = None;
    parse_inner_pattern(source, base_uri, tree, ns, dt, outer_annotations)
}
/// Parses one pattern and returns the indices of the nodes it produced.
///
/// `anno` carries annotations found in front of an enclosing `( ... )`; they are
/// applied to the node (or node list) that this call ends up returning.
///
/// # Errors
/// `UnclosedParentheses` when a `(` is not matched by `)`, plus sub-parser errors.
fn parse_inner_pattern(
source: &mut &[TokenType],
base_uri: &URIStr,
tree: &mut Vec<Node>,
ns: &NamespaceStack,
dt: &NamespaceStack,
anno: Option<(Attributes, Vec<usize>)>,
) -> Result<Vec<usize>, XMLError> {
use TokenType::*;
// Annotations written directly in front of the first operand.
let x = parse_annotations(source, tree, ns)?;
let mut children = match source {
// `string {`, `token {` or `prefix:name {` - a datatype with a parameter
// block, which may be followed by `- exceptPattern`.
[Keyword("string" | "token") | CName(_, _), OpenBlock, ..] => {
let data = parse_primary(source, base_uri, tree, ns, dt)?;
if matches!(source, [Except, ..]) {
*source = &source[1..];
let except = parse_lead_annotated_primary(source, base_uri, tree, ns, dt)?;
// The Except node is pushed right after `pos` is read, so `pos` is its
// index; it becomes a child of the data node.
let pos = tree.len();
tree[data].children.push(pos);
tree.push(Node {
r#type: NodeType::Except,
atts: Attributes::default(),
children: except,
});
apply_annotations(tree, &mut vec![data], x)?;
let mut follow = parse_follow_annotations(source, tree, ns)?;
follow.insert(0, data);
// This form returns immediately; no occurrence or binary operator is
// looked for after it.
return apply_annotations_group(tree, follow, anno);
} else {
let mut data = vec![data];
apply_annotations(tree, &mut data, x)?;
data
}
}
// Parenthesized pattern: the leading annotations become the outer
// annotations of the inner pattern.
[OpenGroup, ..] => {
*source = &source[1..];
let ret = parse_inner_pattern(source, base_uri, tree, ns, dt, x)?;
if !matches!(source, [CloseGroup, ..]) {
return Err(XMLError::RncParseError(RncParseError::UnclosedParentheses));
}
*source = &source[1..];
ret
}
// Any other primary.
_ => {
let mut prim = vec![parse_primary(source, base_uri, tree, ns, dt)?];
apply_annotations(tree, &mut prim, x)?;
prim
}
};
// `>>` annotations after the first operand.
let follow = parse_follow_annotations(source, tree, ns)?;
children.extend(follow);
// Occurrence operator (`*`, `+`, `?`): wrap what was parsed so far.
if let [ty @ (ZeroOrMore | OneOrMore | Optional), ..] = source {
*source = &source[1..];
let node = Node {
r#type: match ty {
ZeroOrMore => NodeType::ZeroOrMore,
OneOrMore => NodeType::OneOrMore,
Optional => NodeType::Optional,
_ => unreachable!(),
},
atts: Attributes::default(),
children,
};
let mut follow = parse_follow_annotations(source, tree, ns)?;
// No binary operator follows: the wrapper is the result and receives the
// outer annotations.
if !matches!(source, [Choice | Group | Interleave, ..]) {
let ret = tree.len();
follow.insert(0, ret);
tree.push(node);
apply_annotations(tree, &mut vec![ret], anno)?;
return Ok(follow);
}
// Otherwise the wrapper is the first operand of the binary operator.
follow.insert(0, tree.len());
tree.push(node);
children = follow;
}
let node = match source {
// Binary operator: collect further operands for as long as the *same*
// operator token repeats.
[ty @ (Choice | Group | Interleave), ..] => {
while source.first() == Some(ty) {
*source = &source[1..];
let append = parse_particle(source, base_uri, tree, ns, dt)?;
children.extend(append);
}
Node {
r#type: match ty {
Choice => NodeType::Choice,
Group => NodeType::Group,
Interleave => NodeType::Interleave,
_ => unreachable!(),
},
atts: Attributes::default(),
children,
}
}
// No operator: hand the operand list and the outer annotations to
// `apply_annotations_group`.
_ => {
return apply_annotations_group(tree, children, anno);
}
};
let mut ret = vec![tree.len()];
tree.push(node);
apply_annotations(tree, &mut ret, anno)?;
Ok(ret)
}
/// Parses one operand of a binary operator; shorthand for `parse_inner_particle`
/// without outer annotations.
fn parse_particle(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    let outer_annotations = None;
    parse_inner_particle(source, base_uri, tree, ns, dt, outer_annotations)
}
/// Parses an annotated primary optionally followed by an occurrence operator
/// (`*`, `+`, `?`) and returns the indices of the resulting nodes.
///
/// With an operator the primary is wrapped in the matching node. Without one, the
/// primary is returned as is, unless outer annotations `anno` are present, in
/// which case it is wrapped in a `Group` node to carry them. Whenever a wrapper
/// is created, `anno` is applied to it and trailing `>>` annotations are parsed.
fn parse_inner_particle(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
    anno: Option<(Attributes, Vec<usize>)>,
) -> Result<Vec<usize>, XMLError> {
    use TokenType::*;
    let operand = parse_annotated_primary(source, base_uri, tree, ns, dt)?;
    let wrapper = match source.first() {
        Some(ZeroOrMore) => NodeType::ZeroOrMore,
        Some(OneOrMore) => NodeType::OneOrMore,
        Some(Optional) => NodeType::Optional,
        _ if anno.is_none() => return Ok(operand),
        _ => NodeType::Group,
    };
    // Only an occurrence operator corresponds to a token that must be consumed.
    if !matches!(wrapper, NodeType::Group) {
        *source = &source[1..];
    }
    let index = tree.len();
    tree.push(Node {
        r#type: wrapper,
        atts: Attributes::default(),
        children: operand,
    });
    let mut result = vec![index];
    apply_annotations(tree, &mut result, anno)?;
    result.extend(parse_follow_annotations(source, tree, ns)?);
    Ok(result)
}
/// Parses a lead-annotated primary and then any follow annotations.
///
/// The returned indices are those of the primary followed by those of the
/// follow annotations, in that order.
fn parse_annotated_primary(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    let mut nodes = parse_lead_annotated_primary(source, base_uri, tree, ns, dt)?;
    let trailing = parse_follow_annotations(source, tree, ns)?;
    nodes.extend(trailing);
    Ok(nodes)
}
/// Parses every run of annotation elements introduced by a `FollowAnnot` token.
///
/// Each `FollowAnnot` token is consumed and the annotation elements after it
/// are parsed; this repeats while the next token is again `FollowAnnot`.
/// Returns the indices of all annotation element nodes in source order, or an
/// empty vector when `source` does not start with `FollowAnnot`.
fn parse_follow_annotations(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    let mut elems = Vec::new();
    while matches!(source, [TokenType::FollowAnnot, ..]) {
        // Skip the `FollowAnnot` marker itself.
        *source = &source[1..];
        elems.extend(parse_annotation_elements(source, tree, ns)?);
    }
    Ok(elems)
}
/// Parses leading annotations followed by either a parenthesised pattern or
/// a single primary.
///
/// For a parenthesised pattern the annotations are handed to
/// `parse_inner_pattern`; for a plain primary they are applied to the primary
/// node directly.
///
/// # Errors
///
/// Returns `RncParseError::UnclosedParentheses` when the group is not
/// terminated by a `CloseGroup` token, and propagates errors of the nested
/// parsers.
fn parse_lead_annotated_primary(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    let annot = parse_annotations(source, tree, ns)?;

    if !matches!(source, [TokenType::OpenGroup, ..]) {
        // Plain primary: annotate the single resulting node.
        let mut prim = vec![parse_primary(source, base_uri, tree, ns, dt)?];
        apply_annotations(tree, &mut prim, annot)?;
        return Ok(prim);
    }

    // Parenthesised pattern: skip `(`, parse the body, then require `)`.
    *source = &source[1..];
    let pat = parse_inner_pattern(source, base_uri, tree, ns, dt, annot)?;
    if matches!(source, [TokenType::CloseGroup, ..]) {
        *source = &source[1..];
        Ok(pat)
    } else {
        Err(XMLError::RncParseError(RncParseError::UnclosedParentheses))
    }
}
/// Parses one primary pattern and pushes the resulting node onto `tree`.
///
/// Recognised forms, keyed on the leading token(s):
///
/// * `element` / `attribute` name-class `{` pattern `}`
/// * `mixed` / `list` `{` pattern `}`
/// * `empty`, `notAllowed`, `text`
/// * `parent` identifier
/// * `grammar` `{` grammar content `}`
/// * `external` URI literal with an optional inherit clause
/// * a datatype name (`string`, `token` or a prefixed name) followed by
///   either a literal (a `Value` node) or optional parameters (a `Data` node)
/// * a bare literal (a `Value` node)
/// * an identifier (a `Ref` node)
///
/// Returns the index of the pushed node in `tree`.
///
/// # Errors
///
/// Returns `RncParseError::UnexpectedToken` when no form matches or a `{` is
/// missing, `RncParseError::UnclosedBlock` when a `}` is missing, and
/// propagates errors from the nested parsers.
fn parse_primary(
    source: &mut &[TokenType],
    base_uri: &URIStr,
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    dt: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    let node = match source {
        [Keyword(e @ ("element" | "attribute")), ..] => {
            let is_elem = *e == "element";
            *source = &source[1..];
            let mut nc = parse_name_class(source, tree, ns, is_elem)?;
            if !matches!(source, [OpenBlock, ..]) {
                return Err(XMLError::RncParseError(RncParseError::UnexpectedToken));
            }
            *source = &source[1..];
            let pat = parse_pattern(source, base_uri, tree, ns, dt)?;
            if !matches!(source, [CloseBlock, ..]) {
                return Err(XMLError::RncParseError(RncParseError::UnclosedBlock));
            }
            *source = &source[1..];
            // Children are the name class nodes followed by the content pattern.
            nc.extend(pat);
            Node {
                r#type: if is_elem {
                    NodeType::Element
                } else {
                    NodeType::Attribute
                },
                atts: Attributes::default(),
                children: nc,
            }
        }
        [Keyword(e @ ("mixed" | "list")), ..] => {
            *source = &source[1..];
            if !matches!(source, [OpenBlock, ..]) {
                return Err(XMLError::RncParseError(RncParseError::UnexpectedToken));
            }
            *source = &source[1..];
            let children = parse_pattern(source, base_uri, tree, ns, dt)?;
            if !matches!(source, [CloseBlock, ..]) {
                return Err(XMLError::RncParseError(RncParseError::UnclosedBlock));
            }
            *source = &source[1..];
            Node {
                r#type: if *e == "mixed" {
                    NodeType::Mixed
                } else {
                    NodeType::List
                },
                atts: Attributes::default(),
                children,
            }
        }
        [Keyword("empty"), ..] => {
            *source = &source[1..];
            Node {
                r#type: NodeType::Empty,
                atts: Attributes::default(),
                children: vec![],
            }
        }
        [Keyword("notAllowed"), ..] => {
            *source = &source[1..];
            Node {
                r#type: NodeType::NotAllowed,
                atts: Attributes::default(),
                children: vec![],
            }
        }
        [Keyword("text"), ..] => {
            *source = &source[1..];
            Node {
                r#type: NodeType::Text,
                atts: Attributes::default(),
                children: vec![],
            }
        }
        [Keyword("parent"), Ident(ident), ..] => {
            let mut node = Node {
                r#type: NodeType::ParentRef,
                atts: Attributes::default(),
                children: vec![],
            };
            node.push_attribute(Attribute {
                namespace_name: None,
                local_name: Some("name".to_owned()),
                qname: "name".to_owned(),
                value: ident.clone(),
                flag: 0,
            })?;
            // Consume both the `parent` keyword and the identifier.
            *source = &source[2..];
            node
        }
        [Keyword("grammar"), ..] => {
            *source = &source[1..];
            if !matches!(source, [OpenBlock, ..]) {
                return Err(XMLError::RncParseError(RncParseError::UnexpectedToken));
            }
            *source = &source[1..];
            let children = parse_grammar(source, base_uri, tree, ns, dt)?;
            if !matches!(source, [CloseBlock, ..]) {
                return Err(XMLError::RncParseError(RncParseError::UnclosedBlock));
            }
            *source = &source[1..];
            Node {
                r#type: NodeType::Grammar,
                atts: Attributes::default(),
                children,
            }
        }
        [Keyword("external"), ..] => {
            *source = &source[1..];
            // The referenced URI is resolved against the base URI of this schema.
            let uri = base_uri.resolve(&parse_any_uri_literal(source)?);
            let inherit = parse_opt_inherit(source, ns)?;
            let mut node = Node {
                r#type: NodeType::ExternalRef,
                atts: Attributes::default(),
                children: vec![],
            };
            node.push_attribute(Attribute {
                namespace_name: None,
                local_name: Some("href".to_owned()),
                qname: "href".to_owned(),
                value: uri.to_string(),
                flag: 0,
            })?;
            if let Some(inherit) = inherit {
                node.push_attribute(inherit)?;
            }
            node
        }
        // A datatype name may be the very last token of the input, so the
        // pattern must not require a token after it.
        [Keyword("string" | "token") | CName(_, _), ..] => {
            let (lib, ty) = parse_datatype_name(source, dt)?;
            // `parse_datatype_name` only inspects the token; consume it here.
            *source = &source[1..];
            let (r#type, children) = if matches!(source, [LiteralSegment(_), ..]) {
                // Datatype name followed by a literal: a typed value.
                let mut value = String::new();
                parse_literal(source, &mut value)?;
                let ret = tree.len();
                tree.push(Node {
                    r#type: NodeType::TextContent(value),
                    atts: Attributes::default(),
                    children: vec![],
                });
                (NodeType::Value, vec![ret])
            } else {
                // Otherwise a data pattern with an optional parameter block.
                (NodeType::Data, parse_opt_params(source, tree, ns)?)
            };
            let mut node = Node {
                r#type,
                atts: Attributes::default(),
                children,
            };
            node.push_attribute(lib)?;
            node.push_attribute(ty)?;
            node
        }
        [LiteralSegment(_), ..] => {
            let mut value = String::new();
            parse_literal(source, &mut value)?;
            let text = tree.len();
            tree.push(Node {
                r#type: NodeType::TextContent(value),
                atts: Attributes::default(),
                children: vec![],
            });
            Node {
                r#type: NodeType::Value,
                atts: Attributes::default(),
                children: vec![text],
            }
        }
        [Ident(ident), ..] => {
            let mut node = Node {
                r#type: NodeType::Ref,
                atts: Attributes::default(),
                children: vec![],
            };
            node.push_attribute(Attribute {
                namespace_name: None,
                local_name: Some("name".to_owned()),
                qname: "name".to_owned(),
                value: ident.clone(),
                flag: 0,
            })?;
            *source = &source[1..];
            node
        }
        _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    };
    let ret = tree.len();
    tree.push(node);
    Ok(ret)
}
/// Translates the datatype name at the front of `source` into a
/// (`datatypeLibrary`, `type`) attribute pair.
///
/// * A prefixed name is resolved through `dt`; the library is the namespace
///   name bound to the prefix and the type is the local part.
/// * The keywords `string` and `token` map to the empty library and the
///   keyword itself as the type.
///
/// The token is only inspected, never consumed.
///
/// # Errors
///
/// `RncParseError::InvalidPrefix` when the prefix is not bound in `dt`, and
/// `RncParseError::UnexpectedToken` when the first token is not a datatype
/// name.
fn parse_datatype_name(
    source: &mut &[TokenType],
    dt: &NamespaceStack,
) -> Result<(Attribute, Attribute), XMLError> {
    let (library, type_name) = match source {
        [TokenType::CName(pre, loc), ..] => {
            let Some(namespace) = dt.get(pre) else {
                return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
            };
            (namespace.namespace_name.to_string(), loc.clone())
        }
        [TokenType::Keyword(k @ ("string" | "token")), ..] => (String::new(), k.to_string()),
        _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    };

    // Both attributes are unqualified and differ only in name and value.
    let unqualified = |name: &str, value: String| Attribute {
        namespace_name: None,
        local_name: Some(name.to_owned()),
        qname: name.to_owned(),
        value,
        flag: 0,
    };

    Ok((
        unqualified("datatypeLibrary", library),
        unqualified("type", type_name),
    ))
}
/// Parses an optional `{ name = literal ... }` parameter block.
///
/// When `source` does not start with `OpenBlock`, nothing is consumed and an
/// empty vector is returned. Otherwise each parameter becomes a `Param` node
/// with a `name` attribute and a single text child; leading annotations are
/// applied to each parameter.
///
/// # Errors
///
/// `RncParseError::UnexpectedToken` when a parameter does not start with an
/// identifier or keyword followed by `=`, `RncParseError::UnclosedBlock` when
/// the closing `}` is missing.
fn parse_opt_params(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    use TokenType::*;

    if !matches!(source, [OpenBlock, ..]) {
        return Ok(vec![]);
    }
    *source = &source[1..];

    let mut params = vec![];
    while !matches!(source, [CloseBlock, ..] | []) {
        let annot = parse_annotations(source, tree, ns)?;

        let name = match source {
            [Ident(ident), Assign, ..] => ident.clone(),
            [Keyword(keyword), Assign, ..] => keyword.to_string(),
            _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
        };
        // Skip the parameter name and the `=` token.
        *source = &source[2..];

        let mut value = String::new();
        parse_literal(source, &mut value)?;

        let text_index = tree.len();
        tree.push(Node {
            r#type: NodeType::TextContent(value),
            atts: Attributes::default(),
            children: vec![],
        });

        let mut param = Node {
            r#type: NodeType::Param,
            atts: Attributes::default(),
            children: vec![text_index],
        };
        param.push_attribute(Attribute {
            namespace_name: None,
            local_name: Some("name".to_owned()),
            qname: "name".to_owned(),
            value: name,
            flag: 0,
        })?;

        let mut indices = vec![tree.len()];
        tree.push(param);
        apply_annotations(tree, &mut indices, annot)?;
        params.extend(indices);
    }

    // The loop also stops at end of input, which means the block was never closed.
    if matches!(source, [CloseBlock, ..]) {
        *source = &source[1..];
        Ok(params)
    } else {
        Err(XMLError::RncParseError(RncParseError::UnclosedBlock))
    }
}
/// Parses a name class for which the caller supplies no annotations.
///
/// Forwards to [`parse_inner_name_class`] with `None` as the annotation
/// argument. `is_elem` is passed through unchanged.
fn parse_name_class(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    is_elem: bool,
) -> Result<Vec<usize>, XMLError> {
    let inherited = None;
    parse_inner_name_class(source, tree, ns, is_elem, inherited)
}
/// Parses a name class, including `|`-separated alternatives.
///
/// `anno` holds annotations supplied by an enclosing construct; they are
/// attached to the resulting `Choice` node (one is created if necessary via
/// `apply_annotations_choice`). Annotations found at the start of this name
/// class are attached to its first member instead.
///
/// # Errors
///
/// `RncParseError::UnclosedParentheses` when a parenthesised name class is
/// not closed; other errors are propagated from the nested parsers.
fn parse_inner_name_class(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    is_elem: bool,
    anno: Option<(Attributes, Vec<usize>)>,
) -> Result<Vec<usize>, XMLError> {
    use TokenType::*;

    let lead = parse_annotations(source, tree, ns)?;

    // `ns:* - ...` and `* - ...` cannot take part in a choice here, so they
    // are handled on their own and returned immediately.
    if matches!(source, [NsName(_) | ZeroOrMore, Except, ..]) {
        let mut except = vec![parse_except_name_class(source, tree, ns, is_elem)?];
        apply_annotations(tree, &mut except, lead)?;
        except.extend(parse_follow_annotations(source, tree, ns)?);
        return apply_annotations_choice(tree, except, anno);
    }

    let mut members = if matches!(source, [OpenGroup, ..]) {
        *source = &source[1..];
        let inner = parse_inner_name_class(source, tree, ns, is_elem, lead)?;
        if !matches!(source, [CloseGroup, ..]) {
            return Err(XMLError::RncParseError(RncParseError::UnclosedParentheses));
        }
        *source = &source[1..];
        inner
    } else {
        let mut single = vec![parse_simple_name_class(source, tree, ns, is_elem)?];
        apply_annotations(tree, &mut single, lead)?;
        single
    };
    members.extend(parse_follow_annotations(source, tree, ns)?);

    if !matches!(source, [Choice, ..]) {
        return apply_annotations_choice(tree, members, anno);
    }

    // Collect every further alternative introduced by `|`.
    while matches!(source, [Choice, ..]) {
        *source = &source[1..];
        let alternative = parse_annotated_simple_name_class(source, tree, ns, is_elem)?;
        members.extend(alternative);
    }

    let index = tree.len();
    tree.push(Node {
        r#type: NodeType::Choice,
        atts: Attributes::default(),
        children: members,
    });
    let mut ret = vec![index];
    apply_annotations(tree, &mut ret, anno)?;
    Ok(ret)
}
/// Parses a lead-annotated simple name class and then any follow annotations.
///
/// The returned indices are those of the name class followed by those of the
/// follow annotations, in that order.
fn parse_annotated_simple_name_class(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    is_elem: bool,
) -> Result<Vec<usize>, XMLError> {
    let mut nodes = parse_lead_annotated_simple_name_class(source, tree, ns, is_elem)?;
    let trailing = parse_follow_annotations(source, tree, ns)?;
    nodes.extend(trailing);
    Ok(nodes)
}
/// Parses leading annotations followed by either a parenthesised name class
/// or a single simple name class.
///
/// For the parenthesised form the annotations are passed on to
/// `parse_inner_name_class`; otherwise they are applied to the simple name
/// class node.
///
/// # Errors
///
/// `RncParseError::UnclosedParentheses` when the closing `)` is missing.
fn parse_lead_annotated_simple_name_class(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    is_elem: bool,
) -> Result<Vec<usize>, XMLError> {
    let annot = parse_annotations(source, tree, ns)?;

    if !matches!(source, [TokenType::OpenGroup, ..]) {
        let mut simple = vec![parse_simple_name_class(source, tree, ns, is_elem)?];
        apply_annotations(tree, &mut simple, annot)?;
        return Ok(simple);
    }

    // Skip `(`, parse the nested name class, then require `)`.
    *source = &source[1..];
    let nested = parse_inner_name_class(source, tree, ns, is_elem, annot)?;
    if matches!(source, [TokenType::CloseGroup, ..]) {
        *source = &source[1..];
        Ok(nested)
    } else {
        Err(XMLError::RncParseError(RncParseError::UnclosedParentheses))
    }
}
fn parse_simple_name_class(
source: &mut &[TokenType],
tree: &mut Vec<Node>,
ns: &NamespaceStack,
is_elem: bool,
) -> Result<usize, XMLError> {
let node = match source {
[TokenType::Ident(ident), ..] => {
let mut node = Node {
r#type: NodeType::Name,
atts: Attributes::default(),
children: vec![tree.len()],
};
tree.push(Node {
r#type: NodeType::TextContent(ident.clone()),
atts: Attributes::default(),
children: vec![],
});
if is_elem
&& let Some(namespace) = ns
.get("")
.filter(|ns| ns.namespace_name.as_ref() != URI_INHERIT)
{
node.push_attribute(Attribute {
namespace_name: None,
local_name: Some("ns".to_owned()),
qname: "ns".to_owned(),
value: namespace.namespace_name.to_string(),
flag: 0,
})?;
}
node
}
[TokenType::Keyword(keyword), ..] => {
let mut node = Node {
r#type: NodeType::Name,
atts: Attributes::default(),
children: vec![tree.len()],
};
tree.push(Node {
r#type: NodeType::TextContent(keyword.to_string()),
atts: Attributes::default(),
children: vec![],
});
if is_elem
&& let Some(namespace) = ns
.get("")
.filter(|ns| ns.namespace_name.as_ref() != URI_INHERIT)
{
node.push_attribute(Attribute {
namespace_name: None,
local_name: Some("ns".to_owned()),
qname: "ns".to_owned(),
value: namespace.namespace_name.to_string(),
flag: 0,
})?;
}
node
}
[TokenType::CName(pre, loc), ..] => {
let mut node = Node {
r#type: NodeType::Name,
atts: Attributes::default(),
children: vec![tree.len()],
};
tree.push(Node {
r#type: NodeType::TextContent(loc.clone()),
atts: Attributes::default(),
children: vec![],
});
if let Some(namespace) = ns.get(pre) {
if namespace.namespace_name.as_ref() != URI_INHERIT {
node.push_attribute(Attribute {
namespace_name: None,
local_name: Some("ns".to_owned()),
qname: "ns".to_owned(),
value: namespace.namespace_name.to_string(),
flag: 0,
})?;
}
} else {
return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
}
node
}
[TokenType::NsName(pre), ..] => {
if let Some(namespace) = ns.get(pre) {
let mut node = Node {
r#type: NodeType::NsName,
atts: Attributes::default(),
children: vec![],
};
if namespace.namespace_name.as_ref() != URI_INHERIT {
node.push_attribute(Attribute {
namespace_name: None,
local_name: Some("ns".to_owned()),
qname: "ns".to_owned(),
value: namespace.namespace_name.to_string(),
flag: 0,
})?;
}
node
} else {
return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
}
}
[TokenType::ZeroOrMore, ..] => Node {
r#type: NodeType::AnyName,
atts: Attributes::default(),
children: vec![],
},
_ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
};
*source = &source[1..];
let ret = tree.len();
tree.push(node);
Ok(ret)
}
/// Parses `prefix:* - nameClass` or `* - nameClass`.
///
/// Two nodes are pushed: an `Except` node holding the excluded name class,
/// and then the `NsName` / `AnyName` node whose only child is that `Except`
/// node. Returns the index of the `NsName` / `AnyName` node.
///
/// # Errors
///
/// `RncParseError::UnexpectedToken` when `source` does not start with one of
/// the two forms, `RncParseError::InvalidPrefix` when the prefix is unbound.
fn parse_except_name_class(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
    is_elem: bool,
) -> Result<usize, XMLError> {
    use TokenType::*;

    // `Some(prefix)` for the `prefix:*` form, `None` for the `*` form.
    let prefix = match source {
        [NsName(pre), Except, ..] => Some(pre),
        [ZeroOrMore, Except, ..] => None,
        _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    };
    // Skip the name class token and the `-` token.
    *source = &source[2..];

    let excluded = parse_lead_annotated_simple_name_class(source, tree, ns, is_elem)?;

    // The `Except` node is pushed next, so it will live at the current length.
    let except_index = tree.len();
    let mut outer = Node {
        r#type: if prefix.is_some() {
            NodeType::NsName
        } else {
            NodeType::AnyName
        },
        atts: Attributes::default(),
        children: vec![except_index],
    };

    if let Some(pre) = prefix {
        let Some(namespace) = ns.get(pre) else {
            return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
        };
        if namespace.namespace_name.as_ref() != URI_INHERIT {
            outer.push_attribute(Attribute {
                namespace_name: None,
                local_name: Some("ns".to_owned()),
                qname: "ns".to_owned(),
                value: namespace.namespace_name.to_string(),
                flag: 0,
            })?;
        }
    }

    tree.push(Node {
        r#type: NodeType::Except,
        atts: Attributes::default(),
        children: excluded,
    });
    let outer_index = tree.len();
    tree.push(outer);
    Ok(outer_index)
}
fn parse_documentations(
source: &mut &[TokenType],
tree: &mut Vec<Node>,
) -> Result<Vec<usize>, XMLError> {
use TokenType::*;
let mut ret = vec![];
loop {
match source {
[Documentation(doc), ..] => {
let text = Node {
r#type: NodeType::TextContent(doc.clone()),
atts: Attributes::default(),
children: vec![],
};
let node = Node {
r#type: NodeType::Documentation,
atts: Attributes::default(),
children: vec![tree.len()],
};
tree.push(text);
ret.push(tree.len());
tree.push(node);
*source = &source[1..];
}
_ => break Ok(ret),
}
}
}
/// Parses a run of `prefix:name = literal` annotation attributes.
///
/// Parsing stops, without error, at the first position that does not start
/// with a prefixed name followed by `=`.
///
/// # Errors
///
/// * `RncParseError::InvalidPrefix` when the prefix is not bound in `ns`.
/// * `RncParseError::UnacceptableNamespaceName` when the prefix is bound to
///   the xmlns namespace or the RELAX NG namespace.
/// * `RncParseError::UnqualifiedName` when the prefix is bound to the empty
///   namespace name.
/// * Errors from parsing the literal or from inserting the attribute.
fn parse_annotation_attributes(
    source: &mut &[TokenType],
    ns: &NamespaceStack,
) -> Result<Attributes, XMLError> {
    use TokenType::*;
    let mut atts = Attributes::default();
    while let [CName(pre, loc), Assign, rest @ ..] = source {
        *source = rest;
        // The value is parsed before the prefix is validated, so a malformed
        // literal is reported ahead of a prefix problem.
        let mut value = String::new();
        parse_literal(source, &mut value)?;

        let Some(namespace) = ns.get(pre) else {
            return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
        };
        if namespace.namespace_name.as_ref() == XML_NS_NAMESPACE
            || namespace.namespace_name.as_ref() == XML_RELAX_NG_NAMESPACE
        {
            return Err(XMLError::RncParseError(
                RncParseError::UnacceptableNamespaceName,
            ));
        }
        if namespace.namespace_name.is_empty() {
            return Err(XMLError::RncParseError(RncParseError::UnqualifiedName));
        }

        // The xmlns namespace was rejected above, so this attribute can never
        // be a namespace declaration and no `nsdecl` flag is needed.
        let mut att = Attribute {
            namespace_name: Some(namespace.namespace_name.to_string()),
            local_name: Some(loc.clone()),
            qname: format!("{pre}:{loc}"),
            value,
            flag: 0,
        };
        att.set_specified();
        atts.push(att).map_err(|err| err.1)?;
    }
    Ok(atts)
}
/// Parses zero or more annotation elements of the form `name [ ... ]`.
///
/// The element name may be a prefixed name (resolved through `ns`), an
/// identifier or a keyword. Each element becomes a `Foreign` node. Parsing
/// stops, without error, at the first position that is not a name followed
/// by `OpenAnnot`.
///
/// # Errors
///
/// `RncParseError::InvalidPrefix` when a prefix is not bound in `ns`; errors
/// from parsing the element body are propagated.
fn parse_annotation_elements(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    use TokenType::*;

    let mut indices = vec![];
    loop {
        let (namespace_name, local_name) = match source {
            [CName(pre, loc), OpenAnnot, ..] => {
                let Some(namespace) = ns.get(pre) else {
                    return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
                };
                (Some(namespace.namespace_name.to_string()), loc.clone())
            }
            [Ident(ident), OpenAnnot, ..] => (None, ident.clone()),
            [Keyword(keyword), OpenAnnot, ..] => (None, keyword.to_string()),
            _ => return Ok(indices),
        };

        // Skip the element name; the body parser expects to start at `[`.
        *source = &source[1..];
        let (atts, children) = parse_annotation_attributes_content(source, tree, ns)?;

        let index = tree.len();
        tree.push(Node {
            r#type: NodeType::Foreign(namespace_name, local_name),
            atts,
            children,
        });
        indices.push(index);
    }
}
/// Parses the bracketed body of an annotation element.
///
/// Expects `OpenAnnot`, then attributes, then content, then `CloseAnnot`.
/// Returns the parsed attributes together with the indices of the content
/// nodes.
///
/// # Errors
///
/// `RncParseError::UnexpectedToken` when the body does not start with
/// `OpenAnnot`, `RncParseError::UnclosedAnnotation` when `CloseAnnot` is
/// missing after the content.
fn parse_annotation_attributes_content(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
) -> Result<(Attributes, Vec<usize>), XMLError> {
    use TokenType::*;

    match source {
        [OpenAnnot, rest @ ..] => *source = rest,
        _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    }

    let attributes = parse_nested_annotation_attributes(source, ns)?;
    let content = parse_annotation_content(source, tree, ns)?;

    match source {
        [CloseAnnot, rest @ ..] => *source = rest,
        _ => return Err(XMLError::RncParseError(RncParseError::UnclosedAnnotation)),
    }

    Ok((attributes, content))
}
/// Parses a run of `name = literal` attributes inside an annotation element.
///
/// Unlike top-level annotation attributes, the name may also be an
/// unprefixed identifier or keyword, in which case the attribute has no
/// namespace. Attributes in the xmlns namespace are flagged as namespace
/// declarations. Parsing stops, without error, at the first position that is
/// not a name followed by `=`.
///
/// # Errors
///
/// `RncParseError::InvalidPrefix` for an unbound prefix and
/// `RncParseError::UnacceptableNamespaceName` for a prefix bound to the
/// RELAX NG namespace; errors from parsing the literal or inserting the
/// attribute are propagated.
fn parse_nested_annotation_attributes(
    source: &mut &[TokenType],
    ns: &NamespaceStack,
) -> Result<Attributes, XMLError> {
    use TokenType::*;

    let mut atts = Attributes::default();
    loop {
        let (namespace_name, local_name, qname) = match source {
            [CName(pre, loc), Assign, ..] => {
                let Some(namespace) = ns.get(pre) else {
                    return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
                };
                if namespace.namespace_name.as_ref() == XML_RELAX_NG_NAMESPACE {
                    return Err(XMLError::RncParseError(
                        RncParseError::UnacceptableNamespaceName,
                    ));
                }
                (
                    Some(namespace.namespace_name.to_string()),
                    loc.clone(),
                    format!("{pre}:{loc}"),
                )
            }
            [Ident(ident), Assign, ..] => (None, ident.clone(), ident.clone()),
            [Keyword(keyword), Assign, ..] => (None, keyword.to_string(), keyword.to_string()),
            _ => return Ok(atts),
        };

        // Skip the attribute name and the `=` token.
        *source = &source[2..];
        let mut value = String::new();
        parse_literal(source, &mut value)?;

        let is_nsdecl = namespace_name.as_deref() == Some(XML_NS_NAMESPACE);
        let mut att = Attribute {
            namespace_name,
            local_name: Some(local_name),
            qname,
            value,
            flag: 0,
        };
        att.set_specified();
        if is_nsdecl {
            att.set_nsdecl();
        }
        atts.push(att).map_err(|e| e.1)?;
    }
}
/// Parses the content of an annotation element: any mix of literals and
/// nested annotation elements.
///
/// A literal becomes a text node; a name followed by `OpenAnnot` is parsed as
/// a nested annotation element. Returns the indices of the content nodes in
/// source order and stops, without error, at the first token that starts
/// neither form.
fn parse_annotation_content(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
) -> Result<Vec<usize>, XMLError> {
    use TokenType::*;

    let mut content = vec![];
    loop {
        let index = match source {
            [LiteralSegment(_), ..] => {
                let mut text = String::new();
                parse_literal(source, &mut text)?;
                let index = tree.len();
                tree.push(Node {
                    r#type: NodeType::TextContent(text),
                    atts: Attributes::default(),
                    children: vec![],
                });
                index
            }
            [CName(..) | Ident(_) | Keyword(_), OpenAnnot, ..] => {
                parse_nested_annotation_element(source, tree, ns)?
            }
            _ => return Ok(content),
        };
        content.push(index);
    }
}
/// Parses one annotation element nested inside another annotation.
///
/// The first token is the element name (prefixed name, identifier or
/// keyword); it is followed by the bracketed attributes and content. The
/// element becomes a `Foreign` node whose index is returned.
///
/// # Errors
///
/// * `RncParseError::InvalidPrefix` when the prefix of a prefixed name is not
///   bound in `ns`.
/// * `RncParseError::UnexpectedToken` when the first token is not a name at
///   all (including end of input).
/// * Errors from parsing the element body are propagated.
fn parse_nested_annotation_element(
    source: &mut &[TokenType],
    tree: &mut Vec<Node>,
    ns: &NamespaceStack,
) -> Result<usize, XMLError> {
    use TokenType::*;
    let r#type = match source {
        [CName(pre, loc), ..] => {
            let Some(namespace) = ns.get(pre) else {
                return Err(XMLError::RncParseError(RncParseError::InvalidPrefix));
            };
            NodeType::Foreign(Some(namespace.namespace_name.to_string()), loc.clone())
        }
        [Ident(ident), ..] => NodeType::Foreign(None, ident.clone()),
        [Keyword(keyword), ..] => NodeType::Foreign(None, keyword.to_string()),
        // Not a name token: this is a syntax error, not a prefix problem.
        _ => return Err(XMLError::RncParseError(RncParseError::UnexpectedToken)),
    };
    // Skip the element name; the body parser expects to start at `[`.
    *source = &source[1..];
    let (atts, children) = parse_annotation_attributes_content(source, tree, ns)?;
    let ret = tree.len();
    tree.push(Node {
        r#type,
        atts,
        children,
    });
    Ok(ret)
}
/// Attaches parsed annotations to the single node referenced by `elements`.
///
/// Annotation attributes are merged into the node's attributes. Annotation
/// child elements are handled according to the node type:
///
/// * for `Name`, `Param` and `Value` nodes their indices are appended to
///   `elements`, after the node itself;
/// * for every other node type they are inserted in front of the node's
///   existing children.
///
/// Does nothing when `annot` is `None`.
///
/// # Panics
///
/// Panics when `elements` does not contain exactly one index.
fn apply_annotations(
    tree: &mut [Node],
    elements: &mut Vec<usize>,
    annot: Option<(Attributes, Vec<usize>)>,
) -> Result<(), XMLError> {
    assert_eq!(elements.len(), 1);

    let Some((mut atts, children)) = annot else {
        return Ok(());
    };
    let target = &mut tree[elements[0]];

    if target.atts.is_empty() {
        // Nothing to merge with: take over the annotation attributes wholesale.
        target.atts = atts;
    } else {
        for att in atts.drain() {
            target.push_attribute(att)?;
        }
    }

    let annotations_follow = matches!(
        target.r#type,
        NodeType::Name | NodeType::Param | NodeType::Value
    );
    if annotations_follow {
        elements.extend(children);
    } else {
        let mut merged = children;
        merged.extend_from_slice(&target.children);
        target.children = merged;
    }
    Ok(())
}
/// Wraps `children` in a `Group` node when annotations must be attached.
///
/// Without annotations `children` is returned unchanged. With annotations a
/// new `Group` node containing `children` is pushed, the annotations are
/// applied to it, and a vector holding only its index is returned.
fn apply_annotations_group(
    tree: &mut Vec<Node>,
    children: Vec<usize>,
    annot: Option<(Attributes, Vec<usize>)>,
) -> Result<Vec<usize>, XMLError> {
    if annot.is_none() {
        return Ok(children);
    }

    let group = tree.len();
    tree.push(Node {
        r#type: NodeType::Group,
        atts: Attributes::default(),
        children,
    });
    let mut target = vec![group];
    apply_annotations(tree, &mut target, annot)?;
    Ok(vec![group])
}
/// Wraps `children` in a `Choice` node when annotations must be attached.
///
/// Without annotations `children` is returned unchanged. With annotations a
/// new `Choice` node containing `children` is pushed, the annotations are
/// applied to it, and a vector holding only its index is returned.
fn apply_annotations_choice(
    tree: &mut Vec<Node>,
    children: Vec<usize>,
    annot: Option<(Attributes, Vec<usize>)>,
) -> Result<Vec<usize>, XMLError> {
    if annot.is_none() {
        return Ok(children);
    }

    let choice = tree.len();
    tree.push(Node {
        r#type: NodeType::Choice,
        atts: Attributes::default(),
        children,
    });
    let mut target = vec![choice];
    apply_annotations(tree, &mut target, annot)?;
    Ok(vec![choice])
}
/// Builds the base URI used when the caller supplies none (or a relative one).
///
/// The URI points at a placeholder file named `schema.rng` inside the current
/// working directory.
///
/// # Errors
///
/// Fails when the current directory cannot be determined or canonicalized,
/// or when the resulting path cannot be converted into a URI.
fn default_base_uri() -> Result<URIString, XMLError> {
    let mut pwd = std::env::current_dir()?;
    // Canonicalize the directory *before* appending the placeholder file name:
    // `canonicalize` fails for paths that do not exist, and `schema.rng` is
    // not expected to exist on disk.
    if !pwd.is_absolute() {
        pwd = pwd.canonicalize()?;
    }
    pwd.push("schema.rng");
    Ok(URIString::parse_file_path(pwd)?)
}
impl RelaxNGSchema {
    /// Parses a RELAX NG compact-syntax schema located at `uri`.
    ///
    /// The document is obtained through the handler's `resolve_entity`, using
    /// the default base URI (see `default_base_uri`) as the base. If the
    /// resolved source carries no system id, `uri` resolved against that base
    /// is used. When `encoding` is given, the source is switched to it before
    /// parsing.
    ///
    /// # Errors
    ///
    /// Propagates resolution, encoding, parse and simplification errors.
    pub fn parse_compact_uri<H: SAXHandler>(
        uri: impl AsRef<URIStr>,
        encoding: Option<&str>,
        handler: Option<H>,
    ) -> Result<Self, XMLError> {
        let base_uri = default_base_uri()?;
        let mut handler = new_dyn_handler(handler);
        let mut source =
            handler.resolve_entity(DOCUMENT_ENTITY_NAME, None, &base_uri, uri.as_ref())?;
        if source.system_id().is_none() {
            source.set_system_id(base_uri.resolve(uri.as_ref()));
        }
        if let Some(encoding) = encoding {
            source.switch_encoding(encoding)?;
        }
        Self::parse_compact(source, &mut handler)?;
        Ok(Self {
            grammar: handler.simplification().map_err(|err| err.error)?,
        })
    }

    /// Parses a RELAX NG compact-syntax schema read from `reader`.
    ///
    /// `uri`, when given, becomes the system id of the source; a relative
    /// `uri` is first resolved against the default base URI. When `uri` is
    /// `None` and the source has no system id, the default base URI is used,
    /// matching the behaviour of [`Self::parse_compact_str`].
    ///
    /// # Errors
    ///
    /// Propagates I/O, encoding, parse and simplification errors.
    pub fn parse_compact_reader<'a, H: SAXHandler>(
        reader: impl Read + 'a,
        encoding: Option<&str>,
        uri: Option<&URIStr>,
        handler: Option<H>,
    ) -> Result<Self, XMLError> {
        let mut source = InputSource::from_reader(reader, encoding)?;
        let mut handler = new_dyn_handler(handler);
        if let Some(uri) = uri {
            if !uri.is_absolute() {
                source.set_system_id(default_base_uri()?.resolve(uri));
            } else {
                source.set_system_id(uri);
            }
        } else if source.system_id().is_none() {
            // `parse_compact` requires a system id to use as the base URI;
            // without this fallback it would panic for `uri == None`.
            source.set_system_id(default_base_uri()?);
        }
        Self::parse_compact(source, &mut handler)?;
        Ok(Self {
            grammar: handler.simplification().map_err(|err| err.error)?,
        })
    }

    /// Parses a RELAX NG compact-syntax schema held in the string `schema`.
    ///
    /// `uri`, when given, becomes the system id of the source; a relative
    /// `uri` is first resolved against the default base URI. When `uri` is
    /// `None`, the default base URI itself is used.
    ///
    /// # Errors
    ///
    /// Propagates parse and simplification errors.
    pub fn parse_compact_str<H: SAXHandler>(
        schema: &str,
        uri: Option<&URIStr>,
        handler: Option<H>,
    ) -> Result<Self, XMLError> {
        let mut source = InputSource::from_content(schema);
        let mut handler = new_dyn_handler(handler);
        if let Some(uri) = uri {
            if !uri.is_absolute() {
                source.set_system_id(default_base_uri()?.resolve(uri));
            } else {
                source.set_system_id(uri);
            }
        } else {
            source.set_system_id(default_base_uri()?);
        }
        Self::parse_compact(source, &mut handler)?;
        Ok(Self {
            grammar: handler.simplification().map_err(|err| err.error)?,
        })
    }

    /// Parses `source` as compact syntax and replays the resulting node tree
    /// to `handler` as a sequence of SAX events.
    ///
    /// The pipeline is: newline normalization, escape interpretation,
    /// tokenization, parsing into a node tree, then a tree walk that emits
    /// the events between `start_document` and `end_document`.
    ///
    /// # Panics
    ///
    /// Panics when `source` has no system id; callers must set one first.
    pub(super) fn parse_compact<H: SAXHandler + ?Sized>(
        mut source: InputSource,
        handler: &mut RelaxNGParseHandler<H>,
    ) -> Result<(), XMLError> {
        let base_uri = source.system_id().unwrap().to_owned();
        let source = newline_normalization(&mut source)?;
        let source = escape_interpretation(&source)?;
        let source = tokenization(&source)?;
        let (root, tree, ns_stack) = parse(&source, &base_uri)?;

        // Reverse map from namespace name to prefix, used to build qualified
        // names while walking the tree. Later declarations overwrite earlier
        // ones for the same namespace name.
        let mut rmap = BTreeMap::new();
        for ns in &ns_stack {
            rmap.insert(ns.namespace_name.clone(), ns.prefix.clone());
        }

        let locator = Arc::new(Locator::default());
        locator.set_system_id(base_uri.into());
        locator.set_line(0);
        locator.set_column(0);
        handler.set_document_locator(locator);
        handler.start_document();
        walk_nodes(handler, root, &tree, &rmap)?;
        handler.end_document();
        Ok(())
    }
}
/// Creates a boxed parse handler with its inner handler type erased.
///
/// With `Some(handler)` the parse handler wraps the given handler; with
/// `None` the default parse handler is used.
fn new_dyn_handler<'a, H: SAXHandler + 'a>(
    handler: Option<H>,
) -> Box<RelaxNGParseHandler<dyn SAXHandler + 'a>> {
    match handler {
        Some(inner) => Box::new(RelaxNGParseHandler::with_handler(inner)) as _,
        None => Box::new(RelaxNGParseHandler::default()) as _,
    }
}
/// Replays the subtree rooted at `tree[cur]` to `handler` as SAX events.
///
/// Text nodes produce a `characters` event. Every other node produces, in
/// order: `start_prefix_mapping` for each namespace-declaration attribute,
/// `start_element`, the events of its children, `end_element`, and
/// `end_prefix_mapping` for the declared prefixes in reverse order.
///
/// `rmap` maps namespace names to prefixes and is used to build the
/// qualified element name.
///
/// # Errors
///
/// `RncParseError::InvalidPrefix` when the node type yields no qualified
/// name or no local name.
fn walk_nodes<H: SAXHandler + ?Sized>(
    handler: &mut RelaxNGParseHandler<H>,
    cur: usize,
    tree: &[Node],
    rmap: &BTreeMap<Arc<str>, Arc<str>>,
) -> Result<(), XMLError> {
    let node = &tree[cur];
    let ty = &node.r#type;

    if let NodeType::TextContent(text) = ty {
        handler.characters(text);
        return Ok(());
    }

    let qname = ty
        .qname(rmap)
        .ok_or(XMLError::RncParseError(RncParseError::InvalidPrefix))?;
    let local = ty
        .local_name()
        .ok_or(XMLError::RncParseError(RncParseError::InvalidPrefix))?;
    let nsname = ty.namespace_name();

    // Prefixes declared on this element, remembered so they can be ended later.
    let mut declared = vec![];
    for att in &node.atts {
        if !att.is_nsdecl() {
            continue;
        }
        // A local name of `xmlns` (or none) denotes the default namespace.
        let prefix = att.local_name.as_deref().filter(|&pre| pre != "xmlns");
        declared.push(prefix);
        handler.start_prefix_mapping(prefix, &att.value);
    }

    handler.start_element(nsname, Some(local), &qname, &node.atts);
    for &child in &node.children {
        walk_nodes(handler, child, tree, rmap)?;
    }
    handler.end_element(nsname, Some(local), &qname);

    for prefix in declared.into_iter().rev() {
        handler.end_prefix_mapping(prefix);
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use crate::tree::TreeBuildHandler;

    use super::*;

    /// Parses the compact-syntax schema of RELAX NG itself and expects success.
    /// On failure the error and the document built so far are printed first.
    #[test]
    fn parse_compact_tests() {
        let mut handler = TreeBuildHandler::default();
        let schema_uri =
            URIString::parse_file_path("resources/relaxng/schema-of-schema.rnc").unwrap();

        let parsed = RelaxNGSchema::parse_compact_uri(schema_uri, None, Some(&mut handler));
        if let Err(err) = &parsed {
            println!("err: {err}\n{}", handler.document);
        }
        parsed.unwrap();
    }
}