xml 1.2.1

An XML library in pure Rust
Documentation
use crate::common::{is_whitespace_char, XmlVersion};
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;
use crate::util::Encoding;

use super::{
    DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State,
    DEFAULT_VERSION,
};

impl PullParser {
    #[inline(never)]
    fn emit_start_document(&mut self) -> Option<Result> {
        debug_assert!(self.encountered == Encountered::None);
        self.encountered = Encountered::Declaration;

        let version = self.data.version;
        let encoding = self.data.take_encoding();
        let standalone = self.data.standalone;

        if let Some(new_encoding) = encoding.as_deref() {
            let new_encoding = match new_encoding.parse() {
                Ok(e) => e,
                Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1,
                Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))),
            };
            let current_encoding = self.lexer.encoding();
            if current_encoding != new_encoding {
                let set = match (current_encoding, new_encoding) {
                    (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new,
                    (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding,
                    _ if self.config.ignore_invalid_encoding_declarations => current_encoding,
                    _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))),
                };
                self.lexer.set_encoding(set);
            }
        }

        let current_encoding = self.lexer.encoding();
        self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
            version: version.unwrap_or(DEFAULT_VERSION),
            encoding: encoding.unwrap_or_else(move || current_encoding.to_string()),
            standalone
        }))
    }

    // TODO: remove redundancy via macros or extra methods
    pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> {
        match s {
            DeclarationSubstate::BeforeVersion => match t {
                Token::Character('v') => {
                    self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion))
                },
                Token::Character(c) if is_whitespace_char(c) => None, // continue
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::Attribute, |this, token, name| {
                match &*name.local_name {
                    "ersion" if name.namespace.is_none() =>
                        this.into_state_continue(State::InsideDeclaration(
                            if token == Token::EqualsSign {
                                DeclarationSubstate::InsideVersionValue
                            } else {
                                DeclarationSubstate::AfterVersion
                            }
                        )),
                    _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))),
                }
            }),

            DeclarationSubstate::AfterVersion => match t {
                Token::EqualsSign => {
                    self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue))
                },
                Token::Character(c) if is_whitespace_char(c) => None,
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
                this.data.version = match &*value {
                    "1.0" => Some(XmlVersion::Version10),
                    "1.1" => Some(XmlVersion::Version11),
                    // https://www.w3.org/TR/REC-xml/#sec-prolog-dtd
                    // VersionNum	   ::=   	'1.' [0-9]+
                    // "[...] This means that an XML 1.0 processor will accept 1.x
                    //  documents provided they do not use any non-1.0 features."
                    v if v.starts_with("1.")
                        && v.len() >= 3
                        && v.chars().skip(2).all(|c| c.is_ascii_digit()) =>
                    {
                        // XML 1.1 forbids any other versions
                        Some(XmlVersion::Version10)
                    }
                    _ => None,
                };
                if this.data.version.is_some() {
                    this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
                } else {
                    Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into())))
                }
            }),

            DeclarationSubstate::AfterVersionValue => match t {
                Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)),
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::BeforeEncoding => match t {
                Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
                Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                Token::Character(c) if is_whitespace_char(c) => None,  // skip whitespace
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::Attribute, |this, token, name| {
                match &*name.local_name {
                    "ncoding" if name.namespace.is_none() =>
                        this.into_state_continue(State::InsideDeclaration(
                            if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding }
                        )),
                    _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into())))
                }
            }),

            DeclarationSubstate::AfterEncoding => match t {
                Token::EqualsSign => {
                    self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue))
                },
                Token::Character(c) if is_whitespace_char(c) => None,
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
                this.data.encoding = Some(value);
                this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue))
            }),

            DeclarationSubstate::AfterEncodingValue => match t {
                Token::Character(c) if is_whitespace_char(c) => {
                    self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl))
                },
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::BeforeStandaloneDecl => match t {
                Token::Character('s') => {
                    self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl))
                },
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::Attribute, |this, token, name| {
                match &*name.local_name {
                    "tandalone" if name.namespace.is_none() =>
                        this.into_state_continue(State::InsideDeclaration(
                            if token == Token::EqualsSign {
                                DeclarationSubstate::InsideStandaloneDeclValue
                            } else {
                                DeclarationSubstate::AfterStandaloneDecl
                            }
                        )),
                    _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))),
                }
            }),

            DeclarationSubstate::AfterStandaloneDecl => match t {
                Token::EqualsSign => {
                    self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue))
                },
                Token::Character(c) if is_whitespace_char(c) => None,
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
                let standalone = match &*value {
                    "yes" => Some(true),
                    "no"  => Some(false),
                    _     => None
                };
                if standalone.is_some() {
                    this.data.standalone = standalone;
                    this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
                } else {
                    Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into())))
                }
            }),

            DeclarationSubstate::AfterStandaloneDeclValue => match t {
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },
        }
    }
}