1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your // option. This file may not be copied, modified, or distributed // except according to those terms. use std::borrow::Cow; use tendril::StrTendril; use {Attribute, QualName}; pub use self::TagKind::{EmptyTag, EndTag, ShortTag, StartTag}; pub use self::Token::{CharacterTokens, EOFToken, NullCharacterToken, ParseError}; pub use self::Token::{CommentToken, DoctypeToken, PIToken, TagToken}; use super::states; /// Tag kind denotes which kind of tag did we encounter. #[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)] pub enum TagKind { /// Beginning of a tag (e.g. `<a>`). StartTag, /// End of a tag (e.g. `</a>`). EndTag, /// Empty tag (e.g. `<a/>`). EmptyTag, /// Short tag (e.g. `</>`). ShortTag, } /// XML 5 Tag Token #[derive(PartialEq, Eq, Debug, Clone)] pub struct Tag { /// Token kind denotes which type of token was encountered. /// E.g. if parser parsed `</a>` the token kind would be `EndTag`. pub kind: TagKind, /// Qualified name of the tag. pub name: QualName, /// List of attributes attached to this tag. /// Only valid in start and empty tag. pub attrs: Vec<Attribute>, } impl Tag { /// Sorts attributes in a tag. pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool { if (self.kind != other.kind) || (self.name != other.name) { return false; } let mut self_attrs = self.attrs.clone(); let mut other_attrs = other.attrs.clone(); self_attrs.sort(); other_attrs.sort(); self_attrs == other_attrs } } /// A `DOCTYPE` token. /// Doctype token in XML5 is rather limited for reasons, such as: /// security and simplicity. XML5 only supports declaring DTD with /// name, public identifier and system identifier #[derive(PartialEq, Eq, Clone, Debug)] pub struct Doctype { /// Name of DOCTYPE declared pub name: Option<StrTendril>, /// Public identifier of this DOCTYPE. pub public_id: Option<StrTendril>, /// System identifier of this DOCTYPE. pub system_id: Option<StrTendril>, } impl Doctype { /// Constructs an empty DOCTYPE, with all fields set to None. pub fn new() -> Doctype { Doctype { name: None, public_id: None, system_id: None, } } } /// A ProcessingInstruction token. #[derive(PartialEq, Eq, Clone, Debug)] pub struct Pi { /// What is the name of processing instruction. pub target: StrTendril, /// Text of processing instruction. pub data: StrTendril, } /// Describes tokens encountered during parsing of input. #[derive(PartialEq, Eq, Debug)] pub enum Token { /// Doctype token DoctypeToken(Doctype), /// Token tag founds. This token applies to all /// possible kinds of tags (like start, end, empty tag, etc.). TagToken(Tag), /// Processing Instruction token PIToken(Pi), /// Comment token. CommentToken(StrTendril), /// Token that represents a series of characters. CharacterTokens(StrTendril), /// End of File found. EOFToken, /// NullCharacter encountered. NullCharacterToken, /// Error happened ParseError(Cow<'static, str>), } /// Types which can receive tokens from the tokenizer. pub trait TokenSink { /// Process a token. fn process_token(&mut self, token: Token); /// Signal to the sink that parsing has ended. fn end(&mut self) {} /// The tokenizer will call this after emitting any start tag. /// This allows the tree builder to change the tokenizer's state. /// By default no state changes occur. fn query_state_change(&mut self) -> Option<states::XmlState> { None } }