1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
// Copyright 2015 The xml5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! This crate provides a push based XML parser library that //! adheres to XML5 specification. In other words this library //! trades well-formedness for error recovery. //! //! The idea behind this, was to minimize number of errors from //! tools that generate XML (e.g. `S` won't just return `S` //! as text, but will parse it into `S` ). //! You can check out full specification [here](https://ygg01.github.io/xml5_draft/). //! //! What this library provides is a solid XML parser that can: //! //! * Parse somewhat erroneous XML input //! * Provide support for [Numeric character references](https://en.wikipedia.org/wiki/Numeric_character_reference). //! * Provide partial [XML namespace](http://www.w3.org/TR/xml-names11/) support. //! * Provide full set of SVG/MathML entities //! //! What isn't in scope for this library: //! //! * Document Type Definition parsing - this is pretty hard to do right and nowadays, its used //! #![crate_name="xml5ever"] #![crate_type="dylib"] #![deny(missing_docs)] #[macro_use] extern crate log; #[macro_use] extern crate mac; #[macro_use] extern crate string_cache; extern crate phf; extern crate time; /// Re-export the tendril crate so that users don’t need to depend on it. pub mod tendril { extern crate tendril; pub use self::tendril::*; } macro_rules! time { ($e:expr) => {{ let t0 = ::time::precise_time_ns(); let result = $e; let dt = ::time::precise_time_ns() - t0; (result, dt) }} } #[macro_use] mod util; /// XML5 tokenizer - converts input into tokens pub mod tokenizer; /// XML5 tree builder - converts tokens into a tree like structure pub mod tree_builder; /// A simple reference-counted that serves as a default tree structure pub mod rcdom; use tokenizer::{XmlTokenizerOpts, XmlTokenizer, TokenSink}; use tree_builder::{TreeSink, XmlTreeBuilder}; /// Parse and send results to a `TreeSink`. /// /// ## Example /// /// ```ignore /// let mut sink = MySink; /// parse_to(&mut sink, iter::once(my_str), Default::default()); /// ``` pub fn parse_to< Sink:TreeSink, It: IntoIterator<Item=tendril::StrTendril> >( sink: Sink, input: It, opts: XmlTokenizerOpts) -> Sink { let tb = XmlTreeBuilder::new(sink); let mut tok = XmlTokenizer::new(tb, opts); for s in input { tok.feed(s); } tok.end(); tok.unwrap().unwrap() } /// Parse into a type which implements `ParseResult`. /// /// ## Example /// /// ```ignore /// let dom: RcDom = parse(iter::once(my_str), Default::default()); /// ``` pub fn parse<Output, It>(input: It, opts: XmlTokenizerOpts) -> Output where Output: ParseResult, It: IntoIterator<Item=tendril::StrTendril>, { let sink = parse_to(Default::default(), input, opts); ParseResult::get_result(sink) } /// Results which can be extracted from a `TreeSink`. /// /// Implement this for your parse tree data type so that it /// can be returned by `parse()`. pub trait ParseResult { /// Type of consumer of tree modifications. /// It also extends `Default` for convenience. type Sink: TreeSink + Default; /// Returns parsed tree data type fn get_result(sink: Self::Sink) -> Self; } /// Tokenize and send results to a `XTokenSink`. /// /// ## Example /// /// ```ignore /// let mut sink = MySink; /// tokenize_to(&mut sink, iter::once(my_str), Default::default()); /// ``` pub fn tokenize_to< Sink: TokenSink, It: IntoIterator<Item=tendril::StrTendril> >( sink: Sink, input: It, opts: XmlTokenizerOpts) -> Sink { let mut tok = XmlTokenizer::new(sink, opts); for s in input { tok.feed(s); } tok.end(); tok.unwrap() }