1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
#![cfg_attr(feature = "simd", feature(portable_simd))]
#![doc = include_str!("../README.md")]
#![deny(missing_docs)]
mod bytes;
/// Errors that occur throughout the crate
pub mod errors;
/// Inline data structures
pub mod inline;
mod parser;
/// Query selector API
pub mod queryselector;
mod stream;
#[cfg(test)]
mod tests;
mod util;
mod vdom;
#[doc(hidden)]
#[cfg(feature = "__INTERNALS_DO_NOT_USE")]
pub mod simd;
#[cfg(not(feature = "__INTERNALS_DO_NOT_USE"))]
mod simd;
pub use bytes::Bytes;
pub use errors::ParseError;
pub use parser::*;
use queryselector::Selector;
pub use vdom::{VDom, VDomGuard};
/// Parses the given input string
///
/// This is the "entry point" and function that is called to parse HTML.
/// The input string must be kept alive, and must outlive `VDom`.
/// If you need an "owned" version that takes an input string and can be kept around forever,
/// consider using `parse_owned()`.
///
/// # Errors
/// Throughout the parser it is assumed that spans never overflow a `u32`.
/// To prevent this, this function will return an error if the input string length would overflow a `u32`.
/// If the input string length fits in a `u32`, then it is safe to assume that none of the substrings can overflow a `u32`.
///
/// # Example
/// ```
/// # use tl::*;
/// let dom = parse("<div>Hello, world!</div>", ParserOptions::default()).unwrap();
/// assert_eq!(dom.query_selector("div").unwrap().count(), 1);
/// ```
pub fn parse(input: &str, options: ParserOptions) -> Result<VDom<'_>, ParseError> {
let mut parser = Parser::new(input, options);
parser.parse()?;
Ok(VDom::from(parser))
}
/// Parses a query selector
///
/// # Example
/// ```
/// # use tl::queryselector::selector::Selector;
/// let selector = tl::parse_query_selector("div#test");
///
/// match selector {
/// Some(Selector::And(left, right)) => {
/// assert!(matches!(&*left, Selector::Tag(b"div")));
/// assert!(matches!(&*right, Selector::Id(b"test")));
/// },
/// _ => unreachable!()
/// }
/// ```
pub fn parse_query_selector(input: &str) -> Option<Selector<'_>> {
let selector = queryselector::Parser::new(input.as_bytes()).selector()?;
Some(selector)
}
/// Parses the given input string and returns an owned, RAII guarded DOM
///
/// # Errors
/// See [parse]
///
/// # Safety
/// This uses `unsafe` code to create a self-referential-like struct.
/// The given input string is first leaked and turned into raw pointer, and its lifetime will be promoted to 'static.
/// Once `VDomGuard` goes out of scope, the string will be freed.
/// It should not be possible to cause UB in its current form and might become a safe function in the future.
pub unsafe fn parse_owned(input: String, options: ParserOptions) -> Result<VDomGuard, ParseError> {
VDomGuard::parse(input, options)
}