1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
//! tl is an efficient and easy to use HTML parser written in Rust.
//!
//! It does minimal to no copying during parsing by borrowing parts of the input string.
//! Additionally, it keeps track of parsed elements and stores elements with an id attribute
//! in an internal HashMap, which makes element lookups by ID/class name very fast.
//!
//! ## Examples
//! Finding an element by its id attribute and printing the inner text:
//! ```rust
//! let input = r#"<p id="text">Hello</p>"#;
//! let dom = tl::parse(input);
//!
//! let element = dom.get_element_by_id("text").expect("Failed to find element");
//!
//! println!("Inner text: {}", element.inner_text());
//! ```
//!
//! ## Owned DOM
//! Calling `tl::parse()` returns a DOM struct that borrows from the input string, which means that the string must be kept alive.
//! The input string must outlive this DOM. If this is not acceptable or you need to keep the DOM around for longer,
//! consider using `tl::parse_owned()`.
//! `VDomGuard` takes ownership over the string, which means you don't have to keep the string around.
//! ```rust
//! // Notice how it takes ownership over the string:
//! let dom_guard = unsafe { tl::parse_owned(String::from(r#"<p id="text">Hello</p>"#)) };
//!
//! // Obtain reference to underlying VDom
//! let dom = dom_guard.get_ref();
//!
//! // Now, use `dom` as you would if it was a regular `VDom`
//! let element = dom.get_element_by_id("text").expect("Failed to find element");
//!
//! println!("Inner text: {}", element.inner_text());
//! ```
//!
//! ## Bytes
//! Some methods return a `Bytes` struct, which is an internal struct that is used to borrow
//! a part of the input string. This is mainly used over a raw `&[u8]` for its `Debug` implementation.

#![deny(missing_docs)]

mod bytes;
mod parser;
mod stream;
#[cfg(test)]
mod tests;
mod util;
mod vdom;

pub use bytes::{AsBytes, Bytes};
use parser::Parser;
pub use parser::{tag::Attributes, tag::HTMLTag, tag::Node, HTMLVersion, Tree};
pub use vdom::{VDom, VDomGuard};

/// Parses the given input string
///
/// This is the "entry point" and function you will call to parse HTML.
/// The input string must be kept alive, and must outlive `VDom`.
/// If you need an "owned" version that takes an input string and can be kept around forever,
/// consider using `parse_owned()`.
pub fn parse(input: &str) -> VDom<'_> {
    VDom::from(Parser::new(input).parse())
}

/// Parses the given input string and returns an owned, RAII guarded DOM
///
/// ## Safety
/// This uses a lot of `unsafe` behind the scenes to create a self-referential-like struct.
/// The given input string is first leaked and turned into raw pointer, and its lifetime will be promoted to 'static.
/// Once `VDomGuard` goes out of scope, the string will be freed.
/// It should not be possible to cause UB in its current form and might become a safe function in the future.
pub unsafe fn parse_owned<'a>(input: String) -> VDomGuard<'a> {
    VDomGuard::parse(input)
}