Skip to main content

tokenize_into

Function tokenize_into 

Source
pub fn tokenize_into<S: TreeSink>(input: &str, sink: &mut S)
Expand description

Tokenize HTML directly into a TreeSink, bypassing all intermediate allocations.

Fuses SIMD structural indexing, quote-aware masking, and token extraction into a single streaming pass. Instead of constructing Token enums, it calls sink methods directly, eliminating Vec<Attribute>, Cow wrappers, and entity-decode String allocations in the hot loop.

§Example

use fhp_tokenizer::{TreeSink, tokenize_into};
use fhp_core::tag::Tag;

/// Tallies of the sink callbacks seen while tokenizing.
struct Counter {
    /// Bumped once per `open_tag` callback.
    tags: usize,
    /// Bumped once per `text` callback.
    texts: usize,
}
// A minimal sink: it only counts `open_tag` and `text` callbacks and
// discards every argument it is handed.
impl TreeSink for Counter {
    // Every `open_tag` callback bumps the tag tally.
    fn open_tag(&mut self, _: Tag, _: &str, _: &str, _: bool) {
        self.tags += 1;
    }

    // Every `text` callback bumps the text tally.
    fn text(&mut self, _: &str) {
        self.texts += 1;
    }

    // The remaining callbacks are deliberate no-ops: this sink does not
    // count closing tags, comments, doctypes, or CDATA sections.
    fn close_tag(&mut self, _: Tag, _: &str) {}

    fn comment(&mut self, _: &str) {}

    fn doctype(&mut self, _: &str) {}

    fn cdata(&mut self, _: &str) {}
}

// Start with both tallies at zero and feed a single element to the tokenizer.
let mut counter = Counter { tags: 0, texts: 0 };
tokenize_into("<div>hello</div>", &mut counter);

// The sink should have seen exactly one `open_tag` and one `text` callback.
assert_eq!(counter.tags, 1);
assert_eq!(counter.texts, 1);