#![allow(clippy::wrong_self_convention)]
use std::borrow::Cow;
use markup5ever::{
buffer_queue::BufferQueue,
tendril::{
self,
stream::{TendrilSink, Utf8LossyDecoder},
StrTendril,
},
};
use crate::{
interface::{create_element, Attribute, QualName, TreeSink},
tokenizer::{Tokenizer, TokenizerResult},
tree_builder::TreeBuilder,
};
/// Build a [`Parser`] that parses a complete document into `sink`.
///
/// The sink is wrapped in a `TreeBuilder`, which in turn is driven by a
/// `Tokenizer`. The returned parser starts with an empty input queue; feed it
/// through its `TendrilSink` implementation.
pub fn parse_document<Sink: TreeSink>(sink: Sink) -> Parser<Sink> {
    Parser {
        // Tokens flow: tokenizer -> tree builder -> caller-supplied sink.
        tokenizer: Tokenizer::new(TreeBuilder::new(sink)),
        input_buffer: BufferQueue::new(),
    }
}
/// Build a [`Parser`] that parses a fragment in the context of a new element.
///
/// The context element is created in `sink` from `context_name` and
/// `context_attrs` via `create_element`; the rest of the setup is delegated
/// to [`parse_fragment_for_element`].
pub fn parse_fragment<Sink: TreeSink>(
    mut sink: Sink,
    context_name: QualName,
    context_attrs: Vec<Attribute>,
) -> Parser<Sink> {
    // The element has to be created before `sink` is moved into the parser.
    let context_element = create_element(&mut sink, context_name, context_attrs);
    parse_fragment_for_element(sink, context_element)
}
/// Build a [`Parser`] that parses a fragment using an existing context element.
///
/// `context_element` is a handle the caller already obtained from `sink`. It
/// is handed to `TreeBuilder::new_for_fragment` together with the sink, and
/// the resulting tree builder is driven by a fresh `Tokenizer`. The returned
/// parser starts with an empty input queue.
pub fn parse_fragment_for_element<Sink: TreeSink>(
    sink: Sink,
    context_element: Sink::Handle,
) -> Parser<Sink> {
    Parser {
        tokenizer: Tokenizer::new(TreeBuilder::new_for_fragment(sink, context_element)),
        input_buffer: BufferQueue::new(),
    }
}
/// A parser that pairs a tokenizer/tree-builder pipeline with the queue of
/// input that has been received but not yet consumed.
///
/// Instances are created by the `parse_*` functions in this file and are fed
/// through the `TendrilSink` implementation.
// The `TreeSink` bound must stay on the type itself: the `tokenizer` field
// names the associated type `Sink::Handle`.
pub struct Parser<Sink: TreeSink> {
    /// Tokenizer whose sink is the tree builder wrapping the caller's `Sink`.
    pub tokenizer: Tokenizer<TreeBuilder<Sink::Handle, Sink>>,
    /// Input chunks pushed by `process` and drained by the tokenizer.
    pub input_buffer: BufferQueue,
}
impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for Parser<Sink> {
    /// The parser yields whatever the underlying tree sink yields.
    type Output = Sink::Output;

    /// Queue the chunk `t` and run the tokenizer over the buffered input.
    ///
    /// Whenever the tokenizer returns `TokenizerResult::Script`, the payload
    /// is discarded and feeding resumes; the loop ends on any other result.
    fn process(&mut self, t: StrTendril) {
        self.input_buffer.push_back(t);
        while matches!(
            self.tokenizer.feed(&mut self.input_buffer),
            TokenizerResult::Script(_)
        ) {}
    }

    /// Forward the error description `desc` to the tree sink as a parse error.
    fn error(&mut self, desc: Cow<'static, str>) {
        // tokenizer -> tree builder -> caller-supplied sink
        let tree_sink = &mut self.tokenizer.sink.sink;
        tree_sink.parse_error(desc)
    }

    /// Feed any remaining buffered input, end the tokenizer, and return the
    /// tree sink's output.
    ///
    /// # Panics
    ///
    /// Panics if the input buffer is not empty once the tokenizer has stopped
    /// asking to be fed again.
    fn finish(mut self) -> Self::Output {
        // Same resume-after-script loop as in `process`.
        while matches!(
            self.tokenizer.feed(&mut self.input_buffer),
            TokenizerResult::Script(_)
        ) {}
        assert!(self.input_buffer.is_empty());
        self.tokenizer.end();

        let tree_sink = self.tokenizer.sink.sink;
        tree_sink.finish()
    }
}
impl<Sink> Parser<Sink>
where
    Sink: TreeSink,
{
    /// Consume this parser and wrap it in a `Utf8LossyDecoder`.
    ///
    /// The parser becomes the decoder's inner sink. See the `tendril`
    /// documentation for how the decoder converts its input before passing
    /// it on.
    // NOTE(review): the `from_*` name combined with a by-value `self` is
    // presumably what the file-level `clippy::wrong_self_convention` allow is
    // for - confirm.
    pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
        Utf8LossyDecoder::new(self)
    }
}