use crate::buffer_queue::BufferQueue;
use crate::tokenizer::{Tokenizer, TokenizerOpts};
use crate::tree_builder::{create_element, TreeBuilder, TreeBuilderOpts, TreeSink};
use crate::{Attribute, QualName};
use markup5ever::TokenizerResult;
use std::borrow::Cow;
use crate::tendril;
use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
use crate::tendril::StrTendril;
/// All-in-one options for a parse: one set for the tokenizer and one for
/// the tree builder.
#[derive(Default, Clone)]
pub struct ParseOpts {
    /// Options handed to the tokenizer.
    pub tokenizer: TokenizerOpts,

    /// Options handed to the tree builder.
    pub tree_builder: TreeBuilderOpts,
}
/// Builds a [`Parser`] for a whole document.
///
/// A `TreeBuilder` is created over `sink` with `opts.tree_builder`, and a
/// `Tokenizer` is created over that tree builder with `opts.tokenizer`.
/// The returned parser starts with a default-constructed input buffer.
pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink>
where
    Sink: TreeSink,
{
    let tree_builder = TreeBuilder::new(sink, opts.tree_builder);
    let tokenizer = Tokenizer::new(tree_builder, opts.tokenizer);
    let input_buffer = BufferQueue::default();

    Parser {
        tokenizer,
        input_buffer,
    }
}
/// Builds a [`Parser`] for a fragment, given the context element's name
/// and attributes.
///
/// The context element is created in `sink` via `create_element`, and the
/// work is then delegated to [`parse_fragment_for_element`] with no form
/// element.
pub fn parse_fragment<Sink>(
    sink: Sink,
    opts: ParseOpts,
    context_name: QualName,
    context_attrs: Vec<Attribute>,
    context_element_allows_scripting: bool,
) -> Parser<Sink>
where
    Sink: TreeSink,
{
    let context_element = create_element(&sink, context_name, context_attrs);
    // This entry point never supplies a form element.
    let form_element = None;

    parse_fragment_for_element(
        sink,
        opts,
        context_element,
        context_element_allows_scripting,
        form_element,
    )
}
/// Builds a [`Parser`] for a fragment, given an existing context element
/// handle and an optional form element handle.
///
/// The tokenizer's `initial_state` is overridden with the state the
/// fragment tree builder reports for the context element. Every other
/// tokenizer option is taken from `opts.tokenizer`.
pub fn parse_fragment_for_element<Sink>(
    sink: Sink,
    opts: ParseOpts,
    context_element: Sink::Handle,
    context_element_allows_scripting: bool,
    form_element: Option<Sink::Handle>,
) -> Parser<Sink>
where
    Sink: TreeSink,
{
    let tb = TreeBuilder::new_for_fragment(sink, context_element, form_element, opts.tree_builder);

    // Ask the tree builder which tokenizer state fits the context element
    // before the builder is moved into the tokenizer.
    let initial_state = tb.tokenizer_state_for_context_elem(context_element_allows_scripting);
    let tok_opts = TokenizerOpts {
        initial_state: Some(initial_state),
        ..opts.tokenizer
    };

    Parser {
        tokenizer: Tokenizer::new(tb, tok_opts),
        input_buffer: BufferQueue::default(),
    }
}
/// A parser that pairs a tokenizer (driving a tree builder over `Sink`)
/// with a queue of input waiting to be tokenized.
///
/// The `Sink: TreeSink` bound is needed on the type itself because the
/// `tokenizer` field names the associated type `Sink::Handle`.
pub struct Parser<Sink>
where
    Sink: TreeSink,
{
    /// Tokenizer whose sink is the tree builder.
    pub tokenizer: Tokenizer<TreeBuilder<Sink::Handle, Sink>>,

    /// Input that has been received and is fed to the tokenizer.
    pub input_buffer: BufferQueue,
}
impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for Parser<Sink> {
    /// The parse result is whatever the underlying sink produces.
    type Output = Sink::Output;

    /// Queues `t` on the input buffer and runs the tokenizer until it
    /// reports that it is done.
    fn process(&mut self, t: StrTendril) {
        self.input_buffer.push_back(t);
        self.loop_until_done();
    }

    /// Forwards `desc` to `parse_error` on the innermost sink
    /// (`tokenizer.sink.sink`).
    fn error(&mut self, desc: Cow<'static, str>) {
        self.tokenizer.sink.sink.parse_error(desc);
    }

    /// Runs the tokenizer until it is done, signals end of input to the
    /// tokenizer, and returns the result of `finish` on the innermost sink.
    ///
    /// # Panics
    ///
    /// Panics if the input buffer is not empty once the tokenizer reports
    /// that it is done.
    fn finish(self) -> Self::Output {
        self.loop_until_done();
        assert!(self.input_buffer.is_empty(), "parser finished with remaining input");

        self.tokenizer.end();
        self.tokenizer.sink.sink.finish()
    }
}
impl<Sink: TreeSink> Parser<Sink> {
    /// Wraps this parser in a [`Utf8LossyDecoder`].
    // Clippy expects `from_*` methods not to take `self`; this one consumes
    // the parser on purpose, so the lint is silenced here.
    #[allow(clippy::wrong_self_convention)]
    pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
        Utf8LossyDecoder::new(self)
    }

    /// Feeds the input buffer to the tokenizer repeatedly until a call
    /// returns `TokenizerResult::Done`.
    fn loop_until_done(&self) {
        // Any result other than `Done` just triggers another `feed` call,
        // so the loop body has nothing to do.
        while !matches!(
            self.tokenizer.feed(&self.input_buffer),
            TokenizerResult::Done
        ) {}
    }
}