use super::*;
use crate::base::{Bytes, Range};
use crate::html::{LocalName, Namespace};
use crate::parser::{
Lexeme, LexemeSink, NonTagContentLexeme, ParserDirective, ParserOutputSink, TagHintSink,
TagLexeme, TagTokenOutline,
};
use crate::rewritable_units::{
DocumentEnd, Serialize, ToToken, Token, TokenCaptureFlags, TokenCapturer, TokenCapturerEvent,
};
use crate::rewriter::RewritingError;
use encoding_rs::Encoding;
use std::rc::Rc;
use TagTokenOutline::*;
pub struct AuxStartTagInfo<'i> {
pub input: &'i Bytes<'i>,
pub attr_buffer: SharedAttributeBuffer,
pub self_closing: bool,
}
type AuxStartTagInfoRequest<C> =
Box<dyn FnOnce(&mut C, AuxStartTagInfo<'_>) -> Result<TokenCaptureFlags, RewritingError>>;
pub enum DispatcherError<C> {
InfoRequest(AuxStartTagInfoRequest<C>),
RewritingError(RewritingError),
}
pub type StartTagHandlingResult<C> = Result<TokenCaptureFlags, DispatcherError<C>>;
pub trait TransformController: Sized {
fn initial_capture_flags(&self) -> TokenCaptureFlags;
fn handle_start_tag(&mut self, name: LocalName, ns: Namespace) -> StartTagHandlingResult<Self>;
fn handle_end_tag(&mut self, name: LocalName) -> TokenCaptureFlags;
fn handle_token(&mut self, token: &mut Token) -> Result<(), RewritingError>;
fn handle_end(&mut self, document_end: &mut DocumentEnd) -> Result<(), RewritingError>;
fn should_emit_content(&self) -> bool;
}
pub trait OutputSink {
fn handle_chunk(&mut self, chunk: &[u8]);
}
impl<F: FnMut(&[u8])> OutputSink for F {
fn handle_chunk(&mut self, chunk: &[u8]) {
self(chunk);
}
}
pub struct Dispatcher<C, O>
where
C: TransformController,
O: OutputSink,
{
transform_controller: C,
output_sink: O,
remaining_content_start: usize,
token_capturer: TokenCapturer,
got_flags_from_hint: bool,
pending_element_aux_info_req: Option<AuxStartTagInfoRequest<C>>,
emission_enabled: bool,
encoding: &'static Encoding,
}
impl<C, O> Dispatcher<C, O>
where
C: TransformController,
O: OutputSink,
{
pub fn new(transform_controller: C, output_sink: O, encoding: &'static Encoding) -> Self {
let initial_capture_flags = transform_controller.initial_capture_flags();
Dispatcher {
transform_controller,
output_sink,
remaining_content_start: 0,
token_capturer: TokenCapturer::new(initial_capture_flags, encoding),
got_flags_from_hint: false,
pending_element_aux_info_req: None,
emission_enabled: true,
encoding,
}
}
pub fn flush_remaining_input(&mut self, input: &[u8], consumed_byte_count: usize) {
let output = &input[self.remaining_content_start..consumed_byte_count];
if self.emission_enabled && !output.is_empty() {
self.output_sink.handle_chunk(&output);
}
self.remaining_content_start = 0;
}
pub fn finish(&mut self, input: &[u8]) -> Result<(), RewritingError> {
self.flush_remaining_input(input, input.len());
let mut document_end = DocumentEnd::new(&mut self.output_sink, self.encoding);
self.transform_controller.handle_end(&mut document_end)?;
self.output_sink.handle_chunk(&[]);
Ok(())
}
fn try_produce_token_from_lexeme<'i, T>(
&mut self,
lexeme: &Lexeme<'i, T>,
) -> Result<(), RewritingError>
where
Lexeme<'i, T>: ToToken,
{
let transform_controller = &mut self.transform_controller;
let output_sink = &mut self.output_sink;
let emission_enabled = self.emission_enabled;
let lexeme_range = lexeme.raw_range();
let remaining_content_start = self.remaining_content_start;
let mut lexeme_consumed = false;
self.token_capturer.feed(lexeme, |event| {
match event {
TokenCapturerEvent::LexemeConsumed => {
let chunk = lexeme.input().slice(Range {
start: remaining_content_start,
end: lexeme_range.start,
});
lexeme_consumed = true;
if emission_enabled && chunk.len() > 0 {
output_sink.handle_chunk(&chunk);
}
}
TokenCapturerEvent::TokenProduced(mut token) => {
trace!(@output token);
transform_controller.handle_token(&mut token)?;
if emission_enabled {
token.to_bytes(&mut |c| output_sink.handle_chunk(c));
}
}
}
Ok(())
})?;
if lexeme_consumed {
self.remaining_content_start = lexeme_range.end;
}
Ok(())
}
#[inline]
fn get_next_parser_directive(&self) -> ParserDirective {
if self.token_capturer.has_captures() {
ParserDirective::Lex
} else {
ParserDirective::WherePossibleScanForTagsOnly
}
}
fn adjust_capture_flags_for_tag_lexeme(
&mut self,
lexeme: &TagLexeme,
) -> Result<(), RewritingError> {
let input = lexeme.input();
macro_rules! get_flags_from_aux_info_res {
($handler:expr, $attributes:expr, $self_closing:expr) => {
$handler(
&mut self.transform_controller,
AuxStartTagInfo {
input,
attr_buffer: Rc::clone($attributes),
self_closing: $self_closing,
},
)
};
}
let capture_flags = match self.pending_element_aux_info_req.take() {
Some(aux_info_req) => match *lexeme.token_outline() {
StartTag {
ref attributes,
self_closing,
..
} => get_flags_from_aux_info_res!(aux_info_req, attributes, self_closing),
_ => unreachable!("Tag should be a start tag at this point"),
},
None => match *lexeme.token_outline() {
StartTag {
name,
name_hash,
ns,
ref attributes,
self_closing,
} => {
let name = LocalName::new(input, name, name_hash);
match self.transform_controller.handle_start_tag(name, ns) {
Ok(flags) => Ok(flags),
Err(DispatcherError::InfoRequest(aux_info_req)) => {
get_flags_from_aux_info_res!(aux_info_req, attributes, self_closing)
}
Err(DispatcherError::RewritingError(e)) => Err(e),
}
}
EndTag { name, name_hash } => {
let name = LocalName::new(input, name, name_hash);
Ok(self.transform_controller.handle_end_tag(name))
}
},
};
match capture_flags {
Ok(flags) => {
self.token_capturer.set_capture_flags(flags);
Ok(())
}
Err(e) => Err(e),
}
}
#[inline]
fn apply_capture_flags_from_hint_and_get_next_parser_directive(
&mut self,
flags: TokenCaptureFlags,
) -> ParserDirective {
self.token_capturer.set_capture_flags(flags);
self.got_flags_from_hint = true;
self.get_next_parser_directive()
}
#[inline]
fn flush_pending_captured_text(&mut self) -> Result<(), RewritingError> {
let transform_controller = &mut self.transform_controller;
let output_sink = &mut self.output_sink;
let emission_enabled = self.emission_enabled;
self.token_capturer.flush_pending_text(&mut |event| {
if let TokenCapturerEvent::TokenProduced(mut token) = event {
trace!(@output token);
transform_controller.handle_token(&mut token)?;
if emission_enabled {
token.to_bytes(&mut |c| output_sink.handle_chunk(c));
}
}
Ok(())
})?;
Ok(())
}
#[inline]
fn should_stop_removing_element_content(&self) -> bool {
!self.emission_enabled && self.transform_controller.should_emit_content()
}
}
impl<C, O> LexemeSink for Dispatcher<C, O>
where
C: TransformController,
O: OutputSink,
{
fn handle_tag(&mut self, lexeme: &TagLexeme) -> Result<ParserDirective, RewritingError> {
self.flush_pending_captured_text()?;
if self.got_flags_from_hint {
self.got_flags_from_hint = false;
} else {
self.adjust_capture_flags_for_tag_lexeme(lexeme)?;
}
if let TagTokenOutline::EndTag { .. } = lexeme.token_outline() {
if self.should_stop_removing_element_content() {
self.emission_enabled = true;
self.remaining_content_start = lexeme.raw_range().start;
}
}
self.try_produce_token_from_lexeme(lexeme)?;
self.emission_enabled = self.transform_controller.should_emit_content();
Ok(self.get_next_parser_directive())
}
#[inline]
fn handle_non_tag_content(
&mut self,
lexeme: &NonTagContentLexeme,
) -> Result<(), RewritingError> {
self.try_produce_token_from_lexeme(lexeme)
}
}
impl<C, O> TagHintSink for Dispatcher<C, O>
where
C: TransformController,
O: OutputSink,
{
fn handle_start_tag_hint(
&mut self,
name: LocalName,
ns: Namespace,
) -> Result<ParserDirective, RewritingError> {
match self.transform_controller.handle_start_tag(name, ns) {
Ok(flags) => {
Ok(self.apply_capture_flags_from_hint_and_get_next_parser_directive(flags))
}
Err(DispatcherError::InfoRequest(aux_info_req)) => {
self.got_flags_from_hint = false;
self.pending_element_aux_info_req = Some(aux_info_req);
Ok(ParserDirective::Lex)
}
Err(DispatcherError::RewritingError(e)) => Err(e),
}
}
fn handle_end_tag_hint(&mut self, name: LocalName) -> Result<ParserDirective, RewritingError> {
self.flush_pending_captured_text()?;
let mut flags = self.transform_controller.handle_end_tag(name);
if self.should_stop_removing_element_content() {
flags |= TokenCaptureFlags::NEXT_END_TAG;
}
Ok(self.apply_capture_flags_from_hint_and_get_next_parser_directive(flags))
}
}
impl<C, O> ParserOutputSink for Dispatcher<C, O>
where
C: TransformController,
O: OutputSink,
{
}