lol_html/rewriter/
settings.rs

1use crate::rewritable_units::{Comment, Doctype, DocumentEnd, Element, EndTag, TextChunk};
2use crate::selectors_vm::Selector;
3// N.B. `use crate::` will break this because the constructor is not public, only the struct itself
4use super::AsciiCompatibleEncoding;
5use std::borrow::Cow;
6use std::error::Error;
7
/// Trait used to parameterize the type of handlers used in the rewriter.
///
/// This trait is meant to be an implementation detail for the [`Send`-compatible type aliases](crate::send).
/// We don't recommend writing code generic over [`HandlerTypes`], because it makes working with closures much more difficult.
///
/// Many types like [`Element`] and [`ElementHandler`] have a hidden generic type that defaults to `LocalHandlerTypes`.
/// If you need to use `Send`-able handlers, remove the default type by replacing it with `_`, e.g. `Element<'_, '_, _>`.
#[diagnostic::on_unimplemented(
    note = "If `{Self}` is a generic type, add `{Self}: HandlerTypes` trait bound, otherwise replace `{Self}` with `LocalHandlerTypes`",
    note = "The concrete type of `{Self}` can only be either `LocalHandlerTypes` to allow non-`Send` closures in content handlers, or `SendHandlerTypes` to require `Send` closures"
)]
pub trait HandlerTypes: Sized {
    /// Handler type for [`Doctype`].
    type DoctypeHandler<'handler>: FnMut(&mut Doctype<'_>) -> HandlerResult + 'handler;
    /// Handler type for [`Comment`].
    ///
    /// The entire content of the comment will be buffered.
    type CommentHandler<'handler>: FnMut(&mut Comment<'_>) -> HandlerResult + 'handler;
    /// Handler type for [`TextChunk`] fragments. Beware: this is tricky to use.
    ///
    /// The text chunks are **not** text DOM nodes. They are fragments of text nodes, split at arbitrary points.
    ///
    /// See [`TextChunk`] documentation for more info. See also [`TextChunk::last_in_text_node()`].
    type TextHandler<'handler>: FnMut(&mut TextChunk<'_>) -> HandlerResult + 'handler;
    /// Handler type for [`Element`].
    type ElementHandler<'handler>: FnMut(&mut Element<'_, '_, Self>) -> HandlerResult + 'handler;
    /// Handler type for [`EndTag`].
    ///
    /// Unlike the handlers above this is an `FnOnce`, so it can be called at most once.
    type EndTagHandler<'handler>: FnOnce(&mut EndTag<'_>) -> HandlerResult + 'handler;
    /// Handler type for [`DocumentEnd`].
    ///
    /// This is an `FnOnce`, so it can be called at most once.
    type EndHandler<'handler>: FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + 'handler;

    // Inside the HTML rewriter we need to create handlers, and they need to be the most constrained
    // possible version of a handler (i.e. if we have `Send` and non-`Send` handlers we need to
    // create a `Send` handler to make it compatible with both classes of handlers), so that's
    // what we offer below.
    //
    // Note that in the HTML rewriter all we have is an abstract `H` that implements `HandlerTypes`.
    // Therefore, there is no direct way of creating a handler that is compatible with *all* possible
    // implementations of `HandlerTypes`, so each implementation of `HandlerTypes` needs to provide
    // a way to create a handler compatible with itself.

    /// Converts a `Send`-compatible end tag handler into this implementation's
    /// [`EndTagHandler`](Self::EndTagHandler) type.
    #[doc(hidden)]
    fn new_end_tag_handler<'handler>(
        handler: impl IntoHandler<EndTagHandlerSend<'handler>>,
    ) -> Self::EndTagHandler<'handler>;

    /// Converts a `Send`-compatible element handler into this implementation's
    /// [`ElementHandler`](Self::ElementHandler) type.
    #[doc(hidden)]
    fn new_element_handler<'handler>(
        handler: impl IntoHandler<ElementHandlerSend<'handler, Self>>,
    ) -> Self::ElementHandler<'handler>;

    /// Creates a handler by running multiple handlers in sequence.
    ///
    /// The implementations in this file call the handlers in `Vec` order and stop at the
    /// first one that returns an error.
    #[doc(hidden)]
    fn combine_handlers(handlers: Vec<Self::EndTagHandler<'_>>) -> Self::EndTagHandler<'_>;
}
63
64/// Handler type for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
65pub struct LocalHandlerTypes {}
66
67impl HandlerTypes for LocalHandlerTypes {
68    type DoctypeHandler<'h> = DoctypeHandler<'h>;
69    type CommentHandler<'h> = CommentHandler<'h>;
70    type TextHandler<'h> = TextHandler<'h>;
71    type ElementHandler<'h> = ElementHandler<'h>;
72    type EndTagHandler<'h> = EndTagHandler<'h>;
73    type EndHandler<'h> = EndHandler<'h>;
74
75    fn new_end_tag_handler<'h>(
76        handler: impl IntoHandler<EndTagHandlerSend<'h>>,
77    ) -> Self::EndTagHandler<'h> {
78        handler.into_handler()
79    }
80
81    fn new_element_handler<'h>(
82        handler: impl IntoHandler<ElementHandlerSend<'h, Self>>,
83    ) -> Self::ElementHandler<'h> {
84        handler.into_handler()
85    }
86
87    fn combine_handlers(handlers: Vec<Self::EndTagHandler<'_>>) -> Self::EndTagHandler<'_> {
88        Box::new(move |end_tag: &mut EndTag<'_>| {
89            for handler in handlers {
90                handler(end_tag)?;
91            }
92
93            Ok(())
94        })
95    }
96}
97
98/// Marker type for sendable handlers. Use aliases from the [`send`](crate::send) module.
99#[doc(hidden)]
100pub struct SendHandlerTypes {}
101
102impl HandlerTypes for SendHandlerTypes {
103    type DoctypeHandler<'h> = DoctypeHandlerSend<'h>;
104    type CommentHandler<'h> = CommentHandlerSend<'h>;
105    type TextHandler<'h> = TextHandlerSend<'h>;
106    type ElementHandler<'h> = ElementHandlerSend<'h, Self>;
107    type EndTagHandler<'h> = EndTagHandlerSend<'h>;
108    type EndHandler<'h> = EndHandlerSend<'h>;
109
110    fn new_end_tag_handler<'h>(
111        handler: impl IntoHandler<Self::EndTagHandler<'h>>,
112    ) -> Self::EndTagHandler<'h> {
113        handler.into_handler()
114    }
115
116    fn new_element_handler<'h>(
117        handler: impl IntoHandler<Self::ElementHandler<'h>>,
118    ) -> Self::ElementHandler<'h> {
119        handler.into_handler()
120    }
121
122    fn combine_handlers(handlers: Vec<Self::EndTagHandler<'_>>) -> Self::EndTagHandler<'_> {
123        Box::new(move |end_tag: &mut EndTag<'_>| {
124            for handler in handlers {
125                handler(end_tag)?;
126            }
127
128            Ok(())
129        })
130    }
131}
132
/// The result of a handler.
///
/// `Ok(())` on success; on failure, any boxed error that is `Send + Sync + 'static`.
pub type HandlerResult = Result<(), Box<dyn Error + Send + Sync + 'static>>;

/// Boxed closure for handling the [document type declaration].
///
/// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype
pub type DoctypeHandler<'h> = Box<dyn FnMut(&mut Doctype<'_>) -> HandlerResult + 'h>;
/// Boxed closure for handling HTML comments.
pub type CommentHandler<'h> = Box<dyn FnMut(&mut Comment<'_>) -> HandlerResult + 'h>;
/// Boxed closure for handling [text chunks](TextChunk) present in the HTML.
pub type TextHandler<'h> = Box<dyn FnMut(&mut TextChunk<'_>) -> HandlerResult + 'h>;
/// Boxed closure for handling elements matched by a selector.
pub type ElementHandler<'h, H = LocalHandlerTypes> =
    Box<dyn FnMut(&mut Element<'_, '_, H>) -> HandlerResult + 'h>;
/// Boxed closure for handling end tags.
pub type EndTagHandler<'h> = Box<dyn FnOnce(&mut EndTag<'_>) -> HandlerResult + 'h>;
/// Boxed closure for handling the document end. This is called after the last chunk is processed.
pub type EndHandler<'h> = Box<dyn FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + 'h>;
151
/// [Sendable](crate::send) boxed closure for handling the [document type declaration].
///
/// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype
///
/// See also non-sendable [`DoctypeHandler`](crate::DoctypeHandler).
pub type DoctypeHandlerSend<'h> = Box<dyn FnMut(&mut Doctype<'_>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling HTML comments.
///
/// See also non-sendable [`CommentHandler`](crate::CommentHandler).
pub type CommentHandlerSend<'h> = Box<dyn FnMut(&mut Comment<'_>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling [text chunks](TextChunk) present in the HTML.
///
/// See also non-sendable [`TextHandler`](crate::TextHandler).
pub type TextHandlerSend<'h> = Box<dyn FnMut(&mut TextChunk<'_>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling elements matched by a selector.
///
/// See also non-sendable [`ElementHandler`](crate::ElementHandler).
pub type ElementHandlerSend<'h, H = SendHandlerTypes> =
    Box<dyn FnMut(&mut Element<'_, '_, H>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling end tags.
///
/// See also non-sendable [`EndTagHandler`](crate::EndTagHandler).
pub type EndTagHandlerSend<'h> = Box<dyn FnOnce(&mut EndTag<'_>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling the document end. This is called after the last chunk is processed.
///
/// See also non-sendable [`EndHandler`](crate::EndHandler).
pub type EndHandlerSend<'h> = Box<dyn FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + Send + 'h>;
177
/// Trait that allows closures to be used as handlers.
///
/// `T` is the target handler type (one of the boxed closure aliases in this module).
#[diagnostic::on_unimplemented(
    message = "Handler could not be made from `{Self}`\nThe internal `IntoHandler` trait is implemented for closures like `FnMut(&mut _) -> HandlerResult` and `FnOnce(&mut _) -> HandlerResult`, with `+ Send` if needed",
    note = "Ensure that the closure's arguments are correct (add explicit parameter types if needed) and that it implements `Send` if using `Send`-able handlers"
)]
#[doc(hidden)]
pub trait IntoHandler<T: Sized> {
    /// Consumes `self` and returns it as the handler type `T`.
    fn into_handler(self) -> T;
}
187
188impl<'h, F: FnMut(&mut Doctype<'_>) -> HandlerResult + 'h> IntoHandler<DoctypeHandler<'h>> for F {
189    fn into_handler(self) -> DoctypeHandler<'h> {
190        Box::new(self)
191    }
192}
193
194impl<'h, F: FnMut(&mut Comment<'_>) -> HandlerResult + 'h> IntoHandler<CommentHandler<'h>> for F {
195    fn into_handler(self) -> CommentHandler<'h> {
196        Box::new(self)
197    }
198}
199
200impl<'h, F: FnMut(&mut TextChunk<'_>) -> HandlerResult + 'h> IntoHandler<TextHandler<'h>> for F {
201    fn into_handler(self) -> TextHandler<'h> {
202        Box::new(self)
203    }
204}
205
206impl<'h, F: FnMut(&mut Element<'_, '_, LocalHandlerTypes>) -> HandlerResult + 'h>
207    IntoHandler<ElementHandler<'h>> for F
208{
209    fn into_handler(self) -> ElementHandler<'h> {
210        Box::new(self)
211    }
212}
213
214impl<'h, F: FnOnce(&mut EndTag<'_>) -> HandlerResult + 'h> IntoHandler<EndTagHandler<'h>> for F {
215    fn into_handler(self) -> EndTagHandler<'h> {
216        Box::new(self)
217    }
218}
219
220impl<'h, F: FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + 'h> IntoHandler<EndHandler<'h>> for F {
221    fn into_handler(self) -> EndHandler<'h> {
222        Box::new(self)
223    }
224}
225
226impl<'h, F: FnMut(&mut Doctype<'_>) -> HandlerResult + Send + 'h>
227    IntoHandler<DoctypeHandlerSend<'h>> for F
228{
229    fn into_handler(self) -> DoctypeHandlerSend<'h> {
230        Box::new(self)
231    }
232}
233
234impl<'h, F: FnMut(&mut Comment<'_>) -> HandlerResult + Send + 'h>
235    IntoHandler<CommentHandlerSend<'h>> for F
236{
237    fn into_handler(self) -> CommentHandlerSend<'h> {
238        Box::new(self)
239    }
240}
241
242impl<'h, F: FnMut(&mut TextChunk<'_>) -> HandlerResult + Send + 'h> IntoHandler<TextHandlerSend<'h>>
243    for F
244{
245    fn into_handler(self) -> TextHandlerSend<'h> {
246        Box::new(self)
247    }
248}
249
250impl<'h, H: HandlerTypes, F: FnMut(&mut Element<'_, '_, H>) -> HandlerResult + Send + 'h>
251    IntoHandler<ElementHandlerSend<'h, H>> for F
252{
253    fn into_handler(self) -> ElementHandlerSend<'h, H> {
254        Box::new(self)
255    }
256}
257
258impl<'h, F: FnOnce(&mut EndTag<'_>) -> HandlerResult + Send + 'h> IntoHandler<EndTagHandlerSend<'h>>
259    for F
260{
261    fn into_handler(self) -> EndTagHandlerSend<'h> {
262        Box::new(self)
263    }
264}
265
266impl<'h, F: FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + Send + 'h>
267    IntoHandler<EndHandlerSend<'h>> for F
268{
269    fn into_handler(self) -> EndHandlerSend<'h> {
270        Box::new(self)
271    }
272}
273
274/// Specifies element content handlers associated with a selector.
275pub struct ElementContentHandlers<'h, H: HandlerTypes = LocalHandlerTypes> {
276    /// Element handler. See [`element!`](crate::element) and [`HandlerTypes::ElementHandler`].
277    pub element: Option<H::ElementHandler<'h>>,
278    /// Comment handler. See [`comments!`](crate::comments) and [`HandlerTypes::CommentHandler`].
279    pub comments: Option<H::CommentHandler<'h>>,
280    /// Text handler that receives fragments of text nodes. See [`TextChunk`], [`text!`](crate::text), and [`HandlerTypes::TextHandler`].
281    pub text: Option<H::TextHandler<'h>>,
282}
283
284impl<H: HandlerTypes> Default for ElementContentHandlers<'_, H> {
285    fn default() -> Self {
286        ElementContentHandlers {
287            element: None,
288            comments: None,
289            text: None,
290        }
291    }
292}
293
294impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> {
295    /// Sets a handler for elements matched by a selector.
296    #[inline]
297    #[must_use]
298    pub fn element(mut self, handler: impl IntoHandler<H::ElementHandler<'h>>) -> Self {
299        self.element = Some(handler.into_handler());
300
301        self
302    }
303
304    /// Sets a handler for HTML comments in the inner content of elements matched by a selector.
305    #[inline]
306    #[must_use]
307    pub fn comments(mut self, handler: impl IntoHandler<H::CommentHandler<'h>>) -> Self {
308        self.comments = Some(handler.into_handler());
309
310        self
311    }
312
313    /// Sets a handler for text chunks in the inner content of elements matched by a selector.
314    #[inline]
315    #[must_use]
316    pub fn text(mut self, handler: impl IntoHandler<H::TextHandler<'h>>) -> Self {
317        self.text = Some(handler.into_handler());
318
319        self
320    }
321}
322
323/// Specifies document-level content handlers.
324///
325/// Some content can't be captured by CSS selectors as it lays outside of content of any
326/// of the HTML elements. Document-level handlers allow capture such a content:
327///
328/// ```html
329/// <!doctype html>
330/// <!--
331///     I can't be captured with a selector, but I can be
332///     captured with a document-level comment handler
333/// -->
334/// <html>
335/// <!-- I can be captured with a selector -->
336/// </html>
337/// ```
338pub struct DocumentContentHandlers<'h, H: HandlerTypes = LocalHandlerTypes> {
339    /// Doctype handler. See [`doctype!`](crate::doctype) and [`HandlerTypes::DoctypeHandler`].
340    pub doctype: Option<H::DoctypeHandler<'h>>,
341    /// Comment handler. See [`doc_comments!`](crate::doc_comments) and [`HandlerTypes::CommentHandler`].
342    pub comments: Option<H::CommentHandler<'h>>,
343    /// Text handler that receives fragments of text nodes. See [`TextChunk`], [`doc_text!`](crate::doc_text), and [`HandlerTypes::TextHandler`].
344    pub text: Option<H::TextHandler<'h>>,
345    /// End handler. See [`HandlerTypes::EndHandler`].
346    pub end: Option<H::EndHandler<'h>>,
347}
348
349impl<H: HandlerTypes> Default for DocumentContentHandlers<'_, H> {
350    fn default() -> Self {
351        DocumentContentHandlers {
352            doctype: None,
353            comments: None,
354            text: None,
355            end: None,
356        }
357    }
358}
359
360impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> {
361    /// Sets a handler for the [document type declaration].
362    ///
363    /// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype
364    #[inline]
365    #[must_use]
366    pub fn doctype(mut self, handler: impl IntoHandler<H::DoctypeHandler<'h>>) -> Self {
367        self.doctype = Some(handler.into_handler());
368
369        self
370    }
371
372    /// Sets a handler for all HTML comments present in the input HTML markup.
373    #[inline]
374    #[must_use]
375    pub fn comments(mut self, handler: impl IntoHandler<H::CommentHandler<'h>>) -> Self {
376        self.comments = Some(handler.into_handler());
377
378        self
379    }
380
381    /// Sets a handler for all text chunks present in the input HTML markup.
382    #[inline]
383    #[must_use]
384    pub fn text(mut self, handler: impl IntoHandler<H::TextHandler<'h>>) -> Self {
385        self.text = Some(handler.into_handler());
386
387        self
388    }
389
390    /// Sets a handler for the document end, which is called after the last chunk is processed.
391    #[inline]
392    #[must_use]
393    pub fn end(mut self, handler: impl IntoHandler<H::EndHandler<'h>>) -> Self {
394        self.end = Some(handler.into_handler());
395
396        self
397    }
398}
399
// Internal helper shared by the `element!`, `text!` and `comments!` macros.
//
// Expands to a `(Cow<Selector>, ElementContentHandlers)` tuple in which only the handler
// named by `$handler_name` is set.
#[doc(hidden)]
#[macro_export]
macro_rules! __element_content_handler {
    ($selector:expr, $handler_name:ident, $handler:expr) => {
        (
            // NOTE: the selector is parsed here, and `unwrap()` panics if parsing fails.
            ::std::borrow::Cow::Owned($selector.parse::<$crate::Selector>().unwrap()),
            $crate::ElementContentHandlers::default().$handler_name($handler),
        )
    };
}
410
/// A convenience macro to construct a [rewriting handler](ElementContentHandlers) for elements that can be matched by the
/// specified CSS selector.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, element, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<span id="foo"></span>"#,
///     RewriteStrSettings {
///         element_content_handlers: vec![
///             element!("#foo", |el| {
///                 el.set_inner_content("Hello!", ContentType::Text);
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<span id="foo">Hello!</span>"#);
/// ```
///
/// When using [sendable handlers](crate::send), beware that the [`Element`] type has a generic argument that controls `Send` compatibility.
/// Use [`send::Element`](crate::send::Element) or write the closure's argument's type as `&mut Element<'_, '_, _>`.
///
/// This macro can create either sendable or non-sendable handlers, but not both in a generic context.
/// `H: HandlerTypes` bound won't work with this macro.
#[macro_export(local_inner_macros)]
macro_rules! element {
    ($selector:expr, $handler:expr) => {{
        // Identity function: its bound pins down the closure's signature, which
        // rustc cannot always infer on its own.
        #[inline(always)]
        const fn type_hint<'h, F, H>(handler: F) -> F
        where
            H: $crate::HandlerTypes,
            F: FnMut(&mut $crate::html_content::Element<'_, '_, H>) -> $crate::HandlerResult + 'h,
        {
            handler
        }

        __element_content_handler!($selector, element, type_hint($handler))
    }};
}
456
/// A convenience macro to construct a [rewriting handler](ElementContentHandlers) for fragments of text in the inner content of an
/// element that can be matched by the specified CSS selector. Beware: this is tricky to use.
///
/// The text chunks may split the text nodes into smaller fragments. See [`TextChunk`] for more info.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, text, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<span>Hello</span>"#,
///     RewriteStrSettings {
///         element_content_handlers: vec![
///             text!("span", |t| {
///                 if t.last_in_text_node() {
///                     t.after(" world", ContentType::Text);
///                 }
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<span>Hello world</span>"#);
/// ```
///
/// This macro can create either [sendable](crate::send) or non-sendable handlers, but not both in a generic context.
/// `H: HandlerTypes` bound won't work with this macro.
#[macro_export(local_inner_macros)]
macro_rules! text {
    ($selector:expr, $handler:expr) => {{
        // Without this rust won't be able to always infer the type of the handler.
        // Kept in the same form as the hint in `doc_text!`: a `const fn` with the
        // `TextChunk` lifetime spelled out as `'_`.
        #[inline(always)]
        const fn type_hint<T>(h: T) -> T
        where
            T: FnMut(&mut $crate::html_content::TextChunk<'_>) -> $crate::HandlerResult,
        {
            h
        }

        __element_content_handler!($selector, text, type_hint($handler))
    }};
}
503
/// A convenience macro to construct a [rewriting handler](ElementContentHandlers) for HTML comments in the inner content of
/// an element that can be matched by the specified CSS selector.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, comments, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<span><!-- 42 --></span>"#,
///     RewriteStrSettings {
///         element_content_handlers: vec![
///             comments!("span", |c| {
///                 c.set_text("Hello!").unwrap();
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<span><!--Hello!--></span>"#);
/// ```
///
/// This macro can create either [sendable](crate::send) or non-sendable handlers, but not both in a generic context.
/// `H: HandlerTypes` bound won't work with this macro.
#[macro_export(local_inner_macros)]
macro_rules! comments {
    ($selector:expr, $handler:expr) => {{
        // Identity function: its bound pins down the closure's signature, which
        // rustc cannot always infer on its own.
        #[inline(always)]
        const fn type_hint<F>(handler: F) -> F
        where
            F: FnMut(&mut $crate::html_content::Comment<'_>) -> $crate::HandlerResult,
        {
            handler
        }

        __element_content_handler!($selector, comments, type_hint($handler))
    }};
}
546
/// A convenience macro to construct a [`StreamingHandler`](crate::html_content::StreamingHandler) from a closure.
///
/// For use with [`Element::streaming_replace`], etc.
///
/// The closure must be `'static` (can't capture by a temporary reference), and `Send`, even when using [non-sendable](crate::send) rewriter.
///
/// ```rust
/// use lol_html::{element, streaming, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// RewriteStrSettings {
///     element_content_handlers: vec![
///         element!("div", |element| {
///             element.streaming_replace(streaming!(|sink| {
///                 sink.write_str("…", ContentType::Html);
///                 sink.write_str("…", ContentType::Html);
///                 Ok(())
///             }));
///             Ok(())
///         })
///     ],
///     ..RewriteStrSettings::default()
/// };
/// ```
///
/// Note: if you get "implementation of `FnOnce` is not general enough" error, add explicit argument
/// `sink: &mut StreamingHandlerSink<'_>` to the closure.
#[macro_export(local_inner_macros)]
macro_rules! streaming {
    ($closure:expr) => {{
        use ::std::error::Error;
        use $crate::html_content::StreamingHandlerSink;
        // Without this rust won't be able to always infer the type of the handler.
        // The `Send` bound mirrors the cast below, so a non-`Send` closure is rejected
        // here, at the closure itself, instead of only at the cast.
        #[inline(always)]
        const fn streaming_macro_type_hint<StreamingHandler>(
            handler_closure: StreamingHandler,
        ) -> StreamingHandler
        where
            StreamingHandler:
                FnOnce(&mut StreamingHandlerSink<'_>) -> Result<(), Box<dyn Error + Send + Sync>> + 'static + Send,
        {
            handler_closure
        }

        Box::new(streaming_macro_type_hint($closure))
            as Box<dyn $crate::html_content::StreamingHandler + Send>
    }};
}
595
// Internal helper shared by the `doctype!`, `doc_text!`, `doc_comments!` and `end!` macros.
//
// Expands to a default `DocumentContentHandlers` on which only the builder method named by
// `$handler_name` has been called with `$handler`.
#[doc(hidden)]
#[macro_export]
macro_rules! __document_content_handler {
    ($handler_name:ident, $handler:expr) => {
        $crate::DocumentContentHandlers::default().$handler_name($handler)
    };
}
603
/// A convenience macro to construct a [handler](DocumentContentHandlers) for [document type declarations] in the HTML document.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, doctype, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// rewrite_str(
///     r#"<!doctype html>"#,
///     RewriteStrSettings {
///         document_content_handlers: vec![
///             doctype!(|d| {
///                 assert_eq!(d.name().unwrap(), "html");
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
/// ```
///
/// [document type declarations]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype
#[macro_export(local_inner_macros)]
macro_rules! doctype {
    ($handler:expr) => {{
        // Identity function: its bound pins down the closure's signature, which
        // rustc cannot always infer on its own.
        #[inline(always)]
        const fn type_hint<F>(handler: F) -> F
        where
            F: FnMut(&mut $crate::html_content::Doctype<'_>) -> $crate::HandlerResult,
        {
            handler
        }

        __document_content_handler!(doctype, type_hint($handler))
    }};
}
642
/// A convenience macro to construct a [rewriting handler](DocumentContentHandlers) for all text chunks in the HTML document. Beware: this is tricky to use.
///
/// The text chunks may split the text nodes into smaller fragments. See [`TextChunk`] for more info.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, doc_text, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"Hello<span>Hello</span>Hello"#,
///     RewriteStrSettings {
///         document_content_handlers: vec![
///             doc_text!(|t| {
///                 if t.last_in_text_node() {
///                     t.after(" world", ContentType::Text);
///                 }
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"Hello world<span>Hello world</span>Hello world"#);
/// ```
#[macro_export(local_inner_macros)]
macro_rules! doc_text {
    ($handler:expr) => {{
        // Identity function: its bound pins down the closure's signature, which
        // rustc cannot always infer on its own.
        #[inline(always)]
        const fn type_hint<F>(handler: F) -> F
        where
            F: FnMut(&mut $crate::html_content::TextChunk<'_>) -> $crate::HandlerResult,
        {
            handler
        }

        __document_content_handler!(text, type_hint($handler))
    }};
}
685
/// A convenience macro to construct a [rewriting handler](DocumentContentHandlers) for all HTML comments in the HTML document.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, doc_comments, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<!-- 42 --><span><!-- 42 --></span><!-- 42 -->"#,
///     RewriteStrSettings {
///         document_content_handlers: vec![
///             doc_comments!(|c| {
///                 c.set_text("Hello!").unwrap();
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<!--Hello!--><span><!--Hello!--></span><!--Hello!-->"#);
/// ```
#[macro_export(local_inner_macros)]
macro_rules! doc_comments {
    ($handler:expr) => {{
        // Identity function: its bound pins down the closure's signature, which
        // rustc cannot always infer on its own.
        #[inline(always)]
        const fn type_hint<F>(handler: F) -> F
        where
            F: FnMut(&mut $crate::html_content::Comment<'_>) -> $crate::HandlerResult,
        {
            handler
        }

        __document_content_handler!(comments, type_hint($handler))
    }};
}
724
/// A convenience macro to construct a [rewriting handler](DocumentContentHandlers) for the end of the document.
///
/// This handler will only be called after the rewriter has finished processing the final chunk.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, element, end, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<span>foo</span>"#,
///     RewriteStrSettings {
///         element_content_handlers: vec![
///             element!("span", |el| {
///                 el.append("bar", ContentType::Text);
///
///                 Ok(())
///             })
///         ],
///         document_content_handlers: vec![
///             end!(|end| {
///                 end.append("<div>baz</div>", ContentType::Html);
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<span>foobar</span><div>baz</div>"#);
/// ```
#[macro_export(local_inner_macros)]
macro_rules! end {
    ($handler:expr) => {{
        // Identity function: its bound pins down the closure's signature, which
        // rustc cannot always infer on its own.
        #[inline(always)]
        const fn type_hint<F>(handler: F) -> F
        where
            F: FnOnce(&mut $crate::html_content::DocumentEnd<'_>) -> $crate::HandlerResult,
        {
            handler
        }

        __document_content_handler!(end, type_hint($handler))
    }};
}
772
/// Specifies the memory settings for [`HtmlRewriter`].
///
/// [`HtmlRewriter`]: struct.HtmlRewriter.html
// NOTE: exposed in C API as well, thus repr(C).
#[repr(C)]
pub struct MemorySettings {
    /// Specifies the number of bytes that should be preallocated on [`HtmlRewriter`] instantiation
    /// for the internal parsing buffer.
    ///
    /// In some cases (e.g. when rewriter encounters a start tag represented by two or more input
    /// chunks) the rewriter needs to buffer input content.
    ///
    /// Internal parsing buffer is used in such cases. Reallocations and, thus, performance
    /// degradation can be avoided by preallocating the buffer ahead of time. As a drawback of
    /// this approach, every instance of the rewriter will consume the preallocated amount of
    /// memory.
    ///
    /// It's up to the user to adjust the limit according to their environment limitations.
    ///
    /// ### Default
    ///
    /// `1024` bytes when constructed with `MemorySettings::new()`.
    ///
    /// [`HtmlRewriter`]: struct.HtmlRewriter.html
    pub preallocated_parsing_buffer_size: usize,

    /// Sets a hard limit in bytes on memory consumption of a [`HtmlRewriter`] instance.
    ///
    /// Rewriter's [`write`] and [`end`] methods will error if this limit is exceeded.
    ///
    /// Note that this value doesn't reflect the exact threshold after which the rewriter will
    /// bail out. It is impossible to account for all the memory consumed without a significant
    /// performance penalty. So, instead, we try to provide the best approximation by measuring
    /// the memory consumed by internal buffers that grow depending on the input.
    ///
    /// ### Default
    ///
    /// [`std::usize::MAX`] when constructed with `MemorySettings::new()`.
    ///
    /// [`HtmlRewriter`]: struct.HtmlRewriter.html
    /// [`std::usize::MAX`]: https://doc.rust-lang.org/std/usize/constant.MAX.html
    /// [`write`]: struct.HtmlRewriter.html#method.write
    /// [`end`]: struct.HtmlRewriter.html#method.end
    pub max_allowed_memory_usage: usize,
}

impl Default for MemorySettings {
    /// Same values as [`MemorySettings::new`].
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}

impl MemorySettings {
    /// Create a new [`MemorySettings`] with default values: a `1024`-byte preallocated
    /// parsing buffer and a memory limit of `usize::MAX`.
    #[must_use]
    pub fn new() -> Self {
        let preallocated_parsing_buffer_size = 1024;
        let max_allowed_memory_usage = usize::MAX;

        Self {
            preallocated_parsing_buffer_size,
            max_allowed_memory_usage,
        }
    }
}
836
/// Specifies settings for [`HtmlRewriter`].
///
/// [`HtmlRewriter`]: struct.HtmlRewriter.html
pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> {
    /// Specifies CSS selectors and rewriting handlers for elements and their inner content.
    ///
    /// ### Hint
    ///
    /// [`element`], [`comments`] and [`text`] convenience macros can be used to construct a
    /// `(Selector, ElementContentHandlers)` tuple.
    ///
    /// ### Example
    /// ```
    /// use std::borrow::Cow;
    /// use lol_html::{ElementContentHandlers, Settings};
    /// use lol_html::html_content::{Comment, Element};
    ///
    /// let settings = Settings {
    ///     element_content_handlers: vec! [
    ///         (
    ///             Cow::Owned("div[foo]".parse().unwrap()),
    ///             ElementContentHandlers::default().element(|el: &mut Element| {
    ///                 // ...
    ///
    ///                 Ok(())
    ///             })
    ///         ),
    ///         (
    ///             Cow::Owned("body".parse().unwrap()),
    ///             ElementContentHandlers::default().comments(|c: &mut Comment| {
    ///                 // ...
    ///
    ///                 Ok(())
    ///             })
    ///         )
    ///     ],
    ///     ..Settings::new()
    /// };
    /// ```
    ///
    /// [`element`]: macro.element.html
    /// [`comments`]: macro.comments.html
    /// [`text`]: macro.text.html
    pub element_content_handlers: Vec<(
        Cow<'selectors, Selector>,
        ElementContentHandlers<'handlers, H>,
    )>,

    /// Specifies rewriting handlers for the content without associating it to a particular
    /// CSS selector.
    ///
    /// Refer to [`DocumentContentHandlers`] documentation for more information.
    ///
    /// ### Hint
    /// [`doctype`], [`doc_comments`] and [`doc_text`] convenience macros can be used to construct
    /// items of this vector.
    ///
    /// [`DocumentContentHandlers`]: struct.DocumentContentHandlers.html
    /// [`doctype`]: macro.doctype.html
    /// [`doc_comments`]: macro.doc_comments.html
    /// [`doc_text`]: macro.doc_text.html
    pub document_content_handlers: Vec<DocumentContentHandlers<'handlers, H>>,

    /// Specifies the [character encoding] for the input and the output of the rewriter.
    ///
    /// Can be a [label] for any of the web-compatible encodings with an exception for `UTF-16LE`,
    /// `UTF-16BE`, `ISO-2022-JP` and `replacement` (these non-ASCII-compatible encodings
    /// are not supported).
    ///
    /// [character encoding]: https://developer.mozilla.org/en-US/docs/Glossary/character_encoding
    /// [label]: https://encoding.spec.whatwg.org/#names-and-labels
    ///
    /// ### Default
    ///
    /// `"utf-8"` when constructed with `Settings::new()`.
    pub encoding: AsciiCompatibleEncoding,

    /// Specifies the memory settings.
    pub memory_settings: MemorySettings,

    /// If set to `true` the rewriter bails out if it encounters markup that drives the HTML parser
    /// into an ambiguous state.
    ///
    /// Since the rewriter operates on a token stream and doesn't have access to a full
    /// DOM-tree, there are certain rare cases of non-conforming HTML markup which can't be
    /// guaranteed to be parsed correctly without an ability to backtrace the tree.
    ///
    /// Therefore, due to security considerations, sometimes it's preferable to abort the
    /// rewriting process in case of such uncertainty.
    ///
    /// One of the simplest examples of such markup is the following:
    ///
    /// ```html
    /// ...
    /// <select><xmp><script>"use strict";</script></select>
    /// ...
    /// ```
    ///
    /// The `<xmp>` element is not allowed inside the `<select>` element, so in a browser the start
    /// tag for `<xmp>` will be ignored and following `<script>` element will be parsed and executed.
    ///
    /// On the other hand, the `<select>` element itself can be also ignored depending on the
    /// context in which it was parsed. In this case, the `<xmp>` element will not be ignored
    /// and the `<script>` element along with its content will be parsed as a simple text inside
    /// it.
    ///
    /// So, in this case the parser needs an ability to backtrace the DOM-tree to figure out the
    /// correct parsing context.
    ///
    /// ### Default
    ///
    /// `true` when constructed with `Settings::new()`.
    pub strict: bool,

    /// If enabled the rewriter enables support for [Edge Side Includes] tags, treating them as
    /// [void elements] and allowing them to be replaced with desired content.
    ///
    /// [Edge Side Includes]: https://www.w3.org/TR/esi-lang/
    /// [void elements]: https://developer.mozilla.org/en-US/docs/Glossary/Void_element
    ///
    /// ### Default
    ///
    /// `false` when constructed with `Settings::new()`.
    pub enable_esi_tags: bool,

    /// If enabled the rewriter will dynamically change the charset when it encounters a `meta` tag
    /// that specifies the charset.
    ///
    /// The charset can be modified by the `meta` tag with
    ///
    /// ```html
    /// <meta charset="windows-1251">
    /// ```
    ///
    /// or
    ///
    /// ```html
    /// <meta http-equiv="content-type" content="text/html; charset=windows-1251">
    /// ```
    ///
    /// Note that an explicit `charset` in the `Content-type` header should take precedence over
    /// the `meta` tag, so only enable this if the content type does not explicitly specify a
    /// charset.  For details check [this][html5encoding].
    ///
    /// [html5encoding]: https://blog.whatwg.org/the-road-to-html-5-character-encoding
    ///
    /// ### Default
    ///
    /// `false` when constructed with `Settings::new()`.
    pub adjust_charset_on_meta_tag: bool,
}
984
985impl Default for Settings<'_, '_, LocalHandlerTypes> {
986    #[inline]
987    fn default() -> Self {
988        Self::new()
989    }
990}
991
992impl Settings<'_, '_, LocalHandlerTypes> {
993    /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
994    #[inline]
995    #[must_use]
996    pub fn new() -> Self {
997        Self::new_for_handler_types()
998    }
999}
1000
1001impl Settings<'_, '_, SendHandlerTypes> {
1002    /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
1003    #[inline]
1004    #[must_use]
1005    pub fn new_send() -> Self {
1006        Self::new_for_handler_types()
1007    }
1008}
1009
1010impl<H: HandlerTypes> Settings<'_, '_, H> {
1011    /// Creates [`Settings`].
1012    #[inline]
1013    #[must_use]
1014    pub fn new_for_handler_types() -> Self {
1015        Settings {
1016            element_content_handlers: vec![],
1017            document_content_handlers: vec![],
1018            encoding: AsciiCompatibleEncoding(encoding_rs::UTF_8),
1019            memory_settings: MemorySettings::default(),
1020            strict: true,
1021            enable_esi_tags: false,
1022            adjust_charset_on_meta_tag: false,
1023        }
1024    }
1025}
1026
1027impl<'h, 's, H: HandlerTypes> From<RewriteStrSettings<'h, 's, H>> for Settings<'h, 's, H> {
1028    #[inline]
1029    fn from(settings: RewriteStrSettings<'h, 's, H>) -> Self {
1030        Settings {
1031            element_content_handlers: settings.element_content_handlers,
1032            document_content_handlers: settings.document_content_handlers,
1033            strict: settings.strict,
1034            enable_esi_tags: settings.enable_esi_tags,
1035            ..Settings::new_for_handler_types()
1036        }
1037    }
1038}
1039
/// Specifies settings for the [`rewrite_str`] function.
///
/// [`rewrite_str`]: fn.rewrite_str.html
pub struct RewriteStrSettings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> {
    /// Specifies CSS selectors and rewriting handlers for elements and their inner content.
    ///
    /// ### Hint
    ///
    /// [`element`], [`comments`] and [`text`] convenience macros can be used to construct a
    /// `(Selector, ElementContentHandlers)` tuple.
    ///
    /// ### Example
    /// ```
    /// use std::borrow::Cow;
    /// use lol_html::{ElementContentHandlers, RewriteStrSettings};
    /// use lol_html::html_content::{Comment, Element};
    ///
    /// let settings = RewriteStrSettings {
    ///     element_content_handlers: vec! [
    ///         (
    ///             Cow::Owned("div[foo]".parse().unwrap()),
    ///             ElementContentHandlers::default().element(|el: &mut Element| {
    ///                 // ...
    ///
    ///                 Ok(())
    ///             })
    ///         ),
    ///         (
    ///             Cow::Owned("div[foo]".parse().unwrap()),
    ///             ElementContentHandlers::default().comments(|c: &mut Comment| {
    ///                 // ...
    ///
    ///                 Ok(())
    ///             })
    ///         )
    ///     ],
    ///     ..RewriteStrSettings::new()
    /// };
    /// ```
    ///
    /// [`element`]: macro.element.html
    /// [`comments`]: macro.comments.html
    /// [`text`]: macro.text.html
    pub element_content_handlers: Vec<(
        Cow<'selectors, Selector>,
        ElementContentHandlers<'handlers, H>,
    )>,

    /// Specifies rewriting handlers for the content without associating it to a particular
    /// CSS selector.
    ///
    /// Refer to [`DocumentContentHandlers`] documentation for more information.
    ///
    /// ### Hint
    /// [`doctype`], [`doc_comments`] and [`doc_text`] convenience macros can be used to construct
    /// items of this vector.
    ///
    /// [`DocumentContentHandlers`]: struct.DocumentContentHandlers.html
    /// [`doctype`]: macro.doctype.html
    /// [`doc_comments`]: macro.doc_comments.html
    /// [`doc_text`]: macro.doc_text.html
    pub document_content_handlers: Vec<DocumentContentHandlers<'handlers, H>>,

    /// If set to `true` the rewriter bails out if it encounters markup that drives the HTML parser
    /// into an ambiguous state.
    ///
    /// Since the rewriter operates on a token stream and doesn't have access to a full
    /// DOM-tree, there are certain rare cases of non-conforming HTML markup which can't be
    /// guaranteed to be parsed correctly without an ability to backtrace the tree.
    ///
    /// Therefore, due to security considerations, sometimes it's preferable to abort the
    /// rewriting process in case of such uncertainty.
    ///
    /// One of the simplest examples of such markup is the following:
    ///
    /// ```html
    /// ...
    /// <select><xmp><script>"use strict";</script></select>
    /// ...
    /// ```
    ///
    /// The `<xmp>` element is not allowed inside the `<select>` element, so in a browser the start
    /// tag for `<xmp>` will be ignored and following `<script>` element will be parsed and executed.
    ///
    /// On the other hand, the `<select>` element itself can be also ignored depending on the
    /// context in which it was parsed. In this case, the `<xmp>` element will not be ignored
    /// and the `<script>` element along with its content will be parsed as a simple text inside
    /// it.
    ///
    /// So, in this case the parser needs an ability to backtrace the DOM-tree to figure out the
    /// correct parsing context.
    ///
    /// ### Default
    ///
    /// `true` when constructed with `RewriteStrSettings::new()`.
    pub strict: bool,

    /// If enabled the rewriter enables support for [Edge Side Includes] tags, treating them as
    /// [void elements] and allowing them to be replaced with desired content.
    ///
    /// [Edge Side Includes]: https://www.w3.org/TR/esi-lang/
    /// [void elements]: https://developer.mozilla.org/en-US/docs/Glossary/Void_element
    ///
    /// ### Default
    ///
    /// `true` when constructed with `RewriteStrSettings::new()`. Note that this differs from
    /// [`Settings`], whose constructors leave this flag set to `false`.
    pub enable_esi_tags: bool,
}
1144
1145impl Default for RewriteStrSettings<'_, '_, LocalHandlerTypes> {
1146    #[inline]
1147    fn default() -> Self {
1148        Self::new()
1149    }
1150}
1151
1152impl RewriteStrSettings<'_, '_, LocalHandlerTypes> {
1153    /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
1154    #[inline]
1155    #[must_use]
1156    pub const fn new() -> Self {
1157        Self::new_for_handler_types()
1158    }
1159}
1160
1161impl RewriteStrSettings<'_, '_, SendHandlerTypes> {
1162    /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
1163    #[inline]
1164    #[must_use]
1165    pub const fn new_send() -> Self {
1166        Self::new_for_handler_types()
1167    }
1168}
1169
1170impl<H: HandlerTypes> RewriteStrSettings<'_, '_, H> {
1171    /// Creates [`RewriteStrSettings`].
1172    #[inline]
1173    #[must_use]
1174    pub const fn new_for_handler_types() -> Self {
1175        RewriteStrSettings {
1176            element_content_handlers: vec![],
1177            document_content_handlers: vec![],
1178            strict: true,
1179            enable_esi_tags: true,
1180        }
1181    }
1182}