lol_html/rewriter/settings.rs
1use crate::rewritable_units::{Comment, Doctype, DocumentEnd, Element, EndTag, TextChunk};
2use crate::selectors_vm::Selector;
3// N.B. `use crate::` will break this because the constructor is not public, only the struct itself
4use super::AsciiCompatibleEncoding;
5use std::borrow::Cow;
6use std::error::Error;
7
/// Trait used to parameterize the type of handlers used in the rewriter.
///
/// This trait is meant to be an implementation detail for the [`Send`-compatible type aliases](crate::send).
/// We don't recommend writing code generic over [`HandlerTypes`], because it makes working with closures much more difficult.
///
/// Many types like [`Element`] and [`ElementHandler`] have a hidden generic type that defaults to `LocalHandlerTypes`.
/// If you need to use `Send`-able handlers, remove the default type by replacing it with `_`, e.g. `Element<'_, '_, _>`.
///
/// This module provides two implementations: [`LocalHandlerTypes`] (handlers without a `Send` bound)
/// and [`SendHandlerTypes`] (handlers that are `Send`).
#[diagnostic::on_unimplemented(
    note = "If `{Self}` is a generic type, add `{Self}: HandlerTypes` trait bound, otherwise replace `{Self}` with `LocalHandlerTypes`",
    note = "The concrete type of `{Self}` can only be either `LocalHandlerTypes` to allow non-`Send` closures in content handlers, or `SendHandlerTypes` to require `Send` closures"
)]
pub trait HandlerTypes: Sized {
    /// Handler type for [`Doctype`].
    type DoctypeHandler<'handler>: FnMut(&mut Doctype<'_>) -> HandlerResult + 'handler;
    /// Handler type for [`Comment`].
    ///
    /// The entire content of the comment will be buffered.
    type CommentHandler<'handler>: FnMut(&mut Comment<'_>) -> HandlerResult + 'handler;
    /// Handler type for [`TextChunk`] fragments. Beware: this is tricky to use.
    ///
    /// The text chunks are **not** text DOM nodes. They are fragments of text nodes, split at arbitrary points.
    ///
    /// See [`TextChunk`] documentation for more info. See also [`TextChunk::last_in_text_node()`].
    type TextHandler<'handler>: FnMut(&mut TextChunk<'_>) -> HandlerResult + 'handler;
    /// Handler type for [`Element`].
    ///
    /// The element is parameterized by `Self`, so the handler sees elements of its own handler family.
    type ElementHandler<'handler>: FnMut(&mut Element<'_, '_, Self>) -> HandlerResult + 'handler;
    /// Handler type for [`EndTag`].
    ///
    /// This is a `FnOnce`, so a given handler can be invoked at most once.
    type EndTagHandler<'handler>: FnOnce(&mut EndTag<'_>) -> HandlerResult + 'handler;
    /// Handler type for [`DocumentEnd`].
    ///
    /// This is a `FnOnce`, so a given handler can be invoked at most once.
    type EndHandler<'handler>: FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + 'handler;

    // Inside the HTML rewriter we need to create handlers, and they need to be the most constrained
    // possible version of a handler (i.e. if we have `Send` and non-`Send` handlers we need to
    // create a `Send` handler to make it compatible with both classes of handlers), so that's
    // what we offer below.
    //
    // Note that in the HTML rewriter all we have is an abstract `H` that implements `HandlerTypes`.
    // Therefore, there is no direct way to create a handler that is compatible with *all* possible
    // implementations of `HandlerTypes`, so each implementation of `HandlerTypes` needs to provide
    // a way to create a handler compatible with itself.

    /// Builds this implementation's end tag handler from anything that converts into the
    /// `Send` end tag handler ([`EndTagHandlerSend`]).
    #[doc(hidden)]
    fn new_end_tag_handler<'handler>(
        handler: impl IntoHandler<EndTagHandlerSend<'handler>>,
    ) -> Self::EndTagHandler<'handler>;

    /// Builds this implementation's element handler from anything that converts into the
    /// `Send` element handler ([`ElementHandlerSend`]) for `Self`.
    #[doc(hidden)]
    fn new_element_handler<'handler>(
        handler: impl IntoHandler<ElementHandlerSend<'handler, Self>>,
    ) -> Self::ElementHandler<'handler>;

    /// Creates a single handler that runs multiple handlers in sequence.
    #[doc(hidden)]
    fn combine_handlers(handlers: Vec<Self::EndTagHandler<'_>>) -> Self::EndTagHandler<'_>;
}
63
64/// Handler type for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
65pub struct LocalHandlerTypes {}
66
67impl HandlerTypes for LocalHandlerTypes {
68 type DoctypeHandler<'h> = DoctypeHandler<'h>;
69 type CommentHandler<'h> = CommentHandler<'h>;
70 type TextHandler<'h> = TextHandler<'h>;
71 type ElementHandler<'h> = ElementHandler<'h>;
72 type EndTagHandler<'h> = EndTagHandler<'h>;
73 type EndHandler<'h> = EndHandler<'h>;
74
75 fn new_end_tag_handler<'h>(
76 handler: impl IntoHandler<EndTagHandlerSend<'h>>,
77 ) -> Self::EndTagHandler<'h> {
78 handler.into_handler()
79 }
80
81 fn new_element_handler<'h>(
82 handler: impl IntoHandler<ElementHandlerSend<'h, Self>>,
83 ) -> Self::ElementHandler<'h> {
84 handler.into_handler()
85 }
86
87 fn combine_handlers(handlers: Vec<Self::EndTagHandler<'_>>) -> Self::EndTagHandler<'_> {
88 Box::new(move |end_tag: &mut EndTag<'_>| {
89 for handler in handlers {
90 handler(end_tag)?;
91 }
92
93 Ok(())
94 })
95 }
96}
97
98/// Marker type for sendable handlers. Use aliases from the [`send`](crate::send) module.
99#[doc(hidden)]
100pub struct SendHandlerTypes {}
101
102impl HandlerTypes for SendHandlerTypes {
103 type DoctypeHandler<'h> = DoctypeHandlerSend<'h>;
104 type CommentHandler<'h> = CommentHandlerSend<'h>;
105 type TextHandler<'h> = TextHandlerSend<'h>;
106 type ElementHandler<'h> = ElementHandlerSend<'h, Self>;
107 type EndTagHandler<'h> = EndTagHandlerSend<'h>;
108 type EndHandler<'h> = EndHandlerSend<'h>;
109
110 fn new_end_tag_handler<'h>(
111 handler: impl IntoHandler<Self::EndTagHandler<'h>>,
112 ) -> Self::EndTagHandler<'h> {
113 handler.into_handler()
114 }
115
116 fn new_element_handler<'h>(
117 handler: impl IntoHandler<Self::ElementHandler<'h>>,
118 ) -> Self::ElementHandler<'h> {
119 handler.into_handler()
120 }
121
122 fn combine_handlers(handlers: Vec<Self::EndTagHandler<'_>>) -> Self::EndTagHandler<'_> {
123 Box::new(move |end_tag: &mut EndTag<'_>| {
124 for handler in handlers {
125 handler(end_tag)?;
126 }
127
128 Ok(())
129 })
130 }
131}
132
/// The result of a handler.
///
/// `Ok(())` on success; otherwise any boxed error that is `Send + Sync`.
// A boxed trait object in a type alias defaults to the `'static` bound, so it is not spelled out.
pub type HandlerResult = Result<(), Box<dyn Error + Send + Sync>>;
135
/// Boxed closure for handling the [document type declaration].
///
/// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype
pub type DoctypeHandler<'h> = Box<dyn FnMut(&mut Doctype<'_>) -> HandlerResult + 'h>;
/// Boxed closure for handling HTML comments.
pub type CommentHandler<'h> = Box<dyn FnMut(&mut Comment<'_>) -> HandlerResult + 'h>;
/// Boxed closure for handling text chunks present in the HTML.
pub type TextHandler<'h> = Box<dyn FnMut(&mut TextChunk<'_>) -> HandlerResult + 'h>;
/// Boxed closure for handling elements matched by a selector.
///
/// The `H` parameter selects the handler family of the [`Element`] passed to the closure
/// and defaults to [`LocalHandlerTypes`].
pub type ElementHandler<'h, H = LocalHandlerTypes> =
    Box<dyn FnMut(&mut Element<'_, '_, H>) -> HandlerResult + 'h>;
/// Boxed closure for handling end tags.
pub type EndTagHandler<'h> = Box<dyn FnOnce(&mut EndTag<'_>) -> HandlerResult + 'h>;
/// Boxed closure for handling the document end. This is called after the last chunk is processed.
pub type EndHandler<'h> = Box<dyn FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + 'h>;
151
/// [Sendable](crate::send) boxed closure for handling the [document type declaration].
///
/// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype
///
/// See also non-sendable [`DoctypeHandler`](crate::DoctypeHandler).
pub type DoctypeHandlerSend<'h> = Box<dyn FnMut(&mut Doctype<'_>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling HTML comments.
///
/// See also non-sendable [`CommentHandler`](crate::CommentHandler).
pub type CommentHandlerSend<'h> = Box<dyn FnMut(&mut Comment<'_>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling [text chunks](TextChunk) present in the HTML.
///
/// See also non-sendable [`TextHandler`](crate::TextHandler).
pub type TextHandlerSend<'h> = Box<dyn FnMut(&mut TextChunk<'_>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling elements matched by a selector.
///
/// The `H` parameter selects the handler family of the [`Element`] passed to the closure
/// and defaults to [`SendHandlerTypes`].
///
/// See also non-sendable [`ElementHandler`].
pub type ElementHandlerSend<'h, H = SendHandlerTypes> =
    Box<dyn FnMut(&mut Element<'_, '_, H>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling end tags.
///
/// See also non-sendable [`EndTagHandler`](crate::EndTagHandler).
pub type EndTagHandlerSend<'h> = Box<dyn FnOnce(&mut EndTag<'_>) -> HandlerResult + Send + 'h>;
/// [Sendable](crate::send) boxed closure for handling the document end. This is called after the last chunk is processed.
///
/// See also non-sendable [`EndHandler`](crate::EndHandler).
pub type EndHandlerSend<'h> = Box<dyn FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + Send + 'h>;
177
/// Trait that allows closures to be used as handlers.
///
/// `T` is the target handler type. The implementations in this module target the boxed
/// handler aliases (both the local and the `Send` families) and box the closure.
#[diagnostic::on_unimplemented(
    message = "Handler could not be made from `{Self}`\nThe internal `IntoHandler` trait is implemented for closures like `FnMut(&mut _) -> HandlerResult` and `FnOnce(&mut _) -> HandlerResult`, with `+ Send` if needed",
    note = "Ensure that the closure's arguments are correct (add explicit parameter types if needed) and that it implements `Send` if using `Send`-able handlers"
)]
#[doc(hidden)]
pub trait IntoHandler<T: Sized> {
    /// Consumes `self` and returns it as a handler of type `T`.
    fn into_handler(self) -> T;
}
187
188impl<'h, F: FnMut(&mut Doctype<'_>) -> HandlerResult + 'h> IntoHandler<DoctypeHandler<'h>> for F {
189 fn into_handler(self) -> DoctypeHandler<'h> {
190 Box::new(self)
191 }
192}
193
194impl<'h, F: FnMut(&mut Comment<'_>) -> HandlerResult + 'h> IntoHandler<CommentHandler<'h>> for F {
195 fn into_handler(self) -> CommentHandler<'h> {
196 Box::new(self)
197 }
198}
199
200impl<'h, F: FnMut(&mut TextChunk<'_>) -> HandlerResult + 'h> IntoHandler<TextHandler<'h>> for F {
201 fn into_handler(self) -> TextHandler<'h> {
202 Box::new(self)
203 }
204}
205
206impl<'h, F: FnMut(&mut Element<'_, '_, LocalHandlerTypes>) -> HandlerResult + 'h>
207 IntoHandler<ElementHandler<'h>> for F
208{
209 fn into_handler(self) -> ElementHandler<'h> {
210 Box::new(self)
211 }
212}
213
214impl<'h, F: FnOnce(&mut EndTag<'_>) -> HandlerResult + 'h> IntoHandler<EndTagHandler<'h>> for F {
215 fn into_handler(self) -> EndTagHandler<'h> {
216 Box::new(self)
217 }
218}
219
220impl<'h, F: FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + 'h> IntoHandler<EndHandler<'h>> for F {
221 fn into_handler(self) -> EndHandler<'h> {
222 Box::new(self)
223 }
224}
225
226impl<'h, F: FnMut(&mut Doctype<'_>) -> HandlerResult + Send + 'h>
227 IntoHandler<DoctypeHandlerSend<'h>> for F
228{
229 fn into_handler(self) -> DoctypeHandlerSend<'h> {
230 Box::new(self)
231 }
232}
233
234impl<'h, F: FnMut(&mut Comment<'_>) -> HandlerResult + Send + 'h>
235 IntoHandler<CommentHandlerSend<'h>> for F
236{
237 fn into_handler(self) -> CommentHandlerSend<'h> {
238 Box::new(self)
239 }
240}
241
242impl<'h, F: FnMut(&mut TextChunk<'_>) -> HandlerResult + Send + 'h> IntoHandler<TextHandlerSend<'h>>
243 for F
244{
245 fn into_handler(self) -> TextHandlerSend<'h> {
246 Box::new(self)
247 }
248}
249
250impl<'h, H: HandlerTypes, F: FnMut(&mut Element<'_, '_, H>) -> HandlerResult + Send + 'h>
251 IntoHandler<ElementHandlerSend<'h, H>> for F
252{
253 fn into_handler(self) -> ElementHandlerSend<'h, H> {
254 Box::new(self)
255 }
256}
257
258impl<'h, F: FnOnce(&mut EndTag<'_>) -> HandlerResult + Send + 'h> IntoHandler<EndTagHandlerSend<'h>>
259 for F
260{
261 fn into_handler(self) -> EndTagHandlerSend<'h> {
262 Box::new(self)
263 }
264}
265
266impl<'h, F: FnOnce(&mut DocumentEnd<'_>) -> HandlerResult + Send + 'h>
267 IntoHandler<EndHandlerSend<'h>> for F
268{
269 fn into_handler(self) -> EndHandlerSend<'h> {
270 Box::new(self)
271 }
272}
273
274/// Specifies element content handlers associated with a selector.
275pub struct ElementContentHandlers<'h, H: HandlerTypes = LocalHandlerTypes> {
276 /// Element handler. See [`element!`](crate::element) and [`HandlerTypes::ElementHandler`].
277 pub element: Option<H::ElementHandler<'h>>,
278 /// Comment handler. See [`comments!`](crate::comments) and [`HandlerTypes::CommentHandler`].
279 pub comments: Option<H::CommentHandler<'h>>,
280 /// Text handler that receives fragments of text nodes. See [`TextChunk`], [`text!`](crate::text), and [`HandlerTypes::TextHandler`].
281 pub text: Option<H::TextHandler<'h>>,
282}
283
284impl<H: HandlerTypes> Default for ElementContentHandlers<'_, H> {
285 fn default() -> Self {
286 ElementContentHandlers {
287 element: None,
288 comments: None,
289 text: None,
290 }
291 }
292}
293
294impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> {
295 /// Sets a handler for elements matched by a selector.
296 #[inline]
297 #[must_use]
298 pub fn element(mut self, handler: impl IntoHandler<H::ElementHandler<'h>>) -> Self {
299 self.element = Some(handler.into_handler());
300
301 self
302 }
303
304 /// Sets a handler for HTML comments in the inner content of elements matched by a selector.
305 #[inline]
306 #[must_use]
307 pub fn comments(mut self, handler: impl IntoHandler<H::CommentHandler<'h>>) -> Self {
308 self.comments = Some(handler.into_handler());
309
310 self
311 }
312
313 /// Sets a handler for text chunks in the inner content of elements matched by a selector.
314 #[inline]
315 #[must_use]
316 pub fn text(mut self, handler: impl IntoHandler<H::TextHandler<'h>>) -> Self {
317 self.text = Some(handler.into_handler());
318
319 self
320 }
321}
322
323/// Specifies document-level content handlers.
324///
325/// Some content can't be captured by CSS selectors as it lays outside of content of any
326/// of the HTML elements. Document-level handlers allow capture such a content:
327///
328/// ```html
329/// <!doctype html>
330/// <!--
331/// I can't be captured with a selector, but I can be
332/// captured with a document-level comment handler
333/// -->
334/// <html>
335/// <!-- I can be captured with a selector -->
336/// </html>
337/// ```
338pub struct DocumentContentHandlers<'h, H: HandlerTypes = LocalHandlerTypes> {
339 /// Doctype handler. See [`doctype!`](crate::doctype) and [`HandlerTypes::DoctypeHandler`].
340 pub doctype: Option<H::DoctypeHandler<'h>>,
341 /// Comment handler. See [`doc_comments!`](crate::doc_comments) and [`HandlerTypes::CommentHandler`].
342 pub comments: Option<H::CommentHandler<'h>>,
343 /// Text handler that receives fragments of text nodes. See [`TextChunk`], [`doc_text!`](crate::doc_text), and [`HandlerTypes::TextHandler`].
344 pub text: Option<H::TextHandler<'h>>,
345 /// End handler. See [`HandlerTypes::EndHandler`].
346 pub end: Option<H::EndHandler<'h>>,
347}
348
349impl<H: HandlerTypes> Default for DocumentContentHandlers<'_, H> {
350 fn default() -> Self {
351 DocumentContentHandlers {
352 doctype: None,
353 comments: None,
354 text: None,
355 end: None,
356 }
357 }
358}
359
360impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> {
361 /// Sets a handler for the [document type declaration].
362 ///
363 /// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype
364 #[inline]
365 #[must_use]
366 pub fn doctype(mut self, handler: impl IntoHandler<H::DoctypeHandler<'h>>) -> Self {
367 self.doctype = Some(handler.into_handler());
368
369 self
370 }
371
372 /// Sets a handler for all HTML comments present in the input HTML markup.
373 #[inline]
374 #[must_use]
375 pub fn comments(mut self, handler: impl IntoHandler<H::CommentHandler<'h>>) -> Self {
376 self.comments = Some(handler.into_handler());
377
378 self
379 }
380
381 /// Sets a handler for all text chunks present in the input HTML markup.
382 #[inline]
383 #[must_use]
384 pub fn text(mut self, handler: impl IntoHandler<H::TextHandler<'h>>) -> Self {
385 self.text = Some(handler.into_handler());
386
387 self
388 }
389
390 /// Sets a handler for the document end, which is called after the last chunk is processed.
391 #[inline]
392 #[must_use]
393 pub fn end(mut self, handler: impl IntoHandler<H::EndHandler<'h>>) -> Self {
394 self.end = Some(handler.into_handler());
395
396 self
397 }
398}
399
// Internal helper used by the `element!`, `text!` and `comments!` macros.
//
// Expands to a `(Cow<Selector>, ElementContentHandlers)` tuple: the selector expression is
// parsed into an owned `Selector`, and `$handler` is installed through the builder method
// named by `$handler_name` on a default `ElementContentHandlers`.
//
// The expansion panics if the selector fails to parse (`unwrap()`).
#[doc(hidden)]
#[macro_export]
macro_rules! __element_content_handler {
    ($selector:expr, $handler_name:ident, $handler:expr) => {
        (
            ::std::borrow::Cow::Owned($selector.parse::<$crate::Selector>().unwrap()),
            $crate::ElementContentHandlers::default().$handler_name($handler),
        )
    };
}
410
/// A convenience macro to construct a [rewriting handler](ElementContentHandlers) for elements that can be matched by the
/// specified CSS selector.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, element, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<span id="foo"></span>"#,
///     RewriteStrSettings {
///         element_content_handlers: vec![
///             element!("#foo", |el| {
///                 el.set_inner_content("Hello!", ContentType::Text);
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<span id="foo">Hello!</span>"#);
/// ```
///
/// # Panics
///
/// The expansion calls `unwrap()` on the parsed selector, so it panics if the selector
/// expression cannot be parsed.
///
/// When using [sendable handlers](crate::send), beware that the [`Element`] type has a generic argument that controls `Send` compatibility.
/// Use [`send::Element`](crate::send::Element) or write the closure's argument's type as `&mut Element<'_, '_, _>`.
///
/// This macro can create either sendable or non-sendable handlers, but not both in a generic context.
/// `H: HandlerTypes` bound won't work with this macro.
#[macro_export(local_inner_macros)]
macro_rules! element {
    ($selector:expr, $handler:expr) => {{
        // Without this rust won't be able to always infer the type of the handler.
        // `type_hint` returns its argument unchanged; only its `where` bound matters.
        // `H` is left generic so it is inferred at the use site.
        #[inline(always)]
        const fn type_hint<'h, T, H: $crate::HandlerTypes>(h: T) -> T
        where
            T: FnMut(&mut $crate::html_content::Element<'_, '_, H>) -> $crate::HandlerResult + 'h,
        {
            h
        }

        __element_content_handler!($selector, element, type_hint($handler))
    }};
}
456
/// A convenience macro to construct a [rewriting handler](ElementContentHandlers) for fragments of text in the inner content of an
/// element that can be matched by the specified CSS selector. Beware: this is tricky to use.
///
/// The text chunks may split the text nodes into smaller fragments. See [`TextChunk`] for more info.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, text, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<span>Hello</span>"#,
///     RewriteStrSettings {
///         element_content_handlers: vec![
///             text!("span", |t| {
///                 if t.last_in_text_node() {
///                     t.after(" world", ContentType::Text);
///                 }
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<span>Hello world</span>"#);
/// ```
///
/// # Panics
///
/// The expansion calls `unwrap()` on the parsed selector, so it panics if the selector
/// expression cannot be parsed.
///
/// This macro can create either [sendable](crate::send) or non-sendable handlers, but not both in a generic context.
/// `H: HandlerTypes` bound won't work with this macro.
#[macro_export(local_inner_macros)]
macro_rules! text {
    ($selector:expr, $handler:expr) => {{
        // Without this rust won't be able to always infer the type of the handler.
        // `type_hint` returns its argument unchanged; only its `where` bound matters.
        // Declared `const fn` and with an explicit `<'_>` to match the other handler macros.
        #[inline(always)]
        const fn type_hint<T>(h: T) -> T
        where
            T: FnMut(&mut $crate::html_content::TextChunk<'_>) -> $crate::HandlerResult,
        {
            h
        }

        __element_content_handler!($selector, text, type_hint($handler))
    }};
}
503
/// A convenience macro to construct a [rewriting handler](ElementContentHandlers) for HTML comments in the inner content of
/// an element that can be matched by the specified CSS selector.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, comments, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<span><!-- 42 --></span>"#,
///     RewriteStrSettings {
///         element_content_handlers: vec![
///             comments!("span", |c| {
///                 c.set_text("Hello!").unwrap();
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<span><!--Hello!--></span>"#);
/// ```
///
/// # Panics
///
/// The expansion calls `unwrap()` on the parsed selector, so it panics if the selector
/// expression cannot be parsed.
///
/// This macro can create either [sendable](crate::send) or non-sendable handlers, but not both in a generic context.
/// `H: HandlerTypes` bound won't work with this macro.
#[macro_export(local_inner_macros)]
macro_rules! comments {
    ($selector:expr, $handler:expr) => {{
        // Without this rust won't be able to always infer the type of the handler.
        // `type_hint` returns its argument unchanged; only its `where` bound matters.
        #[inline(always)]
        const fn type_hint<T>(h: T) -> T
        where
            T: FnMut(&mut $crate::html_content::Comment<'_>) -> $crate::HandlerResult,
        {
            h
        }

        __element_content_handler!($selector, comments, type_hint($handler))
    }};
}
546
/// A convenience macro to construct a [`StreamingHandler`](crate::html_content::StreamingHandler) from a closure.
///
/// For use with [`Element::streaming_replace`], etc.
///
/// The closure must be `'static` (can't capture by a temporary reference), and `Send`, even when using [non-sendable](crate::send) rewriter.
///
/// The macro expands to a `Box<dyn StreamingHandler + Send>` wrapping the closure.
///
/// ```rust
/// use lol_html::{element, streaming, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// RewriteStrSettings {
///     element_content_handlers: vec![
///         element!("div", |element| {
///             element.streaming_replace(streaming!(|sink| {
///                 sink.write_str("…", ContentType::Html);
///                 sink.write_str("…", ContentType::Html);
///                 Ok(())
///             }));
///             Ok(())
///         })
///     ],
///     ..RewriteStrSettings::default()
/// };
/// ```
///
/// Note: if you get "implementation of `FnOnce` is not general enough" error, add explicit argument
/// `sink: &mut StreamingHandlerSink<'_>` to the closure.
#[macro_export(local_inner_macros)]
macro_rules! streaming {
    ($closure:expr) => {{
        use ::std::error::Error;
        use $crate::html_content::StreamingHandlerSink;
        // Without this rust won't be able to always infer the type of the handler.
        // The helper returns its argument unchanged; only its `where` bound matters.
        #[inline(always)]
        const fn streaming_macro_type_hint<StreamingHandler>(
            handler_closure: StreamingHandler,
        ) -> StreamingHandler
        where
            StreamingHandler:
                FnOnce(&mut StreamingHandlerSink<'_>) -> Result<(), Box<dyn Error + Send + Sync>> + 'static,
        {
            handler_closure
        }

        // The cast to a `Send` trait object is what requires the closure to be `Send`.
        Box::new(streaming_macro_type_hint($closure))
            as Box<dyn $crate::html_content::StreamingHandler + Send>
    }};
}
595
// Internal helper used by the `doctype!`, `doc_text!`, `doc_comments!` and `end!` macros.
//
// Expands to a default `DocumentContentHandlers` with `$handler` installed through the
// builder method named by `$handler_name`.
#[doc(hidden)]
#[macro_export]
macro_rules! __document_content_handler {
    ($handler_name:ident, $handler:expr) => {
        $crate::DocumentContentHandlers::default().$handler_name($handler)
    };
}
603
/// A convenience macro to construct a [handler](DocumentContentHandlers) for [document type declarations] in the HTML document.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, doctype, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// rewrite_str(
///     r#"<!doctype html>"#,
///     RewriteStrSettings {
///         document_content_handlers: vec![
///             doctype!(|d| {
///                 assert_eq!(d.name().unwrap(), "html");
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
/// ```
///
/// [document type declarations]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype
#[macro_export(local_inner_macros)]
macro_rules! doctype {
    ($handler:expr) => {{
        // Without this rust won't be able to always infer the type of the handler.
        // `type_hint` returns its argument unchanged; only its `where` bound matters.
        #[inline(always)]
        const fn type_hint<T>(h: T) -> T
        where
            T: FnMut(&mut $crate::html_content::Doctype<'_>) -> $crate::HandlerResult,
        {
            h
        }

        __document_content_handler!(doctype, type_hint($handler))
    }};
}
642
/// A convenience macro to construct a [rewriting handler](DocumentContentHandlers) for all text chunks in the HTML document. Beware: this is tricky to use.
///
/// The text chunks may split the text nodes into smaller fragments. See [`TextChunk`] for more info.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, doc_text, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"Hello<span>Hello</span>Hello"#,
///     RewriteStrSettings {
///         document_content_handlers: vec![
///             doc_text!(|t| {
///                 if t.last_in_text_node() {
///                     t.after(" world", ContentType::Text);
///                 }
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"Hello world<span>Hello world</span>Hello world"#);
/// ```
#[macro_export(local_inner_macros)]
macro_rules! doc_text {
    ($handler:expr) => {{
        // Without this rust won't be able to always infer the type of the handler.
        // `type_hint` returns its argument unchanged; only its `where` bound matters.
        #[inline(always)]
        const fn type_hint<T>(h: T) -> T
        where
            T: FnMut(&mut $crate::html_content::TextChunk<'_>) -> $crate::HandlerResult,
        {
            h
        }

        __document_content_handler!(text, type_hint($handler))
    }};
}
685
/// A convenience macro to construct a [rewriting handler](DocumentContentHandlers) for all HTML comments in the HTML document.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, doc_comments, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<!-- 42 --><span><!-- 42 --></span><!-- 42 -->"#,
///     RewriteStrSettings {
///         document_content_handlers: vec![
///             doc_comments!(|c| {
///                 c.set_text("Hello!").unwrap();
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<!--Hello!--><span><!--Hello!--></span><!--Hello!-->"#);
/// ```
#[macro_export(local_inner_macros)]
macro_rules! doc_comments {
    ($handler:expr) => {{
        // Without this rust won't be able to always infer the type of the handler.
        // `type_hint` returns its argument unchanged; only its `where` bound matters.
        #[inline(always)]
        const fn type_hint<T>(h: T) -> T
        where
            T: FnMut(&mut $crate::html_content::Comment<'_>) -> $crate::HandlerResult,
        {
            h
        }

        __document_content_handler!(comments, type_hint($handler))
    }};
}
724
/// A convenience macro to construct a [rewriting handler](DocumentContentHandlers) for the end of the document.
///
/// This handler will only be called after the rewriter has finished processing the final chunk.
///
/// # Example
/// ```
/// use lol_html::{rewrite_str, element, end, RewriteStrSettings};
/// use lol_html::html_content::ContentType;
///
/// let html = rewrite_str(
///     r#"<span>foo</span>"#,
///     RewriteStrSettings {
///         element_content_handlers: vec![
///             element!("span", |el| {
///                 el.append("bar", ContentType::Text);
///
///                 Ok(())
///             })
///         ],
///         document_content_handlers: vec![
///             end!(|end| {
///                 end.append("<div>baz</div>", ContentType::Html);
///
///                 Ok(())
///             })
///         ],
///         ..RewriteStrSettings::new()
///     }
/// ).unwrap();
///
/// assert_eq!(html, r#"<span>foobar</span><div>baz</div>"#);
/// ```
#[macro_export(local_inner_macros)]
macro_rules! end {
    ($handler:expr) => {{
        // Without this rust won't be able to always infer the type of the handler.
        // `type_hint` returns its argument unchanged; only its `where` bound matters.
        // Unlike the other document-level macros, the bound here is `FnOnce`.
        #[inline(always)]
        const fn type_hint<T>(h: T) -> T
        where
            T: FnOnce(&mut $crate::html_content::DocumentEnd<'_>) -> $crate::HandlerResult,
        {
            h
        }

        __document_content_handler!(end, type_hint($handler))
    }};
}
772
/// Specifies the memory settings for [`HtmlRewriter`].
///
/// Construct it with [`MemorySettings::new()`] (usable in `const` contexts) or
/// [`Default::default()`]; both yield the same values.
///
/// [`HtmlRewriter`]: crate::HtmlRewriter
// NOTE: exposed in C API as well, thus repr(C). Field order is therefore part of the layout.
#[repr(C)]
pub struct MemorySettings {
    /// Specifies the number of bytes that should be preallocated on [`HtmlRewriter`] instantiation
    /// for the internal parsing buffer.
    ///
    /// In some cases (e.g. when rewriter encounters a start tag represented by two or more input
    /// chunks) the rewriter needs to buffer input content.
    ///
    /// Internal parsing buffer is used in such cases. Reallocations and, thus, performance
    /// degradation can be avoided by preallocating the buffer ahead of time. As a drawback of
    /// this approach, every instance of the rewriter will consume the preallocated amount of
    /// memory.
    ///
    /// It's up to the user to adjust the limit according to their environment limitations.
    ///
    /// ### Default
    ///
    /// `1024` bytes when constructed with `MemorySettings::new()`.
    ///
    /// [`HtmlRewriter`]: crate::HtmlRewriter
    pub preallocated_parsing_buffer_size: usize,

    /// Sets a hard limit in bytes on memory consumption of a [`HtmlRewriter`] instance.
    ///
    /// Rewriter's [`write`] and [`end`] methods will error if this limit is exceeded.
    ///
    /// Note, that value doesn't reflect the exact threshold after which the rewriter will bailout.
    /// It is impossible to account for all the memory consumed without a significant performance
    /// penalty. So, instead, we try to provide the best approximation by measuring the memory
    /// consumed by internal buffers that grow depending on the input.
    ///
    /// ### Default
    ///
    /// [`usize::MAX`] when constructed with `MemorySettings::new()`.
    ///
    /// [`HtmlRewriter`]: crate::HtmlRewriter
    /// [`write`]: crate::HtmlRewriter::write
    /// [`end`]: crate::HtmlRewriter::end
    pub max_allowed_memory_usage: usize,
}

impl Default for MemorySettings {
    /// Returns the same values as [`MemorySettings::new()`].
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}

impl MemorySettings {
    /// Create a new [`MemorySettings`] with default values.
    ///
    /// The defaults are a `1024`-byte preallocated parsing buffer and a memory limit of
    /// [`usize::MAX`]. Being a `const fn`, this can also initialize `const` and `static` items.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            preallocated_parsing_buffer_size: 1024,
            max_allowed_memory_usage: usize::MAX,
        }
    }
}
836
837/// Specifies settings for [`HtmlRewriter`].
838///
839/// [`HtmlRewriter`]: struct.HtmlRewriter.html
pub struct Settings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> {
    /// Specifies CSS selectors and rewriting handlers for elements and their inner content.
    ///
    /// ### Hint
    ///
    /// [`element`], [`comments`] and [`text`] convenience macros can be used to construct a
    /// `(Selector, ElementContentHandlers)` tuple.
    ///
    /// ### Example
    /// ```
    /// use std::borrow::Cow;
    /// use lol_html::{ElementContentHandlers, Settings};
    /// use lol_html::html_content::{Comment, Element};
    ///
    /// let settings = Settings {
    ///     element_content_handlers: vec! [
    ///         (
    ///             Cow::Owned("div[foo]".parse().unwrap()),
    ///             ElementContentHandlers::default().element(|el: &mut Element| {
    ///                 // ...
    ///
    ///                 Ok(())
    ///             })
    ///         ),
    ///         (
    ///             Cow::Owned("body".parse().unwrap()),
    ///             ElementContentHandlers::default().comments(|c: &mut Comment| {
    ///                 // ...
    ///
    ///                 Ok(())
    ///             })
    ///         )
    ///     ],
    ///     ..Settings::new()
    /// };
    /// ```
    ///
    /// ### Default
    ///
    /// An empty vector when constructed with `Settings::new()`.
    ///
    /// [`element`]: macro.element.html
    /// [`comments`]: macro.comments.html
    /// [`text`]: macro.text.html
    pub element_content_handlers: Vec<(
        Cow<'selectors, Selector>,
        ElementContentHandlers<'handlers, H>,
    )>,

    /// Specifies rewriting handlers for the content without associating it to a particular
    /// CSS selector.
    ///
    /// Refer to [`DocumentContentHandlers`] documentation for more information.
    ///
    /// ### Hint
    /// [`doctype`], [`doc_comments`] and [`doc_text`] convenience macros can be used to construct
    /// items of this vector.
    ///
    /// ### Default
    ///
    /// An empty vector when constructed with `Settings::new()`.
    ///
    /// [`DocumentContentHandlers`]: struct.DocumentContentHandlers.html
    /// [`doctype`]: macro.doctype.html
    /// [`doc_comments`]: macro.doc_comments.html
    /// [`doc_text`]: macro.doc_text.html
    pub document_content_handlers: Vec<DocumentContentHandlers<'handlers, H>>,

    /// Specifies the [character encoding] for the input and the output of the rewriter.
    ///
    /// Can be a [label] for any of the web-compatible encodings with an exception for `UTF-16LE`,
    /// `UTF-16BE`, `ISO-2022-JP` and `replacement` (these non-ASCII-compatible encodings
    /// are not supported).
    ///
    /// [character encoding]: https://developer.mozilla.org/en-US/docs/Glossary/character_encoding
    /// [label]: https://encoding.spec.whatwg.org/#names-and-labels
    ///
    /// ### Default
    ///
    /// `"utf-8"` when constructed with `Settings::new()`.
    pub encoding: AsciiCompatibleEncoding,

    /// Specifies the memory settings.
    ///
    /// ### Default
    ///
    /// `MemorySettings::default()` when constructed with `Settings::new()`.
    pub memory_settings: MemorySettings,

    /// If set to `true` the rewriter bails out if it encounters markup that drives the HTML parser
    /// into an ambiguous state.
    ///
    /// Since the rewriter operates on a token stream and doesn't have access to a full
    /// DOM-tree, there are certain rare cases of non-conforming HTML markup which can't be
    /// guaranteed to be parsed correctly without an ability to backtrace the tree.
    ///
    /// Therefore, due to security considerations, sometimes it's preferable to abort the
    /// rewriting process in case of such uncertainty.
    ///
    /// One of the simplest examples of such markup is the following:
    ///
    /// ```html
    /// ...
    /// <select><xmp><script>"use strict";</script></select>
    /// ...
    /// ```
    ///
    /// The `<xmp>` element is not allowed inside the `<select>` element, so in a browser the start
    /// tag for `<xmp>` will be ignored and the following `<script>` element will be parsed and
    /// executed.
    ///
    /// On the other hand, the `<select>` element itself can be also ignored depending on the
    /// context in which it was parsed. In this case, the `<xmp>` element will not be ignored
    /// and the `<script>` element along with its content will be parsed as a simple text inside
    /// it.
    ///
    /// So, in this case the parser needs an ability to backtrace the DOM-tree to figure out the
    /// correct parsing context.
    ///
    /// ### Default
    ///
    /// `true` when constructed with `Settings::new()`.
    pub strict: bool,

    /// If enabled the rewriter enables support for [Edge Side Includes] tags, treating them as
    /// [void elements] and allowing them to be replaced with desired content.
    ///
    /// [Edge Side Includes]: https://www.w3.org/TR/esi-lang/
    /// [void elements]: https://developer.mozilla.org/en-US/docs/Glossary/Void_element
    ///
    /// ### Default
    ///
    /// `false` when constructed with `Settings::new()`.
    pub enable_esi_tags: bool,

    /// If enabled the rewriter will dynamically change the charset when it encounters a `meta` tag
    /// that specifies the charset.
    ///
    /// The charset can be modified by the `meta` tag with
    ///
    /// ```html
    /// <meta charset="windows-1251">
    /// ```
    ///
    /// or
    ///
    /// ```html
    /// <meta http-equiv="content-type" content="text/html; charset=windows-1251">
    /// ```
    ///
    /// Note that an explicit `charset` in the `Content-type` header should take precedence over
    /// the `meta` tag, so only enable this if the content type does not explicitly specify a
    /// charset. For details check [this][html5encoding].
    ///
    /// [html5encoding]: https://blog.whatwg.org/the-road-to-html-5-character-encoding
    ///
    /// ### Default
    ///
    /// `false` when constructed with `Settings::new()`.
    pub adjust_charset_on_meta_tag: bool,
}
984
985impl Default for Settings<'_, '_, LocalHandlerTypes> {
986 #[inline]
987 fn default() -> Self {
988 Self::new()
989 }
990}
991
992impl Settings<'_, '_, LocalHandlerTypes> {
993 /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
994 #[inline]
995 #[must_use]
996 pub fn new() -> Self {
997 Self::new_for_handler_types()
998 }
999}
1000
1001impl Settings<'_, '_, SendHandlerTypes> {
1002 /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
1003 #[inline]
1004 #[must_use]
1005 pub fn new_send() -> Self {
1006 Self::new_for_handler_types()
1007 }
1008}
1009
1010impl<H: HandlerTypes> Settings<'_, '_, H> {
1011 /// Creates [`Settings`].
1012 #[inline]
1013 #[must_use]
1014 pub fn new_for_handler_types() -> Self {
1015 Settings {
1016 element_content_handlers: vec![],
1017 document_content_handlers: vec![],
1018 encoding: AsciiCompatibleEncoding(encoding_rs::UTF_8),
1019 memory_settings: MemorySettings::default(),
1020 strict: true,
1021 enable_esi_tags: false,
1022 adjust_charset_on_meta_tag: false,
1023 }
1024 }
1025}
1026
1027impl<'h, 's, H: HandlerTypes> From<RewriteStrSettings<'h, 's, H>> for Settings<'h, 's, H> {
1028 #[inline]
1029 fn from(settings: RewriteStrSettings<'h, 's, H>) -> Self {
1030 Settings {
1031 element_content_handlers: settings.element_content_handlers,
1032 document_content_handlers: settings.document_content_handlers,
1033 strict: settings.strict,
1034 enable_esi_tags: settings.enable_esi_tags,
1035 ..Settings::new_for_handler_types()
1036 }
1037 }
1038}
1039
/// Specifies settings for the [`rewrite_str`] function.
///
/// This is a reduced variant of [`Settings`]: it carries only the content handlers,
/// `strict` and `enable_esi_tags`. It can be converted into [`Settings`] with `From`/`Into`,
/// in which case the remaining fields take the defaults of `Settings::new_for_handler_types()`.
///
/// [`rewrite_str`]: fn.rewrite_str.html
pub struct RewriteStrSettings<'handlers, 'selectors, H: HandlerTypes = LocalHandlerTypes> {
    /// Specifies CSS selectors and rewriting handlers for elements and their inner content.
    ///
    /// ### Hint
    ///
    /// [`element`], [`comments`] and [`text`] convenience macros can be used to construct a
    /// `(Selector, ElementContentHandlers)` tuple.
    ///
    /// ### Example
    /// ```
    /// use std::borrow::Cow;
    /// use lol_html::{ElementContentHandlers, RewriteStrSettings};
    /// use lol_html::html_content::{Comment, Element};
    ///
    /// let settings = RewriteStrSettings {
    ///     element_content_handlers: vec! [
    ///         (
    ///             Cow::Owned("div[foo]".parse().unwrap()),
    ///             ElementContentHandlers::default().element(|el: &mut Element| {
    ///                 // ...
    ///
    ///                 Ok(())
    ///             })
    ///         ),
    ///         (
    ///             Cow::Owned("div[foo]".parse().unwrap()),
    ///             ElementContentHandlers::default().comments(|c: &mut Comment| {
    ///                 // ...
    ///
    ///                 Ok(())
    ///             })
    ///         )
    ///     ],
    ///     ..RewriteStrSettings::new()
    /// };
    /// ```
    ///
    /// ### Default
    ///
    /// An empty vector when constructed with `RewriteStrSettings::new()`.
    ///
    /// [`element`]: macro.element.html
    /// [`comments`]: macro.comments.html
    /// [`text`]: macro.text.html
    pub element_content_handlers: Vec<(
        Cow<'selectors, Selector>,
        ElementContentHandlers<'handlers, H>,
    )>,

    /// Specifies rewriting handlers for the content without associating it to a particular
    /// CSS selector.
    ///
    /// Refer to [`DocumentContentHandlers`] documentation for more information.
    ///
    /// ### Hint
    /// [`doctype`], [`doc_comments`] and [`doc_text`] convenience macros can be used to construct
    /// items of this vector.
    ///
    /// ### Default
    ///
    /// An empty vector when constructed with `RewriteStrSettings::new()`.
    ///
    /// [`DocumentContentHandlers`]: struct.DocumentContentHandlers.html
    /// [`doctype`]: macro.doctype.html
    /// [`doc_comments`]: macro.doc_comments.html
    /// [`doc_text`]: macro.doc_text.html
    pub document_content_handlers: Vec<DocumentContentHandlers<'handlers, H>>,

    /// If set to `true` the rewriter bails out if it encounters markup that drives the HTML parser
    /// into an ambiguous state.
    ///
    /// Since the rewriter operates on a token stream and doesn't have access to a full
    /// DOM-tree, there are certain rare cases of non-conforming HTML markup which can't be
    /// guaranteed to be parsed correctly without an ability to backtrace the tree.
    ///
    /// Therefore, due to security considerations, sometimes it's preferable to abort the
    /// rewriting process in case of such uncertainty.
    ///
    /// One of the simplest examples of such markup is the following:
    ///
    /// ```html
    /// ...
    /// <select><xmp><script>"use strict";</script></select>
    /// ...
    /// ```
    ///
    /// The `<xmp>` element is not allowed inside the `<select>` element, so in a browser the start
    /// tag for `<xmp>` will be ignored and the following `<script>` element will be parsed and
    /// executed.
    ///
    /// On the other hand, the `<select>` element itself can be also ignored depending on the
    /// context in which it was parsed. In this case, the `<xmp>` element will not be ignored
    /// and the `<script>` element along with its content will be parsed as a simple text inside
    /// it.
    ///
    /// So, in this case the parser needs an ability to backtrace the DOM-tree to figure out the
    /// correct parsing context.
    ///
    /// ### Default
    ///
    /// `true` when constructed with `RewriteStrSettings::new()`.
    pub strict: bool,

    /// If enabled the rewriter enables support for [Edge Side Includes] tags, treating them as
    /// [void elements] and allowing them to be replaced with desired content.
    ///
    /// [Edge Side Includes]: https://www.w3.org/TR/esi-lang/
    /// [void elements]: https://developer.mozilla.org/en-US/docs/Glossary/Void_element
    ///
    /// ### Default
    ///
    /// `true` when constructed with `RewriteStrSettings::new()`. Note that this differs from
    /// `Settings::new()`, which sets the field of the same name to `false`.
    pub enable_esi_tags: bool,
}
1144
1145impl Default for RewriteStrSettings<'_, '_, LocalHandlerTypes> {
1146 #[inline]
1147 fn default() -> Self {
1148 Self::new()
1149 }
1150}
1151
1152impl RewriteStrSettings<'_, '_, LocalHandlerTypes> {
1153 /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
1154 #[inline]
1155 #[must_use]
1156 pub const fn new() -> Self {
1157 Self::new_for_handler_types()
1158 }
1159}
1160
1161impl RewriteStrSettings<'_, '_, SendHandlerTypes> {
1162 /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
1163 #[inline]
1164 #[must_use]
1165 pub const fn new_send() -> Self {
1166 Self::new_for_handler_types()
1167 }
1168}
1169
1170impl<H: HandlerTypes> RewriteStrSettings<'_, '_, H> {
1171 /// Creates [`RewriteStrSettings`].
1172 #[inline]
1173 #[must_use]
1174 pub const fn new_for_handler_types() -> Self {
1175 RewriteStrSettings {
1176 element_content_handlers: vec![],
1177 document_content_handlers: vec![],
1178 strict: true,
1179 enable_esi_tags: true,
1180 }
1181 }
1182}