css_inline/
lib.rs

1#![doc = include_str!("../README.md")]
2#![warn(
3    clippy::pedantic,
4    clippy::doc_markdown,
5    clippy::redundant_closure,
6    clippy::explicit_iter_loop,
7    clippy::match_same_arms,
8    clippy::needless_borrow,
9    clippy::print_stdout,
10    clippy::arithmetic_side_effects,
11    clippy::cast_possible_truncation,
12    clippy::unwrap_used,
13    clippy::map_unwrap_or,
14    clippy::trivially_copy_pass_by_ref,
15    clippy::needless_pass_by_value,
16    missing_docs,
17    missing_debug_implementations,
18    trivial_casts,
19    trivial_numeric_casts,
20    unreachable_pub,
21    unused_extern_crates,
22    unused_import_braces,
23    unused_qualifications,
24    variant_size_differences,
25    rust_2018_idioms,
26    rust_2018_compatibility,
27    rust_2021_compatibility
28)]
29#![allow(clippy::module_name_repetitions)]
30pub mod error;
31mod hasher;
32mod html;
33mod parser;
34mod resolver;
35
36pub use error::InlineError;
37use indexmap::IndexMap;
38#[cfg(feature = "stylesheet-cache")]
39use lru::{DefaultHasher, LruCache};
40use selectors::context::SelectorCaches;
41use std::{borrow::Cow, fmt::Formatter, hash::BuildHasherDefault, io::Write, sync::Arc};
42
43use crate::html::ElementStyleMap;
44use hasher::BuildNoHashHasher;
45use html::{Document, InliningMode};
46pub use resolver::{DefaultStylesheetResolver, StylesheetResolver};
47pub use url::{ParseError, Url};
48
/// LRU cache holding the contents of external stylesheets.
///
/// Both keys and values are `String`s; the hasher parameter `S` falls back to the
/// `DefaultHasher` re-exported by the `lru` crate.
#[cfg(feature = "stylesheet-cache")]
pub type StylesheetCache<S = DefaultHasher> = LruCache<String, String, S>;
52
53/// Configuration options for CSS inlining process.
54#[allow(clippy::struct_excessive_bools)]
55pub struct InlineOptions<'a> {
56    /// Whether to inline CSS from "style" tags.
57    ///
58    /// Sometimes HTML may include a lot of boilerplate styles, that are not applicable in every
59    /// scenario and it is useful to ignore them and use `extra_css` instead.
60    pub inline_style_tags: bool,
61    /// Keep "style" tags after inlining.
62    pub keep_style_tags: bool,
63    /// Keep "link" tags after inlining.
64    pub keep_link_tags: bool,
65    /// Keep "at-rules" after inlining.
66    pub keep_at_rules: bool,
67    /// Used for loading external stylesheets via relative URLs.
68    pub base_url: Option<Url>,
69    /// Whether remote stylesheets should be loaded or not.
70    pub load_remote_stylesheets: bool,
71    /// External stylesheet cache.
72    #[cfg(feature = "stylesheet-cache")]
73    pub cache: Option<std::sync::Mutex<StylesheetCache>>,
74    // The point of using `Cow` here is Python bindings, where it is problematic to pass a reference
75    // without dealing with memory leaks & unsafe. With `Cow` we can use moved values as `String` in
76    // Python wrapper for `CSSInliner` and `&str` in Rust & simple functions on the Python side
77    /// Additional CSS to inline.
78    pub extra_css: Option<Cow<'a, str>>,
79    /// Pre-allocate capacity for HTML nodes during parsing.
80    /// It can improve performance when you have an estimate of the number of nodes in your HTML document.
81    pub preallocate_node_capacity: usize,
82    /// A way to resolve stylesheets from various sources.
83    pub resolver: Arc<dyn StylesheetResolver>,
84}
85
86impl std::fmt::Debug for InlineOptions<'_> {
87    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
88        let mut debug = f.debug_struct("InlineOptions");
89        debug
90            .field("inline_style_tags", &self.inline_style_tags)
91            .field("keep_style_tags", &self.keep_style_tags)
92            .field("keep_link_tags", &self.keep_link_tags)
93            .field("base_url", &self.base_url)
94            .field("load_remote_stylesheets", &self.load_remote_stylesheets);
95        #[cfg(feature = "stylesheet-cache")]
96        {
97            debug.field("cache", &self.cache);
98        }
99        debug
100            .field("extra_css", &self.extra_css)
101            .field("preallocate_node_capacity", &self.preallocate_node_capacity)
102            .finish_non_exhaustive()
103    }
104}
105
106impl<'a> InlineOptions<'a> {
107    /// Override whether "style" tags should be inlined.
108    #[must_use]
109    pub fn inline_style_tags(mut self, inline_style_tags: bool) -> Self {
110        self.inline_style_tags = inline_style_tags;
111        self
112    }
113
114    /// Override whether "style" tags should be kept after processing.
115    #[must_use]
116    pub fn keep_style_tags(mut self, keep_style_tags: bool) -> Self {
117        self.keep_style_tags = keep_style_tags;
118        self
119    }
120
121    /// Override whether "link" tags should be kept after processing.
122    #[must_use]
123    pub fn keep_link_tags(mut self, keep_link_tags: bool) -> Self {
124        self.keep_link_tags = keep_link_tags;
125        self
126    }
127
128    /// Override whether "at-rules" should be kept after processing.
129    #[must_use]
130    pub fn keep_at_rules(mut self, keep_at_rules: bool) -> Self {
131        self.keep_at_rules = keep_at_rules;
132        self
133    }
134
135    /// Set base URL that will be used for loading external stylesheets via relative URLs.
136    #[must_use]
137    pub fn base_url(mut self, base_url: Option<Url>) -> Self {
138        self.base_url = base_url;
139        self
140    }
141
142    /// Override whether remote stylesheets should be loaded.
143    #[must_use]
144    pub fn load_remote_stylesheets(mut self, load_remote_stylesheets: bool) -> Self {
145        self.load_remote_stylesheets = load_remote_stylesheets;
146        self
147    }
148
149    /// Set external stylesheet cache.
150    #[must_use]
151    #[cfg(feature = "stylesheet-cache")]
152    pub fn cache(mut self, cache: impl Into<Option<StylesheetCache>>) -> Self {
153        if let Some(cache) = cache.into() {
154            self.cache = Some(std::sync::Mutex::new(cache));
155        } else {
156            self.cache = None;
157        }
158        self
159    }
160
161    /// Set additional CSS to inline.
162    #[must_use]
163    pub fn extra_css(mut self, extra_css: Option<Cow<'a, str>>) -> Self {
164        self.extra_css = extra_css;
165        self
166    }
167
168    /// Set the initial node capacity for HTML tree.
169    #[must_use]
170    pub fn preallocate_node_capacity(mut self, preallocate_node_capacity: usize) -> Self {
171        self.preallocate_node_capacity = preallocate_node_capacity;
172        self
173    }
174
175    /// Set the way to resolve stylesheets from various sources.
176    #[must_use]
177    pub fn resolver(mut self, resolver: Arc<dyn StylesheetResolver>) -> Self {
178        self.resolver = resolver;
179        self
180    }
181
182    /// Create a new `CSSInliner` instance from this options.
183    #[must_use]
184    pub const fn build(self) -> CSSInliner<'a> {
185        CSSInliner::new(self)
186    }
187}
188
189impl Default for InlineOptions<'_> {
190    #[inline]
191    fn default() -> Self {
192        InlineOptions {
193            inline_style_tags: true,
194            keep_style_tags: false,
195            keep_link_tags: false,
196            keep_at_rules: false,
197            base_url: None,
198            load_remote_stylesheets: true,
199            #[cfg(feature = "stylesheet-cache")]
200            cache: None,
201            extra_css: None,
202            preallocate_node_capacity: 32,
203            resolver: Arc::new(DefaultStylesheetResolver),
204        }
205    }
206}
207
208/// A specialized `Result` type for CSS inlining operations.
209pub type Result<T> = std::result::Result<T, InlineError>;
210
211/// Customizable CSS inliner.
212#[derive(Debug)]
213pub struct CSSInliner<'a> {
214    options: InlineOptions<'a>,
215}
216
// How much larger than the input the output buffer is sized.
const GROWTH_COEFFICIENT: f64 = 1.5;
// Rough divisor used to guess the number of individual declarations from the total CSS size.
const DECLARATION_SIZE_COEFFICIENT: f64 = 30.0;

/// Create an empty byte buffer sized for the inlined version of `html`.
///
/// The inlined document is usually bigger than the input, so the reserved capacity is the
/// input length multiplied by `GROWTH_COEFFICIENT`, clamped to `usize::MAX` and rounded.
fn allocate_output_buffer(html: &str) -> Vec<u8> {
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_sign_loss,
        clippy::cast_possible_truncation
    )]
    let capacity = {
        let scaled = html.len() as f64 * GROWTH_COEFFICIENT;
        scaled.min(usize::MAX as f64).round() as usize
    };
    Vec::with_capacity(capacity)
}
234
235impl<'a> CSSInliner<'a> {
236    /// Create a new `CSSInliner` instance with given options.
237    #[must_use]
238    #[inline]
239    pub const fn new(options: InlineOptions<'a>) -> Self {
240        CSSInliner { options }
241    }
242
243    /// Return a default `InlineOptions` that can fully configure the CSS inliner.
244    ///
245    /// # Examples
246    ///
247    /// Get default `InlineOptions`, then change base url
248    ///
249    /// ```rust
250    /// use css_inline::{CSSInliner, Url};
251    /// # use url::ParseError;
252    /// # fn run() -> Result<(), ParseError> {
253    /// let url = Url::parse("https://api.example.com")?;
254    /// let inliner = CSSInliner::options()
255    ///     .base_url(Some(url))
256    ///     .build();
257    /// # Ok(())
258    /// # }
259    /// # run().unwrap();
260    /// ```
261    #[must_use]
262    #[inline]
263    pub fn options() -> InlineOptions<'a> {
264        InlineOptions::default()
265    }
266
267    /// Inline CSS styles from <style> tags to matching elements in the HTML tree and return a
268    /// string.
269    ///
270    /// # Errors
271    ///
272    /// Inlining might fail for the following reasons:
273    ///   - Missing stylesheet file;
274    ///   - Remote stylesheet is not available;
275    ///   - IO errors;
276    ///   - Internal CSS selector parsing error;
277    ///
278    /// # Panics
279    ///
280    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
281    /// using the same inliner panicked while resolving external stylesheets.
282    #[inline]
283    pub fn inline(&self, html: &str) -> Result<String> {
284        let mut out = allocate_output_buffer(html);
285        self.inline_to(html, &mut out)?;
286        Ok(String::from_utf8_lossy(&out).to_string())
287    }
288
289    /// Inline CSS & write the result to a generic writer. Use it if you want to write
290    /// the inlined document to a file.
291    ///
292    /// # Errors
293    ///
294    /// Inlining might fail for the following reasons:
295    ///   - Missing stylesheet file;
296    ///   - Remote stylesheet is not available;
297    ///   - IO errors;
298    ///   - Internal CSS selector parsing error;
299    ///
300    /// # Panics
301    ///
302    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
303    /// using the same inliner panicked while resolving external stylesheets.
304    #[inline]
305    pub fn inline_to<W: Write>(&self, html: &str, target: &mut W) -> Result<()> {
306        self.inline_to_impl(html, None, target, InliningMode::Document)
307    }
308
309    /// Inline CSS into an HTML fragment.
310    ///
311    /// # Errors
312    ///
313    /// Inlining might fail for the following reasons:
314    ///   - Missing stylesheet file;
315    ///   - Remote stylesheet is not available;
316    ///   - IO errors;
317    ///   - Internal CSS selector parsing error;
318    ///
319    /// # Panics
320    ///
321    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
322    /// using the same inliner panicked while resolving external stylesheets.
323    pub fn inline_fragment(&self, html: &str, css: &str) -> Result<String> {
324        let mut out = allocate_output_buffer(html);
325        self.inline_fragment_to(html, css, &mut out)?;
326        Ok(String::from_utf8_lossy(&out).to_string())
327    }
328
329    /// Inline CSS into an HTML fragment and write the result to a generic writer.
330    ///
331    /// # Errors
332    ///
333    /// Inlining might fail for the following reasons:
334    ///   - Missing stylesheet file;
335    ///   - Remote stylesheet is not available;
336    ///   - IO errors;
337    ///   - Internal CSS selector parsing error;
338    ///
339    /// # Panics
340    ///
341    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
342    /// using the same inliner panicked while resolving external stylesheets.
343    pub fn inline_fragment_to<W: Write>(
344        &self,
345        html: &str,
346        css: &str,
347        target: &mut W,
348    ) -> Result<()> {
349        self.inline_to_impl(html, Some(css), target, InliningMode::Fragment)
350    }
351
352    #[allow(clippy::too_many_lines)]
353    fn inline_to_impl<W: Write>(
354        &self,
355        html: &str,
356        css: Option<&str>,
357        target: &mut W,
358        mode: InliningMode,
359    ) -> Result<()> {
360        let document = Document::parse_with_options(
361            html.as_bytes(),
362            self.options.preallocate_node_capacity,
363            mode,
364        );
365        // CSS rules may overlap, and the final set of rules applied to an element depend on
366        // selectors' specificity - selectors with higher specificity have more priority.
367        // Inlining happens in two major steps:
368        //   1. All available styles are mapped to respective elements together with their
369        //      selector's specificity. When two rules overlap on the same declaration, then
370        //      the one with higher specificity replaces another.
371        //   2. Resulting styles are merged into existing "style" tags.
372        let mut size_estimate: usize = if self.options.inline_style_tags {
373            document
374                .styles()
375                .map(|s| {
376                    // Add 1 to account for the extra `\n` char we add between styles
377                    s.len().saturating_add(1)
378                })
379                .sum()
380        } else {
381            0
382        };
383        if let Some(extra_css) = &self.options.extra_css {
384            size_estimate = size_estimate.saturating_add(extra_css.len());
385        }
386        if let Some(css) = css {
387            size_estimate = size_estimate.saturating_add(css.len());
388        }
389        let mut raw_styles = String::with_capacity(size_estimate);
390        if self.options.inline_style_tags || self.options.keep_at_rules {
391            for style in document.styles() {
392                raw_styles.push_str(style);
393                raw_styles.push('\n');
394            }
395        }
396        if self.options.load_remote_stylesheets {
397            let mut links = document.stylesheets().collect::<Vec<&str>>();
398            links.sort_unstable();
399            links.dedup();
400            for href in &links {
401                let url = self.get_full_url(href);
402                #[cfg(feature = "stylesheet-cache")]
403                if let Some(lock) = self.options.cache.as_ref() {
404                    let mut cache = lock.lock().expect("Cache lock is poisoned");
405                    if let Some(cached) = cache.get(url.as_ref()) {
406                        raw_styles.push_str(cached);
407                        raw_styles.push('\n');
408                        continue;
409                    }
410                }
411
412                let css = self.options.resolver.retrieve(url.as_ref())?;
413                raw_styles.push_str(&css);
414                raw_styles.push('\n');
415
416                #[cfg(feature = "stylesheet-cache")]
417                if let Some(lock) = self.options.cache.as_ref() {
418                    let mut cache = lock.lock().expect("Cache lock is poisoned");
419                    cache.put(url.into_owned(), css);
420                }
421            }
422        }
423        if let Some(extra_css) = &self.options.extra_css {
424            raw_styles.push_str(extra_css);
425        }
426        if let Some(css) = css {
427            raw_styles.push_str(css);
428        }
429        let mut parse_input = cssparser::ParserInput::new(&raw_styles);
430        let mut parser = cssparser::Parser::new(&mut parse_input);
431        // Allocating some memory for all the parsed declarations
432        #[allow(
433            clippy::cast_precision_loss,
434            clippy::cast_sign_loss,
435            clippy::cast_possible_truncation
436        )]
437        let mut declarations = Vec::with_capacity(
438            ((raw_styles.len() as f64 / DECLARATION_SIZE_COEFFICIENT)
439                .min(usize::MAX as f64)
440                .round() as usize)
441                .max(16),
442        );
443        let mut rule_list = Vec::with_capacity(declarations.capacity() / 3);
444        let at_rules = if self.options.keep_at_rules {
445            let mut at_rules = String::new();
446            for rule in cssparser::StyleSheetParser::new(
447                &mut parser,
448                &mut parser::AtRuleFilteringParser::new(&mut declarations, &mut at_rules),
449            )
450            .flatten()
451            {
452                if self.options.inline_style_tags {
453                    rule_list.push(rule);
454                }
455            }
456            Some(at_rules)
457        } else if !raw_styles.is_empty() {
458            // At this point, we collected some styles from at least one source, hence we need to process it.
459            for rule in cssparser::StyleSheetParser::new(
460                &mut parser,
461                &mut parser::CSSRuleListParser::new(&mut declarations),
462            )
463            .flatten()
464            {
465                rule_list.push(rule);
466            }
467            None
468        } else {
469            None
470        };
471        let mut styles = IndexMap::with_capacity_and_hasher(
472            document.elements.len().max(16),
473            BuildNoHashHasher::default(),
474        );
475        // This cache is unused but required in the `selectors` API
476        let mut caches = SelectorCaches::default();
477        for (selectors, (start, end)) in &rule_list {
478            // Only CSS Syntax Level 3 is supported, therefore it is OK to split by `,`
479            // With `is` or `where` selectors (Level 4) this split should be done on the parser level
480            for selector in selectors.split(',') {
481                if let Ok(matching_elements) = document.select(selector, &mut caches) {
482                    let specificity = matching_elements.specificity();
483                    for matching_element in matching_elements {
484                        let element_styles =
485                            styles.entry(matching_element.node_id).or_insert_with(|| {
486                                ElementStyleMap::with_capacity_and_hasher(
487                                    end.saturating_sub(*start).saturating_add(4),
488                                    BuildHasherDefault::default(),
489                                )
490                            });
491                        // Iterate over pairs of property name & value
492                        // Example: `padding`, `0`
493                        for (name, value) in &declarations[*start..*end] {
494                            match element_styles.entry(name.as_ref()) {
495                                indexmap::map::Entry::Occupied(mut entry) => {
496                                    match (
497                                        value.trim_end().ends_with("!important"),
498                                        entry.get().1.trim_end().ends_with("!important"),
499                                    ) {
500                                        // Equal importance; the higher specificity wins.
501                                        (false, false) | (true, true) => {
502                                            if entry.get().0 <= specificity {
503                                                entry.insert((specificity, *value));
504                                            }
505                                        }
506                                        // Only the new value is important; it wins.
507                                        (true, false) => {
508                                            entry.insert((specificity, *value));
509                                        }
510                                        // The old value is important and the new one is not; keep
511                                        // the old value.
512                                        (false, true) => {}
513                                    }
514                                }
515                                indexmap::map::Entry::Vacant(entry) => {
516                                    entry.insert((specificity, *value));
517                                }
518                            }
519                        }
520                    }
521                }
522                // Ignore not parsable selectors. E.g. there is no parser for @media queries
523                // Which means that they will fall into this category and will be ignored
524            }
525        }
526        document.serialize(
527            target,
528            styles,
529            self.options.keep_style_tags,
530            self.options.keep_link_tags,
531            at_rules.as_ref(),
532            mode,
533        )?;
534        Ok(())
535    }
536
537    fn get_full_url<'u>(&self, href: &'u str) -> Cow<'u, str> {
538        // Valid absolute URL
539        if Url::parse(href).is_ok() {
540            return Cow::Borrowed(href);
541        }
542        if let Some(base_url) = &self.options.base_url {
543            // Use the same scheme as the base URL
544            if href.starts_with("//") {
545                return Cow::Owned(format!("{}:{}", base_url.scheme(), href));
546            }
547            // Not a URL, then it is a relative URL
548            if let Ok(new_url) = base_url.join(href) {
549                return Cow::Owned(new_url.into());
550            }
551        }
552        // If it is not a valid URL and there is no base URL specified, we assume a local path
553        Cow::Borrowed(href)
554    }
555}
556
557impl Default for CSSInliner<'_> {
558    #[inline]
559    fn default() -> Self {
560        CSSInliner::new(InlineOptions::default())
561    }
562}
563
564/// Shortcut for inlining CSS with default parameters.
565///
566/// # Errors
567///
568/// Inlining might fail for the following reasons:
569///   - Missing stylesheet file;
570///   - Remote stylesheet is not available;
571///   - IO errors;
572///   - Internal CSS selector parsing error;
573///
574/// # Panics
575///
576/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
577/// using the same inliner panicked while resolving external stylesheets.
578#[inline]
579pub fn inline(html: &str) -> Result<String> {
580    CSSInliner::default().inline(html)
581}
582
583/// Shortcut for inlining CSS with default parameters and writing the output to a generic writer.
584///
585/// # Errors
586///
587/// Inlining might fail for the following reasons:
588///   - Missing stylesheet file;
589///   - Remote stylesheet is not available;
590///   - IO errors;
591///   - Internal CSS selector parsing error;
592///
593/// # Panics
594///
595/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
596/// using the same inliner panicked while resolving external stylesheets.
597#[inline]
598pub fn inline_to<W: Write>(html: &str, target: &mut W) -> Result<()> {
599    CSSInliner::default().inline_to(html, target)
600}
601
602/// Shortcut for inlining CSS into an HTML fragment with default parameters.
603///
604/// # Errors
605///
606/// Inlining might fail for the following reasons:
607///   - Missing stylesheet file;
608///   - Remote stylesheet is not available;
609///   - IO errors;
610///   - Internal CSS selector parsing error;
611///
612/// # Panics
613///
614/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
615/// using the same inliner panicked while resolving external stylesheets.
616#[inline]
617pub fn inline_fragment(html: &str, css: &str) -> Result<String> {
618    CSSInliner::default().inline_fragment(html, css)
619}
620
621/// Shortcut for inlining CSS into an HTML fragment with default parameters and writing the output to a generic writer.
622///
623/// # Errors
624///
625/// Inlining might fail for the following reasons:
626///   - Missing stylesheet file;
627///   - Remote stylesheet is not available;
628///   - IO errors;
629///   - Internal CSS selector parsing error;
630///
631/// # Panics
632///
633/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
634/// using the same inliner panicked while resolving external stylesheets.
635#[inline]
636pub fn inline_fragment_to<W: Write>(html: &str, css: &str, target: &mut W) -> Result<()> {
637    CSSInliner::default().inline_fragment_to(html, css, target)
638}
639
#[cfg(test)]
mod tests {
    use super::{CSSInliner, InlineOptions};

    // Compile-time check: the test fails to build if either public type stops being
    // `Send + Sync`.
    #[test]
    fn test_inliner_sync_send() {
        fn requires_send_sync<T: Send + Sync>() {}
        requires_send_sync::<CSSInliner<'_>>();
        requires_send_sync::<InlineOptions<'_>>();
    }
}