css_inline/
lib.rs

1#![doc = include_str!("../README.md")]
2#![warn(
3    clippy::pedantic,
4    clippy::doc_markdown,
5    clippy::redundant_closure,
6    clippy::explicit_iter_loop,
7    clippy::match_same_arms,
8    clippy::needless_borrow,
9    clippy::print_stdout,
10    clippy::arithmetic_side_effects,
11    clippy::cast_possible_truncation,
12    clippy::unwrap_used,
13    clippy::map_unwrap_or,
14    clippy::trivially_copy_pass_by_ref,
15    clippy::needless_pass_by_value,
16    missing_docs,
17    missing_debug_implementations,
18    trivial_casts,
19    trivial_numeric_casts,
20    unreachable_pub,
21    unused_extern_crates,
22    unused_import_braces,
23    unused_qualifications,
24    variant_size_differences,
25    rust_2018_idioms,
26    rust_2018_compatibility,
27    rust_2021_compatibility
28)]
29#![allow(clippy::module_name_repetitions)]
30pub mod error;
31mod hasher;
32mod html;
33mod parser;
34mod resolver;
35
36pub use error::InlineError;
37use indexmap::IndexMap;
38#[cfg(feature = "stylesheet-cache")]
39use lru::{DefaultHasher, LruCache};
40use selectors::NthIndexCache;
41use std::{borrow::Cow, fmt::Formatter, hash::BuildHasherDefault, io::Write, sync::Arc};
42
43use crate::html::ElementStyleMap;
44use hasher::BuildNoHashHasher;
45use html::{Document, InliningMode};
46pub use resolver::{DefaultStylesheetResolver, StylesheetResolver};
47pub use url::{ParseError, Url};
48
/// An LRU cache for external stylesheets.
///
/// Keys are the stylesheet locations as passed to the resolver, values are the retrieved
/// stylesheet contents.
#[cfg(feature = "stylesheet-cache")]
pub type StylesheetCache<S = DefaultHasher> = LruCache<String, String, S>;
52
/// Configuration options for CSS inlining process.
///
/// A value with default settings is available via `InlineOptions::default()`; individual
/// settings can then be overridden with the builder-style methods and turned into a
/// `CSSInliner` with `build`.
#[allow(clippy::struct_excessive_bools)]
pub struct InlineOptions<'a> {
    /// Whether to inline CSS from "style" tags. Defaults to `true`.
    ///
    /// Sometimes HTML may include a lot of boilerplate styles, that are not applicable in every
    /// scenario and it is useful to ignore them and use `extra_css` instead.
    pub inline_style_tags: bool,
    /// Keep "style" tags after inlining. Defaults to `false`.
    pub keep_style_tags: bool,
    /// Keep "link" tags after inlining. Defaults to `false`.
    pub keep_link_tags: bool,
    /// Used for loading external stylesheets via relative URLs. Defaults to `None`.
    ///
    /// Hrefs starting with `//` reuse the scheme of this URL.
    pub base_url: Option<Url>,
    /// Whether remote stylesheets should be loaded or not. Defaults to `true`.
    pub load_remote_stylesheets: bool,
    /// External stylesheet cache. Defaults to `None`.
    ///
    /// Wrapped in a mutex because inlining only has shared access to the options.
    #[cfg(feature = "stylesheet-cache")]
    pub cache: Option<std::sync::Mutex<StylesheetCache>>,
    // The point of using `Cow` here is Python bindings, where it is problematic to pass a reference
    // without dealing with memory leaks & unsafe. With `Cow` we can use moved values as `String` in
    // Python wrapper for `CSSInliner` and `&str` in Rust & simple functions on the Python side
    /// Additional CSS to inline. Defaults to `None`.
    pub extra_css: Option<Cow<'a, str>>,
    /// Pre-allocate capacity for HTML nodes during parsing. Defaults to `32`.
    /// It can improve performance when you have an estimate of the number of nodes in your HTML document.
    pub preallocate_node_capacity: usize,
    /// A way to resolve stylesheets from various sources.
    /// Defaults to `DefaultStylesheetResolver`.
    pub resolver: Arc<dyn StylesheetResolver>,
}
83
84impl std::fmt::Debug for InlineOptions<'_> {
85    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
86        let mut debug = f.debug_struct("InlineOptions");
87        debug
88            .field("inline_style_tags", &self.inline_style_tags)
89            .field("keep_style_tags", &self.keep_style_tags)
90            .field("keep_link_tags", &self.keep_link_tags)
91            .field("base_url", &self.base_url)
92            .field("load_remote_stylesheets", &self.load_remote_stylesheets);
93        #[cfg(feature = "stylesheet-cache")]
94        {
95            debug.field("cache", &self.cache);
96        }
97        debug
98            .field("extra_css", &self.extra_css)
99            .field("preallocate_node_capacity", &self.preallocate_node_capacity)
100            .finish_non_exhaustive()
101    }
102}
103
104impl<'a> InlineOptions<'a> {
105    /// Override whether "style" tags should be inlined.
106    #[must_use]
107    pub fn inline_style_tags(mut self, inline_style_tags: bool) -> Self {
108        self.inline_style_tags = inline_style_tags;
109        self
110    }
111
112    /// Override whether "style" tags should be kept after processing.
113    #[must_use]
114    pub fn keep_style_tags(mut self, keep_style_tags: bool) -> Self {
115        self.keep_style_tags = keep_style_tags;
116        self
117    }
118
119    /// Override whether "link" tags should be kept after processing.
120    #[must_use]
121    pub fn keep_link_tags(mut self, keep_link_tags: bool) -> Self {
122        self.keep_link_tags = keep_link_tags;
123        self
124    }
125
126    /// Set base URL that will be used for loading external stylesheets via relative URLs.
127    #[must_use]
128    pub fn base_url(mut self, base_url: Option<Url>) -> Self {
129        self.base_url = base_url;
130        self
131    }
132
133    /// Override whether remote stylesheets should be loaded.
134    #[must_use]
135    pub fn load_remote_stylesheets(mut self, load_remote_stylesheets: bool) -> Self {
136        self.load_remote_stylesheets = load_remote_stylesheets;
137        self
138    }
139
140    /// Set external stylesheet cache.
141    #[must_use]
142    #[cfg(feature = "stylesheet-cache")]
143    pub fn cache(mut self, cache: impl Into<Option<StylesheetCache>>) -> Self {
144        if let Some(cache) = cache.into() {
145            self.cache = Some(std::sync::Mutex::new(cache));
146        } else {
147            self.cache = None;
148        }
149        self
150    }
151
152    /// Set additional CSS to inline.
153    #[must_use]
154    pub fn extra_css(mut self, extra_css: Option<Cow<'a, str>>) -> Self {
155        self.extra_css = extra_css;
156        self
157    }
158
159    /// Set the initial node capacity for HTML tree.
160    #[must_use]
161    pub fn preallocate_node_capacity(mut self, preallocate_node_capacity: usize) -> Self {
162        self.preallocate_node_capacity = preallocate_node_capacity;
163        self
164    }
165
166    /// Set the way to resolve stylesheets from various sources.
167    #[must_use]
168    pub fn resolver(mut self, resolver: Arc<dyn StylesheetResolver>) -> Self {
169        self.resolver = resolver;
170        self
171    }
172
173    /// Create a new `CSSInliner` instance from this options.
174    #[must_use]
175    pub const fn build(self) -> CSSInliner<'a> {
176        CSSInliner::new(self)
177    }
178}
179
180impl Default for InlineOptions<'_> {
181    #[inline]
182    fn default() -> Self {
183        InlineOptions {
184            inline_style_tags: true,
185            keep_style_tags: false,
186            keep_link_tags: false,
187            base_url: None,
188            load_remote_stylesheets: true,
189            #[cfg(feature = "stylesheet-cache")]
190            cache: None,
191            extra_css: None,
192            preallocate_node_capacity: 32,
193            resolver: Arc::new(DefaultStylesheetResolver),
194        }
195    }
196}
197
/// A specialized `Result` type for CSS inlining operations.
///
/// The error variant is always [`InlineError`].
pub type Result<T> = std::result::Result<T, InlineError>;
200
/// Customizable CSS inliner.
///
/// Wraps an [`InlineOptions`] value that is consulted on every inlining call.
#[derive(Debug)]
pub struct CSSInliner<'a> {
    // Settings applied by all `inline*` methods.
    options: InlineOptions<'a>,
}
206
// How much larger than the input the output buffer is allocated up front.
const GROWTH_COEFFICIENT: f64 = 1.5;
// A rough coefficient to calculate the number of individual declarations based on the total CSS size.
const DECLARATION_SIZE_COEFFICIENT: f64 = 30.0;

/// Create an empty byte buffer for the inlined document.
///
/// The buffer is sized to `GROWTH_COEFFICIENT` times the input length, because the inlined
/// version is usually bigger than the original HTML.
fn allocate_output_buffer(html: &str) -> Vec<u8> {
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_sign_loss,
        clippy::cast_possible_truncation
    )]
    let capacity = {
        let scaled = html.len() as f64 * GROWTH_COEFFICIENT;
        scaled.min(usize::MAX as f64).round() as usize
    };
    Vec::with_capacity(capacity)
}
224
225impl<'a> CSSInliner<'a> {
226    /// Create a new `CSSInliner` instance with given options.
227    #[must_use]
228    #[inline]
229    pub const fn new(options: InlineOptions<'a>) -> Self {
230        CSSInliner { options }
231    }
232
233    /// Return a default `InlineOptions` that can fully configure the CSS inliner.
234    ///
235    /// # Examples
236    ///
237    /// Get default `InlineOptions`, then change base url
238    ///
239    /// ```rust
240    /// use css_inline::{CSSInliner, Url};
241    /// # use url::ParseError;
242    /// # fn run() -> Result<(), ParseError> {
243    /// let url = Url::parse("https://api.example.com")?;
244    /// let inliner = CSSInliner::options()
245    ///     .base_url(Some(url))
246    ///     .build();
247    /// # Ok(())
248    /// # }
249    /// # run().unwrap();
250    /// ```
251    #[must_use]
252    #[inline]
253    pub fn options() -> InlineOptions<'a> {
254        InlineOptions::default()
255    }
256
257    /// Inline CSS styles from <style> tags to matching elements in the HTML tree and return a
258    /// string.
259    ///
260    /// # Errors
261    ///
262    /// Inlining might fail for the following reasons:
263    ///   - Missing stylesheet file;
264    ///   - Remote stylesheet is not available;
265    ///   - IO errors;
266    ///   - Internal CSS selector parsing error;
267    ///
268    /// # Panics
269    ///
270    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
271    /// using the same inliner panicked while resolving external stylesheets.
272    #[inline]
273    pub fn inline(&self, html: &str) -> Result<String> {
274        let mut out = allocate_output_buffer(html);
275        self.inline_to(html, &mut out)?;
276        Ok(String::from_utf8_lossy(&out).to_string())
277    }
278
279    /// Inline CSS & write the result to a generic writer. Use it if you want to write
280    /// the inlined document to a file.
281    ///
282    /// # Errors
283    ///
284    /// Inlining might fail for the following reasons:
285    ///   - Missing stylesheet file;
286    ///   - Remote stylesheet is not available;
287    ///   - IO errors;
288    ///   - Internal CSS selector parsing error;
289    ///
290    /// # Panics
291    ///
292    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
293    /// using the same inliner panicked while resolving external stylesheets.
294    #[inline]
295    pub fn inline_to<W: Write>(&self, html: &str, target: &mut W) -> Result<()> {
296        self.inline_to_impl(html, None, target, InliningMode::Document)
297    }
298
299    /// Inline CSS into an HTML fragment.
300    ///
301    /// # Errors
302    ///
303    /// Inlining might fail for the following reasons:
304    ///   - Missing stylesheet file;
305    ///   - Remote stylesheet is not available;
306    ///   - IO errors;
307    ///   - Internal CSS selector parsing error;
308    ///
309    /// # Panics
310    ///
311    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
312    /// using the same inliner panicked while resolving external stylesheets.
313    pub fn inline_fragment(&self, html: &str, css: &str) -> Result<String> {
314        let mut out = allocate_output_buffer(html);
315        self.inline_fragment_to(html, css, &mut out)?;
316        Ok(String::from_utf8_lossy(&out).to_string())
317    }
318
319    /// Inline CSS into an HTML fragment and write the result to a generic writer.
320    ///
321    /// # Errors
322    ///
323    /// Inlining might fail for the following reasons:
324    ///   - Missing stylesheet file;
325    ///   - Remote stylesheet is not available;
326    ///   - IO errors;
327    ///   - Internal CSS selector parsing error;
328    ///
329    /// # Panics
330    ///
331    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
332    /// using the same inliner panicked while resolving external stylesheets.
333    pub fn inline_fragment_to<W: Write>(
334        &self,
335        html: &str,
336        css: &str,
337        target: &mut W,
338    ) -> Result<()> {
339        self.inline_to_impl(html, Some(css), target, InliningMode::Fragment)
340    }
341
    /// Shared implementation behind the public `inline*` methods.
    ///
    /// Parses `html` in the given `mode`, then concatenates CSS from the enabled sources in this
    /// order: "style" tags, linked stylesheets, `extra_css` from the options, and the optional
    /// `css` argument. The combined CSS is parsed into rules, the declarations of each rule are
    /// assigned to the elements its selectors match, and the document is serialized to `target`.
    ///
    /// # Errors
    ///
    /// Propagates errors returned by the stylesheet resolver and by document serialization.
    ///
    /// # Panics
    ///
    /// Panics if the stylesheet cache lock is poisoned (`stylesheet-cache` feature only).
    #[allow(clippy::too_many_lines)]
    fn inline_to_impl<W: Write>(
        &self,
        html: &str,
        css: Option<&str>,
        target: &mut W,
        mode: InliningMode,
    ) -> Result<()> {
        let document = Document::parse_with_options(
            html.as_bytes(),
            self.options.preallocate_node_capacity,
            mode,
        );
        // CSS rules may overlap, and the final set of rules applied to an element depend on
        // selectors' specificity - selectors with higher specificity have more priority.
        // Inlining happens in two major steps:
        //   1. All available styles are mapped to respective elements together with their
        //      selector's specificity. When two rules overlap on the same declaration, then
        //      the one with higher specificity replaces another.
        //   2. Resulting styles are merged into existing "style" tags.
        //
        // Estimate the size of the combined CSS up front to avoid repeated reallocations.
        // Linked stylesheets are not part of the estimate, their size is not known yet.
        let mut size_estimate: usize = if self.options.inline_style_tags {
            document
                .styles()
                .map(|s| {
                    // Add 1 to account for the extra `\n` char we add between styles
                    s.len().saturating_add(1)
                })
                .sum()
        } else {
            0
        };
        if let Some(extra_css) = &self.options.extra_css {
            size_estimate = size_estimate.saturating_add(extra_css.len());
        }
        if let Some(css) = css {
            size_estimate = size_estimate.saturating_add(css.len());
        }
        // All CSS sources are concatenated into a single buffer that is parsed once.
        let mut raw_styles = String::with_capacity(size_estimate);
        if self.options.inline_style_tags {
            for style in document.styles() {
                raw_styles.push_str(style);
                raw_styles.push('\n');
            }
        }
        if self.options.load_remote_stylesheets {
            // Sorting + dedup ensures every distinct href is retrieved only once.
            // NOTE(review): this also means linked stylesheets are appended in sorted href order
            // rather than in document order, which may matter for rules of equal specificity
            // (the later one wins below) - confirm this is intended.
            let mut links = document.stylesheets().collect::<Vec<&str>>();
            links.sort_unstable();
            links.dedup();
            for href in &links {
                let url = self.get_full_url(href);
                // Cache hit: reuse the stored stylesheet and skip the resolver entirely.
                #[cfg(feature = "stylesheet-cache")]
                if let Some(lock) = self.options.cache.as_ref() {
                    let mut cache = lock.lock().expect("Cache lock is poisoned");
                    if let Some(cached) = cache.get(url.as_ref()) {
                        raw_styles.push_str(cached);
                        raw_styles.push('\n');
                        continue;
                    }
                }

                // The cache lock (if any) is already released here, so retrieval does not
                // happen while holding it.
                let css = self.options.resolver.retrieve(url.as_ref())?;
                raw_styles.push_str(&css);
                raw_styles.push('\n');

                // Cache miss: remember the freshly retrieved stylesheet.
                #[cfg(feature = "stylesheet-cache")]
                if let Some(lock) = self.options.cache.as_ref() {
                    let mut cache = lock.lock().expect("Cache lock is poisoned");
                    cache.put(url.into_owned(), css);
                }
            }
        }
        if let Some(extra_css) = &self.options.extra_css {
            raw_styles.push_str(extra_css);
        }
        if let Some(css) = css {
            raw_styles.push_str(css);
        }
        // Maps a node id to the declarations that should be inlined into that element.
        let mut styles = IndexMap::with_capacity_and_hasher(128, BuildNoHashHasher::default());
        let mut parse_input = cssparser::ParserInput::new(&raw_styles);
        let mut parser = cssparser::Parser::new(&mut parse_input);
        // Allocating some memory for all the parsed declarations
        #[allow(
            clippy::cast_precision_loss,
            clippy::cast_sign_loss,
            clippy::cast_possible_truncation
        )]
        let mut declarations = Vec::with_capacity(
            ((raw_styles.len() as f64 / DECLARATION_SIZE_COEFFICIENT)
                .min(usize::MAX as f64)
                .round() as usize)
                .max(16),
        );
        // Each collected rule is a selector list plus the `start..end` range of its declarations
        // inside `declarations`. `flatten` skips the items the parser yields as errors.
        let mut rule_list = Vec::with_capacity(declarations.capacity() / 3);
        for rule in cssparser::StyleSheetParser::new(
            &mut parser,
            &mut parser::CSSRuleListParser::new(&mut declarations),
        )
        .flatten()
        {
            rule_list.push(rule);
        }
        // This cache is unused but required in the `selectors` API
        let mut caches = NthIndexCache::default();
        for (selectors, (start, end)) in &rule_list {
            // Only CSS Syntax Level 3 is supported, therefore it is OK to split by `,`
            // With `is` or `where` selectors (Level 4) this split should be done on the parser level
            for selector in selectors.split(',') {
                if let Ok(matching_elements) = document.select(selector, &mut caches) {
                    let specificity = matching_elements.specificity();
                    for matching_element in matching_elements {
                        // Lazily create the per-element style map, sized for this rule's
                        // declarations plus a little headroom.
                        let element_styles =
                            styles.entry(matching_element.node_id).or_insert_with(|| {
                                ElementStyleMap::with_capacity_and_hasher(
                                    end.saturating_sub(*start).saturating_add(4),
                                    BuildHasherDefault::default(),
                                )
                            });
                        // Iterate over pairs of property name & value
                        // Example: `padding`, `0`
                        for (name, value) in &declarations[*start..*end] {
                            match element_styles.entry(name.as_ref()) {
                                // The property was already set by an earlier rule; decide which
                                // value survives.
                                indexmap::map::Entry::Occupied(mut entry) => {
                                    match (
                                        value.contains("!important"),
                                        entry.get().1.contains("!important"),
                                    ) {
                                        // Equal importance; the higher specificity wins.
                                        // On equal specificity the later rule replaces the
                                        // earlier one.
                                        (false, false) | (true, true) => {
                                            if entry.get().0 <= specificity {
                                                entry.insert((specificity, *value));
                                            }
                                        }
                                        // Only the new value is important; it wins.
                                        (true, false) => {
                                            entry.insert((specificity, *value));
                                        }
                                        // The old value is important and the new one is not; keep
                                        // the old value.
                                        (false, true) => {}
                                    }
                                }
                                // First time this property is seen for the element.
                                indexmap::map::Entry::Vacant(entry) => {
                                    entry.insert((specificity, *value));
                                }
                            }
                        }
                    }
                }
                // Ignore not parsable selectors. E.g. there is no parser for @media queries
                // Which means that they will fall into this category and will be ignored
            }
        }
        // Write the document with the collected styles; the flags control whether "style" and
        // "link" tags are kept in the output.
        document.serialize(
            target,
            styles,
            self.options.keep_style_tags,
            self.options.keep_link_tags,
            mode,
        )?;
        Ok(())
    }
503
504    fn get_full_url<'u>(&self, href: &'u str) -> Cow<'u, str> {
505        // Valid absolute URL
506        if Url::parse(href).is_ok() {
507            return Cow::Borrowed(href);
508        }
509        if let Some(base_url) = &self.options.base_url {
510            // Use the same scheme as the base URL
511            if href.starts_with("//") {
512                return Cow::Owned(format!("{}:{}", base_url.scheme(), href));
513            }
514            // Not a URL, then it is a relative URL
515            if let Ok(new_url) = base_url.join(href) {
516                return Cow::Owned(new_url.into());
517            }
518        }
519        // If it is not a valid URL and there is no base URL specified, we assume a local path
520        Cow::Borrowed(href)
521    }
522}
523
524impl Default for CSSInliner<'_> {
525    #[inline]
526    fn default() -> Self {
527        CSSInliner::new(InlineOptions::default())
528    }
529}
530
531/// Shortcut for inlining CSS with default parameters.
532///
533/// # Errors
534///
535/// Inlining might fail for the following reasons:
536///   - Missing stylesheet file;
537///   - Remote stylesheet is not available;
538///   - IO errors;
539///   - Internal CSS selector parsing error;
540///
541/// # Panics
542///
543/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
544/// using the same inliner panicked while resolving external stylesheets.
545#[inline]
546pub fn inline(html: &str) -> Result<String> {
547    CSSInliner::default().inline(html)
548}
549
550/// Shortcut for inlining CSS with default parameters and writing the output to a generic writer.
551///
552/// # Errors
553///
554/// Inlining might fail for the following reasons:
555///   - Missing stylesheet file;
556///   - Remote stylesheet is not available;
557///   - IO errors;
558///   - Internal CSS selector parsing error;
559///
560/// # Panics
561///
562/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
563/// using the same inliner panicked while resolving external stylesheets.
564#[inline]
565pub fn inline_to<W: Write>(html: &str, target: &mut W) -> Result<()> {
566    CSSInliner::default().inline_to(html, target)
567}
568
569/// Shortcut for inlining CSS into an HTML fragment with default parameters.
570///
571/// # Errors
572///
573/// Inlining might fail for the following reasons:
574///   - Missing stylesheet file;
575///   - Remote stylesheet is not available;
576///   - IO errors;
577///   - Internal CSS selector parsing error;
578///
579/// # Panics
580///
581/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
582/// using the same inliner panicked while resolving external stylesheets.
583#[inline]
584pub fn inline_fragment(html: &str, css: &str) -> Result<String> {
585    CSSInliner::default().inline_fragment(html, css)
586}
587
588/// Shortcut for inlining CSS into an HTML fragment with default parameters and writing the output to a generic writer.
589///
590/// # Errors
591///
592/// Inlining might fail for the following reasons:
593///   - Missing stylesheet file;
594///   - Remote stylesheet is not available;
595///   - IO errors;
596///   - Internal CSS selector parsing error;
597///
598/// # Panics
599///
600/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
601/// using the same inliner panicked while resolving external stylesheets.
602#[inline]
603pub fn inline_fragment_to<W: Write>(html: &str, css: &str, target: &mut W) -> Result<()> {
604    CSSInliner::default().inline_fragment_to(html, css, target)
605}
606
#[cfg(test)]
mod tests {
    use super::{CSSInliner, InlineOptions};

    // Compile-time check: the test only builds if both public types are `Send` and `Sync`.
    #[test]
    fn test_inliner_sync_send() {
        fn require_send_and_sync<T: Send + Sync>() {}
        require_send_and_sync::<InlineOptions<'_>>();
        require_send_and_sync::<CSSInliner<'_>>();
    }
}
617}