css_inline/
lib.rs

1#![doc = include_str!("../README.md")]
2#![warn(
3    clippy::pedantic,
4    clippy::doc_markdown,
5    clippy::redundant_closure,
6    clippy::explicit_iter_loop,
7    clippy::match_same_arms,
8    clippy::needless_borrow,
9    clippy::print_stdout,
10    clippy::arithmetic_side_effects,
11    clippy::cast_possible_truncation,
12    clippy::unwrap_used,
13    clippy::map_unwrap_or,
14    clippy::trivially_copy_pass_by_ref,
15    clippy::needless_pass_by_value,
16    missing_docs,
17    missing_debug_implementations,
18    trivial_casts,
19    trivial_numeric_casts,
20    unreachable_pub,
21    unused_extern_crates,
22    unused_import_braces,
23    unused_qualifications,
24    variant_size_differences,
25    rust_2018_idioms,
26    rust_2018_compatibility,
27    rust_2021_compatibility
28)]
29#![allow(clippy::module_name_repetitions)]
30pub mod error;
31mod hasher;
32mod html;
33mod parser;
34mod resolver;
35
36pub use error::InlineError;
37use indexmap::IndexMap;
38#[cfg(feature = "stylesheet-cache")]
39use lru::{DefaultHasher, LruCache};
40use selectors::context::SelectorCaches;
41use std::{borrow::Cow, fmt::Formatter, hash::BuildHasherDefault, io::Write, sync::Arc};
42
43use crate::html::ElementStyleMap;
44use hasher::BuildNoHashHasher;
45use html::{Document, InliningMode};
46pub use resolver::{DefaultStylesheetResolver, StylesheetResolver};
47pub use url::{ParseError, Url};
48
/// An LRU cache for external stylesheets.
///
/// Both keys and values are `String`s. The inliner looks entries up by the resolved
/// stylesheet location and stores the retrieved CSS text under that key.
/// Only available when the `stylesheet-cache` feature is enabled.
#[cfg(feature = "stylesheet-cache")]
pub type StylesheetCache<S = DefaultHasher> = LruCache<String, String, S>;
52
/// Configuration options for the CSS inlining process.
///
/// Instances are usually obtained via `InlineOptions::default()` or `CSSInliner::options()`
/// and then adjusted with the builder-style methods before calling `build`.
#[allow(clippy::struct_excessive_bools)]
pub struct InlineOptions<'a> {
    /// Whether to inline CSS from "style" tags. Defaults to `true`.
    ///
    /// Sometimes HTML may include a lot of boilerplate styles that are not applicable in every
    /// scenario, and it is useful to ignore them and use `extra_css` instead.
    pub inline_style_tags: bool,
    /// Keep "style" tags after inlining. Defaults to `false`.
    pub keep_style_tags: bool,
    /// Keep "link" tags after inlining. Defaults to `false`.
    pub keep_link_tags: bool,
    /// Keep "at-rules" after inlining. Defaults to `false`.
    pub keep_at_rules: bool,
    /// Remove trailing semicolons and spaces between properties and values.
    /// Defaults to `false`.
    pub minify_css: bool,
    /// Used for loading external stylesheets via relative URLs. Defaults to `None`.
    pub base_url: Option<Url>,
    /// Whether remote stylesheets should be loaded or not. Defaults to `true`.
    pub load_remote_stylesheets: bool,
    /// External stylesheet cache. Defaults to `None` (no caching).
    ///
    /// Wrapped in a `Mutex` because the cache is mutated through a shared inliner reference.
    #[cfg(feature = "stylesheet-cache")]
    pub cache: Option<std::sync::Mutex<StylesheetCache>>,
    // `Cow` is used here for the sake of the Python bindings, where passing a reference is
    // problematic without dealing with memory leaks & unsafe. With `Cow`, the Python wrapper for
    // `CSSInliner` can hand over an owned `String`, while Rust callers and the simple functions
    // on the Python side can keep passing `&str`.
    /// Additional CSS to inline. Defaults to `None`.
    pub extra_css: Option<Cow<'a, str>>,
    /// Pre-allocate capacity for HTML nodes during parsing. Defaults to `32`.
    /// It can improve performance when you have an estimate of the number of nodes in your HTML document.
    pub preallocate_node_capacity: usize,
    /// A way to resolve stylesheets from various sources.
    /// Defaults to `DefaultStylesheetResolver`.
    pub resolver: Arc<dyn StylesheetResolver>,
}
87
88impl std::fmt::Debug for InlineOptions<'_> {
89    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
90        let mut debug = f.debug_struct("InlineOptions");
91        debug
92            .field("inline_style_tags", &self.inline_style_tags)
93            .field("keep_style_tags", &self.keep_style_tags)
94            .field("keep_link_tags", &self.keep_link_tags)
95            .field("base_url", &self.base_url)
96            .field("load_remote_stylesheets", &self.load_remote_stylesheets);
97        #[cfg(feature = "stylesheet-cache")]
98        {
99            debug.field("cache", &self.cache);
100        }
101        debug
102            .field("extra_css", &self.extra_css)
103            .field("preallocate_node_capacity", &self.preallocate_node_capacity)
104            .finish_non_exhaustive()
105    }
106}
107
108impl<'a> InlineOptions<'a> {
109    /// Override whether "style" tags should be inlined.
110    #[must_use]
111    pub fn inline_style_tags(mut self, inline_style_tags: bool) -> Self {
112        self.inline_style_tags = inline_style_tags;
113        self
114    }
115
116    /// Override whether "style" tags should be kept after processing.
117    #[must_use]
118    pub fn keep_style_tags(mut self, keep_style_tags: bool) -> Self {
119        self.keep_style_tags = keep_style_tags;
120        self
121    }
122
123    /// Override whether "link" tags should be kept after processing.
124    #[must_use]
125    pub fn keep_link_tags(mut self, keep_link_tags: bool) -> Self {
126        self.keep_link_tags = keep_link_tags;
127        self
128    }
129
130    /// Override whether "at-rules" should be kept after processing.
131    #[must_use]
132    pub fn keep_at_rules(mut self, keep_at_rules: bool) -> Self {
133        self.keep_at_rules = keep_at_rules;
134        self
135    }
136
137    /// Override whether trailing semicolons and spaces between properties and values should be removed.
138    #[must_use]
139    pub fn minify_css(mut self, minify_css: bool) -> Self {
140        self.minify_css = minify_css;
141        self
142    }
143
144    /// Set base URL that will be used for loading external stylesheets via relative URLs.
145    #[must_use]
146    pub fn base_url(mut self, base_url: Option<Url>) -> Self {
147        self.base_url = base_url;
148        self
149    }
150
151    /// Override whether remote stylesheets should be loaded.
152    #[must_use]
153    pub fn load_remote_stylesheets(mut self, load_remote_stylesheets: bool) -> Self {
154        self.load_remote_stylesheets = load_remote_stylesheets;
155        self
156    }
157
158    /// Set external stylesheet cache.
159    #[must_use]
160    #[cfg(feature = "stylesheet-cache")]
161    pub fn cache(mut self, cache: impl Into<Option<StylesheetCache>>) -> Self {
162        if let Some(cache) = cache.into() {
163            self.cache = Some(std::sync::Mutex::new(cache));
164        } else {
165            self.cache = None;
166        }
167        self
168    }
169
170    /// Set additional CSS to inline.
171    #[must_use]
172    pub fn extra_css(mut self, extra_css: Option<Cow<'a, str>>) -> Self {
173        self.extra_css = extra_css;
174        self
175    }
176
177    /// Set the initial node capacity for HTML tree.
178    #[must_use]
179    pub fn preallocate_node_capacity(mut self, preallocate_node_capacity: usize) -> Self {
180        self.preallocate_node_capacity = preallocate_node_capacity;
181        self
182    }
183
184    /// Set the way to resolve stylesheets from various sources.
185    #[must_use]
186    pub fn resolver(mut self, resolver: Arc<dyn StylesheetResolver>) -> Self {
187        self.resolver = resolver;
188        self
189    }
190
191    /// Create a new `CSSInliner` instance from this options.
192    #[must_use]
193    pub const fn build(self) -> CSSInliner<'a> {
194        CSSInliner::new(self)
195    }
196}
197
198impl Default for InlineOptions<'_> {
199    #[inline]
200    fn default() -> Self {
201        InlineOptions {
202            inline_style_tags: true,
203            keep_style_tags: false,
204            keep_link_tags: false,
205            keep_at_rules: false,
206            minify_css: false,
207            base_url: None,
208            load_remote_stylesheets: true,
209            #[cfg(feature = "stylesheet-cache")]
210            cache: None,
211            extra_css: None,
212            preallocate_node_capacity: 32,
213            resolver: Arc::new(DefaultStylesheetResolver),
214        }
215    }
216}
217
/// A specialized `Result` type for CSS inlining operations.
///
/// The error type is fixed to `InlineError`, so signatures in this crate only need to name the
/// success type.
pub type Result<T> = std::result::Result<T, InlineError>;
220
/// Customizable CSS inliner.
///
/// Holds the `InlineOptions` it was created with and applies them on every inlining call.
#[derive(Debug)]
pub struct CSSInliner<'a> {
    // Configuration consulted by all inlining methods.
    options: InlineOptions<'a>,
}
226
// How much larger than the input the output buffer is assumed to become.
const GROWTH_COEFFICIENT: f64 = 1.5;
// A rough coefficient to calculate the number of individual declarations based on the total CSS size.
const DECLARATION_SIZE_COEFFICIENT: f64 = 30.0;

/// Create an empty byte buffer sized for the inlined version of `html`.
///
/// The inlined document is usually bigger than the input, hence the reserved capacity is the
/// input length multiplied by `GROWTH_COEFFICIENT`.
fn allocate_output_buffer(html: &str) -> Vec<u8> {
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_sign_loss,
        clippy::cast_possible_truncation
    )]
    let capacity = {
        let scaled = html.len() as f64 * GROWTH_COEFFICIENT;
        // Clamp to the representable range before converting back to an integer
        scaled.min(usize::MAX as f64).round() as usize
    };
    Vec::with_capacity(capacity)
}
244
245impl<'a> CSSInliner<'a> {
246    /// Create a new `CSSInliner` instance with given options.
247    #[must_use]
248    #[inline]
249    pub const fn new(options: InlineOptions<'a>) -> Self {
250        CSSInliner { options }
251    }
252
253    /// Return a default `InlineOptions` that can fully configure the CSS inliner.
254    ///
255    /// # Examples
256    ///
257    /// Get default `InlineOptions`, then change base url
258    ///
259    /// ```rust
260    /// use css_inline::{CSSInliner, Url};
261    /// # use url::ParseError;
262    /// # fn run() -> Result<(), ParseError> {
263    /// let url = Url::parse("https://api.example.com")?;
264    /// let inliner = CSSInliner::options()
265    ///     .base_url(Some(url))
266    ///     .build();
267    /// # Ok(())
268    /// # }
269    /// # run().unwrap();
270    /// ```
271    #[must_use]
272    #[inline]
273    pub fn options() -> InlineOptions<'a> {
274        InlineOptions::default()
275    }
276
277    /// Inline CSS styles from <style> tags to matching elements in the HTML tree and return a
278    /// string.
279    ///
280    /// # Errors
281    ///
282    /// Inlining might fail for the following reasons:
283    ///   - Missing stylesheet file;
284    ///   - Remote stylesheet is not available;
285    ///   - IO errors;
286    ///   - Internal CSS selector parsing error;
287    ///
288    /// # Panics
289    ///
290    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
291    /// using the same inliner panicked while resolving external stylesheets.
292    #[inline]
293    pub fn inline(&self, html: &str) -> Result<String> {
294        let mut out = allocate_output_buffer(html);
295        self.inline_to(html, &mut out)?;
296        Ok(String::from_utf8_lossy(&out).to_string())
297    }
298
299    /// Inline CSS & write the result to a generic writer. Use it if you want to write
300    /// the inlined document to a file.
301    ///
302    /// # Errors
303    ///
304    /// Inlining might fail for the following reasons:
305    ///   - Missing stylesheet file;
306    ///   - Remote stylesheet is not available;
307    ///   - IO errors;
308    ///   - Internal CSS selector parsing error;
309    ///
310    /// # Panics
311    ///
312    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
313    /// using the same inliner panicked while resolving external stylesheets.
314    #[inline]
315    pub fn inline_to<W: Write>(&self, html: &str, target: &mut W) -> Result<()> {
316        self.inline_to_impl(html, None, target, InliningMode::Document)
317    }
318
319    /// Inline CSS into an HTML fragment.
320    ///
321    /// # Errors
322    ///
323    /// Inlining might fail for the following reasons:
324    ///   - Missing stylesheet file;
325    ///   - Remote stylesheet is not available;
326    ///   - IO errors;
327    ///   - Internal CSS selector parsing error;
328    ///
329    /// # Panics
330    ///
331    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
332    /// using the same inliner panicked while resolving external stylesheets.
333    pub fn inline_fragment(&self, html: &str, css: &str) -> Result<String> {
334        let mut out = allocate_output_buffer(html);
335        self.inline_fragment_to(html, css, &mut out)?;
336        Ok(String::from_utf8_lossy(&out).to_string())
337    }
338
339    /// Inline CSS into an HTML fragment and write the result to a generic writer.
340    ///
341    /// # Errors
342    ///
343    /// Inlining might fail for the following reasons:
344    ///   - Missing stylesheet file;
345    ///   - Remote stylesheet is not available;
346    ///   - IO errors;
347    ///   - Internal CSS selector parsing error;
348    ///
349    /// # Panics
350    ///
351    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
352    /// using the same inliner panicked while resolving external stylesheets.
353    pub fn inline_fragment_to<W: Write>(
354        &self,
355        html: &str,
356        css: &str,
357        target: &mut W,
358    ) -> Result<()> {
359        self.inline_to_impl(html, Some(css), target, InliningMode::Fragment)
360    }
361
362    #[allow(clippy::too_many_lines)]
363    fn inline_to_impl<W: Write>(
364        &self,
365        html: &str,
366        css: Option<&str>,
367        target: &mut W,
368        mode: InliningMode,
369    ) -> Result<()> {
370        let document = Document::parse_with_options(
371            html.as_bytes(),
372            self.options.preallocate_node_capacity,
373            mode,
374        );
375        // CSS rules may overlap, and the final set of rules applied to an element depend on
376        // selectors' specificity - selectors with higher specificity have more priority.
377        // Inlining happens in two major steps:
378        //   1. All available styles are mapped to respective elements together with their
379        //      selector's specificity. When two rules overlap on the same declaration, then
380        //      the one with higher specificity replaces another.
381        //   2. Resulting styles are merged into existing "style" tags.
382        let mut size_estimate: usize = if self.options.inline_style_tags {
383            document
384                .styles()
385                .map(|s| {
386                    // Add 1 to account for the extra `\n` char we add between styles
387                    s.len().saturating_add(1)
388                })
389                .sum()
390        } else {
391            0
392        };
393        if let Some(extra_css) = &self.options.extra_css {
394            size_estimate = size_estimate.saturating_add(extra_css.len());
395        }
396        if let Some(css) = css {
397            size_estimate = size_estimate.saturating_add(css.len());
398        }
399        let mut raw_styles = String::with_capacity(size_estimate);
400        if self.options.inline_style_tags || self.options.keep_at_rules {
401            for style in document.styles() {
402                raw_styles.push_str(style);
403                raw_styles.push('\n');
404            }
405        }
406        if self.options.load_remote_stylesheets {
407            let mut links = document.stylesheets().collect::<Vec<&str>>();
408            links.sort_unstable();
409            links.dedup();
410            for href in &links {
411                let url = self.get_full_url(href);
412                #[cfg(feature = "stylesheet-cache")]
413                if let Some(lock) = self.options.cache.as_ref() {
414                    let mut cache = lock.lock().expect("Cache lock is poisoned");
415                    if let Some(cached) = cache.get(url.as_ref()) {
416                        raw_styles.push_str(cached);
417                        raw_styles.push('\n');
418                        continue;
419                    }
420                }
421
422                let css = self.options.resolver.retrieve(url.as_ref())?;
423                raw_styles.push_str(&css);
424                raw_styles.push('\n');
425
426                #[cfg(feature = "stylesheet-cache")]
427                if let Some(lock) = self.options.cache.as_ref() {
428                    let mut cache = lock.lock().expect("Cache lock is poisoned");
429                    cache.put(url.into_owned(), css);
430                }
431            }
432        }
433        if let Some(extra_css) = &self.options.extra_css {
434            raw_styles.push_str(extra_css);
435        }
436        if let Some(css) = css {
437            raw_styles.push_str(css);
438        }
439        let mut parse_input = cssparser::ParserInput::new(&raw_styles);
440        let mut parser = cssparser::Parser::new(&mut parse_input);
441        // Allocating some memory for all the parsed declarations
442        #[allow(
443            clippy::cast_precision_loss,
444            clippy::cast_sign_loss,
445            clippy::cast_possible_truncation
446        )]
447        let mut declarations = Vec::with_capacity(
448            ((raw_styles.len() as f64 / DECLARATION_SIZE_COEFFICIENT)
449                .min(usize::MAX as f64)
450                .round() as usize)
451                .max(16),
452        );
453        let mut rule_list = Vec::with_capacity(declarations.capacity() / 3);
454        let at_rules = if self.options.keep_at_rules {
455            let mut at_rules = String::new();
456            for rule in cssparser::StyleSheetParser::new(
457                &mut parser,
458                &mut parser::AtRuleFilteringParser::new(&mut declarations, &mut at_rules),
459            )
460            .flatten()
461            {
462                if self.options.inline_style_tags {
463                    rule_list.push(rule);
464                }
465            }
466            Some(at_rules)
467        } else if !raw_styles.is_empty() {
468            // At this point, we collected some styles from at least one source, hence we need to process it.
469            for rule in cssparser::StyleSheetParser::new(
470                &mut parser,
471                &mut parser::CSSRuleListParser::new(&mut declarations),
472            )
473            .flatten()
474            {
475                rule_list.push(rule);
476            }
477            None
478        } else {
479            None
480        };
481        let mut styles = IndexMap::with_capacity_and_hasher(
482            document.elements.len().max(16),
483            BuildNoHashHasher::default(),
484        );
485        // This cache is unused but required in the `selectors` API
486        let mut caches = SelectorCaches::default();
487        for (selectors, (start, end)) in &rule_list {
488            // Only CSS Syntax Level 3 is supported, therefore it is OK to split by `,`
489            // With `is` or `where` selectors (Level 4) this split should be done on the parser level
490            for selector in selectors.split(',') {
491                if let Ok(matching_elements) = document.select(selector, &mut caches) {
492                    let specificity = matching_elements.specificity();
493                    for matching_element in matching_elements {
494                        let element_styles =
495                            styles.entry(matching_element.node_id).or_insert_with(|| {
496                                ElementStyleMap::with_capacity_and_hasher(
497                                    end.saturating_sub(*start).saturating_add(4),
498                                    BuildHasherDefault::default(),
499                                )
500                            });
501                        // Iterate over pairs of property name & value
502                        // Example: `padding`, `0`
503                        for (name, value) in &declarations[*start..*end] {
504                            match element_styles.entry(name.as_ref()) {
505                                indexmap::map::Entry::Occupied(mut entry) => {
506                                    match (
507                                        value.trim_end().ends_with("!important"),
508                                        entry.get().1.trim_end().ends_with("!important"),
509                                    ) {
510                                        // Equal importance; the higher specificity wins.
511                                        (false, false) | (true, true) => {
512                                            if entry.get().0 <= specificity {
513                                                entry.insert((specificity, *value));
514                                            }
515                                        }
516                                        // Only the new value is important; it wins.
517                                        (true, false) => {
518                                            entry.insert((specificity, *value));
519                                        }
520                                        // The old value is important and the new one is not; keep
521                                        // the old value.
522                                        (false, true) => {}
523                                    }
524                                }
525                                indexmap::map::Entry::Vacant(entry) => {
526                                    entry.insert((specificity, *value));
527                                }
528                            }
529                        }
530                    }
531                }
532                // Ignore not parsable selectors. E.g. there is no parser for @media queries
533                // Which means that they will fall into this category and will be ignored
534            }
535        }
536        document.serialize(
537            target,
538            styles,
539            self.options.keep_style_tags,
540            self.options.keep_link_tags,
541            self.options.minify_css,
542            at_rules.as_ref(),
543            mode,
544        )?;
545        Ok(())
546    }
547
548    fn get_full_url<'u>(&self, href: &'u str) -> Cow<'u, str> {
549        // Valid absolute URL
550        if Url::parse(href).is_ok() {
551            return Cow::Borrowed(href);
552        }
553        if let Some(base_url) = &self.options.base_url {
554            // Use the same scheme as the base URL
555            if href.starts_with("//") {
556                return Cow::Owned(format!("{}:{}", base_url.scheme(), href));
557            }
558            // Not a URL, then it is a relative URL
559            if let Ok(new_url) = base_url.join(href) {
560                return Cow::Owned(new_url.into());
561            }
562        }
563        // If it is not a valid URL and there is no base URL specified, we assume a local path
564        Cow::Borrowed(href)
565    }
566}
567
568impl Default for CSSInliner<'_> {
569    #[inline]
570    fn default() -> Self {
571        CSSInliner::new(InlineOptions::default())
572    }
573}
574
575/// Shortcut for inlining CSS with default parameters.
576///
577/// # Errors
578///
579/// Inlining might fail for the following reasons:
580///   - Missing stylesheet file;
581///   - Remote stylesheet is not available;
582///   - IO errors;
583///   - Internal CSS selector parsing error;
584///
585/// # Panics
586///
587/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
588/// using the same inliner panicked while resolving external stylesheets.
589#[inline]
590pub fn inline(html: &str) -> Result<String> {
591    CSSInliner::default().inline(html)
592}
593
594/// Shortcut for inlining CSS with default parameters and writing the output to a generic writer.
595///
596/// # Errors
597///
598/// Inlining might fail for the following reasons:
599///   - Missing stylesheet file;
600///   - Remote stylesheet is not available;
601///   - IO errors;
602///   - Internal CSS selector parsing error;
603///
604/// # Panics
605///
606/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
607/// using the same inliner panicked while resolving external stylesheets.
608#[inline]
609pub fn inline_to<W: Write>(html: &str, target: &mut W) -> Result<()> {
610    CSSInliner::default().inline_to(html, target)
611}
612
613/// Shortcut for inlining CSS into an HTML fragment with default parameters.
614///
615/// # Errors
616///
617/// Inlining might fail for the following reasons:
618///   - Missing stylesheet file;
619///   - Remote stylesheet is not available;
620///   - IO errors;
621///   - Internal CSS selector parsing error;
622///
623/// # Panics
624///
625/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
626/// using the same inliner panicked while resolving external stylesheets.
627#[inline]
628pub fn inline_fragment(html: &str, css: &str) -> Result<String> {
629    CSSInliner::default().inline_fragment(html, css)
630}
631
632/// Shortcut for inlining CSS into an HTML fragment with default parameters and writing the output to a generic writer.
633///
634/// # Errors
635///
636/// Inlining might fail for the following reasons:
637///   - Missing stylesheet file;
638///   - Remote stylesheet is not available;
639///   - IO errors;
640///   - Internal CSS selector parsing error;
641///
642/// # Panics
643///
644/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
645/// using the same inliner panicked while resolving external stylesheets.
646#[inline]
647pub fn inline_fragment_to<W: Write>(html: &str, css: &str, target: &mut W) -> Result<()> {
648    CSSInliner::default().inline_fragment_to(html, css, target)
649}
650
#[cfg(test)]
mod tests {
    use crate::{CSSInliner, InlineOptions};

    // Compile-time check: the test only builds if both types are `Send` and `Sync`.
    #[test]
    fn test_inliner_sync_send() {
        fn require_send_sync<T: Send + Sync>() {}
        require_send_sync::<InlineOptions<'_>>();
        require_send_sync::<CSSInliner<'_>>();
    }
}