css_inline/
lib.rs

1#![doc = include_str!("../README.md")]
2#![warn(
3    clippy::pedantic,
4    clippy::doc_markdown,
5    clippy::redundant_closure,
6    clippy::explicit_iter_loop,
7    clippy::match_same_arms,
8    clippy::needless_borrow,
9    clippy::print_stdout,
10    clippy::arithmetic_side_effects,
11    clippy::cast_possible_truncation,
12    clippy::unwrap_used,
13    clippy::map_unwrap_or,
14    clippy::trivially_copy_pass_by_ref,
15    clippy::needless_pass_by_value,
16    missing_docs,
17    missing_debug_implementations,
18    trivial_casts,
19    trivial_numeric_casts,
20    unreachable_pub,
21    unused_extern_crates,
22    unused_import_braces,
23    unused_qualifications,
24    variant_size_differences,
25    rust_2018_idioms,
26    rust_2018_compatibility,
27    rust_2021_compatibility
28)]
29#![allow(clippy::module_name_repetitions)]
30pub mod error;
31mod hasher;
32mod html;
33mod parser;
34mod resolver;
35
36pub use error::InlineError;
37use indexmap::IndexMap;
38#[cfg(feature = "stylesheet-cache")]
39use lru::{DefaultHasher, LruCache};
40use std::{borrow::Cow, fmt::Formatter, hash::BuildHasherDefault, io::Write, sync::Arc};
41
42use crate::html::ElementStyleMap;
43use hasher::BuildNoHashHasher;
44use html::{Document, InliningMode};
45pub use resolver::{DefaultStylesheetResolver, StylesheetResolver};
46pub use url::{ParseError, Url};
47
/// LRU cache type used for external stylesheets.
///
/// Keys and values are both `String`s; `S` is the hasher builder of the underlying `LruCache`.
#[cfg(feature = "stylesheet-cache")]
pub type StylesheetCache<S = DefaultHasher> = LruCache<String, String, S>;
51
52/// Configuration options for CSS inlining process.
53#[allow(clippy::struct_excessive_bools)]
54pub struct InlineOptions<'a> {
55    /// Whether to inline CSS from "style" tags.
56    ///
57    /// Sometimes HTML may include a lot of boilerplate styles, that are not applicable in every
58    /// scenario and it is useful to ignore them and use `extra_css` instead.
59    pub inline_style_tags: bool,
60    /// Keep "style" tags after inlining.
61    pub keep_style_tags: bool,
62    /// Keep "link" tags after inlining.
63    pub keep_link_tags: bool,
64    /// Used for loading external stylesheets via relative URLs.
65    pub base_url: Option<Url>,
66    /// Whether remote stylesheets should be loaded or not.
67    pub load_remote_stylesheets: bool,
68    /// External stylesheet cache.
69    #[cfg(feature = "stylesheet-cache")]
70    pub cache: Option<std::sync::Mutex<StylesheetCache>>,
71    // The point of using `Cow` here is Python bindings, where it is problematic to pass a reference
72    // without dealing with memory leaks & unsafe. With `Cow` we can use moved values as `String` in
73    // Python wrapper for `CSSInliner` and `&str` in Rust & simple functions on the Python side
74    /// Additional CSS to inline.
75    pub extra_css: Option<Cow<'a, str>>,
76    /// Pre-allocate capacity for HTML nodes during parsing.
77    /// It can improve performance when you have an estimate of the number of nodes in your HTML document.
78    pub preallocate_node_capacity: usize,
79    /// A way to resolve stylesheets from various sources.
80    pub resolver: Arc<dyn StylesheetResolver>,
81}
82
83impl std::fmt::Debug for InlineOptions<'_> {
84    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
85        let mut debug = f.debug_struct("InlineOptions");
86        debug
87            .field("inline_style_tags", &self.inline_style_tags)
88            .field("keep_style_tags", &self.keep_style_tags)
89            .field("keep_link_tags", &self.keep_link_tags)
90            .field("base_url", &self.base_url)
91            .field("load_remote_stylesheets", &self.load_remote_stylesheets);
92        #[cfg(feature = "stylesheet-cache")]
93        {
94            debug.field("cache", &self.cache);
95        }
96        debug
97            .field("extra_css", &self.extra_css)
98            .field("preallocate_node_capacity", &self.preallocate_node_capacity)
99            .finish_non_exhaustive()
100    }
101}
102
103impl<'a> InlineOptions<'a> {
104    /// Override whether "style" tags should be inlined.
105    #[must_use]
106    pub fn inline_style_tags(mut self, inline_style_tags: bool) -> Self {
107        self.inline_style_tags = inline_style_tags;
108        self
109    }
110
111    /// Override whether "style" tags should be kept after processing.
112    #[must_use]
113    pub fn keep_style_tags(mut self, keep_style_tags: bool) -> Self {
114        self.keep_style_tags = keep_style_tags;
115        self
116    }
117
118    /// Override whether "link" tags should be kept after processing.
119    #[must_use]
120    pub fn keep_link_tags(mut self, keep_link_tags: bool) -> Self {
121        self.keep_link_tags = keep_link_tags;
122        self
123    }
124
125    /// Set base URL that will be used for loading external stylesheets via relative URLs.
126    #[must_use]
127    pub fn base_url(mut self, base_url: Option<Url>) -> Self {
128        self.base_url = base_url;
129        self
130    }
131
132    /// Override whether remote stylesheets should be loaded.
133    #[must_use]
134    pub fn load_remote_stylesheets(mut self, load_remote_stylesheets: bool) -> Self {
135        self.load_remote_stylesheets = load_remote_stylesheets;
136        self
137    }
138
139    /// Set external stylesheet cache.
140    #[must_use]
141    #[cfg(feature = "stylesheet-cache")]
142    pub fn cache(mut self, cache: impl Into<Option<StylesheetCache>>) -> Self {
143        if let Some(cache) = cache.into() {
144            self.cache = Some(std::sync::Mutex::new(cache));
145        } else {
146            self.cache = None;
147        }
148        self
149    }
150
151    /// Set additional CSS to inline.
152    #[must_use]
153    pub fn extra_css(mut self, extra_css: Option<Cow<'a, str>>) -> Self {
154        self.extra_css = extra_css;
155        self
156    }
157
158    /// Set the initial node capacity for HTML tree.
159    #[must_use]
160    pub fn preallocate_node_capacity(mut self, preallocate_node_capacity: usize) -> Self {
161        self.preallocate_node_capacity = preallocate_node_capacity;
162        self
163    }
164
165    /// Set the way to resolve stylesheets from various sources.
166    #[must_use]
167    pub fn resolver(mut self, resolver: Arc<dyn StylesheetResolver>) -> Self {
168        self.resolver = resolver;
169        self
170    }
171
172    /// Create a new `CSSInliner` instance from this options.
173    #[must_use]
174    pub const fn build(self) -> CSSInliner<'a> {
175        CSSInliner::new(self)
176    }
177}
178
179impl Default for InlineOptions<'_> {
180    #[inline]
181    fn default() -> Self {
182        InlineOptions {
183            inline_style_tags: true,
184            keep_style_tags: false,
185            keep_link_tags: false,
186            base_url: None,
187            load_remote_stylesheets: true,
188            #[cfg(feature = "stylesheet-cache")]
189            cache: None,
190            extra_css: None,
191            preallocate_node_capacity: 32,
192            resolver: Arc::new(DefaultStylesheetResolver),
193        }
194    }
195}
196
197/// A specialized `Result` type for CSS inlining operations.
198pub type Result<T> = std::result::Result<T, InlineError>;
199
200/// Customizable CSS inliner.
201#[derive(Debug)]
202pub struct CSSInliner<'a> {
203    options: InlineOptions<'a>,
204}
205
// Multiplier applied to the input HTML length when sizing the output buffer.
const GROWTH_COEFFICIENT: f64 = 1.5;
// A rough coefficient to calculate the number of individual declarations based on the total CSS size.
const DECLARATION_SIZE_COEFFICIENT: f64 = 30.0;

/// Create an empty byte buffer for the inlined document.
///
/// The capacity is the input length multiplied by `GROWTH_COEFFICIENT` (rounded), because the
/// inlined version is usually bigger than the input HTML.
fn allocate_output_buffer(html: &str) -> Vec<u8> {
    // The float round-trip is intentional: the value is clamped to `usize::MAX` before the cast.
    #[allow(
        clippy::cast_precision_loss,
        clippy::cast_sign_loss,
        clippy::cast_possible_truncation
    )]
    let capacity = {
        let projected = html.len() as f64 * GROWTH_COEFFICIENT;
        projected.min(usize::MAX as f64).round() as usize
    };
    Vec::with_capacity(capacity)
}
223
224impl<'a> CSSInliner<'a> {
225    /// Create a new `CSSInliner` instance with given options.
226    #[must_use]
227    #[inline]
228    pub const fn new(options: InlineOptions<'a>) -> Self {
229        CSSInliner { options }
230    }
231
232    /// Return a default `InlineOptions` that can fully configure the CSS inliner.
233    ///
234    /// # Examples
235    ///
236    /// Get default `InlineOptions`, then change base url
237    ///
238    /// ```rust
239    /// use css_inline::{CSSInliner, Url};
240    /// # use url::ParseError;
241    /// # fn run() -> Result<(), ParseError> {
242    /// let url = Url::parse("https://api.example.com")?;
243    /// let inliner = CSSInliner::options()
244    ///     .base_url(Some(url))
245    ///     .build();
246    /// # Ok(())
247    /// # }
248    /// # run().unwrap();
249    /// ```
250    #[must_use]
251    #[inline]
252    pub fn options() -> InlineOptions<'a> {
253        InlineOptions::default()
254    }
255
256    /// Inline CSS styles from <style> tags to matching elements in the HTML tree and return a
257    /// string.
258    ///
259    /// # Errors
260    ///
261    /// Inlining might fail for the following reasons:
262    ///   - Missing stylesheet file;
263    ///   - Remote stylesheet is not available;
264    ///   - IO errors;
265    ///   - Internal CSS selector parsing error;
266    ///
267    /// # Panics
268    ///
269    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
270    /// using the same inliner panicked while resolving external stylesheets.
271    #[inline]
272    pub fn inline(&self, html: &str) -> Result<String> {
273        let mut out = allocate_output_buffer(html);
274        self.inline_to(html, &mut out)?;
275        Ok(String::from_utf8_lossy(&out).to_string())
276    }
277
278    /// Inline CSS & write the result to a generic writer. Use it if you want to write
279    /// the inlined document to a file.
280    ///
281    /// # Errors
282    ///
283    /// Inlining might fail for the following reasons:
284    ///   - Missing stylesheet file;
285    ///   - Remote stylesheet is not available;
286    ///   - IO errors;
287    ///   - Internal CSS selector parsing error;
288    ///
289    /// # Panics
290    ///
291    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
292    /// using the same inliner panicked while resolving external stylesheets.
293    #[inline]
294    pub fn inline_to<W: Write>(&self, html: &str, target: &mut W) -> Result<()> {
295        self.inline_to_impl(html, None, target, InliningMode::Document)
296    }
297
298    /// Inline CSS into an HTML fragment.
299    ///
300    /// # Errors
301    ///
302    /// Inlining might fail for the following reasons:
303    ///   - Missing stylesheet file;
304    ///   - Remote stylesheet is not available;
305    ///   - IO errors;
306    ///   - Internal CSS selector parsing error;
307    ///
308    /// # Panics
309    ///
310    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
311    /// using the same inliner panicked while resolving external stylesheets.
312    pub fn inline_fragment(&self, html: &str, css: &str) -> Result<String> {
313        let mut out = allocate_output_buffer(html);
314        self.inline_fragment_to(html, css, &mut out)?;
315        Ok(String::from_utf8_lossy(&out).to_string())
316    }
317
318    /// Inline CSS into an HTML fragment and write the result to a generic writer.
319    ///
320    /// # Errors
321    ///
322    /// Inlining might fail for the following reasons:
323    ///   - Missing stylesheet file;
324    ///   - Remote stylesheet is not available;
325    ///   - IO errors;
326    ///   - Internal CSS selector parsing error;
327    ///
328    /// # Panics
329    ///
330    /// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
331    /// using the same inliner panicked while resolving external stylesheets.
332    pub fn inline_fragment_to<W: Write>(
333        &self,
334        html: &str,
335        css: &str,
336        target: &mut W,
337    ) -> Result<()> {
338        self.inline_to_impl(html, Some(css), target, InliningMode::Fragment)
339    }
340
341    #[allow(clippy::too_many_lines)]
342    fn inline_to_impl<W: Write>(
343        &self,
344        html: &str,
345        css: Option<&str>,
346        target: &mut W,
347        mode: InliningMode,
348    ) -> Result<()> {
349        let document = Document::parse_with_options(
350            html.as_bytes(),
351            self.options.preallocate_node_capacity,
352            mode,
353        );
354        // CSS rules may overlap, and the final set of rules applied to an element depend on
355        // selectors' specificity - selectors with higher specificity have more priority.
356        // Inlining happens in two major steps:
357        //   1. All available styles are mapped to respective elements together with their
358        //      selector's specificity. When two rules overlap on the same declaration, then
359        //      the one with higher specificity replaces another.
360        //   2. Resulting styles are merged into existing "style" tags.
361        let mut size_estimate: usize = if self.options.inline_style_tags {
362            document
363                .styles()
364                .map(|s| {
365                    // Add 1 to account for the extra `\n` char we add between styles
366                    s.len().saturating_add(1)
367                })
368                .sum()
369        } else {
370            0
371        };
372        if let Some(extra_css) = &self.options.extra_css {
373            size_estimate = size_estimate.saturating_add(extra_css.len());
374        }
375        if let Some(css) = css {
376            size_estimate = size_estimate.saturating_add(css.len());
377        }
378        let mut raw_styles = String::with_capacity(size_estimate);
379        if self.options.inline_style_tags {
380            for style in document.styles() {
381                raw_styles.push_str(style);
382                raw_styles.push('\n');
383            }
384        }
385        if self.options.load_remote_stylesheets {
386            let mut links = document.stylesheets().collect::<Vec<&str>>();
387            links.sort_unstable();
388            links.dedup();
389            for href in &links {
390                let url = self.get_full_url(href);
391                #[cfg(feature = "stylesheet-cache")]
392                if let Some(lock) = self.options.cache.as_ref() {
393                    let mut cache = lock.lock().expect("Cache lock is poisoned");
394                    if let Some(cached) = cache.get(url.as_ref()) {
395                        raw_styles.push_str(cached);
396                        raw_styles.push('\n');
397                        continue;
398                    }
399                }
400
401                let css = self.options.resolver.retrieve(url.as_ref())?;
402                raw_styles.push_str(&css);
403                raw_styles.push('\n');
404
405                #[cfg(feature = "stylesheet-cache")]
406                if let Some(lock) = self.options.cache.as_ref() {
407                    let mut cache = lock.lock().expect("Cache lock is poisoned");
408                    cache.put(url.into_owned(), css);
409                }
410            }
411        }
412        if let Some(extra_css) = &self.options.extra_css {
413            raw_styles.push_str(extra_css);
414        }
415        if let Some(css) = css {
416            raw_styles.push_str(css);
417        }
418        let mut styles = IndexMap::with_capacity_and_hasher(128, BuildNoHashHasher::default());
419        let mut parse_input = cssparser::ParserInput::new(&raw_styles);
420        let mut parser = cssparser::Parser::new(&mut parse_input);
421        // Allocating some memory for all the parsed declarations
422        #[allow(
423            clippy::cast_precision_loss,
424            clippy::cast_sign_loss,
425            clippy::cast_possible_truncation
426        )]
427        let mut declarations = Vec::with_capacity(
428            ((raw_styles.len() as f64 / DECLARATION_SIZE_COEFFICIENT)
429                .min(usize::MAX as f64)
430                .round() as usize)
431                .max(16),
432        );
433        let mut rule_list = Vec::with_capacity(declarations.capacity() / 3);
434        for rule in cssparser::StyleSheetParser::new(
435            &mut parser,
436            &mut parser::CSSRuleListParser::new(&mut declarations),
437        )
438        .flatten()
439        {
440            rule_list.push(rule);
441        }
442        let mut caches = document.build_caches();
443        for (selectors, (start, end)) in &rule_list {
444            // Only CSS Syntax Level 3 is supported, therefore it is OK to split by `,`
445            // With `is` or `where` selectors (Level 4) this split should be done on the parser level
446            for selector in selectors.split(',') {
447                if let Ok(matching_elements) = document.select(selector, &mut caches) {
448                    let specificity = matching_elements.specificity();
449                    for matching_element in matching_elements {
450                        let element_styles =
451                            styles.entry(matching_element.node_id).or_insert_with(|| {
452                                ElementStyleMap::with_capacity_and_hasher(
453                                    end.saturating_sub(*start).saturating_add(4),
454                                    BuildHasherDefault::default(),
455                                )
456                            });
457                        // Iterate over pairs of property name & value
458                        // Example: `padding`, `0`
459                        for (name, value) in &declarations[*start..*end] {
460                            match element_styles.entry(name.as_ref()) {
461                                indexmap::map::Entry::Occupied(mut entry) => {
462                                    match (
463                                        value.contains("!important"),
464                                        entry.get().1.contains("!important"),
465                                    ) {
466                                        // Equal importance; the higher specificity wins.
467                                        (false, false) | (true, true) => {
468                                            if entry.get().0 <= specificity {
469                                                entry.insert((specificity, *value));
470                                            }
471                                        }
472                                        // Only the new value is important; it wins.
473                                        (true, false) => {
474                                            entry.insert((specificity, *value));
475                                        }
476                                        // The old value is important and the new one is not; keep
477                                        // the old value.
478                                        (false, true) => {}
479                                    }
480                                }
481                                indexmap::map::Entry::Vacant(entry) => {
482                                    entry.insert((specificity, *value));
483                                }
484                            }
485                        }
486                    }
487                }
488                // Ignore not parsable selectors. E.g. there is no parser for @media queries
489                // Which means that they will fall into this category and will be ignored
490            }
491        }
492        document.serialize(
493            target,
494            styles,
495            self.options.keep_style_tags,
496            self.options.keep_link_tags,
497            mode,
498        )?;
499        Ok(())
500    }
501
502    fn get_full_url<'u>(&self, href: &'u str) -> Cow<'u, str> {
503        // Valid absolute URL
504        if Url::parse(href).is_ok() {
505            return Cow::Borrowed(href);
506        };
507        if let Some(base_url) = &self.options.base_url {
508            // Use the same scheme as the base URL
509            if href.starts_with("//") {
510                return Cow::Owned(format!("{}:{}", base_url.scheme(), href));
511            }
512            // Not a URL, then it is a relative URL
513            if let Ok(new_url) = base_url.join(href) {
514                return Cow::Owned(new_url.into());
515            }
516        };
517        // If it is not a valid URL and there is no base URL specified, we assume a local path
518        Cow::Borrowed(href)
519    }
520}
521
522impl Default for CSSInliner<'_> {
523    #[inline]
524    fn default() -> Self {
525        CSSInliner::new(InlineOptions::default())
526    }
527}
528
529/// Shortcut for inlining CSS with default parameters.
530///
531/// # Errors
532///
533/// Inlining might fail for the following reasons:
534///   - Missing stylesheet file;
535///   - Remote stylesheet is not available;
536///   - IO errors;
537///   - Internal CSS selector parsing error;
538///
539/// # Panics
540///
541/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
542/// using the same inliner panicked while resolving external stylesheets.
543#[inline]
544pub fn inline(html: &str) -> Result<String> {
545    CSSInliner::default().inline(html)
546}
547
548/// Shortcut for inlining CSS with default parameters and writing the output to a generic writer.
549///
550/// # Errors
551///
552/// Inlining might fail for the following reasons:
553///   - Missing stylesheet file;
554///   - Remote stylesheet is not available;
555///   - IO errors;
556///   - Internal CSS selector parsing error;
557///
558/// # Panics
559///
560/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
561/// using the same inliner panicked while resolving external stylesheets.
562#[inline]
563pub fn inline_to<W: Write>(html: &str, target: &mut W) -> Result<()> {
564    CSSInliner::default().inline_to(html, target)
565}
566
567/// Shortcut for inlining CSS into an HTML fragment with default parameters.
568///
569/// # Errors
570///
571/// Inlining might fail for the following reasons:
572///   - Missing stylesheet file;
573///   - Remote stylesheet is not available;
574///   - IO errors;
575///   - Internal CSS selector parsing error;
576///
577/// # Panics
578///
579/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
580/// using the same inliner panicked while resolving external stylesheets.
581#[inline]
582pub fn inline_fragment(html: &str, css: &str) -> Result<String> {
583    CSSInliner::default().inline_fragment(html, css)
584}
585
586/// Shortcut for inlining CSS into an HTML fragment with default parameters and writing the output to a generic writer.
587///
588/// # Errors
589///
590/// Inlining might fail for the following reasons:
591///   - Missing stylesheet file;
592///   - Remote stylesheet is not available;
593///   - IO errors;
594///   - Internal CSS selector parsing error;
595///
596/// # Panics
597///
598/// This function may panic if external stylesheet cache lock is poisoned, i.e. another thread
599/// using the same inliner panicked while resolving external stylesheets.
600#[inline]
601pub fn inline_fragment_to<W: Write>(html: &str, css: &str, target: &mut W) -> Result<()> {
602    CSSInliner::default().inline_fragment_to(html, css, target)
603}
604
#[cfg(test)]
mod tests {
    use crate::{CSSInliner, InlineOptions};

    // Compile-time check: the test body only builds if both types are `Send` and `Sync`.
    #[test]
    fn test_inliner_sync_send() {
        fn assert_send_sync<T>()
        where
            T: Send + Sync,
        {
        }
        assert_send_sync::<CSSInliner<'_>>();
        assert_send_sync::<InlineOptions<'_>>();
    }
}