inline_assets/
lib.rs

1extern crate base64;
2extern crate kuchiki;
3extern crate regex;
4#[macro_use]
5extern crate html5ever;
6
7mod test;
8
9use std::collections::HashSet;
10use std::fs;
11use std::io::ErrorKind as IoErrorKind;
12use std::path::{Path, PathBuf};
13
14use kuchiki::traits::TendrilSink;
15use kuchiki::NodeRef;
16use regex::Captures;
17use std::str::FromStr;
18
/// Error returned when a file needed during inlining cannot be used.
///
/// This augments `std::io::Error` with a description of what was being read,
/// so the caller can tell which reference caused the problem.
#[derive(Debug)]
pub enum FilePathError {
	/// A `std::io::ErrorKind::NotFound` error; the string describes the offending path or line.
	InvalidPath(String),
	/// Any other file read error that is not `NotFound`: what was being read, and the underlying I/O error.
	FileReadError(String, std::io::Error),
	/// A css file is imported twice, or there is a dependency loop.
	RepeatedFile,
}
29
/// Config struct that is passed to `inline_file()` and `inline_html_string()`.
///
/// `Config::default()` enables every feature that is currently implemented:
/// new line removal is on, font inlining (not implemented yet) is off.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Config {
	/// Whether or not to inline fonts in the css as base64.
	/// Currently has no effect other than a notice printed to stderr.
	pub inline_fonts: bool,
	/// Replace every line break (`\r\n` and `\n`) in the output with a space character.
	pub remove_new_lines: bool,
}

impl Default for Config {
	/// Enables new line removal and leaves the unimplemented font inlining disabled.
	fn default() -> Self {
		Self {
			inline_fonts: false,
			remove_new_lines: true,
		}
	}
}
51
52impl std::error::Error for FilePathError {
53	fn description(&self) -> &str {
54		&match *self {
55			FilePathError::InvalidPath(_) => "Invalid path, file not found",
56			FilePathError::FileReadError(_, _) => "Error during file reading",
57			FilePathError::RepeatedFile => {
58				"File is imported twice, or there is a circular dependency"
59			}
60		}
61	}
62}
63
64impl std::fmt::Display for FilePathError {
65	fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
66		match *self {
67			FilePathError::InvalidPath(ref line) => write!(f, "Invalid path: {}", line),
68			FilePathError::FileReadError(ref cause, ref io_err) => {
69				write!(f, "Cause: {}, File read error: {}", cause, io_err)
70			}
71			FilePathError::RepeatedFile => write!(
72				f,
73				"A file is imported twice, or there is a circular dependency"
74			),
75		}
76	}
77}
78
79impl FilePathError {
80	fn from_elem(e: std::io::Error, elem: &str) -> Self {
81		match e.kind() {
82			IoErrorKind::NotFound => {
83				FilePathError::InvalidPath(format!("File not found: {}", elem))
84			}
85			_ => FilePathError::FileReadError(elem.to_owned(), e),
86		}
87	}
88}
89
90impl From<std::io::Error> for FilePathError {
91	fn from(e: std::io::Error) -> Self {
92		match e.kind() {
93			IoErrorKind::NotFound => FilePathError::InvalidPath("File not found".to_owned()),
94			_ => FilePathError::FileReadError("N/A".to_owned(), e),
95		}
96	}
97}
98
99/// Returns a `Result<String, FilePathError>` of the html file at file path with all the assets inlined.
100///
101/// ## Arguments
102/// * `file_path` - The path of the html file.
103/// * `inline_fonts` - Pass a config file to select what features to enable. Use `Default::default()` to enable everything
104pub fn inline_file<P: AsRef<Path>>(file_path: P, config: Config) -> Result<String, FilePathError> {
105	let html = fs::read_to_string(&file_path)
106		.map_err(|orig_err| FilePathError::from_elem(orig_err, "Html file not found"))?;
107	inline_html_string(&html, &file_path.as_ref().parent().unwrap(), config)
108}
109
110/// Returns a `Result<String, FilePathError>` with all the assets linked in the the html string inlined.
111///
112/// ## Arguments
113/// * `html` - The html string.
114/// * `root_path` - The root all relative paths in the html will be evaluated with, usually this is the folder the html file is in.
115/// * `config` - Pass a config file to select what features to enable. Use `Default::default()` to enable everything
116///
117pub fn inline_html_string<P: AsRef<Path>>(
118	html: &str,
119	root_path: P,
120	config: Config,
121) -> Result<String, FilePathError> {
122	// FIXME: make actual error return
123	let root_path = root_path.as_ref().canonicalize().unwrap();
124	let document = kuchiki::parse_html().one(html);
125	
126	let mut css_path_set = HashSet::new();
127
128	let mut to_delete_vec = Vec::new();
129
130	for css_match in document.select("script, link").unwrap() {
131		let as_node = css_match.as_node();
132
133		let node = as_node.as_element().unwrap();
134
135		match node.name.local.to_string().as_str() {
136			"script" => {
137				let mut text_attr = node.attributes.borrow_mut();
138				if let Some(c) = text_attr.get("src") {
139					let sub_script_path = PathBuf::from_str(c).expect("script src not valid path");
140					let script_path = if sub_script_path.is_absolute() {
141						sub_script_path
142					} else {
143						root_path.join(sub_script_path)
144					};
145
146					text_attr.remove("src");
147					as_node.append(NodeRef::new_text(
148						fs::read_to_string(&script_path).map_err(|e| {
149							FilePathError::from_elem(e, &script_path.to_string_lossy().to_string())
150						})?,
151					));
152				} else {
153					continue;
154				}
155			}
156			"link" => {
157				let css_path = {
158					let mut text_attr = node.attributes.borrow_mut();
159					let out = if let Some(c) = text_attr
160						.get("rel")
161						.filter(|rel| *rel == "stylesheet")
162						.and(text_attr.get("href"))
163					{
164						root_path.join(PathBuf::from_str(c).expect("href not valid path"))
165					} else {
166						continue;
167					};
168					out
169				};
170
171				let css = inline_css(css_path, &root_path, &mut css_path_set)
172					.expect("Failed to inline css");
173
174				let elem_to_add = NodeRef::new_element(
175					html5ever::QualName::new(None, ns!(html), "style".into()),
176					None,
177				);
178
179				elem_to_add.append(NodeRef::new_text(css));
180				as_node.insert_after(elem_to_add);
181				to_delete_vec.push(css_match);
182			}
183			_ => {}
184		}
185	}
186
187	for css_match in to_delete_vec {
188		css_match.as_node().detach();
189	}
190
191	let answer = document.to_string()
192	// Use new unix style newline
193		.replace("\r\n", "\n");
194	
195	if config.inline_fonts {
196		eprintln!("Inline font option not implemented yet");
197	}
198
199	Ok(if config.remove_new_lines {
200		answer.replace("\r\n", " ").replace("\n", " ")
201	} else {
202		answer
203	})
204}
205
206fn inline_css<P: AsRef<Path>, P2: AsRef<Path>>(
207	css_path: P,
208	root_path: P2,
209	path_set: &mut HashSet<std::path::PathBuf>,
210) -> Result<String, FilePathError> {
211	let css_path = css_path
212		.as_ref()
213		.canonicalize()
214		.map_err(|e| FilePathError::from_elem(e, css_path.as_ref().to_str().unwrap()))?;
215	if !path_set.insert(css_path.clone()) {
216		return Err(FilePathError::RepeatedFile);
217	}
218
219	//	let css_data = fs::read_to_string(&css_path)
220	//		.map_err(|e| FilePathError::from_elem(e, css_path.to_str().unwrap()))?;
221
222	let comment_remover = regex::Regex::new(r#"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/"#).unwrap();
223
224	// Some optimisation could be done here if we don't initialize these every single time.
225	let css_finder: regex::Regex =
226		regex::Regex::new(r#"@import[\s]+url\(["']?([^"']+)["']?\)\s*;"#).unwrap(); // Finds all @import url(style.css)
227	let url_finder = regex::Regex::new(r#"url\s*?\(["']?([^"')]+?)["']?\)"#).unwrap(); // Finds all url(path) in the css and makes them relative to the html file
228
229	let mut is_alright: Result<(), FilePathError> = Ok(());
230	let css_data = css_finder
231		.replace_all(
232			url_finder
233				.replace_all(
234					comment_remover
235						.replace_all(
236							&fs::read_to_string(&css_path).map_err(|e| {
237								FilePathError::from_elem(e, css_path.to_str().unwrap())
238							})?,
239							|_: &Captures| "".to_owned(),
240						)
241						.as_ref(),
242					|caps: &Captures| {
243						if caps[1].len() > 1500 || caps[1].contains("data:") {
244							// Probably not a path if longer than 1500 characters
245							return caps[0].to_owned();
246						}
247						format!(
248							"url({})",
249							if (caps[1].as_ref() as &str).contains("://") {
250								caps[1].to_owned()
251							} else {
252								pathdiff::diff_paths(
253									css_path.parent().unwrap().join(&caps[1]).as_path(),
254									root_path.as_ref(),
255								)
256								.unwrap()
257								.as_path()
258								.to_str()
259								.expect("Path not UTF-8")
260								.replace("\\", "/")
261							}
262						)
263					},
264				)
265				.as_ref(),
266			|caps: &Captures| {
267				match inline_css(
268					root_path.as_ref().join(&caps[1]),
269					root_path.as_ref(),
270					path_set,
271				) {
272					Ok(out) => out,
273					Err(FilePathError::RepeatedFile) => {
274						"".to_owned() // Ignore repeated file
275					}
276					Err(e) => {
277						is_alright = Err(e);
278						return "Error".to_owned();
279					}
280				}
281			},
282		)
283		.to_string();
284
285	if is_alright.is_err() {
286		return Err(is_alright.unwrap_err());
287	}
288
289	Ok(css_data)
290}