use crate::parsing::{Resource, ResourceMap};
use html5ever::{interface::QualName, local_name, namespace_url, ns};
use kuchiki::traits::TendrilSink;
use kuchiki::{parse_html, NodeData, NodeRef};
use std::io;
use std::path::Path;
use url::Url;
/// A fetched HTML page together with the external resources that were
/// collected for it.
#[derive(Debug)]
pub struct PageArchive {
/// URL of the page; relative references found in `content` are joined
/// against it before being looked up in `resource_map`.
pub url: Url,
/// HTML source of the page, parsed by `embed_resources`.
pub content: String,
/// Collected resources, looked up by the URL obtained from joining a
/// reference in the page against `url`.
pub resource_map: ResourceMap,
}
impl PageArchive {
pub fn embed_resources(&self) -> String {
let document = parse_html().one(self.content.as_str());
for element in document.select("img").unwrap() {
let node = element.as_node();
if let NodeData::Element(data) = node.data() {
let mut attr = data.attributes.borrow_mut();
if let Some(u) = attr.get_mut("src") {
if let Ok(url) = self.url.join(u) {
if let Some(Resource::Image(image_data)) =
self.resource_map.get(&url)
{
*u = image_data.to_data_uri();
}
}
}
}
}
for element in document.select("link").unwrap() {
let node = element.as_node();
let mut css_data: Option<&String> = None;
if let NodeData::Element(data) = node.data() {
let attr = data.attributes.borrow();
if Some("stylesheet") == attr.get("rel") {
if let Some(u) = attr.get("href") {
if let Ok(u) = self.url.join(u) {
if let Some(Resource::Css(css)) =
self.resource_map.get(&u)
{
css_data = Some(css);
}
}
}
}
}
if let Some(css) = css_data {
if let Some(parent) = node.parent() {
let style = NodeRef::new_element(
QualName::new(None, ns!(html), local_name!("style")),
None,
);
style.append(NodeRef::new_text(css));
parent.append(style);
node.detach();
}
}
}
for element in document.select("script").unwrap() {
let node = element.as_node();
if let NodeData::Element(data) = node.data() {
let mut attr = data.attributes.borrow_mut();
if let Some(u) = attr.get_mut("src") {
if let Ok(url) = self.url.join(u) {
if let Some(Resource::Javascript(script_text)) =
self.resource_map.get(&url)
{
node.append(NodeRef::new_text(script_text));
}
}
}
let _ = attr.remove("src");
}
}
document.to_string()
}
pub fn write_to_disk<P: AsRef<Path>>(
&self,
_output_dir: &P,
) -> Result<(), io::Error> {
todo!()
}
}
// Unit tests for `PageArchive::embed_resources`, one per resource kind.
#[cfg(test)]
mod test {
use super::*;
use crate::*;
use bytes::Bytes;
// A stylesheet <link> whose CSS is in the resource map is replaced by a
// <style> element containing that CSS.
#[test]
fn test_single_css() {
let content = r#"
<html>
<head>
<link rel="stylesheet" href="style.css" />
</head>
<body></body>
</html>
"#
.to_string();
let url = Url::parse("http://example.com").unwrap();
let mut resource_map = ResourceMap::new();
// Register the CSS under the same absolute URL the page reference resolves to.
resource_map.insert(
url.join("style.css").unwrap(),
Resource::Css(
r#"
body { background-color: blue; }
"#
.to_string(),
),
);
let archive = PageArchive {
url,
content,
resource_map,
};
let output = archive.embed_resources();
// Tabs and newlines are stripped from both sides before comparing.
assert_eq!(
output.replace("\t", "").replace("\n", ""),
r#"
<html>
<head>
<style>
body { background-color: blue; }
</style>
</head>
<body></body>
</html>
"#
.to_string()
.replace("\t", "")
.replace("\n", "")
);
}
// An <img> whose image is in the resource map gets its `src` rewritten to a
// base64 PNG data URI.
#[test]
fn test_single_image() {
let content = r#"
<html>
<head></head>
<body>
<img src="rustacean.png" />
</body>
</html>
"#
.to_string();
let url = Url::parse("http://example.com").unwrap();
let mut resource_map = ResourceMap::new();
// The image bytes are embedded into the test binary at compile time.
resource_map.insert(
url.join("rustacean.png").unwrap(),
Resource::Image(ImageResource {
data: Bytes::from(
include_bytes!(
"../dynamic_tests/resources/rustacean-flat-happy.png"
)
.to_vec(),
),
mimetype: "image/png".to_string(),
}),
);
let archive = PageArchive {
url,
content,
resource_map,
};
let output = archive.embed_resources();
println!("{}", output);
// Check the start of the data URI rather than the whole payload.
assert!(output.contains(
r#"<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAB"#
));
// Check one more fragment of the encoded payload.
assert!(output.contains("gfuBxu3QDwEsoDXx5J5KCU+2/DF2JAQAoDHV"))
}
// A <script src> whose text is in the resource map loses its `src`
// attribute and gets the script text as its content.
#[test]
fn test_single_js() {
let content = r#"
<html>
<head>
<script src="script.js"></script>
</head>
<body></body>
</html>
"#
.to_string();
let url = Url::parse("http://example.com").unwrap();
let mut resource_map = ResourceMap::new();
// Register the script under the same absolute URL the page reference resolves to.
resource_map.insert(
url.join("script.js").unwrap(),
Resource::Javascript(
r#"
function do_stuff() {
console.log("Hello!");
}
"#
.to_string(),
),
);
let archive = PageArchive {
url,
content,
resource_map,
};
let output = archive.embed_resources();
// Tabs and newlines are stripped from both sides before comparing.
assert_eq!(
output.replace("\t", "").replace("\n", ""),
r#"
<html><head>
<script>
function do_stuff() {
console.log("Hello!");
}
</script>
</head>
<body></body>
</html>
"#
.to_string()
.replace("\t", "")
.replace("\n", "")
);
}
}