use lol_html::{element, HtmlRewriter, Settings};
use mailparse::body::Body;
use mailparse::parse_mail;
use regex::{Captures, Regex};
use std::borrow::Cow;
use std::collections::HashMap;
#[macro_use]
extern crate lazy_static;
extern crate base64;
fn proxify_css(body: &str) -> Cow<str> {
lazy_static! {
static ref RE: Regex =
Regex::new(r#"(?i)(?m)url\s*\(\s*([$&+,:;=?@#'"<>*%!/.-a-zA-Z_]+)\s*\)"#).unwrap();
}
RE.replace_all(body, |caps: &Captures| {
let match_url = &caps[0][5..caps[0].len() - 2];
if match_url.starts_with("data:") != true {
let mut filename = match_url.split('/');
let name = filename.next_back();
format!("url(\"{}\")", name.unwrap())
} else {
format!("{}", &caps[0])
}
})
}
fn parse_by_slash(path: String) -> String {
let mut filename = path.split('/');
let name = filename.next_back();
name.unwrap().to_string()
}
fn set_filename(filename: String, file_format: String) -> (String, String) {
let mut filename2 = filename.split('/');
let name = filename2.next_back();
let src1 = name.unwrap();
let filename1 = src1.split('?').next().unwrap();
let mut filename = filename1.split(':');
let name = filename.next_back().unwrap();
let mut name1 = name.split('.');
let src1 = name1.next();
(
src1.unwrap().to_string(),
String::from(src1.unwrap().to_string().replace("#", "")) + &String::from(file_format),
)
}
pub fn rewrite(mht_file: Vec<u8>, mht_filename: String) -> HashMap<String, Vec<u8>> {
let mut extracted_file: HashMap<String, Vec<u8>> = HashMap::new();
let parsed = parse_mail(&mht_file).unwrap();
let mut link_to_hash: HashMap<String, String> = HashMap::new();
let mut output = vec![];
{
let mut rewriter = HtmlRewriter::try_new(
Settings {
element_content_handlers: vec![element!(
"img[src], link[rel=stylesheet][href], iframe[src]",
|el| {
if el.get_attribute("rel") == Some("stylesheet".to_string()) {
let src = el.get_attribute("href").unwrap();
let (src1, newname) = set_filename(src, ".css".to_string());
link_to_hash.insert(el.get_attribute("href").unwrap(), newname);
el.set_attribute(
"href",
&(src1.replace("#", "") + &String::from(".css")),
)
.unwrap();
} else if el.tag_name() == "iframe" {
let src = el.get_attribute("src").unwrap();
let (src1, newname) = set_filename(src, ".html".to_string());
link_to_hash.insert(
el.get_attribute("src")
.unwrap()
.replace("cid:", "<")
.to_string() + ">",
newname,
);
el.set_attribute(
"src",
&(src1.replace("#", "") + &String::from(".html")),
)
.unwrap();
} else {
let src = el.get_attribute("src").unwrap();
let mut filename = src.split('/');
let name = filename.next_back();
let src1 = name.unwrap();
let filename1 = src1.split('?').next().unwrap();
link_to_hash
.insert(el.get_attribute("src").unwrap(), filename1.to_string());
el.set_attribute("src", &filename1.to_string()).unwrap();
}
Ok(())
}
)],
..Settings::default()
},
|c: &[u8]| output.extend_from_slice(c),
)
.unwrap();
rewriter
.write(&parsed.subparts[0].get_body_raw().unwrap())
.unwrap();
rewriter.end().unwrap();
drop(rewriter);
}
extracted_file.insert("index.html".to_string(), output);
for sub in parsed.subparts {
let name = sub.headers[2].get_value().unwrap();
let ctype = sub.headers[0].get_value().unwrap();
let cid = sub.headers[1].get_value().unwrap();
let creation;
if link_to_hash.get(&name) != None {
creation = link_to_hash.get(&name);
} else {
creation = link_to_hash.get(&cid);
}
match creation {
Some(name1) => {
let dname = mht_filename.clone();
let mut filename = name1.split(':');
let name = filename.next_back();
let src1 = name.unwrap().replace("#", "");
let src2 = dname + &String::from("/") + &src1;
match sub.get_body_encoded().unwrap() {
Body::Base64(body) | Body::QuotedPrintable(body) => {
if ctype == "text/css" {
let st: &str = &*body.get_decoded_as_string().unwrap();
let after = proxify_css(st);
extracted_file.insert(src2.clone(), after.as_bytes().to_vec());
} else {
extracted_file.insert(src2.clone(), body.get_decoded().unwrap());
}
}
Body::SevenBit(body) | Body::EightBit(body) => {
println!("mail body: {:?}", body.get_raw());
}
Body::Binary(body) => {
println!("mail body binary: {:?}", body.get_raw());
}
}
}
None => {
let mut filename = parse_by_slash(name);
filename = mht_filename.clone() + &String::from("/") + &filename;
match sub.get_body_encoded().unwrap() {
Body::Base64(body) | Body::QuotedPrintable(body) => {
extracted_file.insert(filename.clone(), body.get_decoded().unwrap());
}
Body::SevenBit(body) | Body::EightBit(body) => {
println!("mail body: {:?}", body.get_raw());
}
Body::Binary(body) => {
println!("mail body binary: {:?}", body.get_raw());
}
}
}
}
}
extracted_file
}