use crate::config::LocalLinkKind;
use crate::markup_language::MarkupLanguage;
use parking_lot::RwLock;
use parse_hyperlinks::parser::Link;
use parse_hyperlinks_extras::iterator_html::HyperlinkInlineImage;
use parse_hyperlinks_extras::parser::parse_html::take_link;
use percent_encoding::{percent_decode_str, utf8_percent_encode, NON_ALPHANUMERIC};
use std::{
collections::HashSet,
path::{Component, Path, PathBuf},
sync::Arc,
};
/// Extension that `rewrite_link` appends to a link destination when extension
/// rewriting is requested and the destination is recognized as a markup file.
pub(crate) const HTML_EXT: &str = ".html";
/// Builds the path of a local link from `dest` and normalizes it lexically
/// (no file system access).
///
/// The base the destination is appended to depends on the flags and on
/// whether `dest` is relative or absolute:
///
/// | `rewrite_rel_links` | `rewrite_abs_links` | `dest`   | base                                   |
/// |---------------------|---------------------|----------|----------------------------------------|
/// | `true`              | `false`             | relative | `/` + (`docdir` without `root_path`)   |
/// | `true`              | `true`              | relative | `docdir`                               |
/// | `false`             | any                 | relative | empty (the link stays relative)        |
/// | any                 | `false`             | absolute | `/`                                    |
/// | any                 | `true`              | absolute | `root_path`                            |
///
/// While appending, `.` components are dropped and `..` components remove the
/// preceding directory.
///
/// Returns `None` when:
/// * a `..` component would climb above the root directory of an absolute
///   base,
/// * `docdir` is not located below `root_path` (release builds; debug builds
///   panic on the `debug_assert!`),
/// * the resulting path is empty.
///
/// NOTE(review): when the base is `docdir` or `root_path` (i.e.
/// `rewrite_abs_links == true`), `..` components can still climb above
/// `root_path`, as only the file system root is treated as a hard limit.
/// Confirm that callers do not rely on the result staying below `root_path`.
fn assemble_link(
    root_path: &Path,
    docdir: &Path,
    dest: &Path,
    rewrite_rel_links: bool,
    rewrite_abs_links: bool,
) -> Option<PathBuf> {
    /// A path counts as absolute when its first or its second component is
    /// the root directory. The second position is checked as well because a
    /// prefix component (e.g. a Windows drive) may precede the root.
    fn is_relative(path: &Path) -> bool {
        !path.components().take(2).any(|c| c == Component::RootDir)
    }

    /// Appends `append` to `path` component by component, resolving `.` and
    /// `..` on the way. `path` is cleared when `..` underflows an absolute
    /// path.
    fn append(path: &mut PathBuf, append: &Path) {
        for dir in append.components() {
            match dir {
                Component::ParentDir => {
                    // `PathBuf::pop()` would happily remove a trailing `..`,
                    // turning `../..` into an empty path. A `..` following
                    // another `..` must be stacked instead.
                    let ends_with_parent_dir = matches!(
                        path.components().next_back(),
                        Some(Component::ParentDir)
                    );
                    if ends_with_parent_dir {
                        path.push(Component::ParentDir.as_os_str());
                    } else if !path.pop() {
                        // Nothing left to remove: either `path` is empty
                        // (relative) or it is the root directory.
                        if is_relative(path) {
                            path.push(Component::ParentDir.as_os_str());
                        } else {
                            // Underflow above the root: invalidate the link.
                            path.clear();
                            break;
                        }
                    }
                }
                Component::Normal(c) => path.push(c),
                // `RootDir`, `Prefix` and `CurDir` do not contribute.
                _ => {}
            }
        }
    }

    let dest_is_relative = is_relative(dest);

    debug_assert!(docdir.starts_with(root_path));

    // Choose the base directory the destination is resolved against.
    let mut link = match (rewrite_rel_links, rewrite_abs_links, dest_is_relative) {
        (true, false, true) => {
            let link = PathBuf::from(Component::RootDir.as_os_str());
            link.join(docdir.strip_prefix(root_path).ok()?)
        }
        (true, true, true) => docdir.to_path_buf(),
        (false, _, true) => PathBuf::new(),
        (_, false, false) => PathBuf::from(Component::RootDir.as_os_str()),
        (_, true, false) => root_path.to_path_buf(),
    };

    append(&mut link, dest);

    // An empty path signals an underflow (or an empty destination).
    if link.as_os_str().is_empty() {
        None
    } else {
        Some(link)
    }
}
/// Rewrites one HTML hyperlink (`<a href=...>`) or inline image
/// (`<img src=...>`) that points to a local file.
///
/// `link` must contain exactly one such element and nothing else; anything
/// `take_link` does not parse that way yields `None`.
///
/// Processing steps:
/// 1. A leading `http:` or `https:` scheme is removed from the destination
///    of a hyperlink. If the link text starts with such a scheme, the text
///    is replaced by the percent-decoded file stem of the remaining text.
/// 2. The destination is percent-decoded.
/// 3. With `rewrite_ext == true`, `HTML_EXT` is appended to hyperlink
///    destinations that `MarkupLanguage::from` recognizes as markup files.
/// 4. The destination is resolved with `assemble_link` using `root_path`,
///    `docdir`, `rewrite_rel_links` and `rewrite_abs_links`.
/// 5. The result is percent-encoded again and inserted into freshly
///    generated HTML.
///
/// Returns the new HTML together with the (not percent-encoded) resolved
/// destination path, or `None` when the link cannot be parsed, its
/// destination is not valid percent-encoded UTF-8, or `assemble_link` rejects
/// it.
///
/// # Panics
///
/// In debug builds, when a hyperlink contains `://`. Callers are expected to
/// filter such links beforehand.
#[inline]
fn rewrite_link(
    link: &str,
    root_path: &Path,
    docdir: &Path,
    rewrite_rel_links: bool,
    rewrite_abs_links: bool,
    rewrite_ext: bool,
) -> Option<(String, PathBuf)> {
    /// Converts the resolved destination into the percent-encoded form that
    /// is written into the `href`/`src` attribute. Used for hyperlinks and
    /// images alike, so both are encoded the same way.
    fn encode_dest(destout: &Path) -> String {
        // Everything except alphanumerics and `/`, `.`, `_`, `-` is encoded.
        const ASCIISET: percent_encoding::AsciiSet = NON_ALPHANUMERIC
            .remove(b'/')
            .remove(b'.')
            .remove(b'_')
            .remove(b'-');

        // A path that is not valid Unicode is rendered as an empty string.
        let destout = destout.to_str().unwrap_or_default();
        // Windows only: URLs use `/` as separator. On other platforms `\` is
        // an ordinary file name character and must stay untouched.
        #[cfg(windows)]
        let destout = destout.replace('\\', "/");
        #[cfg(windows)]
        let destout = destout.as_str();

        utf8_percent_encode(destout, &ASCIISET).to_string()
    }

    match take_link(link) {
        Ok(("", ("", Link::Text2Dest(text, dest, title)))) => {
            debug_assert!(!link.contains("://"));

            // Decide about the extension before the destination is altered.
            let rewrite_ext = rewrite_ext
                && !matches!(
                    MarkupLanguage::from(Path::new(&*dest)),
                    MarkupLanguage::None
                );

            // Local links may carry a scheme without authority, e.g.
            // `http:dir/note.md`.
            let dest = dest
                .trim_start_matches("http:")
                .trim_start_matches("https:");

            // An autolink shows its destination as text: display the file
            // stem only. The text is cosmetic, therefore invalid UTF-8
            // sequences are replaced instead of failing.
            let short_text = if text.starts_with("http:") || text.starts_with("https:") {
                let stripped = text
                    .trim_start_matches("http:")
                    .trim_start_matches("https:");
                let decoded = percent_decode_str(stripped).decode_utf8_lossy();
                Path::new(&*decoded)
                    .file_stem()
                    .and_then(|stem| stem.to_str())
                    .unwrap_or_default()
                    .to_string()
            } else {
                text.to_string()
            };

            // The destination comes from the document: reject it instead of
            // panicking when it does not decode to valid UTF-8.
            let dest = percent_decode_str(dest).decode_utf8().ok()?;
            let dest = if rewrite_ext {
                let mut dest = dest.into_owned();
                dest.push_str(HTML_EXT);
                PathBuf::from(dest)
            } else {
                PathBuf::from(&*dest)
            };

            let destout = assemble_link(
                root_path,
                docdir,
                &dest,
                rewrite_rel_links,
                rewrite_abs_links,
            )?;
            let destout_encoded = encode_dest(&destout);

            Some((
                format!(
                    "<a href=\"{}\" title=\"{}\">{}</a>",
                    destout_encoded, title, short_text
                ),
                destout,
            ))
        }

        Ok(("", ("", Link::Image(text, dest)))) => {
            // Same policy as for hyperlinks: undecodable destinations are
            // invalid links, not a reason to panic.
            let dest = percent_decode_str(&dest).decode_utf8().ok()?;
            let dest = PathBuf::from(&*dest);

            let destout = assemble_link(
                root_path,
                docdir,
                &dest,
                rewrite_rel_links,
                rewrite_abs_links,
            )?;
            let destout_encoded = encode_dest(&destout);

            Some((
                format!("<img src=\"{}\" alt=\"{}\" />", destout_encoded, text),
                destout,
            ))
        }

        // Parse errors, trailing input and all other link kinds.
        _ => None,
    }
}
#[inline]
pub fn rewrite_links(
html: String,
root_path: &Path,
docdir: &Path,
local_link_kind: LocalLinkKind,
rewrite_ext: bool,
allowed_local_links: Arc<RwLock<HashSet<PathBuf>>>,
) -> String {
let (rewrite_rel_links, rewrite_abs_links) = match local_link_kind {
LocalLinkKind::Off => (false, false),
LocalLinkKind::Short => (true, false),
LocalLinkKind::Long => (true, true),
};
let mut allowed_urls = allowed_local_links.write();
let mut rest = &*html;
let mut html_out = String::new();
for ((skipped, consumed, remaining), link) in HyperlinkInlineImage::new(&html) {
html_out.push_str(skipped);
rest = remaining;
if link.contains("://") || link.starts_with("mailto:") || link.starts_with("tel:") {
html_out.push_str(consumed);
continue;
}
if let Some((consumed_new, dest)) = rewrite_link(
consumed,
root_path,
docdir,
rewrite_rel_links,
rewrite_abs_links,
rewrite_ext,
) {
html_out.push_str(&consumed_new);
allowed_urls.insert(dest);
} else {
log::debug!("Viewer: invalid_local_links: {}", consumed);
html_out.push_str("<i>INVALID LOCAL LINK</i>");
}
}
html_out.push_str(rest);
if allowed_urls.is_empty() {
log::debug!(
"Viewer: note file has no local hyperlinks. No additional local files are served.",
);
} else {
log::debug!(
"Viewer: referenced allowed local files: {}",
allowed_urls
.iter()
.map(|p| {
let mut s = "\n '".to_string();
s.push_str(&p.display().to_string());
s
})
.collect::<String>()
);
}
html_out
}
// Unit tests for `assemble_link`, `rewrite_link` and `rewrite_links`.
#[cfg(test)]
mod tests {
use parking_lot::RwLock;
use std::{
collections::HashSet,
path::{Path, PathBuf},
sync::Arc,
};
use crate::html::assemble_link;
use crate::html::rewrite_link;
use crate::html::rewrite_links;
// Exercises every base selection of `assemble_link` as well as the
// normalization of `..` components.
#[test]
fn test_assemble_link() {
// Relative link, relative rewriting on, absolute rewriting off:
// resolved against `docdir` with `root_path` stripped.
let output = assemble_link(
Path::new("/my"),
Path::new("/my/doc/path"),
Path::new("../local/link to/note.md"),
true,
false,
)
.unwrap();
assert_eq!(output, Path::new("/doc/local/link to/note.md"));
// Relative link, no rewriting: the link stays relative.
let output = assemble_link(
Path::new("/my"),
Path::new("/my/doc/path"),
Path::new("../local/link to/note.md"),
false,
false,
)
.unwrap();
assert_eq!(output, Path::new("../local/link to/note.md"));
// Absolute link, no rewriting: only normalized.
let output = assemble_link(
Path::new("/my"),
Path::new("/my/doc/path"),
Path::new("/test/../abs/local/link to/note.md"),
false,
false,
)
.unwrap();
assert_eq!(output, Path::new("/abs/local/link to/note.md"));
// Absolute link climbing above the root directory: rejected.
let output = assemble_link(
Path::new("/my"),
Path::new("/my/doc/path"),
Path::new("/../local/link to/note.md"),
false,
false,
);
assert_eq!(output, None);
// Absolute link, absolute rewriting on: `root_path` is prepended.
let output = assemble_link(
Path::new("/my"),
Path::new("/my/doc/path"),
Path::new("/abs/local/link to/note.md"),
false,
true,
)
.unwrap();
assert_eq!(output, Path::new("/my/abs/local/link to/note.md"));
// Same input as the third case above.
let output = assemble_link(
Path::new("/my"),
Path::new("/my/doc/path"),
Path::new("/test/../abs/local/link to/note.md"),
false,
false,
)
.unwrap();
assert_eq!(output, Path::new("/abs/local/link to/note.md"));
// Relative link, both rewritings on: resolved against the full `docdir`.
let output = assemble_link(
Path::new("/my"),
Path::new("/my/doc/path"),
Path::new("abs/local/link to/note.md"),
true,
true,
)
.unwrap();
assert_eq!(output, Path::new("/my/doc/path/abs/local/link to/note.md"));
}
// A hyperlink containing `://` must never reach `rewrite_link`: the
// `debug_assert!` inside is expected to fire.
#[test]
#[should_panic(expected = "assertion failed: !link.contains(\\\"://\\\")")]
fn test_rewrite_link1() {
let root_path = Path::new("/my/");
let docdir = Path::new("/my/abs/note path/");
let input = "<a href=\"ftp://getreu.net\">Blog</a>";
let _ = rewrite_link(input, root_path, docdir, true, false, false).unwrap();
}
// Checks the generated HTML and the returned path of `rewrite_link` for
// images and hyperlinks in various rewriting modes.
#[test]
fn test_rewrite_link2() {
let root_path = Path::new("/my/");
let docdir = Path::new("/my/abs/note path/");
// Image, relative rewriting on: `.` and `..` are resolved and the result
// is percent-encoded again.
let input = "<img src=\"down/./down/../../t%20m%20p.jpg\" alt=\"Image\" />";
let expected = "<img src=\"/abs/note%20path/t%20m%20p.jpg\" \
alt=\"Image\" />";
let (outhtml, outpath) =
rewrite_link(input, root_path, docdir, true, false, false).unwrap();
assert_eq!(outhtml, expected);
assert_eq!(outpath, PathBuf::from("/abs/note path/t m p.jpg"));
// Image, no rewriting: a leading `..` that cannot be resolved is kept.
let input = "<img src=\"down/./../../t%20m%20p.jpg\" alt=\"Image\" />";
let expected = "<img src=\"../t%20m%20p.jpg\" alt=\"Image\" />";
let (outhtml, outpath) =
rewrite_link(input, root_path, docdir, false, false, false).unwrap();
assert_eq!(outhtml, expected);
assert_eq!(outpath, PathBuf::from("../t m p.jpg"));
// Hyperlink, relative rewriting on; an empty `title` attribute is emitted.
let input = "<a href=\"./down/./../my%20note%201.md\">my note 1</a>";
let expected = "<a href=\"/abs/note%20path/my%20note%201.md\" \
title=\"\">my note 1</a>";
let (outhtml, outpath) =
rewrite_link(input, root_path, docdir, true, false, false).unwrap();
assert_eq!(outhtml, expected);
assert_eq!(outpath, PathBuf::from("/abs/note path/my note 1.md"));
// Absolute hyperlink, absolute rewriting off: only normalized.
let input = "<a href=\"/dir/./down/../my%20note%201.md\">my note 1</a>";
let expected = "<a href=\"/dir/my%20note%201.md\" \
title=\"\">my note 1</a>";
let (outhtml, outpath) =
rewrite_link(input, root_path, docdir, true, false, false).unwrap();
assert_eq!(outhtml, expected);
assert_eq!(outpath, PathBuf::from("/dir/my note 1.md"));
// Relative hyperlink, no rewriting: stays relative.
let input = "<a href=\"./down/./../dir/my%20note%201.md\">my note 1</a>";
let expected = "<a href=\"dir/my%20note%201.md\" \
title=\"\">my note 1</a>";
let (outhtml, outpath) =
rewrite_link(input, root_path, docdir, false, false, false).unwrap();
assert_eq!(outhtml, expected);
assert_eq!(outpath, PathBuf::from("dir/my note 1.md"));
// Extension rewriting on: `.html` is appended to the markup file.
let input = "<a href=\"./down/./../dir/my%20note%201.md\">my note 1</a>";
let expected = "<a href=\"/abs/note%20path/dir/my%20note%201.md.html\" \
title=\"\">my note 1</a>";
let (outhtml, outpath) = rewrite_link(input, root_path, docdir, true, false, true).unwrap();
assert_eq!(outhtml, expected);
assert_eq!(
outpath,
PathBuf::from("/abs/note path/dir/my note 1.md.html")
);
// A deeper `root_path`: only the part of `docdir` below it remains.
let input = "<a href=\"./down/./../dir/my%20note%201.md\">my note 1</a>";
let expected = "<a href=\"/path/dir/my%20note%201.md\" title=\"\">my note 1</a>";
let (outhtml, outpath) = rewrite_link(
input,
Path::new("/my/note/"),
Path::new("/my/note/path/"),
true,
false,
false,
)
.unwrap();
assert_eq!(outhtml, expected);
assert_eq!(outpath, PathBuf::from("/path/dir/my note 1.md"));
// Absolute hyperlink: `docdir` does not influence the result.
let input = "<a href=\"/down/./../dir/my%20note%201.md\">my note 1</a>";
let expected = "<a href=\"/dir/my%20note%201.md\" title=\"\">my note 1</a>";
let (outhtml, outpath) = rewrite_link(
input,
root_path,
Path::new("/my/ignored/"),
true,
false,
false,
)
.unwrap();
assert_eq!(outhtml, expected);
assert_eq!(outpath, PathBuf::from("/dir/my note 1.md"));
// Absolute hyperlink climbing above the root: rejected.
let input = "<a href=\"/down/../../dir/my%20note%201.md\">my note 1</a>";
let output = rewrite_link(
input,
root_path,
Path::new("/my/notepath/"),
true,
false,
false,
);
assert_eq!(output, None);
// Relative hyperlink climbing above the root: rejected.
let input = "<a href=\"../../dir/my%20note%201.md\">my note 1</a>";
let output = rewrite_link(
input,
root_path,
Path::new("/my/notepath/"),
true,
false,
false,
);
assert_eq!(output, None);
// Same with the file system root as `root_path`.
let root_path = Path::new("/");
let input = "<a href=\"../../dir/my%20note%201.md\">my note 1</a>";
let output = rewrite_link(input, root_path, Path::new("/my/"), true, false, false);
assert_eq!(output, None);
// Same with directories given without trailing slash.
let root_path = Path::new("/my");
let input = "<a href=\"../../dir/my%20note%201.md\">my note 1</a>";
let output = rewrite_link(
input,
root_path,
Path::new("/my/notepath"),
true,
false,
false,
);
assert_eq!(output, None);
}
// End-to-end test of `rewrite_links` in `LocalLinkKind::Short` mode:
// links with `://` are left untouched, local ones are rewritten and
// registered, invalid ones are replaced by a marker.
#[test]
fn test_rewrite_abs_links() {
use crate::config::LocalLinkKind;
let allowed_urls = Arc::new(RwLock::new(HashSet::new()));
let input = "abc<a href=\"ftp://getreu.net\">Blog</a>\
def<a href=\"https://getreu.net\">https://getreu.net</a>\
ghi<img src=\"t%20m%20p.jpg\" alt=\"test 1\" />\
jkl<a href=\"down/../down/my%20note%201.md\">my note 1</a>\
mno<a href=\"http:./down/../dir/my%20note.md\">\
http:./down/../dir/my%20note.md</a>\
pqr<a href=\"http:/down/../dir/my%20note.md\">\
http:./down/../dir/my%20note.md</a>\
stu<a href=\"http:/../dir/underflow/my%20note.md\">\
not allowed dir</a>\
vwx<a href=\"http:../../../not allowed dir/my%20note.md\">\
not allowed</a>"
.to_string();
let expected = "abc<a href=\"ftp://getreu.net\">Blog</a>\
def<a href=\"https://getreu.net\">https://getreu.net</a>\
ghi<img src=\"/abs/note%20path/t%20m%20p.jpg\" alt=\"test 1\" />\
jkl<a href=\"/abs/note%20path/down/my%20note%201.md\" title=\"\">my note 1</a>\
mno<a href=\"/abs/note%20path/dir/my%20note.md\" title=\"\">my note</a>\
pqr<a href=\"/dir/my%20note.md\" title=\"\">my note</a>\
stu<i>INVALID LOCAL LINK</i>\
vwx<i>INVALID LOCAL LINK</i>"
.to_string();
let root_path = Path::new("/my/");
let docdir = Path::new("/my/abs/note path/");
let output = rewrite_links(
input,
root_path,
docdir,
LocalLinkKind::Short,
false,
allowed_urls.clone(),
);
// `rewrite_links` has released its write lock by now.
let url = allowed_urls.read_recursive();
assert_eq!(output, expected);
// Destinations of successfully rewritten links are registered.
assert!(url.contains(&PathBuf::from("/abs/note path/t m p.jpg")));
assert!(url.contains(&PathBuf::from("/abs/note path/dir/my note.md")));
assert!(url.contains(&PathBuf::from("/abs/note path/down/my note 1.md")));
}
}