use crate::filename::{Extension, NotePathStr};
use crate::{config::LocalLinkKind, error::NoteError};
use html_escape;
use parking_lot::RwLock;
use parse_hyperlinks::parser::Link;
use parse_hyperlinks_extras::iterator_html::HyperlinkInlineImage;
use percent_encoding::percent_decode_str;
use std::{
borrow::Cow,
collections::HashSet,
path::{Component, Path, PathBuf},
sync::Arc,
};
/// Extension (including the leading dot) that `rewrite_local_link()` appends
/// to a link destination with a Tp-Note extension when `rewrite_ext` is set.
pub(crate) const HTML_EXT: &str = ".html";
/// Assembles the path of a local link and normalizes it lexically (the file
/// system is never consulted).
///
/// `dest` is appended to a base that depends on the flags and on whether
/// `dest` is relative or absolute:
///
/// | `rewrite_rel_paths` | `rewrite_abs_paths` | `dest`   | base                                   |
/// |---------------------|---------------------|----------|----------------------------------------|
/// | `true`              | `false`             | relative | `/` + (`docdir` relative to `root_path`) |
/// | `true`              | `true`              | relative | `docdir`                               |
/// | `false`             | any                 | relative | empty path                             |
/// | any                 | `false`             | absolute | `/`                                    |
/// | any                 | `true`              | absolute | `root_path`                            |
///
/// While appending, `.` components are dropped and every `..` removes the
/// preceding normal component. A `..` that cannot be resolved is kept when
/// the base is relative (`../../x` stays `../../x`).
///
/// Returns `None` when
/// * a `..` would climb above the root of an absolute base,
/// * `docdir` is not located below `root_path` (only checked in the first
///   row of the table), or
/// * the resulting path is empty.
fn assemble_link(
    root_path: &Path,
    docdir: &Path,
    dest: &Path,
    rewrite_rel_paths: bool,
    rewrite_abs_paths: bool,
) -> Option<PathBuf> {
    /// `true` unless the first or the second component of `path` is the root
    /// directory. The second position is inspected as well, because a path
    /// prefix (e.g. a Windows drive) may precede the root directory.
    fn is_relative(path: &Path) -> bool {
        !path.components().take(2).any(|c| c == Component::RootDir)
    }

    /// Appends `append` to `path` component by component, resolving `.` and
    /// `..` on the fly. Clears `path` when `..` underflows an absolute path.
    fn append(path: &mut PathBuf, append: &Path) {
        for dir in append.components() {
            match dir {
                Component::ParentDir => {
                    // `PathBuf::pop()` happily removes a trailing `..`, which
                    // would make `../..` collapse to the empty path. A `..`
                    // on top of an unresolved `..` must be stacked instead.
                    let ends_with_parent_dir = matches!(
                        path.components().next_back(),
                        Some(Component::ParentDir)
                    );
                    if ends_with_parent_dir {
                        path.push(Component::ParentDir.as_os_str());
                    } else if !path.pop() {
                        // Nothing left to remove.
                        if is_relative(path.as_path()) {
                            // Relative path: remember the unresolved `..`.
                            path.push(Component::ParentDir.as_os_str());
                        } else {
                            // Absolute path: we would leave the root.
                            path.clear();
                            break;
                        }
                    }
                }
                Component::Normal(c) => path.push(c),
                // Root, prefix and `.` components of `append` are ignored;
                // the base decides where the link is anchored.
                _ => {}
            }
        }
    }

    let dest_is_relative = is_relative(dest);

    debug_assert!(docdir.starts_with(root_path));

    // Choose the base the destination is appended to.
    let mut link = match (rewrite_rel_paths, rewrite_abs_paths, dest_is_relative) {
        (true, false, true) => {
            let link = PathBuf::from(Component::RootDir.as_os_str());
            link.join(docdir.strip_prefix(root_path).ok()?)
        }
        (true, true, true) => docdir.to_path_buf(),
        (false, _, true) => PathBuf::new(),
        (_, false, false) => PathBuf::from(Component::RootDir.as_os_str()),
        (_, true, false) => root_path.to_path_buf(),
    };
    append(&mut link, dest);

    // An empty result signals an underflow (or an empty destination).
    if link.as_os_str().is_empty() {
        None
    } else {
        Some(link)
    }
}
/// Operations needed to rewrite a parsed hyperlink or inline image and to
/// render it back to HTML.
trait Hyperlink {
/// Decodes, in place, the HTML entities in the link's text, destination and
/// title and the percent encoding in the link's destination. If the
/// percent-decoded text equals the decoded destination, the text is replaced
/// by the destination.
fn decode_html_escape_and_percent(&mut self);
/// Rewrites, in place, the destination (and for autolinks also the text) of
/// a local link.
///
/// * `root_path`, `docdir`, `rewrite_rel_paths` and `rewrite_abs_paths` are
///   forwarded to `assemble_link()` and select the base the destination is
///   appended to.
/// * `rewrite_ext` requests that `HTML_EXT` is appended to destinations
///   with a Tp-Note extension.
///
/// Returns `Ok(None)` when the link is not a local one (the link is left
/// untouched), `Ok(Some(path))` with the rewritten destination, or
/// `Err(NoteError::InvalidLocalLink)` when the link can not be rewritten.
fn rewrite_local_link(
&mut self,
root_path: &Path,
docdir: &Path,
rewrite_rel_paths: bool,
rewrite_abs_paths: bool,
rewrite_ext: bool,
) -> Result<Option<PathBuf>, NoteError>;
/// Renders the link as HTML markup.
fn to_html(&self) -> String;
}
impl<'a> Hyperlink for Link<'a> {
    /// Decodes HTML entities in text, destination and title, then percent
    /// decodes the destination. When the percent-decoded text equals the
    /// decoded destination (autolink), the text is replaced by the
    /// destination.
    ///
    /// Only `Link::Text2Dest` and `Link::Image` are processed; any other
    /// variant is left untouched. A percent-encoded sequence that does not
    /// decode to valid UTF-8 is kept in its encoded form instead of
    /// panicking, because link destinations are document-controlled input.
    #[inline]
    fn decode_html_escape_and_percent(&mut self) {
        /// Stores the HTML-entity-decoded form of `s` back into `s`. Nothing
        /// is written when the decoder hands back the borrowed input.
        fn decode_entities(s: &mut Cow<'_, str>) {
            let decoded = html_escape::decode_html_entities(&*s);
            if matches!(&decoded, Cow::Owned(..)) {
                let decoded = Cow::Owned(decoded.into_owned());
                *s = decoded;
            }
        }

        // Images have no title; this placeholder keeps the code below uniform.
        let empty_title = &mut Cow::from("");
        let (text, dest, title) = match self {
            Link::Text2Dest(text, dest, title) => (text, dest, title),
            Link::Image(alt, source) => (alt, source, empty_title),
            // Unsupported variants are rejected later by
            // `rewrite_local_link()`; there is nothing to decode here.
            _ => return,
        };

        // HTML entities first ...
        decode_entities(text);
        decode_entities(dest);
        decode_entities(title);

        // ... then the percent encoding of the destination. Invalid UTF-8
        // leaves the destination as it is.
        let decoded_dest = percent_decode_str(&*dest).decode_utf8();
        if let Ok(Cow::Owned(decoded)) = decoded_dest {
            *dest = Cow::Owned(decoded);
        }

        // The text is only percent-decoded for the comparison: it is
        // overwritten solely when it denotes the same string as `dest`.
        let text_is_dest = percent_decode_str(&*text)
            .decode_utf8()
            .map(|decoded_text| decoded_text == *dest)
            .unwrap_or(false);
        if text_is_dest {
            *text = dest.clone();
        }
    }

    /// Rewrites the destination of a local link in place and returns the
    /// resulting path.
    ///
    /// * Destinations containing `://` (but not `:///`) or starting with
    ///   `mailto:` or `tel:` are not local: the link is left untouched and
    ///   `Ok(None)` is returned.
    /// * For autolinks (text equals destination and contains `:` or `@`) the
    ///   text is shortened to the last path segment, without a Tp-Note
    ///   extension where that is possible.
    /// * The prefixes `http://`, `http:` and `tpnote:` are stripped from the
    ///   destination. If `rewrite_ext` is set and the destination has a
    ///   Tp-Note extension, `HTML_EXT` is appended. The path is then built
    ///   by `assemble_link()`.
    ///
    /// # Errors
    ///
    /// `NoteError::InvalidLocalLink` when `self` is neither `Link::Text2Dest`
    /// nor `Link::Image`, or when `assemble_link()` returns `None`.
    fn rewrite_local_link(
        &mut self,
        root_path: &Path,
        docdir: &Path,
        rewrite_rel_paths: bool,
        rewrite_abs_paths: bool,
        rewrite_ext: bool,
    ) -> Result<Option<PathBuf>, NoteError> {
        let (text, dest) = match self {
            Link::Text2Dest(text, dest, _title) => (text, dest),
            Link::Image(alt, source) => (alt, source),
            _ => return Err(NoteError::InvalidLocalLink),
        };

        // Remote links and non-file schemes are none of our business.
        if (dest.contains("://") && !dest.contains(":///"))
            || dest.starts_with("mailto:")
            || dest.starts_with("tel:")
        {
            return Ok(None);
        }

        // Autolink: show a short, readable text instead of the raw target.
        if *text == *dest && (text.contains(':') || text.contains('@')) {
            let short_text = text
                .trim_start_matches("http://")
                .trim_start_matches("http:")
                .trim_start_matches("tpnote:");
            // Keep the last path segment only.
            let short_text = short_text
                .rsplit_once(['/', '\\'])
                .map(|(_path, stem)| stem)
                .unwrap_or(short_text);
            // Drop the extension only when it is a Tp-Note extension and
            // cutting at the last dot leaves the sort-tag reported by
            // `split_sort_tag()` unchanged (presumably guards names like
            // `3.0`, where the dot belongs to the sort-tag).
            let sort_tag1 = short_text.split_sort_tag().0;
            let (sort_tag_stem, ext) = short_text.rsplit_once('.').unwrap_or((short_text, ""));
            let sort_tag2 = sort_tag_stem.split_sort_tag().0;
            let short_text = if sort_tag1 == sort_tag2 && <str as Extension>::is_tpnote_ext(ext) {
                sort_tag_stem
            } else {
                short_text
            };
            let new_text = Cow::Owned(short_text.to_string());
            *text = new_text;
        }

        let short_dest = dest
            .trim_start_matches("http://")
            .trim_start_matches("http:")
            .trim_start_matches("tpnote:");
        // Borrow whenever possible; allocate only to append the extension.
        let short_dest: Cow<'_, str> = if rewrite_ext && dest.has_tpnote_ext() {
            Cow::Owned(format!("{}{}", short_dest, HTML_EXT))
        } else {
            Cow::Borrowed(short_dest)
        };

        let dest_out = assemble_link(
            root_path,
            docdir,
            Path::new(&*short_dest),
            rewrite_rel_paths,
            rewrite_abs_paths,
        )
        .ok_or(NoteError::InvalidLocalLink)?;

        // `dest_out` was assembled from `str`s and `Path`s; should it ever
        // fail to convert, the destination falls back to the empty string.
        let new_dest = Cow::Owned(dest_out.to_str().unwrap_or_default().to_string());
        *dest = new_dest;
        Ok(Some(dest_out))
    }

    /// Renders the link as `<a href="…" title="…">…</a>` or as
    /// `<img src="…" alt="…" />`.
    ///
    /// Backslashes in the destination are replaced by forward slashes. Text
    /// is escaped with `html_escape::encode_text`, destination and title with
    /// `html_escape::encode_double_quoted_attribute`. The `title` attribute
    /// is omitted when the title is empty.
    ///
    /// # Panics
    ///
    /// Panics when `self` is neither `Link::Text2Dest` nor `Link::Image`.
    fn to_html(&self) -> String {
        let empty_title = &Cow::from("");
        let (text, dest, title) = match self {
            Link::Text2Dest(text, dest, title) => (text, dest, title),
            Link::Image(alt, source) => (alt, source, empty_title),
            _ => unimplemented!(),
        };

        // URLs use forward slashes, whatever the local file system prefers.
        let dest = if dest.contains('\\') {
            Cow::Owned(dest.replace('\\', "/"))
        } else {
            Cow::Borrowed(&**dest)
        };

        let text = html_escape::encode_text(&text);
        let dest = html_escape::encode_double_quoted_attribute(&dest);
        let title = html_escape::encode_double_quoted_attribute(&title);
        let title = if title.is_empty() {
            String::new()
        } else {
            format!(" title=\"{}\"", title)
        };

        match self {
            Link::Text2Dest(_, _, _) => format!("<a href=\"{}\"{}>{}</a>", dest, title, text),
            Link::Image(_, _) => format!("<img src=\"{}\" alt=\"{}\" />", dest, text),
            _ => unimplemented!(),
        }
    }
}
/// Rewrites every hyperlink and inline image found in `html_input` and
/// returns the resulting HTML.
///
/// Text between the links is copied verbatim. Each link is decoded with
/// `decode_html_escape_and_percent()`, rewritten with `rewrite_local_link()`
/// and rendered with `to_html()`. A link that can not be rewritten is
/// replaced by the text of the error. The path of every rewritten local link
/// is inserted into `allowed_local_links`; the write lock on that set is held
/// until the function returns.
///
/// `local_link_kind` selects the rewriting mode: `Off` rewrites nothing,
/// `Short` rewrites relative links only, `Long` rewrites relative and
/// absolute links. `root_path`, `docdir` and `rewrite_ext` are passed on to
/// `rewrite_local_link()`.
#[inline]
pub fn rewrite_links(
    html_input: String,
    root_path: &Path,
    docdir: &Path,
    local_link_kind: LocalLinkKind,
    rewrite_ext: bool,
    allowed_local_links: Arc<RwLock<HashSet<PathBuf>>>,
) -> String {
    let rewrite_rel_paths;
    let rewrite_abs_paths;
    match local_link_kind {
        LocalLinkKind::Off => {
            rewrite_rel_paths = false;
            rewrite_abs_paths = false;
        }
        LocalLinkKind::Short => {
            rewrite_rel_paths = true;
            rewrite_abs_paths = false;
        }
        LocalLinkKind::Long => {
            rewrite_rel_paths = true;
            rewrite_abs_paths = true;
        }
    }

    let mut registry = allowed_local_links.write();
    let mut output = String::new();
    // Whatever follows the last link; the whole input if there is no link.
    let mut tail = &*html_input;

    for ((before, _link_source, after), mut link) in HyperlinkInlineImage::new(&html_input) {
        output.push_str(before);
        tail = after;

        link.decode_html_escape_and_percent();
        let rewritten = link.rewrite_local_link(
            root_path,
            docdir,
            rewrite_rel_paths,
            rewrite_abs_paths,
            rewrite_ext,
        );
        let rendered = match rewritten {
            Ok(local_path) => {
                // Only local links yield a path that has to be registered.
                if let Some(path) = local_path {
                    registry.insert(path);
                }
                link.to_html()
            }
            Err(e) => e.to_string(),
        };
        output.push_str(&rendered);
    }
    output.push_str(tail);

    if registry.is_empty() {
        log::debug!(
            "Viewer: note file has no local hyperlinks. No additional local files are served.",
        );
    } else {
        log::debug!(
            "Viewer: referenced allowed local files: {}",
            registry
                .iter()
                .map(|path| format!("\n '{}", path.display()))
                .collect::<String>()
        );
    }

    output
}
#[cfg(test)]
mod tests {
use crate::error::NoteError;
use crate::html::assemble_link;
use crate::html::rewrite_links;
use parking_lot::RwLock;
use parse_hyperlinks::parser::Link;
use parse_hyperlinks_extras::parser::parse_html::take_link;
use std::borrow::Cow;
use std::{
collections::HashSet,
path::{Path, PathBuf},
sync::Arc,
};
use super::Hyperlink;
#[test]
fn test_assemble_link() {
    // Every case uses the same root and the same document directory.
    let root = Path::new("/my");
    let note_dir = Path::new("/my/doc/path");

    // (dest, rewrite_rel_paths, rewrite_abs_paths, expected result)
    let cases: &[(&str, bool, bool, Option<&str>)] = &[
        (
            "../local/link to/note.md",
            true,
            false,
            Some("/doc/local/link to/note.md"),
        ),
        (
            "../local/link to/note.md",
            false,
            false,
            Some("../local/link to/note.md"),
        ),
        (
            "/test/../abs/local/link to/note.md",
            false,
            false,
            Some("/abs/local/link to/note.md"),
        ),
        // `..` directly below the root underflows.
        ("/../local/link to/note.md", false, false, None),
        (
            "/abs/local/link to/note.md",
            false,
            true,
            Some("/my/abs/local/link to/note.md"),
        ),
        (
            "/test/../abs/local/link to/note.md",
            false,
            false,
            Some("/abs/local/link to/note.md"),
        ),
        (
            "abs/local/link to/note.md",
            true,
            true,
            Some("/my/doc/path/abs/local/link to/note.md"),
        ),
    ];

    for &(dest, rewrite_rel_paths, rewrite_abs_paths, expected) in cases {
        let output = assemble_link(
            root,
            note_dir,
            Path::new(dest),
            rewrite_rel_paths,
            rewrite_abs_paths,
        );
        assert_eq!(output, expected.map(PathBuf::from));
    }
}
#[test]
fn test_decode_html_escape_and_percent() {
    // Nothing to decode: the link stays as it is.
    let mut input = Link::Text2Dest(Cow::from("text"), Cow::from("dest"), Cow::from("title"));
    let expected = Link::Text2Dest(Cow::from("text"), Cow::from("dest"), Cow::from("title"));
    input.decode_html_escape_and_percent();
    assert_eq!(input, expected);

    // Only the destination is percent-decoded; the text differs from it.
    let mut input = Link::Text2Dest(
        Cow::from("te%20xt"),
        Cow::from("de%20st"),
        Cow::from("title"),
    );
    let expected =
        Link::Text2Dest(Cow::from("te%20xt"), Cow::from("de st"), Cow::from("title"));
    input.decode_html_escape_and_percent();
    assert_eq!(input, expected);

    // Autolink: the decoded text equals the destination and is replaced.
    let mut input = Link::Text2Dest(
        Cow::from("d:e%20st"),
        Cow::from("d:e%20st"),
        Cow::from("title"),
    );
    let expected =
        Link::Text2Dest(Cow::from("d:e st"), Cow::from("d:e st"), Cow::from("title"));
    input.decode_html_escape_and_percent();
    assert_eq!(input, expected);

    // HTML entity and percent encoding mixed: `&amp;` and `%26` both
    // decode to `&`.
    let mut input = Link::Text2Dest(
        Cow::from("d:e%20&amp;st%26"),
        Cow::from("d:e%20%26st&amp;"),
        Cow::from("title"),
    );
    let expected = Link::Text2Dest(
        Cow::from("d:e &st&"),
        Cow::from("d:e &st&"),
        Cow::from("title"),
    );
    input.decode_html_escape_and_percent();
    assert_eq!(input, expected);

    // HTML entities are decoded in text, destination and title.
    let mut input = Link::Text2Dest(
        Cow::from("a&amp;&quot;lt"),
        Cow::from("a&amp;&quot;lt"),
        Cow::from("a&amp;&quot;lt"),
    );
    let expected = Link::Text2Dest(
        Cow::from("a&\"lt"),
        Cow::from("a&\"lt"),
        Cow::from("a&\"lt"),
    );
    input.decode_html_escape_and_percent();
    assert_eq!(input, expected);

    // Images: only the source is percent-decoded.
    let mut input = Link::Image(Cow::from("al%20t"), Cow::from("de%20st"));
    let expected = Link::Image(Cow::from("al%20t"), Cow::from("de st"));
    input.decode_html_escape_and_percent();
    assert_eq!(input, expected);

    // Backslashes are not touched.
    let mut input = Link::Image(Cow::from("a\\lt"), Cow::from("d\\est"));
    let expected = Link::Image(Cow::from("a\\lt"), Cow::from("d\\est"));
    input.decode_html_escape_and_percent();
    assert_eq!(input, expected);

    // Images: HTML entities are decoded in alt text and source.
    let mut input = Link::Image(Cow::from("a&amp;&quot;lt"), Cow::from("a&amp;&quot;lt"));
    let expected = Link::Image(Cow::from("a&\"lt"), Cow::from("a&\"lt"));
    input.decode_html_escape_and_percent();
    assert_eq!(input, expected);
}
#[test]
fn test_rewrite_local_link() {
let root_path = Path::new("/my/");
let docdir = Path::new("/my/abs/note path/");
// A remote URL is not a local link: `Ok(None)` is returned.
let mut input = take_link("<a href=\"ftp://getreu.net\">Blog</a>")
.unwrap()
.1
.1;
assert!(input
.rewrite_local_link(root_path, docdir, true, false, false)
.unwrap()
.is_none());
// Relative image source: `.` and `..` are resolved and the result is
// expressed relative to `root_path` with a leading `/`.
let root_path = Path::new("/my/");
let docdir = Path::new("/my/abs/note path/");
let mut input = take_link("<img src=\"down/./down/../../t m p.jpg\" alt=\"Image\" />")
.unwrap()
.1
.1;
let expected = "<img src=\"/abs/note path/t m p.jpg\" \
alt=\"Image\" />";
let outpath = input
.rewrite_local_link(root_path, docdir, true, false, false)
.unwrap()
.unwrap();
let output = input.to_html();
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("/abs/note path/t m p.jpg"));
// The relative source leaves `docdir`, but stays below `root_path`.
let mut input = take_link("<img src=\"down/./../../t m p.jpg\" alt=\"Image\" />")
.unwrap()
.1
.1;
let expected = "<img src=\"/abs/t m p.jpg\" alt=\"Image\" />";
let outpath = input
.rewrite_local_link(root_path, docdir, true, false, false)
.unwrap()
.unwrap();
let output = input.to_html();
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("/abs/t m p.jpg"));
// Relative anchor link starting with `./`.
let mut input = take_link("<a href=\"./down/./../my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let expected = "<a href=\"/abs/note path/my note 1.md\">my note 1</a>";
let outpath = input
.rewrite_local_link(root_path, docdir, true, false, false)
.unwrap()
.unwrap();
let output = input.to_html();
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("/abs/note path/my note 1.md"));
// Absolute destination with `rewrite_abs_paths == false`: only normalized.
let mut input = take_link("<a href=\"/dir/./down/../my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let expected = "<a href=\"/dir/my note 1.md\">my note 1</a>";
let outpath = input
.rewrite_local_link(root_path, docdir, true, false, false)
.unwrap()
.unwrap();
let output = input.to_html();
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("/dir/my note 1.md"));
// `rewrite_rel_paths == false`: the relative link stays relative.
let mut input = take_link("<a href=\"./down/./../dir/my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let expected = "<a href=\"dir/my note 1.md\">my note 1</a>";
let outpath = input
.rewrite_local_link(root_path, docdir, false, false, false)
.unwrap()
.unwrap();
let output = input.to_html();
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("dir/my note 1.md"));
// `rewrite_ext == true`: `.html` is appended to the `.md` destination.
let mut input = take_link("<a href=\"./down/./../dir/my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let expected = "<a href=\"/abs/note path/dir/my note 1.md.html\">my note 1</a>";
let outpath = input
.rewrite_local_link(root_path, docdir, true, false, true)
.unwrap()
.unwrap();
let output = input.to_html();
assert_eq!(output, expected);
assert_eq!(
outpath,
PathBuf::from("/abs/note path/dir/my note 1.md.html")
);
// Same link with another root and document directory.
let mut input = take_link("<a href=\"./down/./../dir/my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let expected = "<a href=\"/path/dir/my note 1.md\">my note 1</a>";
let outpath = input
.rewrite_local_link(
Path::new("/my/note/"),
Path::new("/my/note/path/"),
true,
false,
false,
)
.unwrap()
.unwrap();
let output = input.to_html();
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("/path/dir/my note 1.md"));
// Absolute destination: `docdir` does not influence the result.
let mut input = take_link("<a href=\"/down/./../dir/my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let expected = "<a href=\"/dir/my note 1.md\">my note 1</a>";
let outpath = input
.rewrite_local_link(root_path, Path::new("/my/ignored/"), true, false, false)
.unwrap()
.unwrap();
let output = input.to_html();
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("/dir/my note 1.md"));
// Absolute destination climbing above the root is rejected.
let mut input = take_link("<a href=\"/down/../../dir/my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let output = input
.rewrite_local_link(root_path, Path::new("/my/notepath/"), true, false, false)
.unwrap_err();
assert!(matches!(output, NoteError::InvalidLocalLink));
// Relative destination climbing above the root is rejected.
let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let output = input
.rewrite_local_link(root_path, Path::new("/my/notepath/"), true, false, false)
.unwrap_err();
assert!(matches!(output, NoteError::InvalidLocalLink));
// Same with `/` as root path.
let root_path = Path::new("/");
let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let output = input
.rewrite_local_link(root_path, Path::new("/my/"), true, false, false)
.unwrap_err();
assert!(matches!(output, NoteError::InvalidLocalLink));
// Same with root path and document directory lacking a trailing slash.
let root_path = Path::new("/my");
let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
.unwrap()
.1
.1;
let output = input
.rewrite_local_link(root_path, Path::new("/my/notepath"), true, false, false)
.unwrap_err();
assert!(matches!(output, NoteError::InvalidLocalLink));
// `tpnote:` autolink: the text is shortened to the file name without
// its `.md` extension.
let root_path = Path::new("/my");
let mut input =
take_link("<a href=\"tpnote:dir/3.0-my note.md\">tpnote:dir/3.0-my note.md</a>")
.unwrap()
.1
.1;
let outpath = input
.rewrite_local_link(root_path, Path::new("/my/path"), true, false, false)
.unwrap()
.unwrap();
let output = input.to_html();
let expected = "<a href=\"/path/dir/3.0-my note.md\">3.0-my note</a>";
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("/path/dir/3.0-my note.md"));
// `tpnote:` autolink whose last segment is `3.0`: the text keeps the
// part after the dot.
let root_path = Path::new("/my");
let mut input = take_link("<a href=\"tpnote:dir/3.0\">tpnote:dir/3.0</a>")
.unwrap()
.1
.1;
let outpath = input
.rewrite_local_link(root_path, Path::new("/my/path"), true, false, false)
.unwrap()
.unwrap();
let output = input.to_html();
let expected = "<a href=\"/path/dir/3.0\">3.0</a>";
assert_eq!(output, expected);
assert_eq!(outpath, PathBuf::from("/path/dir/3.0"));
}
#[test]
fn test_to_html() {
    // Backslashes become forward slashes in the destination only.
    let input = Link::Text2Dest(
        Cow::from("te\\x/t"),
        Cow::from("de\\s/t"),
        Cow::from("ti\\t/le"),
    );
    let expected = "<a href=\"de/s/t\" title=\"ti\\t/le\">te\\x/t</a>";
    let output = input.to_html();
    assert_eq!(output, expected);

    // Text is escaped with `encode_text` (`&`, `<`, `>`), the attributes
    // with `encode_double_quoted_attribute` (`&`, `"`): `>` survives
    // unescaped inside the attributes.
    let input = Link::Text2Dest(
        Cow::from("te&> xt"),
        Cow::from("de&> st"),
        Cow::from("ti&> tle"),
    );
    let expected = "<a href=\"de&amp;> st\" title=\"ti&amp;> tle\">te&amp;&gt; xt</a>";
    let output = input.to_html();
    assert_eq!(output, expected);

    // Images: the alt text is escaped like link text.
    let input = Link::Image(Cow::from("al&> t"), Cow::from("so&> urce"));
    let expected = "<img src=\"so&amp;> urce\" alt=\"al&amp;&gt; t\" />";
    let output = input.to_html();
    assert_eq!(output, expected);

    // An empty title omits the `title` attribute.
    let input = Link::Text2Dest(Cow::from("te&> xt"), Cow::from("de&> st"), Cow::from(""));
    let expected = "<a href=\"de&amp;> st\">te&amp;&gt; xt</a>";
    let output = input.to_html();
    assert_eq!(output, expected);
}
#[test]
fn test_rewrite_links() {
    use crate::config::LocalLinkKind;

    let root_path = Path::new("/my/");
    let docdir = Path::new("/my/abs/note path/");
    let registry = Arc::new(RwLock::new(HashSet::new()));

    // Remote links, an image, relative and absolute local links and two
    // links that try to escape the root directory.
    let html = String::from(
        "abc<a href=\"ftp://getreu.net\">Blog</a>\
         def<a href=\"https://getreu.net\">https://getreu.net</a>\
         ghi<img src=\"t m p.jpg\" alt=\"test 1\" />\
         jkl<a href=\"down/../down/my note 1.md\">my note 1</a>\
         mno<a href=\"http:./down/../dir/my note.md\">\
         http:./down/../dir/my note.md</a>\
         pqr<a href=\"http:/down/../dir/my note.md\">\
         http:/down/../dir/my note.md</a>\
         stu<a href=\"http:/../dir/underflow/my note.md\">\
         not allowed dir</a>\
         vwx<a href=\"http:../../../not allowed dir/my note.md\">\
         not allowed</a>",
    );
    let expected = String::from(
        "abc<a href=\"ftp://getreu.net\">Blog</a>\
         def<a href=\"https://getreu.net\">https://getreu.net</a>\
         ghi<img src=\"/abs/note path/t m p.jpg\" alt=\"test 1\" />\
         jkl<a href=\"/abs/note path/down/my note 1.md\">my note 1</a>\
         mno<a href=\"/abs/note path/dir/my note.md\">my note</a>\
         pqr<a href=\"/dir/my note.md\">my note</a>\
         stu<i>INVALID LOCAL LINK</i>\
         vwx<i>INVALID LOCAL LINK</i>",
    );

    let output = rewrite_links(
        html,
        root_path,
        docdir,
        LocalLinkKind::Short,
        false,
        registry.clone(),
    );
    assert_eq!(output, expected);

    // The rewritten local destinations were registered.
    let served = registry.read_recursive();
    for path in [
        "/abs/note path/t m p.jpg",
        "/abs/note path/dir/my note.md",
        "/abs/note path/down/my note 1.md",
    ] {
        assert!(served.contains(&PathBuf::from(path)));
    }
}
}