use crate::validation::{Context, Reason};
use std::{
collections::HashMap,
ffi::{OsStr, OsString},
fmt::{self, Debug, Formatter},
io,
path::{Component, Path, PathBuf},
sync::Arc,
};
/// Resolve `link`, as written in a file living in `current_directory`, to the
/// canonical path of the file it points at.
///
/// The link is first joined onto either `current_directory` or the configured
/// root directory, then every candidate name (the joined path itself followed
/// by any alternate extensions) is tried in order. The first candidate that
/// can be canonicalized wins.
///
/// # Errors
///
/// - Whatever [`Options`] reports when the two paths can't be joined.
/// - Whatever the sanity checks report for the first candidate that exists.
/// - A "not found" I/O error when none of the candidates could be
///   canonicalized.
pub fn resolve_link(
    current_directory: &Path,
    link: &Path,
    options: &Options,
) -> Result<PathBuf, Reason> {
    let target = options.join(current_directory, link)?;

    for name in options.possible_names(target) {
        log::trace!(
            "Checking if \"{}\" points to \"{}\"",
            link.display(),
            name.display(),
        );

        // Any failure to canonicalize is treated as "this candidate isn't
        // there", so move on to the next one.
        let resolved = match options.canonicalize(&name) {
            Ok(resolved) => resolved,
            Err(_) => continue,
        };

        // The first candidate that exists decides the outcome, even when the
        // sanity checks reject it.
        options.sanity_check(&resolved)?;
        return Ok(resolved);
    }

    log::trace!("None of the candidates exist for \"{}\"", link.display());
    Err(Reason::Io(io::ErrorKind::NotFound.into()))
}
/// Check that `path`, a link found in a file inside `current_directory`,
/// points at something on disk and passes the user's custom validation.
///
/// The `fragment` is only forwarded to the custom validation function; a
/// warning is logged because fragments are not otherwise resolved.
///
/// # Errors
///
/// Fails with the [`Reason`] from [`resolve_link()`] when the link can't be
/// resolved, or with the [`Reason`] returned by the custom validation.
pub fn check_filesystem<C>(
    current_directory: &Path,
    path: &Path,
    fragment: Option<&str>,
    ctx: &C,
) -> Result<(), Reason>
where
    C: Context + ?Sized,
{
    log::debug!(
        "Checking \"{}\" in the context of \"{}\"",
        path.display(),
        current_directory.display()
    );

    let opts = ctx.filesystem_options();
    let target = resolve_link(current_directory, path, opts)?;

    log::debug!(
        "\"{}\" resolved to \"{}\"",
        path.display(),
        target.display()
    );

    if let Some(section) = fragment {
        log::warn!(
            "Not checking that the \"{}\" section exists in \"{}\" because fragment resolution isn't implemented",
            section,
            target.display(),
        );
    }

    // Log why the custom validation rejected the link before handing the
    // reason back to the caller untouched.
    opts.run_custom_validation(&target, fragment)
        .map_err(|reason| {
            log::debug!(
                "Custom validation reported \"{}\" as invalid because {}",
                target.display(),
                reason
            );
            reason
        })
}
#[derive(Clone)]
#[cfg_attr(
feature = "serde-1",
derive(serde::Serialize, serde::Deserialize),
serde(default)
)]
pub struct Options {
root_directory: Option<PathBuf>,
default_file: OsString,
links_may_traverse_the_root_directory: bool,
alternate_extensions: HashMap<String, Vec<OsString>>,
#[serde(skip, default = "nop_custom_validation")]
custom_validation: Arc<dyn Fn(&Path, Option<&str>) -> Result<(), Reason>>,
}
impl Options {
pub const DEFAULT_FILE: &'static str = "index.html";
pub fn default_alternate_extensions(
) -> impl IntoIterator<Item = (OsString, impl IntoIterator<Item = OsString>)>
{
const MAPPING: &'static [(&'static str, &'static [&'static str])] =
&[("md", &["html"])];
MAPPING.iter().map(|(ext, alts)| {
(OsString::from(ext), alts.iter().map(OsString::from))
})
}
pub fn new() -> Self {
Options {
root_directory: None,
default_file: OsString::from(Options::DEFAULT_FILE),
links_may_traverse_the_root_directory: false,
alternate_extensions: Options::default_alternate_extensions()
.into_iter()
.map(|(key, values)| {
(
key.to_string_lossy().to_lowercase(),
values.into_iter().map(Into::into).collect(),
)
})
.collect(),
custom_validation: nop_custom_validation(),
}
}
pub fn root_directory(&self) -> Option<&Path> {
self.root_directory.as_ref().map(|p| &**p)
}
pub fn with_root_directory<P: AsRef<Path>>(
self,
root_directory: P,
) -> io::Result<Self> {
Ok(Options {
root_directory: Some(dunce::canonicalize(root_directory)?),
..self
})
}
pub fn default_file(&self) -> &OsStr { &self.default_file }
pub fn set_default_file<O: Into<OsString>>(self, default_file: O) -> Self {
Options {
default_file: default_file.into(),
..self
}
}
pub fn alternate_extensions(
&self,
) -> impl Iterator<Item = (&OsStr, impl Iterator<Item = &OsStr>)> {
self.alternate_extensions.iter().map(|(key, value)| {
(OsStr::new(key), value.iter().map(|alt| alt.as_os_str()))
})
}
pub fn set_alternate_extensions<S, I, V>(mut self, alternates: I) -> Self
where
I: IntoIterator<Item = (S, V)>,
S: Into<OsString>,
V: IntoIterator<Item = S>,
{
self.alternate_extensions = alternates
.into_iter()
.map(|(key, values)| {
(
key.into().to_string_lossy().to_lowercase(),
values.into_iter().map(Into::into).collect(),
)
})
.collect();
self
}
pub fn links_may_traverse_the_root_directory(&self) -> bool {
self.links_may_traverse_the_root_directory
}
pub fn set_links_may_traverse_the_root_directory(
self,
value: bool,
) -> Self {
Options {
links_may_traverse_the_root_directory: value,
..self
}
}
pub fn set_custom_validation<F>(self, custom_validation: F) -> Self
where
F: Fn(&Path, Option<&str>) -> Result<(), Reason> + 'static,
{
let custom_validation = Arc::new(custom_validation);
Options {
custom_validation,
..self
}
}
fn join(
&self,
current_dir: &Path,
second: &Path,
) -> Result<PathBuf, Reason> {
log::trace!(
"Appending \"{}\" to \"{}\"",
second.display(),
current_dir.display()
);
if second.has_root() {
match self.root_directory() {
Some(root) => {
let mut buffer = root.to_path_buf();
buffer.extend(remove_absolute_components(second));
Ok(buffer)
},
None => {
log::warn!("The bit to be appended is absolute, but we don't have a \"root\" directory to resolve relative to");
Err(Reason::TraversesParentDirectories)
},
}
} else {
Ok(current_dir.join(second))
}
}
fn canonicalize(&self, path: &Path) -> Result<PathBuf, Reason> {
let mut canonical = dunce::canonicalize(path)?;
if canonical.is_dir() {
log::trace!(
"Appending the default file name because \"{}\" is a directory",
canonical.display()
);
canonical.push(&self.default_file);
canonical = dunce::canonicalize(canonical)?;
}
Ok(canonical)
}
fn sanity_check(&self, path: &Path) -> Result<(), Reason> {
log::trace!("Applying sanity checks to \"{}\"", path.display());
if let Some(root) = self.root_directory() {
log::trace!(
"Checking if \"{}\" is allowed to leave \"{}\"",
path.display(),
root.display()
);
if !(self.links_may_traverse_the_root_directory
|| path.starts_with(root))
{
log::trace!(
"\"{}\" traverses outside the \"root\" directory",
path.display()
);
return Err(Reason::TraversesParentDirectories);
}
}
Ok(())
}
fn possible_names(
&self,
original: PathBuf,
) -> impl IntoIterator<Item = PathBuf> {
let mut names = vec![original.clone()];
if let Some(alternatives) = original
.extension()
.map(|ext| ext.to_string_lossy().to_lowercase())
.and_then(|ext| self.alternate_extensions.get(&ext))
{
for alternative in alternatives {
names.push(original.with_extension(alternative));
}
}
log::trace!(
"Possible candidates for \"{}\" are {:?}",
original.display(),
names
);
names
}
fn run_custom_validation(
&self,
resolved_path: &Path,
fragment: Option<&str>,
) -> Result<(), Reason> {
(self.custom_validation)(resolved_path, fragment)
}
}
fn nop_custom_validation(
) -> Arc<dyn Fn(&Path, Option<&str>) -> Result<(), Reason>> {
Arc::new(|_, _| Ok(()))
}
impl Default for Options {
fn default() -> Self { Options::new() }
}
impl Debug for Options {
    /// Print every field except the custom validation function.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Destructure exhaustively so adding a field to `Options` forces this
        // impl to be revisited.
        let Options {
            root_directory: root,
            default_file: file_name,
            links_may_traverse_the_root_directory: may_traverse,
            alternate_extensions: alternates,
            custom_validation: _,
        } = self;

        let mut output = f.debug_struct("Options");
        output.field("root_directory", root);
        output.field("default_file", file_name);
        output.field("links_may_traverse_the_root_directory", may_traverse);
        output.field("alternate_extensions", alternates);
        output.finish()
    }
}
impl PartialEq for Options {
    /// Two sets of options are equal when every field other than the custom
    /// validation function is equal.
    fn eq(&self, other: &Options) -> bool {
        // Destructure both sides exhaustively so a new field can't be
        // silently left out of the comparison.
        let Options {
            root_directory: left_root,
            default_file: left_file,
            links_may_traverse_the_root_directory: left_traverse,
            alternate_extensions: left_alternates,
            custom_validation: _,
        } = self;
        let Options {
            root_directory: right_root,
            default_file: right_file,
            links_may_traverse_the_root_directory: right_traverse,
            alternate_extensions: right_alternates,
            custom_validation: _,
        } = other;

        (left_root, left_file, left_traverse, left_alternates)
            == (right_root, right_file, right_traverse, right_alternates)
    }
}
/// Iterate over the components of `path`, skipping any leading prefix and
/// root components so the remainder can be appended to another path.
fn remove_absolute_components(
    path: &Path,
) -> impl Iterator<Item = Component> + '_ {
    path.components().skip_while(|component| match component {
        Component::Prefix(_) | Component::RootDir => true,
        _ => false,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::BasicContext;
    use std::sync::atomic::{AtomicBool, Ordering};

    /// The directory containing this crate's `validation` module, used as a
    /// directory that is known to exist on disk.
    fn validation_dir() -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("src")
            .join("validation")
    }

    /// Create an empty file called `filename` in each of the `directories`,
    /// creating the directories themselves when necessary.
    fn touch<S: AsRef<Path>>(filename: S, directories: &[&Path]) {
        for dir in directories {
            std::fs::create_dir_all(dir).unwrap();

            let item = dir.join(filename.as_ref());
            let _f = std::fs::File::create(&item).unwrap();
        }
    }

    /// Turn on trace-level logging for the `linkcheck` target. Failures to
    /// initialize (e.g. because another test already did) are ignored.
    fn init_logging() {
        let _ = env_logger::builder()
            .filter(Some("linkcheck"), log::LevelFilter::Trace)
            .is_test(true)
            .try_init();
    }

    #[test]
    fn resolve_mod_relative_to_validation_dir() {
        init_logging();
        let current_dir = validation_dir();
        let link = "mod.rs";
        let options = Options::default();

        let got =
            resolve_link(&current_dir, Path::new(link), &options).unwrap();

        assert_eq!(got, current_dir.join(link));
    }

    #[test]
    fn custom_validation_function_gets_called() {
        init_logging();
        let current_dir = validation_dir();
        let link = "mod.rs";
        let called = Arc::new(AtomicBool::new(false));
        let called_2 = Arc::clone(&called);
        let mut ctx = BasicContext::default();
        ctx.options = Options::default().set_custom_validation(move |_, _| {
            called_2.store(true, Ordering::SeqCst);
            Ok(())
        });

        check_filesystem(&current_dir, Path::new(link), None, &ctx).unwrap();

        assert!(called.load(Ordering::SeqCst))
    }

    #[test]
    fn detect_possible_directory_traversal_attacks() {
        init_logging();
        let temp = tempfile::tempdir().unwrap();
        let temp = dunce::canonicalize(temp.path()).unwrap();
        let foo = temp.join("foo");
        let bar = foo.join("bar");
        let baz = bar.join("baz");
        let options = Options::default().with_root_directory(&temp).unwrap();
        touch(&options.default_file, &[&temp, &foo, &bar, &baz]);
        let current_dir = baz.as_path();
        let resolve = |link: &str| -> Result<PathBuf, Reason> {
            resolve_link(current_dir, Path::new(link), &options)
        };

        // Walking up the tree is fine as long as we stay inside the root.
        assert_eq!(
            resolve(".").unwrap(),
            current_dir.join(&options.default_file)
        );
        assert_eq!(resolve("..").unwrap(), bar.join(&options.default_file));
        assert_eq!(resolve("../..").unwrap(), foo.join(&options.default_file));
        assert_eq!(
            resolve("../../..").unwrap(),
            temp.join(&options.default_file)
        );

        // A path that leaves the root and points at a file expected to exist
        // on the host.
        let bad_path = if cfg!(windows) {
            "../../../../../../../../../../../../../../../../../Windows/System32/cmd.exe"
        } else {
            "../../../../../../../../../../../../../../../../../etc/passwd"
        };
        let traverses_parent_dir = resolve(bad_path).unwrap_err();
        assert!(
            matches!(traverses_parent_dir, Reason::TraversesParentDirectories),
            "{:?} should have traversed the parent directory",
            traverses_parent_dir
        );
    }

    #[test]
    fn links_with_a_leading_slash_are_relative_to_the_root() {
        init_logging();
        let temp = tempfile::tempdir().unwrap();
        let temp = dunce::canonicalize(temp.path()).unwrap();
        let foo = temp.join("foo");
        let bar = temp.join("bar");
        let options = Options::default().with_root_directory(&temp).unwrap();
        touch(&options.default_file, &[&temp, &foo, &bar]);
        let link = Path::new("/bar");

        let got = resolve_link(&foo, link, &options).unwrap();

        assert_eq!(got, bar.join(&options.default_file));
    }

    #[test]
    fn link_to_a_file_we_know_doesnt_exist() {
        init_logging();
        let temp = tempfile::tempdir().unwrap();
        let temp = dunce::canonicalize(temp.path()).unwrap();
        let options = Options::default().with_root_directory(&temp).unwrap();
        let link = Path::new("./bar");

        let err = resolve_link(&temp, link, &options).unwrap_err();

        assert!(err.file_not_found());
    }

    #[test]
    fn absolute_link_with_no_root_set_is_an_error() {
        init_logging();
        let temp = tempfile::tempdir().unwrap();
        let temp = dunce::canonicalize(temp.path()).unwrap();
        let options = Options::default();
        let link = Path::new("/bar");

        let err = resolve_link(&temp, link, &options).unwrap_err();

        assert!(matches!(err, Reason::TraversesParentDirectories));
    }

    #[test]
    fn a_link_that_is_allowed_to_traverse_the_root_dir() {
        init_logging();
        let temp = tempfile::tempdir().unwrap();
        let temp = dunce::canonicalize(temp.path()).unwrap();
        let foo = temp.join("foo");
        let bar = temp.join("bar");
        touch(Options::DEFAULT_FILE, &[&temp, &foo, &bar]);
        let options = Options::default()
            .with_root_directory(&foo)
            .unwrap()
            .set_links_may_traverse_the_root_directory(true);
        let link = Path::new("../bar/index.html");

        let got = resolve_link(&foo, link, &options).unwrap();

        assert_eq!(got, bar.join("index.html"));
    }

    #[test]
    fn markdown_files_can_be_used_as_html() {
        init_logging();
        let temp = tempfile::tempdir().unwrap();
        let temp = dunce::canonicalize(temp.path()).unwrap();
        // Only the HTML file exists; the link points at the markdown name.
        touch("index.html", &[&temp]);
        let link = "index.md";
        let options = Options::default()
            .set_alternate_extensions(Options::default_alternate_extensions());

        let got = resolve_link(&temp, Path::new(link), &options).unwrap();

        assert_eq!(got, temp.join("index.html"));
    }

    #[test]
    fn join_paths() {
        init_logging();
        let temp = tempfile::tempdir().unwrap();
        let temp = dunce::canonicalize(temp.path()).unwrap();
        let foo = temp.join("foo");
        let bar = foo.join("bar");
        let baz = bar.join("baz");
        let baz_index = baz.join("index.html");
        touch("index.html", &[&temp, &foo, &bar, &baz]);
        let options = Options::default().with_root_directory(&temp).unwrap();

        // (link, directory the link was found in, expected result)
        let inputs = vec![
            ("/foo", &temp, &foo),
            ("foo", &temp, &foo),
            ("foo/bar", &temp, &bar),
            ("foo/bar/baz", &temp, &baz),
            ("/foo/bar/baz/index.html", &temp, &baz_index),
            ("bar/baz", &foo, &baz),
            ("baz", &bar, &baz),
            ("index.html", &baz, &baz_index),
        ];

        for (link, base, should_be) in inputs {
            let got = options.join(base, Path::new(link)).unwrap();
            assert_eq!(got, *should_be);
        }
    }
}