use percent_encoding::percent_decode_str;
use reqwest::Url;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use crate::{
Base, BasicAuthCredentials, ErrorKind, LycheeResult, Request, RequestError, Uri,
basic_auth::BasicAuthExtractor,
types::{ResolvedInputSource, uri::raw::RawUri},
utils::{path, url},
};
/// Looks up basic-auth credentials for `uri`.
///
/// Returns `None` when no extractor is supplied, or when the extractor's
/// `matches` call yields nothing for this URI.
pub(crate) fn extract_credentials(
    extractor: Option<&BasicAuthExtractor>,
    uri: &Uri,
) -> Option<BasicAuthCredentials> {
    // `?` short-circuits to `None` when no extractor was passed in.
    extractor?.matches(uri)
}
/// Builds a [`Request`] for a single extracted link.
///
/// The link is first turned into a [`Uri`] via `try_parse_into_uri`; the
/// request then carries a clone of `source`, the element/attribute recorded
/// on `raw_uri`, and any credentials the `extractor` reports for the URI.
///
/// # Errors
///
/// Propagates the error from `try_parse_into_uri` when the link cannot be
/// resolved into a URI.
fn create_request(
    raw_uri: &RawUri,
    source: &ResolvedInputSource,
    root_dir: Option<&PathBuf>,
    base: Option<&Base>,
    extractor: Option<&BasicAuthExtractor>,
) -> LycheeResult<Request> {
    let uri = try_parse_into_uri(raw_uri, source, root_dir, base)?;

    // Credentials must be looked up before `uri` is moved into the request.
    let credentials = extract_credentials(extractor, &uri);

    let request = Request::new(
        uri,
        source.clone(),
        raw_uri.element.clone(),
        raw_uri.attribute.clone(),
        credentials,
    );
    Ok(request)
}
/// Turns a raw link into a [`Uri`].
///
/// Resolution order:
///
/// 1. If the raw URI converts on its own via `Uri::try_from`, that result is
///    returned as-is.
/// 2. Otherwise, if a `base` is given, the link text is joined onto it.
/// 3. Otherwise, if `source` is a file path, the link is resolved relative
///    to that file.
///
/// In steps 2 and 3 the link text first goes through
/// `prepend_root_dir_if_absolute_local_link`, so a link starting with `/` is
/// prefixed with `root_dir` when one is set.
///
/// # Errors
///
/// - `ErrorKind::InvalidBaseJoin` if the text cannot be joined with `base`.
/// - The error from `create_uri_from_file_path` if resolving against the
///   source file fails.
/// - `ErrorKind::UnsupportedUriType` if there is no base and `source` is not
///   a file path.
fn try_parse_into_uri(
    raw_uri: &RawUri,
    source: &ResolvedInputSource,
    root_dir: Option<&PathBuf>,
    base: Option<&Base>,
) -> LycheeResult<Uri> {
    // Fast path: the link is already a complete URI.
    if let Ok(uri) = Uri::try_from(raw_uri.clone()) {
        return Ok(uri);
    }

    // Only built on the fallback path, so the common case above does not pay
    // for a string allocation it never uses.
    let text = prepend_root_dir_if_absolute_local_link(&raw_uri.text, root_dir);

    match base {
        Some(base_url) => match base_url.join(&text) {
            Some(url) => Ok(Uri { url }),
            // `text` is not needed afterwards, so it is moved into the error.
            None => Err(ErrorKind::InvalidBaseJoin(text)),
        },
        None => match source {
            ResolvedInputSource::FsPath(root) => {
                // Without a root dir, absolute local links are ignored.
                create_uri_from_file_path(root, &text, root_dir.is_none())
            }
            _ => Err(ErrorKind::UnsupportedUriType(text)),
        },
    }
}
/// Reports whether `text` is a pure fragment link, i.e. begins with `#`.
///
/// A `#` anywhere other than the first position does not count.
pub(crate) fn is_anchor(text: &str) -> bool {
    text.strip_prefix('#').is_some()
}
/// Resolves `link_text` against the file at `file_path` and wraps the
/// resulting URL in a [`Uri`].
///
/// A pure fragment link (see `is_anchor`) is prefixed with the file name of
/// `file_path` before resolution, so it ends up pointing into that file.
///
/// # Errors
///
/// - `ErrorKind::InvalidFile` if the link is a fragment and `file_path` has
///   no file name that is valid UTF-8.
/// - `ErrorKind::InvalidPathToUri` (carrying the target path) if
///   `resolve_and_create_url` fails for any reason; the underlying error is
///   discarded.
fn create_uri_from_file_path(
    file_path: &Path,
    link_text: &str,
    ignore_absolute_local_links: bool,
) -> LycheeResult<Uri> {
    let target_path = if is_anchor(link_text) {
        match file_path.file_name().and_then(|name| name.to_str()) {
            Some(file_name) => format!("{file_name}{link_text}"),
            None => return Err(ErrorKind::InvalidFile(file_path.to_path_buf())),
        }
    } else {
        link_text.to_owned()
    };

    match resolve_and_create_url(file_path, &target_path, ignore_absolute_local_links) {
        Ok(url) => Ok(Uri { url }),
        Err(_) => Err(ErrorKind::InvalidPathToUri(target_path)),
    }
}
/// Creates requests from the raw URIs collected for one input `source`.
///
/// When `base` is `None`, `Base::from_source(source)` is tried as a
/// fallback base.
///
/// Successfully created requests are de-duplicated through a `HashSet`, so
/// their order in the returned vector is unspecified. Failures are appended
/// after all successes, in input order, each wrapped in
/// `RequestError::CreateRequestItem` together with the offending raw URI and
/// a clone of `source`.
pub(crate) fn create(
    uris: Vec<RawUri>,
    source: &ResolvedInputSource,
    root_dir: Option<&PathBuf>,
    base: Option<&Base>,
    extractor: Option<&BasicAuthExtractor>,
) -> Vec<Result<Request, RequestError>> {
    let base = base.cloned().or_else(|| Base::from_source(source));

    let mut requests = HashSet::<Request>::new();
    let mut errors = Vec::<RequestError>::new();

    for raw_uri in uris {
        match create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) {
            Ok(request) => {
                requests.insert(request);
            }
            // The loop owns `raw_uri` and is done with it, so it is moved
            // into the error instead of being cloned.
            Err(e) => errors.push(RequestError::CreateRequestItem(
                raw_uri,
                source.clone(),
                e,
            )),
        }
    }

    requests
        .into_iter()
        .map(Ok)
        .chain(errors.into_iter().map(Err))
        .collect()
}
/// Builds a file URL for the link `dest_path` found in the file `src_path`.
///
/// Steps:
///
/// 1. `url::remove_get_params_and_separate_fragment` splits the link into a
///    path part and an optional fragment (going by its name, it also drops
///    any query parameters).
/// 2. The path part is percent-decoded and must be valid UTF-8.
/// 3. `path::resolve` resolves the decoded path against `src_path`,
///    honouring `ignore_absolute_local_links`.
/// 4. The resolved path is converted with `Url::from_file_path` and the
///    fragment from step 1 is set on the result.
///
/// # Errors
///
/// - The error converted from the UTF-8 decoding failure in step 2.
/// - `ErrorKind::InvalidPathToUri` if `path::resolve` fails or yields no path.
/// - `ErrorKind::InvalidUrlFromPath` if the resolved path cannot be turned
///   into a URL.
fn resolve_and_create_url(
    src_path: &Path,
    dest_path: &str,
    ignore_absolute_local_links: bool,
) -> LycheeResult<Url> {
    let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path);

    // Links are URL-encoded, file system paths are not.
    let decoded_dest = percent_decode_str(dest_path).decode_utf8()?;

    let Ok(Some(resolved_path)) = path::resolve(
        src_path,
        &PathBuf::from(&*decoded_dest),
        ignore_absolute_local_links,
    ) else {
        // `into_owned` yields the same `String` as `to_string` without
        // going through `Display`.
        return Err(ErrorKind::InvalidPathToUri(decoded_dest.into_owned()));
    };

    let Ok(mut file_url) = Url::from_file_path(&resolved_path) else {
        // `resolved_path` is not used afterwards; move it rather than clone.
        return Err(ErrorKind::InvalidUrlFromPath(resolved_path));
    };

    file_url.set_fragment(fragment);
    Ok(file_url)
}
/// Prefixes a root-relative link with the configured root directory.
///
/// If `text` starts with `/` and `root_dir` is set and representable as
/// UTF-8, the result is the root dir string immediately followed by `text`.
/// In every other case `text` is returned unchanged (as an owned `String`).
fn prepend_root_dir_if_absolute_local_link(text: &str, root_dir: Option<&PathBuf>) -> String {
    // Only links with a leading `/` are candidates for re-rooting.
    if !text.starts_with('/') {
        return text.to_string();
    }

    match root_dir.and_then(|dir| dir.to_str()) {
        Some(prefix) => format!("{prefix}{text}"),
        // No root dir, or one that is not valid UTF-8: leave the link alone.
        None => text.to_string(),
    }
}
// Unit tests for request creation and link resolution. The private helpers
// of the parent module are reached through `use super::*`.
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use std::num::NonZeroUsize;
use crate::types::uri::raw::RawUriSpan;
use super::*;
/// Runs `create` and keeps only the successfully built requests; every
/// `Err` entry is silently dropped.
fn create_ok_only(
uris: Vec<RawUri>,
source: &ResolvedInputSource,
root_dir: Option<&PathBuf>,
base: Option<&Base>,
extractor: Option<&BasicAuthExtractor>,
) -> Vec<Request> {
create(uris, source, root_dir, base, extractor)
.into_iter()
.filter_map(Result::ok)
.collect()
}
/// Builds a `RawUri` that carries only `text`: element and attribute are
/// unset and the span is a placeholder (`NonZeroUsize::MAX` line, no column).
fn raw_uri(text: &'static str) -> RawUri {
RawUri {
text: text.to_string(),
element: None,
attribute: None,
span: RawUriSpan {
line: NonZeroUsize::MAX,
column: None,
},
}
}
// `#` counts as an anchor only in first position.
#[test]
fn test_is_anchor() {
assert!(is_anchor("#anchor"));
assert!(!is_anchor("notan#anchor"));
}
// Exercises `resolve_and_create_url` directly: a `+` in the link text is
// expected to appear unchanged in the resulting file URL.
#[test]
fn test_create_uri_from_path() {
let result =
resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", true).unwrap();
assert_eq!(result.as_str(), "file:///test+encoding");
}
// --- Remote base URL, string source, no root dir ---
// A relative link is expected to resolve next to the base's last path segment.
#[test]
fn test_relative_url_resolution() {
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::String(Cow::Borrowed(""));
let uris = vec![raw_uri("relative.html")];
let requests = create_ok_only(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/path/relative.html")
);
}
// A fully qualified URL is expected to pass through untouched by the base.
#[test]
fn test_absolute_url_resolution() {
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::String(Cow::Borrowed(""));
let uris = vec![raw_uri("https://another.com/page")];
let requests = create_ok_only(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://another.com/page")
);
}
// A link with a leading `/` is expected to resolve against the base's host root.
#[test]
fn test_root_relative_url_resolution() {
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::String(Cow::Borrowed(""));
let uris = vec![raw_uri("/root-relative")];
let requests = create_ok_only(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/root-relative")
);
}
// `..` is expected to step out of the base's `/path/` directory.
#[test]
fn test_parent_directory_url_resolution() {
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::String(Cow::Borrowed(""));
let uris = vec![raw_uri("../parent")];
let requests = create_ok_only(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/parent")
);
}
// A bare fragment is expected to attach to the base URL itself.
#[test]
fn test_fragment_url_resolution() {
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::String(Cow::Borrowed(""));
let uris = vec![raw_uri("#fragment")];
let requests = create_ok_only(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment")
);
}
// --- File source with a root dir, no explicit base ---
// With `base` set to `None`, `create` tries `Base::from_source(source)` as a
// fallback; all expected results here are `file://` URLs.
// A relative link is expected to resolve next to the source file.
#[test]
fn test_relative_url_resolution_from_root_dir() {
let root_dir = PathBuf::from("/tmp/lychee");
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("relative.html")];
let requests = create_ok_only(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "file:///some/relative.html")
);
}
// A fully qualified URL is unaffected by the root dir.
#[test]
fn test_absolute_url_resolution_from_root_dir() {
let root_dir = PathBuf::from("/tmp/lychee");
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("https://another.com/page")];
let requests = create_ok_only(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://another.com/page")
);
}
// A link with a leading `/` is expected to land underneath the root dir.
#[test]
fn test_root_relative_url_resolution_from_root_dir() {
let root_dir = PathBuf::from("/tmp/lychee");
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("/root-relative")];
let requests = create_ok_only(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "file:///tmp/lychee/root-relative")
);
}
// `..` is expected to resolve relative to the source file's directory
// (`/some`), not relative to the root dir.
#[test]
fn test_parent_directory_url_resolution_from_root_dir() {
let root_dir = PathBuf::from("/tmp/lychee");
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("../parent")];
let requests = create_ok_only(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "file:///parent")
);
}
// A bare fragment is expected to point into the source file itself.
#[test]
fn test_fragment_url_resolution_from_root_dir() {
let root_dir = PathBuf::from("/tmp/lychee");
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("#fragment")];
let requests = create_ok_only(uris, &source, Some(&root_dir), None, None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "file:///some/page.html#fragment")
);
}
// --- File source with both a root dir and a remote base URL ---
// The expected results show the base URL taking precedence over the source file.
// A relative link is expected to resolve against the base URL.
#[test]
fn test_relative_url_resolution_from_root_dir_and_base_url() {
let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("relative.html")];
let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/path/relative.html")
);
}
// A fully qualified URL is unaffected by root dir and base.
#[test]
fn test_absolute_url_resolution_from_root_dir_and_base_url() {
let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("https://another.com/page")];
let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://another.com/page")
);
}
// The root dir is prepended to the link text before it is joined onto the
// base, hence the `/tmp/lychee` segment in the expected remote URL.
#[test]
fn test_root_relative_url_resolution_from_root_dir_and_base_url() {
let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("/root-relative")];
let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/tmp/lychee/root-relative")
);
}
// `..` is expected to resolve against the base URL's directory.
#[test]
fn test_parent_directory_url_resolution_from_root_dir_and_base_url() {
let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("../parent")];
let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/parent")
);
}
// A bare fragment is expected to attach to the base URL, not the source file.
#[test]
fn test_fragment_url_resolution_from_root_dir_and_base_url() {
let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![raw_uri("#fragment")];
let requests = create_ok_only(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment")
);
}
// With no base and no root dir, a fully qualified URL still yields a request.
#[test]
fn test_no_base_url_resolution() {
let source = ResolvedInputSource::String(Cow::Borrowed(""));
let uris = vec![raw_uri("https://example.com/page")];
let requests = create_ok_only(uris, &source, None, None, None);
assert_eq!(requests.len(), 1);
assert!(
requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/page")
);
}
// --- `create_request` called directly (no `Base::from_source` fallback) ---
// With a local base directory, a relative link is expected to resolve inside
// that directory; the request keeps the original input source.
#[test]
fn test_create_request_from_relative_file_path() {
let base = Base::Local(PathBuf::from("/tmp/lychee"));
let input_source = ResolvedInputSource::FsPath(PathBuf::from("page.html"));
let actual = create_request(
&raw_uri("file.html"),
&input_source,
None,
Some(&base),
None,
)
.unwrap();
assert_eq!(
actual,
Request::new(
Uri {
url: Url::from_file_path("/tmp/lychee/file.html").unwrap()
},
input_source,
None,
None,
None,
)
);
}
// Two failure cases without a base:
// 1. a relative link from stdin (no base and not a file source);
// 2. a root-relative link from a file source with no root dir, where
//    absolute local links are ignored (`root_dir.is_none()` is passed on as
//    `ignore_absolute_local_links`).
#[test]
fn test_create_request_from_relative_file_path_errors() {
assert!(
create_request(
&raw_uri("file.html"),
&ResolvedInputSource::Stdin,
None,
None,
None,
)
.is_err()
);
assert!(
create_request(
&raw_uri("/file.html"),
&ResolvedInputSource::FsPath(PathBuf::from("page.html")),
None,
None,
None,
)
.is_err()
);
}
// With a local base and no root dir, an absolute path link is expected to be
// used as-is rather than being re-rooted under the base directory.
#[test]
fn test_create_request_from_absolute_file_path() {
let base = Base::Local(PathBuf::from("/tmp/lychee"));
let input_source = ResolvedInputSource::FsPath(PathBuf::from("/tmp/lychee/page.html"));
let actual = create_request(
&raw_uri("/usr/local/share/doc/example.html"),
&input_source,
None,
Some(&base),
None,
)
.unwrap();
assert_eq!(
actual,
Request::new(
Uri {
url: Url::from_file_path("/usr/local/share/doc/example.html").unwrap()
},
input_source,
None,
None,
None,
)
);
}
// --- `try_parse_into_uri` called directly ---
// A relative link joined onto a local base directory.
#[test]
fn test_parse_relative_path_into_uri() {
let base = Base::Local(PathBuf::from("/tmp/lychee"));
let source = ResolvedInputSource::String(Cow::Borrowed(""));
let raw_uri = raw_uri("relative.html");
let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
}
// NOTE(review): despite the name, the link "absolute.html" has no leading
// `/`, so this covers the same relative-link path as the test above —
// confirm whether an absolute link was intended.
#[test]
fn test_parse_absolute_path_into_uri() {
let base = Base::Local(PathBuf::from("/tmp/lychee"));
let source = ResolvedInputSource::String(Cow::Borrowed(""));
let raw_uri = raw_uri("absolute.html");
let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
}
// --- `prepend_root_dir_if_absolute_local_link`: all four input combinations ---
// Leading `/` plus a root dir: the root dir is prepended.
#[test]
fn test_prepend_with_absolute_local_link_and_root_dir() {
let text = "/absolute/path";
let root_dir = PathBuf::from("/root");
let result = prepend_root_dir_if_absolute_local_link(text, Some(&root_dir));
assert_eq!(result, "/root/absolute/path");
}
// Leading `/` but no root dir: text is returned unchanged.
#[test]
fn test_prepend_with_absolute_local_link_and_no_root_dir() {
let text = "/absolute/path";
let result = prepend_root_dir_if_absolute_local_link(text, None);
assert_eq!(result, "/absolute/path");
}
// No leading `/`: the root dir is not applied.
#[test]
fn test_prepend_with_relative_link_and_root_dir() {
let text = "relative/path";
let root_dir = PathBuf::from("/root");
let result = prepend_root_dir_if_absolute_local_link(text, Some(&root_dir));
assert_eq!(result, "relative/path");
}
// No leading `/` and no root dir: text is returned unchanged.
#[test]
fn test_prepend_with_relative_link_and_no_root_dir() {
let text = "relative/path";
let result = prepend_root_dir_if_absolute_local_link(text, None);
assert_eq!(result, "relative/path");
}
}