use roxmltree::Node;
/// Namespace URI of the reserved `xml` prefix; an attribute named `base` in
/// this namespace is what this module treats as `xml:base`.
const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";
/// Computes the effective `xml:base` for `start` by combining the `xml:base`
/// values found on `start` and its ancestors.
///
/// The walk goes from `start` towards the document root and looks only at
/// element nodes. Every non-empty `xml:base` value encountered is collected.
/// When `node_set` is supplied and returns `true` for an element, that
/// element's own `xml:base` (if any) is still collected, and the walk stops
/// there.
///
/// The collected values are then resolved outermost-first: the outermost value
/// is taken verbatim and each inner value is resolved against the running
/// result with [`resolve_uri`].
///
/// Returns `None` when no `xml:base` value was collected.
pub(crate) fn compute_effective_xml_base(
    start: Node<'_, '_>,
    node_set: Option<&dyn Fn(Node) -> bool>,
) -> Option<String> {
    // Values are gathered innermost-first while climbing the tree.
    let mut collected: Vec<&str> = Vec::new();
    let mut cursor = Some(start);

    while let Some(node) = cursor {
        cursor = node.parent();
        if !node.is_element() {
            continue;
        }

        let is_boundary = node_set.is_some_and(|in_set| in_set(node));
        collected.extend(xml_base_value(node));
        if is_boundary {
            // The boundary element contributes its base but ends the walk.
            break;
        }
    }

    // Resolution runs from the outermost collected base inwards.
    let mut outermost_first = collected.into_iter().rev();
    let seed = outermost_first.next()?.to_string();
    Some(outermost_first.fold(seed, |resolved, relative| resolve_uri(&resolved, relative)))
}
/// Returns the value of the `xml:base` attribute on `node`.
///
/// Only the first attribute named `base` in the XML namespace is considered.
/// `None` is returned both when there is no such attribute and when its value
/// is the empty string.
fn xml_base_value<'a>(node: Node<'a, '_>) -> Option<&'a str> {
    node.attributes()
        .find(|attr| attr.name() == "base" && attr.namespace() == Some(XML_NS))
        .map(|attr| attr.value())
        // An empty `xml:base=""` is treated the same as an absent attribute.
        .filter(|value| !value.is_empty())
}
pub(crate) fn resolve_uri(base: &str, reference: &str) -> String {
if reference.is_empty() {
return base.to_string();
}
if has_scheme(reference) {
return reference.to_string();
}
let base_parts = match parse_base(base) {
Some(parts) => parts,
None => {
if reference.starts_with("//") || reference.starts_with('/') {
return reference.to_string();
}
let (ref_path, ref_suffix) = split_path_suffix(reference);
let base_path_only = strip_query_fragment(base);
let merged = merge_paths(base_path_only, ref_path);
let cleaned = remove_dot_segments(&merged);
return format!("{cleaned}{ref_suffix}");
}
};
let scheme = base_parts.scheme;
let authority = base_parts.authority;
let base_path = base_parts.path;
if reference.starts_with('?') || reference.starts_with('#') {
let base_no_qf = strip_query_fragment(base);
if reference.starts_with('?') {
return format!("{base_no_qf}{reference}");
}
let base_no_frag = base.split('#').next().unwrap_or(base);
return format!("{base_no_frag}{reference}");
}
let (ref_path, ref_suffix) = split_path_suffix(reference);
if let Some(rest) = ref_path.strip_prefix("//") {
let mut auth_end = rest.len();
for ch in ['/', '?', '#'] {
if let Some(pos) = rest.find(ch)
&& pos < auth_end
{
auth_end = pos;
}
}
let new_authority = &rest[..auth_end];
let new_path = remove_dot_segments(&rest[auth_end..]);
let mut result = recompose(scheme, Some(new_authority), &new_path);
result.push_str(ref_suffix);
return result;
}
if ref_path.starts_with('/') {
let cleaned = remove_dot_segments(ref_path);
let mut result = recompose(scheme, authority, &cleaned);
result.push_str(ref_suffix);
return result;
}
let clean_base_path = strip_query_fragment(base_path);
let merged = merge_paths(clean_base_path, ref_path);
let cleaned = remove_dot_segments(&merged);
let mut result = recompose(scheme, authority, &cleaned);
result.push_str(ref_suffix);
result
}
/// Reports whether `uri` begins with a URI scheme followed by `:`.
///
/// The text before the first `:` must be non-empty, start with an ASCII
/// letter, and consist only of ASCII letters, digits, `+`, `-` and `.`.
fn has_scheme(uri: &str) -> bool {
    let Some((candidate, _)) = uri.split_once(':') else {
        return false;
    };
    let mut bytes = candidate.bytes();
    // The first byte must be a letter; the remaining bytes use the wider alphabet.
    bytes.next().is_some_and(|first| first.is_ascii_alphabetic())
        && bytes.all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
}
/// Borrowed pieces of a base URI that has a scheme, as split by `parse_base`.
struct BaseParts<'a> {
/// Text before the first `:` of the base URI.
scheme: &'a str,
/// Text between a `//` that directly follows the scheme and the next `/`,
/// `?` or `#`; `None` when the scheme is not followed by `//`.
authority: Option<&'a str>,
/// Everything after the scheme (and authority, if any); any query or
/// fragment is still included here.
path: &'a str,
}
/// Splits a base URI into scheme, optional authority and the remaining text.
///
/// Returns `None` when `base` does not start with a valid scheme. The
/// authority is only recognised when `//` directly follows the scheme's `:`,
/// and ends at the first `/`, `?` or `#` (or at the end of the string). The
/// returned `path` is whatever is left and may still contain a query and
/// fragment.
fn parse_base(base: &str) -> Option<BaseParts<'_>> {
    if !has_scheme(base) {
        return None;
    }
    let (scheme, after_scheme) = base.split_once(':')?;

    let (authority, path) = match after_scheme.strip_prefix("//") {
        Some(hierarchical) => {
            let cut = hierarchical
                .find(|c: char| matches!(c, '/' | '?' | '#'))
                .unwrap_or(hierarchical.len());
            let (auth, remainder) = hierarchical.split_at(cut);
            (Some(auth), remainder)
        }
        None => (None, after_scheme),
    };

    Some(BaseParts {
        scheme,
        authority,
        path,
    })
}
/// Reassembles a URI string from its scheme, optional authority and path.
///
/// With an authority the result is `scheme://authority` followed by `path`;
/// without one it is `scheme:` followed by `path`.
fn recompose(scheme: &str, authority: Option<&str>, path: &str) -> String {
    let mut uri = String::from(scheme);
    uri.push(':');
    if let Some(auth) = authority {
        uri.push_str("//");
        uri.push_str(auth);
    }
    uri.push_str(path);
    uri
}
/// Splits `reference` into its path part and its trailing query/fragment.
///
/// The split happens at the first `?` or `#` found *after* the first
/// character; a `?` or `#` in the very first position is kept as part of the
/// path. The second element of the tuple starts with the delimiter itself
/// and is empty when no delimiter is found.
///
/// The first character is skipped by character, not by byte, so an empty
/// string or a reference whose first character is multi-byte UTF-8 no longer
/// panics on a non-boundary slice.
fn split_path_suffix(reference: &str) -> (&str, &str) {
    let split_at = reference
        .char_indices()
        // Skip exactly one character, however many bytes it occupies.
        .skip(1)
        .find(|&(_, ch)| ch == '?' || ch == '#')
        .map_or(reference.len(), |(idx, _)| idx);
    reference.split_at(split_at)
}
/// Returns the prefix of `s` that precedes the first `?` or `#`, or all of
/// `s` when neither character occurs.
fn strip_query_fragment(s: &str) -> &str {
    match s.find(|c: char| matches!(c, '?' | '#')) {
        Some(cut) => &s[..cut],
        None => s,
    }
}
/// Merges a relative `reference` path onto `base_path`.
///
/// * An empty `base_path` yields `/` followed by `reference`.
/// * Otherwise everything in `base_path` after its last `/` is replaced by
///   `reference`; if `base_path` contains no `/` at all, the result is just
///   `reference`.
fn merge_paths(base_path: &str, reference: &str) -> String {
    if base_path.is_empty() {
        return ["/", reference].concat();
    }
    // Length of the "directory" prefix, including its trailing slash (0 if none).
    let dir_len = base_path.rfind('/').map_or(0, |slash| slash + 1);
    let mut merged = String::with_capacity(dir_len + reference.len());
    merged.push_str(&base_path[..dir_len]);
    merged.push_str(reference);
    merged
}
// Focused unit test for `merge_paths`.
#[cfg(test)]
mod merge_tests {
use super::*;
// A base that contains no `/` contributes no directory prefix, so the
// reference comes back unchanged and no leading slash is invented.
#[test]
fn non_hierarchical_base_does_not_add_slash() {
assert_eq!(merge_paths("foo:bar", "baz"), "baz");
}
}
/// Normalises `.` and `..` segments in a `/`-separated path.
///
/// * `.` segments are dropped.
/// * `..` removes the previously kept segment, unless that segment is empty
///   (e.g. the slot before a leading `/`) or is itself `..`.
/// * When nothing can be removed, an absolute path (leading `/`) discards the
///   `..`, while a relative path keeps it so leading `../` sequences survive.
/// * If the input ended in `/.` or `/..`, the result is given a trailing `/`
///   (unless it already has one).
fn remove_dot_segments(path: &str) -> String {
    let rooted = path.starts_with('/');
    let mut kept: Vec<&str> = Vec::new();

    for piece in path.split('/') {
        if piece == "." {
            continue;
        }
        if piece != ".." {
            kept.push(piece);
            continue;
        }
        // Only a real, non-`..` segment may be cancelled by `..`.
        let poppable = matches!(kept.last(), Some(&prev) if !prev.is_empty() && prev != "..");
        if poppable {
            kept.pop();
        } else if !rooted {
            kept.push("..");
        }
    }

    let mut normalized = kept.join("/");
    let ended_on_dot_segment = path.ends_with("/.") || path.ends_with("/..");
    if ended_on_dot_segment && !normalized.ends_with('/') {
        // A trailing dot segment refers to a directory, so keep the slash.
        normalized.push('/');
    }
    normalized
}
// Unit tests for URI resolution, dot-segment removal, scheme detection and
// effective `xml:base` computation.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
use roxmltree::Document;
// ---- resolve_uri: reference forms against a base with a scheme ----
#[test]
fn resolve_absolute_reference() {
assert_eq!(
resolve_uri("http://a.com/b", "http://other.com/c"),
"http://other.com/c"
);
}
#[test]
fn resolve_empty_reference() {
assert_eq!(resolve_uri("http://a.com/b/c", ""), "http://a.com/b/c");
}
#[test]
fn resolve_authority_override() {
assert_eq!(
resolve_uri("http://a.com/b", "//other.com/c"),
"http://other.com/c"
);
}
#[test]
fn resolve_absolute_path() {
assert_eq!(resolve_uri("http://a.com/b/c", "/d/e"), "http://a.com/d/e");
}
#[test]
fn resolve_relative_path_simple() {
assert_eq!(
resolve_uri("http://example.com/a/b/", "c/d"),
"http://example.com/a/b/c/d"
);
}
#[test]
fn resolve_relative_path_sibling() {
assert_eq!(
resolve_uri("http://example.com/a/b", "c"),
"http://example.com/a/c"
);
}
#[test]
fn resolve_relative_path_parent() {
assert_eq!(
resolve_uri("http://example.com/a/b/c", "../d"),
"http://example.com/a/d"
);
}
#[test]
fn resolve_relative_path_double_parent() {
assert_eq!(
resolve_uri("http://example.com/a/b/c/", "../../d"),
"http://example.com/a/d"
);
}
#[test]
fn resolve_root_base_with_relative() {
assert_eq!(
resolve_uri("http://example.com/", "sub/"),
"http://example.com/sub/"
);
}
#[test]
fn resolve_dot_current_dir() {
assert_eq!(
resolve_uri("http://example.com/a/b/", "./c"),
"http://example.com/a/b/c"
);
}
// A base without a scheme is still merged path-wise with the reference.
#[test]
fn resolve_schemeless_base_merges_paths() {
assert_eq!(resolve_uri("sub/dir/", "file.xml"), "sub/dir/file.xml");
assert_eq!(resolve_uri("a/b/", "../c"), "a/c");
assert_eq!(resolve_uri("a/b", "c"), "a/c");
}
#[test]
fn resolve_urn_reference() {
assert_eq!(
resolve_uri("http://example.com/a", "urn:foo:bar"),
"urn:foo:bar"
);
}
// `..` segments that would climb above the root are discarded.
#[test]
fn resolve_parent_beyond_root() {
assert_eq!(
resolve_uri("http://example.com/a", "../../b"),
"http://example.com/b"
);
}
#[test]
fn resolve_file_scheme_no_authority() {
assert_eq!(
resolve_uri("file:///home/user/doc.xml", "sub/file.xml"),
"file:///home/user/sub/file.xml"
);
}
// ---- resolve_uri: query and fragment handling ----
// The base's query and fragment are dropped when a relative path is merged.
#[test]
fn resolve_base_with_query_fragment() {
assert_eq!(
resolve_uri("http://example.com/a/b?q=1#f", "c"),
"http://example.com/a/c"
);
}
#[test]
fn resolve_reference_with_query() {
assert_eq!(
resolve_uri("http://example.com/a/b", "c?x=1"),
"http://example.com/a/c?x=1"
);
}
#[test]
fn resolve_reference_with_fragment() {
assert_eq!(
resolve_uri("http://example.com/a/b", "c#frag"),
"http://example.com/a/c#frag"
);
}
#[test]
fn resolve_reference_with_query_and_fragment() {
assert_eq!(
resolve_uri("http://example.com/a/b", "c?q=1#f"),
"http://example.com/a/c?q=1#f"
);
}
#[test]
fn resolve_absolute_path_with_query() {
assert_eq!(
resolve_uri("http://example.com/a", "/b/c?q"),
"http://example.com/b/c?q"
);
}
// ---- xml_base_value ----
// An empty `xml:base=""` is reported as `None`, same as a missing attribute.
#[test]
fn empty_xml_base_treated_as_absent() {
let xml = r#"<root xml:base=""><child xml:base="http://ex.com/"/></root>"#;
let doc = Document::parse(xml).unwrap();
let root = doc.root_element();
assert_eq!(xml_base_value(root), None);
let child = root.first_element_child().unwrap();
assert_eq!(xml_base_value(child), Some("http://ex.com/"));
}
// ---- remove_dot_segments ----
#[test]
fn remove_dots_simple() {
assert_eq!(remove_dot_segments("/a/b/c"), "/a/b/c");
}
#[test]
fn remove_single_dot() {
assert_eq!(remove_dot_segments("/a/./b"), "/a/b");
}
#[test]
fn remove_double_dot() {
assert_eq!(remove_dot_segments("/a/b/../c"), "/a/c");
}
// A trailing `..` leaves a trailing slash on the result.
#[test]
fn remove_dots_trailing_slash() {
assert_eq!(remove_dot_segments("/a/b/.."), "/a/");
}
#[test]
fn remove_dots_at_root() {
assert_eq!(remove_dot_segments("/../a"), "/a");
}
// Relative paths keep leading `..` segments that cannot be cancelled.
#[test]
fn remove_dots_relative_leading_dotdot() {
assert_eq!(remove_dot_segments("../../a"), "../../a");
assert_eq!(remove_dot_segments("foo/../../bar"), "../bar");
}
// ---- has_scheme ----
#[test]
fn scheme_detection() {
assert!(has_scheme("http://example.com"));
assert!(has_scheme("https://x"));
assert!(has_scheme("urn:foo:bar"));
assert!(has_scheme("ftp://a"));
assert!(!has_scheme("/a/b"));
assert!(!has_scheme("a/b"));
assert!(!has_scheme(""));
assert!(!has_scheme("://bad"));
}
// ---- compute_effective_xml_base ----
// Each test starts the walk at the parent of the innermost element and
// passes no node-set predicate.
#[test]
fn effective_base_single_ancestor() {
let xml = r#"<root xml:base="http://example.com/"><child/></root>"#;
let doc = Document::parse(xml).unwrap();
let child = doc.root_element().first_element_child().unwrap();
let base = compute_effective_xml_base(child.parent().unwrap(), None);
assert_eq!(base.as_deref(), Some("http://example.com/"));
}
#[test]
fn effective_base_chain_resolved() {
let xml = r#"<a xml:base="http://example.com/"><b xml:base="sub/"><c/></b></a>"#;
let doc = Document::parse(xml).unwrap();
let a = doc.root_element();
let b = a.first_element_child().unwrap();
let c = b.first_element_child().unwrap();
let base = compute_effective_xml_base(c.parent().unwrap(), None);
assert_eq!(base.as_deref(), Some("http://example.com/sub/"));
}
#[test]
fn effective_base_three_levels() {
let xml =
r#"<a xml:base="http://ex.com/"><b xml:base="x/"><c xml:base="y/"><d/></c></b></a>"#;
let doc = Document::parse(xml).unwrap();
let a = doc.root_element();
let b = a.first_element_child().unwrap();
let c = b.first_element_child().unwrap();
let d = c.first_element_child().unwrap();
let base = compute_effective_xml_base(d.parent().unwrap(), None);
assert_eq!(base.as_deref(), Some("http://ex.com/x/y/"));
}
#[test]
fn effective_base_none_when_no_xml_base() {
let xml = r#"<root><child/></root>"#;
let doc = Document::parse(xml).unwrap();
let child = doc.root_element().first_element_child().unwrap();
assert_eq!(
compute_effective_xml_base(child.parent().unwrap(), None),
None
);
}
#[test]
fn effective_base_with_dotdot() {
let xml = r#"<a xml:base="http://example.com/a/b/"><b xml:base="../c/"><d/></b></a>"#;
let doc = Document::parse(xml).unwrap();
let a = doc.root_element();
let b = a.first_element_child().unwrap();
let d = b.first_element_child().unwrap();
let base = compute_effective_xml_base(d.parent().unwrap(), None);
assert_eq!(base.as_deref(), Some("http://example.com/a/c/"));
}
}