use std::collections::HashMap;
use std::fmt;
/// A base IRI held as plain text.
///
/// The value is stored exactly as supplied; no validation or normalisation
/// happens in this type.
#[derive(Debug, Clone, PartialEq)]
pub struct BaseDirective {
    /// The base IRI text given to [`BaseDirective::new`].
    pub base_iri: String,
}

impl BaseDirective {
    /// Wraps `iri` as a base directive without inspecting it.
    pub fn new(iri: String) -> Self {
        BaseDirective { base_iri: iri }
    }

    /// Borrows the stored base IRI text.
    pub fn as_str(&self) -> &str {
        self.base_iri.as_str()
    }
}
/// Failures reported while setting a base IRI or resolving IRIs and prefixed names.
#[derive(Debug, Clone, PartialEq)]
pub enum IriError {
    /// A relative IRI needed resolving but no base IRI was set.
    NoBase,
    /// The supplied base IRI was rejected; carries the offending text.
    InvalidBase(String),
    /// A prefixed name used a prefix label with no registered namespace;
    /// carries the label without the trailing `:`.
    UnknownPrefix(String),
    /// The input could not be interpreted; carries a description or the input.
    MalformedIri(String),
}

impl fmt::Display for IriError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NoBase => f.write_str("No base IRI set for relative IRI resolution"),
            Self::InvalidBase(iri) => write!(f, "Invalid base IRI: '{iri}'"),
            Self::UnknownPrefix(label) => write!(f, "Unknown prefix: '{label}:'"),
            Self::MalformedIri(text) => write!(f, "Malformed IRI: '{text}'"),
        }
    }
}

impl std::error::Error for IriError {}
#[derive(Debug, Default, Clone)]
pub struct IriResolver {
pub base: Option<BaseDirective>,
pub prefixes: HashMap<String, String>,
}
impl IriResolver {
pub fn new() -> Self {
Self::default()
}
fn is_absolute(iri: &str) -> bool {
iri.contains("://") || iri.starts_with("urn:")
}
pub fn set_base(&mut self, iri: &str) -> Result<(), IriError> {
if iri.starts_with("http://") || iri.starts_with("https://") || iri.starts_with("urn:") {
self.base = Some(BaseDirective::new(iri.to_string()));
Ok(())
} else {
Err(IriError::InvalidBase(iri.to_string()))
}
}
pub fn set_prefix(&mut self, prefix: &str, iri: &str) {
self.prefixes.insert(prefix.to_string(), iri.to_string());
}
pub fn resolve_relative(&self, iri: &str) -> Result<String, IriError> {
let iri = if iri.starts_with('<') && iri.ends_with('>') {
&iri[1..iri.len() - 1]
} else {
iri
};
if Self::is_absolute(iri) {
return Ok(iri.to_string());
}
let base = self.base.as_ref().ok_or(IriError::NoBase)?;
let base_str = base.as_str();
if iri.starts_with("//") {
let scheme = base_str
.split("://")
.next()
.ok_or_else(|| IriError::InvalidBase(base_str.to_string()))?;
return Ok(format!("{scheme}:{iri}"));
}
if iri.starts_with('#') {
let base_no_fragment = base_str.split('#').next().unwrap_or(base_str);
return Ok(format!("{base_no_fragment}{iri}"));
}
if iri.starts_with('?') {
let base_no_query = base_str.split('?').next().unwrap_or(base_str);
let base_no_frag = base_no_query.split('#').next().unwrap_or(base_no_query);
return Ok(format!("{base_no_frag}{iri}"));
}
if iri.starts_with('/') {
let authority_end = Self::authority_end(base_str);
let authority = &base_str[..authority_end];
return Ok(format!("{authority}{iri}"));
}
let base_dir = Self::base_directory(base_str);
let merged = format!("{base_dir}{iri}");
Ok(Self::remove_dot_segments(&merged))
}
fn authority_end(base: &str) -> usize {
if let Some(sep) = base.find("://") {
let after_scheme = sep + 3;
if let Some(slash) = base[after_scheme..].find('/') {
after_scheme + slash
} else {
base.len()
}
} else if base.starts_with("urn:") {
base.len()
} else {
0
}
}
fn base_directory(base: &str) -> &str {
if let Some(pos) = base.rfind('/') {
&base[..=pos]
} else {
base
}
}
fn remove_dot_segments(path: &str) -> String {
let (prefix, rest) = if let Some(sep) = path.find("://") {
let after = sep + 3;
if let Some(slash) = path[after..].find('/') {
let split = after + slash;
(&path[..split], &path[split..])
} else {
(path, "")
}
} else {
("", path)
};
let mut output: Vec<&str> = Vec::new();
for seg in rest.split('/') {
match seg {
"" if output.is_empty() => {
}
"." => {}
".." => {
output.pop();
}
s => output.push(s),
}
}
if prefix.is_empty() {
format!("/{}", output.join("/"))
} else {
format!("{prefix}/{}", output.join("/"))
}
}
pub fn resolve_prefixed(&self, qname: &str) -> Result<String, IriError> {
let colon = qname
.find(':')
.ok_or_else(|| IriError::MalformedIri(format!("No ':' in prefixed name: '{qname}'")))?;
let prefix = &qname[..colon];
let local = &qname[colon + 1..];
let ns = self
.prefixes
.get(prefix)
.ok_or_else(|| IriError::UnknownPrefix(prefix.to_string()))?;
Ok(format!("{ns}{local}"))
}
pub fn resolve(&self, token: &str) -> Result<String, IriError> {
let stripped = if token.starts_with('<') && token.ends_with('>') {
&token[1..token.len() - 1]
} else {
token
};
if Self::is_absolute(stripped) {
return Ok(stripped.to_string());
}
if !token.starts_with('<') && token.contains(':') {
return self.resolve_prefixed(token);
}
self.resolve_relative(token)
}
pub fn parse_base_directive(line: &str) -> Option<String> {
let trimmed = line.trim();
let upper = trimmed.to_ascii_uppercase();
let rest = if upper.starts_with("@BASE") {
&trimmed[5..]
} else if upper.starts_with("BASE") {
&trimmed[4..]
} else {
return None;
};
Self::extract_iri(rest.trim())
}
pub fn parse_prefix_directive(line: &str) -> Option<(String, String)> {
let trimmed = line.trim();
let upper = trimmed.to_ascii_uppercase();
let rest = if upper.starts_with("@PREFIX") {
&trimmed[7..]
} else if upper.starts_with("PREFIX") {
&trimmed[6..]
} else {
return None;
};
let rest = rest.trim();
let colon = rest.find(':')?;
let prefix = rest[..colon].trim().to_string();
let after_colon = rest[colon + 1..].trim();
let iri = Self::extract_iri(after_colon)?;
Some((prefix, iri))
}
fn extract_iri(s: &str) -> Option<String> {
let s = s.trim();
if !s.starts_with('<') {
return None;
}
let end = s.find('>')?;
Some(s[1..end].to_string())
}
}
#[cfg(test)]
mod tests {
    //! Unit tests for base handling, relative and prefixed resolution,
    //! directive parsing and error formatting.
    use super::*;

    // --- set_base -----------------------------------------------------------

    #[test]
    fn test_set_base_http() {
        let mut r = IriResolver::new();
        assert!(r.set_base("http://example.org/").is_ok());
        assert_eq!(
            r.base.as_ref().map(|b| b.as_str()),
            Some("http://example.org/")
        );
    }

    #[test]
    fn test_set_base_https() {
        let mut r = IriResolver::new();
        assert!(r.set_base("https://example.org/").is_ok());
    }

    #[test]
    fn test_set_base_urn() {
        let mut r = IriResolver::new();
        assert!(r.set_base("urn:example:foo").is_ok());
    }

    #[test]
    fn test_set_base_invalid() {
        let mut r = IriResolver::new();
        let e = r.set_base("relative/path").expect_err("should fail");
        assert!(matches!(e, IriError::InvalidBase(_)));
    }

    // --- resolve_relative ---------------------------------------------------

    #[test]
    fn test_resolve_absolute_passthrough() {
        let r = IriResolver::new();
        let result = r.resolve_relative("http://other.org/foo").expect("resolve");
        assert_eq!(result, "http://other.org/foo");
    }

    #[test]
    fn test_resolve_angle_bracket_absolute() {
        let r = IriResolver::new();
        let result = r
            .resolve_relative("<http://other.org/foo>")
            .expect("resolve");
        assert_eq!(result, "http://other.org/foo");
    }

    #[test]
    fn test_resolve_relative_path() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/base/").expect("set_base");
        let result = r.resolve_relative("foo").expect("resolve");
        assert_eq!(result, "http://example.org/base/foo");
    }

    #[test]
    fn test_resolve_relative_path_no_trailing_slash() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/base/doc").expect("set_base");
        let result = r.resolve_relative("other").expect("resolve");
        // Pin the whole IRI: a substring check would also accept a result with
        // a mangled scheme or authority.
        assert_eq!(result, "http://example.org/base/other");
    }

    #[test]
    fn test_resolve_fragment() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/doc").expect("set_base");
        let result = r.resolve_relative("#section1").expect("resolve");
        assert_eq!(result, "http://example.org/doc#section1");
    }

    #[test]
    fn test_resolve_fragment_strips_existing_fragment() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/doc#old").expect("set_base");
        let result = r.resolve_relative("#new").expect("resolve");
        assert_eq!(result, "http://example.org/doc#new");
    }

    #[test]
    fn test_resolve_query() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/search?q=foo")
            .expect("set_base");
        let result = r.resolve_relative("?q=bar").expect("resolve");
        assert_eq!(result, "http://example.org/search?q=bar");
    }

    #[test]
    fn test_resolve_absolute_path() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/a/b/c").expect("set_base");
        let result = r.resolve_relative("/root").expect("resolve");
        assert_eq!(result, "http://example.org/root");
    }

    #[test]
    fn test_resolve_no_base_error() {
        let r = IriResolver::new();
        let e = r.resolve_relative("relative").expect_err("no base");
        assert_eq!(e, IriError::NoBase);
    }

    #[test]
    fn test_resolve_dot_dot_segments() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/a/b/c/").expect("set_base");
        let result = r.resolve_relative("../../d").expect("resolve");
        // Exact match so leftover `..` segments or a lost authority are caught.
        assert_eq!(result, "http://example.org/a/d");
    }

    #[test]
    fn test_resolve_urn_absolute() {
        let r = IriResolver::new();
        let result = r.resolve_relative("urn:example:thing").expect("resolve");
        assert_eq!(result, "urn:example:thing");
    }

    // --- resolve_prefixed ---------------------------------------------------

    #[test]
    fn test_resolve_prefixed_basic() {
        let mut r = IriResolver::new();
        r.set_prefix("ex", "http://example.org/");
        let result = r.resolve_prefixed("ex:Person").expect("resolve");
        assert_eq!(result, "http://example.org/Person");
    }

    #[test]
    fn test_resolve_prefixed_empty_local() {
        let mut r = IriResolver::new();
        r.set_prefix("ex", "http://example.org/");
        let result = r.resolve_prefixed("ex:").expect("resolve");
        assert_eq!(result, "http://example.org/");
    }

    #[test]
    fn test_resolve_prefixed_unknown_prefix() {
        let r = IriResolver::new();
        let e = r.resolve_prefixed("unknown:thing").expect_err("error");
        assert!(matches!(e, IriError::UnknownPrefix(_)));
    }

    #[test]
    fn test_resolve_prefixed_no_colon() {
        let r = IriResolver::new();
        let e = r.resolve_prefixed("nocolon").expect_err("error");
        assert!(matches!(e, IriError::MalformedIri(_)));
    }

    // --- resolve ------------------------------------------------------------

    #[test]
    fn test_resolve_absolute_iri() {
        let r = IriResolver::new();
        let result = r.resolve("http://example.org/foo").expect("resolve");
        assert_eq!(result, "http://example.org/foo");
    }

    #[test]
    fn test_resolve_prefixed_via_resolve() {
        let mut r = IriResolver::new();
        r.set_prefix("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
        let result = r.resolve("rdf:type").expect("resolve");
        assert_eq!(result, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
    }

    #[test]
    fn test_resolve_bracketed_iri() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/").expect("set_base");
        let result = r.resolve("<relative>").expect("resolve");
        assert_eq!(result, "http://example.org/relative");
    }

    // --- parse_base_directive -----------------------------------------------

    #[test]
    fn test_parse_base_turtle_style() {
        let result = IriResolver::parse_base_directive("@base <http://example.org/> .");
        assert_eq!(result, Some("http://example.org/".to_string()));
    }

    #[test]
    fn test_parse_base_sparql_style() {
        let result = IriResolver::parse_base_directive("BASE <http://example.org/>");
        assert_eq!(result, Some("http://example.org/".to_string()));
    }

    #[test]
    fn test_parse_base_case_insensitive() {
        let result = IriResolver::parse_base_directive("base <http://example.org/>");
        assert_eq!(result, Some("http://example.org/".to_string()));
    }

    #[test]
    fn test_parse_base_no_match() {
        let result = IriResolver::parse_base_directive("PREFIX ex: <http://example.org/>");
        assert!(result.is_none());
    }

    // --- parse_prefix_directive ---------------------------------------------

    #[test]
    fn test_parse_prefix_turtle_style() {
        let result = IriResolver::parse_prefix_directive("@prefix ex: <http://example.org/> .");
        assert_eq!(
            result,
            Some(("ex".to_string(), "http://example.org/".to_string()))
        );
    }

    #[test]
    fn test_parse_prefix_sparql_style() {
        let result = IriResolver::parse_prefix_directive(
            "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        );
        assert_eq!(
            result,
            Some((
                "rdf".to_string(),
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string()
            ))
        );
    }

    #[test]
    fn test_parse_prefix_case_insensitive() {
        let result = IriResolver::parse_prefix_directive("prefix ex: <http://example.org/>");
        assert!(result.is_some());
    }

    #[test]
    fn test_parse_prefix_empty_prefix() {
        let result = IriResolver::parse_prefix_directive("@prefix : <http://default.org/> .");
        assert_eq!(
            result,
            Some(("".to_string(), "http://default.org/".to_string()))
        );
    }

    #[test]
    fn test_parse_prefix_no_match() {
        let result = IriResolver::parse_prefix_directive("@base <http://example.org/>");
        assert!(result.is_none());
    }

    // --- IriError Display ---------------------------------------------------

    #[test]
    fn test_iri_error_no_base_display() {
        let e = IriError::NoBase;
        assert!(!e.to_string().is_empty());
    }

    #[test]
    fn test_iri_error_invalid_base_display() {
        let e = IriError::InvalidBase("bad".to_string());
        assert!(e.to_string().contains("bad"));
    }

    #[test]
    fn test_iri_error_unknown_prefix_display() {
        let e = IriError::UnknownPrefix("ex".to_string());
        assert!(e.to_string().contains("ex"));
    }

    #[test]
    fn test_iri_error_malformed_display() {
        let e = IriError::MalformedIri(":::".to_string());
        assert!(e.to_string().contains(":::"));
    }

    // --- end-to-end ---------------------------------------------------------

    #[test]
    fn test_resolve_document_iris() {
        let mut r = IriResolver::new();
        r.set_base("http://example.org/ontology/")
            .expect("set_base");
        r.set_prefix("owl", "http://www.w3.org/2002/07/owl#");
        r.set_prefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
        let c = r.resolve("owl:Class").expect("owl:Class");
        assert_eq!(c, "http://www.w3.org/2002/07/owl#Class");
        let rel = r.resolve("<Person>").expect("<Person>");
        assert_eq!(rel, "http://example.org/ontology/Person");
        let abs = r.resolve("http://other.org/Thing").expect("absolute");
        assert_eq!(abs, "http://other.org/Thing");
    }
}