use std::fmt;
use crate::tree::{Document, NodeKind};
/// Namespace URI that the root `<catalog>` element must carry for
/// `Catalog::parse` to accept a document.
const CATALOG_NAMESPACE: &str = "urn:oasis:names:tc:entity:xmlns:xml:catalog";
/// An ordered collection of [`CatalogEntry`] values used to map public
/// identifiers, system identifiers and URIs to replacement strings.
///
/// Entries keep the order in which they were parsed, added or merged;
/// exact-match lookups return the first matching entry.
#[derive(Debug, Clone)]
pub struct Catalog {
/// Entries in insertion order.
entries: Vec<CatalogEntry>,
}
/// One mapping rule stored in a [`Catalog`].
///
/// Each variant corresponds to one child element name recognised when a
/// catalog document is parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CatalogEntry {
/// `<public>`: maps a public identifier to `uri`. Both sides are
/// whitespace-normalized before being compared for equality.
Public {
public_id: String,
uri: String,
},
/// `<system>`: maps a system identifier to `uri` by exact string equality.
System {
system_id: String,
uri: String,
},
/// `<rewriteSystem>`: for a system identifier beginning with `start`, that
/// prefix is replaced by `rewrite_prefix`. The longest matching `start` wins.
RewriteSystem {
start: String,
rewrite_prefix: String,
},
/// `<rewriteURI>`: for a URI beginning with `start`, that prefix is replaced
/// by `rewrite_prefix`. The longest matching `start` wins.
RewriteUri {
start: String,
rewrite_prefix: String,
},
/// `<uri>`: maps a URI equal to `name` to `uri`.
Uri {
name: String,
uri: String,
},
/// `<delegatePublic>`: for a public identifier beginning with `start`,
/// public lookup yields `catalog` when no `Public` entry matched.
DelegatePublic {
start: String,
catalog: String,
},
/// `<delegateSystem>`: for a system identifier beginning with `start`,
/// system lookup yields `catalog` when no earlier rule matched.
DelegateSystem {
start: String,
catalog: String,
},
/// `<nextCatalog>`: reference to another catalog. It is stored, but none of
/// the lookup methods visible in this file consult it.
NextCatalog {
catalog: String,
},
/// `<systemSuffix>`: maps a system identifier ending with `suffix` to `uri`.
/// The longest matching `suffix` wins.
SystemSuffix {
suffix: String,
uri: String,
},
/// `<uriSuffix>`: maps a URI ending with `suffix` to `uri`. The longest
/// matching `suffix` wins.
UriSuffix {
suffix: String,
uri: String,
},
}
/// Error reported when a catalog document cannot be turned into a catalog.
#[derive(Debug, Clone)]
pub struct CatalogError {
    /// Human-readable description of the failure.
    pub message: String,
}

impl fmt::Display for CatalogError {
    /// Writes the message behind a fixed `catalog error: ` prefix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("catalog error: ")?;
        f.write_str(&self.message)
    }
}

impl std::error::Error for CatalogError {}
impl Catalog {
/// Creates a catalog that holds no entries.
#[must_use]
pub fn new() -> Self {
    let entries = Vec::new();
    Self { entries }
}
/// Parses an XML catalog document into a [`Catalog`].
///
/// The root element must be named `catalog` and be in the OASIS catalog
/// namespace. Every direct child element of the root that is in that same
/// namespace and has a recognised entry name becomes one [`CatalogEntry`],
/// in document order. Child elements from other namespaces, unrecognised
/// element names and non-element nodes are skipped.
///
/// # Errors
///
/// Returns a [`CatalogError`] when the XML cannot be parsed, when the
/// document has no root element, when the root is not `catalog` in the
/// catalog namespace, or when a recognised entry element lacks one of its
/// required attributes.
pub fn parse(xml: &str) -> Result<Self, CatalogError> {
    let doc = Document::parse_str(xml).map_err(|e| CatalogError {
        message: format!("failed to parse catalog XML: {e}"),
    })?;
    let root_element = doc.root_element().ok_or_else(|| CatalogError {
        message: "catalog document has no root element".to_string(),
    })?;
    let root_name = doc.node_name(root_element).unwrap_or("");
    let root_ns = doc.node_namespace(root_element);
    if root_name != "catalog" {
        return Err(CatalogError {
            message: format!("expected root element 'catalog', found '{root_name}'"),
        });
    }
    if root_ns != Some(CATALOG_NAMESPACE) {
        return Err(CatalogError {
            message: format!("root element must be in namespace '{CATALOG_NAMESPACE}'"),
        });
    }
    let mut catalog = Self::new();
    for child in doc.children(root_element) {
        if let NodeKind::Element { ref name, .. } = doc.node(child).kind {
            // The catalog specification requires elements from other
            // namespaces to be ignored, so a foreign element that merely
            // shares a name such as `public` must not become an entry.
            if doc.node_namespace(child) != Some(CATALOG_NAMESPACE) {
                continue;
            }
            if let Some(entry) = parse_catalog_entry(&doc, child, name)? {
                catalog.entries.push(entry);
            }
        }
    }
    Ok(catalog)
}
/// Appends `entry` after every entry already in the catalog.
pub fn add_entry(&mut self, entry: CatalogEntry) {
    let entries = &mut self.entries;
    entries.push(entry);
}
/// Resolves a public identifier.
///
/// The identifier is whitespace-normalized, then looked up in two steps:
///
/// 1. the first `Public` entry whose normalized identifier equals it yields
///    that entry's `uri`;
/// 2. otherwise, among `DelegatePublic` entries whose normalized start string
///    is a prefix of it, the one with the longest start string yields its
///    `catalog` value (the earliest entry wins a tie).
///
/// Returns `None` when neither step matches.
#[must_use]
pub fn resolve_public(&self, public_id: &str) -> Option<String> {
    let normalized = normalize_public_id(public_id);
    for entry in &self.entries {
        if let CatalogEntry::Public {
            public_id: ref pid,
            ref uri,
        } = *entry
        {
            if normalize_public_id(pid) == normalized {
                return Some(uri.clone());
            }
        }
    }
    let mut best_delegate: Option<(&str, usize)> = None;
    for entry in &self.entries {
        if let CatalogEntry::DelegatePublic {
            ref start,
            ref catalog,
        } = *entry
        {
            // The query was normalized above, so the start string must be
            // normalized as well; otherwise a start string written with
            // irregular whitespace could never be a prefix of the query.
            let prefix = normalize_public_id(start);
            if normalized.starts_with(prefix.as_str())
                && prefix.len() > best_delegate.map_or(0, |(_, len)| len)
            {
                best_delegate = Some((catalog.as_str(), prefix.len()));
            }
        }
    }
    best_delegate.map(|(catalog_uri, _)| catalog_uri.to_string())
}
/// Resolves a system identifier.
///
/// Rules are tried in this order, and the first one that matches wins:
/// exact `System` entries, `RewriteSystem` prefixes, `SystemSuffix`
/// suffixes, and finally `DelegateSystem` prefixes (whose `catalog` value is
/// returned). Returns `None` when nothing matches.
#[must_use]
pub fn resolve_system(&self, system_id: &str) -> Option<String> {
    let exact = self.entries.iter().find_map(|entry| match *entry {
        CatalogEntry::System {
            system_id: ref sid,
            ref uri,
        } if sid == system_id => Some(uri.clone()),
        _ => None,
    });
    exact
        .or_else(|| self.resolve_rewrite_system(system_id))
        .or_else(|| self.resolve_system_suffix(system_id))
        .or_else(|| {
            self.entries
                .iter()
                .filter_map(|entry| match *entry {
                    // An empty start string never counts as a match.
                    CatalogEntry::DelegateSystem {
                        ref start,
                        ref catalog,
                    } if !start.is_empty() && system_id.starts_with(start.as_str()) => {
                        Some((start.len(), catalog.as_str()))
                    }
                    _ => None,
                })
                // `min_by_key` keeps the first of equal keys, so with the
                // reversed length the earliest longest prefix is selected.
                .min_by_key(|&(len, _)| std::cmp::Reverse(len))
                .map(|(_, catalog_uri)| catalog_uri.to_string())
        })
}
/// Resolves a URI reference.
///
/// Rules are tried in this order, and the first one that matches wins:
/// `Uri` entries whose `name` equals `uri`, `RewriteUri` prefixes, and then
/// `UriSuffix` entries, of which the longest matching suffix is chosen.
/// Returns `None` when nothing matches.
#[must_use]
pub fn resolve_uri(&self, uri: &str) -> Option<String> {
    let by_name = self.entries.iter().find_map(|entry| match *entry {
        CatalogEntry::Uri {
            ref name,
            uri: ref target,
        } if name == uri => Some(target.clone()),
        _ => None,
    });
    by_name
        .or_else(|| self.resolve_rewrite_uri(uri))
        .or_else(|| {
            self.entries
                .iter()
                .filter_map(|entry| match *entry {
                    // An empty suffix never counts as a match.
                    CatalogEntry::UriSuffix {
                        ref suffix,
                        uri: ref target,
                    } if !suffix.is_empty() && uri.ends_with(suffix.as_str()) => {
                        Some((suffix.len(), target.as_str()))
                    }
                    _ => None,
                })
                // First of equal keys is kept: the earliest longest suffix.
                .min_by_key(|&(len, _)| std::cmp::Reverse(len))
                .map(|(_, target)| target.to_string())
        })
}
/// Resolves an external identifier given optional public and system parts.
///
/// The system identifier, when present, is tried first; the public
/// identifier is consulted only if that produced no result.
#[must_use]
pub fn resolve(&self, public_id: Option<&str>, system_id: Option<&str>) -> Option<String> {
    system_id
        .and_then(|sid| self.resolve_system(sid))
        .or_else(|| public_id.and_then(|pid| self.resolve_public(pid)))
}
/// Appends a copy of every entry of `other`, in order, after the entries
/// already present. `other` is left untouched.
pub fn merge(&mut self, other: &Catalog) {
    self.entries.extend_from_slice(&other.entries);
}
/// Returns how many entries the catalog holds.
#[must_use]
pub fn len(&self) -> usize {
    let entries = &self.entries;
    entries.len()
}
/// Returns `true` when the catalog holds no entries.
#[must_use]
pub fn is_empty(&self) -> bool {
    let entries = &self.entries;
    entries.is_empty()
}
/// Iterates over the entries by reference, in insertion order.
pub fn entries(&self) -> impl Iterator<Item = &CatalogEntry> {
    self.entries.as_slice().iter()
}
/// Applies the `RewriteSystem` entry with the longest start string that
/// prefixes `system_id`, replacing that prefix with the entry's rewrite
/// prefix. The earliest entry wins a tie; empty start strings never match.
fn resolve_rewrite_system(&self, system_id: &str) -> Option<String> {
    self.entries
        .iter()
        .filter_map(|entry| match *entry {
            CatalogEntry::RewriteSystem {
                ref start,
                ref rewrite_prefix,
            } if !start.is_empty() => system_id
                .strip_prefix(start.as_str())
                .map(|remainder| (start.len(), rewrite_prefix.as_str(), remainder)),
            _ => None,
        })
        // First of equal keys is kept: the earliest longest prefix.
        .min_by_key(|&(len, _, _)| std::cmp::Reverse(len))
        .map(|(_, rewrite_prefix, remainder)| format!("{rewrite_prefix}{remainder}"))
}
/// Returns the `uri` of the `SystemSuffix` entry with the longest suffix
/// that `system_id` ends with. The earliest entry wins a tie; empty suffixes
/// never match.
fn resolve_system_suffix(&self, system_id: &str) -> Option<String> {
    self.entries
        .iter()
        .filter_map(|entry| match *entry {
            CatalogEntry::SystemSuffix {
                ref suffix,
                ref uri,
            } if !suffix.is_empty() && system_id.ends_with(suffix.as_str()) => {
                Some((suffix.len(), uri.as_str()))
            }
            _ => None,
        })
        // First of equal keys is kept: the earliest longest suffix.
        .min_by_key(|&(len, _)| std::cmp::Reverse(len))
        .map(|(_, uri)| uri.to_string())
}
/// Applies the `RewriteUri` entry with the longest start string that
/// prefixes `uri`, replacing that prefix with the entry's rewrite prefix.
/// The earliest entry wins a tie; empty start strings never match.
fn resolve_rewrite_uri(&self, uri: &str) -> Option<String> {
    // Length of the winning start string so far; 0 means "nothing yet".
    let mut winner_len = 0;
    // (rewrite prefix, remainder of `uri` after the start string)
    let mut winner: Option<(&str, &str)> = None;
    for entry in &self.entries {
        if let CatalogEntry::RewriteUri {
            ref start,
            ref rewrite_prefix,
        } = *entry
        {
            // Only a strictly longer start string may replace the winner.
            if start.len() <= winner_len {
                continue;
            }
            if let Some(remainder) = uri.strip_prefix(start.as_str()) {
                winner_len = start.len();
                winner = Some((rewrite_prefix.as_str(), remainder));
            }
        }
    }
    winner.map(|(rewrite_prefix, remainder)| format!("{rewrite_prefix}{remainder}"))
}
}
impl Default for Catalog {
fn default() -> Self {
Self::new()
}
}
/// Normalizes the whitespace of a public identifier: leading and trailing
/// whitespace is dropped and every internal run of whitespace becomes a
/// single space.
fn normalize_public_id(public_id: &str) -> String {
    let mut normalized = String::with_capacity(public_id.len());
    for word in public_id.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
/// Converts one element into the matching [`CatalogEntry`].
///
/// `name` selects the entry type. Returns `Ok(None)` for element names that
/// are not recognised, and an error naming the first required attribute
/// that is missing from a recognised element.
fn parse_catalog_entry(
    doc: &Document,
    node: crate::NodeId,
    name: &str,
) -> Result<Option<CatalogEntry>, CatalogError> {
    // Within every arm below the element name reported in errors is exactly
    // `name`, so one closure serves all entry types.
    let attr = |attr_name: &str| require_attr(doc, node, attr_name, name);
    let entry = match name {
        "public" => CatalogEntry::Public {
            public_id: attr("publicId")?,
            uri: attr("uri")?,
        },
        "system" => CatalogEntry::System {
            system_id: attr("systemId")?,
            uri: attr("uri")?,
        },
        "rewriteSystem" => CatalogEntry::RewriteSystem {
            start: attr("systemIdStartString")?,
            rewrite_prefix: attr("rewritePrefix")?,
        },
        "rewriteURI" => CatalogEntry::RewriteUri {
            start: attr("uriStartString")?,
            rewrite_prefix: attr("rewritePrefix")?,
        },
        "uri" => CatalogEntry::Uri {
            name: attr("name")?,
            uri: attr("uri")?,
        },
        "delegatePublic" => CatalogEntry::DelegatePublic {
            start: attr("publicIdStartString")?,
            catalog: attr("catalog")?,
        },
        "delegateSystem" => CatalogEntry::DelegateSystem {
            start: attr("systemIdStartString")?,
            catalog: attr("catalog")?,
        },
        "nextCatalog" => CatalogEntry::NextCatalog {
            catalog: attr("catalog")?,
        },
        "systemSuffix" => CatalogEntry::SystemSuffix {
            suffix: attr("systemIdSuffix")?,
            uri: attr("uri")?,
        },
        "uriSuffix" => CatalogEntry::UriSuffix {
            suffix: attr("uriSuffix")?,
            uri: attr("uri")?,
        },
        _ => return Ok(None),
    };
    Ok(Some(entry))
}
/// Fetches attribute `attr_name` of `node` as an owned string.
///
/// When the attribute is absent, the error message names both the attribute
/// and `element_name`.
fn require_attr(
    doc: &Document,
    node: crate::NodeId,
    attr_name: &str,
    element_name: &str,
) -> Result<String, CatalogError> {
    match doc.attribute(node, attr_name) {
        Some(value) => Ok(value.to_string()),
        None => Err(CatalogError {
            message: format!(
                "missing required attribute '{attr_name}' on <{element_name}> element"
            ),
        }),
    }
}
/// Unit tests for catalog parsing and resolution.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Wraps `body` in a `<catalog>` root carrying the catalog namespace.
    fn catalog_xml(body: &str) -> String {
        format!(r#"<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">{body}</catalog>"#)
    }

    #[test]
    fn test_parse_simple_catalog_with_public_entry() {
        let xml = catalog_xml(
            r#"<public publicId="-//W3C//DTD XHTML 1.0 Strict//EN" uri="dtd/xhtml1-strict.dtd"/>"#,
        );
        let catalog = Catalog::parse(&xml).unwrap();
        assert_eq!(catalog.len(), 1);
        assert_eq!(
            catalog.entries().next(),
            Some(&CatalogEntry::Public {
                public_id: "-//W3C//DTD XHTML 1.0 Strict//EN".to_string(),
                uri: "dtd/xhtml1-strict.dtd".to_string(),
            })
        );
    }

    #[test]
    fn test_parse_catalog_with_system_entry() {
        let xml = catalog_xml(
            r#"<system systemId="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" uri="dtd/xhtml1-strict.dtd"/>"#,
        );
        let catalog = Catalog::parse(&xml).unwrap();
        assert_eq!(catalog.len(), 1);
        assert_eq!(
            catalog.entries().next(),
            Some(&CatalogEntry::System {
                system_id: "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".to_string(),
                uri: "dtd/xhtml1-strict.dtd".to_string(),
            })
        );
    }

    #[test]
    fn test_parse_catalog_with_rewrite_entries() {
        let xml = catalog_xml(
            r#"<rewriteSystem systemIdStartString="http://www.w3.org/TR/" rewritePrefix="file:///usr/share/xml/w3c/"/>
<rewriteURI uriStartString="http://example.com/" rewritePrefix="file:///local/"/>"#,
        );
        let catalog = Catalog::parse(&xml).unwrap();
        assert_eq!(catalog.len(), 2);
    }

    #[test]
    fn test_resolve_public_identifier() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::Public {
            public_id: "-//W3C//DTD XHTML 1.0 Strict//EN".to_string(),
            uri: "dtd/xhtml1-strict.dtd".to_string(),
        });
        assert_eq!(
            catalog.resolve_public("-//W3C//DTD XHTML 1.0 Strict//EN"),
            Some("dtd/xhtml1-strict.dtd".to_string())
        );
    }

    #[test]
    fn test_resolve_system_identifier() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::System {
            system_id: "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".to_string(),
            uri: "dtd/xhtml1-strict.dtd".to_string(),
        });
        assert_eq!(
            catalog.resolve_system("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"),
            Some("dtd/xhtml1-strict.dtd".to_string())
        );
    }

    #[test]
    fn test_resolve_system_with_rewrite_prefix() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::RewriteSystem {
            start: "http://www.w3.org/TR/".to_string(),
            rewrite_prefix: "file:///usr/share/xml/w3c/".to_string(),
        });
        assert_eq!(
            catalog.resolve_system("http://www.w3.org/TR/xhtml1/DTD/strict.dtd"),
            Some("file:///usr/share/xml/w3c/xhtml1/DTD/strict.dtd".to_string())
        );
    }

    #[test]
    fn test_resolve_uri() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::Uri {
            name: "http://example.com/schema.xsd".to_string(),
            uri: "local/schema.xsd".to_string(),
        });
        assert_eq!(
            catalog.resolve_uri("http://example.com/schema.xsd"),
            Some("local/schema.xsd".to_string())
        );
    }

    #[test]
    fn test_resolve_with_suffix_matching() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::SystemSuffix {
            suffix: "strict.dtd".to_string(),
            uri: "local/strict.dtd".to_string(),
        });
        assert_eq!(
            catalog.resolve_system("http://example.com/path/to/strict.dtd"),
            Some("local/strict.dtd".to_string())
        );
    }

    #[test]
    fn test_no_match_returns_none() {
        let catalog = Catalog::new();
        assert_eq!(catalog.resolve_public("-//Unknown//EN"), None);
        assert_eq!(
            catalog.resolve_system("http://unknown.example.com/foo"),
            None
        );
        assert_eq!(catalog.resolve_uri("http://unknown.example.com/bar"), None);
        assert_eq!(catalog.resolve(None, None), None);
    }

    #[test]
    fn test_merge_two_catalogs() {
        let mut catalog1 = Catalog::new();
        catalog1.add_entry(CatalogEntry::Public {
            public_id: "-//A//EN".to_string(),
            uri: "a.dtd".to_string(),
        });
        let mut catalog2 = Catalog::new();
        catalog2.add_entry(CatalogEntry::Public {
            public_id: "-//B//EN".to_string(),
            uri: "b.dtd".to_string(),
        });
        catalog1.merge(&catalog2);
        assert_eq!(catalog1.len(), 2);
        assert_eq!(
            catalog1.resolve_public("-//A//EN"),
            Some("a.dtd".to_string())
        );
        assert_eq!(
            catalog1.resolve_public("-//B//EN"),
            Some("b.dtd".to_string())
        );
    }

    #[test]
    fn test_empty_catalog() {
        let catalog = Catalog::new();
        assert!(catalog.is_empty());
        assert_eq!(catalog.len(), 0);
    }

    #[test]
    fn test_add_entry_programmatically() {
        let mut catalog = Catalog::new();
        assert!(catalog.is_empty());
        catalog.add_entry(CatalogEntry::System {
            system_id: "http://example.com/test.dtd".to_string(),
            uri: "test.dtd".to_string(),
        });
        assert!(!catalog.is_empty());
        assert_eq!(catalog.len(), 1);
        assert_eq!(
            catalog.resolve_system("http://example.com/test.dtd"),
            Some("test.dtd".to_string())
        );
    }

    #[test]
    fn test_catalog_len_and_is_empty() {
        let mut catalog = Catalog::new();
        assert_eq!(catalog.len(), 0);
        assert!(catalog.is_empty());
        catalog.add_entry(CatalogEntry::NextCatalog {
            catalog: "other.xml".to_string(),
        });
        assert_eq!(catalog.len(), 1);
        assert!(!catalog.is_empty());
        catalog.add_entry(CatalogEntry::NextCatalog {
            catalog: "another.xml".to_string(),
        });
        assert_eq!(catalog.len(), 2);
    }

    #[test]
    fn test_complex_catalog_with_multiple_entry_types() {
        let xml = catalog_xml(
            r#"<public publicId="-//W3C//DTD XHTML 1.0 Strict//EN" uri="dtd/xhtml1-strict.dtd"/>
<system systemId="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" uri="dtd/xhtml1-strict.dtd"/>
<rewriteSystem systemIdStartString="http://www.w3.org/TR/" rewritePrefix="file:///local/w3c/"/>
<uri name="http://example.com/schema.xsd" uri="local/schema.xsd"/>
<nextCatalog catalog="other-catalog.xml"/>"#,
        );
        let catalog = Catalog::parse(&xml).unwrap();
        assert_eq!(catalog.len(), 5);
        assert!(catalog
            .resolve_public("-//W3C//DTD XHTML 1.0 Strict//EN")
            .is_some());
        assert!(catalog
            .resolve_system("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")
            .is_some());
        assert!(catalog
            .resolve_system("http://www.w3.org/TR/other/doc.xml")
            .is_some());
        assert!(catalog
            .resolve_uri("http://example.com/schema.xsd")
            .is_some());
    }

    #[test]
    fn test_resolve_prefers_system_over_public() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::Public {
            public_id: "-//Example//EN".to_string(),
            uri: "public-result.dtd".to_string(),
        });
        catalog.add_entry(CatalogEntry::System {
            system_id: "http://example.com/doc.dtd".to_string(),
            uri: "system-result.dtd".to_string(),
        });
        assert_eq!(
            catalog.resolve(Some("-//Example//EN"), Some("http://example.com/doc.dtd")),
            Some("system-result.dtd".to_string())
        );
        assert_eq!(
            catalog.resolve(Some("-//Example//EN"), None),
            Some("public-result.dtd".to_string())
        );
        assert_eq!(
            catalog.resolve(None, Some("http://example.com/doc.dtd")),
            Some("system-result.dtd".to_string())
        );
    }

    #[test]
    fn test_rewrite_system_longest_prefix_wins() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::RewriteSystem {
            start: "http://www.w3.org/".to_string(),
            rewrite_prefix: "file:///short/".to_string(),
        });
        catalog.add_entry(CatalogEntry::RewriteSystem {
            start: "http://www.w3.org/TR/xhtml1/".to_string(),
            rewrite_prefix: "file:///long/".to_string(),
        });
        assert_eq!(
            catalog.resolve_system("http://www.w3.org/TR/xhtml1/DTD/strict.dtd"),
            Some("file:///long/DTD/strict.dtd".to_string())
        );
        assert_eq!(
            catalog.resolve_system("http://www.w3.org/other/file.xml"),
            Some("file:///short/other/file.xml".to_string())
        );
    }

    #[test]
    fn test_catalog_error_display() {
        let err = CatalogError {
            message: "missing required attribute".to_string(),
        };
        assert_eq!(err.to_string(), "catalog error: missing required attribute");
    }

    #[test]
    fn test_parse_invalid_xml_returns_error() {
        let result = Catalog::parse("not valid xml <><>");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_wrong_root_element() {
        let xml = r#"<notcatalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog"/>"#;
        let result = Catalog::parse(xml);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.message.contains("expected root element 'catalog'"));
    }

    #[test]
    fn test_parse_missing_namespace() {
        let xml = r#"<catalog><public publicId="test" uri="test.dtd"/></catalog>"#;
        let result = Catalog::parse(xml);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.message.contains("namespace"));
    }

    #[test]
    fn test_parse_missing_required_attribute() {
        let xml = catalog_xml(r#"<public publicId="-//Test//EN"/>"#);
        let result = Catalog::parse(&xml);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.message.contains("uri"));
    }

    /// Stored and queried identifiers differ only in whitespace, so the
    /// lookups succeed only if both sides are normalized before comparison.
    #[test]
    fn test_public_id_whitespace_normalization() {
        let mut catalog = Catalog::new();
        // Escapes keep the irregular whitespace visible and safe from
        // editors or formatters that collapse runs of blanks.
        catalog.add_entry(CatalogEntry::Public {
            public_id: " -//W3C//DTD\t\tXHTML \n 1.0//EN ".to_string(),
            uri: "xhtml.dtd".to_string(),
        });
        // The stored identifier is normalized.
        assert_eq!(
            catalog.resolve_public("-//W3C//DTD XHTML 1.0//EN"),
            Some("xhtml.dtd".to_string())
        );
        // The queried identifier is normalized as well.
        assert_eq!(
            catalog.resolve_public("\t-//W3C//DTD \r\n XHTML\t1.0//EN\n"),
            Some("xhtml.dtd".to_string())
        );
        // Whitespace is collapsed, not removed.
        assert_eq!(catalog.resolve_public("-//W3C//DTDXHTML 1.0//EN"), None);
    }

    #[test]
    fn test_uri_suffix_matching() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::UriSuffix {
            suffix: "schema.xsd".to_string(),
            uri: "local/schema.xsd".to_string(),
        });
        assert_eq!(
            catalog.resolve_uri("http://example.com/path/to/schema.xsd"),
            Some("local/schema.xsd".to_string())
        );
        assert_eq!(catalog.resolve_uri("http://example.com/other.xsd"), None);
    }

    #[test]
    fn test_rewrite_uri() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::RewriteUri {
            start: "http://example.com/schemas/".to_string(),
            rewrite_prefix: "file:///local/schemas/".to_string(),
        });
        assert_eq!(
            catalog.resolve_uri("http://example.com/schemas/types/main.xsd"),
            Some("file:///local/schemas/types/main.xsd".to_string())
        );
    }

    #[test]
    fn test_delegate_public() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::DelegatePublic {
            start: "-//W3C//".to_string(),
            catalog: "w3c-catalog.xml".to_string(),
        });
        assert_eq!(
            catalog.resolve_public("-//W3C//DTD XHTML 1.0//EN"),
            Some("w3c-catalog.xml".to_string())
        );
        assert_eq!(catalog.resolve_public("-//OASIS//DTD DocBook//EN"), None);
    }

    #[test]
    fn test_delegate_system() {
        let mut catalog = Catalog::new();
        catalog.add_entry(CatalogEntry::DelegateSystem {
            start: "http://www.w3.org/".to_string(),
            catalog: "w3c-catalog.xml".to_string(),
        });
        assert_eq!(
            catalog.resolve_system("http://www.w3.org/TR/xhtml1/DTD/strict.dtd"),
            Some("w3c-catalog.xml".to_string())
        );
        assert_eq!(catalog.resolve_system("http://example.com/other.dtd"), None);
    }

    #[test]
    fn test_default_trait() {
        let catalog = Catalog::default();
        assert!(catalog.is_empty());
    }

    #[test]
    fn test_catalog_error_is_error_trait() {
        let err = CatalogError {
            message: "test error".to_string(),
        };
        let _: &dyn std::error::Error = &err;
    }
}