use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt;
use std::io::Write;
/// A single RDF term: an IRI, a blank node, or one of three literal shapes.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RdfTerm {
/// An IRI, held as its raw string.
Iri(String),
/// A blank node, held as its identifier string.
BlankNode(String),
/// A literal that carries only a lexical value.
PlainLiteral(String),
/// A literal paired with a datatype string.
TypedLiteral {
/// Lexical value of the literal.
value: String,
/// Datatype identifier attached to the value.
datatype: String,
},
/// A literal paired with a language tag.
LangLiteral {
/// Lexical value of the literal.
value: String,
/// Language tag attached to the value.
lang: String,
},
}
impl RdfTerm {
pub fn iri(s: impl Into<String>) -> Self {
Self::Iri(s.into())
}
pub fn literal(s: impl Into<String>) -> Self {
Self::PlainLiteral(s.into())
}
pub fn typed(s: impl Into<String>, dt: impl Into<String>) -> Self {
Self::TypedLiteral {
value: s.into(),
datatype: dt.into(),
}
}
pub fn lang(s: impl Into<String>, lang: impl Into<String>) -> Self {
Self::LangLiteral {
value: s.into(),
lang: lang.into(),
}
}
pub fn blank(id: impl Into<String>) -> Self {
Self::BlankNode(id.into())
}
pub fn is_iri(&self) -> bool {
matches!(self, Self::Iri(_))
}
pub fn is_literal(&self) -> bool {
matches!(
self,
Self::PlainLiteral(_) | Self::TypedLiteral { .. } | Self::LangLiteral { .. }
)
}
}
impl fmt::Display for RdfTerm {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Iri(iri) => write!(f, "<{iri}>"),
Self::BlankNode(id) => write!(f, "_:{id}"),
Self::PlainLiteral(val) => write!(f, "\"{val}\""),
Self::TypedLiteral { value, datatype } => write!(f, "\"{value}\"^^<{datatype}>"),
Self::LangLiteral { value, lang } => write!(f, "\"{value}\"@{lang}"),
}
}
}
/// One RDF statement made of a subject, a predicate and an object term.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RdfTriple {
/// The term the statement is about.
pub subject: RdfTerm,
/// The property relating subject and object.
pub predicate: RdfTerm,
/// The value of the property for the subject.
pub object: RdfTerm,
}
/// Options controlling how the RDF/XML document is laid out.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WriterConfig {
/// Number of space characters that make up one indentation step.
pub indent: usize,
/// Read by the property writer when the object is an IRI or a blank node.
/// NOTE(review): in the code visible here both settings lead to the same
/// `rdf:resource` / `rdf:nodeID` output — confirm the intended behaviour.
pub abbreviated: bool,
/// When `true`, the output starts with an `<?xml ...?>` declaration line.
pub xml_declaration: bool,
/// When set, written as the `xml:base` attribute of the root element.
pub base_uri: Option<String>,
/// When `true`, subjects are sorted by their key string before being written.
pub sort_output: bool,
}
impl Default for WriterConfig {
fn default() -> Self {
Self {
indent: 2,
abbreviated: true,
xml_declaration: true,
base_uri: None,
sort_output: false,
}
}
}
/// Failures reported by the RDF/XML writer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RdfXmlWriteError {
/// An IRI was judged invalid; carries the offending text.
InvalidIri(String),
/// A triple used a blank node in predicate position.
BlankNodePredicate,
/// Writing to the underlying sink failed; carries the rendered I/O error.
IoError(String),
/// The produced bytes were not valid UTF-8; carries the rendered decoding error.
XmlEncodingError(String),
}
impl fmt::Display for RdfXmlWriteError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::InvalidIri(iri) => write!(f, "Invalid IRI: {iri}"),
Self::BlankNodePredicate => write!(f, "Blank nodes cannot be predicates in RDF/XML"),
Self::IoError(msg) => write!(f, "IO error: {msg}"),
Self::XmlEncodingError(msg) => write!(f, "XML encoding error: {msg}"),
}
}
}
// Empty impl: the `Debug` derive and the `Display` impl supply everything the
// `std::error::Error` trait requires, so the default methods are used as-is.
impl std::error::Error for RdfXmlWriteError {}
/// Buffers triples and serializes them as an RDF/XML document on demand.
pub struct RdfXmlWriter {
// Layout options chosen at construction time.
config: WriterConfig,
// Maps a namespace prefix to its namespace string.
prefixes: HashMap<String, String>,
// Triples accepted so far, in the order they were added.
triples: Vec<RdfTriple>,
// Number of accepted triples; incremented together with every push to `triples`.
triple_count: usize,
}
impl RdfXmlWriter {
/// Creates an empty writer using `config`, with the `rdf` and `rdfs`
/// prefixes already registered.
pub fn new(config: WriterConfig) -> Self {
    let prefixes = HashMap::from([
        (
            String::from("rdf"),
            String::from("http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ),
        (
            String::from("rdfs"),
            String::from("http://www.w3.org/2000/01/rdf-schema#"),
        ),
    ]);
    Self {
        triple_count: 0,
        triples: Vec::new(),
        prefixes,
        config,
    }
}
/// Creates a writer configured with [`WriterConfig::default`].
pub fn with_defaults() -> Self {
    let config = WriterConfig::default();
    Self::new(config)
}
/// Registers `prefix` for `namespace`, replacing any namespace previously
/// bound to the same prefix.
pub fn add_prefix(&mut self, prefix: &str, namespace: &str) {
    let key = prefix.to_owned();
    let value = namespace.to_owned();
    // The previous binding, if any, is intentionally discarded.
    let _previous = self.prefixes.insert(key, value);
}
/// Validates `triple` and buffers a copy of it for later serialization.
///
/// # Errors
///
/// * [`RdfXmlWriteError::BlankNodePredicate`] when the predicate is a blank node.
/// * [`RdfXmlWriteError::InvalidIri`] when the predicate is a literal, or when
///   the subject is a literal. RDF/XML has no syntax for either, so accepting
///   them would only produce a malformed document later on. The payload is the
///   offending term rendered with `Display`.
///
/// A rejected triple is not stored and does not affect [`Self::triple_count`].
pub fn write_triple(&mut self, triple: &RdfTriple) -> Result<(), RdfXmlWriteError> {
    // Predicate is checked first so a blank-node predicate keeps reporting
    // `BlankNodePredicate` regardless of what the subject looks like.
    match &triple.predicate {
        RdfTerm::Iri(_) => {}
        RdfTerm::BlankNode(_) => return Err(RdfXmlWriteError::BlankNodePredicate),
        literal => return Err(RdfXmlWriteError::InvalidIri(literal.to_string())),
    }
    if triple.subject.is_literal() {
        return Err(RdfXmlWriteError::InvalidIri(triple.subject.to_string()));
    }
    self.triples.push(triple.clone());
    self.triple_count += 1;
    Ok(())
}
/// Serializes all buffered triples and returns the document as a `String`.
///
/// # Errors
///
/// * [`RdfXmlWriteError::IoError`] when writing into the in-memory buffer fails.
/// * [`RdfXmlWriteError::XmlEncodingError`] when the written bytes are not UTF-8.
pub fn finish(&self) -> Result<String, RdfXmlWriteError> {
    let mut bytes: Vec<u8> = Vec::new();
    if let Err(io_err) = self.write_to(&mut bytes) {
        return Err(RdfXmlWriteError::IoError(io_err.to_string()));
    }
    match String::from_utf8(bytes) {
        Ok(xml) => Ok(xml),
        Err(utf8_err) => Err(RdfXmlWriteError::XmlEncodingError(utf8_err.to_string())),
    }
}
pub fn write_to<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
let indent_str = " ".repeat(self.config.indent);
if self.config.xml_declaration {
writeln!(writer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
}
write!(writer, "<rdf:RDF")?;
let mut sorted_prefixes: Vec<_> = self.prefixes.iter().collect();
sorted_prefixes.sort_by_key(|(k, _)| (*k).clone());
for (prefix, ns) in &sorted_prefixes {
write!(
writer,
"\n{indent_str}xmlns:{prefix}=\"{}\"",
xml_escape(ns)
)?;
}
if let Some(base) = &self.config.base_uri {
write!(writer, "\n{indent_str}xml:base=\"{base}\"")?;
}
writeln!(writer, ">")?;
let groups = self.group_by_subject();
let mut subjects: Vec<_> = groups.keys().collect();
if self.config.sort_output {
subjects.sort();
}
for subject in subjects {
let predicates = &groups[subject.as_str()];
self.write_description(writer, subject, predicates, &indent_str)?;
}
writeln!(writer, "</rdf:RDF>")?;
Ok(())
}
fn group_by_subject(&self) -> HashMap<String, Vec<(&RdfTerm, &RdfTerm)>> {
let mut groups: HashMap<String, Vec<(&RdfTerm, &RdfTerm)>> = HashMap::new();
for triple in &self.triples {
let key = Self::subject_key(&triple.subject);
groups
.entry(key)
.or_default()
.push((&triple.predicate, &triple.object));
}
groups
}
/// Produces the grouping key for a subject term: the bare IRI text for IRIs,
/// `_:` followed by the id for blank nodes, and the `Display` rendering for
/// anything else.
fn subject_key(term: &RdfTerm) -> String {
    if let RdfTerm::Iri(iri) = term {
        return iri.to_owned();
    }
    if let RdfTerm::BlankNode(id) = term {
        return ["_:", id.as_str()].concat();
    }
    term.to_string()
}
/// Writes one `rdf:Description` element for `subject` with all its properties.
///
/// * `subject` — a subject key: keys starting with `_:` are written as
///   `rdf:nodeID`, everything else as `rdf:about`.
/// * `predicates` — `(predicate, object)` pairs emitted as child elements.
/// * `indent` — indentation for the element itself; children use it twice.
///
/// Both attribute values are XML-escaped, so an identifier containing quotes
/// or markup characters cannot break out of the attribute.
///
/// # Errors
///
/// Propagates any error from `writer` or from the property writer.
fn write_description<W: Write>(
    &self,
    writer: &mut W,
    subject: &str,
    predicates: &[(&RdfTerm, &RdfTerm)],
    indent: &str,
) -> std::io::Result<()> {
    write!(writer, "{indent}<rdf:Description")?;
    match subject.strip_prefix("_:") {
        Some(node_id) => write!(writer, " rdf:nodeID=\"{}\"", xml_escape(node_id))?,
        None => write!(writer, " rdf:about=\"{}\"", xml_escape(subject))?,
    }
    if predicates.is_empty() {
        // Nothing to nest: close as an empty element.
        return writeln!(writer, "/>");
    }
    writeln!(writer, ">")?;
    let inner_indent = indent.repeat(2);
    for (pred, obj) in predicates {
        self.write_property(writer, pred, obj, &inner_indent)?;
    }
    writeln!(writer, "{indent}</rdf:Description>")
}
fn write_property<W: Write>(
&self,
writer: &mut W,
predicate: &RdfTerm,
object: &RdfTerm,
indent: &str,
) -> std::io::Result<()> {
let pred_qname = match predicate {
RdfTerm::Iri(iri) => self.iri_to_qname(iri),
_ => None,
};
let tag = pred_qname.unwrap_or_else(|| match predicate {
RdfTerm::Iri(iri) => format!("rdf:_unknown_{}", iri.len()),
_ => "rdf:_unknown".to_string(),
});
match object {
RdfTerm::Iri(iri) if self.config.abbreviated => {
writeln!(
writer,
"{indent}<{tag} rdf:resource=\"{}\"/>",
xml_escape(iri)
)?;
}
RdfTerm::BlankNode(id) if self.config.abbreviated => {
writeln!(writer, "{indent}<{tag} rdf:nodeID=\"{id}\"/>")?;
}
RdfTerm::PlainLiteral(val) => {
writeln!(writer, "{indent}<{tag}>{}</{tag}>", xml_escape(val))?;
}
RdfTerm::TypedLiteral { value, datatype } => {
writeln!(
writer,
"{indent}<{tag} rdf:datatype=\"{}\">{}</{tag}>",
xml_escape(datatype),
xml_escape(value)
)?;
}
RdfTerm::LangLiteral { value, lang } => {
writeln!(
writer,
"{indent}<{tag} xml:lang=\"{lang}\">{}</{tag}>",
xml_escape(value)
)?;
}
RdfTerm::Iri(iri) => {
writeln!(
writer,
"{indent}<{tag} rdf:resource=\"{}\"/>",
xml_escape(iri)
)?;
}
RdfTerm::BlankNode(id) => {
writeln!(writer, "{indent}<{tag} rdf:nodeID=\"{id}\"/>")?;
}
}
Ok(())
}
/// Abbreviates `iri` to `prefix:local` using the registered prefixes.
///
/// A prefix qualifies when its namespace is a leading part of `iri` and the
/// remainder is a valid XML name. When several prefixes qualify, the one with
/// the longest namespace wins, and ties are broken by the lexicographically
/// smallest prefix, so the result does not depend on map iteration order.
///
/// Returns `None` when no registered prefix qualifies.
fn iri_to_qname(&self, iri: &str) -> Option<String> {
    self.prefixes
        .iter()
        .filter_map(|(prefix, ns)| {
            let local = iri.strip_prefix(ns.as_str())?;
            if is_valid_xml_name(local) {
                Some((prefix, ns.len(), local))
            } else {
                None
            }
        })
        // Longest namespace is "greatest"; on equal length the smaller prefix
        // must win, hence the reversed prefix comparison.
        .max_by(|a, b| a.1.cmp(&b.1).then_with(|| b.0.cmp(a.0)))
        .map(|(prefix, _, local)| format!("{prefix}:{local}"))
}
pub fn triple_count(&self) -> usize {
self.triple_count
}
/// Number of namespace prefixes currently registered.
pub fn prefix_count(&self) -> usize {
    let registered = &self.prefixes;
    registered.len()
}
}
impl Default for RdfXmlWriter {
fn default() -> Self {
Self::with_defaults()
}
}
/// Escapes the five characters that have predefined XML entities.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&apos;`; every other character is copied unchanged. The
/// result is safe to place in element text and in quoted attribute values.
fn xml_escape(s: &str) -> String {
    // Lower bound only: every escaped character grows the output.
    let mut result = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => result.push_str("&amp;"),
            '<' => result.push_str("&lt;"),
            '>' => result.push_str("&gt;"),
            '"' => result.push_str("&quot;"),
            '\'' => result.push_str("&apos;"),
            _ => result.push(c),
        }
    }
    result
}
/// Checks whether `s` can serve as the local part of an element name.
///
/// Accepts a non-empty ASCII string whose first character is a letter or `_`
/// and whose remaining characters are letters, digits, `_`, `-` or `.`.
fn is_valid_xml_name(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // A valid start character is also a valid name character, so only the
        // remainder still needs checking.
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'))
        }
        _ => false,
    }
}
/// Unit tests for the RDF/XML writer and its helper functions.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an IRI string in the `http://example.org/` namespace.
    fn ex(s: &str) -> String {
        format!("http://example.org/{s}")
    }

    /// A default writer with the `ex` prefix registered.
    fn sample_writer() -> RdfXmlWriter {
        let mut w = RdfXmlWriter::with_defaults();
        w.add_prefix("ex", "http://example.org/");
        w
    }

    #[test]
    fn test_empty_document() {
        let w = sample_writer();
        let xml = w.finish().expect("should succeed");
        assert!(xml.contains("<?xml"));
        assert!(xml.contains("<rdf:RDF"));
        assert!(xml.contains("</rdf:RDF>"));
    }

    #[test]
    fn test_single_triple_iri_object() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("knows")),
            object: RdfTerm::iri(ex("bob")),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains("rdf:about=\"http://example.org/alice\""));
        assert!(xml.contains("rdf:resource=\"http://example.org/bob\""));
        assert!(xml.contains("ex:knows"));
    }

    #[test]
    fn test_single_triple_literal_object() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("name")),
            object: RdfTerm::literal("Alice"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains(">Alice</ex:name>"));
    }

    #[test]
    fn test_typed_literal() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("age")),
            object: RdfTerm::typed("30", "http://www.w3.org/2001/XMLSchema#integer"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains("rdf:datatype"));
        assert!(xml.contains("30"));
    }

    #[test]
    fn test_lang_literal() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("name")),
            object: RdfTerm::lang("Alice", "en"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains("xml:lang=\"en\""));
        assert!(xml.contains("Alice"));
    }

    #[test]
    fn test_blank_node_subject() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::blank("b0"),
            predicate: RdfTerm::iri(ex("name")),
            object: RdfTerm::literal("Unknown"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains("rdf:nodeID=\"b0\""));
    }

    #[test]
    fn test_blank_node_object() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("address")),
            object: RdfTerm::blank("addr1"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains("rdf:nodeID=\"addr1\""));
    }

    #[test]
    fn test_blank_node_predicate_rejected() {
        let mut w = sample_writer();
        let result = w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::blank("b0"),
            object: RdfTerm::literal("val"),
        });
        assert!(result.is_err());
    }

    #[test]
    fn test_multiple_triples_same_subject() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("name")),
            object: RdfTerm::literal("Alice"),
        })
        .expect("write");
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("age")),
            object: RdfTerm::typed("30", "http://www.w3.org/2001/XMLSchema#integer"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        let desc_count = xml.matches("rdf:Description").count();
        // One shared description: a start tag plus an end tag.
        assert_eq!(desc_count, 2);
    }

    #[test]
    fn test_multiple_subjects() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("name")),
            object: RdfTerm::literal("Alice"),
        })
        .expect("write");
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("bob")),
            predicate: RdfTerm::iri(ex("name")),
            object: RdfTerm::literal("Bob"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains("alice"));
        assert!(xml.contains("bob"));
        // Two descriptions, each contributing a start tag and an end tag.
        assert_eq!(xml.matches("rdf:Description").count(), 4);
    }

    #[test]
    fn test_xml_escape_ampersand() {
        assert_eq!(xml_escape("a&b"), "a&amp;b");
    }

    #[test]
    fn test_xml_escape_lt_gt() {
        assert_eq!(xml_escape("<b>"), "&lt;b&gt;");
    }

    #[test]
    fn test_xml_escape_quotes() {
        assert_eq!(xml_escape("\"'"), "&quot;&apos;");
    }

    #[test]
    fn test_xml_escape_clean() {
        assert_eq!(xml_escape("hello"), "hello");
    }

    #[test]
    fn test_literal_with_special_chars() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("note")),
            object: RdfTerm::literal("A & B < C > D"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        // Markup characters in literal text must reach the document escaped.
        assert!(xml.contains("A &amp; B &lt; C &gt; D"));
    }

    #[test]
    fn test_valid_xml_name() {
        assert!(is_valid_xml_name("foo"));
        assert!(is_valid_xml_name("_foo"));
        assert!(is_valid_xml_name("foo123"));
        assert!(is_valid_xml_name("foo-bar"));
        assert!(!is_valid_xml_name(""));
        assert!(!is_valid_xml_name("123foo"));
        assert!(!is_valid_xml_name("foo bar"));
    }

    #[test]
    fn test_iri_to_qname() {
        let w = sample_writer();
        assert_eq!(
            w.iri_to_qname("http://example.org/name"),
            Some("ex:name".to_string())
        );
        assert_eq!(
            w.iri_to_qname("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            Some("rdf:type".to_string())
        );
    }

    #[test]
    fn test_iri_to_qname_no_match() {
        let w = sample_writer();
        assert_eq!(w.iri_to_qname("http://unknown.org/foo"), None);
    }

    #[test]
    fn test_config_no_xml_declaration() {
        let config = WriterConfig {
            xml_declaration: false,
            ..Default::default()
        };
        let w = RdfXmlWriter::new(config);
        let xml = w.finish().expect("finish");
        assert!(!xml.contains("<?xml"));
    }

    #[test]
    fn test_config_base_uri() {
        let config = WriterConfig {
            base_uri: Some("http://example.org/".to_string()),
            ..Default::default()
        };
        let w = RdfXmlWriter::new(config);
        let xml = w.finish().expect("finish");
        assert!(xml.contains("xml:base=\"http://example.org/\""));
    }

    #[test]
    fn test_config_sorted_output() {
        let config = WriterConfig {
            sort_output: true,
            ..Default::default()
        };
        let mut w = RdfXmlWriter::new(config);
        w.add_prefix("ex", "http://example.org/");
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("z_last")),
            predicate: RdfTerm::iri(ex("p")),
            object: RdfTerm::literal("1"),
        })
        .expect("write");
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("a_first")),
            predicate: RdfTerm::iri(ex("p")),
            object: RdfTerm::literal("2"),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        let a_pos = xml.find("a_first").expect("a_first found");
        let z_pos = xml.find("z_last").expect("z_last found");
        assert!(a_pos < z_pos);
    }

    #[test]
    fn test_triple_count() {
        let mut w = sample_writer();
        assert_eq!(w.triple_count(), 0);
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("s")),
            predicate: RdfTerm::iri(ex("p")),
            object: RdfTerm::literal("o"),
        })
        .expect("write");
        assert_eq!(w.triple_count(), 1);
    }

    #[test]
    fn test_prefix_count() {
        let mut w = RdfXmlWriter::with_defaults();
        // `rdf` and `rdfs` are registered by the constructor.
        assert_eq!(w.prefix_count(), 2);
        w.add_prefix("ex", "http://example.org/");
        assert_eq!(w.prefix_count(), 3);
    }

    #[test]
    fn test_write_to() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("name")),
            object: RdfTerm::literal("Alice"),
        })
        .expect("write");
        let mut buf = Vec::new();
        w.write_to(&mut buf).expect("write_to");
        let xml = String::from_utf8(buf).expect("utf8");
        assert!(xml.contains("Alice"));
    }

    #[test]
    fn test_rdf_term_is_iri() {
        assert!(RdfTerm::iri("http://x").is_iri());
        assert!(!RdfTerm::literal("hello").is_iri());
    }

    #[test]
    fn test_rdf_term_is_literal() {
        assert!(RdfTerm::literal("hello").is_literal());
        assert!(RdfTerm::typed("42", "xsd:int").is_literal());
        assert!(RdfTerm::lang("hello", "en").is_literal());
        assert!(!RdfTerm::iri("http://x").is_literal());
    }

    #[test]
    fn test_rdf_term_display() {
        assert_eq!(format!("{}", RdfTerm::iri("http://x")), "<http://x>");
        assert_eq!(format!("{}", RdfTerm::literal("hi")), "\"hi\"");
        assert_eq!(
            format!("{}", RdfTerm::typed("42", "xsd:int")),
            "\"42\"^^<xsd:int>"
        );
        assert_eq!(format!("{}", RdfTerm::lang("hi", "en")), "\"hi\"@en");
        assert_eq!(format!("{}", RdfTerm::blank("b0")), "_:b0");
    }

    #[test]
    fn test_error_display() {
        let e = RdfXmlWriteError::InvalidIri("bad".to_string());
        assert!(format!("{e}").contains("bad"));
        let e = RdfXmlWriteError::BlankNodePredicate;
        assert!(format!("{e}").contains("predicate"));
        let e = RdfXmlWriteError::IoError("broken pipe".to_string());
        assert!(format!("{e}").contains("broken pipe"));
        let e = RdfXmlWriteError::XmlEncodingError("utf8".to_string());
        assert!(format!("{e}").contains("utf8"));
    }

    #[test]
    fn test_non_abbreviated() {
        let config = WriterConfig {
            abbreviated: false,
            ..Default::default()
        };
        let mut w = RdfXmlWriter::new(config);
        w.add_prefix("ex", "http://example.org/");
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri(ex("knows")),
            object: RdfTerm::iri(ex("bob")),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains("rdf:resource"));
    }

    #[test]
    fn test_default_writer() {
        let w = RdfXmlWriter::default();
        assert_eq!(w.triple_count(), 0);
        assert_eq!(w.prefix_count(), 2);
    }

    #[test]
    fn test_rdf_type_triple() {
        let mut w = sample_writer();
        w.write_triple(&RdfTriple {
            subject: RdfTerm::iri(ex("alice")),
            predicate: RdfTerm::iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type".to_string()),
            object: RdfTerm::iri(ex("Person")),
        })
        .expect("write");
        let xml = w.finish().expect("finish");
        assert!(xml.contains("rdf:type"));
    }

    #[test]
    fn test_many_triples() {
        let mut w = sample_writer();
        for i in 0..100 {
            w.write_triple(&RdfTriple {
                subject: RdfTerm::iri(ex(&format!("s{i}"))),
                predicate: RdfTerm::iri(ex("p")),
                object: RdfTerm::literal(format!("value_{i}")),
            })
            .expect("write");
        }
        assert_eq!(w.triple_count(), 100);
        let xml = w.finish().expect("finish");
        assert!(xml.contains("value_99"));
    }
}