use const_format::concatcp;
use oxrdf::vocab::rdf::TYPE;
use oxrdf::vocab::rdfs::SUB_CLASS_OF;
use oxrdf::{Literal, NamedNode, NamedNodeRef, TermRef, TripleRef};
use uuid::Uuid;
use xml::reader::{EventReader, XmlEvent};
use crate::writer::RdfWriter;
/// An XML element tracked by `parse_xml` while it is open (kept on the
/// parser's element stack until the matching end tag is seen).
#[derive(Debug, Clone)]
struct Node {
/// IRI string used as the element's `rdf:type` object: the root element's
/// local name appended to the `X2R` namespace, followed by one
/// `.`-separated local name per nesting level.
path: String,
/// IRI identifying this particular element occurrence, built as
/// `{namespace}/{random v4 UUID}`.
id: NamedNode,
}
/// Namespace prefix shared by every vocabulary term below; also used as the
/// prefix of the per-element and per-attribute class IRIs built in `parse_xml`.
const X2R: &str = "https://decisym.ai/xml2rdf/model#";
/// Base class for XML elements (`…#XmlNode`); object of the `rdfs:subClassOf`
/// triple emitted for each element.
const XML_ELEMENT: NamedNodeRef<'_> = NamedNodeRef::new_unchecked(concatcp!(X2R, "XmlNode"));
/// Base class for XML attributes (`…#XmlAttribute`); object of the
/// `rdfs:subClassOf` triple emitted for each attribute class.
const XML_ATTRIBUTE: NamedNodeRef<'_> = NamedNodeRef::new_unchecked(concatcp!(X2R, "XmlAttribute"));
/// Predicate linking a parent element to an element nested directly inside it.
const HAS_CHILD: NamedNodeRef<'_> = NamedNodeRef::new_unchecked(concatcp!(X2R, "hasChild"));
/// Predicate linking an element to one of its attributes.
const HAS_ATTRIBUTE: NamedNodeRef<'_> = NamedNodeRef::new_unchecked(concatcp!(X2R, "hasAttribute"));
/// Predicate carrying an element's local name as a simple literal.
const HAS_NAME: NamedNodeRef<'_> = NamedNodeRef::new_unchecked(concatcp!(X2R, "hasName"));
/// Predicate carrying an attribute's value or an element's trimmed text
/// content as a simple literal.
const HAS_VALUE: NamedNodeRef<'_> = NamedNodeRef::new_unchecked(concatcp!(X2R, "hasValue"));
pub fn parse_xml(
files: Vec<String>,
output: &mut dyn RdfWriter,
namespace: &str,
) -> std::io::Result<()> {
for file in files.into_iter() {
let file = std::fs::File::open(file)?;
let file_reader = std::io::BufReader::new(file);
let parser = EventReader::new(file_reader);
let mut stack: Vec<Node> = Vec::new();
let mut subject: Option<Node> = None;
for e in parser {
match e {
Ok(XmlEvent::StartElement {
name, attributes, ..
}) => {
let id = Uuid::new_v4().hyphenated().to_string();
let path = if let Some(parent) = stack.last_mut() {
format!("{}.{}", parent.path, name.local_name)
} else {
format!("{X2R}{}", name.local_name)
};
subject = Some(Node {
id: NamedNode::new(format!("{}/{}", namespace, id).as_str()).unwrap(),
path,
});
if let Some(ref s) = subject {
if let Some(parent) = stack.last_mut() {
output.add_triple(TripleRef::new(
parent.id.as_ref(),
HAS_CHILD,
s.id.as_ref(),
))?;
}
let object = NamedNode::new(&s.path).unwrap();
output.add_triple(TripleRef::new(s.id.as_ref(), TYPE, object.as_ref()))?;
let object = Literal::new_simple_literal(name.local_name.clone());
output.add_triple(TripleRef::new(
s.id.as_ref(),
HAS_NAME,
TermRef::Literal(object.as_ref()),
))?;
output.add_triple(TripleRef::new(
s.id.as_ref(),
SUB_CLASS_OF,
XML_ELEMENT,
))?;
stack.push(s.clone());
}
for attr in attributes {
if let Some(ref s) = subject {
let attrib_id = Uuid::new_v4().hyphenated().to_string();
let path = format!("{}.-{}", s.path, attr.name.local_name);
let attr_subject =
NamedNode::new(format!("{}/{}", namespace, attrib_id)).unwrap();
output.add_triple(TripleRef::new(
s.id.as_ref(),
HAS_ATTRIBUTE,
attr_subject.as_ref(),
))?;
let attr_object = NamedNode::new(path).unwrap();
output.add_triple(TripleRef::new(
attr_subject.as_ref(),
TYPE,
attr_object.as_ref(),
))?;
output.add_triple(TripleRef::new(
attr_object.as_ref(),
SUB_CLASS_OF,
XML_ATTRIBUTE,
))?;
if !attr.value.is_empty() {
let attr_object = Literal::new_simple_literal(&attr.value);
output.add_triple(TripleRef::new(
attr_subject.as_ref(),
HAS_VALUE,
TermRef::Literal(attr_object.as_ref()),
))?;
} else {
}
}
}
}
Ok(XmlEvent::Characters(text)) => {
let text = text.trim();
if !text.is_empty() {
if let Some(ref s) = subject {
let content_object = Literal::new_simple_literal(text);
output.add_triple(TripleRef::new(
s.id.as_ref(),
HAS_VALUE,
TermRef::Literal(content_object.as_ref()),
))?;
}
}
}
Ok(XmlEvent::EndElement { .. }) => {
stack.pop();
subject = None;
}
_ => {}
}
}
}
Ok(())
}