use std::collections::HashMap;
use std::io::Cursor;
use std::path::PathBuf;
use super::config::{AssemblerConfig, DatasetConfig, GraphConfig, StoreBackend};
use super::vocab::{
JA_CONTENT_URL, JA_DEFAULT_GRAPH, JA_GRAPH, JA_GRAPH_NAME, JA_MEMORY_DATASET, JA_MEMORY_MODEL,
JA_NAMED_GRAPH, JA_RDF_DATASET, RDF_TYPE, TDB2_DATASET, TDB2_LOCATION,
};
/// Failures reported while building an assembler configuration.
#[derive(Debug)]
pub enum AssemblerError {
    /// The input could not be parsed; the payload is the parser's message.
    ParseError(String),
    /// `resource` does not carry the mandatory `property`.
    MissingRequired { resource: String, property: String },
    /// A `tdb2:location` value was rejected; the payload explains why.
    InvalidLocation(String),
}

impl std::fmt::Display for AssemblerError {
    /// Renders a single-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ParseError(detail) => {
                f.write_str("Assembler parse error: ")?;
                f.write_str(detail)
            }
            Self::MissingRequired { resource, property } => write!(
                f,
                "Missing required property <{}> on <{}>",
                property, resource
            ),
            Self::InvalidLocation(detail) => {
                f.write_str("Invalid tdb2:location: ")?;
                f.write_str(detail)
            }
        }
    }
}

impl std::error::Error for AssemblerError {}
/// Subject-keyed index: every subject maps to its `(predicate, object)` pairs.
type AdjMap = HashMap<String, Vec<(String, String)>>;

/// Groups `triples` by subject.
///
/// Within one subject the `(predicate, object)` pairs keep the order in which
/// the triples appear in the input slice.
fn adjacency_map(triples: &[(String, String, String)]) -> AdjMap {
    triples
        .iter()
        .fold(AdjMap::new(), |mut index, (subject, predicate, object)| {
            index
                .entry(subject.clone())
                .or_insert_with(Vec::new)
                .push((predicate.clone(), object.clone()));
            index
        })
}
/// Returns every object stored for `subject` under `predicate`, in stored order.
///
/// An unknown subject, or a subject without that predicate, yields an empty
/// vector.
fn objects_of<'a>(map: &'a AdjMap, subject: &str, predicate: &str) -> Vec<&'a str> {
    map.get(subject)
        .into_iter()
        .flatten()
        .filter(|(candidate, _)| candidate.as_str() == predicate)
        .map(|(_, object)| object.as_str())
        .collect()
}
fn first_object<'a>(map: &'a AdjMap, subject: &str, predicate: &str) -> Option<&'a str> {
objects_of(map, subject, predicate).into_iter().next()
}
/// Extracts the lexical part of a quoted literal.
///
/// If `raw` starts with `"`, the text between that quote and the *last* `"`
/// is returned, so anything after the closing quote is dropped. When there is
/// no further quote, everything after the opening one is returned. Input that
/// does not start with `"` is returned unchanged.
fn strip_literal(raw: &str) -> &str {
    match raw.strip_prefix('"') {
        None => raw,
        Some(rest) => match rest.rfind('"') {
            Some(closing) => &rest[..closing],
            None => rest,
        },
    }
}
/// Maps one `rdf:type` IRI of `resource` to a storage backend.
///
/// * The in-memory model, in-memory dataset and RDF dataset types all map to
///   [`StoreBackend::InMemory`].
/// * The TDB2 dataset type maps to [`StoreBackend::Tdb2`], using the first
///   `tdb2:location` value of `resource` as the path.
/// * Any other type is returned as [`StoreBackend::Unknown`] carrying the IRI.
///
/// # Errors
///
/// For the TDB2 type only: [`AssemblerError::MissingRequired`] when the
/// resource has no `tdb2:location`, and [`AssemblerError::InvalidLocation`]
/// when that value is empty after literal stripping.
fn resolve_backend(
    map: &AdjMap,
    resource: &str,
    type_iri: &str,
) -> Result<StoreBackend, AssemblerError> {
    let is_in_memory =
        type_iri == JA_MEMORY_MODEL || type_iri == JA_MEMORY_DATASET || type_iri == JA_RDF_DATASET;
    if is_in_memory {
        return Ok(StoreBackend::InMemory);
    }
    if type_iri != TDB2_DATASET {
        return Ok(StoreBackend::Unknown(type_iri.to_owned()));
    }

    // From here on the resource is a TDB2 dataset and needs a usable location.
    let location = match first_object(map, resource, TDB2_LOCATION) {
        Some(raw) => strip_literal(raw),
        None => {
            return Err(AssemblerError::MissingRequired {
                resource: resource.to_owned(),
                property: TDB2_LOCATION.to_owned(),
            })
        }
    };
    if location.is_empty() {
        return Err(AssemblerError::InvalidLocation(
            "tdb2:location value is empty".to_owned(),
        ));
    }
    Ok(StoreBackend::Tdb2 {
        location: PathBuf::from(location),
    })
}
fn build_graph_config(
map: &AdjMap,
graph_resource: &str,
graph_name: Option<String>,
) -> GraphConfig {
let backend = objects_of(map, graph_resource, RDF_TYPE)
.into_iter()
.find_map(|type_iri| resolve_backend(map, graph_resource, type_iri).ok())
.unwrap_or(StoreBackend::InMemory);
let mut content_urls: Vec<String> = Vec::new();
for url_raw in objects_of(map, graph_resource, JA_CONTENT_URL) {
content_urls.push(strip_literal(url_raw).to_owned());
}
for content_bnode in objects_of(map, graph_resource, super::vocab::JA_CONTENT) {
for url_raw in objects_of(map, content_bnode, JA_CONTENT_URL) {
content_urls.push(strip_literal(url_raw).to_owned());
}
}
GraphConfig {
graph_name,
backend,
content_urls,
}
}
/// Builds the dataset configuration for `resource` interpreted as `type_iri`.
///
/// Each named-graph entry of the resource becomes one `GraphConfig`. The
/// entry's graph-name value (if present) is used as the name, and the graph is
/// described by the node the entry points to via the graph property, or by the
/// entry itself when that link is absent. The default graph, when declared,
/// is built the same way with no name.
///
/// # Errors
///
/// Propagates any error from `resolve_backend` for the dataset's own type.
fn build_dataset_config(
    map: &AdjMap,
    resource: &str,
    type_iri: &str,
) -> Result<DatasetConfig, AssemblerError> {
    let backend = resolve_backend(map, resource, type_iri)?;

    let named_graphs: Vec<GraphConfig> = objects_of(map, resource, JA_NAMED_GRAPH)
        .into_iter()
        .map(|entry| {
            let name = first_object(map, entry, JA_GRAPH_NAME).map(str::to_owned);
            // Fall back to the entry node when it has no separate graph node.
            let described_by = first_object(map, entry, JA_GRAPH).unwrap_or(entry);
            build_graph_config(map, described_by, name)
        })
        .collect();

    let default_graph = first_object(map, resource, JA_DEFAULT_GRAPH)
        .map(|described_by| build_graph_config(map, described_by, None));

    Ok(DatasetConfig {
        resource_iri: resource.to_owned(),
        backend,
        named_graphs,
        default_graph,
    })
}
/// Entry points for turning an assembler description into an
/// [`AssemblerConfig`].
pub struct AssemblerBuilder;

impl AssemblerBuilder {
    /// Builds a configuration from `(subject, predicate, object)` triples.
    ///
    /// Every subject whose key does not start with `_:` contributes one
    /// dataset configuration per `rdf:type` value it carries. The resulting
    /// datasets are ordered by resource IRI.
    ///
    /// # Errors
    ///
    /// Returns the error of the first dataset that fails to build. Subjects
    /// are processed in sorted order, so the reported error is the same on
    /// every run even when several resources are invalid.
    pub fn from_triples(
        triples: &[(String, String, String)],
    ) -> Result<AssemblerConfig, AssemblerError> {
        let map = adjacency_map(triples);

        let mut typed_subjects: Vec<(&str, &str)> = map
            .iter()
            .filter(|(subject, _)| !subject.starts_with("_:"))
            .flat_map(|(subject, pairs)| {
                pairs
                    .iter()
                    .filter(|(pred, _)| pred == RDF_TYPE)
                    .map(move |(_, obj)| (subject.as_str(), obj.as_str()))
            })
            .collect();
        // `HashMap` iteration order is unspecified; sorting here makes both
        // the processing order and the first reported error deterministic.
        // The sort is stable, so the types of one subject keep input order.
        typed_subjects.sort_by(|a, b| a.0.cmp(b.0));

        let mut datasets: Vec<DatasetConfig> = typed_subjects
            .into_iter()
            .map(|(subject, type_iri)| build_dataset_config(&map, subject, type_iri))
            .collect::<Result<Vec<_>, AssemblerError>>()?;
        // Keeps the documented ordering independent of how the configs are built.
        datasets.sort_by(|a, b| a.resource_iri.cmp(&b.resource_iri));
        Ok(AssemblerConfig { datasets })
    }

    /// Parses `input` as Turtle (with the parser in lenient mode) and builds
    /// the configuration from the resulting triples.
    ///
    /// # Errors
    ///
    /// Returns [`AssemblerError::ParseError`] with the parser's message on the
    /// first parse failure, or any error produced by [`Self::from_triples`].
    pub fn from_turtle(input: &str) -> Result<AssemblerConfig, AssemblerError> {
        let reader = Cursor::new(input.as_bytes());
        let parser = oxttl::TurtleParser::new().lenient();
        let mut triples: Vec<(String, String, String)> = Vec::new();
        for result in parser.for_reader(reader) {
            let triple = result.map_err(|e| AssemblerError::ParseError(e.to_string()))?;
            triples.push((
                subject_to_key(&triple.subject),
                triple.predicate.as_str().to_owned(),
                term_to_value(&triple.object),
            ));
        }
        Self::from_triples(&triples)
    }
}
/// Converts a triple subject into its string key: the IRI text for a named
/// node, or the blank-node identifier prefixed with `_:`.
fn subject_to_key(subject: &oxrdf::NamedOrBlankNode) -> String {
    match subject {
        oxrdf::NamedOrBlankNode::NamedNode(iri) => String::from(iri.as_str()),
        oxrdf::NamedOrBlankNode::BlankNode(node) => ["_:", node.as_str()].concat(),
    }
}
/// Converts a triple object into its string form.
///
/// Named nodes become their IRI text, blank nodes their identifier prefixed
/// with `_:`, and literals their lexical value wrapped in double quotes (the
/// value is inserted as-is; no datatype or language tag is emitted).
fn term_to_value(term: &oxrdf::Term) -> String {
    match term {
        oxrdf::Term::NamedNode(iri) => String::from(iri.as_str()),
        oxrdf::Term::BlankNode(node) => ["_:", node.as_str()].concat(),
        oxrdf::Term::Literal(literal) => ["\"", literal.value(), "\""].concat(),
        // Fallback for any `Term` variant other than the three above; the
        // `allow` keeps the build warning-free when no such variant exists.
        // NOTE(review): presumably this covers feature-gated triple terms in
        // `oxrdf` — confirm against the crate version in use.
        #[allow(unreachable_patterns)]
        _ => term.to_string(),
    }
}