use std::collections::{HashMap, HashSet};
use thiserror::Error;
/// Errors reported while instantiating a triple template against solution rows.
///
/// The `Display` text of every variant comes from its `#[error(...)]` attribute.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum ConstructError {
/// A template variable had no binding in the row being instantiated while the
/// builder was configured with `skip_unbound(false)`; `name` is the variable
/// name without the leading `?`.
#[error("unbound variable in template: ?{name}")]
UnboundVariable { name: String },
/// A template triple contained an empty term; `position` names where.
/// NOTE(review): nothing visible in this file constructs this variant —
/// confirm whether a caller elsewhere does or whether the check is still missing.
#[error("empty term in template triple at position {position}")]
EmptyTerm { position: &'static str },
/// The predicate of an instantiated triple serialised to a blank node, i.e. a
/// string starting with `_:`; `node` is that serialised predicate.
#[error("blank node in predicate position: {node}")]
BlankNodeInPredicate { node: String },
}
/// A term that may appear in a template triple.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RdfTerm {
    /// An IRI, stored as plain text.
    Iri(String),
    /// A literal with an optional datatype IRI and an optional language tag.
    Literal {
        value: String,
        datatype: Option<String>,
        lang_tag: Option<String>,
    },
    /// A blank node identified by its template label.
    BlankNode(String),
    /// A variable identified by its name.
    Variable(String),
}

impl RdfTerm {
    /// Returns `true` when this term is a [`RdfTerm::Variable`].
    pub fn is_variable(&self) -> bool {
        self.variable_name().is_some()
    }

    /// Returns `true` when this term is a [`RdfTerm::BlankNode`].
    pub fn is_blank_node(&self) -> bool {
        matches!(*self, Self::BlankNode(..))
    }

    /// Returns the variable's name, or `None` for every other kind of term.
    pub fn variable_name(&self) -> Option<&str> {
        if let Self::Variable(name) = self {
            Some(name.as_str())
        } else {
            None
        }
    }
}
/// One subject/predicate/object pattern of a template; each position holds an
/// [`RdfTerm`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TemplateTriple {
    pub subject: RdfTerm,
    pub predicate: RdfTerm,
    pub object: RdfTerm,
}

impl TemplateTriple {
    /// Builds a template triple from its three terms, stored as given.
    pub fn new(subject: RdfTerm, predicate: RdfTerm, object: RdfTerm) -> Self {
        Self {
            subject,
            predicate,
            object,
        }
    }
}
/// A fully instantiated triple whose three terms are already serialised to text.
///
/// Equality and hashing cover all three strings.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GroundTriple {
    /// Serialised subject term.
    pub subject: String,
    /// Serialised predicate term.
    pub predicate: String,
    /// Serialised object term.
    pub object: String,
}
/// One solution row: a mapping from variable name to its bound value.
#[derive(Debug, Clone, Default)]
pub struct SolutionRow {
    /// Variable name -> bound value, both stored as plain strings.
    bindings: HashMap<String, String>,
}

impl SolutionRow {
    /// Creates a row without any bindings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Binds `var` to `value`, replacing any earlier binding of the same variable.
    pub fn bind(&mut self, var: impl Into<String>, value: impl Into<String>) {
        let (name, bound) = (var.into(), value.into());
        self.bindings.insert(name, bound);
    }

    /// Returns the value bound to `var`, or `None` when the variable is unbound.
    pub fn get(&self, var: &str) -> Option<&str> {
        let bound = self.bindings.get(var)?;
        Some(bound.as_str())
    }

    /// Returns `true` when `var` has a binding in this row.
    pub fn is_bound(&self, var: &str) -> bool {
        self.get(var).is_some()
    }

    /// Iterates over the names of all bound variables, in no particular order.
    pub fn bound_vars(&self) -> impl Iterator<Item = &str> {
        self.bindings.iter().map(|(name, _)| name.as_str())
    }
}
/// Counters collected while building a result; all start at zero.
#[derive(Debug, Clone, Default)]
pub struct ConstructStats {
    /// Number of solution rows visited.
    pub rows_processed: usize,
    /// Number of triples produced before duplicate elimination.
    pub raw_triple_count: usize,
    /// Number of template triples dropped because a term could not be resolved.
    pub skipped_unbound: usize,
    /// Number of produced triples discarded because an equal triple was already kept.
    pub duplicates_eliminated: usize,
    /// Number of fresh blank-node labels allocated.
    pub blank_nodes_generated: usize,
}
/// Hands out blank-node labels that are unique for the lifetime of one allocator.
///
/// Uniqueness comes from a counter that is incremented for every label; the
/// template label and row index are appended only to keep the output readable.
#[derive(Debug, Default)]
pub struct BlankNodeAllocator {
    /// Number of labels handed out so far.
    counter: u64,
}

impl BlankNodeAllocator {
    /// Creates an allocator whose first label is numbered 1.
    ///
    /// Public because [`ConstructBuilder::instantiate_row`] is public and takes an
    /// allocator by mutable reference; without a public constructor that method
    /// could not be called from outside this module.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the next label, formatted as `_:b<counter>_<template_label>_r<row_index>`.
    fn fresh(&mut self, row_index: usize, template_label: &str) -> String {
        self.counter += 1;
        format!("_:b{}_{}_r{}", self.counter, template_label, row_index)
    }
}
/// Instantiates a triple template once per solution row and collects the
/// resulting ground triples.
///
/// Terms are serialised to an N-Triples-like text form: IRIs as `<iri>`,
/// literals as `"lexical"`, `"lexical"^^<datatype>` or `"lexical"@lang`, and
/// template blank nodes as freshly allocated `_:` labels. Values bound to
/// variables are copied into the output verbatim.
#[derive(Debug, Clone)]
pub struct ConstructBuilder {
    /// Template triples, instantiated in order for every row.
    template: Vec<TemplateTriple>,
    /// `true`: drop a triple that mentions an unbound variable; `false`: fail.
    skip_on_unbound: bool,
}

impl ConstructBuilder {
    /// Creates a builder for `template` that skips triples with unbound variables.
    pub fn new(template: Vec<TemplateTriple>) -> Self {
        ConstructBuilder {
            template,
            skip_on_unbound: true,
        }
    }

    /// Creates a builder that uses `pattern` as its template.
    ///
    /// Currently identical to [`ConstructBuilder::new`].
    pub fn from_where_shorthand(pattern: Vec<TemplateTriple>) -> Self {
        Self::new(pattern)
    }

    /// Chooses what happens when a template variable has no binding in a row:
    /// `true` skips the affected triple, `false` makes instantiation fail with
    /// [`ConstructError::UnboundVariable`].
    pub fn skip_unbound(mut self, skip: bool) -> Self {
        self.skip_on_unbound = skip;
        self
    }

    /// Instantiates every template triple against a single `row`.
    ///
    /// A blank-node label used in the template maps to one fresh label per call,
    /// so all triples produced for the row share it. Triples that cannot be
    /// resolved are counted in `stats.skipped_unbound` and left out.
    ///
    /// # Errors
    ///
    /// * [`ConstructError::UnboundVariable`] when a variable is unbound and
    ///   skipping is disabled.
    /// * [`ConstructError::BlankNodeInPredicate`] when a fully resolved triple has
    ///   a predicate starting with `_:`.
    pub fn instantiate_row(
        &self,
        row: &SolutionRow,
        row_index: usize,
        alloc: &mut BlankNodeAllocator,
        stats: &mut ConstructStats,
    ) -> Result<Vec<GroundTriple>, ConstructError> {
        // Template label -> label minted for this row.
        let mut bnode_map: HashMap<String, String> = HashMap::new();
        let mut triples = Vec::with_capacity(self.template.len());
        for tpl in &self.template {
            // Resolution order (subject, predicate, object) decides which unbound
            // variable is reported first in error mode.
            let subject =
                self.resolve_term(&tpl.subject, row, row_index, alloc, &mut bnode_map, stats)?;
            let predicate =
                self.resolve_term(&tpl.predicate, row, row_index, alloc, &mut bnode_map, stats)?;
            let object =
                self.resolve_term(&tpl.object, row, row_index, alloc, &mut bnode_map, stats)?;
            match (subject, predicate, object) {
                (Some(subject), Some(predicate), Some(object)) => {
                    // Checked on the serialised form so that a variable bound to a
                    // blank node is caught as well as a template blank node.
                    if predicate.starts_with("_:") {
                        return Err(ConstructError::BlankNodeInPredicate { node: predicate });
                    }
                    triples.push(GroundTriple {
                        subject,
                        predicate,
                        object,
                    });
                }
                _ => stats.skipped_unbound += 1,
            }
        }
        Ok(triples)
    }

    /// Serialises one template term for `row`.
    ///
    /// Returns `Ok(None)` for an unbound variable in skip mode, and
    /// [`ConstructError::UnboundVariable`] for one in error mode.
    fn resolve_term(
        &self,
        term: &RdfTerm,
        row: &SolutionRow,
        row_index: usize,
        alloc: &mut BlankNodeAllocator,
        bnode_map: &mut HashMap<String, String>,
        stats: &mut ConstructStats,
    ) -> Result<Option<String>, ConstructError> {
        match term {
            RdfTerm::Iri(iri) => Ok(Some(format!("<{}>", iri))),
            RdfTerm::Literal {
                value,
                datatype,
                lang_tag,
            } => Ok(Some(Self::serialise_literal(
                value,
                datatype.as_deref(),
                lang_tag.as_deref(),
            ))),
            RdfTerm::BlankNode(label) => {
                // Mint a label the first time this template label is seen in the row.
                let minted = bnode_map.entry(label.clone()).or_insert_with(|| {
                    stats.blank_nodes_generated += 1;
                    alloc.fresh(row_index, label)
                });
                Ok(Some(minted.clone()))
            }
            RdfTerm::Variable(name) => match row.get(name) {
                Some(bound) => Ok(Some(bound.to_owned())),
                None if self.skip_on_unbound => Ok(None),
                None => Err(ConstructError::UnboundVariable { name: name.clone() }),
            },
        }
    }

    /// Serialises a literal as `"lexical"`, `"lexical"^^<datatype>` or `"lexical"@lang`.
    ///
    /// Backslash, double quote, line feed and carriage return are
    /// backslash-escaped: these characters may not appear raw inside a quoted
    /// literal, and leaving them in would yield a term that cannot be parsed
    /// back. When both a datatype and a language tag are present, the datatype
    /// form is emitted and the tag is dropped.
    fn serialise_literal(value: &str, datatype: Option<&str>, lang_tag: Option<&str>) -> String {
        let mut out = String::with_capacity(value.len() + 2);
        out.push('"');
        for ch in value.chars() {
            match ch {
                '\\' => out.push_str("\\\\"),
                '"' => out.push_str("\\\""),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                other => out.push(other),
            }
        }
        out.push('"');
        if let Some(dt) = datatype {
            out.push_str("^^<");
            out.push_str(dt);
            out.push('>');
        } else if let Some(lang) = lang_tag {
            out.push('@');
            out.push_str(lang);
        }
        out
    }

    /// Instantiates the template for every row of `solutions`.
    ///
    /// Returns the distinct triples in first-seen order together with the
    /// collected statistics. Blank-node labels are unique across the whole call.
    ///
    /// # Errors
    ///
    /// Propagates the first error from [`ConstructBuilder::instantiate_row`]; the
    /// triples and statistics gathered so far are discarded.
    pub fn build(
        &self,
        solutions: &[SolutionRow],
    ) -> Result<(Vec<GroundTriple>, ConstructStats), ConstructError> {
        let mut stats = ConstructStats::default();
        let mut alloc = BlankNodeAllocator::new();
        let mut seen: HashSet<GroundTriple> = HashSet::new();
        let mut result: Vec<GroundTriple> = Vec::new();
        for (row_index, row) in solutions.iter().enumerate() {
            stats.rows_processed += 1;
            let row_triples = self.instantiate_row(row, row_index, &mut alloc, &mut stats)?;
            stats.raw_triple_count += row_triples.len();
            for triple in row_triples {
                if seen.insert(triple.clone()) {
                    result.push(triple);
                } else {
                    stats.duplicates_eliminated += 1;
                }
            }
        }
        Ok((result, stats))
    }

    /// Returns `true` when every row binds `var` (vacuously `true` for no rows).
    pub fn variable_present_in_all(var: &str, solutions: &[SolutionRow]) -> bool {
        solutions.iter().all(|r| r.is_bound(var))
    }

    /// Returns `true` when at least one row binds `var`.
    pub fn variable_present_in_any(var: &str, solutions: &[SolutionRow]) -> bool {
        solutions.iter().any(|r| r.is_bound(var))
    }

    /// Returns the template triples in instantiation order.
    pub fn template(&self) -> &[TemplateTriple] {
        &self.template
    }
}
/// Convenience constructors for [`RdfTerm`].
impl RdfTerm {
    /// Creates an IRI term from the IRI text.
    pub fn iri(iri: impl Into<String>) -> Self {
        RdfTerm::Iri(iri.into())
    }

    /// Creates a literal with neither datatype nor language tag.
    pub fn string_literal(value: impl Into<String>) -> Self {
        RdfTerm::Literal {
            value: value.into(),
            datatype: None,
            lang_tag: None,
        }
    }

    /// Creates a literal carrying the given datatype IRI and no language tag.
    pub fn typed_literal(value: impl Into<String>, datatype: impl Into<String>) -> Self {
        RdfTerm::Literal {
            value: value.into(),
            datatype: Some(datatype.into()),
            lang_tag: None,
        }
    }

    /// Creates a literal carrying the given language tag and no datatype.
    pub fn lang_literal(value: impl Into<String>, lang: impl Into<String>) -> Self {
        RdfTerm::Literal {
            value: value.into(),
            datatype: None,
            lang_tag: Some(lang.into()),
        }
    }

    /// Creates an `xsd:boolean` literal with lexical form `true` or `false`.
    pub fn boolean(v: bool) -> Self {
        RdfTerm::typed_literal(v.to_string(), "http://www.w3.org/2001/XMLSchema#boolean")
    }

    /// Creates an `xsd:integer` literal from the decimal rendering of `v`.
    pub fn integer(v: i64) -> Self {
        RdfTerm::typed_literal(v.to_string(), "http://www.w3.org/2001/XMLSchema#integer")
    }

    /// Creates an `xsd:double` literal.
    ///
    /// Finite values and NaN use Rust's `Display` rendering (`NaN` already
    /// matches the XSD spelling). Infinities are spelled `INF` / `-INF`, because
    /// Rust's `inf` / `-inf` are not valid `xsd:double` lexical forms.
    pub fn double(v: f64) -> Self {
        let lexical = if v.is_infinite() {
            if v.is_sign_negative() {
                "-INF".to_owned()
            } else {
                "INF".to_owned()
            }
        } else {
            v.to_string()
        };
        RdfTerm::typed_literal(lexical, "http://www.w3.org/2001/XMLSchema#double")
    }

    /// Creates a variable term with the given name.
    pub fn var(name: impl Into<String>) -> Self {
        RdfTerm::Variable(name.into())
    }

    /// Creates a blank-node term with the given template label.
    pub fn blank(label: impl Into<String>) -> Self {
        RdfTerm::BlankNode(label.into())
    }
}
// Unit tests for template instantiation, term serialisation, statistics and
// the small helper types of this module.
#[cfg(test)]
mod tests {
use super::*;
// Builds a solution row from `(variable, value)` pairs.
fn make_row(pairs: &[(&str, &str)]) -> SolutionRow {
let mut row = SolutionRow::new();
for (k, v) in pairs {
row.bind(*k, *v);
}
row
}
// A template made only of IRIs yields exactly one triple for one (empty) row.
#[test]
fn test_single_iri_triple() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::iri("http://example.org/o"),
)];
let builder = ConstructBuilder::new(template);
let row = SolutionRow::new();
let (triples, stats) = builder.build(&[row]).expect("build");
assert_eq!(triples.len(), 1);
assert_eq!(stats.rows_processed, 1);
assert_eq!(stats.raw_triple_count, 1);
}
// Bound variable values are copied verbatim into the output triple.
#[test]
fn test_variable_binding() {
let template = vec![TemplateTriple::new(
RdfTerm::var("s"),
RdfTerm::iri("http://example.org/type"),
RdfTerm::var("t"),
)];
let builder = ConstructBuilder::new(template);
let row = make_row(&[
("s", "<http://example.org/Alice>"),
("t", "<http://example.org/Person>"),
]);
let (triples, _stats) = builder.build(&[row]).expect("build");
assert_eq!(triples.len(), 1);
assert_eq!(triples[0].subject, "<http://example.org/Alice>");
assert_eq!(triples[0].object, "<http://example.org/Person>");
}
// Default mode: a triple with an unbound variable is dropped and counted.
#[test]
fn test_unbound_variable_skipped_by_default() {
let template = vec![TemplateTriple::new(
RdfTerm::var("s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::var("o"),
)];
let builder = ConstructBuilder::new(template);
let row = make_row(&[("s", "<http://example.org/Alice>")]);
let (triples, stats) = builder.build(&[row]).expect("build");
assert_eq!(triples.len(), 0);
assert_eq!(stats.skipped_unbound, 1);
}
// With `skip_unbound(false)` an unbound variable is reported as an error
// naming the variable.
#[test]
fn test_unbound_variable_error_mode() {
let template = vec![TemplateTriple::new(
RdfTerm::var("s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::var("o"),
)];
let builder = ConstructBuilder::new(template).skip_unbound(false);
let row = make_row(&[("s", "<http://example.org/Alice>")]);
let result = builder.build(&[row]);
assert!(result.is_err());
assert!(
matches!(result.unwrap_err(), ConstructError::UnboundVariable { name } if name == "o")
);
}
// The same template blank node gets a different label in every row.
#[test]
fn test_blank_node_unique_per_row() {
let template = vec![TemplateTriple::new(
RdfTerm::blank("b"),
RdfTerm::iri("http://example.org/value"),
RdfTerm::var("v"),
)];
let builder = ConstructBuilder::new(template);
let rows = vec![make_row(&[("v", "\"1\"")]), make_row(&[("v", "\"2\"")])];
let (triples, stats) = builder.build(&rows).expect("build");
assert_eq!(triples.len(), 2);
assert_ne!(triples[0].subject, triples[1].subject);
assert_eq!(stats.blank_nodes_generated, 2);
}
// Within one row, repeated use of a template blank node shares one label.
#[test]
fn test_blank_node_shared_within_row() {
let template = vec![
TemplateTriple::new(
RdfTerm::blank("b"),
RdfTerm::iri("http://example.org/type"),
RdfTerm::iri("http://example.org/Thing"),
),
TemplateTriple::new(
RdfTerm::blank("b"),
RdfTerm::iri("http://example.org/name"),
RdfTerm::string_literal("test"),
),
];
let builder = ConstructBuilder::new(template);
let row = SolutionRow::new();
let (triples, stats) = builder.build(&[row]).expect("build");
assert_eq!(triples.len(), 2);
assert_eq!(triples[0].subject, triples[1].subject);
assert_eq!(stats.blank_nodes_generated, 1);
}
// Identical triples from several rows are kept once; the rest are counted as
// duplicates while the raw count still includes them.
#[test]
fn test_duplicate_triple_elimination() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::iri("http://example.org/o"),
)];
let builder = ConstructBuilder::new(template);
let rows = vec![SolutionRow::new(), SolutionRow::new(), SolutionRow::new()];
let (triples, stats) = builder.build(&rows).expect("build");
assert_eq!(triples.len(), 1);
assert_eq!(stats.duplicates_eliminated, 2);
assert_eq!(stats.raw_triple_count, 3);
}
// Distinct triples are all kept and no duplicate is reported.
#[test]
fn test_no_duplicates_when_all_distinct() {
let template = vec![TemplateTriple::new(
RdfTerm::var("s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::iri("http://example.org/o"),
)];
let builder = ConstructBuilder::new(template);
let rows = vec![
make_row(&[("s", "<http://example.org/A>")]),
make_row(&[("s", "<http://example.org/B>")]),
];
let (triples, stats) = builder.build(&rows).expect("build");
assert_eq!(triples.len(), 2);
assert_eq!(stats.duplicates_eliminated, 0);
}
// `from_where_shorthand` keeps every pattern triple as a template triple.
#[test]
fn test_construct_where_shorthand() {
let pattern = vec![TemplateTriple::new(
RdfTerm::var("x"),
RdfTerm::iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
RdfTerm::var("t"),
)];
let builder = ConstructBuilder::from_where_shorthand(pattern.clone());
assert_eq!(builder.template().len(), pattern.len());
}
// A plain literal is serialised as its value in double quotes.
#[test]
fn test_string_literal_binding() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/name"),
RdfTerm::string_literal("Alice"),
)];
let builder = ConstructBuilder::new(template);
let (triples, _stats) = builder.build(&[SolutionRow::new()]).expect("build");
assert_eq!(triples[0].object, "\"Alice\"");
}
// An integer literal's serialisation mentions both the number and the datatype.
#[test]
fn test_integer_literal_binding() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/age"),
RdfTerm::integer(42),
)];
let builder = ConstructBuilder::new(template);
let (triples, _stats) = builder.build(&[SolutionRow::new()]).expect("build");
assert!(triples[0].object.contains("42"));
assert!(triples[0].object.contains("integer"));
}
// `RdfTerm::boolean` produces a literal "true" typed with a boolean datatype.
#[test]
fn test_boolean_literal_binding() {
let term = RdfTerm::boolean(true);
if let RdfTerm::Literal {
value, datatype, ..
} = &term
{
assert_eq!(value, "true");
assert!(datatype.as_deref().unwrap_or("").contains("boolean"));
} else {
panic!("expected Literal");
}
}
// `RdfTerm::double` produces a literal typed with a double datatype.
// The lint is silenced because 3.14 is only a sample value here, which clippy
// would otherwise flag as an approximation of PI.
#[test]
#[allow(clippy::approx_constant)]
fn test_double_literal_binding() {
let term = RdfTerm::double(3.14);
if let RdfTerm::Literal {
value, datatype, ..
} = &term
{
assert!(value.contains("3.14"));
assert!(datatype.as_deref().unwrap_or("").contains("double"));
} else {
panic!("expected Literal");
}
}
// A language-tagged literal is serialised as "value"@lang.
#[test]
fn test_lang_tagged_literal() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/label"),
RdfTerm::lang_literal("Hallo", "de"),
)];
let builder = ConstructBuilder::new(template);
let (triples, _) = builder.build(&[SolutionRow::new()]).expect("build");
assert_eq!(triples[0].object, "\"Hallo\"@de");
}
// `variable_present_in_all` is true only for variables bound in every row.
#[test]
fn test_variable_present_in_all() {
let rows = vec![
make_row(&[("x", "1"), ("y", "2")]),
make_row(&[("x", "3"), ("y", "4")]),
];
assert!(ConstructBuilder::variable_present_in_all("x", &rows));
assert!(ConstructBuilder::variable_present_in_all("y", &rows));
assert!(!ConstructBuilder::variable_present_in_all("z", &rows));
}
// `variable_present_in_any` is true when at least one row binds the variable.
#[test]
fn test_variable_present_in_any() {
let rows = vec![make_row(&[("x", "1")]), make_row(&[("y", "2")])];
assert!(ConstructBuilder::variable_present_in_any("x", &rows));
assert!(ConstructBuilder::variable_present_in_any("y", &rows));
assert!(!ConstructBuilder::variable_present_in_any("z", &rows));
}
// A variable bound in only some rows is "in any" but not "in all".
#[test]
fn test_variable_present_partial_binding() {
let rows = vec![
make_row(&[("x", "1"), ("y", "2")]),
make_row(&[("x", "3")]), ];
assert!(!ConstructBuilder::variable_present_in_all("y", &rows));
assert!(ConstructBuilder::variable_present_in_any("y", &rows));
}
// All counters together: two identical rows, one template triple always
// resolvable and one that always references an unbound variable.
#[test]
fn test_construct_stats_populated() {
let template = vec![
TemplateTriple::new(
RdfTerm::var("s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::var("o"),
),
TemplateTriple::new(
RdfTerm::var("s"),
RdfTerm::iri("http://example.org/q"),
RdfTerm::var("missing"),
),
];
let builder = ConstructBuilder::new(template);
let rows = vec![
make_row(&[
("s", "<http://example.org/A>"),
("o", "<http://example.org/B>"),
]),
make_row(&[
("s", "<http://example.org/A>"),
("o", "<http://example.org/B>"),
]),
];
let (triples, stats) = builder.build(&rows).expect("build");
assert_eq!(stats.rows_processed, 2);
assert_eq!(stats.raw_triple_count, 2);
assert_eq!(stats.skipped_unbound, 2);
assert_eq!(triples.len(), 1);
assert_eq!(stats.duplicates_eliminated, 1);
}
// No rows means no triples and no processed rows, even for a ground template.
#[test]
fn test_empty_solution_set() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::iri("http://example.org/o"),
)];
let builder = ConstructBuilder::new(template);
let (triples, stats) = builder.build(&[]).expect("build");
assert_eq!(triples.len(), 0);
assert_eq!(stats.rows_processed, 0);
}
// A template blank node in predicate position makes the build fail.
#[test]
fn test_blank_node_in_predicate_rejected() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::blank("b"),
RdfTerm::iri("http://example.org/o"),
)];
let builder = ConstructBuilder::new(template);
let result = builder.build(&[SolutionRow::new()]);
assert!(result.is_err());
assert!(matches!(
result.unwrap_err(),
ConstructError::BlankNodeInPredicate { .. }
));
}
// Two template triples times two fully bound rows give four distinct triples.
#[test]
fn test_multiple_variables_multiple_rows() {
let template = vec![
TemplateTriple::new(
RdfTerm::var("person"),
RdfTerm::iri("http://schema.org/name"),
RdfTerm::var("name"),
),
TemplateTriple::new(
RdfTerm::var("person"),
RdfTerm::iri("http://schema.org/age"),
RdfTerm::var("age"),
),
];
let builder = ConstructBuilder::new(template);
let rows = vec![
make_row(&[
("person", "<http://example.org/Alice>"),
("name", "\"Alice\""),
("age", "\"30\"^^<http://www.w3.org/2001/XMLSchema#integer>"),
]),
make_row(&[
("person", "<http://example.org/Bob>"),
("name", "\"Bob\""),
("age", "\"25\"^^<http://www.w3.org/2001/XMLSchema#integer>"),
]),
];
let (triples, stats) = builder.build(&rows).expect("build");
assert_eq!(triples.len(), 4);
assert_eq!(stats.rows_processed, 2);
assert_eq!(stats.duplicates_eliminated, 0);
}
// A typed literal is serialised as "value"^^<datatype>.
#[test]
fn test_typed_literal_serialisation() {
let term = RdfTerm::typed_literal("2024-01-01", "http://www.w3.org/2001/XMLSchema#date");
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/date"),
term,
)];
let builder = ConstructBuilder::new(template);
let (triples, _) = builder.build(&[SolutionRow::new()]).expect("build");
assert_eq!(
triples[0].object,
"\"2024-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>"
);
}
// `is_bound` reflects exactly the variables that were bound.
#[test]
fn test_solution_row_is_bound() {
let mut row = SolutionRow::new();
row.bind("x", "value");
assert!(row.is_bound("x"));
assert!(!row.is_bound("y"));
}
// `bound_vars` yields one entry per bound variable.
#[test]
fn test_solution_row_bound_vars_iteration() {
let row = make_row(&[("a", "1"), ("b", "2"), ("c", "3")]);
let vars: Vec<&str> = row.bound_vars().collect();
assert_eq!(vars.len(), 3);
}
// Deduplication over 100 identical rows keeps one triple and counts 99 duplicates.
#[test]
fn test_large_result_set_deduplication() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::iri("http://example.org/o"),
)];
let builder = ConstructBuilder::new(template);
let rows: Vec<SolutionRow> = (0..100).map(|_| SolutionRow::new()).collect();
let (triples, stats) = builder.build(&rows).expect("build");
assert_eq!(triples.len(), 1);
assert_eq!(stats.duplicates_eliminated, 99);
}
// `is_variable` distinguishes variables from IRIs.
#[test]
fn test_rdf_term_is_variable() {
assert!(RdfTerm::var("x").is_variable());
assert!(!RdfTerm::iri("http://example.org/x").is_variable());
}
// `is_blank_node` distinguishes blank nodes from variables.
#[test]
fn test_rdf_term_is_blank_node() {
assert!(RdfTerm::blank("b0").is_blank_node());
assert!(!RdfTerm::var("x").is_blank_node());
}
// `variable_name` returns the name for variables and `None` otherwise.
#[test]
fn test_rdf_term_variable_name() {
assert_eq!(RdfTerm::var("foo").variable_name(), Some("foo"));
assert_eq!(RdfTerm::iri("http://example.org/").variable_name(), None);
}
// IRIs are serialised inside angle brackets.
#[test]
fn test_iri_term_serialisation() {
let template = vec![TemplateTriple::new(
RdfTerm::iri("http://example.org/s"),
RdfTerm::iri("http://example.org/p"),
RdfTerm::iri("http://example.org/o"),
)];
let builder = ConstructBuilder::new(template);
let (triples, _) = builder.build(&[SolutionRow::new()]).expect("build");
assert!(triples[0].subject.starts_with('<'));
assert!(triples[0].subject.ends_with('>'));
}
// Ground triples with equal components compare equal.
#[test]
fn test_ground_triple_equality() {
let t1 = GroundTriple {
subject: "s".into(),
predicate: "p".into(),
object: "o".into(),
};
let t2 = GroundTriple {
subject: "s".into(),
predicate: "p".into(),
object: "o".into(),
};
assert_eq!(t1, t2);
}
// Ground triples that differ in one component compare unequal.
#[test]
fn test_ground_triple_inequality() {
let t1 = GroundTriple {
subject: "s1".into(),
predicate: "p".into(),
object: "o".into(),
};
let t2 = GroundTriple {
subject: "s2".into(),
predicate: "p".into(),
object: "o".into(),
};
assert_ne!(t1, t2);
}
// Two `fresh` calls with identical arguments still return different labels.
#[test]
fn test_blank_node_allocator_monotonic() {
let mut alloc = BlankNodeAllocator::new();
let a = alloc.fresh(0, "b");
let b = alloc.fresh(0, "b");
assert_ne!(a, b);
}
// The same template label in different rows yields different labels.
#[test]
fn test_blank_node_allocator_different_rows() {
let mut alloc = BlankNodeAllocator::new();
let r0 = alloc.fresh(0, "same");
let r1 = alloc.fresh(1, "same");
assert_ne!(r0, r1);
}
// `TemplateTriple::new` stores each term in the matching field.
#[test]
fn test_template_triple_fields() {
let t = TemplateTriple::new(
RdfTerm::iri("http://s"),
RdfTerm::iri("http://p"),
RdfTerm::iri("http://o"),
);
assert!(matches!(t.subject, RdfTerm::Iri(_)));
assert!(matches!(t.predicate, RdfTerm::Iri(_)));
assert!(matches!(t.object, RdfTerm::Iri(_)));
}
// One blank node is generated per row, so three rows give three.
#[test]
fn test_stats_blank_nodes_counted_across_rows() {
let template = vec![TemplateTriple::new(
RdfTerm::blank("b"),
RdfTerm::iri("http://p"),
RdfTerm::iri("http://o"),
)];
let builder = ConstructBuilder::new(template);
let rows = vec![SolutionRow::new(), SolutionRow::new(), SolutionRow::new()];
let (_, stats) = builder.build(&rows).expect("build");
assert_eq!(stats.blank_nodes_generated, 3);
}
// A cloned template triple equals the original.
#[test]
fn test_template_clone() {
let t = TemplateTriple::new(RdfTerm::var("x"), RdfTerm::iri("p"), RdfTerm::var("y"));
let t2 = t.clone();
assert_eq!(t, t2);
}
// `template()` exposes every triple the builder was created with.
#[test]
fn test_template_accessor() {
let template = vec![
TemplateTriple::new(RdfTerm::var("a"), RdfTerm::iri("b"), RdfTerm::var("c")),
TemplateTriple::new(RdfTerm::iri("x"), RdfTerm::iri("y"), RdfTerm::iri("z")),
];
let builder = ConstructBuilder::new(template);
assert_eq!(builder.template().len(), 2);
}
// `skip_unbound` can be chained after `new`; an empty template with no rows
// builds successfully and yields nothing.
#[test]
fn test_skip_unbound_chained() {
let builder = ConstructBuilder::new(vec![]).skip_unbound(false);
let (triples, _) = builder.build(&[]).expect("build empty");
assert!(triples.is_empty());
}
}