use crate::model::*;
use crate::query::algebra::{Expression, TermPattern};
use crate::OxirsError;
use smallvec::SmallVec;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct BindingSet {
pub variables: SmallVec<[Variable; 8]>,
pub bindings: Vec<TermBinding>,
pub constraints: Vec<Constraint>,
var_index: HashMap<Variable, usize>,
}
#[derive(Debug, Clone)]
pub struct TermBinding {
pub variable: Variable,
pub term: Term,
pub metadata: BindingMetadata,
}
#[derive(Debug, Clone, Default)]
pub struct BindingMetadata {
pub source_pattern_id: usize,
pub confidence: f64,
pub is_fixed: bool,
}
#[derive(Debug, Clone)]
pub enum Constraint {
TypeConstraint {
variable: Variable,
allowed_types: HashSet<TermType>,
},
ValueConstraint {
variable: Variable,
constraint: ValueConstraintType,
},
RelationshipConstraint {
left: Variable,
right: Variable,
relation: RelationType,
},
FilterConstraint { expression: Expression },
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TermType {
NamedNode,
BlankNode,
Literal,
NumericLiteral,
StringLiteral,
BooleanLiteral,
DateTimeLiteral,
}
#[derive(Debug, Clone)]
pub enum ValueConstraintType {
NumericRange { min: f64, max: f64 },
StringPattern(regex::Regex),
OneOf(HashSet<Term>),
NoneOf(HashSet<Term>),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RelationType {
Equal,
NotEqual,
LessThan,
LessThanOrEqual,
GreaterThan,
GreaterThanOrEqual,
}
impl BindingSet {
pub fn new() -> Self {
Self {
variables: SmallVec::new(),
bindings: Vec::new(),
constraints: Vec::new(),
var_index: HashMap::new(),
}
}
pub fn with_variables(vars: Vec<Variable>) -> Self {
let mut var_index = HashMap::new();
for (idx, var) in vars.iter().enumerate() {
var_index.insert(var.clone(), idx);
}
Self {
variables: vars.into(),
bindings: Vec::new(),
constraints: Vec::new(),
var_index,
}
}
pub fn add_variable(&mut self, var: Variable) -> usize {
if let Some(&idx) = self.var_index.get(&var) {
idx
} else {
let idx = self.variables.len();
self.variables.push(var.clone());
self.var_index.insert(var, idx);
idx
}
}
pub fn bind(
&mut self,
var: Variable,
term: Term,
metadata: BindingMetadata,
) -> Result<(), OxirsError> {
if !self.var_index.contains_key(&var) {
self.add_variable(var.clone());
}
self.validate_binding(&var, &term)?;
self.bindings.retain(|b| b.variable != var);
self.bindings.push(TermBinding {
variable: var,
term,
metadata,
});
Ok(())
}
pub fn get(&self, var: &Variable) -> Option<&Term> {
self.bindings
.iter()
.find(|b| &b.variable == var)
.map(|b| &b.term)
}
pub fn is_bound(&self, var: &Variable) -> bool {
self.bindings.iter().any(|b| &b.variable == var)
}
pub fn unbound_variables(&self) -> Vec<&Variable> {
self.variables
.iter()
.filter(|v| !self.is_bound(v))
.collect()
}
pub fn add_constraint(&mut self, constraint: Constraint) {
self.constraints.push(constraint);
}
fn validate_binding(&self, var: &Variable, term: &Term) -> Result<(), OxirsError> {
for constraint in &self.constraints {
match constraint {
Constraint::TypeConstraint {
variable,
allowed_types,
} => {
if variable == var && !self.check_type_constraint(term, allowed_types) {
return Err(OxirsError::Query(format!(
"Type constraint violation for variable {var}"
)));
}
}
Constraint::ValueConstraint {
variable,
constraint,
} => {
if variable == var && !self.check_value_constraint(term, constraint) {
return Err(OxirsError::Query(format!(
"Value constraint violation for variable {var}"
)));
}
}
_ => {} }
}
Ok(())
}
fn check_type_constraint(&self, term: &Term, allowed_types: &HashSet<TermType>) -> bool {
let term_type = match term {
Term::NamedNode(_) => TermType::NamedNode,
Term::BlankNode(_) => TermType::BlankNode,
Term::Literal(lit) => {
let datatype = lit.datatype();
let datatype_str = datatype.as_str();
if datatype_str == "http://www.w3.org/2001/XMLSchema#integer"
|| datatype_str == "http://www.w3.org/2001/XMLSchema#decimal"
|| datatype_str == "http://www.w3.org/2001/XMLSchema#double"
{
TermType::NumericLiteral
} else if datatype_str == "http://www.w3.org/2001/XMLSchema#boolean" {
TermType::BooleanLiteral
} else if datatype_str == "http://www.w3.org/2001/XMLSchema#dateTime" {
TermType::DateTimeLiteral
} else if datatype_str == "http://www.w3.org/2001/XMLSchema#string"
|| datatype_str == "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
{
TermType::StringLiteral
} else {
TermType::Literal
}
}
_ => return false, };
if allowed_types.contains(&term_type) {
return true;
}
match term_type {
TermType::NumericLiteral
| TermType::StringLiteral
| TermType::BooleanLiteral
| TermType::DateTimeLiteral => allowed_types.contains(&TermType::Literal),
_ => false,
}
}
fn check_value_constraint(&self, term: &Term, constraint: &ValueConstraintType) -> bool {
match constraint {
ValueConstraintType::NumericRange { min, max } => {
if let Term::Literal(lit) = term {
if let Ok(value) = lit.value().parse::<f64>() {
return value >= *min && value <= *max;
}
}
false
}
ValueConstraintType::StringPattern(regex) => {
if let Term::Literal(lit) = term {
return regex.is_match(lit.value());
}
false
}
ValueConstraintType::OneOf(allowed) => allowed.contains(term),
ValueConstraintType::NoneOf(forbidden) => !forbidden.contains(term),
}
}
pub fn to_map(&self) -> HashMap<Variable, Term> {
self.bindings
.iter()
.map(|b| (b.variable.clone(), b.term.clone()))
.collect()
}
pub fn merge(&mut self, other: &BindingSet) -> Result<(), OxirsError> {
for var in &other.variables {
self.add_variable(var.clone());
}
for binding in &other.bindings {
if let Some(existing) = self.get(&binding.variable) {
if existing != &binding.term {
return Err(OxirsError::Query(format!(
"Conflicting bindings for variable {}",
binding.variable
)));
}
} else {
self.bindings.push(binding.clone());
}
}
self.constraints.extend(other.constraints.clone());
Ok(())
}
pub fn apply_to_pattern(&self, pattern: &TermPattern) -> TermPattern {
match pattern {
TermPattern::Variable(var) => {
if let Some(term) = self.get(var) {
match term {
Term::NamedNode(n) => TermPattern::NamedNode(n.clone()),
Term::BlankNode(b) => TermPattern::BlankNode(b.clone()),
Term::Literal(l) => TermPattern::Literal(l.clone()),
_ => pattern.clone(), }
} else {
pattern.clone()
}
}
_ => pattern.clone(),
}
}
}
pub struct BindingOptimizer {
binding_cache: HashMap<String, Arc<BindingSet>>,
stats: BindingStats,
}
#[derive(Debug, Default)]
struct BindingStats {
bindings_created: usize,
cache_hits: usize,
cache_misses: usize,
constraint_violations: usize,
}
impl BindingOptimizer {
pub fn new() -> Self {
Self {
binding_cache: HashMap::new(),
stats: BindingStats::default(),
}
}
pub fn optimize_bindings(
&mut self,
variables: Vec<Variable>,
constraints: Vec<Constraint>,
) -> Arc<BindingSet> {
let cache_key = self.create_cache_key(&variables, &constraints);
if let Some(cached) = self.binding_cache.get(&cache_key) {
self.stats.cache_hits += 1;
return Arc::clone(cached);
}
self.stats.cache_misses += 1;
let mut binding_set = BindingSet::with_variables(variables);
for constraint in constraints {
binding_set.add_constraint(constraint);
}
self.propagate_constraints(&mut binding_set);
let arc_set = Arc::new(binding_set);
self.binding_cache.insert(cache_key, Arc::clone(&arc_set));
arc_set
}
fn create_cache_key(&self, variables: &[Variable], constraints: &[Constraint]) -> String {
let mut key = String::new();
for var in variables {
key.push_str(var.as_str());
key.push(',');
}
key.push('|');
key.push_str(&format!("{}", constraints.len()));
key
}
fn propagate_constraints(&self, binding_set: &mut BindingSet) {
let mut constraint_graph: HashMap<Variable, Vec<usize>> = HashMap::new();
for (idx, constraint) in binding_set.constraints.iter().enumerate() {
match constraint {
Constraint::TypeConstraint { variable, .. }
| Constraint::ValueConstraint { variable, .. } => {
constraint_graph
.entry(variable.clone())
.or_default()
.push(idx);
}
Constraint::RelationshipConstraint { left, right, .. } => {
constraint_graph.entry(left.clone()).or_default().push(idx);
constraint_graph.entry(right.clone()).or_default().push(idx);
}
_ => {}
}
}
self.propagate_equality_constraints(binding_set, constraint_graph);
}
fn propagate_equality_constraints(
&self,
binding_set: &mut BindingSet,
constraint_graph: HashMap<Variable, Vec<usize>>,
) {
let mut equiv_classes: HashMap<Variable, Variable> = HashMap::new();
for constraint in &binding_set.constraints {
if let Constraint::RelationshipConstraint {
left,
right,
relation: RelationType::Equal,
} = constraint
{
let left_root = self.find_root(left, &equiv_classes);
let right_root = self.find_root(right, &equiv_classes);
if left_root != right_root {
equiv_classes.insert(left_root, right_root.clone());
}
}
}
for (var, root) in &equiv_classes {
if var != root {
if let Some(root_constraints) = constraint_graph.get(root) {
for &_constraint in root_constraints {
}
}
}
}
}
fn find_root<'a>(
&self,
var: &'a Variable,
equiv_classes: &'a HashMap<Variable, Variable>,
) -> Variable {
let mut current = var.clone();
while let Some(parent) = equiv_classes.get(¤t) {
if parent == ¤t {
break;
}
current = parent.clone();
}
current
}
pub fn stats(&self) -> String {
format!(
"Bindings created: {}, Cache hits: {}, Cache misses: {}, Violations: {}",
self.stats.bindings_created,
self.stats.cache_hits,
self.stats.cache_misses,
self.stats.constraint_violations
)
}
}
pub struct BindingIterator {
base_bindings: Vec<HashMap<Variable, Term>>,
variables: Vec<Variable>,
possible_values: HashMap<Variable, Vec<Term>>,
current_position: Vec<usize>,
constraints: Vec<Constraint>,
}
impl BindingIterator {
pub fn new(
base_bindings: Vec<HashMap<Variable, Term>>,
variables: Vec<Variable>,
possible_values: HashMap<Variable, Vec<Term>>,
constraints: Vec<Constraint>,
) -> Self {
let current_position = vec![0; variables.len()];
Self {
base_bindings,
variables,
possible_values,
current_position,
constraints,
}
}
pub fn next_valid(&mut self) -> Option<HashMap<Variable, Term>> {
while let Some(binding) = self.next_combination() {
if self.validate_binding(&binding) {
return Some(binding);
}
}
None
}
fn next_combination(&mut self) -> Option<HashMap<Variable, Term>> {
if self.base_bindings.is_empty() {
return None;
}
if self.current_position.iter().all(|&p| p == 0) && !self.current_position.is_empty() {
} else if self.current_position.is_empty() {
return None;
}
let mut result = self.base_bindings[0].clone();
for (i, var) in self.variables.iter().enumerate() {
if let Some(values) = self.possible_values.get(var) {
if self.current_position[i] < values.len() {
result.insert(var.clone(), values[self.current_position[i]].clone());
}
}
}
self.increment_position();
Some(result)
}
fn increment_position(&mut self) {
for i in (0..self.current_position.len()).rev() {
if let Some(values) = self.possible_values.get(&self.variables[i]) {
if self.current_position[i] + 1 < values.len() {
self.current_position[i] += 1;
return;
} else {
self.current_position[i] = 0;
}
}
}
self.current_position.clear();
}
fn validate_binding(&self, binding: &HashMap<Variable, Term>) -> bool {
for constraint in &self.constraints {
if let Constraint::RelationshipConstraint {
left,
right,
relation,
} = constraint
{
if let (Some(left_val), Some(right_val)) = (binding.get(left), binding.get(right)) {
if !self.check_relation(left_val, right_val, *relation) {
return false;
}
}
}
}
true
}
fn check_relation(&self, left: &Term, right: &Term, relation: RelationType) -> bool {
match relation {
RelationType::Equal => left == right,
RelationType::NotEqual => left != right,
_ => {
if let (Term::Literal(l_lit), Term::Literal(r_lit)) = (left, right) {
if let (Ok(l_val), Ok(r_val)) =
(l_lit.value().parse::<f64>(), r_lit.value().parse::<f64>())
{
match relation {
RelationType::LessThan => l_val < r_val,
RelationType::LessThanOrEqual => l_val <= r_val,
RelationType::GreaterThan => l_val > r_val,
RelationType::GreaterThanOrEqual => l_val >= r_val,
_ => false,
}
} else {
false
}
} else {
false
}
}
}
}
}
impl Default for BindingSet {
fn default() -> Self {
Self::new()
}
}
impl Default for BindingOptimizer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_binding_set_basic() {
let mut bindings = BindingSet::new();
let var_x = Variable::new("x").expect("valid variable name");
let var_y = Variable::new("y").expect("valid variable name");
bindings.add_variable(var_x.clone());
bindings.add_variable(var_y.clone());
let term = Term::NamedNode(NamedNode::new("http://example.org/alice").expect("valid IRI"));
bindings
.bind(var_x.clone(), term.clone(), BindingMetadata::default())
.expect("operation should succeed");
assert_eq!(bindings.get(&var_x), Some(&term));
assert_eq!(bindings.get(&var_y), None);
let unbound = bindings.unbound_variables();
assert_eq!(unbound.len(), 1);
assert_eq!(unbound[0], &var_y);
}
#[test]
fn test_type_constraints() {
let mut bindings = BindingSet::new();
let var = Variable::new("x").expect("valid variable name");
bindings.add_constraint(Constraint::TypeConstraint {
variable: var.clone(),
allowed_types: vec![TermType::Literal, TermType::NumericLiteral]
.into_iter()
.collect(),
});
let named_node =
Term::NamedNode(NamedNode::new("http://example.org/thing").expect("valid IRI"));
assert!(bindings
.bind(var.clone(), named_node, BindingMetadata::default())
.is_err());
let literal = Term::Literal(Literal::new("test"));
assert!(bindings
.bind(var.clone(), literal, BindingMetadata::default())
.is_ok());
}
#[test]
fn test_value_constraints() {
let mut bindings = BindingSet::new();
let var = Variable::new("age").expect("valid variable name");
bindings.add_constraint(Constraint::ValueConstraint {
variable: var.clone(),
constraint: ValueConstraintType::NumericRange {
min: 0.0,
max: 150.0,
},
});
let valid_age = Term::Literal(Literal::new("25"));
assert!(bindings
.bind(var.clone(), valid_age, BindingMetadata::default())
.is_ok());
let invalid_age = Term::Literal(Literal::new("200"));
assert!(bindings
.bind(var.clone(), invalid_age, BindingMetadata::default())
.is_err());
}
#[test]
fn test_binding_merge() {
let mut bindings1 = BindingSet::new();
let mut bindings2 = BindingSet::new();
let var_x = Variable::new("x").expect("valid variable name");
let var_y = Variable::new("y").expect("valid variable name");
let term_x = Term::NamedNode(NamedNode::new("http://example.org/x").expect("valid IRI"));
let term_y = Term::NamedNode(NamedNode::new("http://example.org/y").expect("valid IRI"));
bindings1
.bind(var_x.clone(), term_x.clone(), BindingMetadata::default())
.expect("operation should succeed");
bindings2
.bind(var_y.clone(), term_y.clone(), BindingMetadata::default())
.expect("operation should succeed");
bindings1.merge(&bindings2).expect("merge should succeed");
assert_eq!(bindings1.get(&var_x), Some(&term_x));
assert_eq!(bindings1.get(&var_y), Some(&term_y));
}
#[test]
fn test_binding_optimizer() {
let mut optimizer = BindingOptimizer::new();
let vars = vec![
Variable::new("x").expect("valid variable name"),
Variable::new("y").expect("valid variable name"),
];
let constraints = vec![];
let _bindings1 = optimizer.optimize_bindings(vars.clone(), constraints.clone());
let _bindings2 = optimizer.optimize_bindings(vars, constraints);
let stats = optimizer.stats();
assert!(stats.contains("Cache hits: 1"));
}
}