use serde::{Deserialize, Serialize};
use std::fmt;
/// One operator node of the intermediate-representation (IR) query tree.
///
/// Operators with inputs own them as boxed `IrOp` children, so an entire
/// query is a single recursive value (it is what `IrQueryPlan::root` holds).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum IrOp {
/// Pattern made of three terms: subject, predicate and object.
TriplePattern {
subject: IrTerm,
predicate: IrTerm,
object: IrTerm,
},
/// An inner operator that appears quoted, together with the slot
/// (`QuotePosition`) it occupies.
/// NOTE(review): presumably an RDF-star style quoted triple — confirm.
QuotedTriplePattern {
inner: Box<IrOp>,
position: QuotePosition,
},
/// A filter condition. This variant carries only the expression; it has no
/// child operator field.
Filter { condition: IrExpr },
/// Combination of two child operators according to `join_type`.
Join {
left: Box<IrOp>,
right: Box<IrOp>,
join_type: JoinType,
},
/// Two child operators combined as a union.
Union { left: Box<IrOp>, right: Box<IrOp> },
/// A child operator together with a list of names in `vars`.
/// NOTE(review): presumably the variables kept in the output — confirm.
Project { vars: Vec<String>, child: Box<IrOp> },
/// Distinct marker wrapped around a single child operator.
Distinct { child: Box<IrOp> },
/// A child operator with an optional `limit` and an `offset`.
/// NOTE(review): `limit: None` presumably means "no limit" — confirm
/// against the executor.
Slice {
child: Box<IrOp>,
limit: Option<usize>,
offset: usize,
},
/// A child operator plus the list of ordering conditions applied to it.
Order {
child: Box<IrOp>,
conditions: Vec<OrderCondition>,
},
/// Leaf operator naming an index (`index_type`) and the key terms used
/// with it.
IndexScan {
index_type: IndexType,
keys: Vec<IrTerm>,
},
/// Leaf operator with no fields; `IrQueryPlan`'s cost estimate assigns it
/// the largest fixed cost of all operators (100.0).
SeqScan,
}
/// Slot in which a `IrOp::QuotedTriplePattern` appears: only subject and
/// object are representable (there is no predicate variant).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QuotePosition {
/// The quoted pattern sits in the subject slot.
Subject,
/// The quoted pattern sits in the object slot.
Object,
}
/// Kind of join carried by `IrOp::Join`.
///
/// In this file the only place the value is inspected is the parallelism
/// analysis of `IrQueryPlan`, which marks a join as parallelizable only when
/// it is `Inner`.
/// NOTE(review): the semantic difference between `Left` and `Optional` is not
/// visible here — confirm with the executor.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum JoinType {
/// Inner join.
Inner,
/// Left join.
Left,
/// Optional join.
Optional,
}
/// Index selected by an `IrOp::IndexScan`: one of `SPO`, `POS` or `OSP`.
/// NOTE(review): the names presumably spell the key order over
/// subject (S), predicate (P) and object (O) — confirm with the storage layer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum IndexType {
SPO, POS, OSP, }
/// A term usable in a triple pattern slot or as an index-scan key.
///
/// Unlike `IrOp` and `IrExpr`, this type also derives `Eq` and `Hash`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IrTerm {
/// A variable, identified by the contained string.
/// NOTE(review): the tests build these without a leading `?` — confirm the
/// expected naming convention.
Variable(String),
/// An IRI stored as a plain string.
Iri(String),
/// A literal with its lexical `value` and optional `datatype` / `language`
/// annotations.
Literal {
value: String,
datatype: Option<String>,
language: Option<String>,
},
/// A blank node, identified by the contained string.
BlankNode(String),
/// A nested triple of three terms, boxed so the type can be recursive.
QuotedTriple(Box<(IrTerm, IrTerm, IrTerm)>),
/// Field-less variant.
/// NOTE(review): presumably a wildcard that matches any term — confirm.
Any,
}
/// Expression tree, used as the condition of `IrOp::Filter`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum IrExpr {
/// Reference to a variable by name.
Var(String),
/// A constant value.
Const(IrValue),
/// Binary operator applied to two boxed sub-expressions.
BinOp {
op: BinOp,
left: Box<IrExpr>,
right: Box<IrExpr>,
},
/// Unary operator applied to one boxed sub-expression.
UnaryOp { op: UnaryOp, operand: Box<IrExpr> },
/// Call of a function identified by `name` with the given arguments.
FunctionCall { name: String, args: Vec<IrExpr> },
/// Existence test over an operator sub-tree; `negated` flips the test.
/// NOTE(review): presumably EXISTS / NOT EXISTS — confirm.
Exists { pattern: Box<IrOp>, negated: bool },
}
/// Operators available to `IrExpr::BinOp`: comparisons (`Eq`..`Ge`), boolean
/// connectives (`And`, `Or`), arithmetic (`Add`..`Div`) and two string-oriented
/// operators (`Concat`, `Regex`).
/// NOTE(review): evaluation semantics are not defined in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinOp {
Eq,
Ne,
Lt,
Le,
Gt,
Ge,
And,
Or,
Add,
Sub,
Mul,
Div,
Concat,
Regex,
}
/// Operators available to `IrExpr::UnaryOp`: `Not`, `Neg`, and three
/// term-kind tests (`IsIri`, `IsBlank`, `IsLiteral`).
/// NOTE(review): evaluation semantics are not defined in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnaryOp {
Not,
Neg,
IsIri,
IsBlank,
IsLiteral,
}
/// Constant value carried by `IrExpr::Const`.
///
/// Derives only `PartialEq` (not `Eq`): the `Float` variant holds an `f64`,
/// which does not implement `Eq`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum IrValue {
/// Boolean constant.
Bool(bool),
/// Signed 64-bit integer constant.
Int(i64),
/// 64-bit floating-point constant.
Float(f64),
/// String constant.
String(String),
/// Field-less null value.
Null,
}
/// One sort key of an `IrOp::Order`: ascending (`Asc`) or descending (`Desc`).
/// NOTE(review): the contained string is presumably a variable name — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum OrderCondition {
Asc(String), Desc(String), }
/// A query plan: an operator tree plus analysis results stored next to it.
///
/// `IrQueryPlan::new` fills every field except `root` with a zero/default
/// value; `estimate_cost` and `analyze_parallelism` overwrite `estimated_cost`
/// and `parallel_hints` respectively.
#[derive(Debug, Clone)]
pub struct IrQueryPlan {
/// Root operator of the tree.
pub root: IrOp,
/// Cost figure written by `estimate_cost`; `0.0` in a plan fresh from `new`.
pub estimated_cost: f64,
/// Result of `analyze_parallelism`; all-default in a plan fresh from `new`.
pub parallel_hints: ParallelHints,
/// Set to `MemoryHints::default()` by `new`; nothing in the visible code
/// updates it afterwards.
pub memory_hints: MemoryHints,
}
/// Parallel-execution hints attached to an `IrQueryPlan`.
///
/// The derived `Default` is `false` / `false` / `None`.
#[derive(Debug, Clone, Default)]
pub struct ParallelHints {
/// Whether the scans of the analysed tree may be run in parallel.
pub can_parallelize_scans: bool,
/// Whether the join at the top of the analysed tree may be run in parallel.
pub can_parallelize_joins: bool,
/// Suggested degree of parallelism, if any; the analysis in this file
/// fills it with `num_cpus::get()`.
pub suggested_parallelism: Option<usize>,
}
/// Memory-related hints attached to an `IrQueryPlan`.
///
/// The derived `Default` is `0` / `false` / `false`. No code in the visible
/// part of this file computes other values.
#[derive(Debug, Clone, Default)]
pub struct MemoryHints {
/// Estimated number of results.
pub estimated_results: usize,
/// Whether the plan uses an index.
pub uses_index: bool,
/// Whether access is sequential.
/// NOTE(review): exact meaning for the consumer is not visible here.
pub sequential_access: bool,
}
impl IrQueryPlan {
    /// Wraps `root` in a plan on which no analysis has been run yet.
    ///
    /// `estimated_cost` starts at `0.0` and both hint structs start at their
    /// `Default` values; call [`estimate_cost`](Self::estimate_cost) and
    /// [`analyze_parallelism`](Self::analyze_parallelism) to fill them in.
    pub fn new(root: IrOp) -> Self {
        Self {
            root,
            estimated_cost: 0.0,
            parallel_hints: ParallelHints::default(),
            memory_hints: MemoryHints::default(),
        }
    }

    /// Recomputes `estimated_cost` from the current operator tree.
    pub fn estimate_cost(&mut self) {
        self.estimated_cost = Self::cost_recursive(&self.root);
    }

    /// Sums fixed per-operator costs over the tree rooted at `op`.
    ///
    /// Leaves and `Filter` contribute a constant. `QuotedTriplePattern` is
    /// also costed as a constant (its `inner` operator is not visited).
    /// Operators with children add their own constant (none for `Union` and
    /// `Slice`) to the cost of their children.
    fn cost_recursive(op: &IrOp) -> f64 {
        match op {
            // Fixed-cost operators.
            IrOp::TriplePattern { .. } => 10.0,
            IrOp::QuotedTriplePattern { .. } => 15.0,
            IrOp::Filter { .. } => 5.0,
            IrOp::IndexScan { .. } => 5.0,
            IrOp::SeqScan => 100.0,

            // Two-input operators.
            IrOp::Join { left, right, .. } => {
                50.0 + Self::cost_recursive(left) + Self::cost_recursive(right)
            }
            IrOp::Union { left, right } => {
                Self::cost_recursive(left) + Self::cost_recursive(right)
            }

            // Single-input operators.
            IrOp::Project { child, .. } => 2.0 + Self::cost_recursive(child),
            IrOp::Distinct { child } => 20.0 + Self::cost_recursive(child),
            IrOp::Slice { child, .. } => Self::cost_recursive(child),
            IrOp::Order { child, .. } => 30.0 + Self::cost_recursive(child),
        }
    }

    /// Recomputes `parallel_hints` from the current operator tree.
    pub fn analyze_parallelism(&mut self) {
        self.parallel_hints = Self::analyze_parallel_recursive(&self.root);
    }

    /// Derives parallelism hints for the tree rooted at `op`.
    ///
    /// * `TriplePattern` / `IndexScan`: scans are parallelizable.
    /// * `Join`: scans are parallelizable only if they are on both sides; the
    ///   join itself only when it is an inner join.
    /// * `Project` / `Distinct` / `Slice` / `Order`: the hints of the single
    ///   child are forwarded unchanged, so a plan whose root is one of these
    ///   wrappers still reports what its subtree allows instead of falling
    ///   back to "nothing is parallelizable".
    /// * Everything else: conservative `ParallelHints::default()`.
    ///
    /// The match is deliberately exhaustive so that adding an `IrOp` variant
    /// forces a decision here.
    fn analyze_parallel_recursive(op: &IrOp) -> ParallelHints {
        match op {
            IrOp::TriplePattern { .. } | IrOp::IndexScan { .. } => ParallelHints {
                can_parallelize_scans: true,
                can_parallelize_joins: false,
                suggested_parallelism: Some(num_cpus::get()),
            },
            IrOp::Join {
                left,
                right,
                join_type,
            } => {
                let left_hints = Self::analyze_parallel_recursive(left);
                let right_hints = Self::analyze_parallel_recursive(right);
                ParallelHints {
                    can_parallelize_scans: left_hints.can_parallelize_scans
                        && right_hints.can_parallelize_scans,
                    can_parallelize_joins: *join_type == JoinType::Inner,
                    suggested_parallelism: Some(num_cpus::get()),
                }
            }
            // Single-input wrappers: whatever holds for the child holds for
            // the wrapped plan.
            IrOp::Project { child, .. }
            | IrOp::Distinct { child }
            | IrOp::Slice { child, .. }
            | IrOp::Order { child, .. } => Self::analyze_parallel_recursive(child),
            // No analysis is attempted for these; keep the conservative default.
            IrOp::QuotedTriplePattern { .. }
            | IrOp::Filter { .. }
            | IrOp::Union { .. }
            | IrOp::SeqScan => ParallelHints::default(),
        }
    }
}
impl fmt::Display for IrOp {
    /// Writes a short, single-line label for this operator.
    ///
    /// Only the operator's own data is printed: children of `Join`, `Union`,
    /// `Project` and `Distinct` are omitted, as are the keys of `IndexScan`.
    /// Payloads are rendered with their `Debug` representation. `Slice`,
    /// `Order` and `SeqScan` have no dedicated label and are printed as their
    /// full `Debug` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Operators whose label is a fixed keyword.
            IrOp::Union { .. } => f.write_str("UNION"),
            IrOp::Distinct { .. } => f.write_str("DISTINCT"),

            // Operators whose label embeds part of their payload.
            IrOp::TriplePattern {
                subject: s,
                predicate: p,
                object: o,
            } => write!(f, "({:?} {:?} {:?})", s, p, o),
            IrOp::QuotedTriplePattern {
                inner: quoted,
                position: slot,
            } => write!(f, "<<{:?}>> at {:?}", quoted, slot),
            IrOp::Filter { condition: cond } => write!(f, "FILTER({:?})", cond),
            IrOp::Join {
                join_type: kind, ..
            } => write!(f, "JOIN({:?})", kind),
            IrOp::Project { vars: names, .. } => write!(f, "PROJECT({:?})", names),
            IrOp::IndexScan {
                index_type: index, ..
            } => write!(f, "INDEX_SCAN({:?})", index),

            // No dedicated label: fall back to the derived `Debug` output.
            IrOp::Slice { .. } | IrOp::Order { .. } | IrOp::SeqScan => {
                write!(f, "{:?}", self)
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an `IrTerm::Variable` with the given name.
    fn var(name: &str) -> IrTerm {
        IrTerm::Variable(name.to_owned())
    }

    /// The all-variable pattern over `s`, `p` and `o` shared by most tests.
    fn spo_pattern() -> IrOp {
        IrOp::TriplePattern {
            subject: var("s"),
            predicate: var("p"),
            object: var("o"),
        }
    }

    /// Terms can be built and land in the expected variant.
    #[test]
    fn test_ir_term_creation() {
        let variable = var("s");
        assert!(matches!(variable, IrTerm::Variable(_)));

        let iri = IrTerm::Iri(String::from("http://example.org/foo"));
        assert!(matches!(iri, IrTerm::Iri(_)));
    }

    /// A triple pattern built from three variables is a `TriplePattern`.
    #[test]
    fn test_triple_pattern_ir() {
        let pattern = spo_pattern();
        assert!(
            matches!(pattern, IrOp::TriplePattern { .. }),
            "Expected TriplePattern"
        );
    }

    /// A lone triple pattern costs its fixed 10.0.
    #[test]
    fn test_cost_estimation() {
        let mut plan = IrQueryPlan::new(spo_pattern());
        plan.estimate_cost();
        assert_eq!(plan.estimated_cost, 10.0);
    }

    /// A join costs 50.0 plus both of its inputs (10.0 each here).
    #[test]
    fn test_join_cost_estimation() {
        let rhs = IrOp::TriplePattern {
            subject: var("s"),
            predicate: IrTerm::Iri(String::from("http://ex.org/name")),
            object: var("name"),
        };
        let join = IrOp::Join {
            left: Box::new(spo_pattern()),
            right: Box::new(rhs),
            join_type: JoinType::Inner,
        };

        let mut plan = IrQueryPlan::new(join);
        plan.estimate_cost();
        assert_eq!(plan.estimated_cost, 70.0);
    }

    /// A triple pattern is reported as a parallelizable scan with a
    /// suggested degree of parallelism.
    #[test]
    fn test_parallelism_analysis() {
        let mut plan = IrQueryPlan::new(spo_pattern());
        plan.analyze_parallelism();

        let hints = &plan.parallel_hints;
        assert!(hints.can_parallelize_scans);
        assert!(hints.suggested_parallelism.is_some());
    }

    /// The quote position given at construction is preserved.
    #[test]
    fn test_quoted_triple_ir() {
        let quoted = IrOp::QuotedTriplePattern {
            inner: Box::new(spo_pattern()),
            position: QuotePosition::Subject,
        };

        if let IrOp::QuotedTriplePattern { position, .. } = quoted {
            assert_eq!(position, QuotePosition::Subject);
        } else {
            panic!("Expected QuotedTriplePattern");
        }
    }

    /// The operator given at construction is preserved in a binary expression.
    #[test]
    fn test_ir_expression() {
        let lhs = IrExpr::Var(String::from("x"));
        let rhs = IrExpr::Const(IrValue::Int(42));
        let expr = IrExpr::BinOp {
            op: BinOp::Eq,
            left: Box::new(lhs),
            right: Box::new(rhs),
        };

        if let IrExpr::BinOp { op, .. } = expr {
            assert_eq!(op, BinOp::Eq);
        } else {
            panic!("Expected BinOp");
        }
    }
}