1use oxirs_core::model::{
7 BlankNode as CoreBlankNode, Literal as CoreLiteral, NamedNode, Object, Predicate, Subject,
8};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fmt;
12
13pub use oxirs_core::model::Variable;
15
16pub type Iri = NamedNode;
18
19#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
21pub struct Literal {
22 pub value: String,
23 pub language: Option<String>,
24 pub datatype: Option<NamedNode>,
25}
26
27impl fmt::Display for Literal {
28 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
29 write!(f, "\"{}\"", self.value)?;
30 if let Some(lang) = &self.language {
31 write!(f, "@{lang}")?;
32 } else if let Some(dt) = &self.datatype {
33 write!(f, "^^{dt}")?;
34 }
35 Ok(())
36 }
37}
38
39impl Literal {
40 pub fn with_language(value: String, language: String) -> Self {
42 Self {
43 value,
44 language: Some(language),
45 datatype: None,
46 }
47 }
48}
49
50impl From<CoreLiteral> for Literal {
51 fn from(core_literal: CoreLiteral) -> Self {
52 let (value, datatype, language) = core_literal.destruct();
53 Self {
54 value,
55 language,
56 datatype,
57 }
58 }
59}
60
61impl From<Literal> for CoreLiteral {
62 fn from(literal: Literal) -> Self {
63 if let Some(lang) = literal.language {
64 CoreLiteral::new_language_tagged_literal(&literal.value, lang)
65 .unwrap_or_else(|_| CoreLiteral::new_simple_literal(literal.value))
66 } else if let Some(datatype) = literal.datatype {
67 CoreLiteral::new_typed_literal(literal.value, datatype)
68 } else {
69 CoreLiteral::new_simple_literal(literal.value)
70 }
71 }
72}
73
74#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
76pub enum Term {
77 Variable(Variable),
78 Iri(NamedNode),
79 Literal(Literal),
80 BlankNode(String),
81 QuotedTriple(Box<TriplePattern>),
82 PropertyPath(PropertyPath),
83}
84
85impl fmt::Display for Term {
86 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87 match self {
88 Term::Variable(v) => write!(f, "?{v}"),
89 Term::Iri(iri) => write!(f, "{iri}"),
90 Term::Literal(lit) => write!(f, "{lit}"),
91 Term::BlankNode(id) => write!(f, "_:{id}"),
92 Term::QuotedTriple(triple) => write!(
93 f,
94 "<<{} {} {}>>",
95 triple.subject, triple.predicate, triple.object
96 ),
97 Term::PropertyPath(path) => write!(f, "{path}"),
98 }
99 }
100}
101
102impl From<Subject> for Term {
103 fn from(subject: Subject) -> Self {
104 match subject {
105 Subject::NamedNode(n) => Term::Iri(n),
106 Subject::BlankNode(b) => Term::BlankNode(b.id().to_string()),
107 Subject::Variable(v) => Term::Variable(v),
108 Subject::QuotedTriple(quoted_triple) => {
109 Term::QuotedTriple(Box::new(TriplePattern {
111 subject: Term::from(quoted_triple.subject().clone()),
112 predicate: Term::from(quoted_triple.predicate().clone()),
113 object: Term::from(quoted_triple.object().clone()),
114 }))
115 }
116 }
117 }
118}
119
120impl From<Predicate> for Term {
121 fn from(predicate: Predicate) -> Self {
122 match predicate {
123 Predicate::NamedNode(n) => Term::Iri(n),
124 Predicate::Variable(v) => Term::Variable(v),
125 }
126 }
127}
128
129impl From<Object> for Term {
130 fn from(object: Object) -> Self {
131 match object {
132 Object::NamedNode(n) => Term::Iri(n),
133 Object::BlankNode(b) => Term::BlankNode(b.id().to_string()),
134 Object::Literal(l) => Term::Literal(l.into()),
135 Object::Variable(v) => Term::Variable(v),
136 Object::QuotedTriple(quoted_triple) => {
137 Term::QuotedTriple(Box::new(TriplePattern {
139 subject: Term::from(quoted_triple.subject().clone()),
140 predicate: Term::from(quoted_triple.predicate().clone()),
141 object: Term::from(quoted_triple.object().clone()),
142 }))
143 }
144 }
145 }
146}
147
148impl From<NamedNode> for Term {
149 fn from(node: NamedNode) -> Self {
150 Term::Iri(node)
151 }
152}
153
154impl From<CoreBlankNode> for Term {
155 fn from(node: CoreBlankNode) -> Self {
156 Term::BlankNode(node.id().to_string())
157 }
158}
159
160impl From<CoreLiteral> for Term {
161 fn from(literal: CoreLiteral) -> Self {
162 Term::Literal(literal.into())
163 }
164}
165
166impl From<Variable> for Term {
167 fn from(variable: Variable) -> Self {
168 Term::Variable(variable)
169 }
170}
171
172#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
174pub struct TriplePattern {
175 pub subject: Term,
176 pub predicate: Term,
177 pub object: Term,
178}
179
180impl fmt::Display for TriplePattern {
181 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
182 write!(f, "{} {} {}", self.subject, self.predicate, self.object)
183 }
184}
185
186#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
188pub enum Expression {
189 Variable(Variable),
191 Literal(Literal),
193 Iri(Iri),
195 Function { name: String, args: Vec<Expression> },
197 Binary {
199 op: BinaryOperator,
200 left: Box<Expression>,
201 right: Box<Expression>,
202 },
203 Unary {
205 op: UnaryOperator,
206 operand: Box<Expression>,
207 },
208 Conditional {
210 condition: Box<Expression>,
211 then_expr: Box<Expression>,
212 else_expr: Box<Expression>,
213 },
214 Bound(Variable),
216 Exists(Box<Algebra>),
218 NotExists(Box<Algebra>),
220}
221
222#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
224pub enum BinaryOperator {
225 Add,
226 Subtract,
227 Multiply,
228 Divide,
229 Equal,
230 NotEqual,
231 Less,
232 LessEqual,
233 Greater,
234 GreaterEqual,
235 And,
236 Or,
237 SameTerm,
238 In,
239 NotIn,
240}
241
242impl std::fmt::Display for BinaryOperator {
243 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
244 match self {
245 BinaryOperator::Add => write!(f, "+"),
246 BinaryOperator::Subtract => write!(f, "-"),
247 BinaryOperator::Multiply => write!(f, "*"),
248 BinaryOperator::Divide => write!(f, "/"),
249 BinaryOperator::Equal => write!(f, "="),
250 BinaryOperator::NotEqual => write!(f, "!="),
251 BinaryOperator::Less => write!(f, "<"),
252 BinaryOperator::LessEqual => write!(f, "<="),
253 BinaryOperator::Greater => write!(f, ">"),
254 BinaryOperator::GreaterEqual => write!(f, ">="),
255 BinaryOperator::And => write!(f, "&&"),
256 BinaryOperator::Or => write!(f, "||"),
257 BinaryOperator::SameTerm => write!(f, "sameTerm"),
258 BinaryOperator::In => write!(f, "IN"),
259 BinaryOperator::NotIn => write!(f, "NOT IN"),
260 }
261 }
262}
263
264#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
266pub enum UnaryOperator {
267 Not,
268 Plus,
269 Minus,
270 IsIri,
271 IsBlank,
272 IsLiteral,
273 IsNumeric,
274}
275
276#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
278pub enum Aggregate {
279 Count {
280 distinct: bool,
281 expr: Option<Expression>,
282 },
283 Sum {
284 distinct: bool,
285 expr: Expression,
286 },
287 Min {
288 distinct: bool,
289 expr: Expression,
290 },
291 Max {
292 distinct: bool,
293 expr: Expression,
294 },
295 Avg {
296 distinct: bool,
297 expr: Expression,
298 },
299 Sample {
300 distinct: bool,
301 expr: Expression,
302 },
303 GroupConcat {
304 distinct: bool,
305 expr: Expression,
306 separator: Option<String>,
307 },
308}
309
310pub type Binding = HashMap<Variable, Term>;
312
313pub type Solution = Vec<Binding>;
315
316#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
318pub struct OrderCondition {
319 pub expr: Expression,
320 pub ascending: bool,
321}
322
323#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
325pub struct GroupCondition {
326 pub expr: Expression,
327 pub alias: Option<Variable>,
328}
329
330#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
332pub enum PropertyPath {
333 Iri(Iri),
335 Variable(Variable),
337 Inverse(Box<PropertyPath>),
339 Sequence(Box<PropertyPath>, Box<PropertyPath>),
341 Alternative(Box<PropertyPath>, Box<PropertyPath>),
343 ZeroOrMore(Box<PropertyPath>),
345 OneOrMore(Box<PropertyPath>),
347 ZeroOrOne(Box<PropertyPath>),
349 NegatedPropertySet(Vec<PropertyPath>),
351}
352
353#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
355pub struct PropertyPathPattern {
356 pub subject: Term,
357 pub path: PropertyPath,
358 pub object: Term,
359}
360
361#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
363pub enum Algebra {
364 Bgp(Vec<TriplePattern>),
366
367 PropertyPath {
369 subject: Term,
370 path: PropertyPath,
371 object: Term,
372 },
373
374 Join {
376 left: Box<Algebra>,
377 right: Box<Algebra>,
378 },
379
380 LeftJoin {
382 left: Box<Algebra>,
383 right: Box<Algebra>,
384 filter: Option<Expression>,
385 },
386
387 Union {
389 left: Box<Algebra>,
390 right: Box<Algebra>,
391 },
392
393 Filter {
395 pattern: Box<Algebra>,
396 condition: Expression,
397 },
398
399 Extend {
401 pattern: Box<Algebra>,
402 variable: Variable,
403 expr: Expression,
404 },
405
406 Minus {
408 left: Box<Algebra>,
409 right: Box<Algebra>,
410 },
411
412 Service {
414 endpoint: Term,
415 pattern: Box<Algebra>,
416 silent: bool,
417 },
418
419 Graph { graph: Term, pattern: Box<Algebra> },
421
422 Project {
424 pattern: Box<Algebra>,
425 variables: Vec<Variable>,
426 },
427
428 Distinct { pattern: Box<Algebra> },
430
431 Reduced { pattern: Box<Algebra> },
433
434 Slice {
436 pattern: Box<Algebra>,
437 offset: Option<usize>,
438 limit: Option<usize>,
439 },
440
441 OrderBy {
443 pattern: Box<Algebra>,
444 conditions: Vec<OrderCondition>,
445 },
446
447 Group {
449 pattern: Box<Algebra>,
450 variables: Vec<GroupCondition>,
451 aggregates: Vec<(Variable, Aggregate)>,
452 },
453
454 Having {
456 pattern: Box<Algebra>,
457 condition: Expression,
458 },
459
460 Values {
462 variables: Vec<Variable>,
463 bindings: Vec<Binding>,
464 },
465
466 Table,
468
469 Zero,
471
472 Empty,
474}
475
476#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
478pub enum JoinAlgorithm {
479 #[default]
480 HashJoin,
481 SortMergeJoin,
482 NestedLoopJoin,
483 IndexNestedLoopJoin,
484 BindJoin,
485}
486
487#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
489pub enum FilterPlacement {
490 Early, Late, #[default]
493 Optimal, }
495
496#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
498pub struct ServiceCapabilities {
499 pub supports_projection: bool,
500 pub supports_filtering: bool,
501 pub supports_ordering: bool,
502 pub supports_aggregation: bool,
503 pub max_query_size: Option<usize>,
504}
505
506#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
508pub enum ProjectionType {
509 #[default]
510 Standard,
511 Streaming,
512 Cached,
513}
514
515#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
517pub enum SortAlgorithm {
518 #[default]
519 QuickSort,
520 MergeSort,
521 HeapSort,
522 ExternalSort,
523}
524
525#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
527pub enum GroupingAlgorithm {
528 #[default]
529 HashGrouping,
530 SortGrouping,
531 StreamingGrouping,
532}
533
534#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
536pub enum MaterializationStrategy {
537 InMemory,
538 Disk,
539 #[default]
540 Adaptive,
541}
542
543#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
545pub enum ParallelismType {
546 #[default]
547 DataParallel,
548 PipelineParallel,
549 Hybrid,
550}
551
552pub use crate::optimizer::index_types::IndexType;
554
555#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
557pub struct Statistics {
558 pub cardinality: u64,
560 pub selectivity: f64,
562 pub available_indexes: Vec<IndexType>,
564 pub cost: f64,
566 pub memory_estimate: u64,
568 pub io_estimate: u64,
570}
571
572#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
574pub struct OptimizationHints {
575 pub join_algorithm: Option<JoinAlgorithm>,
577 pub filter_placement: FilterPlacement,
579 pub materialization: MaterializationStrategy,
581 pub parallelism: Option<ParallelismType>,
583 pub preferred_indexes: Vec<IndexType>,
585 pub statistics: Option<Statistics>,
587}
588
589#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
591pub struct AnnotatedAlgebra {
592 pub algebra: Algebra,
594 pub hints: OptimizationHints,
596 pub context: Option<String>,
598}
599
600impl PropertyPath {
601 pub fn iri(iri: Iri) -> Self {
603 PropertyPath::Iri(iri)
604 }
605
606 pub fn inverse(path: PropertyPath) -> Self {
608 PropertyPath::Inverse(Box::new(path))
609 }
610
611 pub fn sequence(left: PropertyPath, right: PropertyPath) -> Self {
613 PropertyPath::Sequence(Box::new(left), Box::new(right))
614 }
615
616 pub fn alternative(left: PropertyPath, right: PropertyPath) -> Self {
618 PropertyPath::Alternative(Box::new(left), Box::new(right))
619 }
620
621 pub fn zero_or_more(path: PropertyPath) -> Self {
623 PropertyPath::ZeroOrMore(Box::new(path))
624 }
625
626 pub fn one_or_more(path: PropertyPath) -> Self {
628 PropertyPath::OneOrMore(Box::new(path))
629 }
630
631 pub fn zero_or_one(path: PropertyPath) -> Self {
633 PropertyPath::ZeroOrOne(Box::new(path))
634 }
635
636 pub fn is_simple(&self) -> bool {
638 matches!(self, PropertyPath::Iri(_) | PropertyPath::Variable(_))
639 }
640
641 pub fn variables(&self) -> Vec<Variable> {
643 let mut vars = Vec::new();
644 self.collect_variables(&mut vars);
645 vars.sort();
646 vars.dedup();
647 vars
648 }
649
650 fn collect_variables(&self, vars: &mut Vec<Variable>) {
651 match self {
652 PropertyPath::Variable(var) => vars.push(var.clone()),
653 PropertyPath::Inverse(path) => path.collect_variables(vars),
654 PropertyPath::Sequence(left, right) | PropertyPath::Alternative(left, right) => {
655 left.collect_variables(vars);
656 right.collect_variables(vars);
657 }
658 PropertyPath::ZeroOrMore(path)
659 | PropertyPath::OneOrMore(path)
660 | PropertyPath::ZeroOrOne(path) => path.collect_variables(vars),
661 PropertyPath::NegatedPropertySet(paths) => {
662 for path in paths {
663 path.collect_variables(vars);
664 }
665 }
666 PropertyPath::Iri(_) => {}
667 }
668 }
669
670 pub fn complexity(&self) -> usize {
672 match self {
673 PropertyPath::Iri(_) | PropertyPath::Variable(_) => 1,
674 PropertyPath::Inverse(path) => path.complexity() + 10,
675 PropertyPath::Sequence(left, right) => left.complexity() + right.complexity() + 20,
676 PropertyPath::Alternative(left, right) => {
677 std::cmp::max(left.complexity(), right.complexity()) + 15
678 }
679 PropertyPath::ZeroOrMore(_) | PropertyPath::OneOrMore(_) => 1000, PropertyPath::ZeroOrOne(path) => path.complexity() + 5,
681 PropertyPath::NegatedPropertySet(paths) => {
682 paths.iter().map(|p| p.complexity()).sum::<usize>() + 50
683 }
684 }
685 }
686}
687
688impl PropertyPathPattern {
689 pub fn new(subject: Term, path: PropertyPath, object: Term) -> Self {
691 PropertyPathPattern {
692 subject,
693 path,
694 object,
695 }
696 }
697
698 pub fn variables(&self) -> Vec<Variable> {
700 let mut vars = Vec::new();
701 self.collect_variables(&mut vars);
702 vars.sort();
703 vars.dedup();
704 vars
705 }
706
707 fn collect_variables(&self, vars: &mut Vec<Variable>) {
708 self.subject.collect_variables(vars);
709 self.path.collect_variables(vars);
710 self.object.collect_variables(vars);
711 }
712}
713
714impl fmt::Display for PropertyPath {
715 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
716 match self {
717 PropertyPath::Iri(iri) => write!(f, "{iri}"),
718 PropertyPath::Variable(var) => write!(f, "?{var}"),
719 PropertyPath::Inverse(path) => write!(f, "^{path}"),
720 PropertyPath::Sequence(left, right) => write!(f, "{left}/{right}"),
721 PropertyPath::Alternative(left, right) => write!(f, "{left}|{right}"),
722 PropertyPath::ZeroOrMore(path) => write!(f, "{path}*"),
723 PropertyPath::OneOrMore(path) => write!(f, "{path} +"),
724 PropertyPath::ZeroOrOne(path) => write!(f, "{path}?"),
725 PropertyPath::NegatedPropertySet(paths) => {
726 write!(f, "!(")?;
727 for (i, path) in paths.iter().enumerate() {
728 if i > 0 {
729 write!(f, "|")?
730 }
731 write!(f, "{path}")?;
732 }
733 write!(f, ")")
734 }
735 }
736 }
737}
738
739impl fmt::Display for PropertyPathPattern {
740 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
741 write!(f, "{} {} {}", self.subject, self.path, self.object)
742 }
743}
744
745impl Algebra {
746 pub fn bgp(patterns: Vec<TriplePattern>) -> Self {
748 Algebra::Bgp(patterns)
749 }
750
751 pub fn property_path(subject: Term, path: PropertyPath, object: Term) -> Self {
753 Algebra::PropertyPath {
754 subject,
755 path,
756 object,
757 }
758 }
759
760 pub fn join(left: Algebra, right: Algebra) -> Self {
762 Algebra::Join {
763 left: Box::new(left),
764 right: Box::new(right),
765 }
766 }
767
768 pub fn left_join(left: Algebra, right: Algebra, filter: Option<Expression>) -> Self {
770 Algebra::LeftJoin {
771 left: Box::new(left),
772 right: Box::new(right),
773 filter,
774 }
775 }
776
777 pub fn union(left: Algebra, right: Algebra) -> Self {
779 Algebra::Union {
780 left: Box::new(left),
781 right: Box::new(right),
782 }
783 }
784
785 pub fn filter(pattern: Algebra, condition: Expression) -> Self {
787 Algebra::Filter {
788 pattern: Box::new(pattern),
789 condition,
790 }
791 }
792
793 pub fn extend(pattern: Algebra, variable: Variable, expr: Expression) -> Self {
795 Algebra::Extend {
796 pattern: Box::new(pattern),
797 variable,
798 expr,
799 }
800 }
801
802 pub fn project(pattern: Algebra, variables: Vec<Variable>) -> Self {
804 Algebra::Project {
805 pattern: Box::new(pattern),
806 variables,
807 }
808 }
809
810 pub fn slice(pattern: Algebra, offset: Option<usize>, limit: Option<usize>) -> Self {
812 Algebra::Slice {
813 pattern: Box::new(pattern),
814 offset,
815 limit,
816 }
817 }
818
819 pub fn variables(&self) -> Vec<Variable> {
821 let mut vars = Vec::new();
822 self.collect_variables(&mut vars);
823 vars.sort();
824 vars.dedup();
825 vars
826 }
827
828 fn collect_variables(&self, vars: &mut Vec<Variable>) {
829 match self {
830 Algebra::Bgp(patterns) => {
831 for pattern in patterns {
832 pattern.collect_variables(vars);
833 }
834 }
835 Algebra::PropertyPath {
836 subject,
837 path,
838 object,
839 } => {
840 subject.collect_variables(vars);
841 path.collect_variables(vars);
842 object.collect_variables(vars);
843 }
844 Algebra::Join { left, right }
845 | Algebra::Union { left, right }
846 | Algebra::Minus { left, right } => {
847 left.collect_variables(vars);
848 right.collect_variables(vars);
849 }
850 Algebra::LeftJoin {
851 left,
852 right,
853 filter,
854 } => {
855 left.collect_variables(vars);
856 right.collect_variables(vars);
857 if let Some(filter) = filter {
858 filter.collect_variables(vars);
859 }
860 }
861 Algebra::Filter { pattern, condition } => {
862 pattern.collect_variables(vars);
863 condition.collect_variables(vars);
864 }
865 Algebra::Extend {
866 pattern,
867 variable,
868 expr,
869 } => {
870 pattern.collect_variables(vars);
871 vars.push(variable.clone());
872 expr.collect_variables(vars);
873 }
874 Algebra::Service { pattern, .. } => {
875 pattern.collect_variables(vars);
876 }
877 Algebra::Graph { pattern, .. } => {
878 pattern.collect_variables(vars);
879 }
880 Algebra::Project { pattern, variables } => {
881 pattern.collect_variables(vars);
882 vars.extend(variables.clone());
883 }
884 Algebra::Distinct { pattern }
885 | Algebra::Reduced { pattern }
886 | Algebra::Slice { pattern, .. } => {
887 pattern.collect_variables(vars);
888 }
889 Algebra::OrderBy {
890 pattern,
891 conditions,
892 } => {
893 pattern.collect_variables(vars);
894 for condition in conditions {
895 condition.expr.collect_variables(vars);
896 }
897 }
898 Algebra::Group {
899 pattern,
900 variables: group_vars,
901 aggregates,
902 } => {
903 pattern.collect_variables(vars);
904 for group_var in group_vars {
905 group_var.expr.collect_variables(vars);
906 if let Some(alias) = &group_var.alias {
907 vars.push(alias.clone());
908 }
909 }
910 for (var, aggregate) in aggregates {
911 vars.push(var.clone());
912 aggregate.collect_variables(vars);
913 }
914 }
915 Algebra::Having { pattern, condition } => {
916 pattern.collect_variables(vars);
917 condition.collect_variables(vars);
918 }
919 Algebra::Values { variables, .. } => {
920 vars.extend(variables.clone());
921 }
922 Algebra::Table | Algebra::Zero | Algebra::Empty => {}
923 }
924 }
925}
926
927impl TriplePattern {
928 pub fn new(subject: Term, predicate: Term, object: Term) -> Self {
930 TriplePattern {
931 subject,
932 predicate,
933 object,
934 }
935 }
936
937 fn collect_variables(&self, vars: &mut Vec<Variable>) {
938 self.subject.collect_variables(vars);
939 self.predicate.collect_variables(vars);
940 self.object.collect_variables(vars);
941 }
942
943 pub fn variables(&self) -> Vec<Variable> {
945 let mut vars = Vec::new();
946 self.collect_variables(&mut vars);
947 vars
948 }
949}
950
951impl Term {
952 fn collect_variables(&self, vars: &mut Vec<Variable>) {
953 if let Term::Variable(var) = self {
954 vars.push(var.clone());
955 }
956 }
957}
958
959impl Expression {
960 fn collect_variables(&self, vars: &mut Vec<Variable>) {
961 match self {
962 Expression::Variable(var) => vars.push(var.clone()),
963 Expression::Function { args, .. } => {
964 for arg in args {
965 arg.collect_variables(vars);
966 }
967 }
968 Expression::Binary { left, right, .. } => {
969 left.collect_variables(vars);
970 right.collect_variables(vars);
971 }
972 Expression::Unary { operand, .. } => {
973 operand.collect_variables(vars);
974 }
975 Expression::Conditional {
976 condition,
977 then_expr,
978 else_expr,
979 } => {
980 condition.collect_variables(vars);
981 then_expr.collect_variables(vars);
982 else_expr.collect_variables(vars);
983 }
984 Expression::Bound(var) => vars.push(var.clone()),
985 Expression::Exists(algebra) | Expression::NotExists(algebra) => {
986 algebra.collect_variables(vars);
987 }
988 Expression::Literal(_) | Expression::Iri(_) => {}
989 }
990 }
991}
992
993impl Aggregate {
994 fn collect_variables(&self, vars: &mut Vec<Variable>) {
995 match self {
996 Aggregate::Count {
997 expr: Some(expr), ..
998 }
999 | Aggregate::Sum { expr, .. }
1000 | Aggregate::Min { expr, .. }
1001 | Aggregate::Max { expr, .. }
1002 | Aggregate::Avg { expr, .. }
1003 | Aggregate::Sample { expr, .. }
1004 | Aggregate::GroupConcat { expr, .. } => {
1005 expr.collect_variables(vars);
1006 }
1007 Aggregate::Count { expr: None, .. } => {}
1008 }
1009 }
1010}
1011
1012#[macro_export]
1014macro_rules! triple {
1015 ($s:expr_2021, $p:expr_2021, $o:expr_2021) => {
1016 TriplePattern::new($s, $p, $o)
1017 };
1018}
1019
1020#[macro_export]
1021macro_rules! var {
1022 ($name:expr_2021) => {
1023 Term::Variable($name.to_string())
1024 };
1025}
1026
1027#[macro_export]
1028macro_rules! iri {
1029 ($iri:expr_2021) => {
1030 Term::Iri(NamedNode::new($iri).expect("macro argument should be valid IRI"))
1031 };
1032}
1033
1034#[macro_export]
1035macro_rules! literal {
1036 ($value:expr_2021) => {
1037 Term::Literal(Literal::new($value.to_string(), None, None))
1038 };
1039 ($value:expr_2021, lang: $lang:expr_2021) => {
1040 Term::Literal(Literal::new(
1041 $value.to_string(),
1042 Some($lang.to_string()),
1043 None,
1044 ))
1045 };
1046 ($value:expr_2021, datatype: $dt:expr_2021) => {
1047 Term::Literal(Literal::new(
1048 $value.to_string(),
1049 None,
1050 Some(NamedNode::new($dt).expect("macro datatype argument should be valid IRI")),
1051 ))
1052 };
1053}
1054
1055impl Default for ServiceCapabilities {
1056 fn default() -> Self {
1057 Self {
1058 supports_projection: true,
1059 supports_filtering: true,
1060 supports_ordering: false,
1061 supports_aggregation: false,
1062 max_query_size: None,
1063 }
1064 }
1065}
1066
1067impl Literal {
1068 pub fn new(value: String, language: Option<String>, datatype: Option<Iri>) -> Self {
1070 Literal {
1071 value,
1072 language,
1073 datatype,
1074 }
1075 }
1076
1077 pub fn string(value: impl Into<String>) -> Self {
1079 Literal {
1080 value: value.into(),
1081 language: None,
1082 datatype: None,
1083 }
1084 }
1085
1086 pub fn lang_string(value: impl Into<String>, language: impl Into<String>) -> Self {
1088 Literal {
1089 value: value.into(),
1090 language: Some(language.into()),
1091 datatype: None,
1092 }
1093 }
1094
1095 pub fn typed(value: impl Into<String>, datatype: Iri) -> Self {
1097 Literal {
1098 value: value.into(),
1099 language: None,
1100 datatype: Some(datatype),
1101 }
1102 }
1103
1104 pub fn integer(value: i64) -> Self {
1106 Literal::typed(
1107 value.to_string(),
1108 NamedNode::new("http://www.w3.org/2001/XMLSchema#integer")
1109 .expect("XSD URI is well-formed and valid"),
1110 )
1111 }
1112
1113 pub fn decimal(value: f64) -> Self {
1115 Literal::typed(
1116 value.to_string(),
1117 NamedNode::new("http://www.w3.org/2001/XMLSchema#decimal")
1118 .expect("XSD URI is well-formed and valid"),
1119 )
1120 }
1121
1122 pub fn boolean(value: bool) -> Self {
1124 Literal::typed(
1125 value.to_string(),
1126 NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean")
1127 .expect("XSD URI is well-formed and valid"),
1128 )
1129 }
1130
1131 pub fn date(value: impl Into<String>) -> Self {
1133 Literal::typed(
1134 value.into(),
1135 NamedNode::new("http://www.w3.org/2001/XMLSchema#date")
1136 .expect("XSD URI is well-formed and valid"),
1137 )
1138 }
1139
1140 pub fn datetime(value: impl Into<String>) -> Self {
1142 Literal::typed(
1143 value.into(),
1144 NamedNode::new("http://www.w3.org/2001/XMLSchema#dateTime")
1145 .expect("XSD URI is well-formed and valid"),
1146 )
1147 }
1148
1149 pub fn effective_datatype(&self) -> Iri {
1151 if let Some(ref dt) = self.datatype {
1152 dt.clone()
1153 } else if self.language.is_some() {
1154 NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
1155 .expect("RDF URI is well-formed and valid")
1156 } else {
1157 NamedNode::new("http://www.w3.org/2001/XMLSchema#string")
1158 .expect("XSD URI is well-formed and valid")
1159 }
1160 }
1161
1162 pub fn is_numeric(&self) -> bool {
1164 if let Some(ref dt) = self.datatype {
1165 matches!(
1166 dt.as_str(),
1167 "http://www.w3.org/2001/XMLSchema#integer"
1168 | "http://www.w3.org/2001/XMLSchema#decimal"
1169 | "http://www.w3.org/2001/XMLSchema#float"
1170 | "http://www.w3.org/2001/XMLSchema#double"
1171 | "http://www.w3.org/2001/XMLSchema#long"
1172 | "http://www.w3.org/2001/XMLSchema#int"
1173 | "http://www.w3.org/2001/XMLSchema#short"
1174 | "http://www.w3.org/2001/XMLSchema#byte"
1175 | "http://www.w3.org/2001/XMLSchema#unsignedLong"
1176 | "http://www.w3.org/2001/XMLSchema#unsignedInt"
1177 | "http://www.w3.org/2001/XMLSchema#unsignedShort"
1178 | "http://www.w3.org/2001/XMLSchema#unsignedByte"
1179 | "http://www.w3.org/2001/XMLSchema#positiveInteger"
1180 | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger"
1181 | "http://www.w3.org/2001/XMLSchema#negativeInteger"
1182 | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
1183 )
1184 } else {
1185 false
1186 }
1187 }
1188
1189 pub fn is_string(&self) -> bool {
1191 self.datatype.is_none() && self.language.is_none()
1192 }
1193
1194 pub fn is_lang_string(&self) -> bool {
1196 self.language.is_some()
1197 }
1198
1199 pub fn is_boolean(&self) -> bool {
1201 if let Some(ref dt) = self.datatype {
1202 dt.as_str() == "http://www.w3.org/2001/XMLSchema#boolean"
1203 } else {
1204 false
1205 }
1206 }
1207
1208 pub fn is_datetime(&self) -> bool {
1210 if let Some(ref dt) = self.datatype {
1211 matches!(
1212 dt.as_str(),
1213 "http://www.w3.org/2001/XMLSchema#date"
1214 | "http://www.w3.org/2001/XMLSchema#dateTime"
1215 | "http://www.w3.org/2001/XMLSchema#time"
1216 | "http://www.w3.org/2001/XMLSchema#gYear"
1217 | "http://www.w3.org/2001/XMLSchema#gYearMonth"
1218 | "http://www.w3.org/2001/XMLSchema#gMonth"
1219 | "http://www.w3.org/2001/XMLSchema#gMonthDay"
1220 | "http://www.w3.org/2001/XMLSchema#gDay"
1221 | "http://www.w3.org/2001/XMLSchema#duration"
1222 | "http://www.w3.org/2001/XMLSchema#dayTimeDuration"
1223 | "http://www.w3.org/2001/XMLSchema#yearMonthDuration"
1224 )
1225 } else {
1226 false
1227 }
1228 }
1229}
1230
1231impl Default for Statistics {
1232 fn default() -> Self {
1233 Self {
1234 cardinality: 0,
1235 selectivity: 1.0,
1236 available_indexes: Vec::new(),
1237 cost: 0.0,
1238 memory_estimate: 0,
1239 io_estimate: 0,
1240 }
1241 }
1242}
1243
1244impl Statistics {
1245 pub fn with_cardinality(cardinality: u64) -> Self {
1247 Self {
1248 cardinality,
1249 selectivity: 1.0,
1250 available_indexes: Vec::new(),
1251 cost: cardinality as f64,
1252 memory_estimate: cardinality * 64, io_estimate: cardinality / 1000, }
1255 }
1256
1257 pub fn with_selectivity(mut self, selectivity: f64) -> Self {
1259 self.selectivity = selectivity.clamp(0.0, 1.0);
1260 self.cardinality = (self.cardinality as f64 * self.selectivity) as u64;
1261 self.cost *= self.selectivity;
1262 self
1263 }
1264
1265 pub fn with_index(mut self, index: IndexType) -> Self {
1267 self.available_indexes.push(index);
1268 self.cost *= 0.8;
1270 self
1271 }
1272
1273 pub fn combine(&self, other: &Statistics) -> Self {
1275 Self {
1276 cardinality: self.cardinality * other.cardinality,
1277 selectivity: self.selectivity * other.selectivity,
1278 available_indexes: self
1279 .available_indexes
1280 .iter()
1281 .chain(other.available_indexes.iter())
1282 .cloned()
1283 .collect(),
1284 cost: self.cost + other.cost,
1285 memory_estimate: self.memory_estimate + other.memory_estimate,
1286 io_estimate: self.io_estimate + other.io_estimate,
1287 }
1288 }
1289}
1290
1291impl OptimizationHints {
1292 pub fn for_bgp(patterns: &[TriplePattern]) -> Self {
1294 let mut hints = OptimizationHints::default();
1295
1296 let cardinality = match patterns.len() {
1298 0 => 0,
1299 1 => 1000, n => 1000 / (n as u64 * n as u64), };
1302
1303 hints.statistics = Some(Statistics::with_cardinality(cardinality));
1304
1305 for pattern in patterns {
1307 if let Term::Variable(_) = pattern.subject {
1308 hints.preferred_indexes.push(IndexType::PredicateIndex);
1309 }
1310 if let Term::Variable(_) = pattern.predicate {
1311 hints.preferred_indexes.push(IndexType::SubjectIndex);
1312 }
1313 if let Term::Variable(_) = pattern.object {
1314 hints
1315 .preferred_indexes
1316 .push(IndexType::SubjectPredicateIndex);
1317 }
1318 }
1319
1320 hints
1321 }
1322
1323 pub fn for_join(left_hints: &OptimizationHints, right_hints: &OptimizationHints) -> Self {
1325 let mut hints = OptimizationHints::default();
1326
1327 if let (Some(left_stats), Some(right_stats)) =
1329 (&left_hints.statistics, &right_hints.statistics)
1330 {
1331 hints.statistics = Some(left_stats.combine(right_stats));
1332
1333 hints.join_algorithm = Some(match (left_stats.cardinality, right_stats.cardinality) {
1335 (l, r) if l < 1000 && r < 1000 => JoinAlgorithm::NestedLoopJoin,
1336 (l, r) if l > 100000 || r > 100000 => JoinAlgorithm::SortMergeJoin,
1337 _ => JoinAlgorithm::HashJoin,
1338 });
1339 }
1340
1341 hints.preferred_indexes = left_hints
1343 .preferred_indexes
1344 .iter()
1345 .chain(right_hints.preferred_indexes.iter())
1346 .cloned()
1347 .collect();
1348
1349 hints
1350 }
1351
1352 pub fn for_filter(pattern_hints: &OptimizationHints, condition: &Expression) -> Self {
1354 let mut hints = pattern_hints.clone();
1355
1356 if let Some(ref mut stats) = hints.statistics {
1358 let filter_selectivity = estimate_filter_selectivity(condition);
1359 *stats = stats.clone().with_selectivity(filter_selectivity);
1360 }
1361
1362 hints.filter_placement = if estimate_filter_selectivity(condition) < 0.1 {
1364 FilterPlacement::Early
1365 } else {
1366 FilterPlacement::Optimal
1367 };
1368
1369 hints
1370 }
1371}
1372
1373impl AnnotatedAlgebra {
1374 pub fn new(algebra: Algebra) -> Self {
1376 let hints = match &algebra {
1377 Algebra::Bgp(patterns) => OptimizationHints::for_bgp(patterns),
1378 Algebra::Join { left: _, right: _ } => {
1379 OptimizationHints::default()
1381 }
1382 Algebra::Filter { .. } => OptimizationHints::default(),
1383 _ => OptimizationHints::default(),
1384 };
1385
1386 Self {
1387 algebra,
1388 hints,
1389 context: None,
1390 }
1391 }
1392
1393 pub fn with_hints(algebra: Algebra, hints: OptimizationHints) -> Self {
1395 Self {
1396 algebra,
1397 hints,
1398 context: None,
1399 }
1400 }
1401
1402 pub fn with_context(mut self, context: String) -> Self {
1404 self.context = Some(context);
1405 self
1406 }
1407
1408 pub fn estimated_cost(&self) -> f64 {
1410 self.hints
1411 .statistics
1412 .as_ref()
1413 .map(|s| s.cost)
1414 .unwrap_or(0.0)
1415 }
1416
1417 pub fn estimated_cardinality(&self) -> u64 {
1419 self.hints
1420 .statistics
1421 .as_ref()
1422 .map(|s| s.cardinality)
1423 .unwrap_or(0)
1424 }
1425}
1426
1427fn estimate_filter_selectivity(condition: &Expression) -> f64 {
1429 match condition {
1430 Expression::Binary { op, .. } => match op {
1431 BinaryOperator::Equal => 0.01, BinaryOperator::NotEqual => 0.99, BinaryOperator::Less
1434 | BinaryOperator::LessEqual
1435 | BinaryOperator::Greater
1436 | BinaryOperator::GreaterEqual => 0.33, BinaryOperator::And => 0.25, BinaryOperator::Or => 0.75, _ => 0.5, },
1441 Expression::Function { name, .. } => match name.as_str() {
1442 "regex" | "contains" => 0.2, "bound" => 0.8, "isIRI" | "isLiteral" | "isBlank" => 0.3, _ => 0.5, },
1447 Expression::Unary {
1448 op: UnaryOperator::Not,
1449 ..
1450 } => 0.5, Expression::Unary { .. } => 0.5,
1452 _ => 0.5, }
1454}
1455
1456#[derive(Debug, Clone, Default)]
1458pub struct EvaluationContext {
1459 pub bindings: HashMap<Variable, Term>,
1461 pub dataset: Option<String>,
1463 pub options: HashMap<String, String>,
1465}
1466
1467#[cfg(test)]
1468mod algebra_tests;