Skip to main content

oxirs_arq/algebra/
mod.rs

1//! SPARQL Algebra Module
2//!
3//! This module provides the core algebraic representation of SPARQL queries,
4//! including basic graph patterns, joins, unions, filters, and other operations.
5
6use oxirs_core::model::{
7    BlankNode as CoreBlankNode, Literal as CoreLiteral, NamedNode, Object, Predicate, Subject,
8};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fmt;
12
13/// Variable identifier - reuse from core
14pub use oxirs_core::model::Variable;
15
16/// IRI (Internationalized Resource Identifier) - use NamedNode from core
17pub type Iri = NamedNode;
18
19/// Literal value - create a bridge type that can convert to/from core literal
20#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
21pub struct Literal {
22    pub value: String,
23    pub language: Option<String>,
24    pub datatype: Option<NamedNode>,
25}
26
27impl fmt::Display for Literal {
28    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
29        write!(f, "\"{}\"", self.value)?;
30        if let Some(lang) = &self.language {
31            write!(f, "@{lang}")?;
32        } else if let Some(dt) = &self.datatype {
33            write!(f, "^^{dt}")?;
34        }
35        Ok(())
36    }
37}
38
39impl Literal {
40    /// Create a language-tagged literal
41    pub fn with_language(value: String, language: String) -> Self {
42        Self {
43            value,
44            language: Some(language),
45            datatype: None,
46        }
47    }
48}
49
50impl From<CoreLiteral> for Literal {
51    fn from(core_literal: CoreLiteral) -> Self {
52        let (value, datatype, language) = core_literal.destruct();
53        Self {
54            value,
55            language,
56            datatype,
57        }
58    }
59}
60
61impl From<Literal> for CoreLiteral {
62    fn from(literal: Literal) -> Self {
63        if let Some(lang) = literal.language {
64            CoreLiteral::new_language_tagged_literal(&literal.value, lang)
65                .unwrap_or_else(|_| CoreLiteral::new_simple_literal(literal.value))
66        } else if let Some(datatype) = literal.datatype {
67            CoreLiteral::new_typed_literal(literal.value, datatype)
68        } else {
69            CoreLiteral::new_simple_literal(literal.value)
70        }
71    }
72}
73
74/// RDF term (subject, predicate, or object) - bridge with core types
75#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
76pub enum Term {
77    Variable(Variable),
78    Iri(NamedNode),
79    Literal(Literal),
80    BlankNode(String),
81    QuotedTriple(Box<TriplePattern>),
82    PropertyPath(PropertyPath),
83}
84
85impl fmt::Display for Term {
86    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87        match self {
88            Term::Variable(v) => write!(f, "?{v}"),
89            Term::Iri(iri) => write!(f, "{iri}"),
90            Term::Literal(lit) => write!(f, "{lit}"),
91            Term::BlankNode(id) => write!(f, "_:{id}"),
92            Term::QuotedTriple(triple) => write!(
93                f,
94                "<<{} {} {}>>",
95                triple.subject, triple.predicate, triple.object
96            ),
97            Term::PropertyPath(path) => write!(f, "{path}"),
98        }
99    }
100}
101
102impl From<Subject> for Term {
103    fn from(subject: Subject) -> Self {
104        match subject {
105            Subject::NamedNode(n) => Term::Iri(n),
106            Subject::BlankNode(b) => Term::BlankNode(b.id().to_string()),
107            Subject::Variable(v) => Term::Variable(v),
108            Subject::QuotedTriple(quoted_triple) => {
109                // Implement proper quoted triple support for RDF-star
110                Term::QuotedTriple(Box::new(TriplePattern {
111                    subject: Term::from(quoted_triple.subject().clone()),
112                    predicate: Term::from(quoted_triple.predicate().clone()),
113                    object: Term::from(quoted_triple.object().clone()),
114                }))
115            }
116        }
117    }
118}
119
120impl From<Predicate> for Term {
121    fn from(predicate: Predicate) -> Self {
122        match predicate {
123            Predicate::NamedNode(n) => Term::Iri(n),
124            Predicate::Variable(v) => Term::Variable(v),
125        }
126    }
127}
128
129impl From<Object> for Term {
130    fn from(object: Object) -> Self {
131        match object {
132            Object::NamedNode(n) => Term::Iri(n),
133            Object::BlankNode(b) => Term::BlankNode(b.id().to_string()),
134            Object::Literal(l) => Term::Literal(l.into()),
135            Object::Variable(v) => Term::Variable(v),
136            Object::QuotedTriple(quoted_triple) => {
137                // Implement proper quoted triple support for RDF-star
138                Term::QuotedTriple(Box::new(TriplePattern {
139                    subject: Term::from(quoted_triple.subject().clone()),
140                    predicate: Term::from(quoted_triple.predicate().clone()),
141                    object: Term::from(quoted_triple.object().clone()),
142                }))
143            }
144        }
145    }
146}
147
148impl From<NamedNode> for Term {
149    fn from(node: NamedNode) -> Self {
150        Term::Iri(node)
151    }
152}
153
154impl From<CoreBlankNode> for Term {
155    fn from(node: CoreBlankNode) -> Self {
156        Term::BlankNode(node.id().to_string())
157    }
158}
159
160impl From<CoreLiteral> for Term {
161    fn from(literal: CoreLiteral) -> Self {
162        Term::Literal(literal.into())
163    }
164}
165
166impl From<Variable> for Term {
167    fn from(variable: Variable) -> Self {
168        Term::Variable(variable)
169    }
170}
171
172/// Triple pattern
173#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
174pub struct TriplePattern {
175    pub subject: Term,
176    pub predicate: Term,
177    pub object: Term,
178}
179
180impl fmt::Display for TriplePattern {
181    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
182        write!(f, "{} {} {}", self.subject, self.predicate, self.object)
183    }
184}
185
186/// SPARQL expression
187#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
188pub enum Expression {
189    /// Variable reference
190    Variable(Variable),
191    /// Literal value
192    Literal(Literal),
193    /// IRI reference
194    Iri(Iri),
195    /// Function call
196    Function { name: String, args: Vec<Expression> },
197    /// Binary operation
198    Binary {
199        op: BinaryOperator,
200        left: Box<Expression>,
201        right: Box<Expression>,
202    },
203    /// Unary operation
204    Unary {
205        op: UnaryOperator,
206        operand: Box<Expression>,
207    },
208    /// Conditional expression (IF)
209    Conditional {
210        condition: Box<Expression>,
211        then_expr: Box<Expression>,
212        else_expr: Box<Expression>,
213    },
214    /// Bound variable check
215    Bound(Variable),
216    /// Exists clause
217    Exists(Box<Algebra>),
218    /// Not exists clause
219    NotExists(Box<Algebra>),
220}
221
222/// Binary operators
223#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
224pub enum BinaryOperator {
225    Add,
226    Subtract,
227    Multiply,
228    Divide,
229    Equal,
230    NotEqual,
231    Less,
232    LessEqual,
233    Greater,
234    GreaterEqual,
235    And,
236    Or,
237    SameTerm,
238    In,
239    NotIn,
240}
241
242impl std::fmt::Display for BinaryOperator {
243    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
244        match self {
245            BinaryOperator::Add => write!(f, "+"),
246            BinaryOperator::Subtract => write!(f, "-"),
247            BinaryOperator::Multiply => write!(f, "*"),
248            BinaryOperator::Divide => write!(f, "/"),
249            BinaryOperator::Equal => write!(f, "="),
250            BinaryOperator::NotEqual => write!(f, "!="),
251            BinaryOperator::Less => write!(f, "<"),
252            BinaryOperator::LessEqual => write!(f, "<="),
253            BinaryOperator::Greater => write!(f, ">"),
254            BinaryOperator::GreaterEqual => write!(f, ">="),
255            BinaryOperator::And => write!(f, "&&"),
256            BinaryOperator::Or => write!(f, "||"),
257            BinaryOperator::SameTerm => write!(f, "sameTerm"),
258            BinaryOperator::In => write!(f, "IN"),
259            BinaryOperator::NotIn => write!(f, "NOT IN"),
260        }
261    }
262}
263
264/// Unary operators
265#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
266pub enum UnaryOperator {
267    Not,
268    Plus,
269    Minus,
270    IsIri,
271    IsBlank,
272    IsLiteral,
273    IsNumeric,
274}
275
276/// Aggregate function
277#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
278pub enum Aggregate {
279    Count {
280        distinct: bool,
281        expr: Option<Expression>,
282    },
283    Sum {
284        distinct: bool,
285        expr: Expression,
286    },
287    Min {
288        distinct: bool,
289        expr: Expression,
290    },
291    Max {
292        distinct: bool,
293        expr: Expression,
294    },
295    Avg {
296        distinct: bool,
297        expr: Expression,
298    },
299    Sample {
300        distinct: bool,
301        expr: Expression,
302    },
303    GroupConcat {
304        distinct: bool,
305        expr: Expression,
306        separator: Option<String>,
307    },
308}
309
310/// Variable binding
311pub type Binding = HashMap<Variable, Term>;
312
313/// Solution sequence (set of bindings)
314pub type Solution = Vec<Binding>;
315
316/// Order condition
317#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
318pub struct OrderCondition {
319    pub expr: Expression,
320    pub ascending: bool,
321}
322
323/// Group condition
324#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
325pub struct GroupCondition {
326    pub expr: Expression,
327    pub alias: Option<Variable>,
328}
329
330/// Property path expressions for advanced SPARQL 1.1 graph navigation
331#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
332pub enum PropertyPath {
333    /// Direct property IRI
334    Iri(Iri),
335    /// Variable property
336    Variable(Variable),
337    /// Inverse property path (^property)
338    Inverse(Box<PropertyPath>),
339    /// Sequence path (path1/path2)
340    Sequence(Box<PropertyPath>, Box<PropertyPath>),
341    /// Alternative path (path1|path2)
342    Alternative(Box<PropertyPath>, Box<PropertyPath>),
343    /// Zero or more (path*)
344    ZeroOrMore(Box<PropertyPath>),
345    /// One or more (path+)
346    OneOrMore(Box<PropertyPath>),
347    /// Zero or one (path?)
348    ZeroOrOne(Box<PropertyPath>),
349    /// Negated property set (!property)
350    NegatedPropertySet(Vec<PropertyPath>),
351}
352
353/// Property path triple pattern
354#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
355pub struct PropertyPathPattern {
356    pub subject: Term,
357    pub path: PropertyPath,
358    pub object: Term,
359}
360
361/// SPARQL algebra expressions
362#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
363pub enum Algebra {
364    /// Basic Graph Pattern
365    Bgp(Vec<TriplePattern>),
366
367    /// Property Path Pattern
368    PropertyPath {
369        subject: Term,
370        path: PropertyPath,
371        object: Term,
372    },
373
374    /// Join two patterns
375    Join {
376        left: Box<Algebra>,
377        right: Box<Algebra>,
378    },
379
380    /// Left join (OPTIONAL)
381    LeftJoin {
382        left: Box<Algebra>,
383        right: Box<Algebra>,
384        filter: Option<Expression>,
385    },
386
387    /// Union of patterns
388    Union {
389        left: Box<Algebra>,
390        right: Box<Algebra>,
391    },
392
393    /// Filter pattern
394    Filter {
395        pattern: Box<Algebra>,
396        condition: Expression,
397    },
398
399    /// Extend pattern (BIND)
400    Extend {
401        pattern: Box<Algebra>,
402        variable: Variable,
403        expr: Expression,
404    },
405
406    /// Minus pattern
407    Minus {
408        left: Box<Algebra>,
409        right: Box<Algebra>,
410    },
411
412    /// Service pattern (federation)
413    Service {
414        endpoint: Term,
415        pattern: Box<Algebra>,
416        silent: bool,
417    },
418
419    /// Graph pattern
420    Graph { graph: Term, pattern: Box<Algebra> },
421
422    /// Projection
423    Project {
424        pattern: Box<Algebra>,
425        variables: Vec<Variable>,
426    },
427
428    /// Distinct
429    Distinct { pattern: Box<Algebra> },
430
431    /// Reduced
432    Reduced { pattern: Box<Algebra> },
433
434    /// Slice (LIMIT/OFFSET)
435    Slice {
436        pattern: Box<Algebra>,
437        offset: Option<usize>,
438        limit: Option<usize>,
439    },
440
441    /// Order by
442    OrderBy {
443        pattern: Box<Algebra>,
444        conditions: Vec<OrderCondition>,
445    },
446
447    /// Group by
448    Group {
449        pattern: Box<Algebra>,
450        variables: Vec<GroupCondition>,
451        aggregates: Vec<(Variable, Aggregate)>,
452    },
453
454    /// Having
455    Having {
456        pattern: Box<Algebra>,
457        condition: Expression,
458    },
459
460    /// Values clause
461    Values {
462        variables: Vec<Variable>,
463        bindings: Vec<Binding>,
464    },
465
466    /// Table (empty result)
467    Table,
468
469    /// Zero matches
470    Zero,
471
472    /// Empty result set
473    Empty,
474}
475
476/// Join algorithm hints
477#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
478pub enum JoinAlgorithm {
479    #[default]
480    HashJoin,
481    SortMergeJoin,
482    NestedLoopJoin,
483    IndexNestedLoopJoin,
484    BindJoin,
485}
486
487/// Filter placement hints
488#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
489pub enum FilterPlacement {
490    Early, // Push down as much as possible
491    Late,  // Keep at current level
492    #[default]
493    Optimal, // Let optimizer decide
494}
495
496/// Service capabilities
497#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
498pub struct ServiceCapabilities {
499    pub supports_projection: bool,
500    pub supports_filtering: bool,
501    pub supports_ordering: bool,
502    pub supports_aggregation: bool,
503    pub max_query_size: Option<usize>,
504}
505
506/// Projection types
507#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
508pub enum ProjectionType {
509    #[default]
510    Standard,
511    Streaming,
512    Cached,
513}
514
515/// Sort algorithms
516#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
517pub enum SortAlgorithm {
518    #[default]
519    QuickSort,
520    MergeSort,
521    HeapSort,
522    ExternalSort,
523}
524
525/// Grouping algorithms
526#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
527pub enum GroupingAlgorithm {
528    #[default]
529    HashGrouping,
530    SortGrouping,
531    StreamingGrouping,
532}
533
534/// Materialization strategies
535#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
536pub enum MaterializationStrategy {
537    InMemory,
538    Disk,
539    #[default]
540    Adaptive,
541}
542
543/// Parallelism types
544#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
545pub enum ParallelismType {
546    #[default]
547    DataParallel,
548    PipelineParallel,
549    Hybrid,
550}
551
552/// Re-export IndexType from optimizer module
553pub use crate::optimizer::index_types::IndexType;
554
555/// Statistics for cost-based optimization
556#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
557pub struct Statistics {
558    /// Estimated number of triples/results
559    pub cardinality: u64,
560    /// Selectivity factor (0.0 to 1.0)
561    pub selectivity: f64,
562    /// Index availability
563    pub available_indexes: Vec<IndexType>,
564    /// Approximate cost (arbitrary units)
565    pub cost: f64,
566    /// Memory requirement estimate (bytes)
567    pub memory_estimate: u64,
568    /// IO operations estimate
569    pub io_estimate: u64,
570}
571
572/// Optimization hints for algebra nodes
573#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
574pub struct OptimizationHints {
575    /// Preferred join algorithm
576    pub join_algorithm: Option<JoinAlgorithm>,
577    /// Filter placement strategy
578    pub filter_placement: FilterPlacement,
579    /// Materialization strategy
580    pub materialization: MaterializationStrategy,
581    /// Parallelism recommendations
582    pub parallelism: Option<ParallelismType>,
583    /// Index hints
584    pub preferred_indexes: Vec<IndexType>,
585    /// Cost estimates
586    pub statistics: Option<Statistics>,
587}
588
589/// Enhanced algebra with optimization annotations
590#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
591pub struct AnnotatedAlgebra {
592    /// The core algebra expression
593    pub algebra: Algebra,
594    /// Optimization hints
595    pub hints: OptimizationHints,
596    /// Execution context
597    pub context: Option<String>,
598}
599
600impl PropertyPath {
601    /// Create a direct property path
602    pub fn iri(iri: Iri) -> Self {
603        PropertyPath::Iri(iri)
604    }
605
606    /// Create an inverse property path
607    pub fn inverse(path: PropertyPath) -> Self {
608        PropertyPath::Inverse(Box::new(path))
609    }
610
611    /// Create a sequence property path
612    pub fn sequence(left: PropertyPath, right: PropertyPath) -> Self {
613        PropertyPath::Sequence(Box::new(left), Box::new(right))
614    }
615
616    /// Create an alternative property path
617    pub fn alternative(left: PropertyPath, right: PropertyPath) -> Self {
618        PropertyPath::Alternative(Box::new(left), Box::new(right))
619    }
620
621    /// Create a zero-or-more property path
622    pub fn zero_or_more(path: PropertyPath) -> Self {
623        PropertyPath::ZeroOrMore(Box::new(path))
624    }
625
626    /// Create a one-or-more property path
627    pub fn one_or_more(path: PropertyPath) -> Self {
628        PropertyPath::OneOrMore(Box::new(path))
629    }
630
631    /// Create a zero-or-one property path
632    pub fn zero_or_one(path: PropertyPath) -> Self {
633        PropertyPath::ZeroOrOne(Box::new(path))
634    }
635
636    /// Check if path is simple (direct property)
637    pub fn is_simple(&self) -> bool {
638        matches!(self, PropertyPath::Iri(_) | PropertyPath::Variable(_))
639    }
640
641    /// Get all variables mentioned in this property path
642    pub fn variables(&self) -> Vec<Variable> {
643        let mut vars = Vec::new();
644        self.collect_variables(&mut vars);
645        vars.sort();
646        vars.dedup();
647        vars
648    }
649
650    fn collect_variables(&self, vars: &mut Vec<Variable>) {
651        match self {
652            PropertyPath::Variable(var) => vars.push(var.clone()),
653            PropertyPath::Inverse(path) => path.collect_variables(vars),
654            PropertyPath::Sequence(left, right) | PropertyPath::Alternative(left, right) => {
655                left.collect_variables(vars);
656                right.collect_variables(vars);
657            }
658            PropertyPath::ZeroOrMore(path)
659            | PropertyPath::OneOrMore(path)
660            | PropertyPath::ZeroOrOne(path) => path.collect_variables(vars),
661            PropertyPath::NegatedPropertySet(paths) => {
662                for path in paths {
663                    path.collect_variables(vars);
664                }
665            }
666            PropertyPath::Iri(_) => {}
667        }
668    }
669
670    /// Estimate complexity of property path evaluation
671    pub fn complexity(&self) -> usize {
672        match self {
673            PropertyPath::Iri(_) | PropertyPath::Variable(_) => 1,
674            PropertyPath::Inverse(path) => path.complexity() + 10,
675            PropertyPath::Sequence(left, right) => left.complexity() + right.complexity() + 20,
676            PropertyPath::Alternative(left, right) => {
677                std::cmp::max(left.complexity(), right.complexity()) + 15
678            }
679            PropertyPath::ZeroOrMore(_) | PropertyPath::OneOrMore(_) => 1000, // High complexity
680            PropertyPath::ZeroOrOne(path) => path.complexity() + 5,
681            PropertyPath::NegatedPropertySet(paths) => {
682                paths.iter().map(|p| p.complexity()).sum::<usize>() + 50
683            }
684        }
685    }
686}
687
688impl PropertyPathPattern {
689    /// Create a new property path pattern
690    pub fn new(subject: Term, path: PropertyPath, object: Term) -> Self {
691        PropertyPathPattern {
692            subject,
693            path,
694            object,
695        }
696    }
697
698    /// Get all variables mentioned in this pattern
699    pub fn variables(&self) -> Vec<Variable> {
700        let mut vars = Vec::new();
701        self.collect_variables(&mut vars);
702        vars.sort();
703        vars.dedup();
704        vars
705    }
706
707    fn collect_variables(&self, vars: &mut Vec<Variable>) {
708        self.subject.collect_variables(vars);
709        self.path.collect_variables(vars);
710        self.object.collect_variables(vars);
711    }
712}
713
714impl fmt::Display for PropertyPath {
715    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
716        match self {
717            PropertyPath::Iri(iri) => write!(f, "{iri}"),
718            PropertyPath::Variable(var) => write!(f, "?{var}"),
719            PropertyPath::Inverse(path) => write!(f, "^{path}"),
720            PropertyPath::Sequence(left, right) => write!(f, "{left}/{right}"),
721            PropertyPath::Alternative(left, right) => write!(f, "{left}|{right}"),
722            PropertyPath::ZeroOrMore(path) => write!(f, "{path}*"),
723            PropertyPath::OneOrMore(path) => write!(f, "{path} +"),
724            PropertyPath::ZeroOrOne(path) => write!(f, "{path}?"),
725            PropertyPath::NegatedPropertySet(paths) => {
726                write!(f, "!(")?;
727                for (i, path) in paths.iter().enumerate() {
728                    if i > 0 {
729                        write!(f, "|")?
730                    }
731                    write!(f, "{path}")?;
732                }
733                write!(f, ")")
734            }
735        }
736    }
737}
738
739impl fmt::Display for PropertyPathPattern {
740    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
741        write!(f, "{} {} {}", self.subject, self.path, self.object)
742    }
743}
744
745impl Algebra {
746    /// Create a new BGP from triple patterns
747    pub fn bgp(patterns: Vec<TriplePattern>) -> Self {
748        Algebra::Bgp(patterns)
749    }
750
751    /// Create a property path algebra node
752    pub fn property_path(subject: Term, path: PropertyPath, object: Term) -> Self {
753        Algebra::PropertyPath {
754            subject,
755            path,
756            object,
757        }
758    }
759
760    /// Create a join of two patterns
761    pub fn join(left: Algebra, right: Algebra) -> Self {
762        Algebra::Join {
763            left: Box::new(left),
764            right: Box::new(right),
765        }
766    }
767
768    /// Create a left join (optional)
769    pub fn left_join(left: Algebra, right: Algebra, filter: Option<Expression>) -> Self {
770        Algebra::LeftJoin {
771            left: Box::new(left),
772            right: Box::new(right),
773            filter,
774        }
775    }
776
777    /// Create a union of two patterns
778    pub fn union(left: Algebra, right: Algebra) -> Self {
779        Algebra::Union {
780            left: Box::new(left),
781            right: Box::new(right),
782        }
783    }
784
785    /// Create a filter pattern
786    pub fn filter(pattern: Algebra, condition: Expression) -> Self {
787        Algebra::Filter {
788            pattern: Box::new(pattern),
789            condition,
790        }
791    }
792
793    /// Create an extend pattern (BIND)
794    pub fn extend(pattern: Algebra, variable: Variable, expr: Expression) -> Self {
795        Algebra::Extend {
796            pattern: Box::new(pattern),
797            variable,
798            expr,
799        }
800    }
801
802    /// Create a projection
803    pub fn project(pattern: Algebra, variables: Vec<Variable>) -> Self {
804        Algebra::Project {
805            pattern: Box::new(pattern),
806            variables,
807        }
808    }
809
810    /// Create a slice (LIMIT/OFFSET)
811    pub fn slice(pattern: Algebra, offset: Option<usize>, limit: Option<usize>) -> Self {
812        Algebra::Slice {
813            pattern: Box::new(pattern),
814            offset,
815            limit,
816        }
817    }
818
819    /// Get all variables mentioned in this algebra expression
820    pub fn variables(&self) -> Vec<Variable> {
821        let mut vars = Vec::new();
822        self.collect_variables(&mut vars);
823        vars.sort();
824        vars.dedup();
825        vars
826    }
827
828    fn collect_variables(&self, vars: &mut Vec<Variable>) {
829        match self {
830            Algebra::Bgp(patterns) => {
831                for pattern in patterns {
832                    pattern.collect_variables(vars);
833                }
834            }
835            Algebra::PropertyPath {
836                subject,
837                path,
838                object,
839            } => {
840                subject.collect_variables(vars);
841                path.collect_variables(vars);
842                object.collect_variables(vars);
843            }
844            Algebra::Join { left, right }
845            | Algebra::Union { left, right }
846            | Algebra::Minus { left, right } => {
847                left.collect_variables(vars);
848                right.collect_variables(vars);
849            }
850            Algebra::LeftJoin {
851                left,
852                right,
853                filter,
854            } => {
855                left.collect_variables(vars);
856                right.collect_variables(vars);
857                if let Some(filter) = filter {
858                    filter.collect_variables(vars);
859                }
860            }
861            Algebra::Filter { pattern, condition } => {
862                pattern.collect_variables(vars);
863                condition.collect_variables(vars);
864            }
865            Algebra::Extend {
866                pattern,
867                variable,
868                expr,
869            } => {
870                pattern.collect_variables(vars);
871                vars.push(variable.clone());
872                expr.collect_variables(vars);
873            }
874            Algebra::Service { pattern, .. } => {
875                pattern.collect_variables(vars);
876            }
877            Algebra::Graph { pattern, .. } => {
878                pattern.collect_variables(vars);
879            }
880            Algebra::Project { pattern, variables } => {
881                pattern.collect_variables(vars);
882                vars.extend(variables.clone());
883            }
884            Algebra::Distinct { pattern }
885            | Algebra::Reduced { pattern }
886            | Algebra::Slice { pattern, .. } => {
887                pattern.collect_variables(vars);
888            }
889            Algebra::OrderBy {
890                pattern,
891                conditions,
892            } => {
893                pattern.collect_variables(vars);
894                for condition in conditions {
895                    condition.expr.collect_variables(vars);
896                }
897            }
898            Algebra::Group {
899                pattern,
900                variables: group_vars,
901                aggregates,
902            } => {
903                pattern.collect_variables(vars);
904                for group_var in group_vars {
905                    group_var.expr.collect_variables(vars);
906                    if let Some(alias) = &group_var.alias {
907                        vars.push(alias.clone());
908                    }
909                }
910                for (var, aggregate) in aggregates {
911                    vars.push(var.clone());
912                    aggregate.collect_variables(vars);
913                }
914            }
915            Algebra::Having { pattern, condition } => {
916                pattern.collect_variables(vars);
917                condition.collect_variables(vars);
918            }
919            Algebra::Values { variables, .. } => {
920                vars.extend(variables.clone());
921            }
922            Algebra::Table | Algebra::Zero | Algebra::Empty => {}
923        }
924    }
925}
926
927impl TriplePattern {
928    /// Create a new triple pattern
929    pub fn new(subject: Term, predicate: Term, object: Term) -> Self {
930        TriplePattern {
931            subject,
932            predicate,
933            object,
934        }
935    }
936
937    fn collect_variables(&self, vars: &mut Vec<Variable>) {
938        self.subject.collect_variables(vars);
939        self.predicate.collect_variables(vars);
940        self.object.collect_variables(vars);
941    }
942
943    /// Returns all variables in this triple pattern
944    pub fn variables(&self) -> Vec<Variable> {
945        let mut vars = Vec::new();
946        self.collect_variables(&mut vars);
947        vars
948    }
949}
950
951impl Term {
952    fn collect_variables(&self, vars: &mut Vec<Variable>) {
953        if let Term::Variable(var) = self {
954            vars.push(var.clone());
955        }
956    }
957}
958
959impl Expression {
960    fn collect_variables(&self, vars: &mut Vec<Variable>) {
961        match self {
962            Expression::Variable(var) => vars.push(var.clone()),
963            Expression::Function { args, .. } => {
964                for arg in args {
965                    arg.collect_variables(vars);
966                }
967            }
968            Expression::Binary { left, right, .. } => {
969                left.collect_variables(vars);
970                right.collect_variables(vars);
971            }
972            Expression::Unary { operand, .. } => {
973                operand.collect_variables(vars);
974            }
975            Expression::Conditional {
976                condition,
977                then_expr,
978                else_expr,
979            } => {
980                condition.collect_variables(vars);
981                then_expr.collect_variables(vars);
982                else_expr.collect_variables(vars);
983            }
984            Expression::Bound(var) => vars.push(var.clone()),
985            Expression::Exists(algebra) | Expression::NotExists(algebra) => {
986                algebra.collect_variables(vars);
987            }
988            Expression::Literal(_) | Expression::Iri(_) => {}
989        }
990    }
991}
992
993impl Aggregate {
994    fn collect_variables(&self, vars: &mut Vec<Variable>) {
995        match self {
996            Aggregate::Count {
997                expr: Some(expr), ..
998            }
999            | Aggregate::Sum { expr, .. }
1000            | Aggregate::Min { expr, .. }
1001            | Aggregate::Max { expr, .. }
1002            | Aggregate::Avg { expr, .. }
1003            | Aggregate::Sample { expr, .. }
1004            | Aggregate::GroupConcat { expr, .. } => {
1005                expr.collect_variables(vars);
1006            }
1007            Aggregate::Count { expr: None, .. } => {}
1008        }
1009    }
1010}
1011
1012/// Convenience macros for building algebra expressions
1013#[macro_export]
1014macro_rules! triple {
1015    ($s:expr_2021, $p:expr_2021, $o:expr_2021) => {
1016        TriplePattern::new($s, $p, $o)
1017    };
1018}
1019
1020#[macro_export]
1021macro_rules! var {
1022    ($name:expr_2021) => {
1023        Term::Variable($name.to_string())
1024    };
1025}
1026
1027#[macro_export]
1028macro_rules! iri {
1029    ($iri:expr_2021) => {
1030        Term::Iri(NamedNode::new($iri).expect("macro argument should be valid IRI"))
1031    };
1032}
1033
1034#[macro_export]
1035macro_rules! literal {
1036    ($value:expr_2021) => {
1037        Term::Literal(Literal::new($value.to_string(), None, None))
1038    };
1039    ($value:expr_2021, lang: $lang:expr_2021) => {
1040        Term::Literal(Literal::new(
1041            $value.to_string(),
1042            Some($lang.to_string()),
1043            None,
1044        ))
1045    };
1046    ($value:expr_2021, datatype: $dt:expr_2021) => {
1047        Term::Literal(Literal::new(
1048            $value.to_string(),
1049            None,
1050            Some(NamedNode::new($dt).expect("macro datatype argument should be valid IRI")),
1051        ))
1052    };
1053}
1054
1055impl Default for ServiceCapabilities {
1056    fn default() -> Self {
1057        Self {
1058            supports_projection: true,
1059            supports_filtering: true,
1060            supports_ordering: false,
1061            supports_aggregation: false,
1062            max_query_size: None,
1063        }
1064    }
1065}
1066
1067impl Literal {
1068    /// Create a new literal with value only
1069    pub fn new(value: String, language: Option<String>, datatype: Option<Iri>) -> Self {
1070        Literal {
1071            value,
1072            language,
1073            datatype,
1074        }
1075    }
1076
1077    /// Create a simple string literal
1078    pub fn string(value: impl Into<String>) -> Self {
1079        Literal {
1080            value: value.into(),
1081            language: None,
1082            datatype: None,
1083        }
1084    }
1085
1086    /// Create a language-tagged literal
1087    pub fn lang_string(value: impl Into<String>, language: impl Into<String>) -> Self {
1088        Literal {
1089            value: value.into(),
1090            language: Some(language.into()),
1091            datatype: None,
1092        }
1093    }
1094
1095    /// Create a typed literal
1096    pub fn typed(value: impl Into<String>, datatype: Iri) -> Self {
1097        Literal {
1098            value: value.into(),
1099            language: None,
1100            datatype: Some(datatype),
1101        }
1102    }
1103
1104    /// Create an integer literal
1105    pub fn integer(value: i64) -> Self {
1106        Literal::typed(
1107            value.to_string(),
1108            NamedNode::new("http://www.w3.org/2001/XMLSchema#integer")
1109                .expect("XSD URI is well-formed and valid"),
1110        )
1111    }
1112
1113    /// Create a decimal literal
1114    pub fn decimal(value: f64) -> Self {
1115        Literal::typed(
1116            value.to_string(),
1117            NamedNode::new("http://www.w3.org/2001/XMLSchema#decimal")
1118                .expect("XSD URI is well-formed and valid"),
1119        )
1120    }
1121
1122    /// Create a boolean literal
1123    pub fn boolean(value: bool) -> Self {
1124        Literal::typed(
1125            value.to_string(),
1126            NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean")
1127                .expect("XSD URI is well-formed and valid"),
1128        )
1129    }
1130
1131    /// Create a date literal
1132    pub fn date(value: impl Into<String>) -> Self {
1133        Literal::typed(
1134            value.into(),
1135            NamedNode::new("http://www.w3.org/2001/XMLSchema#date")
1136                .expect("XSD URI is well-formed and valid"),
1137        )
1138    }
1139
1140    /// Create a datetime literal
1141    pub fn datetime(value: impl Into<String>) -> Self {
1142        Literal::typed(
1143            value.into(),
1144            NamedNode::new("http://www.w3.org/2001/XMLSchema#dateTime")
1145                .expect("XSD URI is well-formed and valid"),
1146        )
1147    }
1148
1149    /// Get the effective datatype (with default string type if none specified)
1150    pub fn effective_datatype(&self) -> Iri {
1151        if let Some(ref dt) = self.datatype {
1152            dt.clone()
1153        } else if self.language.is_some() {
1154            NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
1155                .expect("RDF URI is well-formed and valid")
1156        } else {
1157            NamedNode::new("http://www.w3.org/2001/XMLSchema#string")
1158                .expect("XSD URI is well-formed and valid")
1159        }
1160    }
1161
1162    /// Check if this is a numeric literal
1163    pub fn is_numeric(&self) -> bool {
1164        if let Some(ref dt) = self.datatype {
1165            matches!(
1166                dt.as_str(),
1167                "http://www.w3.org/2001/XMLSchema#integer"
1168                    | "http://www.w3.org/2001/XMLSchema#decimal"
1169                    | "http://www.w3.org/2001/XMLSchema#float"
1170                    | "http://www.w3.org/2001/XMLSchema#double"
1171                    | "http://www.w3.org/2001/XMLSchema#long"
1172                    | "http://www.w3.org/2001/XMLSchema#int"
1173                    | "http://www.w3.org/2001/XMLSchema#short"
1174                    | "http://www.w3.org/2001/XMLSchema#byte"
1175                    | "http://www.w3.org/2001/XMLSchema#unsignedLong"
1176                    | "http://www.w3.org/2001/XMLSchema#unsignedInt"
1177                    | "http://www.w3.org/2001/XMLSchema#unsignedShort"
1178                    | "http://www.w3.org/2001/XMLSchema#unsignedByte"
1179                    | "http://www.w3.org/2001/XMLSchema#positiveInteger"
1180                    | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger"
1181                    | "http://www.w3.org/2001/XMLSchema#negativeInteger"
1182                    | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
1183            )
1184        } else {
1185            false
1186        }
1187    }
1188
1189    /// Check if this is a string literal
1190    pub fn is_string(&self) -> bool {
1191        self.datatype.is_none() && self.language.is_none()
1192    }
1193
1194    /// Check if this is a language-tagged literal
1195    pub fn is_lang_string(&self) -> bool {
1196        self.language.is_some()
1197    }
1198
1199    /// Check if this is a boolean literal
1200    pub fn is_boolean(&self) -> bool {
1201        if let Some(ref dt) = self.datatype {
1202            dt.as_str() == "http://www.w3.org/2001/XMLSchema#boolean"
1203        } else {
1204            false
1205        }
1206    }
1207
1208    /// Check if this is a date/time literal
1209    pub fn is_datetime(&self) -> bool {
1210        if let Some(ref dt) = self.datatype {
1211            matches!(
1212                dt.as_str(),
1213                "http://www.w3.org/2001/XMLSchema#date"
1214                    | "http://www.w3.org/2001/XMLSchema#dateTime"
1215                    | "http://www.w3.org/2001/XMLSchema#time"
1216                    | "http://www.w3.org/2001/XMLSchema#gYear"
1217                    | "http://www.w3.org/2001/XMLSchema#gYearMonth"
1218                    | "http://www.w3.org/2001/XMLSchema#gMonth"
1219                    | "http://www.w3.org/2001/XMLSchema#gMonthDay"
1220                    | "http://www.w3.org/2001/XMLSchema#gDay"
1221                    | "http://www.w3.org/2001/XMLSchema#duration"
1222                    | "http://www.w3.org/2001/XMLSchema#dayTimeDuration"
1223                    | "http://www.w3.org/2001/XMLSchema#yearMonthDuration"
1224            )
1225        } else {
1226            false
1227        }
1228    }
1229}
1230
1231impl Default for Statistics {
1232    fn default() -> Self {
1233        Self {
1234            cardinality: 0,
1235            selectivity: 1.0,
1236            available_indexes: Vec::new(),
1237            cost: 0.0,
1238            memory_estimate: 0,
1239            io_estimate: 0,
1240        }
1241    }
1242}
1243
1244impl Statistics {
1245    /// Create statistics with estimated cardinality
1246    pub fn with_cardinality(cardinality: u64) -> Self {
1247        Self {
1248            cardinality,
1249            selectivity: 1.0,
1250            available_indexes: Vec::new(),
1251            cost: cardinality as f64,
1252            memory_estimate: cardinality * 64, // Rough estimate
1253            io_estimate: cardinality / 1000,   // Pages
1254        }
1255    }
1256
1257    /// Update statistics with selectivity factor
1258    pub fn with_selectivity(mut self, selectivity: f64) -> Self {
1259        self.selectivity = selectivity.clamp(0.0, 1.0);
1260        self.cardinality = (self.cardinality as f64 * self.selectivity) as u64;
1261        self.cost *= self.selectivity;
1262        self
1263    }
1264
1265    /// Add available index
1266    pub fn with_index(mut self, index: IndexType) -> Self {
1267        self.available_indexes.push(index);
1268        // Reduce cost if good indexes are available
1269        self.cost *= 0.8;
1270        self
1271    }
1272
1273    /// Combine statistics (for joins)
1274    pub fn combine(&self, other: &Statistics) -> Self {
1275        Self {
1276            cardinality: self.cardinality * other.cardinality,
1277            selectivity: self.selectivity * other.selectivity,
1278            available_indexes: self
1279                .available_indexes
1280                .iter()
1281                .chain(other.available_indexes.iter())
1282                .cloned()
1283                .collect(),
1284            cost: self.cost + other.cost,
1285            memory_estimate: self.memory_estimate + other.memory_estimate,
1286            io_estimate: self.io_estimate + other.io_estimate,
1287        }
1288    }
1289}
1290
1291impl OptimizationHints {
1292    /// Create hints for BGP patterns
1293    pub fn for_bgp(patterns: &[TriplePattern]) -> Self {
1294        let mut hints = OptimizationHints::default();
1295
1296        // Estimate based on pattern complexity
1297        let cardinality = match patterns.len() {
1298            0 => 0,
1299            1 => 1000,                         // Single pattern estimate
1300            n => 1000 / (n as u64 * n as u64), // Selectivity decreases with more patterns
1301        };
1302
1303        hints.statistics = Some(Statistics::with_cardinality(cardinality));
1304
1305        // Suggest indexes based on pattern structure
1306        for pattern in patterns {
1307            if let Term::Variable(_) = pattern.subject {
1308                hints.preferred_indexes.push(IndexType::PredicateIndex);
1309            }
1310            if let Term::Variable(_) = pattern.predicate {
1311                hints.preferred_indexes.push(IndexType::SubjectIndex);
1312            }
1313            if let Term::Variable(_) = pattern.object {
1314                hints
1315                    .preferred_indexes
1316                    .push(IndexType::SubjectPredicateIndex);
1317            }
1318        }
1319
1320        hints
1321    }
1322
1323    /// Create hints for join operations
1324    pub fn for_join(left_hints: &OptimizationHints, right_hints: &OptimizationHints) -> Self {
1325        let mut hints = OptimizationHints::default();
1326
1327        // Combine statistics
1328        if let (Some(left_stats), Some(right_stats)) =
1329            (&left_hints.statistics, &right_hints.statistics)
1330        {
1331            hints.statistics = Some(left_stats.combine(right_stats));
1332
1333            // Choose join algorithm based on cardinalities
1334            hints.join_algorithm = Some(match (left_stats.cardinality, right_stats.cardinality) {
1335                (l, r) if l < 1000 && r < 1000 => JoinAlgorithm::NestedLoopJoin,
1336                (l, r) if l > 100000 || r > 100000 => JoinAlgorithm::SortMergeJoin,
1337                _ => JoinAlgorithm::HashJoin,
1338            });
1339        }
1340
1341        // Inherit index preferences
1342        hints.preferred_indexes = left_hints
1343            .preferred_indexes
1344            .iter()
1345            .chain(right_hints.preferred_indexes.iter())
1346            .cloned()
1347            .collect();
1348
1349        hints
1350    }
1351
1352    /// Create hints for filter operations
1353    pub fn for_filter(pattern_hints: &OptimizationHints, condition: &Expression) -> Self {
1354        let mut hints = pattern_hints.clone();
1355
1356        // Apply filter selectivity
1357        if let Some(ref mut stats) = hints.statistics {
1358            let filter_selectivity = estimate_filter_selectivity(condition);
1359            *stats = stats.clone().with_selectivity(filter_selectivity);
1360        }
1361
1362        // Suggest early filter placement for selective filters
1363        hints.filter_placement = if estimate_filter_selectivity(condition) < 0.1 {
1364            FilterPlacement::Early
1365        } else {
1366            FilterPlacement::Optimal
1367        };
1368
1369        hints
1370    }
1371}
1372
1373impl AnnotatedAlgebra {
1374    /// Create annotated algebra with default hints
1375    pub fn new(algebra: Algebra) -> Self {
1376        let hints = match &algebra {
1377            Algebra::Bgp(patterns) => OptimizationHints::for_bgp(patterns),
1378            Algebra::Join { left: _, right: _ } => {
1379                // For now, use default hints - in practice, we'd analyze the children
1380                OptimizationHints::default()
1381            }
1382            Algebra::Filter { .. } => OptimizationHints::default(),
1383            _ => OptimizationHints::default(),
1384        };
1385
1386        Self {
1387            algebra,
1388            hints,
1389            context: None,
1390        }
1391    }
1392
1393    /// Create annotated algebra with custom hints
1394    pub fn with_hints(algebra: Algebra, hints: OptimizationHints) -> Self {
1395        Self {
1396            algebra,
1397            hints,
1398            context: None,
1399        }
1400    }
1401
1402    /// Add execution context
1403    pub fn with_context(mut self, context: String) -> Self {
1404        self.context = Some(context);
1405        self
1406    }
1407
1408    /// Get estimated cost
1409    pub fn estimated_cost(&self) -> f64 {
1410        self.hints
1411            .statistics
1412            .as_ref()
1413            .map(|s| s.cost)
1414            .unwrap_or(0.0)
1415    }
1416
1417    /// Get estimated cardinality
1418    pub fn estimated_cardinality(&self) -> u64 {
1419        self.hints
1420            .statistics
1421            .as_ref()
1422            .map(|s| s.cardinality)
1423            .unwrap_or(0)
1424    }
1425}
1426
1427/// Estimate selectivity of a filter condition (rough heuristic)
1428fn estimate_filter_selectivity(condition: &Expression) -> f64 {
1429    match condition {
1430        Expression::Binary { op, .. } => match op {
1431            BinaryOperator::Equal => 0.01,    // Very selective
1432            BinaryOperator::NotEqual => 0.99, // Not selective
1433            BinaryOperator::Less
1434            | BinaryOperator::LessEqual
1435            | BinaryOperator::Greater
1436            | BinaryOperator::GreaterEqual => 0.33, // Range
1437            BinaryOperator::And => 0.25,      // Compound - more selective
1438            BinaryOperator::Or => 0.75,       // Compound - less selective
1439            _ => 0.5,                         // Default
1440        },
1441        Expression::Function { name, .. } => match name.as_str() {
1442            "regex" | "contains" => 0.2,              // Text search
1443            "bound" => 0.8,                           // Usually true
1444            "isIRI" | "isLiteral" | "isBlank" => 0.3, // Type checks
1445            _ => 0.5,                                 // Default
1446        },
1447        Expression::Unary {
1448            op: UnaryOperator::Not,
1449            ..
1450        } => 0.5, // Invert selectivity (simplified)
1451        Expression::Unary { .. } => 0.5,
1452        _ => 0.5, // Default selectivity
1453    }
1454}
1455
1456/// Evaluation context for query execution
1457#[derive(Debug, Clone, Default)]
1458pub struct EvaluationContext {
1459    /// Variable bindings
1460    pub bindings: HashMap<Variable, Term>,
1461    /// Dataset being queried
1462    pub dataset: Option<String>,
1463    /// Query execution options
1464    pub options: HashMap<String, String>,
1465}
1466
1467#[cfg(test)]
1468mod algebra_tests;