// oxirs_core/query/algebra.rs

//! SPARQL 1.1 Query Algebra representation
//!
//! Based on the W3C SPARQL 1.1 Query specification:
//! <https://www.w3.org/TR/sparql11-query/#sparqlQuery>
5
6use crate::model::*;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::fmt;
10
11// Re-export TriplePattern from model for public access
12pub use crate::model::pattern::TriplePattern;
13
14/// A property path expression for navigating RDF graphs
15#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
16pub enum PropertyPath {
17    /// A simple predicate path
18    Predicate(NamedNode),
19    /// Inverse path: ^path
20    Inverse(Box<PropertyPath>),
21    /// Sequence path: path1 / path2
22    Sequence(Box<PropertyPath>, Box<PropertyPath>),
23    /// Alternative path: path1 | path2
24    Alternative(Box<PropertyPath>, Box<PropertyPath>),
25    /// Zero or more: path*
26    ZeroOrMore(Box<PropertyPath>),
27    /// One or more: path+
28    OneOrMore(Box<PropertyPath>),
29    /// Zero or one: path?
30    ZeroOrOne(Box<PropertyPath>),
31    /// Negated property set: !(p1 | p2 | ...)
32    NegatedPropertySet(Vec<NamedNode>),
33}
34
35impl fmt::Display for PropertyPath {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        match self {
38            PropertyPath::Predicate(p) => write!(f, "{p}"),
39            PropertyPath::Inverse(p) => write!(f, "^{p}"),
40            PropertyPath::Sequence(a, b) => write!(f, "({a} / {b})"),
41            PropertyPath::Alternative(a, b) => write!(f, "({a} | {b})"),
42            PropertyPath::ZeroOrMore(p) => write!(f, "({p})*"),
43            PropertyPath::OneOrMore(p) => write!(f, "({p})+"),
44            PropertyPath::ZeroOrOne(p) => write!(f, "({p})?"),
45            PropertyPath::NegatedPropertySet(ps) => {
46                write!(f, "!(")?;
47                for (i, p) in ps.iter().enumerate() {
48                    if i > 0 {
49                        write!(f, " | ")?;
50                    }
51                    write!(f, "{p}")?;
52                }
53                write!(f, ")")
54            }
55        }
56    }
57}
58
59/// A SPARQL expression
60#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
61pub enum Expression {
62    /// A constant term
63    Term(Term),
64    /// A variable
65    Variable(Variable),
66    /// Logical AND
67    And(Box<Expression>, Box<Expression>),
68    /// Logical OR
69    Or(Box<Expression>, Box<Expression>),
70    /// Logical NOT
71    Not(Box<Expression>),
72    /// Equality: =
73    Equal(Box<Expression>, Box<Expression>),
74    /// Inequality: !=
75    NotEqual(Box<Expression>, Box<Expression>),
76    /// Less than: <
77    Less(Box<Expression>, Box<Expression>),
78    /// Less than or equal: <=
79    LessOrEqual(Box<Expression>, Box<Expression>),
80    /// Greater than: >
81    Greater(Box<Expression>, Box<Expression>),
82    /// Greater than or equal: >=
83    GreaterOrEqual(Box<Expression>, Box<Expression>),
84    /// Addition: +
85    Add(Box<Expression>, Box<Expression>),
86    /// Subtraction: -
87    Subtract(Box<Expression>, Box<Expression>),
88    /// Multiplication: *
89    Multiply(Box<Expression>, Box<Expression>),
90    /// Division: /
91    Divide(Box<Expression>, Box<Expression>),
92    /// Unary plus: +expr
93    UnaryPlus(Box<Expression>),
94    /// Unary minus: -expr
95    UnaryMinus(Box<Expression>),
96    /// IN expression
97    In(Box<Expression>, Vec<Expression>),
98    /// NOT IN expression
99    NotIn(Box<Expression>, Vec<Expression>),
100    /// EXISTS pattern
101    Exists(Box<GraphPattern>),
102    /// NOT EXISTS pattern
103    NotExists(Box<GraphPattern>),
104    /// Function call
105    FunctionCall(Function, Vec<Expression>),
106    /// Bound variable test
107    Bound(Variable),
108    /// IF expression
109    If(Box<Expression>, Box<Expression>, Box<Expression>),
110    /// COALESCE expression
111    Coalesce(Vec<Expression>),
112    /// Literal value
113    Literal(crate::model::Literal),
114    /// Test if term is IRI
115    IsIri(Box<Expression>),
116    /// Test if term is blank node
117    IsBlank(Box<Expression>),
118    /// Test if term is literal
119    IsLiteral(Box<Expression>),
120    /// Test if term is numeric
121    IsNumeric(Box<Expression>),
122    /// String value of term
123    Str(Box<Expression>),
124    /// Regular expression matching
125    Regex(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
126}
127
128/// Built-in SPARQL functions
129#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
130pub enum Function {
131    // String functions
132    Str,
133    Lang,
134    LangMatches,
135    Datatype,
136    Iri,
137    Bnode,
138    StrDt,
139    StrLang,
140    StrLen,
141    SubStr,
142    UCase,
143    LCase,
144    StrStarts,
145    StrEnds,
146    Contains,
147    StrBefore,
148    StrAfter,
149    Encode,
150    Concat,
151    Replace,
152    Regex,
153
154    // Numeric functions
155    Abs,
156    Round,
157    Ceil,
158    Floor,
159    Rand,
160
161    // Date/Time functions
162    Now,
163    Year,
164    Month,
165    Day,
166    Hours,
167    Minutes,
168    Seconds,
169    Timezone,
170    Tz,
171
172    // Hash functions
173    Md5,
174    Sha1,
175    Sha256,
176    Sha384,
177    Sha512,
178
179    // Type checking
180    IsIri,
181    IsBlank,
182    IsLiteral,
183    IsNumeric,
184
185    // Custom function
186    Custom(NamedNode),
187}
188
189/// A triple pattern in SPARQL algebra (all positions must be specified)
190#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
191pub struct AlgebraTriplePattern {
192    pub subject: TermPattern,
193    pub predicate: TermPattern,
194    pub object: TermPattern,
195}
196
197impl AlgebraTriplePattern {
198    /// Create a new algebra triple pattern
199    pub fn new(subject: TermPattern, predicate: TermPattern, object: TermPattern) -> Self {
200        Self {
201            subject,
202            predicate,
203            object,
204        }
205    }
206
207    /// Formats using the SPARQL S-Expression syntax
208    pub fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result {
209        f.write_str("(triple ")?;
210        self.subject.fmt_sse(f)?;
211        f.write_str(" ")?;
212        self.predicate.fmt_sse(f)?;
213        f.write_str(" ")?;
214        self.object.fmt_sse(f)?;
215        f.write_str(")")
216    }
217}
218
219impl fmt::Display for AlgebraTriplePattern {
220    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221        write!(f, "{} {} {}", self.subject, self.predicate, self.object)
222    }
223}
224
225impl From<crate::model::pattern::TriplePattern> for Option<AlgebraTriplePattern> {
226    fn from(pattern: crate::model::pattern::TriplePattern) -> Self {
227        use crate::model::pattern::{ObjectPattern, PredicatePattern, SubjectPattern};
228
229        let subject = match pattern.subject? {
230            SubjectPattern::NamedNode(n) => TermPattern::NamedNode(n),
231            SubjectPattern::BlankNode(b) => TermPattern::BlankNode(b),
232            SubjectPattern::Variable(v) => TermPattern::Variable(v),
233        };
234
235        let predicate = match pattern.predicate? {
236            PredicatePattern::NamedNode(n) => TermPattern::NamedNode(n),
237            PredicatePattern::Variable(v) => TermPattern::Variable(v),
238        };
239
240        let object = match pattern.object? {
241            ObjectPattern::NamedNode(n) => TermPattern::NamedNode(n),
242            ObjectPattern::BlankNode(b) => TermPattern::BlankNode(b),
243            ObjectPattern::Literal(l) => TermPattern::Literal(l),
244            ObjectPattern::Variable(v) => TermPattern::Variable(v),
245        };
246
247        Some(AlgebraTriplePattern::new(subject, predicate, object))
248    }
249}
250
251/// A term pattern (can be a concrete term or variable)
252#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
253pub enum TermPattern {
254    NamedNode(NamedNode),
255    BlankNode(BlankNode),
256    Literal(Literal),
257    Variable(Variable),
258    /// Quoted triple pattern for RDF-star support
259    QuotedTriple(Box<AlgebraTriplePattern>),
260}
261
262impl From<Variable> for TermPattern {
263    fn from(v: Variable) -> Self {
264        TermPattern::Variable(v)
265    }
266}
267
268impl From<NamedNode> for TermPattern {
269    fn from(n: NamedNode) -> Self {
270        TermPattern::NamedNode(n)
271    }
272}
273
274impl From<BlankNode> for TermPattern {
275    fn from(b: BlankNode) -> Self {
276        TermPattern::BlankNode(b)
277    }
278}
279
280impl From<Literal> for TermPattern {
281    fn from(l: Literal) -> Self {
282        TermPattern::Literal(l)
283    }
284}
285
286impl TermPattern {
287    /// Check if this pattern is a variable
288    pub fn is_variable(&self) -> bool {
289        matches!(self, TermPattern::Variable(_))
290    }
291
292    /// Formats using the SPARQL S-Expression syntax
293    pub fn fmt_sse(&self, f: &mut impl fmt::Write) -> fmt::Result {
294        match self {
295            TermPattern::NamedNode(node) => write!(f, "{node}"),
296            TermPattern::BlankNode(node) => write!(f, "{node}"),
297            TermPattern::Literal(literal) => write!(f, "{literal}"),
298            TermPattern::Variable(var) => write!(f, "{var}"),
299            TermPattern::QuotedTriple(triple) => {
300                write!(f, "<<")?;
301                triple.subject.fmt_sse(f)?;
302                write!(f, " ")?;
303                triple.predicate.fmt_sse(f)?;
304                write!(f, " ")?;
305                triple.object.fmt_sse(f)?;
306                write!(f, ">>")
307            }
308        }
309    }
310}
311
312impl fmt::Display for TermPattern {
313    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
314        match self {
315            TermPattern::NamedNode(n) => write!(f, "{n}"),
316            TermPattern::BlankNode(b) => write!(f, "{b}"),
317            TermPattern::Literal(l) => write!(f, "{l}"),
318            TermPattern::Variable(v) => write!(f, "{v}"),
319            TermPattern::QuotedTriple(triple) => {
320                write!(
321                    f,
322                    "<<{} {} {}>>",
323                    triple.subject, triple.predicate, triple.object
324                )
325            }
326        }
327    }
328}
329
330/// A graph pattern
331#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
332pub enum GraphPattern {
333    /// Basic graph pattern (set of triple patterns)
334    Bgp(Vec<AlgebraTriplePattern>),
335    /// Path pattern
336    Path {
337        subject: TermPattern,
338        path: PropertyPath,
339        object: TermPattern,
340    },
341    /// Join of two patterns
342    Join(Box<GraphPattern>, Box<GraphPattern>),
343    /// Left join (OPTIONAL)
344    LeftJoin {
345        left: Box<GraphPattern>,
346        right: Box<GraphPattern>,
347        condition: Option<Expression>,
348    },
349    /// Filter pattern
350    Filter {
351        expr: Expression,
352        inner: Box<GraphPattern>,
353    },
354    /// Union of patterns
355    Union(Box<GraphPattern>, Box<GraphPattern>),
356    /// Graph pattern (GRAPH)
357    Graph {
358        graph_name: TermPattern,
359        inner: Box<GraphPattern>,
360    },
361    /// Service pattern (federated query)
362    Service {
363        service: TermPattern,
364        inner: Box<GraphPattern>,
365        silent: bool,
366    },
367    /// Group pattern
368    Group {
369        inner: Box<GraphPattern>,
370        variables: Vec<Variable>,
371        aggregates: Vec<(Variable, AggregateExpression)>,
372    },
373    /// Extend pattern (BIND)
374    Extend {
375        inner: Box<GraphPattern>,
376        variable: Variable,
377        expression: Expression,
378    },
379    /// Minus pattern
380    Minus(Box<GraphPattern>, Box<GraphPattern>),
381    /// Values pattern
382    Values {
383        variables: Vec<Variable>,
384        bindings: Vec<Vec<Option<Term>>>,
385    },
386    /// Order by pattern
387    OrderBy {
388        inner: Box<GraphPattern>,
389        order_by: Vec<OrderExpression>,
390    },
391    /// Project pattern
392    Project {
393        inner: Box<GraphPattern>,
394        variables: Vec<Variable>,
395    },
396    /// Distinct pattern
397    Distinct(Box<GraphPattern>),
398    /// Reduced pattern
399    Reduced(Box<GraphPattern>),
400    /// Slice pattern (LIMIT/OFFSET)
401    Slice {
402        inner: Box<GraphPattern>,
403        offset: usize,
404        limit: Option<usize>,
405    },
406}
407
408/// Aggregate expressions
409#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
410pub enum AggregateExpression {
411    Count {
412        expr: Option<Box<Expression>>,
413        distinct: bool,
414    },
415    Sum {
416        expr: Box<Expression>,
417        distinct: bool,
418    },
419    Avg {
420        expr: Box<Expression>,
421        distinct: bool,
422    },
423    Min {
424        expr: Box<Expression>,
425        distinct: bool,
426    },
427    Max {
428        expr: Box<Expression>,
429        distinct: bool,
430    },
431    GroupConcat {
432        expr: Box<Expression>,
433        distinct: bool,
434        separator: Option<String>,
435    },
436    Sample {
437        expr: Box<Expression>,
438        distinct: bool,
439    },
440}
441
442/// Order expression for ORDER BY
443#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
444pub enum OrderExpression {
445    Asc(Expression),
446    Desc(Expression),
447}
448
449/// SPARQL query forms
450#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
451pub enum QueryForm {
452    /// SELECT query
453    Select {
454        /// SELECT * or specific variables
455        variables: SelectVariables,
456        /// WHERE clause pattern
457        where_clause: GraphPattern,
458        /// Solution modifiers
459        distinct: bool,
460        reduced: bool,
461        order_by: Vec<OrderExpression>,
462        offset: usize,
463        limit: Option<usize>,
464    },
465    /// CONSTRUCT query
466    Construct {
467        /// Template for constructing triples
468        template: Vec<AlgebraTriplePattern>,
469        /// WHERE clause pattern
470        where_clause: GraphPattern,
471        /// Solution modifiers
472        order_by: Vec<OrderExpression>,
473        offset: usize,
474        limit: Option<usize>,
475    },
476    /// DESCRIBE query
477    Describe {
478        /// Resources to describe
479        resources: Vec<TermPattern>,
480        /// Optional WHERE clause
481        where_clause: Option<GraphPattern>,
482        /// Solution modifiers
483        order_by: Vec<OrderExpression>,
484        offset: usize,
485        limit: Option<usize>,
486    },
487    /// ASK query
488    Ask {
489        /// Pattern to check
490        where_clause: GraphPattern,
491    },
492}
493
494/// Variables selection in SELECT
495#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
496pub enum SelectVariables {
497    /// SELECT *
498    All,
499    /// SELECT ?var1 ?var2 ...
500    Specific(Vec<Variable>),
501}
502
503/// A complete SPARQL query
504#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
505pub struct Query {
506    /// Base IRI for relative IRI resolution
507    pub base: Option<NamedNode>,
508    /// Namespace prefixes
509    pub prefixes: HashMap<String, NamedNode>,
510    /// Query form
511    pub form: QueryForm,
512    /// Dataset specification
513    pub dataset: Dataset,
514}
515
516/// Dataset specification for a query
517#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
518pub struct Dataset {
519    /// Default graph IRIs (FROM)
520    pub default: Vec<NamedNode>,
521    /// Named graph IRIs (FROM NAMED)
522    pub named: Vec<NamedNode>,
523}
524
525/// SPARQL Update operations
526#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
527pub enum UpdateOperation {
528    /// INSERT DATA
529    InsertData { data: Vec<Quad> },
530    /// DELETE DATA
531    DeleteData { data: Vec<Quad> },
532    /// DELETE WHERE
533    DeleteWhere { pattern: Vec<QuadPattern> },
534    /// INSERT/DELETE with WHERE
535    Modify {
536        delete: Option<Vec<QuadPattern>>,
537        insert: Option<Vec<QuadPattern>>,
538        where_clause: Box<GraphPattern>,
539        using: Dataset,
540    },
541    /// LOAD
542    Load {
543        source: NamedNode,
544        destination: Option<NamedNode>,
545        silent: bool,
546    },
547    /// CLEAR
548    Clear { graph: GraphTarget, silent: bool },
549    /// CREATE
550    Create { graph: NamedNode, silent: bool },
551    /// DROP
552    Drop { graph: GraphTarget, silent: bool },
553    /// COPY
554    Copy {
555        source: GraphTarget,
556        destination: GraphTarget,
557        silent: bool,
558    },
559    /// MOVE
560    Move {
561        source: GraphTarget,
562        destination: GraphTarget,
563        silent: bool,
564    },
565    /// ADD
566    Add {
567        source: GraphTarget,
568        destination: GraphTarget,
569        silent: bool,
570    },
571}
572
573/// Graph targets for update operations
574#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
575pub enum GraphTarget {
576    Default,
577    Named(NamedNode),
578    All,
579}
580
581/// Quad pattern for updates
582#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
583pub struct QuadPattern {
584    pub subject: TermPattern,
585    pub predicate: TermPattern,
586    pub object: TermPattern,
587    pub graph: Option<TermPattern>,
588}
589
590/// A SPARQL Update request
591#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
592pub struct Update {
593    /// Base IRI for relative IRI resolution
594    pub base: Option<NamedNode>,
595    /// Namespace prefixes
596    pub prefixes: HashMap<String, NamedNode>,
597    /// Update operations
598    pub operations: Vec<UpdateOperation>,
599}