#[cfg(feature = "alloc")]
use alloc::{boxed::Box, vec::Vec};
use serde::{Deserialize, Serialize};
mod parser;
mod property_path;
mod scanner;
pub(crate) use parser::compute_ast_features;
pub(crate) use property_path::parse_property_path;
/// Fixed-size set of ten `f32` features describing a SPARQL query.
///
/// The type is plain `Copy` data, defaults to all zeros, and can be
/// (de)serialized through serde. [`SparqlAstFeatures::clamp`] bounds every
/// field to `[0.0, 1.0]`.
///
/// NOTE(review): values are presumably produced by `compute_ast_features`
/// (re-exported from the `parser` module); how each raw count is scaled into
/// a feature is decided there and is not visible in this file — confirm.
#[derive(Debug, Clone, Copy, Default, PartialEq, Serialize, Deserialize)]
pub struct SparqlAstFeatures {
/// Feature for join/nesting depth. The tests in this file only require it
/// to be positive for nested `OPTIONAL` groups.
pub join_depth: f32,
/// Feature for `OPTIONAL` usage; expected to be `0.0` for an empty query
/// and positive when an `OPTIONAL` group is present.
pub optional_count: f32,
/// Feature for `FILTER` usage; expected to be `0.0` for an empty query and
/// positive when a `FILTER` is present.
pub filter_count: f32,
/// Feature for `UNION` usage; expected to be `0.0` for an empty query and
/// positive when a `UNION` is present.
pub union_branch_count: f32,
/// Flag-like feature: the tests expect exactly `1.0` for `SELECT DISTINCT`
/// and `0.0` for an empty query.
pub has_distinct: f32,
/// Flag-like feature: the tests expect exactly `1.0` when a `HAVING`
/// clause is present and `0.0` for an empty query.
pub has_having: f32,
/// Feature for nested `SELECT` usage; expected to be `0.0` for an empty
/// query and positive when a subquery is present.
pub subquery_count: f32,
/// Feature for property-path expressions — presumably derived from the
/// number of paths found; TODO confirm against the parser.
pub path_expr_count: f32,
/// Feature for literals; the tests expect it to be positive when a triple
/// contains a string literal.
pub literal_count: f32,
/// Feature for blank nodes — presumably derived from the number of blank
/// nodes found; TODO confirm against the parser.
pub blank_node_count: f32,
}
impl SparqlAstFeatures {
    /// Returns a copy of `self` with every feature restricted to `[0.0, 1.0]`.
    ///
    /// Values below `0.0` become `0.0`, values above `1.0` become `1.0`, and
    /// values already inside the interval are returned unchanged. Each field
    /// goes through [`f32::clamp`], so a NaN field stays NaN.
    #[must_use]
    pub fn clamp(self) -> Self {
        // The one place that defines the target interval for all features.
        let unit = |value: f32| value.clamp(0.0_f32, 1.0_f32);

        // Exhaustive destructuring: adding a field to the struct without
        // handling it here becomes a compile error instead of a silent miss.
        let Self {
            join_depth,
            optional_count,
            filter_count,
            union_branch_count,
            has_distinct,
            has_having,
            subquery_count,
            path_expr_count,
            literal_count,
            blank_node_count,
        } = self;

        Self {
            join_depth: unit(join_depth),
            optional_count: unit(optional_count),
            filter_count: unit(filter_count),
            union_branch_count: unit(union_branch_count),
            has_distinct: unit(has_distinct),
            has_having: unit(has_having),
            subquery_count: unit(subquery_count),
            path_expr_count: unit(path_expr_count),
            literal_count: unit(literal_count),
            blank_node_count: unit(blank_node_count),
        }
    }
}
/// One element of a parsed query body, as modelled by this crate's SPARQL
/// front end.
///
/// The type is recursive: `Optional`, `Union` and `Service` hold nested
/// patterns, and `Subquery` holds a whole [`SparqlAst`].
///
/// NOTE(review): variant meanings below are inferred from their names and
/// payload types; the `parser` module that builds these values is the
/// authority — confirm there.
#[derive(Debug, Clone)]
pub(crate) enum GraphPattern {
/// A group of triple patterns (presumably a SPARQL basic graph pattern),
/// stored only as counters rather than as the triples themselves.
Bgp {
/// Counter of triple patterns in the group.
triples: u32,
/// Counter of literals in the group.
literals: u32,
/// Counter of blank nodes in the group.
blank_nodes: u32,
},
/// Patterns nested inside an `OPTIONAL` group.
Optional(Vec<GraphPattern>),
/// A `UNION`; presumably one inner `Vec` per branch — TODO confirm.
Union(Vec<Vec<GraphPattern>>),
/// Marker for a `FILTER`; carries no payload.
Filter,
/// Marker for a `GROUP BY`; carries no payload.
GroupBy,
/// Marker for a `HAVING`; carries no payload.
Having,
/// A nested query, boxed so the variant stores a pointer instead of an
/// inline `SparqlAst`.
Subquery(Box<SparqlAst>),
/// Patterns nested inside a `SERVICE` block.
Service(Vec<GraphPattern>),
/// Marker for a `BIND`; carries no payload.
Bind,
/// Marker for a `VALUES` block; carries no payload.
Values,
}
/// Lightweight summary of a parsed SPARQL query: modifier flags, counters and
/// the list of body patterns.
///
/// `Default` yields an AST with every flag `false`, every counter `0` and no
/// patterns.
///
/// NOTE(review): field meanings are inferred from their names; the `parser`
/// module that fills them in is not visible here — confirm there.
#[derive(Debug, Clone, Default)]
pub(crate) struct SparqlAst {
/// Presumably set when the query uses the `DISTINCT` modifier.
pub has_distinct: bool,
/// Presumably set when the query uses the `REDUCED` modifier.
pub has_reduced: bool,
/// Graph patterns of the query body.
pub patterns: Vec<GraphPattern>,
/// Presumably set when the query has a `HAVING` clause.
pub has_having: bool,
/// Counter related to `GROUP BY` (presumably the number of grouping
/// expressions — TODO confirm).
pub group_by_count: u32,
/// Counter related to `ORDER BY` (presumably the number of ordering
/// expressions — TODO confirm).
pub order_by_count: u32,
/// Presumably set when the query has a `LIMIT` clause.
pub has_limit: bool,
/// Counter of property-path expressions (presumably — TODO confirm).
pub path_count: u32,
}
#[cfg(test)]
mod tests {
    use super::property_path::parse_property_path;
    use super::*;

    /// An empty query must produce zero for each structural feature checked here.
    #[test]
    fn test_empty_query_features() {
        let feats = compute_ast_features("");
        let expected_zero = [
            feats.optional_count,
            feats.filter_count,
            feats.union_branch_count,
            feats.has_distinct,
            feats.has_having,
            feats.subquery_count,
        ];
        for value in expected_zero {
            assert_eq!(value, 0.0);
        }
    }

    /// An `OPTIONAL` group must register in `optional_count`.
    #[test]
    fn test_optional_detected() {
        let feats = compute_ast_features(
            "SELECT ?s WHERE { ?s ?p ?o . OPTIONAL { ?s a <http://example.org/T> } }",
        );
        let optional = feats.optional_count;
        assert!(optional > 0.0, "expected optional_count > 0, got {}", optional);
    }

    /// A `FILTER` must register in `filter_count`.
    #[test]
    fn test_filter_detected() {
        let feats = compute_ast_features("SELECT ?s WHERE { ?s ?p ?o . FILTER(?o > 5) }");
        let filters = feats.filter_count;
        assert!(filters > 0.0, "expected filter_count > 0, got {}", filters);
    }

    /// A two-branch `UNION` must register in `union_branch_count`.
    #[test]
    fn test_union_detected() {
        let feats = compute_ast_features(
            "SELECT ?s WHERE { { ?s <http://a.org/p> ?o } UNION { ?s <http://b.org/p> ?o } }",
        );
        let branches = feats.union_branch_count;
        assert!(
            branches > 0.0,
            "expected union_branch_count > 0, got {}",
            branches
        );
    }

    /// `SELECT DISTINCT` must set `has_distinct` to exactly `1.0`.
    #[test]
    fn test_distinct_detected() {
        let feats = compute_ast_features("SELECT DISTINCT ?s WHERE { ?s ?p ?o }");
        let distinct = feats.has_distinct;
        assert_eq!(
            distinct, 1.0,
            "expected has_distinct = 1.0, got {}",
            distinct
        );
    }

    /// Nested `OPTIONAL` groups must show up in both depth and optional features.
    #[test]
    fn test_nested_optional_depth() {
        let feats = compute_ast_features(
            "SELECT ?s WHERE { ?s ?p ?o . OPTIONAL { ?s ?q ?r . OPTIONAL { ?r ?t ?u } } }",
        );

        let depth = feats.join_depth;
        assert!(depth > 0.0, "expected join_depth > 0, got {}", depth);

        let optional = feats.optional_count;
        assert!(optional > 0.0, "expected optional_count > 0, got {}", optional);
    }

    /// A nested `SELECT` must register in `subquery_count`.
    #[test]
    fn test_subquery_detected() {
        // The raw string is kept exactly as it was, including its line breaks.
        let query = r#"
SELECT ?s ?count WHERE {
?s ?p ?o .
{ SELECT ?s (COUNT(?o) AS ?count) WHERE { ?s ?p ?o } GROUP BY ?s }
}
"#;
        let feats = compute_ast_features(query);
        let subqueries = feats.subquery_count;
        assert!(
            subqueries > 0.0,
            "expected subquery_count > 0, got {}",
            subqueries
        );
    }

    /// A string literal in object position must register in `literal_count`.
    #[test]
    fn test_literal_count() {
        let feats =
            compute_ast_features(r#"SELECT ?s WHERE { ?s <http://schema.org/name> "hello" }"#);
        let literals = feats.literal_count;
        assert!(literals > 0.0, "expected literal_count > 0, got {}", literals);
    }

    /// A `HAVING` clause must set `has_having` to exactly `1.0`.
    #[test]
    fn test_having_detected() {
        // The raw string is kept exactly as it was, including its line breaks.
        let query = r#"
SELECT ?s (COUNT(?o) AS ?count) WHERE { ?s ?p ?o }
GROUP BY ?s HAVING (COUNT(?o) > 5)
"#;
        let feats = compute_ast_features(query);
        let having = feats.has_having;
        assert_eq!(having, 1.0, "expected has_having = 1.0, got {}", having);
    }

    /// `clamp` must pull out-of-range values back to the nearest bound.
    #[test]
    fn test_clamp() {
        let raw = SparqlAstFeatures {
            join_depth: 2.0,
            optional_count: -1.0,
            filter_count: 0.5,
            union_branch_count: 1.5,
            has_distinct: 0.0,
            has_having: 1.0,
            subquery_count: 0.0,
            path_expr_count: 0.3,
            literal_count: 0.9,
            blank_node_count: -0.1,
        };
        let bounded = raw.clamp();

        // Values above the interval collapse to the upper bound ...
        assert_eq!(bounded.join_depth, 1.0);
        // ... and values below it collapse to the lower bound.
        assert_eq!(bounded.optional_count, 0.0);
        assert_eq!(bounded.union_branch_count, 1.0);
        assert_eq!(bounded.blank_node_count, 0.0);
    }

    /// An inverse path (`^iri`) must still report its underlying IRI.
    #[test]
    fn test_path_inverse_base_iris() {
        // `path` is bound first in case `base_iris` borrows from it.
        let path = parse_property_path("^foaf:knows");
        let iris = path.base_iris();
        let found = iris.contains(&"foaf:knows".to_owned());
        assert!(found, "expected 'foaf:knows' in {:?}", iris);
    }

    /// A sequence path (`a/b`) must report both of its IRIs.
    #[test]
    fn test_path_sequence_base_iris() {
        let path = parse_property_path("foaf:knows/foaf:name");
        let iris = path.base_iris();
        let found_all = ["foaf:knows", "foaf:name"]
            .iter()
            .all(|iri| iris.contains(&(*iri).to_owned()));
        assert!(
            found_all,
            "expected both foaf:knows and foaf:name in {:?}",
            iris
        );
    }

    /// An alternative path (`a|b`) must report both of its IRIs.
    #[test]
    fn test_path_alternative_base_iris() {
        let path = parse_property_path("foaf:knows|foaf:friend");
        let iris = path.base_iris();
        let found_all = ["foaf:knows", "foaf:friend"]
            .iter()
            .all(|iri| iris.contains(&(*iri).to_owned()));
        assert!(
            found_all,
            "expected both foaf:knows and foaf:friend in {:?}",
            iris
        );
    }

    /// A zero-or-more path (`iri*`) must still report its underlying IRI.
    #[test]
    fn test_path_zero_or_more_base_iris() {
        let path = parse_property_path("foaf:knows*");
        let iris = path.base_iris();
        let found = iris.contains(&"foaf:knows".to_owned());
        assert!(found, "expected 'foaf:knows' in {:?}", iris);
    }

    /// A quantified group (`(a/b)+`) must report every IRI inside the group.
    #[test]
    fn test_path_nested_quantifier_base_iris() {
        let path = parse_property_path("(foaf:knows/foaf:name)+");
        let iris = path.base_iris();
        let found_all = ["foaf:knows", "foaf:name"]
            .iter()
            .all(|iri| iris.contains(&(*iri).to_owned()));
        assert!(
            found_all,
            "expected both foaf:knows and foaf:name in {:?}",
            iris
        );
    }

    /// A negated property set (`!(a|b)`) must report every IRI it mentions.
    #[test]
    fn test_path_negated_set_base_iris() {
        let path = parse_property_path("!(foaf:knows|rdfs:type)");
        let iris = path.base_iris();
        let found_all = ["foaf:knows", "rdfs:type"]
            .iter()
            .all(|iri| iris.contains(&(*iri).to_owned()));
        assert!(
            found_all,
            "expected both foaf:knows and rdfs:type in {:?}",
            iris
        );
    }
}