use pgrx::prelude::*;
use serde_json::{json, Value};
use spargebra::algebra::GraphPattern;
use spargebra::term::{NamedNodePattern, TermPattern, TriplePattern};
use spargebra::{Query, SparqlParser};
/// Parses `query` as a SPARQL query and returns a JSON summary of it.
///
/// The summary is whatever [`serialize_query`] produces for the parsed query.
///
/// # Panics
///
/// Panics with a message of the form `sparql_parse: <parser error>` when the
/// text cannot be parsed.
#[pg_extern]
fn sparql_parse(query: &str) -> pgrx::JsonB {
    match SparqlParser::new().parse_query(query) {
        Ok(parsed) => pgrx::JsonB(serialize_query(&parsed)),
        Err(e) => panic!("sparql_parse: {e}"),
    }
}
/// Builds the JSON report for a parsed query.
///
/// * `SELECT` reports the collected variables, the BGP triple patterns (with
///   their count) and the labels of algebra nodes flagged as unsupported.
/// * `ASK` reports the same fields except for the variable list.
/// * `CONSTRUCT` and `DESCRIBE` are reported as unsupported with a reason.
fn serialize_query(q: &Query) -> Value {
    // Query forms that are not handled share one report shape.
    let out_of_scope = |form: &str, reason: &str| {
        json!({ "form": form, "supported": false, "reason": reason })
    };

    match q {
        Query::Select { pattern, .. } => {
            let (variables, triples, flagged) = walk_select_pattern(pattern);
            let triple_count = triples.len();
            json!({
                "form": "SELECT",
                "variables": variables,
                "bgp_pattern_count": triple_count,
                "bgp_patterns": triples,
                "unsupported_algebra": flagged,
            })
        }
        Query::Ask { pattern, .. } => {
            // The variable list is collected by the walker but not reported for ASK.
            let (_, triples, flagged) = walk_select_pattern(pattern);
            let triple_count = triples.len();
            json!({
                "form": "ASK",
                "bgp_pattern_count": triple_count,
                "bgp_patterns": triples,
                "unsupported_algebra": flagged,
            })
        }
        Query::Construct { .. } => out_of_scope("CONSTRUCT", "CONSTRUCT not in Phase 2.2 scope"),
        Query::Describe { .. } => out_of_scope("DESCRIBE", "DESCRIBE not in Phase 2.2 scope"),
    }
}
/// Walks `pattern` with fresh accumulators and returns them.
///
/// The tuple holds, in order: the variable names seen, the JSON form of every
/// BGP triple pattern visited, and the labels of algebra nodes that [`walk`]
/// flagged as unsupported.
fn walk_select_pattern(pattern: &GraphPattern) -> (Vec<String>, Vec<Value>, Vec<&'static str>) {
    let (mut vars, mut bgp, mut unsupported) = (Vec::new(), Vec::new(), Vec::new());
    walk(pattern, &mut vars, &mut bgp, &mut unsupported);
    (vars, bgp, unsupported)
}
/// Recursively visits the algebra node `p`, filling the three accumulators.
///
/// * `vars` receives variable names without duplicates: projected variables
///   first, then variables found in BGP triple patterns, in visiting order.
/// * `bgp` receives the JSON form of each triple pattern of every BGP reached.
/// * `unsupported` receives one label per node that is not descended into.
///
/// Single-child wrappers and the binary `LeftJoin` / `Union` / `Minus` nodes are
/// traversed (left side before right side). `Join`, `Graph`, `Path`, `Values`,
/// `Service` and any other variant are only recorded in `unsupported`.
fn walk(
    p: &GraphPattern,
    vars: &mut Vec<String>,
    bgp: &mut Vec<Value>,
    unsupported: &mut Vec<&'static str>,
) {
    match p {
        GraphPattern::Bgp { patterns } => {
            for triple in patterns {
                bgp.push(triple_to_json(triple));
                collect_vars(triple, vars);
            }
        }
        GraphPattern::Project { inner, variables } => {
            // Projected names are registered before descending so they come first.
            for name in variables.iter().map(|v| v.as_str()) {
                if !vars.iter().any(|known| known.as_str() == name) {
                    vars.push(name.to_string());
                }
            }
            walk(inner, vars, bgp, unsupported);
        }
        // Wrappers around a single child: only the child is inspected.
        GraphPattern::Distinct { inner }
        | GraphPattern::Reduced { inner }
        | GraphPattern::Slice { inner, .. }
        | GraphPattern::OrderBy { inner, .. }
        | GraphPattern::Filter { inner, .. }
        | GraphPattern::Group { inner, .. }
        | GraphPattern::Extend { inner, .. } => walk(inner, vars, bgp, unsupported),
        // Binary operators: both operands are inspected, left first.
        GraphPattern::LeftJoin { left, right, .. }
        | GraphPattern::Union { left, right }
        | GraphPattern::Minus { left, right } => {
            for side in [left, right] {
                walk(side, vars, bgp, unsupported);
            }
        }
        // Everything below is flagged and not descended into.
        GraphPattern::Join { .. } => unsupported.push("Join (non-BGP)"),
        GraphPattern::Graph { .. } => unsupported.push("Graph (named graph clause)"),
        GraphPattern::Path { .. } => unsupported.push("Path (property path)"),
        GraphPattern::Values { .. } => unsupported.push("Values (inline VALUES)"),
        GraphPattern::Service { .. } => unsupported.push("Service (federation)"),
        _ => unsupported.push("other"),
    }
}
/// Appends the variable names used by `tp` to `out`, skipping names already present.
///
/// Positions are examined in subject, predicate, object order; positions that
/// do not hold a variable contribute nothing.
fn collect_vars(tp: &TriplePattern, out: &mut Vec<String>) {
    let subject = match &tp.subject {
        TermPattern::Variable(v) => Some(v.as_str()),
        _ => None,
    };
    let predicate = match &tp.predicate {
        NamedNodePattern::Variable(v) => Some(v.as_str()),
        _ => None,
    };
    let object = match &tp.object {
        TermPattern::Variable(v) => Some(v.as_str()),
        _ => None,
    };

    for name in [subject, predicate, object].into_iter().flatten() {
        let already_known = out.iter().any(|seen| seen.as_str() == name);
        if !already_known {
            out.push(name.to_string());
        }
    }
}
fn triple_to_json(tp: &TriplePattern) -> Value {
json!({
"s": term_pattern_to_json(&tp.subject),
"p": named_node_pattern_to_json(&tp.predicate),
"o": term_pattern_to_json(&tp.object),
})
}
/// Converts a subject/object term pattern into a JSON object.
///
/// * variable   -> `{"var": name}`
/// * named node -> `{"iri": iri}`
/// * blank node -> `{"bnode": id}`
/// * literal    -> `{"literal": value}` plus `"lang"` when the literal has a
///   language tag, otherwise `"datatype"` with the datatype IRI
/// * anything else -> `{"unsupported": <Debug rendering of the term>}`
fn term_pattern_to_json(t: &TermPattern) -> Value {
    let mut fields = serde_json::Map::new();
    match t {
        TermPattern::Variable(v) => {
            fields.insert("var".into(), Value::String(v.as_str().to_string()));
        }
        TermPattern::NamedNode(n) => {
            fields.insert("iri".into(), Value::String(n.as_str().to_string()));
        }
        TermPattern::BlankNode(b) => {
            fields.insert("bnode".into(), Value::String(b.as_str().to_string()));
        }
        TermPattern::Literal(l) => {
            fields.insert("literal".into(), Value::String(l.value().to_string()));
            // A language tag takes precedence; the datatype is only reported without one.
            let (key, text) = match l.language() {
                Some(lang) => ("lang", lang.to_string()),
                None => ("datatype", l.datatype().as_str().to_string()),
            };
            fields.insert(key.into(), Value::String(text));
        }
        _ => {
            fields.insert("unsupported".into(), Value::String(format!("{t:?}")));
        }
    }
    Value::Object(fields)
}
fn named_node_pattern_to_json(n: &NamedNodePattern) -> Value {
match n {
NamedNodePattern::NamedNode(nn) => json!({ "iri": nn.as_str() }),
NamedNodePattern::Variable(v) => json!({ "var": v.as_str() }),
}
}
#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use pgrx::prelude::*;

    /// Calls `pgrdf.sparql_parse` through SPI and returns the decoded JSON document.
    ///
    /// Panics if the SPI call fails or yields SQL NULL.
    fn parse_via_spi(query: &str) -> serde_json::Value {
        let j: pgrx::JsonB =
            Spi::get_one_with_args("SELECT pgrdf.sparql_parse($1)", &[query.into()])
                .unwrap()
                .unwrap();
        j.0
    }

    /// A plain three-variable SELECT reports its form, one BGP pattern and the
    /// variables in projection order.
    #[pg_test]
    fn sparql_parse_basic_select() {
        let v = parse_via_spi("SELECT ?s ?p ?o WHERE { ?s ?p ?o }");
        assert_eq!(v["form"], "SELECT");
        assert_eq!(v["bgp_pattern_count"], 1);
        let vars = v["variables"].as_array().unwrap();
        assert_eq!(vars.len(), 3);
        assert_eq!(vars[0], "s");
        assert_eq!(vars[1], "p");
        assert_eq!(vars[2], "o");
    }

    /// A prefixed predicate is reported with its expanded IRI.
    #[pg_test]
    fn sparql_parse_bgp_with_named_predicate() {
        let q = r#"
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?person ?name
WHERE { ?person foaf:name ?name }
"#;
        let v = parse_via_spi(q);
        assert_eq!(v["form"], "SELECT");
        assert_eq!(v["bgp_pattern_count"], 1);
        let tp = &v["bgp_patterns"][0];
        assert_eq!(tp["s"]["var"], "person");
        assert_eq!(tp["p"]["iri"], "http://xmlns.com/foaf/0.1/name");
        assert_eq!(tp["o"]["var"], "name");
    }

    /// Two triple patterns in one group are both counted.
    #[pg_test]
    fn sparql_parse_multipattern_bgp() {
        let q = r#"
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?p ?n ?m
WHERE {
?p foaf:name ?n .
?p foaf:mbox ?m .
}
"#;
        let v = parse_via_spi(q);
        assert_eq!(v["bgp_pattern_count"], 2);
    }

    /// FILTER is traversed rather than flagged.
    #[pg_test]
    fn sparql_parse_filter_is_supported() {
        let v = parse_via_spi("SELECT ?s WHERE { ?s ?p ?o FILTER(isIRI(?o)) }");
        let unsupported = v["unsupported_algebra"].as_array().unwrap();
        assert!(
            !unsupported.iter().any(|x| x.as_str() == Some("Filter")),
            "Filter should not be flagged as unsupported anymore, got {unsupported:?}"
        );
        assert_eq!(v["bgp_pattern_count"], 1);
    }

    /// OPTIONAL is traversed and the patterns on both sides are counted.
    #[pg_test]
    fn sparql_parse_optional_is_supported() {
        let v = parse_via_spi("SELECT ?s ?n WHERE { ?s ?p ?o OPTIONAL { ?s <http://x/n> ?n } }");
        let unsupported = v["unsupported_algebra"].as_array().unwrap();
        assert!(
            !unsupported
                .iter()
                .any(|x| x.as_str().is_some_and(|s| s.contains("OPTIONAL"))),
            "OPTIONAL should not be flagged anymore, got {unsupported:?}"
        );
        assert_eq!(v["bgp_pattern_count"], 2);
    }

    /// UNION is traversed and the patterns of both branches are counted.
    #[pg_test]
    fn sparql_parse_union_is_supported() {
        let v = parse_via_spi(
            "SELECT ?s WHERE { { ?s <http://x/a> ?o } UNION { ?s <http://x/b> ?o } }",
        );
        let unsupported = v["unsupported_algebra"].as_array().unwrap();
        assert!(
            !unsupported.iter().any(|x| x.as_str() == Some("Union")),
            "UNION should not be flagged anymore, got {unsupported:?}"
        );
        assert_eq!(v["bgp_pattern_count"], 2);
    }

    /// MINUS is traversed and the patterns on both sides are counted.
    #[pg_test]
    fn sparql_parse_minus_is_supported() {
        let v = parse_via_spi("SELECT ?s WHERE { ?s ?p ?o MINUS { ?s <http://x/a> ?b } }");
        let unsupported = v["unsupported_algebra"].as_array().unwrap();
        assert!(
            !unsupported.iter().any(|x| x.as_str() == Some("Minus")),
            "MINUS should not be flagged anymore, got {unsupported:?}"
        );
        assert_eq!(v["bgp_pattern_count"], 2);
    }

    /// A property path is reported in `unsupported_algebra`.
    #[pg_test]
    fn sparql_parse_flags_unsupported_path() {
        let v = parse_via_spi("SELECT ?s ?o WHERE { ?s <http://x/a>* ?o }");
        let unsupported = v["unsupported_algebra"].as_array().unwrap();
        assert!(
            unsupported
                .iter()
                .any(|x| x.as_str().is_some_and(|s| s.contains("Path"))),
            "expected Path to be flagged, got {unsupported:?}"
        );
    }

    /// Malformed SPARQL must never yield a parse result.
    ///
    /// The failure may surface either as an unwinding panic (caught here) or as
    /// an SPI error / NULL (mapped to `None`); both are accepted. Only an actual
    /// JSON value coming back counts as a test failure.
    #[pg_test]
    fn sparql_parse_syntax_error_panics() {
        let outcome = std::panic::catch_unwind(|| -> Option<pgrx::JsonB> {
            Spi::get_one_with_args(
                "SELECT pgrdf.sparql_parse($1)",
                &["SELECT ?s WHERE { ?s ?p".into()],
            )
            .ok()
            .flatten()
        });
        // Previously the outcome was discarded, so this test could not fail.
        assert!(
            !matches!(outcome, Ok(Some(_))),
            "malformed SPARQL must not produce a parse result"
        );
    }
}