#[cfg(feature = "alloc")]
use alloc::{
format,
string::{String, ToString},
};
#[cfg(all(feature = "alloc", feature = "sparql"))]
use alloc::vec::Vec;
use hashbrown::HashMap;
use serde::{Deserialize, Serialize};
/// A single RDF/SPARQL term as recognised by [`Term::from_token`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Term {
/// A query variable; `from_token` stores the name without its leading `?` / `$`.
Variable(String),
/// An IRI; `from_token` stores it without the surrounding `<` `>` when they were
/// present, and also uses this variant as the fallback for unrecognised tokens.
Iri(String),
/// A prefixed name split at its first `:` into `(prefix, local)`.
PrefixedName(String, String),
/// A literal kept as the raw token text (quotes and any suffix included).
Literal(String),
/// A blank node; `from_token` stores the label without the leading `_:`.
BlankNode(String),
}
impl Term {
    /// Expands a [`Term::PrefixedName`] into a [`Term::Iri`] using `prefix_map`.
    ///
    /// The result is the mapped namespace followed by the local part. Any other
    /// variant, and a prefixed name whose prefix is missing from `prefix_map`,
    /// is returned as an unchanged clone.
    pub fn resolve(&self, prefix_map: &HashMap<String, String>) -> Self {
        if let Term::PrefixedName(prefix, local) = self {
            if let Some(namespace) = prefix_map.get(prefix.as_str()) {
                return Term::Iri(format!("{}{}", namespace, local));
            }
        }
        self.clone()
    }

    /// Classifies one whitespace-trimmed token into a [`Term`].
    ///
    /// Rules are tried in this order:
    /// 1. empty token: `Iri("")`
    /// 2. leading `?` or `$`: `Variable` (sigil removed)
    /// 3. `<...>`: `Iri` (brackets removed)
    /// 4. leading `_:`: `BlankNode` (marker removed)
    /// 5. leading quote, ASCII digit, `+` or `-`: `Literal` (raw text kept)
    /// 6. the bare keyword `a`: `PrefixedName("rdf", "type")`
    /// 7. contains `:` but not `://`: `PrefixedName` split at the first `:`
    /// 8. anything else: `Iri` holding the token verbatim
    #[must_use]
    pub fn from_token(token: &str) -> Self {
        let token = token.trim();
        let first = match token.chars().next() {
            Some(ch) => ch,
            None => return Term::Iri(String::new()),
        };

        if let Some(name) = token.strip_prefix(['?', '$']) {
            return Term::Variable(name.to_string());
        }
        if let Some(iri) = token
            .strip_prefix('<')
            .and_then(|rest| rest.strip_suffix('>'))
        {
            return Term::Iri(iri.to_string());
        }
        if let Some(label) = token.strip_prefix("_:") {
            return Term::BlankNode(label.to_string());
        }
        // Quoted strings and anything that looks numeric are kept verbatim.
        if matches!(first, '"' | '\'' | '+' | '-') || first.is_ascii_digit() {
            return Term::Literal(token.to_string());
        }
        if token == "a" {
            return Term::PrefixedName("rdf".to_string(), "type".to_string());
        }

        match token.split_once(':') {
            // `scheme://...` without angle brackets is treated as an IRI, not a prefix.
            Some((prefix, local)) if !token.contains("://") => {
                Term::PrefixedName(prefix.to_string(), local.to_string())
            }
            _ => Term::Iri(token.to_string()),
        }
    }
}
/// One subject / predicate / object pattern whose three positions are each a [`Term`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct StructuredTriple {
/// Term in the subject position.
pub subject: Term,
/// Term in the predicate position.
pub predicate: Term,
/// Term in the object position.
pub object: Term,
}
/// Extracts the plain (non-path) triple patterns found in the `WHERE` body of `sparql`.
///
/// The body located by `find_where_body` is cut into statement candidates, each
/// candidate is whitespace-tokenised, and every candidate with at least three
/// tokens becomes a [`StructuredTriple`]: token 0 is the subject, token 1 the
/// predicate, and the remaining tokens (re-joined with single spaces) the object.
/// Candidates whose first or second token is a SPARQL keyword, or whose predicate
/// token contains a property-path operator, are skipped.
///
/// Returns an empty vector when no `WHERE { ... }` body is found.
///
/// This is a best-effort scan, not a SPARQL parser: `;`-continued
/// predicate-object lists (which omit the subject) are not expanded, so
/// two-token continuations are ignored.
#[cfg(feature = "sparql")]
pub(crate) fn extract_structured_triples(sparql: &str) -> Vec<StructuredTriple> {
    let where_body = match find_where_body(sparql) {
        Some(body) => body,
        None => return Vec::new(),
    };
    split_statements(where_body)
        .into_iter()
        .filter_map(triple_tokens)
        .filter(|tokens| !predicate_token_has_path_operator(tokens[1]))
        .map(|tokens| StructuredTriple {
            subject: Term::from_token(tokens[0]),
            predicate: Term::from_token(tokens[1]),
            object: Term::from_token(&tokens[2..].join(" ")),
        })
        .collect()
}

/// Appends to `dest` one triple per base IRI of every property-path pattern in `sparql`.
///
/// Only statement candidates whose predicate token contains a path operator are
/// considered (the complement of what [`extract_structured_triples`] keeps). The
/// predicate token is handed to `parse_property_path`, and each non-empty entry
/// of `base_iris()` is paired with the candidate's subject and object.
/// NOTE(review): `base_iris()` presumably yields the IRIs mentioned inside the
/// path expression; confirm against `core::sparql_ast`.
///
/// `dest` is left untouched when no `WHERE { ... }` body is found.
#[cfg(feature = "sparql")]
pub(crate) fn augment_path_structured_triples(sparql: &str, dest: &mut Vec<StructuredTriple>) {
    let where_body = match find_where_body(sparql) {
        Some(body) => body,
        None => return,
    };
    for tokens in split_statements(where_body)
        .into_iter()
        .filter_map(triple_tokens)
    {
        let pred_token = tokens[1];
        if !predicate_token_has_path_operator(pred_token) {
            continue;
        }
        let subject = Term::from_token(tokens[0]);
        let object = Term::from_token(&tokens[2..].join(" "));
        let path = crate::core::sparql_ast::parse_property_path(pred_token);
        for raw_iri in path.base_iris() {
            if raw_iri.is_empty() {
                continue;
            }
            dest.push(StructuredTriple {
                subject: subject.clone(),
                predicate: Term::from_token(&raw_iri),
                object: object.clone(),
            });
        }
    }
}

/// Cuts a `WHERE` body into statement candidates at `.` and `;`.
///
/// A separator is ignored when it sits inside an `<...>` IRI, inside a quoted
/// literal, inside a `#` comment, or (for `.`) between two ASCII digits, so
/// `<http://example.org/p>`, `"Hello. World"` and `3.14` stay in one piece.
#[cfg(feature = "sparql")]
fn split_statements(body: &str) -> Vec<&str> {
    let bytes = body.as_bytes();
    let mut segments = Vec::new();
    let mut start = 0;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'<' => {
                // IRIs never contain whitespace, so a `<` that meets whitespace
                // before `>` is a comparison operator and is left alone.
                let rest = &bytes[i + 1..];
                let stop = rest
                    .iter()
                    .position(|&b| b == b'>' || b.is_ascii_whitespace());
                if let Some(stop) = stop {
                    if rest[stop] == b'>' {
                        i += stop + 1;
                    }
                }
            }
            b'"' | b'\'' => {
                // Jump to the matching quote, stepping over backslash escapes.
                let quote = bytes[i];
                i += 1;
                while i < bytes.len() && bytes[i] != quote {
                    if bytes[i] == b'\\' {
                        i += 1;
                    }
                    i += 1;
                }
            }
            b'#' => {
                // Comment: nothing up to the end of the line is a separator.
                while i < bytes.len() && bytes[i] != b'\n' {
                    i += 1;
                }
            }
            b'.' | b';' => {
                let inside_number = bytes[i] == b'.'
                    && i > 0
                    && bytes[i - 1].is_ascii_digit()
                    && matches!(bytes.get(i + 1), Some(next) if next.is_ascii_digit());
                if !inside_number {
                    // `i` indexes an ASCII byte, so both cuts are char boundaries.
                    segments.push(&body[start..i]);
                    start = i + 1;
                }
            }
            _ => {}
        }
        i += 1;
    }
    segments.push(&body[start..]);
    segments
}

/// Tokenises one statement candidate on whitespace.
///
/// Returns `None` for candidates that cannot be a subject/predicate/object
/// pattern: blank segments, segments starting with `{` or `}`, segments with
/// fewer than three tokens, and segments whose first or second token is a
/// SPARQL keyword.
#[cfg(feature = "sparql")]
fn triple_tokens(segment: &str) -> Option<Vec<&str>> {
    let trimmed = segment.trim();
    if trimmed.is_empty() || trimmed.starts_with(['{', '}']) {
        return None;
    }
    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
    if tokens.len() < 3 || is_sparql_keyword(tokens[0]) || is_sparql_keyword(tokens[1]) {
        return None;
    }
    Some(tokens)
}
#[cfg(feature = "sparql")]
/// Returns `true` when `token` equals one of the listed SPARQL keywords,
/// compared ASCII-case-insensitively and as a whole token (no trimming).
fn is_sparql_keyword(token: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        "OPTIONAL",
        "UNION",
        "FILTER",
        "GRAPH",
        "SERVICE",
        "BIND",
        "VALUES",
        "MINUS",
        "SELECT",
        "WHERE",
        "CONSTRUCT",
        "DESCRIBE",
        "ASK",
        "FROM",
        "NAMED",
        "GROUP",
        "BY",
        "HAVING",
        "ORDER",
        "LIMIT",
        "OFFSET",
        "DISTINCT",
        "REDUCED",
        "NOT",
        "EXISTS",
        "LET",
    ];
    KEYWORDS
        .iter()
        .any(|keyword| token.eq_ignore_ascii_case(keyword))
}
#[cfg(feature = "sparql")]
/// Reports whether a predicate token contains a property-path operator.
///
/// The operators looked for are `*`, `+`, `?`, `^`, `/`, `|`, `!`, `(` and `)`.
/// Characters between `<` and `>` belong to an IRI and are ignored, so the
/// slashes of `<http://example.org/p>` do not count.
///
/// A token that begins with `?` or `$` is a variable, not a path expression,
/// and always yields `false`. Without this guard the sigil of `?p` would be
/// mistaken for the zero-or-one modifier, and every `?s ?p ?o` pattern would
/// be classified as a property path.
pub(crate) fn predicate_token_has_path_operator(token: &str) -> bool {
    if token.starts_with(['?', '$']) {
        return false;
    }
    let mut in_iri = false;
    token.bytes().any(|byte| match byte {
        b'<' => {
            in_iri = true;
            false
        }
        b'>' => {
            in_iri = false;
            false
        }
        b'*' | b'+' | b'?' | b'^' | b'/' | b'|' | b'!' | b'(' | b')' => !in_iri,
        _ => false,
    })
}
#[cfg(feature = "sparql")]
/// Returns the text between the braces of the first `WHERE { ... }` group in `sparql`.
///
/// The keyword is located case-insensitively as a plain substring (no word
/// boundary check); the body starts after the first `{` that follows it and
/// ends before the `}` that balances it. Nested braces are counted, but braces
/// inside literals or IRIs are not treated specially.
///
/// Returns `None` when there is no `WHERE`, no `{` after it, or the opening
/// brace is never balanced.
pub(crate) fn find_where_body(sparql: &str) -> Option<&str> {
    const KEYWORD: &[u8] = b"WHERE";
    let bytes = sparql.as_bytes();

    // Byte-wise ASCII-insensitive search; the match is pure ASCII, so the
    // offset just past it is always a valid `str` boundary.
    let keyword_at = bytes
        .windows(KEYWORD.len())
        .position(|window| window.eq_ignore_ascii_case(KEYWORD))?;
    let after_keyword = keyword_at + KEYWORD.len();
    let body_start = after_keyword + sparql[after_keyword..].find('{')? + 1;

    let mut depth: u32 = 1;
    for (offset, &byte) in bytes[body_start..].iter().enumerate() {
        match byte {
            b'{' => depth += 1,
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    return Some(&sparql[body_start..body_start + offset]);
                }
            }
            _ => {}
        }
    }
    None
}