#![cfg(feature = "sparql")]
#[cfg(feature = "alloc")]
use alloc::{
borrow::ToOwned,
format,
string::{String, ToString},
vec,
vec::Vec,
};
use hashbrown::HashMap;
/// Lexical context tracked by the byte scanners in this file.
///
/// The scanners walk a SPARQL query one byte at a time and use this state to tell real
/// query syntax apart from text that merely looks like syntax because it sits inside a
/// string literal, an IRI reference or a comment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScanState {
    /// Outside any string literal, IRI reference or comment.
    Normal,
    /// After an opening `"`, until the next unescaped `"`.
    InDoubleQuote,
    /// After an opening `'`, until the next unescaped `'`.
    InSingleQuote,
    /// After a `<`, until the next `>`.
    InUri,
    /// After a `#`, until the next line feed.
    InComment,
}
/// Collects the `PREFIX label: <namespace>` declarations found in `sparql`.
///
/// Each declaration is stored under its label, lower-cased and followed by a colon
/// (`"foaf:"`, or just `":"` for the default namespace); the value is the text between
/// `<` and `>`. A later declaration of the same key replaces an earlier one.
///
/// The keyword is matched case-insensitively and only as a whole word. Occurrences inside
/// string literals, `<...>` IRI references and `#` comments are ignored. Labels are limited
/// to ASCII letters, digits, `_` and `-`. A namespace whose closing `>` is missing extends
/// to the end of the input.
pub(crate) fn extract_prefix_map(sparql: &str) -> HashMap<String, String> {
    // Moves `at` forward while `keep` accepts the byte under it, stopping at end of input.
    fn advance_while(input: &[u8], mut at: usize, keep: fn(u8) -> bool) -> usize {
        while at < input.len() && keep(input[at]) {
            at += 1;
        }
        at
    }

    let input = sparql.as_bytes();
    let mut prefixes = HashMap::new();
    let mut mode = ScanState::Normal;
    let mut cursor = 0;

    while cursor < input.len() {
        match (mode, input[cursor]) {
            (ScanState::Normal, b'"') => {
                mode = ScanState::InDoubleQuote;
                cursor += 1;
            }
            (ScanState::Normal, b'\'') => {
                mode = ScanState::InSingleQuote;
                cursor += 1;
            }
            (ScanState::Normal, b'<') => {
                mode = ScanState::InUri;
                cursor += 1;
            }
            (ScanState::Normal, b'#') => {
                mode = ScanState::InComment;
                cursor += 1;
            }
            (ScanState::Normal, b'P') | (ScanState::Normal, b'p')
                if is_whole_word_keyword(input, cursor, "PREFIX") =>
            {
                // The keyword check has already established that its six bytes are in range.
                let label_start =
                    advance_while(input, cursor + 6, |b: u8| b.is_ascii_whitespace());
                let label_end = advance_while(input, label_start, |b: u8| {
                    b.is_ascii_alphanumeric() || b == b'_' || b == b'-'
                });
                cursor = label_end;

                if input.get(cursor) == Some(&b':') {
                    cursor = advance_while(input, cursor + 1, |b: u8| b.is_ascii_whitespace());

                    if input.get(cursor) == Some(&b'<') {
                        let ns_start = cursor + 1;
                        let ns_end = advance_while(input, ns_start, |b: u8| b != b'>');
                        // Step over the closing `>` unless the namespace ran to end of input.
                        cursor = if ns_end < input.len() {
                            ns_end + 1
                        } else {
                            ns_end
                        };

                        let key = format!("{}:", sparql[label_start..label_end].to_lowercase());
                        prefixes.insert(key, sparql[ns_start..ns_end].to_string());
                    }
                }
            }
            (ScanState::Normal, _) => cursor += 1,

            // A backslash inside a string literal also consumes the byte it escapes.
            (ScanState::InDoubleQuote, b'\\') | (ScanState::InSingleQuote, b'\\') => cursor += 2,

            // Terminators: each one closes exactly the construct that is currently open.
            (ScanState::InDoubleQuote, b'"')
            | (ScanState::InSingleQuote, b'\'')
            | (ScanState::InUri, b'>')
            | (ScanState::InComment, b'\n') => {
                mode = ScanState::Normal;
                cursor += 1;
            }

            (ScanState::InDoubleQuote, _)
            | (ScanState::InSingleQuote, _)
            | (ScanState::InUri, _)
            | (ScanState::InComment, _) => cursor += 1,
        }
    }

    prefixes
}
/// Expands a single prefixed name such as `foaf:Person` into a full IRI string.
///
/// The part of `token` up to and including its first `:` is lower-cased and looked up in
/// `map`; on a hit the namespace is returned with the remainder of `token` appended
/// unchanged.
///
/// Returns `None` when `token` contains `://` (it is treated as an IRI already), when it
/// has no `:`, or when the prefix is not present in `map`.
pub(crate) fn expand_prefixed_name(token: &str, map: &HashMap<String, String>) -> Option<String> {
    if token.contains("://") {
        return None;
    }

    // Split just after the first colon so the label keeps its trailing `:`.
    let split = token.find(':')? + 1;
    let (label, local) = token.split_at(split);
    let lookup = label.to_lowercase();

    map.get(&lookup).map(|namespace| {
        let mut iri = String::with_capacity(namespace.len() + local.len());
        iri.push_str(namespace);
        iri.push_str(local);
        iri
    })
}
/// Expands every prefixed name used in `sparql` into a full IRI string.
///
/// `map` is keyed by the lower-cased prefix label followed by a colon (`"foaf:"`, or `":"`
/// for the default namespace) and holds the namespace for each. Labels found in the query
/// are lower-cased before the lookup; the local part is appended to the namespace unchanged.
///
/// Returns the expansions in order of appearance, duplicates included. Names whose prefix
/// is not in `map` are skipped, and an empty `map` yields an empty vector without scanning.
///
/// String literals, `#` comments and `<...>` IRI references are skipped. A `<` only opens
/// an IRI reference when a `>` follows before any whitespace or other character that cannot
/// occur inside one, so a comparison such as `?age < 18` does not hide the rest of the query.
///
/// Only ASCII letters, digits, `_` and `-` are recognised in labels and local names (a local
/// name may not start with `-`); a name ends at the first other byte.
pub(crate) fn extract_expanded_prefixed_uris(
    sparql: &str,
    map: &HashMap<String, String>,
) -> Vec<String> {
    // Bytes accepted inside a prefix label or a local name.
    fn is_name_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_' || b == b'-'
    }

    // First index at or after `at` that does not hold a name byte.
    fn name_end(bytes: &[u8], mut at: usize) -> usize {
        while at < bytes.len() && is_name_byte(bytes[at]) {
            at += 1;
        }
        at
    }

    // End of the local name starting at `start`, or `None` if no local name starts there.
    fn local_end(bytes: &[u8], start: usize) -> Option<usize> {
        match bytes.get(start) {
            Some(&b) if b.is_ascii_alphanumeric() || b == b'_' => Some(name_end(bytes, start)),
            _ => None,
        }
    }

    // Index just past the string literal opened by the quote at `open`. A backslash also
    // consumes the byte it escapes; an unterminated literal runs to (or past) end of input.
    fn after_quoted(bytes: &[u8], open: usize) -> usize {
        let quote = bytes[open];
        let mut at = open + 1;
        while at < bytes.len() {
            match bytes[at] {
                b'\\' => at += 2,
                b if b == quote => return at + 1,
                _ => at += 1,
            }
        }
        at
    }

    // Index just past the `>` of the IRI reference opened at `open`, or `None` when the `<`
    // is not followed by a well-formed reference (for example a less-than operator).
    fn after_iri(bytes: &[u8], open: usize) -> Option<usize> {
        for (offset, &b) in bytes[open + 1..].iter().enumerate() {
            match b {
                b'>' => return Some(open + offset + 2),
                // Characters the SPARQL IRIREF production excludes.
                0x00..=0x20 | b'<' | b'"' | b'{' | b'}' | b'|' | b'^' | b'`' | b'\\' => {
                    return None
                }
                _ => {}
            }
        }
        None
    }

    let mut results = Vec::new();
    if map.is_empty() {
        return results;
    }

    let bytes = sparql.as_bytes();
    let len = bytes.len();
    let mut i = 0;

    while i < len {
        match bytes[i] {
            b'"' | b'\'' => i = after_quoted(bytes, i),
            b'#' => {
                // A comment runs through the next line feed.
                i = match bytes[i..].iter().position(|&b| b == b'\n') {
                    Some(newline) => i + newline + 1,
                    None => len,
                };
            }
            b'<' => i = after_iri(bytes, i).unwrap_or(i + 1),
            b':' => {
                // A colon that does not continue a preceding name introduces a name in the
                // default namespace. Colons directly after a name byte (as in `a:b:c`) or
                // after a non-ASCII byte (a label this scanner cannot read) are passed over.
                let standalone = i == 0 || {
                    let previous = bytes[i - 1];
                    previous.is_ascii() && !is_name_byte(previous)
                };
                i += 1;
                if standalone {
                    if let Some(end) = local_end(bytes, i) {
                        if let Some(namespace) = map.get(":") {
                            results.push(format!("{}{}", namespace, &sparql[i..end]));
                        }
                        i = end;
                    }
                }
            }
            b if b.is_ascii_alphabetic() || b == b'_' => {
                let label_start = i;
                let label_end = name_end(bytes, i);
                i = label_end;
                if bytes.get(i) == Some(&b':') {
                    i += 1;
                    // `label:` without a local part (as in a PREFIX declaration) is not a use.
                    if let Some(end) = local_end(bytes, i) {
                        let key = format!("{}:", sparql[label_start..label_end].to_lowercase());
                        if let Some(namespace) = map.get(&key) {
                            results.push(format!("{}{}", namespace, &sparql[i..end]));
                        }
                        i = end;
                    }
                }
            }
            _ => i += 1,
        }
    }

    results
}
/// Returns the variable names projected by the first `SELECT` clause in `sparql`.
///
/// After the keyword, an optional `DISTINCT` or `REDUCED` modifier is skipped. A `*`
/// projection is reported as the single entry `"*"`. Otherwise each `?name` / `$name` is
/// collected without its sigil, in order, until the first byte that is neither whitespace
/// nor the start of a variable (for example `WHERE`, `{` or a `(` expression).
///
/// Returns an empty vector when the query has no `SELECT` keyword. Variable names are read
/// as ASCII letters, digits and `_` only.
///
/// All keyword comparisons are done on bytes, so multi-byte UTF-8 text anywhere in the
/// query cannot cause a slicing panic.
pub(crate) fn extract_select_variables(sparql: &str) -> Vec<String> {
    let select_pos = match find_keyword(sparql, "SELECT") {
        Some(pos) => pos,
        None => return Vec::new(),
    };

    let bytes = sparql.as_bytes();
    let len = bytes.len();
    let skip_whitespace = |mut at: usize| {
        while at < len && bytes[at].is_ascii_whitespace() {
            at += 1;
        }
        at
    };

    let mut i = skip_whitespace(select_pos + "SELECT".len());

    for kw in &["DISTINCT", "REDUCED"] {
        let after = i + kw.len();
        // Compare bytes rather than a `str` slice: `after` may fall inside a multi-byte
        // character, where slicing the string would panic.
        let spelled = bytes
            .get(i..after)
            .map_or(false, |chunk| chunk.eq_ignore_ascii_case(kw.as_bytes()));
        let ends_word = bytes
            .get(after)
            .map_or(true, |b| !b.is_ascii_alphanumeric());
        if spelled && ends_word {
            i = skip_whitespace(after);
            break;
        }
    }

    if bytes.get(i) == Some(&b'*') {
        return vec!["*".to_owned()];
    }

    let mut vars = Vec::new();
    while i < len {
        match bytes[i] {
            b' ' | b'\t' | b'\r' | b'\n' => i += 1,
            b'?' | b'$' => {
                let var_start = i + 1;
                i = var_start;
                while i < len && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
                    i += 1;
                }
                // A bare sigil with no name after it contributes nothing.
                if i > var_start {
                    vars.push(sparql[var_start..i].to_owned());
                }
            }
            _ => break,
        }
    }
    vars
}
/// Finds the first whole-word occurrence of `keyword` in `text`, ignoring ASCII case.
///
/// Returns the byte offset of the match, or `None` if there is none. Occurrences inside
/// string literals (`"..."`, `'...'`, with backslash escapes), between `<` and the next `>`,
/// and inside `#` comments are ignored. A match counts as a whole word when the bytes
/// directly before and after it are not ASCII letters or digits.
///
/// Note that every `<` starts a skipped region, including one used as a comparison operator.
///
/// The comparison is done on bytes, so non-ASCII text in `text` never causes a slicing panic.
fn find_keyword(text: &str, keyword: &str) -> Option<usize> {
    let bytes = text.as_bytes();
    let needle = keyword.as_bytes();
    let mut state = ScanState::Normal;
    let mut i = 0;

    while i < bytes.len() {
        let byte = bytes[i];
        // Every byte advances the scan by one, except a backslash escape inside a string
        // literal, which also consumes the escaped byte.
        let mut step = 1;

        match state {
            ScanState::Normal => match byte {
                b'"' => state = ScanState::InDoubleQuote,
                b'\'' => state = ScanState::InSingleQuote,
                b'<' => state = ScanState::InUri,
                b'#' => state = ScanState::InComment,
                _ => {
                    let end = i + needle.len();
                    // `i` and `end` may fall inside a multi-byte character, so compare the
                    // raw bytes instead of slicing the string.
                    let spelled = bytes
                        .get(i..end)
                        .map_or(false, |window| window.eq_ignore_ascii_case(needle));
                    if spelled {
                        let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric();
                        let after_ok = bytes
                            .get(end)
                            .map_or(true, |b| !b.is_ascii_alphanumeric());
                        if before_ok && after_ok {
                            return Some(i);
                        }
                    }
                }
            },
            ScanState::InDoubleQuote | ScanState::InSingleQuote => {
                let closing = if state == ScanState::InDoubleQuote {
                    b'"'
                } else {
                    b'\''
                };
                if byte == b'\\' {
                    step = 2;
                } else if byte == closing {
                    state = ScanState::Normal;
                }
            }
            ScanState::InUri => {
                if byte == b'>' {
                    state = ScanState::Normal;
                }
            }
            ScanState::InComment => {
                if byte == b'\n' {
                    state = ScanState::Normal;
                }
            }
        }

        i += step;
    }

    None
}
/// Reports whether `keyword` occurs at byte offset `pos` of `bytes` as a whole word.
///
/// The comparison ignores ASCII case. The occurrence is a whole word when the byte just
/// before `pos` and the byte just after the keyword are not ASCII letters or digits; the
/// start and end of `bytes` count as word boundaries, as do `_`, `-` and non-ASCII bytes.
///
/// Returns `false` when the keyword would extend past the end of `bytes`.
fn is_whole_word_keyword(bytes: &[u8], pos: usize, keyword: &str) -> bool {
    let end = pos + keyword.len();

    let window = match bytes.get(pos..end) {
        Some(window) => window,
        None => return false,
    };
    if !window.eq_ignore_ascii_case(keyword.as_bytes()) {
        return false;
    }

    let starts_word = pos == 0 || !bytes[pos - 1].is_ascii_alphanumeric();
    let ends_word = bytes
        .get(end)
        .map_or(true, |next| !next.is_ascii_alphanumeric());

    starts_word && ends_word
}
#[cfg(test)]
mod tests {
    //! Unit tests for the SPARQL prefix, prefixed-name and projection scanners.

    use super::*;

    // ---- extract_prefix_map ------------------------------------------------------------

    /// A single declaration is stored under its label plus a trailing colon.
    #[test]
    fn prefix_map_basic() {
        let query =
            "PREFIX foaf: <http://xmlns.com/foaf/0.1/> SELECT ?s WHERE { ?s foaf:name ?n }";
        let prefixes = extract_prefix_map(query);
        let foaf = prefixes.get("foaf:").map(|ns| ns.as_str());
        assert_eq!(foaf, Some("http://xmlns.com/foaf/0.1/"));
    }

    /// Several declarations in one prologue are all recorded.
    #[test]
    fn prefix_map_multiple() {
        let query = "PREFIX ex: <http://example.org/> PREFIX dbo: <http://dbpedia.org/ontology/> SELECT ?x WHERE { ?x ex:role dbo:Person }";
        let prefixes = extract_prefix_map(query);
        let ex = prefixes.get("ex:").map(|ns| ns.as_str());
        let dbo = prefixes.get("dbo:").map(|ns| ns.as_str());
        assert_eq!(ex, Some("http://example.org/"));
        assert_eq!(dbo, Some("http://dbpedia.org/ontology/"));
    }

    /// The default namespace is stored under the bare `":"` key.
    #[test]
    fn prefix_map_default_namespace() {
        let query = "PREFIX : <http://example.org/default/> SELECT ?s WHERE { ?s :foo ?o }";
        let prefixes = extract_prefix_map(query);
        let default_ns = prefixes.get(":").map(|ns| ns.as_str());
        assert_eq!(default_ns, Some("http://example.org/default/"));
    }

    /// Text that looks like a declaration inside a string literal is ignored.
    #[test]
    fn prefix_map_skips_literals() {
        let query =
            r#"SELECT ?s WHERE { ?s <http://ex.org/p> "PREFIX foo: <http://notreal/>" }"#;
        let prefixes = extract_prefix_map(query);
        assert!(
            prefixes.is_empty(),
            "should not parse PREFIX inside string literal"
        );
    }

    /// Text that looks like a declaration inside a comment is ignored.
    #[test]
    fn prefix_map_skips_comments() {
        let query = "# PREFIX foo: <http://notreal/>\nSELECT ?s WHERE { ?s ?p ?o }";
        let prefixes = extract_prefix_map(query);
        assert!(prefixes.is_empty(), "should not parse PREFIX inside comment");
    }

    // ---- expand_prefixed_name ----------------------------------------------------------

    /// A known prefix is replaced by its namespace.
    #[test]
    fn expand_prefixed_name_basic() {
        let mut prefixes = HashMap::new();
        prefixes.insert("foaf:".to_owned(), "http://xmlns.com/foaf/0.1/".to_owned());
        let expanded = expand_prefixed_name("foaf:Person", &prefixes);
        assert_eq!(
            expanded.as_deref(),
            Some("http://xmlns.com/foaf/0.1/Person")
        );
    }

    /// A token that already is a full URI is not expanded.
    #[test]
    fn expand_prefixed_name_full_uri_unchanged() {
        let prefixes = HashMap::new();
        let expanded = expand_prefixed_name("http://example.org/foo", &prefixes);
        assert!(expanded.is_none(), "full URIs should not be expanded");
    }

    /// A prefix missing from the map yields no expansion.
    #[test]
    fn expand_prefixed_name_unknown_prefix() {
        let prefixes = HashMap::new();
        let expanded = expand_prefixed_name("unknown:bar", &prefixes);
        assert!(expanded.is_none());
    }

    // ---- extract_expanded_prefixed_uris ------------------------------------------------

    /// A prefixed name in the query body is expanded through the declared namespace.
    #[test]
    fn extract_expanded_uris_basic() {
        let query = "PREFIX ex: <http://example.org/> SELECT ?s WHERE { ?s ex:foo ?o }";
        let prefixes = extract_prefix_map(query);
        let uris = extract_expanded_prefixed_uris(query, &prefixes);
        assert!(
            uris.iter().any(|uri| uri == "http://example.org/foo"),
            "should contain expanded uri; got: {:?}",
            uris
        );
    }

    /// The declaration itself is not reported as a use of the prefix.
    #[test]
    fn extract_expanded_uris_skips_prefix_decl() {
        let query = "PREFIX ex: <http://example.org/> SELECT ?s WHERE { ?s ex:name ?o }";
        let prefixes = extract_prefix_map(query);
        let uris = extract_expanded_prefixed_uris(query, &prefixes);
        let occurrences = uris
            .iter()
            .filter(|uri| uri.as_str() == "http://example.org/name")
            .count();
        assert_eq!(occurrences, 1);
    }

    // ---- extract_select_variables ------------------------------------------------------

    /// Variables are returned in projection order without their sigil.
    #[test]
    fn select_vars_basic() {
        let query = "SELECT ?a ?b ?c WHERE { ?a ?b ?c }";
        let projected = extract_select_variables(query);
        assert_eq!(projected, vec!["a", "b", "c"]);
    }

    /// The `DISTINCT` modifier is skipped.
    #[test]
    fn select_vars_distinct() {
        let query = "SELECT DISTINCT ?x ?y WHERE { ?x ?p ?y }";
        let projected = extract_select_variables(query);
        assert_eq!(projected, vec!["x", "y"]);
    }

    /// A star projection is reported as the single entry `"*"`.
    #[test]
    fn select_vars_star() {
        let query = "SELECT * WHERE { ?s ?p ?o }";
        let projected = extract_select_variables(query);
        assert_eq!(projected, vec!["*"]);
    }

    /// An `ASK` query has no projection.
    #[test]
    fn select_vars_non_select() {
        let query = "ASK { ?s <http://example.org/p> ?o }";
        let projected = extract_select_variables(query);
        assert!(projected.is_empty());
    }

    /// A `CONSTRUCT` query has no projection.
    #[test]
    fn select_vars_construct() {
        let query = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }";
        let projected = extract_select_variables(query);
        assert!(projected.is_empty());
    }
}