use axum::{
extract::{Query, State},
http::StatusCode,
response::{IntoResponse, Response},
Json,
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use crate::error::FusekiResult;
use crate::server::AppState;
#[derive(Debug, Clone, Deserialize)]
pub struct QueryValidationRequest {
pub query: String,
#[serde(default = "default_sparql_syntax")]
pub syntax: String,
#[serde(default = "default_true")]
pub include_algebra: bool,
#[serde(default = "default_true")]
pub include_optimized: bool,
}
fn default_sparql_syntax() -> String {
"sparql11".to_string()
}
fn default_true() -> bool {
true
}
#[derive(Debug, Clone, Serialize)]
pub struct QueryValidationResponse {
pub valid: bool,
pub input: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub formatted: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub query_type: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub algebra: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub algebra_optimized: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub variables: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub prefixes: Option<Vec<PrefixMapping>>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub errors: Vec<ValidationError>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub warnings: Vec<ValidationWarning>,
}
#[derive(Debug, Clone, Deserialize)]
pub struct UpdateValidationRequest {
pub update: String,
#[serde(default = "default_sparql_syntax")]
pub syntax: String,
}
#[derive(Debug, Clone, Serialize)]
pub struct UpdateValidationResponse {
pub valid: bool,
pub input: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub formatted: Option<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub operations: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub affected_graphs: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub errors: Vec<ValidationError>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub warnings: Vec<ValidationWarning>,
}
#[derive(Debug, Clone, Deserialize)]
pub struct IriValidationRequest {
pub iris: Vec<String>,
#[serde(default = "default_true")]
pub check_relative: bool,
}
#[derive(Debug, Clone, Serialize)]
pub struct IriValidationResponse {
pub results: Vec<IriValidationResult>,
pub summary: ValidationSummary,
}
#[derive(Debug, Clone, Serialize)]
pub struct IriValidationResult {
pub iri: String,
pub valid: bool,
pub is_absolute: bool,
pub is_relative: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub scheme: Option<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub errors: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub warnings: Vec<String>,
}
#[derive(Debug, Clone, Deserialize)]
pub struct DataValidationRequest {
pub data: String,
#[serde(default = "default_format")]
pub format: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub base: Option<String>,
}
fn default_format() -> String {
"turtle".to_string()
}
#[derive(Debug, Clone, Serialize)]
pub struct DataValidationResponse {
pub valid: bool,
pub format: String,
pub triple_count: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub graph_count: Option<usize>,
pub subject_count: usize,
pub predicate_count: usize,
pub object_count: usize,
pub blank_node_count: usize,
pub literal_count: usize,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub errors: Vec<ValidationError>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub warnings: Vec<ValidationWarning>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub sample_triples: Vec<String>,
}
#[derive(Debug, Clone, Deserialize)]
pub struct LangTagValidationRequest {
pub tags: Vec<String>,
}
#[derive(Debug, Clone, Serialize)]
pub struct LangTagValidationResponse {
pub results: Vec<LangTagValidationResult>,
pub summary: ValidationSummary,
}
#[derive(Debug, Clone, Serialize)]
pub struct LangTagValidationResult {
pub tag: String,
pub valid: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub language: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub script: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub region: Option<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub variants: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub extensions: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub private_use: Option<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub errors: Vec<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub warnings: Vec<String>,
}
#[derive(Debug, Clone, Serialize)]
pub struct ValidationError {
pub message: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub line: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub column: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub code: Option<String>,
}
#[derive(Debug, Clone, Serialize)]
pub struct ValidationWarning {
pub message: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub line: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub code: Option<String>,
}
#[derive(Debug, Clone, Serialize)]
pub struct PrefixMapping {
pub prefix: String,
pub iri: String,
}
#[derive(Debug, Clone, Serialize)]
pub struct ValidationSummary {
pub total: usize,
pub valid: usize,
pub invalid: usize,
pub warnings: usize,
}
#[derive(Debug, Clone, Deserialize)]
pub struct QueryValidationParams {
pub query: Option<String>,
pub syntax: Option<String>,
}
#[derive(Debug, Clone, Deserialize)]
pub struct UpdateValidationParams {
pub update: Option<String>,
pub syntax: Option<String>,
}
#[derive(Debug, Clone, Deserialize)]
pub struct IriValidationParams {
pub iri: Option<String>,
}
#[derive(Debug, Clone, Deserialize)]
pub struct LangTagValidationParams {
pub tag: Option<String>,
}
pub async fn validate_query(
State(_state): State<Arc<AppState>>,
Json(request): Json<QueryValidationRequest>,
) -> Response {
let result = validate_sparql_query_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
pub async fn validate_query_get(
State(_state): State<Arc<AppState>>,
Query(params): Query<QueryValidationParams>,
) -> Response {
let query = match params.query {
Some(q) => q,
None => {
return (
StatusCode::BAD_REQUEST,
Json(QueryValidationResponse {
valid: false,
input: String::new(),
formatted: None,
query_type: None,
algebra: None,
algebra_optimized: None,
variables: None,
prefixes: None,
errors: vec![ValidationError {
message: "Missing 'query' parameter".to_string(),
line: None,
column: None,
code: Some("MISSING_PARAM".to_string()),
}],
warnings: vec![],
}),
)
.into_response();
}
};
let request = QueryValidationRequest {
query,
syntax: params.syntax.unwrap_or_else(default_sparql_syntax),
include_algebra: true,
include_optimized: true,
};
let result = validate_sparql_query_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
pub async fn validate_update(
State(_state): State<Arc<AppState>>,
Json(request): Json<UpdateValidationRequest>,
) -> Response {
let result = validate_sparql_update_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
pub async fn validate_update_get(
State(_state): State<Arc<AppState>>,
Query(params): Query<UpdateValidationParams>,
) -> Response {
let update = match params.update {
Some(u) => u,
None => {
return (
StatusCode::BAD_REQUEST,
Json(UpdateValidationResponse {
valid: false,
input: String::new(),
formatted: None,
operations: vec![],
affected_graphs: vec![],
errors: vec![ValidationError {
message: "Missing 'update' parameter".to_string(),
line: None,
column: None,
code: Some("MISSING_PARAM".to_string()),
}],
warnings: vec![],
}),
)
.into_response();
}
};
let request = UpdateValidationRequest {
update,
syntax: params.syntax.unwrap_or_else(default_sparql_syntax),
};
let result = validate_sparql_update_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
pub async fn validate_iri(
State(_state): State<Arc<AppState>>,
Json(request): Json<IriValidationRequest>,
) -> Response {
let result = validate_iris_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
pub async fn validate_iri_get(
State(_state): State<Arc<AppState>>,
Query(params): Query<IriValidationParams>,
) -> Response {
let iris = match params.iri {
Some(iri_str) => iri_str.split(',').map(|s| s.trim().to_string()).collect(),
None => {
return (
StatusCode::BAD_REQUEST,
Json(IriValidationResponse {
results: vec![],
summary: ValidationSummary {
total: 0,
valid: 0,
invalid: 0,
warnings: 0,
},
}),
)
.into_response();
}
};
let request = IriValidationRequest {
iris,
check_relative: true,
};
let result = validate_iris_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
pub async fn validate_data(
State(_state): State<Arc<AppState>>,
Json(request): Json<DataValidationRequest>,
) -> Response {
let result = validate_rdf_data_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
pub async fn validate_langtag(
State(_state): State<Arc<AppState>>,
Json(request): Json<LangTagValidationRequest>,
) -> Response {
let result = validate_langtags_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
pub async fn validate_langtag_get(
State(_state): State<Arc<AppState>>,
Query(params): Query<LangTagValidationParams>,
) -> Response {
let tags = match params.tag {
Some(tag_str) => tag_str.split(',').map(|s| s.trim().to_string()).collect(),
None => {
return (
StatusCode::BAD_REQUEST,
Json(LangTagValidationResponse {
results: vec![],
summary: ValidationSummary {
total: 0,
valid: 0,
invalid: 0,
warnings: 0,
},
}),
)
.into_response();
}
};
let request = LangTagValidationRequest { tags };
let result = validate_langtags_internal(&request);
(StatusCode::OK, Json(result)).into_response()
}
fn validate_sparql_query_internal(request: &QueryValidationRequest) -> QueryValidationResponse {
let query_str = request.query.trim();
if query_str.is_empty() {
return QueryValidationResponse {
valid: false,
input: request.query.clone(),
formatted: None,
query_type: None,
algebra: None,
algebra_optimized: None,
variables: None,
prefixes: None,
errors: vec![ValidationError {
message: "Empty query string".to_string(),
line: Some(1),
column: Some(1),
code: Some("EMPTY_QUERY".to_string()),
}],
warnings: vec![],
};
}
match crate::handlers::sparql::validate_sparql_query(query_str) {
Ok(_) => {
let query_type = detect_query_type(query_str);
let variables = extract_query_variables(query_str);
let prefixes = extract_prefixes(query_str);
let formatted = format_query(query_str);
let algebra = if request.include_algebra {
Some(generate_algebra_representation(query_str))
} else {
None
};
let algebra_optimized = if request.include_optimized {
Some(generate_optimized_algebra(query_str))
} else {
None
};
let mut warnings = vec![];
if query_str.contains("SELECT *") {
warnings.push(ValidationWarning {
message: "Using SELECT * may return more data than needed".to_string(),
line: None,
code: Some("SELECT_STAR".to_string()),
});
}
if !query_str.to_uppercase().contains("LIMIT")
&& query_type == Some("SELECT".to_string())
{
warnings.push(ValidationWarning {
message: "Query has no LIMIT clause, may return large result sets".to_string(),
line: None,
code: Some("NO_LIMIT".to_string()),
});
}
QueryValidationResponse {
valid: true,
input: request.query.clone(),
formatted: Some(formatted),
query_type,
algebra,
algebra_optimized,
variables: Some(variables),
prefixes: Some(prefixes),
errors: vec![],
warnings,
}
}
Err(e) => {
let (line, column) = extract_error_location(&e.to_string());
QueryValidationResponse {
valid: false,
input: request.query.clone(),
formatted: None,
query_type: None,
algebra: None,
algebra_optimized: None,
variables: None,
prefixes: None,
errors: vec![ValidationError {
message: e.to_string(),
line,
column,
code: Some("PARSE_ERROR".to_string()),
}],
warnings: vec![],
}
}
}
}
fn validate_sparql_update_internal(request: &UpdateValidationRequest) -> UpdateValidationResponse {
let update_str = request.update.trim();
if update_str.is_empty() {
return UpdateValidationResponse {
valid: false,
input: request.update.clone(),
formatted: None,
operations: vec![],
affected_graphs: vec![],
errors: vec![ValidationError {
message: "Empty update string".to_string(),
line: Some(1),
column: Some(1),
code: Some("EMPTY_UPDATE".to_string()),
}],
warnings: vec![],
};
}
let is_valid_update = validate_sparql_update_syntax(update_str);
match is_valid_update {
Ok(_) => {
let operations = extract_update_operations(update_str);
let affected_graphs = extract_affected_graphs(update_str);
let formatted = format_update(update_str);
let mut warnings = vec![];
if update_str.to_uppercase().contains("DELETE WHERE")
&& !update_str.to_uppercase().contains("GRAPH")
{
warnings.push(ValidationWarning {
message: "DELETE WHERE without GRAPH clause affects default graph".to_string(),
line: None,
code: Some("DEFAULT_GRAPH_DELETE".to_string()),
});
}
if update_str.to_uppercase().contains("DROP ALL") {
warnings.push(ValidationWarning {
message: "DROP ALL will remove all data from all graphs".to_string(),
line: None,
code: Some("DROP_ALL".to_string()),
});
}
if update_str.to_uppercase().contains("CLEAR ALL") {
warnings.push(ValidationWarning {
message: "CLEAR ALL will remove all triples from all graphs".to_string(),
line: None,
code: Some("CLEAR_ALL".to_string()),
});
}
UpdateValidationResponse {
valid: true,
input: request.update.clone(),
formatted: Some(formatted),
operations,
affected_graphs,
errors: vec![],
warnings,
}
}
Err(e) => {
let (line, column) = extract_error_location(&e.to_string());
UpdateValidationResponse {
valid: false,
input: request.update.clone(),
formatted: None,
operations: vec![],
affected_graphs: vec![],
errors: vec![ValidationError {
message: e.to_string(),
line,
column,
code: Some("PARSE_ERROR".to_string()),
}],
warnings: vec![],
}
}
}
}
fn validate_iris_internal(request: &IriValidationRequest) -> IriValidationResponse {
let mut results = Vec::new();
let mut valid_count = 0;
let mut warning_count = 0;
for iri_str in &request.iris {
let iri_str = iri_str.trim();
let mut errors = Vec::new();
let mut warnings = Vec::new();
let mut is_valid = true;
let mut is_absolute = false;
let mut is_relative = false;
let mut scheme = None;
if iri_str.is_empty() {
errors.push("Empty IRI string".to_string());
is_valid = false;
} else {
match oxirs_core::model::NamedNode::new(iri_str) {
Ok(node) => {
let iri = node.as_str();
if iri.contains("://") {
is_absolute = true;
if let Some(idx) = iri.find("://") {
scheme = Some(iri[..idx].to_string());
}
} else if iri.starts_with("urn:") || iri.starts_with("mailto:") {
is_absolute = true;
if let Some(idx) = iri.find(':') {
scheme = Some(iri[..idx].to_string());
}
} else {
is_relative = true;
if request.check_relative {
warnings.push("Relative IRI detected".to_string());
warning_count += 1;
}
}
if iri.contains(' ') {
warnings.push("IRI contains spaces".to_string());
warning_count += 1;
}
if iri.contains("..") {
warnings.push("IRI contains '..' path segments".to_string());
warning_count += 1;
}
if iri.ends_with('#') || iri.ends_with('/') {
}
valid_count += 1;
}
Err(e) => {
errors.push(format!("Invalid IRI: {}", e));
is_valid = false;
}
}
}
results.push(IriValidationResult {
iri: iri_str.to_string(),
valid: is_valid,
is_absolute,
is_relative,
scheme,
errors,
warnings,
});
}
IriValidationResponse {
summary: ValidationSummary {
total: results.len(),
valid: valid_count,
invalid: results.len() - valid_count,
warnings: warning_count,
},
results,
}
}
fn validate_rdf_data_internal(request: &DataValidationRequest) -> DataValidationResponse {
let data_str = request.data.trim();
if data_str.is_empty() {
return DataValidationResponse {
valid: false,
format: request.format.clone(),
triple_count: 0,
graph_count: None,
subject_count: 0,
predicate_count: 0,
object_count: 0,
blank_node_count: 0,
literal_count: 0,
errors: vec![ValidationError {
message: "Empty data string".to_string(),
line: Some(1),
column: Some(1),
code: Some("EMPTY_DATA".to_string()),
}],
warnings: vec![],
sample_triples: vec![],
};
}
let format = normalize_format(&request.format);
let base_iri = request
.base
.clone()
.unwrap_or_else(|| "http://example.org/base/".to_string());
match parse_rdf_data(data_str, &format, &base_iri) {
Ok(parse_result) => {
let mut warnings = vec![];
if parse_result.blank_node_count > parse_result.triple_count / 2 {
warnings.push(ValidationWarning {
message: "High ratio of blank nodes to triples".to_string(),
line: None,
code: Some("HIGH_BNODE_RATIO".to_string()),
});
}
DataValidationResponse {
valid: true,
format,
triple_count: parse_result.triple_count,
graph_count: parse_result.graph_count,
subject_count: parse_result.subject_count,
predicate_count: parse_result.predicate_count,
object_count: parse_result.object_count,
blank_node_count: parse_result.blank_node_count,
literal_count: parse_result.literal_count,
errors: vec![],
warnings,
sample_triples: parse_result.sample_triples,
}
}
Err(e) => {
let (line, column) = extract_error_location(&e);
DataValidationResponse {
valid: false,
format,
triple_count: 0,
graph_count: None,
subject_count: 0,
predicate_count: 0,
object_count: 0,
blank_node_count: 0,
literal_count: 0,
errors: vec![ValidationError {
message: e,
line,
column,
code: Some("PARSE_ERROR".to_string()),
}],
warnings: vec![],
sample_triples: vec![],
}
}
}
}
fn validate_langtags_internal(request: &LangTagValidationRequest) -> LangTagValidationResponse {
let mut results = Vec::new();
let mut valid_count = 0;
let mut warning_count = 0;
for tag_str in &request.tags {
let tag_str = tag_str.trim();
let mut errors = Vec::new();
let mut warnings = Vec::new();
let mut is_valid = true;
let mut language = None;
let mut script = None;
let mut region = None;
let mut variants = Vec::new();
let mut extensions = Vec::new();
let mut private_use = None;
if tag_str.is_empty() {
errors.push("Empty language tag".to_string());
is_valid = false;
} else {
let result = parse_language_tag(tag_str);
match result {
Ok(parsed) => {
language = parsed.language;
script = parsed.script;
region = parsed.region;
variants = parsed.variants;
extensions = parsed.extensions;
private_use = parsed.private_use;
if language.is_none() && private_use.is_none() {
errors.push("Missing primary language subtag".to_string());
is_valid = false;
}
if let Some(ref lang) = language {
if is_deprecated_language(lang) {
warnings.push(format!("Language subtag '{}' is deprecated", lang));
warning_count += 1;
}
}
if script.is_some() && language.is_none() {
warnings.push("Script subtag without language subtag".to_string());
warning_count += 1;
}
if is_valid {
valid_count += 1;
}
}
Err(e) => {
errors.push(e);
is_valid = false;
}
}
}
results.push(LangTagValidationResult {
tag: tag_str.to_string(),
valid: is_valid,
language,
script,
region,
variants,
extensions,
private_use,
errors,
warnings,
});
}
LangTagValidationResponse {
summary: ValidationSummary {
total: results.len(),
valid: valid_count,
invalid: results.len() - valid_count,
warnings: warning_count,
},
results,
}
}
fn validate_sparql_update_syntax(update: &str) -> Result<(), String> {
if update.trim().is_empty() {
return Err("Empty update string".to_string());
}
let operations_patterns = [
r"(?i)\bINSERT\s+DATA\b",
r"(?i)\bDELETE\s+DATA\b",
r"(?i)\bINSERT\s*\{", r"(?i)\bDELETE\s*\{", r"(?i)\bLOAD\b",
r"(?i)\bCLEAR\b",
r"(?i)\bDROP\b",
r"(?i)\bCREATE\b",
r"(?i)\bCOPY\b",
r"(?i)\bMOVE\b",
r"(?i)\bADD\b",
r"(?i)\bDELETE\s+WHERE\b",
r"(?i)\bINSERT\s+WHERE\b",
r"(?i)\bWITH\s+", ];
let has_valid_operation = operations_patterns.iter().any(|pattern| {
regex::Regex::new(pattern)
.map(|re| re.is_match(update))
.unwrap_or(false)
});
if !has_valid_operation {
return Err("Update must contain a valid SPARQL Update operation (INSERT, DELETE, LOAD, CLEAR, DROP, CREATE, COPY, MOVE, ADD)".to_string());
}
let open_braces = update.matches('{').count();
let close_braces = update.matches('}').count();
if open_braces != close_braces {
return Err(format!(
"Mismatched braces: {} open, {} close",
open_braces, close_braces
));
}
Ok(())
}
fn detect_query_type(query: &str) -> Option<String> {
let upper = query.to_uppercase();
if upper.contains("SELECT") {
Some("SELECT".to_string())
} else if upper.contains("CONSTRUCT") {
Some("CONSTRUCT".to_string())
} else if upper.contains("ASK") {
Some("ASK".to_string())
} else if upper.contains("DESCRIBE") {
Some("DESCRIBE".to_string())
} else {
None
}
}
fn extract_query_variables(query: &str) -> Vec<String> {
let mut variables = Vec::new();
let mut chars = query.chars().peekable();
while let Some(c) = chars.next() {
if c == '?' || c == '$' {
let mut var_name = String::new();
while let Some(&nc) = chars.peek() {
if nc.is_alphanumeric() || nc == '_' {
var_name.push(chars.next().expect("char should exist after peek"));
} else {
break;
}
}
if !var_name.is_empty() && !variables.contains(&var_name) {
variables.push(var_name);
}
}
}
variables
}
fn extract_prefixes(query: &str) -> Vec<PrefixMapping> {
let mut prefixes = Vec::new();
let prefix_regex =
regex::Regex::new(r"(?i)PREFIX\s+(\w*):\s*<([^>]+)>").expect("valid regex pattern");
for cap in prefix_regex.captures_iter(query) {
prefixes.push(PrefixMapping {
prefix: cap.get(1).map_or("", |m| m.as_str()).to_string(),
iri: cap.get(2).map_or("", |m| m.as_str()).to_string(),
});
}
prefixes
}
fn format_query(query: &str) -> String {
let mut formatted = query.to_string();
let keywords = [
"SELECT",
"CONSTRUCT",
"ASK",
"DESCRIBE",
"WHERE",
"ORDER BY",
"LIMIT",
"OFFSET",
"FILTER",
"OPTIONAL",
"UNION",
"BIND",
"VALUES",
"GROUP BY",
"HAVING",
];
for keyword in keywords {
let pattern = format!(r"(?i)\b{}\b", keyword);
let re = regex::Regex::new(&pattern).expect("valid regex pattern");
formatted = re
.replace_all(&formatted, |caps: ®ex::Captures| {
format!(
"\n{}",
caps.get(0).expect("capture group 0 should exist").as_str()
)
})
.to_string();
}
let multi_newline = regex::Regex::new(r"\n\s*\n").expect("valid regex pattern");
formatted = multi_newline.replace_all(&formatted, "\n").to_string();
formatted.trim().to_string()
}
fn format_update(update: &str) -> String {
let mut formatted = update.to_string();
let keywords = [
"INSERT", "DELETE", "WHERE", "DATA", "GRAPH", "WITH", "USING", "CREATE", "DROP", "COPY",
"MOVE", "ADD", "CLEAR", "LOAD",
];
for keyword in keywords {
let pattern = format!(r"(?i)\b{}\b", keyword);
let re = regex::Regex::new(&pattern).expect("valid regex pattern");
formatted = re
.replace_all(&formatted, |caps: ®ex::Captures| {
format!(
"\n{}",
caps.get(0).expect("capture group 0 should exist").as_str()
)
})
.to_string();
}
let multi_newline = regex::Regex::new(r"\n\s*\n").expect("valid regex pattern");
formatted = multi_newline.replace_all(&formatted, "\n").to_string();
formatted.trim().to_string()
}
fn generate_algebra_representation(query: &str) -> String {
let query_type = detect_query_type(query).unwrap_or_else(|| "UNKNOWN".to_string());
let variables = extract_query_variables(query);
let var_str = variables
.iter()
.map(|v| format!("?{}", v))
.collect::<Vec<_>>()
.join(" ");
let triple_pattern_re = regex::Regex::new(r"\{([^{}]+)\}").expect("valid regex pattern");
let patterns: Vec<String> = triple_pattern_re
.captures_iter(query)
.filter_map(|cap| cap.get(1).map(|m| m.as_str().trim().to_string()))
.collect();
format!(
"({}
(project ({})
(bgp
{}
)))",
query_type.to_lowercase(),
var_str,
patterns.join("\n ")
)
}
fn generate_optimized_algebra(query: &str) -> String {
let algebra = generate_algebra_representation(query);
format!("; Optimized\n{}", algebra)
}
fn extract_update_operations(update: &str) -> Vec<String> {
let mut operations = Vec::new();
let upper = update.to_uppercase();
let op_types = [
"INSERT DATA",
"DELETE DATA",
"INSERT",
"DELETE",
"CLEAR",
"DROP",
"CREATE",
"COPY",
"MOVE",
"ADD",
"LOAD",
];
for op in op_types {
if upper.contains(op) {
operations.push(op.to_string());
}
}
let mut seen = std::collections::HashSet::new();
operations.retain(|op| seen.insert(op.clone()));
operations
}
fn extract_affected_graphs(update: &str) -> Vec<String> {
let mut graphs = Vec::new();
let graph_re = regex::Regex::new(r"(?i)GRAPH\s*<([^>]+)>").expect("valid regex pattern");
for cap in graph_re.captures_iter(update) {
if let Some(g) = cap.get(1) {
let graph_uri = g.as_str().to_string();
if !graphs.contains(&graph_uri) {
graphs.push(graph_uri);
}
}
}
if update.to_uppercase().contains("DEFAULT") && !graphs.contains(&"default".to_string()) {
graphs.push("default".to_string());
}
if update.to_uppercase().contains(" ALL") && !graphs.contains(&"all".to_string()) {
graphs.push("all".to_string());
}
graphs
}
fn extract_error_location(error: &str) -> (Option<usize>, Option<usize>) {
let line_re = regex::Regex::new(r"line\s*(\d+)").expect("valid regex pattern");
let col_re = regex::Regex::new(r"col(?:umn)?\s*(\d+)").expect("valid regex pattern");
let line = line_re
.captures(error)
.and_then(|c| c.get(1))
.and_then(|m| m.as_str().parse().ok());
let column = col_re
.captures(error)
.and_then(|c| c.get(1))
.and_then(|m| m.as_str().parse().ok());
(line, column)
}
fn normalize_format(format: &str) -> String {
match format.to_lowercase().as_str() {
"turtle" | "ttl" | "text/turtle" => "turtle".to_string(),
"ntriples" | "nt" | "application/n-triples" => "ntriples".to_string(),
"nquads" | "nq" | "application/n-quads" => "nquads".to_string(),
"rdfxml" | "rdf/xml" | "application/rdf+xml" => "rdfxml".to_string(),
"jsonld" | "json-ld" | "application/ld+json" => "jsonld".to_string(),
"trig" | "application/trig" => "trig".to_string(),
_ => format.to_lowercase(),
}
}
struct RdfParseResult {
triple_count: usize,
graph_count: Option<usize>,
subject_count: usize,
predicate_count: usize,
object_count: usize,
blank_node_count: usize,
literal_count: usize,
sample_triples: Vec<String>,
}
fn parse_rdf_data(data: &str, format: &str, _base_iri: &str) -> Result<RdfParseResult, String> {
use std::collections::HashSet;
let mut subjects = HashSet::new();
let mut predicates = HashSet::new();
let mut objects = HashSet::new();
let mut blank_nodes = HashSet::new();
let mut literal_count = 0;
let mut sample_triples = Vec::new();
let triples = match format {
"turtle" | "ttl" => oxirs_core::format::turtle::TurtleParser::new()
.parse_str(data)
.map_err(|e| format!("Turtle parse error: {}", e))?,
"ntriples" | "nt" => oxirs_core::format::ntriples::NTriplesParser::new()
.parse_str(data)
.map_err(|e| format!("N-Triples parse error: {}", e))?,
"rdfxml" | "rdf/xml" => oxirs_core::format::rdfxml::RdfXmlParser::new()
.parse_str(data)
.map_err(|e| format!("RDF/XML parse error: {}", e))?,
_ => {
return Err(format!("Unsupported format: {}", format));
}
};
let triple_count = triples.len();
for triple in &triples {
let subject_str = format!("{:?}", triple.subject());
if subject_str.contains("BlankNode") {
blank_nodes.insert(subject_str.clone());
}
subjects.insert(subject_str);
predicates.insert(format!("{:?}", triple.predicate()));
let object_str = format!("{:?}", triple.object());
if object_str.contains("BlankNode") {
blank_nodes.insert(object_str.clone());
} else if object_str.contains("Literal") {
literal_count += 1;
}
objects.insert(object_str);
if sample_triples.len() < 10 {
sample_triples.push(format!(
"{:?} {:?} {:?}",
triple.subject(),
triple.predicate(),
triple.object()
));
}
}
Ok(RdfParseResult {
triple_count,
graph_count: None, subject_count: subjects.len(),
predicate_count: predicates.len(),
object_count: objects.len(),
blank_node_count: blank_nodes.len(),
literal_count,
sample_triples,
})
}
struct ParsedLanguageTag {
language: Option<String>,
script: Option<String>,
region: Option<String>,
variants: Vec<String>,
extensions: Vec<String>,
private_use: Option<String>,
}
fn parse_language_tag(tag: &str) -> Result<ParsedLanguageTag, String> {
let parts: Vec<&str> = tag.split('-').collect();
if parts.is_empty() {
return Err("Empty language tag".to_string());
}
let mut language = None;
let mut script = None;
let mut region = None;
let mut variants = Vec::new();
let mut extensions = Vec::new();
let mut private_use = None;
let mut i = 0;
if parts[0].to_lowercase() == "x" {
if parts.len() > 1 {
private_use = Some(parts[1..].join("-"));
}
return Ok(ParsedLanguageTag {
language,
script,
region,
variants,
extensions,
private_use,
});
}
if parts[i].len() >= 2
&& parts[i].len() <= 3
&& parts[i].chars().all(|c| c.is_ascii_alphabetic())
{
language = Some(parts[i].to_lowercase());
i += 1;
} else if parts[i].len() == 4 && parts[i].chars().all(|c| c.is_ascii_alphabetic()) {
language = Some(parts[i].to_lowercase());
i += 1;
} else {
return Err(format!("Invalid primary language subtag: {}", parts[0]));
}
while i < parts.len()
&& parts[i].len() == 3
&& parts[i].chars().all(|c| c.is_ascii_alphabetic())
{
if let Some(ref mut lang) = language {
*lang = format!("{}-{}", lang, parts[i].to_lowercase());
}
i += 1;
if i >= 4 {
break; }
}
if i < parts.len() && parts[i].len() == 4 && parts[i].chars().all(|c| c.is_ascii_alphabetic()) {
let s = parts[i];
script = Some(format!(
"{}{}",
s.chars()
.next()
.expect("script subtag should have at least one char")
.to_uppercase(),
s[1..].to_lowercase()
));
i += 1;
}
if i < parts.len() {
let p = parts[i];
if (p.len() == 2 && p.chars().all(|c| c.is_ascii_alphabetic()))
|| (p.len() == 3 && p.chars().all(|c| c.is_ascii_digit()))
{
region = Some(p.to_uppercase());
i += 1;
}
}
while i < parts.len() {
let p = parts[i];
if (p.len() >= 5 && p.len() <= 8 && p.chars().all(|c| c.is_ascii_alphanumeric()))
|| (p.len() == 4
&& p.chars().next().is_some_and(|c| c.is_ascii_digit())
&& p.chars().all(|c| c.is_ascii_alphanumeric()))
{
variants.push(p.to_lowercase());
i += 1;
} else {
break;
}
}
while i < parts.len() {
let p = parts[i];
if p.len() == 1 && p.chars().all(|c| c.is_ascii_alphanumeric()) && p.to_lowercase() != "x" {
let singleton = p.to_lowercase();
let mut ext_parts = vec![singleton];
i += 1;
while i < parts.len() {
let ep = parts[i];
if ep.len() >= 2 && ep.len() <= 8 && ep.chars().all(|c| c.is_ascii_alphanumeric()) {
ext_parts.push(ep.to_lowercase());
i += 1;
} else {
break;
}
}
if ext_parts.len() > 1 {
extensions.push(ext_parts.join("-"));
}
} else {
break;
}
}
if i < parts.len() && parts[i].to_lowercase() == "x" {
i += 1;
if i < parts.len() {
private_use = Some(parts[i..].join("-"));
}
}
Ok(ParsedLanguageTag {
language,
script,
region,
variants,
extensions,
private_use,
})
}
fn is_deprecated_language(lang: &str) -> bool {
let deprecated = [
"iw",
"ji",
"in",
"no-bok",
"no-nyn",
"sgn-be-fr",
"sgn-be-nl",
"sgn-ch-de",
];
deprecated.contains(&lang.to_lowercase().as_str())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_validate_sparql_query_valid() {
let request = QueryValidationRequest {
query: "SELECT ?s ?p ?o WHERE { ?s ?p ?o }".to_string(),
syntax: "sparql11".to_string(),
include_algebra: true,
include_optimized: true,
};
let response = validate_sparql_query_internal(&request);
assert!(response.valid);
assert_eq!(response.query_type, Some("SELECT".to_string()));
assert!(response.variables.is_some());
let vars = response.variables.unwrap();
assert!(vars.contains(&"s".to_string()));
assert!(vars.contains(&"p".to_string()));
assert!(vars.contains(&"o".to_string()));
}
#[test]
fn test_validate_sparql_query_invalid() {
let request = QueryValidationRequest {
query: "SELEKT ?s WHERE { ?s ?p ?o }".to_string(),
syntax: "sparql11".to_string(),
include_algebra: false,
include_optimized: false,
};
let response = validate_sparql_query_internal(&request);
assert!(!response.valid);
assert!(!response.errors.is_empty());
}
#[test]
fn test_validate_sparql_query_empty() {
let request = QueryValidationRequest {
query: "".to_string(),
syntax: "sparql11".to_string(),
include_algebra: false,
include_optimized: false,
};
let response = validate_sparql_query_internal(&request);
assert!(!response.valid);
assert_eq!(response.errors[0].code, Some("EMPTY_QUERY".to_string()));
}
#[test]
fn test_validate_sparql_update_valid() {
let request = UpdateValidationRequest {
update: "INSERT DATA { <http://example/s> <http://example/p> \"value\" }".to_string(),
syntax: "sparql11".to_string(),
};
let response = validate_sparql_update_internal(&request);
assert!(response.valid);
assert!(response.operations.contains(&"INSERT DATA".to_string()));
}
#[test]
fn test_validate_sparql_update_invalid() {
let request = UpdateValidationRequest {
update: "INSERTT DATA { <http://example/s> <http://example/p> \"value\" }".to_string(),
syntax: "sparql11".to_string(),
};
let response = validate_sparql_update_internal(&request);
assert!(!response.valid);
}
#[test]
fn test_validate_iri_valid() {
let request = IriValidationRequest {
iris: vec![
"http://example.org/resource".to_string(),
"https://www.w3.org/2001/XMLSchema#string".to_string(),
"urn:isbn:0451450523".to_string(),
],
check_relative: true,
};
let response = validate_iris_internal(&request);
assert_eq!(response.summary.total, 3);
assert_eq!(response.summary.valid, 3);
assert!(response.results[0].is_absolute);
assert_eq!(response.results[0].scheme, Some("http".to_string()));
assert_eq!(response.results[2].scheme, Some("urn".to_string()));
}
#[test]
fn test_validate_iri_relative() {
let request = IriValidationRequest {
iris: vec!["resource/path".to_string()],
check_relative: true,
};
let response = validate_iris_internal(&request);
}
#[test]
fn test_validate_rdf_data_turtle() {
let request = DataValidationRequest {
data: r#"
@prefix ex: <http://example.org/> .
ex:subject ex:predicate "object" .
ex:s2 ex:p2 ex:o2 .
"#
.to_string(),
format: "turtle".to_string(),
base: Some("http://example.org/base/".to_string()),
};
let response = validate_rdf_data_internal(&request);
assert!(response.valid);
assert_eq!(response.triple_count, 2);
}
#[test]
fn test_validate_rdf_data_invalid() {
let request = DataValidationRequest {
data: "@prefix ex: <broken".to_string(),
format: "turtle".to_string(),
base: None,
};
let response = validate_rdf_data_internal(&request);
assert!(!response.valid);
assert!(!response.errors.is_empty());
}
#[test]
fn test_validate_langtag_valid() {
let request = LangTagValidationRequest {
tags: vec![
"en".to_string(),
"en-US".to_string(),
"zh-Hans-CN".to_string(),
"de-DE-1996".to_string(),
],
};
let response = validate_langtags_internal(&request);
assert_eq!(response.summary.total, 4);
assert_eq!(response.summary.valid, 4);
assert_eq!(response.results[0].language, Some("en".to_string()));
assert_eq!(response.results[1].region, Some("US".to_string()));
assert_eq!(response.results[2].script, Some("Hans".to_string()));
}
#[test]
fn test_validate_langtag_private_use() {
let request = LangTagValidationRequest {
tags: vec!["x-custom".to_string()],
};
let response = validate_langtags_internal(&request);
assert_eq!(response.summary.valid, 1);
assert_eq!(response.results[0].private_use, Some("custom".to_string()));
}
#[test]
fn test_detect_query_type() {
assert_eq!(
detect_query_type("SELECT ?x WHERE { ?x ?y ?z }"),
Some("SELECT".to_string())
);
assert_eq!(
detect_query_type("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }"),
Some("CONSTRUCT".to_string())
);
assert_eq!(
detect_query_type("ASK { ?s ?p ?o }"),
Some("ASK".to_string())
);
assert_eq!(
detect_query_type("DESCRIBE <http://example.org>"),
Some("DESCRIBE".to_string())
);
}
#[test]
fn test_extract_query_variables() {
let vars = extract_query_variables(
"SELECT ?name ?age WHERE { ?person foaf:name ?name ; foaf:age ?age }",
);
assert!(vars.contains(&"name".to_string()));
assert!(vars.contains(&"age".to_string()));
assert!(vars.contains(&"person".to_string()));
}
#[test]
fn test_extract_prefixes() {
let prefixes = extract_prefixes("PREFIX foaf: <http://xmlns.com/foaf/0.1/>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nSELECT * WHERE { ?s ?p ?o }");
assert_eq!(prefixes.len(), 2);
assert!(prefixes.iter().any(|p| p.prefix == "foaf"));
assert!(prefixes.iter().any(|p| p.prefix == "rdf"));
}
#[test]
fn test_extract_update_operations() {
let ops = extract_update_operations(
"INSERT DATA { <s> <p> \"o\" } ; DELETE DATA { <s2> <p2> \"o2\" }",
);
assert!(ops.contains(&"INSERT DATA".to_string()));
assert!(ops.contains(&"DELETE DATA".to_string()));
}
#[test]
fn test_extract_affected_graphs() {
let graphs = extract_affected_graphs(
"INSERT DATA { GRAPH <http://example.org/g1> { <s> <p> \"o\" } }",
);
assert!(graphs.contains(&"http://example.org/g1".to_string()));
}
#[test]
fn test_parse_language_tag() {
let result = parse_language_tag("en-Latn-US-valencia").unwrap();
assert_eq!(result.language, Some("en".to_string()));
assert_eq!(result.script, Some("Latn".to_string()));
assert_eq!(result.region, Some("US".to_string()));
assert!(result.variants.contains(&"valencia".to_string()));
}
#[test]
fn test_is_deprecated_language() {
assert!(is_deprecated_language("iw")); assert!(!is_deprecated_language("he")); assert!(is_deprecated_language("ji")); }
}