use crate::manifest::types::{GgenManifest, QuerySource, TemplateSource};
use crate::utils::error::{Error, Result};
use std::path::Path;
/// Validates a parsed [`GgenManifest`] against the filesystem and the
/// manifest's own consistency rules.
///
/// The validator only borrows its inputs; it is constructed with
/// `ManifestValidator::new` and driven through `validate`.
pub struct ManifestValidator<'a> {
/// Manifest whose sections are inspected by the validation passes.
manifest: &'a GgenManifest,
/// Path that every file reference in the manifest is joined onto before
/// its existence is checked.
base_path: &'a Path,
}
impl<'a> ManifestValidator<'a> {
pub fn new(manifest: &'a GgenManifest, base_path: &'a Path) -> Self {
Self {
manifest,
base_path,
}
}
/// Runs every validation pass over the manifest, stopping at the first failure.
///
/// Passes run in this order: project metadata, ontology paths, inference
/// rules, generation rules, SHACL shape paths.
///
/// # Errors
/// Returns the error produced by the first pass that fails; later passes
/// are not executed.
pub fn validate(&self) -> Result<()> {
    self.validate_project()
        .and_then(|()| self.validate_ontology())
        .and_then(|()| self.validate_inference_rules())
        .and_then(|()| self.validate_generation_rules())
        .and_then(|()| self.validate_shacl_paths())
}
fn validate_project(&self) -> Result<()> {
if self.manifest.project.name.is_empty() {
return Err(Error::new("project.name cannot be empty"));
}
if self.manifest.project.version.is_empty() {
return Err(Error::new("project.version cannot be empty"));
}
Ok(())
}
fn validate_ontology(&self) -> Result<()> {
let source_path = self.base_path.join(&self.manifest.ontology.source);
if !source_path.exists() {
return Err(Error::new(&format!(
"Ontology source not found: {}",
source_path.display()
)));
}
for import in &self.manifest.ontology.imports {
let import_path = self.base_path.join(import);
if !import_path.exists() {
return Err(Error::new(&format!(
"Ontology import not found: {}",
import_path.display()
)));
}
}
Ok(())
}
fn validate_inference_rules(&self) -> Result<()> {
let mut seen_orders: Vec<i32> = Vec::new();
for rule in &self.manifest.inference.rules {
if rule.name.is_empty() {
return Err(Error::new("inference.rules[].name cannot be empty"));
}
if rule.construct.is_empty() {
return Err(Error::new(&format!(
"inference.rules[{}].construct cannot be empty",
rule.name
)));
}
let construct_upper = rule.construct.to_uppercase();
if !construct_upper.contains("ORDER BY") {
if self.manifest.validation.strict_mode {
return Err(Error::new(&format!(
"error[E0011]: Inference rule '{}' CONSTRUCT query lacks ORDER BY\n |\n = strict_mode is enabled: non-deterministic triple ordering is rejected\n = help: Add ORDER BY to your CONSTRUCT query to guarantee deterministic output\n = help: Or set `strict_mode = false` in [validation] to downgrade to a warning",
rule.name
)));
}
log::warn!(
"Inference rule '{}' CONSTRUCT query lacks ORDER BY - may produce non-deterministic results",
rule.name
);
}
if seen_orders.contains(&rule.order) {
log::warn!(
"Inference rule '{}' has duplicate order value {}",
rule.name,
rule.order
);
}
seen_orders.push(rule.order);
}
Ok(())
}
/// Validates every generation rule declared in the manifest.
///
/// For each rule, in declaration order, the following checks run in sequence:
/// 1. `name` must be non-empty.
/// 2. A file-backed query must exist, be readable, and must not contain a
///    `VALUES` clause (E0010).
/// 3. A file-backed template must exist.
/// 4. Every pack referenced by the query **and** by the template must be
///    declared in `[[packs]]` (E0014).
/// 5. Inline and file-backed queries must contain `ORDER BY`; this is an
///    error (E0013) when `validation.strict_mode` is enabled and a logged
///    warning otherwise. Pack-backed queries are not inspected.
/// 6. `output_file` must be non-empty.
///
/// # Errors
/// Returns the first failing check of the first failing rule.
fn validate_generation_rules(&self) -> Result<()> {
    let strict_mode = self.manifest.validation.strict_mode;

    for rule in &self.manifest.generation.rules {
        if rule.name.is_empty() {
            return Err(Error::new("generation.rules[].name cannot be empty"));
        }

        // Resolve the query text once. File-backed queries are read a single
        // time here and the content is reused for the ORDER BY check below,
        // so an I/O failure is always reported instead of being swallowed.
        let query_text: Option<String> = match &rule.query {
            QuerySource::Inline { inline } => Some(inline.clone()),
            QuerySource::File { file } => {
                let query_path = self.base_path.join(file);
                if !query_path.exists() {
                    return Err(Error::new(&format!(
                        "Query file not found for rule '{}': {}",
                        rule.name,
                        query_path.display()
                    )));
                }
                let content = std::fs::read_to_string(&query_path).map_err(|e| {
                    Error::new(&format!(
                        "Failed to read query file for rule '{}': {}",
                        rule.name, e
                    ))
                })?;
                if query_contains_values(&content) {
                    return Err(Error::new(&format!(
                        "error[E0010]: VALUES data must be inline in ggen.toml\n --> rule: '{}'\n --> file: {}\n |\n = VALUES clauses belong in ggen.toml as `query = {{ inline = \"SELECT ... WHERE {{ VALUES ... }}\" }}`\n = External .rq files are for queries against real RDF triples only\n = help: Move the VALUES block into ggen.toml and delete the .rq file",
                        rule.name,
                        query_path.display()
                    )));
                }
                Some(content)
            }
            QuerySource::Pack { .. } => None,
        };

        if let TemplateSource::File { file } = &rule.template {
            let template_path = self.base_path.join(file);
            if !template_path.exists() {
                return Err(Error::new(&format!(
                    "Template file not found for rule '{}': {}",
                    rule.name,
                    template_path.display()
                )));
            }
        }

        // A rule may reference a pack from its query, its template, or both.
        // Both references are checked (query first) so an undeclared template
        // pack cannot hide behind a declared query pack.
        let query_pack: Option<&str> = match &rule.query {
            QuerySource::Pack { pack, .. } => Some(pack.as_str()),
            QuerySource::Inline { .. } | QuerySource::File { .. } => None,
        };
        let template_pack: Option<&str> = match &rule.template {
            TemplateSource::Pack { pack, .. } => Some(pack.as_str()),
            _ => None,
        };
        for pack_name in query_pack.into_iter().chain(template_pack) {
            let declared = self.manifest.packs.iter().any(|p| p.name == pack_name);
            if !declared {
                return Err(Error::new(&format!(
                    "error[E0014]: Pack '{}' used in rule '{}' is not declared in [[packs]]",
                    pack_name, rule.name
                )));
            }
        }

        if let Some(query_text) = query_text.as_deref() {
            if !query_has_order_by(query_text) {
                if strict_mode {
                    return Err(Error::new(&format!(
                        "error[E0013]: Generation rule '{}' SELECT query lacks ORDER BY\n |\n = strict_mode is enabled: non-deterministic row ordering is rejected\n = help: Add ORDER BY to your SELECT query to guarantee deterministic template rendering\n = help: Or set `strict_mode = false` in [validation] to downgrade to a warning",
                        rule.name
                    )));
                }
                log::warn!(
                    "Generation rule '{}' SELECT query lacks ORDER BY — row order may vary across runs",
                    rule.name
                );
            }
        }

        if rule.output_file.is_empty() {
            return Err(Error::new(&format!(
                "generation.rules[{}].output_file cannot be empty",
                rule.name
            )));
        }
    }
    Ok(())
}
fn validate_shacl_paths(&self) -> Result<()> {
for shacl_path in &self.manifest.validation.shacl {
let full_path = self.base_path.join(shacl_path);
if !full_path.exists() {
return Err(Error::new(&format!(
"SHACL shape file not found: {}",
full_path.display()
)));
}
}
Ok(())
}
}
/// Returns `true` when `sparql` appears to contain an `ORDER BY` clause.
///
/// Matching is case-insensitive and accepts any run of whitespace (spaces,
/// tabs, newlines) between `ORDER` and `BY`, since SPARQL allows the two
/// keywords to be separated by arbitrary whitespace. Lines whose first
/// non-blank character is `#` are treated as comments and ignored, so a
/// commented-out `ORDER BY` does not count.
///
/// This is a textual heuristic, not a SPARQL parser: an `ORDER BY` inside a
/// string literal or a trailing `#` comment is still reported as present.
pub fn query_has_order_by(sparql: &str) -> bool {
    // Drop full-line comments, then collapse every whitespace run to a single
    // space so the keyword pair can be matched as one contiguous needle.
    let normalized = sparql
        .lines()
        .filter(|line| !line.trim_start().starts_with('#'))
        .flat_map(|line| line.split_whitespace())
        .collect::<Vec<_>>()
        .join(" ");
    normalized.to_uppercase().contains("ORDER BY")
}
/// Returns `true` when `query` appears to contain a SPARQL `VALUES` clause.
///
/// Lines whose first non-blank character is `#` are treated as comments and
/// skipped. The remaining text is split on whitespace and on the grouping
/// characters `{`, `}`, `(` and `)`, and each token is compared
/// case-insensitively with `VALUES`. Splitting on the grouping characters
/// lets compact forms such as `{VALUES ?x {1 2}}` or `VALUES(?x ?y)` be
/// detected, while variables (`?values`) and prefixed names (`ex:values`)
/// remain distinct tokens and are not reported.
///
/// This is a textual heuristic, not a SPARQL parser: a standalone word
/// `VALUES` inside a string literal or trailing comment is still reported.
pub fn query_contains_values(query: &str) -> bool {
    query
        .lines()
        .filter(|line| !line.trim_start().starts_with('#'))
        .flat_map(|line| {
            line.split(|c: char| c.is_whitespace() || matches!(c, '{' | '}' | '(' | ')'))
        })
        .any(|tok| tok.eq_ignore_ascii_case("VALUES"))
}
// Unit tests for `ManifestValidator`. Manifests are built from inline TOML
// through `ManifestParser::parse_str`, and each test only asserts that
// `validate()` fails — not which specific error is returned.
#[cfg(test)]
mod tests {
use super::*;
use crate::manifest::ManifestParser;
// Builds a minimal manifest: non-empty project name and version, an ontology
// source of "Cargo.toml", and an empty generation rule list. Panics if the
// parser rejects the fixture.
fn create_test_manifest() -> GgenManifest {
let toml = r#"
[project]
name = "test"
version = "1.0.0"
[ontology]
source = "Cargo.toml" # Use existing file for test
[generation]
rules = []
"#;
ManifestParser::parse_str(toml).unwrap()
}
// An empty `project.name` must make `validate()` return an error.
// NOTE(review): this relies on the parser accepting an empty name (the
// `unwrap` would panic otherwise) — confirm against `ManifestParser`.
#[test]
fn test_validate_empty_project_name() {
let toml = r#"
[project]
name = ""
version = "1.0.0"
[ontology]
source = "test.ttl"
[generation]
rules = []
"#;
let manifest = ManifestParser::parse_str(toml).unwrap();
let validator = ManifestValidator::new(&manifest, Path::new("."));
assert!(validator.validate().is_err());
}
// With the base path set to "/nonexistent/path", the ontology source is
// joined onto that path, so validation is expected to fail.
// NOTE(review): assumes "/nonexistent/path" really is absent on the test host.
#[test]
fn test_validate_missing_ontology() {
let manifest = create_test_manifest();
let validator = ManifestValidator::new(&manifest, Path::new("/nonexistent/path"));
assert!(validator.validate().is_err());
}
}