use crate::utils::error::{Error, Result};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
/// A namespace that the registry accepts, together with its short prefix.
///
/// Entries are stored in `VocabularyRegistry::allowed`, keyed by `namespace`,
/// and are matched against candidate IRIs with `str::starts_with`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllowedVocabulary {
/// Namespace IRI; any IRI that starts with this string is treated as allowed.
pub namespace: String,
/// Short prefix label returned by `VocabularyRegistry::get_prefix`.
pub prefix: String,
/// Flag set via `with_requires_proof`; defaults to `false` in `new`.
/// NOTE(review): nothing in this file reads this flag — confirm where it is enforced.
pub requires_proof: bool,
/// Optional human-readable description of the vocabulary.
pub description: Option<String>,
}
impl AllowedVocabulary {
    /// Builds an allowed vocabulary for `namespace` labelled with `prefix`.
    ///
    /// The result does not require a proof (`requires_proof == false`) and
    /// carries no description; use the `with_*` builders to change either.
    pub fn new(namespace: impl Into<String>, prefix: impl Into<String>) -> Self {
        let namespace: String = namespace.into();
        let prefix: String = prefix.into();
        Self {
            namespace,
            prefix,
            requires_proof: false,
            description: None,
        }
    }

    /// Returns the vocabulary with its `requires_proof` flag replaced by `requires`.
    pub fn with_requires_proof(self, requires: bool) -> Self {
        Self {
            requires_proof: requires,
            ..self
        }
    }

    /// Returns the vocabulary with its description replaced by `desc`.
    pub fn with_description(self, desc: impl Into<String>) -> Self {
        let description = Some(desc.into());
        Self {
            description,
            ..self
        }
    }
}
/// A namespace that the registry rejects, with the reason for rejecting it.
///
/// Entries are stored in `VocabularyRegistry::forbidden`, keyed by `namespace`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForbiddenVocabulary {
/// Namespace IRI; `VocabularyRegistry::is_forbidden` matches IRIs that start with it.
pub namespace: String,
/// Explanation included in the violation message produced during validation.
pub reason: String,
}
impl ForbiddenVocabulary {
    /// Builds a forbidden-vocabulary entry for `namespace`, recording `reason`
    /// as the explanation for why it is rejected.
    pub fn new(namespace: impl Into<String>, reason: impl Into<String>) -> Self {
        let namespace: String = namespace.into();
        let reason: String = reason.into();
        Self { namespace, reason }
    }
}
/// One problem found while validating a set of namespaces.
///
/// Built by `VocabularyRegistry::validate_namespaces`, which joins the
/// `message` of every violation into the error it returns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VocabularyViolation {
/// The offending namespace, as it appeared in the validated set.
pub namespace: String,
/// `true` when the namespace matched a forbidden entry; `false` when it was
/// merely unknown (neither allowed nor covered by a proof receipt).
pub is_forbidden: bool,
/// Human-readable description of the violation.
pub message: String,
}
/// Registry of allowed and forbidden vocabulary namespaces.
///
/// All three maps are keyed by namespace string. `BTreeMap` keeps iteration
/// (and therefore prefix-matching order) sorted by key.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VocabularyRegistry {
/// Allowed vocabularies, keyed by their namespace.
pub allowed: BTreeMap<String, AllowedVocabulary>,
/// Forbidden vocabularies, keyed by their namespace.
pub forbidden: BTreeMap<String, ForbiddenVocabulary>,
/// Namespace -> receipt id; `is_allowed` also accepts IRIs that start with
/// any key of this map.
pub proof_receipts: BTreeMap<String, String>,
}
impl VocabularyRegistry {
    /// Creates an empty registry: nothing allowed, nothing forbidden, no receipts.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a registry pre-populated with the standard vocabularies.
    ///
    /// First tries to read `.specify/ontologies/standard-vocabularies.ttl`
    /// (relative to the current working directory). Every line containing
    /// `gv26:StandardVocabulary` contributes the text of its first `<...>` span
    /// as an allowed namespace with prefix `"boot"`.
    ///
    /// If that yields no entries (file missing, unreadable, or without matching
    /// lines), a built-in set of six vocabularies is registered instead.
    pub fn with_standard_vocabularies() -> Self {
        let mut registry = Self::new();
        let path = std::path::Path::new(".specify/ontologies/standard-vocabularies.ttl");

        // Best-effort load: a missing or unreadable file is not an error, it
        // simply leaves the registry empty so the built-in set is used below.
        if let Ok(content) = std::fs::read_to_string(path) {
            for line in content.lines() {
                if !line.contains("gv26:StandardVocabulary") {
                    continue;
                }
                match Self::first_bracketed(line) {
                    // An empty namespace would prefix-match every IRI and
                    // silently allow everything, so it is never registered.
                    Some(ns) if !ns.is_empty() => {
                        registry.add_allowed(AllowedVocabulary::new(ns, "boot"));
                    }
                    _ => {}
                }
            }
        }

        if registry.allowed.is_empty() {
            // (namespace, prefix, description)
            const BUILT_IN: [(&str, &str, &str); 6] = [
                (
                    "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
                    "rdf",
                    "RDF syntax vocabulary",
                ),
                (
                    "http://www.w3.org/2000/01/rdf-schema#",
                    "rdfs",
                    "RDF Schema vocabulary",
                ),
                (
                    "http://www.w3.org/2002/07/owl#",
                    "owl",
                    "Web Ontology Language",
                ),
                (
                    "http://ggen.dev/v26_5_19#",
                    "gv26",
                    "ggen v26_5_19 ontology",
                ),
                (
                    "http://ggen.ai/ontology/meta#",
                    "meta",
                    "ggen meta-ontology",
                ),
                (
                    "https://ggen.io/marketplace/",
                    "ggen",
                    "ggen marketplace vocabulary",
                ),
            ];
            for &(namespace, prefix, description) in BUILT_IN.iter() {
                registry.add_allowed(
                    AllowedVocabulary::new(namespace, prefix).with_description(description),
                );
            }
        }
        registry
    }

    /// Registers `vocab` as allowed, replacing any entry with the same namespace.
    pub fn add_allowed(&mut self, vocab: AllowedVocabulary) {
        self.allowed.insert(vocab.namespace.clone(), vocab);
    }

    /// Registers `vocab` as forbidden, replacing any entry with the same namespace.
    pub fn add_forbidden(&mut self, vocab: ForbiddenVocabulary) {
        self.forbidden.insert(vocab.namespace.clone(), vocab);
    }

    /// Records `receipt_id` as the proof receipt for `namespace`, replacing any
    /// previous receipt for that namespace.
    pub fn add_proof_receipt(
        &mut self, namespace: impl Into<String>, receipt_id: impl Into<String>,
    ) {
        self.proof_receipts
            .insert(namespace.into(), receipt_id.into());
    }

    /// Returns `true` when `namespace` starts with any allowed namespace or with
    /// any namespace that has a proof receipt.
    ///
    /// NOTE(review): `AllowedVocabulary::requires_proof` is not consulted here —
    /// confirm whether that is intended.
    pub fn is_allowed(&self, namespace: &str) -> bool {
        self.allowed
            .keys()
            .chain(self.proof_receipts.keys())
            .any(|known| namespace.starts_with(known.as_str()))
    }

    /// Returns `true` when `namespace` starts with any forbidden namespace.
    pub fn is_forbidden(&self, namespace: &str) -> bool {
        self.forbidden
            .keys()
            .any(|banned| namespace.starts_with(banned.as_str()))
    }

    /// Checks every namespace in `namespaces` against the registry.
    ///
    /// A namespace is a violation when it matches a forbidden entry (checked
    /// first), or when it is neither allowed nor covered by a proof receipt.
    ///
    /// # Errors
    ///
    /// Returns an error whose message is `"Vocabulary violations:"` followed by
    /// one line per violation, in the set's (sorted) iteration order.
    pub fn validate_namespaces(&self, namespaces: &BTreeSet<String>) -> Result<()> {
        let mut violations = Vec::new();
        for namespace in namespaces {
            if let Some(forbidden) = self.forbidden_entry_for(namespace) {
                violations.push(VocabularyViolation {
                    namespace: namespace.clone(),
                    is_forbidden: true,
                    message: format!(
                        "Forbidden namespace: {} - {}",
                        namespace, forbidden.reason
                    ),
                });
            } else if !self.is_allowed(namespace) {
                violations.push(VocabularyViolation {
                    namespace: namespace.clone(),
                    is_forbidden: false,
                    message: format!(
                        "Unknown namespace '{}'. Add to allowed list or provide proof receipt.",
                        namespace
                    ),
                });
            }
        }

        if violations.is_empty() {
            return Ok(());
        }
        let messages: Vec<&str> = violations.iter().map(|v| v.message.as_str()).collect();
        Err(Error::new(&format!(
            "Vocabulary violations:\n{}",
            messages.join("\n")
        )))
    }

    /// Collects the namespaces mentioned in Turtle-like `content`.
    ///
    /// Two sources are combined:
    /// * for each line whose trimmed form starts with `@prefix`, the text of its
    ///   first `<...>` span, taken verbatim;
    /// * for every non-empty `<...>` span anywhere in `content`, the IRI
    ///   truncated just after its last `#`, or, if it has none, after its last
    ///   `/`. Spans containing neither are skipped.
    pub fn extract_namespaces(content: &str) -> BTreeSet<String> {
        let mut namespaces = BTreeSet::new();

        for line in content.lines() {
            let trimmed = line.trim();
            if trimmed.starts_with("@prefix") {
                if let Some(ns) = Self::first_bracketed(trimmed) {
                    namespaces.insert(ns.to_string());
                }
            }
        }

        // Hand-rolled equivalent of iterating the pattern `<([^>]+)>` over
        // `content`: each span runs from a `<` to the next `>` and spans never
        // overlap. Avoids compiling a regex on every call.
        let mut rest = content;
        while let Some(open) = rest.find('<') {
            let after_open = &rest[open + 1..];
            let close = match after_open.find('>') {
                Some(close) => close,
                // No closing bracket remains, so no further span can exist.
                None => break,
            };
            let iri = &after_open[..close];
            // An empty span (`<>`) has no `#` or `/` and is skipped here.
            if let Some(idx) = iri.rfind('#').or_else(|| iri.rfind('/')) {
                namespaces.insert(iri[..=idx].to_string());
            }
            rest = &after_open[close + 1..];
        }
        namespaces
    }

    /// Returns the prefix of the first allowed vocabulary (in sorted namespace
    /// order) whose namespace is a prefix of `namespace`, or `None` if no
    /// allowed vocabulary matches.
    pub fn get_prefix(&self, namespace: &str) -> Option<&str> {
        self.allowed
            .iter()
            .find(|(ns, _)| namespace.starts_with(ns.as_str()))
            .map(|(_, vocab)| vocab.prefix.as_str())
    }

    /// Finds the forbidden entry that applies to `namespace`.
    ///
    /// An exact key match wins; otherwise the first entry (in sorted key order)
    /// whose key is a prefix of `namespace` is used, mirroring `is_forbidden`.
    fn forbidden_entry_for(&self, namespace: &str) -> Option<&ForbiddenVocabulary> {
        self.forbidden.get(namespace).or_else(|| {
            self.forbidden
                .iter()
                .find(|(banned, _)| namespace.starts_with(banned.as_str()))
                .map(|(_, entry)| entry)
        })
    }

    /// Returns the text between the first `<` in `line` and the first `>` that
    /// follows it, or `None` when either bracket is missing.
    ///
    /// Searching for `>` only after the `<` means a stray `>` earlier in the
    /// line cannot produce an inverted (panicking) slice range.
    fn first_bracketed(line: &str) -> Option<&str> {
        let open = line.find('<')?;
        let rest = &line[open + 1..];
        let close = rest.find('>')?;
        Some(&rest[..close])
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): presumably ignored because `https://schema.org/` is not in
    // the built-in fallback set, so this only passes when the on-disk
    // vocabulary file supplies it — confirm.
    #[ignore]
    #[test]
    fn test_registry_standard_vocabularies() {
        let registry = VocabularyRegistry::with_standard_vocabularies();
        let expected_allowed = [
            "http://www.w3.org/2000/01/rdf-schema#Class",
            "http://www.w3.org/2002/07/owl#Thing",
            "https://schema.org/Person",
            "http://ggen.dev/v26_5_19#Pass",
        ];
        for iri in expected_allowed.iter() {
            assert!(registry.is_allowed(iri), "expected {} to be allowed", iri);
        }
    }

    /// An IRI outside every registered namespace is not allowed.
    #[test]
    fn test_registry_unknown_namespace() {
        let registry = VocabularyRegistry::with_standard_vocabularies();
        let allowed = registry.is_allowed("http://unknown.example.org/Foo");
        assert!(!allowed);
    }

    /// Forbidden namespaces match by prefix.
    #[test]
    fn test_registry_forbidden_namespace() {
        let mut registry = VocabularyRegistry::with_standard_vocabularies();
        let evil = ForbiddenVocabulary::new("http://evil.example.org/", "Known malicious vocabulary");
        registry.add_forbidden(evil);
        assert!(registry.is_forbidden("http://evil.example.org/Malware"));
    }

    /// A proof receipt turns an otherwise unknown namespace into an allowed one.
    #[test]
    fn test_registry_proof_receipt() {
        let custom_iri = "http://custom.example.org/MyClass";
        let mut registry = VocabularyRegistry::with_standard_vocabularies();
        assert!(!registry.is_allowed(custom_iri));
        registry.add_proof_receipt("http://custom.example.org/", "receipt-12345");
        assert!(registry.is_allowed(custom_iri));
    }

    /// Namespaces declared via `@prefix` lines are extracted.
    #[test]
    fn test_extract_namespaces() {
        let content = r#"
@prefix ex: <http://example.org/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
ex:alice rdf:type ex:Person .
"#;
        let namespaces = VocabularyRegistry::extract_namespaces(content);
        let expected = [
            "http://example.org/",
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        ];
        for ns in expected.iter() {
            assert!(namespaces.contains(*ns), "missing namespace {}", ns);
        }
    }

    // NOTE(review): presumably ignored for the same reason as
    // `test_registry_standard_vocabularies` — it relies on `https://schema.org/`
    // being allowed, which the built-in fallback set does not provide.
    #[ignore]
    #[test]
    fn test_validate_namespaces() {
        let registry = VocabularyRegistry::with_standard_vocabularies();

        let valid_ns: BTreeSet<String> = [
            "http://www.w3.org/2000/01/rdf-schema#",
            "https://schema.org/",
        ]
        .iter()
        .map(|ns| ns.to_string())
        .collect();
        assert!(registry.validate_namespaces(&valid_ns).is_ok());

        let invalid_ns: BTreeSet<String> = ["http://unknown.example.org/"]
            .iter()
            .map(|ns| ns.to_string())
            .collect();
        assert!(registry.validate_namespaces(&invalid_ns).is_err());
    }
}