use crate::schema::SchemaError;
use log::info;
use serde_json::{Map, Value};
use sha2::{Digest, Sha256};
/// Stateless namespace for the schema hashing helpers: it has no fields, and
/// every operation is an associated function called as `SchemaHasher::...`.
pub struct SchemaHasher;
impl SchemaHasher {
/// Computes the SHA-256 fingerprint of a schema, returned as lowercase hex.
///
/// The input is left untouched: a copy is stripped of the excluded keys
/// (see `remove_excluded_fields`), rendered as canonical JSON, and the bytes
/// of that string are hashed.
///
/// # Errors
/// Propagates any `SchemaError` raised while producing the canonical JSON.
pub fn calculate_hash(schema_json: &Value) -> Result<String, SchemaError> {
    // Work on a copy so the caller's value keeps its excluded fields.
    let mut stripped = schema_json.clone();
    Self::remove_excluded_fields(&mut stripped);

    let canonical = Self::to_canonical_json(&stripped)?;
    let digest = Sha256::digest(canonical.as_bytes());
    Ok(format!("{:x}", digest))
}
/// Strips, in place, the keys that must not influence the hash.
///
/// The keys `hash`, `payment_config`, `permission_policy` and `name` are
/// removed from every JSON object reachable from `value`, at any depth,
/// including objects nested inside arrays. Scalar values are left as they are.
// NOTE(review): because removal is recursive, a nested entry that merely
// happens to use one of these keys (e.g. a member called `name`) is also
// dropped from the hashed content — confirm this is intended.
fn remove_excluded_fields(value: &mut Value) {
    const EXCLUDED_KEYS: [&str; 4] = ["hash", "payment_config", "permission_policy", "name"];

    match value {
        Value::Object(fields) => {
            for key in EXCLUDED_KEYS.iter() {
                fields.remove(*key);
            }
            for child in fields.values_mut() {
                Self::remove_excluded_fields(child);
            }
        }
        Value::Array(items) => items.iter_mut().for_each(Self::remove_excluded_fields),
        _ => {}
    }
}
/// Computes the schema's hash and stores it under the top-level `hash` key.
///
/// An existing `hash` entry is overwritten. The hash is produced by
/// `calculate_hash`, which ignores any `hash` key already present, so calling
/// this repeatedly on the same schema yields the same value.
///
/// # Errors
/// Returns `SchemaError::InvalidData` when `schema_json` is not a JSON object,
/// and propagates any error from `calculate_hash`.
pub fn add_hash_to_schema(schema_json: &mut Value) -> Result<String, SchemaError> {
    let hash = Self::calculate_hash(schema_json)?;

    match schema_json.as_object_mut() {
        Some(fields) => {
            fields.insert("hash".to_owned(), Value::String(hash.clone()));
            Ok(hash)
        }
        None => Err(SchemaError::InvalidData(
            "Schema must be a JSON object".to_owned(),
        )),
    }
}
/// Checks whether the `hash` stored in a schema matches its current content.
///
/// Returns `Ok(true)` when the top-level `hash` entry is a string equal to the
/// freshly calculated hash, and `Ok(false)` when it differs, is missing, or is
/// not a string.
///
/// # Errors
/// Returns `SchemaError::InvalidData` when `schema_json` is not a JSON object,
/// and propagates any error from `calculate_hash`.
pub fn verify_schema_hash(schema_json: &Value) -> Result<bool, SchemaError> {
    let fields = match schema_json {
        Value::Object(fields) => fields,
        _ => {
            return Err(SchemaError::InvalidData(
                "Schema must be a JSON object".to_owned(),
            ))
        }
    };

    match fields.get("hash") {
        Some(Value::String(stored)) => {
            let expected = Self::calculate_hash(schema_json)?;
            Ok(*stored == expected)
        }
        // No usable stored hash: report a mismatch rather than an error.
        _ => Ok(false),
    }
}
/// Reads the JSON schema at `file_path`, inserts (or refreshes) its `hash`
/// entry, and writes the document back to the same path pretty-printed.
///
/// Returns the hash that was written.
///
/// # Errors
/// Returns `SchemaError::InvalidData` when the file cannot be read, does not
/// contain valid JSON, cannot be re-serialized, or cannot be written, and
/// propagates the error from `add_hash_to_schema` (for example when the
/// document is not a JSON object).
pub fn hash_schema_file(file_path: &std::path::Path) -> Result<String, SchemaError> {
    info!("Processing schema file: {}", file_path.display());

    let raw_text = std::fs::read_to_string(file_path)
        .map_err(|err| SchemaError::InvalidData(format!("Failed to read file: {}", err)))?;
    let mut document: Value = serde_json::from_str(&raw_text)
        .map_err(|err| SchemaError::InvalidData(format!("Invalid JSON: {}", err)))?;

    let hash = Self::add_hash_to_schema(&mut document)?;

    let pretty = serde_json::to_string_pretty(&document)
        .map_err(|err| SchemaError::InvalidData(format!("Failed to format JSON: {}", err)))?;
    // The source file is overwritten in place with the hashed document.
    std::fs::write(file_path, pretty)
        .map_err(|err| SchemaError::InvalidData(format!("Failed to write file: {}", err)))?;

    info!(
        "Updated schema file {} with hash: {}",
        file_path.display(),
        hash
    );
    Ok(hash)
}
/// Hashes every `*.json` file directly inside `directory_path`, rewriting each
/// file with its `hash` entry via `hash_schema_file`.
///
/// Returns one `(file name, hash)` pair per processed file, in the order the
/// directory listing yields them. Entries without a `json` extension are
/// skipped; subdirectories are not descended into.
///
/// # Errors
/// Returns `SchemaError::InvalidData` when the directory does not exist or
/// cannot be listed. Processing stops at the first file that fails, and that
/// file's error is returned; files handled before it remain rewritten.
pub fn hash_schemas_directory<P: AsRef<std::path::Path>>(
    directory_path: P,
) -> Result<Vec<(String, String)>, SchemaError> {
    let dir = directory_path.as_ref();
    let mut results = Vec::new();
    info!("Processing schemas in directory: {}", dir.display());

    if !dir.exists() {
        return Err(SchemaError::InvalidData(format!(
            "Directory does not exist: {}",
            dir.display()
        )));
    }

    let listing = std::fs::read_dir(dir)
        .map_err(|e| SchemaError::InvalidData(format!("Failed to read directory: {}", e)))?;

    for entry in listing {
        let path = entry
            .map_err(|e| SchemaError::InvalidData(format!("Failed to read entry: {}", e)))?
            .path();
        if !path.extension().map_or(false, |ext| ext == "json") {
            continue;
        }

        // Non-UTF-8 or missing file names are reported as "unknown".
        let filename = path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("unknown")
            .to_string();

        match Self::hash_schema_file(&path) {
            Ok(hash) => {
                info!("✅ Processed: {} -> {}", filename, hash);
                results.push((filename, hash));
            }
            Err(e) => {
                info!("❌ Failed to process {}: {}", filename, e);
                return Err(e);
            }
        }
    }

    info!("Successfully processed {} schema files", results.len());
    Ok(results)
}
pub fn hash_available_schemas_directory() -> Result<Vec<(String, String)>, SchemaError> {
SchemaHasher::hash_schemas_directory("available_schemas")
}
/// Verifies the stored hash of every `*.json` file directly inside
/// `directory_path` without modifying any file.
///
/// Returns one `(file name, is_valid)` pair per checked file, in the order the
/// directory listing yields them; `is_valid` is `false` for a missing or
/// mismatching hash. Entries without a `json` extension are skipped.
///
/// # Errors
/// Returns `SchemaError::InvalidData` when the directory cannot be listed or a
/// file cannot be read or parsed as JSON, and propagates the error from
/// `verify_schema_hash` (for example when a document is not a JSON object).
/// The first error aborts the whole run.
pub fn verify_schemas_directory<P: AsRef<std::path::Path>>(
    directory_path: P,
) -> Result<Vec<(String, bool)>, SchemaError> {
    let dir = directory_path.as_ref();
    let mut results = Vec::new();
    info!("Verifying schemas in directory: {}", dir.display());

    let listing = std::fs::read_dir(dir)
        .map_err(|e| SchemaError::InvalidData(format!("Failed to read directory: {}", e)))?;

    for entry in listing {
        let path = entry
            .map_err(|e| SchemaError::InvalidData(format!("Failed to read entry: {}", e)))?
            .path();
        if !path.extension().map_or(false, |ext| ext == "json") {
            continue;
        }

        let raw_text = std::fs::read_to_string(&path)
            .map_err(|e| SchemaError::InvalidData(format!("Failed to read file: {}", e)))?;
        let document: Value = serde_json::from_str(&raw_text)
            .map_err(|e| SchemaError::InvalidData(format!("Invalid JSON: {}", e)))?;
        let is_valid = Self::verify_schema_hash(&document)?;

        // Non-UTF-8 or missing file names are reported as "unknown".
        let filename = path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("unknown")
            .to_string();

        if is_valid {
            info!("✅ Valid hash: {}", filename);
        } else {
            info!("❌ Invalid/missing hash: {}", filename);
        }
        results.push((filename, is_valid));
    }

    Ok(results)
}
pub fn verify_available_schemas_directory() -> Result<Vec<(String, bool)>, SchemaError> {
SchemaHasher::verify_schemas_directory("available_schemas")
}
/// Serializes `value` as compact JSON in which the keys of every object, at
/// every depth, appear in ascending (byte-wise) order.
///
/// Array element order is preserved; scalars are emitted unchanged. Two values
/// that differ only in object key order therefore produce the same string.
///
/// The sorted structure is built in a single pass and serialized exactly once,
/// so no subtree is ever re-serialized or re-parsed on the way up.
///
/// # Errors
/// Returns `SchemaError::InvalidData` if `serde_json` fails to serialize the
/// canonical value.
fn to_canonical_json(value: &Value) -> Result<String, SchemaError> {
    // Returns a deep copy of `node` whose objects have their entries inserted
    // in sorted key order. Inserting in sorted order gives sorted output both
    // when `Map` is an ordered tree and when it preserves insertion order.
    fn sorted_copy(node: &Value) -> Value {
        match node {
            Value::Object(fields) => {
                let mut entries: Vec<(&String, &Value)> = fields.iter().collect();
                // Keys are unique within an object, so an unstable sort is safe.
                entries.sort_unstable_by(|left, right| left.0.cmp(right.0));

                let mut sorted = Map::new();
                for (key, child) in entries {
                    sorted.insert(key.clone(), sorted_copy(child));
                }
                Value::Object(sorted)
            }
            Value::Array(items) => Value::Array(items.iter().map(sorted_copy).collect()),
            scalar => scalar.clone(),
        }
    }

    let canonical = sorted_copy(value);
    serde_json::to_string(&canonical).map_err(|e| {
        // Keep the error wording distinct for containers and scalars.
        let context = match value {
            Value::Object(_) | Value::Array(_) => "Failed to serialize canonical JSON",
            _ => "Failed to serialize JSON",
        };
        SchemaError::InvalidData(format!("{}: {}", context, e))
    })
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A hash is produced and has the length of a hex-encoded SHA-256 digest.
    #[test]
    fn test_calculate_hash() {
        let schema = json!({
            "name": "TestSchema",
            "fields": {
                "field1": {
                    "field_type": "Single"
                }
            }
        });
        let hash = SchemaHasher::calculate_hash(&schema).unwrap();
        assert!(!hash.is_empty());
        // 32 digest bytes rendered as two hex characters each.
        assert_eq!(hash.len(), 64);
    }

    /// Key order in the source document must not influence the hash.
    #[test]
    fn test_hash_consistency() {
        let schema1 = json!({
            "name": "TestSchema",
            "fields": {
                "field1": {"field_type": "Single"},
                "field2": {"field_type": "Collection"}
            }
        });
        let schema2 = json!({
            "fields": {
                "field2": {"field_type": "Collection"},
                "field1": {"field_type": "Single"}
            },
            "name": "TestSchema"
        });
        let hash1 = SchemaHasher::calculate_hash(&schema1).unwrap();
        let hash2 = SchemaHasher::calculate_hash(&schema2).unwrap();
        assert_eq!(hash1, hash2);
    }

    /// An already-present `hash` entry is ignored when hashing.
    #[test]
    fn test_hash_excludes_existing_hash() {
        let mut schema = json!({
            "name": "TestSchema",
            "fields": {
                "field1": {"field_type": "Single"}
            }
        });
        let hash1 = SchemaHasher::calculate_hash(&schema).unwrap();
        SchemaHasher::add_hash_to_schema(&mut schema).unwrap();
        let hash2 = SchemaHasher::calculate_hash(&schema).unwrap();
        assert_eq!(hash1, hash2);
    }

    /// A freshly added hash verifies successfully.
    #[test]
    fn test_add_and_verify_hash() {
        let mut schema = json!({
            "name": "TestSchema",
            "fields": {
                "field1": {"field_type": "Single"}
            }
        });
        let hash = SchemaHasher::add_hash_to_schema(&mut schema).unwrap();
        assert!(!hash.is_empty());
        let is_valid = SchemaHasher::verify_schema_hash(&schema).unwrap();
        assert!(is_valid);
    }

    /// Changing hashed content after the hash was stored must be detected.
    #[test]
    fn test_verify_detects_tampering() {
        let mut schema = json!({
            "name": "TestSchema",
            "fields": {
                "field1": {"field_type": "Single"}
            }
        });
        SchemaHasher::add_hash_to_schema(&mut schema).unwrap();
        schema["fields"]["field1"]["field_type"] = json!("Collection");
        assert!(!SchemaHasher::verify_schema_hash(&schema).unwrap());
    }

    /// A schema without a stored hash is reported as invalid, not as an error.
    #[test]
    fn test_verify_without_hash_is_false() {
        let schema = json!({
            "name": "TestSchema",
            "fields": {
                "field1": {"field_type": "Single"}
            }
        });
        assert!(!SchemaHasher::verify_schema_hash(&schema).unwrap());
    }

    /// Only JSON objects can carry or be checked for a hash.
    #[test]
    fn test_non_object_schema_is_rejected() {
        let mut not_an_object = json!(["a", "b"]);
        assert!(SchemaHasher::add_hash_to_schema(&mut not_an_object).is_err());
        assert!(SchemaHasher::verify_schema_hash(&not_an_object).is_err());
    }
}