use std::path::Path;
use serde_json::Value;
use crate::error::{EidoError, Result};
// Name of the `properties` entry whose `items` describe a single sample; it is
// also the name stripped from the project-level `required` list.
const SAMPLES_KEY: &str = "samples";
// Top-level schema key read as a list of strings into `EidoSchema::tangible`.
const TANGIBLE_KEY: &str = "tangible";
// Top-level schema key read as a list of strings into `EidoSchema::files`.
const FILES_KEY: &str = "files";
// Top-level schema key listing other schemas (paths or URLs) to load.
const IMPORTS_KEY: &str = "imports";
/// A loaded schema document together with the pieces derived from it.
#[derive(Clone, Debug)]
pub struct EidoSchema {
    /// The schema document exactly as it was parsed.
    pub raw: Value,
    /// Copy of `raw` with the `samples` entry removed from `properties` and
    /// `required`; `None` when no other properties remain.
    pub project_schema: Option<Value>,
    /// Schema for one sample, taken from `properties.samples.items` after
    /// multi-value preprocessing; `None` when that path is absent.
    pub sample_schema: Option<Value>,
    /// String entries of the top-level `tangible` array (empty if missing).
    pub tangible: Vec<String>,
    /// String entries of the top-level `files` array (empty if missing).
    pub files: Vec<String>,
    /// Schemas loaded from the top-level `imports` array, in listed order.
    pub imports: Vec<EidoSchema>,
}
/// Loads a schema from `path`, which may be a filesystem path or an
/// `http://` / `https://` URL.
///
/// # Errors
///
/// Propagates whatever the URL loader or the file loader reports.
pub fn load_schema(path: &str) -> Result<EidoSchema> {
    // Anything that does not look like a URL is treated as a local file.
    if !is_url(path) {
        return load_schema_from_path(Path::new(path));
    }
    load_schema_from_url(path)
}
pub fn load_schema_from_value(value: Value) -> Result<EidoSchema> {
build_schema(value, None)
}
/// Reads and parses the schema file at `path`, then builds the schema.
///
/// Files whose extension is `json` (compared ASCII-case-insensitively, so
/// `.JSON` also matches) are parsed as JSON; every other file is parsed as
/// YAML. The file's parent directory becomes the base for relative imports.
///
/// # Errors
///
/// Fails if the file cannot be read, cannot be parsed, or if any of its
/// imports fails to load.
fn load_schema_from_path(path: &Path) -> Result<EidoSchema> {
    let content = std::fs::read_to_string(path)?;
    let is_json = path
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("json"));
    let value: Value = if is_json {
        serde_json::from_str(&content)?
    } else {
        serde_yaml::from_str(&content)?
    };
    build_schema(value, path.parent())
}
/// Returns `true` when `path` starts with an `http://` or `https://` scheme.
///
/// The scheme is matched ASCII-case-insensitively, since URI schemes are
/// case-insensitive; `HTTPS://host/x` is therefore recognised as a URL.
fn is_url(path: &str) -> bool {
    ["http://", "https://"].iter().any(|scheme| {
        // `get` yields `None` when the input is too short or the cut would
        // land inside a multi-byte character, so this never panics.
        path.get(..scheme.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme))
    })
}
/// Fetches `url` with a GET request and returns the response body as text.
///
/// Only compiled when the `native` feature is enabled.
///
/// # Errors
///
/// Returns [`EidoError::SchemaLoad`] if the request fails or the body cannot
/// be read into a string.
#[cfg(feature = "native")]
fn fetch_url_content(url: &str) -> Result<String> {
    let mut reply = match ureq::get(url).call() {
        Ok(reply) => reply,
        Err(e) => {
            return Err(EidoError::SchemaLoad(format!(
                "Failed to fetch schema from {url}: {e}"
            )));
        }
    };
    reply
        .body_mut()
        .read_to_string()
        .map_err(|e| EidoError::SchemaLoad(format!("Failed to read response from {url}: {e}")))
}
/// Stand-in used when the `native` feature is disabled: always fails.
///
/// # Errors
///
/// Always returns [`EidoError::SchemaLoad`] naming the requested `url`.
#[cfg(not(feature = "native"))]
fn fetch_url_content(url: &str) -> Result<String> {
    let message = format!("URL schema loading requires the 'native' feature: {url}");
    Err(EidoError::SchemaLoad(message))
}
/// Downloads the schema at `url`, parses the body as YAML and builds it.
///
/// No base directory is passed on, so relative imports inside a remote
/// schema are not resolved against the URL.
///
/// # Errors
///
/// Fails if the download, the YAML parse, or any import fails.
fn load_schema_from_url(url: &str) -> Result<EidoSchema> {
    let body = fetch_url_content(url)?;
    let parsed = serde_yaml::from_str::<Value>(&body)?;
    build_schema(parsed, None)
}
/// Assembles an [`EidoSchema`] from a parsed document.
///
/// `base_dir`, when present, is the directory that relative import paths are
/// joined onto.
///
/// # Errors
///
/// Fails if any entry under `imports` cannot be loaded.
fn build_schema(raw: Value, base_dir: Option<&Path>) -> Result<EidoSchema> {
    // Imports are the only fallible step, so resolve them first.
    let imports = resolve_imports(&raw, base_dir)?;
    let (project_schema, sample_schema) = extract_schemas(&raw);

    Ok(EidoSchema {
        tangible: extract_string_array(&raw, TANGIBLE_KEY),
        files: extract_string_array(&raw, FILES_KEY),
        project_schema,
        sample_schema,
        imports,
        // Moved last: the fields above still borrow `raw`.
        raw,
    })
}
/// Loads every schema listed under the top-level `imports` key.
///
/// URL entries are fetched; other entries are treated as file paths and, when
/// `base_dir` is given, joined onto it. A missing `imports` key yields an
/// empty list. Loading is recursive through the path/URL loaders, and nothing
/// here guards against a schema that (directly or indirectly) imports itself.
///
/// # Errors
///
/// Returns [`EidoError::SchemaLoad`] if `imports` is not an array or an entry
/// is not a string, and propagates the first failure from loading an import.
fn resolve_imports(schema: &Value, base_dir: Option<&Path>) -> Result<Vec<EidoSchema>> {
    let entries = match schema.get(IMPORTS_KEY) {
        None => return Ok(Vec::new()),
        Some(imports) => imports.as_array().ok_or_else(|| {
            EidoError::SchemaLoad("'imports' must be an array of schema paths".to_string())
        })?,
    };

    // Collecting into `Result` stops at the first entry that fails.
    entries
        .iter()
        .map(|import_val| {
            let Some(target) = import_val.as_str() else {
                return Err(EidoError::SchemaLoad(format!(
                    "import entry must be a string, got: {import_val}"
                )));
            };
            if is_url(target) {
                return load_schema_from_url(target);
            }
            let location = match base_dir {
                Some(base) => base.join(target),
                None => Path::new(target).to_path_buf(),
            };
            load_schema_from_path(&location)
        })
        .collect()
}
fn extract_schemas(raw: &Value) -> (Option<Value>, Option<Value>) {
let properties = match raw.get("properties") {
Some(p) if p.is_object() => p,
_ => return (None, None),
};
let sample_schema = properties
.get(SAMPLES_KEY)
.and_then(|s| s.get("items"))
.cloned()
.map(|mut schema| {
preprocess_multi_value(&mut schema);
wrap_as_object_schema(schema)
});
let project_schema = {
let mut proj = raw.clone();
if let Some(req) = proj.get_mut("required") {
if let Some(arr) = req.as_array_mut() {
arr.retain(|v| v.as_str() != Some(SAMPLES_KEY));
}
}
if let Some(props) = proj.get_mut("properties") {
if let Some(obj) = props.as_object_mut() {
obj.remove(SAMPLES_KEY);
if !obj.is_empty() { Some(proj) } else { None }
} else {
None
}
} else {
None
}
};
(project_schema, sample_schema)
}
/// Ensures a sample item schema is an object schema.
///
/// A schema whose `type` is already `"object"` is returned untouched.
/// Otherwise a new `{"type": "object"}` schema is built that carries over
/// only the `properties` and `required` entries, when present.
fn wrap_as_object_schema(items_schema: Value) -> Value {
    let already_object = matches!(
        items_schema.get("type").and_then(Value::as_str),
        Some("object")
    );
    if already_object {
        return items_schema;
    }

    let mut wrapped = serde_json::Map::new();
    wrapped.insert("type".to_string(), Value::String("object".to_string()));
    for key in ["properties", "required"] {
        if let Some(section) = items_schema.get(key) {
            wrapped.insert(key.to_string(), section.clone());
        }
    }
    Value::Object(wrapped)
}
fn preprocess_multi_value(schema: &mut Value) {
let required: Vec<String> = schema
.get("required")
.and_then(|r| r.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
})
.unwrap_or_default();
let Some(properties) = schema.get_mut("properties") else {
return;
};
let Some(props_obj) = properties.as_object_mut() else {
return;
};
for (key, prop_value) in props_obj.iter_mut() {
let is_required = required.contains(key);
wrap_scalar_as_any_of(prop_value, is_required);
}
}
fn wrap_scalar_as_any_of(prop: &mut Value, is_required: bool) {
let type_str = match prop.get("type").and_then(|t| t.as_str()) {
Some(t) => t.to_string(),
None => return,
};
let scalar_types = ["string", "number", "integer", "boolean"];
if !scalar_types.contains(&type_str.as_str()) {
return;
}
if prop.get("anyOf").is_some() {
return;
}
let original = prop.clone();
let array_variant = serde_json::json!({
"type": "array",
"items": { "type": type_str }
});
let mut variants = vec![original, array_variant];
if !is_required {
variants.push(serde_json::json!({ "type": "null" }));
}
*prop = serde_json::json!({
"anyOf": variants
});
}
fn extract_string_array(value: &Value, key: &str) -> Vec<String> {
value
.get(key)
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
})
.unwrap_or_default()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scalars gain an `anyOf` wrapper (with `null` only when optional);
    /// array-typed properties are left as they are.
    #[test]
    fn test_multi_value_wrapping() {
        let mut item_schema = serde_json::json!({
            "properties": {
                "name": { "type": "string" },
                "age": { "type": "integer" },
                "scores": { "type": "array", "items": { "type": "number" } }
            },
            "required": ["name"]
        });

        preprocess_multi_value(&mut item_schema);
        let properties = item_schema["properties"].as_object().unwrap();

        for scalar in ["name", "age"] {
            assert!(properties[scalar].get("anyOf").is_some());
        }

        let scores = &properties["scores"];
        assert!(scores.get("anyOf").is_none());
        assert_eq!(scores["type"], "array");

        // Required: original + array form.
        let required_alternatives = properties["name"]["anyOf"].as_array().unwrap();
        assert_eq!(required_alternatives.len(), 2);

        // Optional: original + array form + null.
        let optional_alternatives = properties["age"]["anyOf"].as_array().unwrap();
        assert_eq!(optional_alternatives.len(), 3);
        assert_eq!(optional_alternatives[2]["type"], "null");
    }

    /// The sample schema comes from `samples.items`; the project schema keeps
    /// every other property.
    #[test]
    fn test_extract_schemas() {
        let document = serde_json::json!({
            "description": "test schema",
            "properties": {
                "samples": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "sample_name": { "type": "string" },
                            "protocol": { "type": "string" }
                        },
                        "required": ["sample_name"]
                    }
                },
                "description": { "type": "string" }
            }
        });

        let (project_part, sample_part) = extract_schemas(&document);

        let sample = sample_part.unwrap();
        let sample_name = &sample["properties"]["sample_name"];
        assert!(sample_name.is_object());
        assert!(sample_name.get("anyOf").is_some());
        assert_eq!(sample["required"][0], "sample_name");

        let project = project_part.unwrap();
        let project_props = &project["properties"];
        assert!(project_props.get("samples").is_none());
        assert!(project_props["description"].is_object());
    }

    /// Top-level string arrays are returned in order.
    #[test]
    fn test_extract_tangible_and_files() {
        let document = serde_json::json!({
            "tangible": ["read1", "genome_file"],
            "files": ["read1", "read2"]
        });

        let tangible = extract_string_array(&document, "tangible");
        assert_eq!(tangible, vec!["read1", "genome_file"]);

        let files = extract_string_array(&document, "files");
        assert_eq!(files, vec!["read1", "read2"]);
    }

    /// Building from an in-memory value fills the derived fields and needs
    /// no imports.
    #[test]
    fn test_load_from_value() {
        let document = serde_json::json!({
            "properties": {
                "samples": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "sample_name": { "type": "string" }
                        },
                        "required": ["sample_name"]
                    }
                }
            },
            "tangible": ["file_path"]
        });

        let loaded = load_schema_from_value(document).unwrap();

        assert!(loaded.sample_schema.is_some());
        assert_eq!(loaded.tangible, vec!["file_path"]);
        assert!(loaded.imports.is_empty());
    }
}