use crate::error::{Error, Result};
use std::collections::HashMap;
/// A named schema: a list of fields plus schema-level metadata.
#[derive(Debug, Clone)]
pub struct Schema {
/// Schema name; reported as `schema_name` in the duplicate-field error raised by `validate`.
pub name: String,
/// Format tag for this schema (one of the `SchemaFormat` variants).
pub format: SchemaFormat,
/// Fields in the order they were added through `add_field`.
pub fields: Vec<Field>,
/// Optional documentation text attached to the schema; `None` after `Schema::new`.
pub doc_comment: Option<String>,
/// Method names attached to the schema; empty after `Schema::new` and not interpreted in this file.
pub methods: Vec<String>,
/// CRUD flag; `false` after `Schema::new`. NOTE(review): presumably enables CRUD generation — confirm against the consumer.
pub crud: bool,
}
impl Schema {
    /// Creates an empty schema with the given name and format.
    ///
    /// The result has no fields, no doc comment, no methods, and `crud` set to `false`.
    pub fn new(name: String, format: SchemaFormat) -> Self {
        let fields = Vec::new();
        let methods = Vec::new();
        Self {
            name,
            format,
            fields,
            methods,
            doc_comment: None,
            crud: false,
        }
    }

    /// Appends `field` to the end of the field list.
    ///
    /// No uniqueness check is made here; duplicate names are reported by `validate`.
    pub fn add_field(&mut self, field: Field) {
        self.fields.push(field);
    }

    /// Returns the first field whose name equals `name`, or `None` when no field matches.
    pub fn get_field(&self, name: &str) -> Option<&Field> {
        for candidate in &self.fields {
            if candidate.name == name {
                return Some(candidate);
            }
        }
        None
    }

    /// Validates the schema as a whole.
    ///
    /// Field names are checked for duplicates first; only when all names are distinct is
    /// each field validated individually, in declaration order.
    ///
    /// # Errors
    ///
    /// Returns `Error::Schema` naming the first repeated field, or otherwise the first
    /// error produced by a field's own `validate`.
    pub fn validate(&self) -> Result<()> {
        // The map is used purely as a set of names already encountered.
        let mut seen_names: HashMap<&str, ()> = HashMap::with_capacity(self.fields.len());
        for field in &self.fields {
            let already_present = seen_names.insert(field.name.as_str(), ()).is_some();
            if already_present {
                let message = format!("Duplicate field '{}'", field.name);
                return Err(Error::Schema {
                    schema_name: self.name.clone(),
                    message,
                });
            }
        }

        // Stops at the first failing field, exactly like a `?` inside a loop.
        self.fields.iter().try_for_each(|field| field.validate())
    }
}
/// Schema formats recognised by `SchemaFormat::from_str`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaFormat {
    /// Selected by `"native"`, and also the fallback for any unrecognised name.
    Native,
    /// Selected by `"json_schema"` or `"jsonschema"`.
    JsonSchema,
    /// Selected by `"typescript"` or `"ts"`.
    TypeScript,
    /// Selected by `"rust"`.
    Rust,
}

impl SchemaFormat {
    /// Maps a format name to its variant, ignoring letter case.
    ///
    /// The input is lower-cased but not trimmed. Any name that is not recognised —
    /// including the empty string — silently yields `SchemaFormat::Native`, so this
    /// function cannot fail.
    pub fn from_str(s: &str) -> Self {
        let lowered = s.to_lowercase();
        let key = lowered.as_str();

        if key == "json_schema" || key == "jsonschema" {
            Self::JsonSchema
        } else if key == "typescript" || key == "ts" {
            Self::TypeScript
        } else if key == "rust" {
            Self::Rust
        } else {
            // Covers the explicit "native" spelling as well as every unknown name.
            Self::Native
        }
    }
}
/// A single schema field: its name, type, and per-field metadata.
#[derive(Debug, Clone)]
pub struct Field {
/// Field name; `Schema::validate` rejects a schema in which two fields share a name.
pub name: String,
/// Declared type of the field; checked by `Field::validate` via `Type::validate`.
pub field_type: Type,
/// Nullability flag; `Field::new` initialises it to `false`.
pub nullable: bool,
/// Optional default value, stored as an uninterpreted string.
pub default: Option<String>,
/// Attributes attached to the field; `Field::validate` rejects `Unique` combined with `Private`.
pub attributes: Vec<FieldAttribute>,
/// Optional documentation text attached to the field.
pub doc_comment: Option<String>,
/// Optional embedding configuration.
/// NOTE(review): `Field::validate` does not call `EmbeddingConfig::validate` on this — confirm it is checked elsewhere.
pub embedding_config: Option<EmbeddingConfig>,
}
impl Field {
pub fn new(name: String, field_type: Type) -> Self {
Self {
name,
field_type,
nullable: false,
default: None,
attributes: Vec::new(),
doc_comment: None,
embedding_config: None,
}
}
pub fn validate(&self) -> Result<()> {
self.field_type.validate()?;
let mut has_unique = false;
let mut has_private = false;
for attr in &self.attributes {
match attr {
FieldAttribute::Unique => has_unique = true,
FieldAttribute::Private => has_private = true,
_ => {}
}
}
if has_unique && has_private {
return Err(Error::Validation {
message: format!("Field '{}' cannot be both unique and private", self.name),
context: None,
});
}
Ok(())
}
}
/// Embedding settings that can be attached to a `Field` through its `embedding_config` member.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddingConfig {
/// Embedding model name; `validate` rejects an empty string.
pub model: String,
/// Name of the source field; `validate` rejects an empty string.
/// NOTE(review): presumably the field whose content gets embedded — confirm against the consumer.
pub source_field: String,
/// Optional vector dimension; `validate` rejects `Some(0)`, while `None` is accepted.
pub dimension: Option<usize>,
/// Optional embedding paradigm; left as `None` by `new` and `with_dimension`, set by `with_paradigm`.
pub paradigm: Option<EmbeddingParadigm>,
}
impl EmbeddingConfig {
pub fn new(model: String, source_field: String) -> Self {
Self {
model,
source_field,
dimension: None,
paradigm: None,
}
}
pub fn with_dimension(model: String, source_field: String, dimension: usize) -> Self {
Self {
model,
source_field,
dimension: Some(dimension),
paradigm: None,
}
}
pub fn with_paradigm(mut self, paradigm: EmbeddingParadigm) -> Self {
self.paradigm = Some(paradigm);
self
}
pub fn validate(&self) -> Result<()> {
if self.model.is_empty() {
return Err(Error::Validation {
message: "Embedding model cannot be empty".to_string(),
context: None,
});
}
if self.source_field.is_empty() {
return Err(Error::Validation {
message: "Embedding source_field cannot be empty".to_string(),
context: None,
});
}
if let Some(dim) = self.dimension {
if dim == 0 {
return Err(Error::Validation {
message: "Embedding dimension must be greater than 0".to_string(),
context: None,
});
}
}
Ok(())
}
}
/// Embedding paradigms recognised by `EmbeddingParadigm::from_str`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EmbeddingParadigm {
    /// Parsed from `"dense"`.
    Dense,
    /// Parsed from `"multi-vector"`, `"multivector"`, or `"colbert"`.
    MultiVector,
    /// Parsed from `"sparse"` or `"splade"`.
    Sparse,
    /// Parsed from `"vision-language"`, `"visionlanguage"`, or `"colpali"`.
    VisionLanguage,
    /// Parsed from `"timeseries"` or `"time-series"`.
    TimeSeries,
}

impl EmbeddingParadigm {
    /// Parses a paradigm name or alias, ignoring letter case.
    ///
    /// The input is lower-cased but not trimmed. Returns `None` for any name that is
    /// not one of the known spellings.
    pub fn from_str(s: &str) -> Option<Self> {
        let lowered = s.to_lowercase();
        let paradigm = match lowered.as_str() {
            "dense" => Self::Dense,
            "colbert" | "multivector" | "multi-vector" => Self::MultiVector,
            "splade" | "sparse" => Self::Sparse,
            "colpali" | "visionlanguage" | "vision-language" => Self::VisionLanguage,
            "time-series" | "timeseries" => Self::TimeSeries,
            _ => return None,
        };
        Some(paradigm)
    }
}
/// Attributes that can be attached to a `Field` through its `attributes` list.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldAttribute {
/// Uniqueness marker; `Field::validate` rejects a field that also carries `Private`.
Unique,
/// Privacy marker; `Field::validate` rejects a field that also carries `Unique`.
Private,
/// Index marker; not interpreted in this file. NOTE(review): presumably requests an index — confirm.
Index,
/// Auto marker; not interpreted in this file. NOTE(review): presumably an auto-generated value — confirm.
Auto,
/// Carries a string payload; not interpreted in this file. NOTE(review): presumably a default-value expression — confirm.
Default(String),
/// Carries a string payload; not interpreted in this file. NOTE(review): presumably a validation rule — confirm.
Validate(String),
}
/// Field types. The Rust spellings quoted below are the ones produced by `Type::to_rust_type`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Type {
/// Rendered as `String`.
String,
/// Rendered as `i64`.
Integer,
/// Rendered as `f64`.
Float,
/// Rendered as `bool`.
Boolean,
/// Rendered as `u64`. NOTE(review): the time unit/epoch is not visible here — confirm.
Timestamp,
/// Rendered as `EntityId`.
EntityId,
/// Enumeration with the listed variant names; `Type::validate` rejects an empty list.
Enum(Vec<String>),
/// Optional wrapper around an inner type; rendered as `Option<inner>`.
Option(Box<Type>),
/// List wrapper around an inner type; rendered as `Vec<inner>`.
Vec(Box<Type>),
/// Arbitrary type name, rendered verbatim; `Type::from_str` falls back to this for unrecognised input.
Custom(String),
/// Type name rendered verbatim, like `Custom`; `Type::from_str` never produces this variant.
Named(String),
/// Rendered as `serde_json::Value`.
JsonValue,
/// Rendered as `Vec<f32>`.
Vector,
/// Rendered as `()`; `Type::from_str` never produces this variant.
Unit,
}
impl Type {
    /// Checks that this type is structurally well-formed.
    ///
    /// `Option` and `Vec` are validated recursively through their inner type. The only
    /// rule enforced is that an `Enum` lists at least one variant; every other type is
    /// accepted as is.
    ///
    /// # Errors
    ///
    /// Returns `Error::Validation` when an `Enum` (possibly nested) has no variants.
    pub fn validate(&self) -> Result<()> {
        match self {
            Type::Enum(variants) if variants.is_empty() => Err(Error::Validation {
                message: "Enum type must have at least one variant".to_string(),
                context: None,
            }),
            Type::Option(inner) | Type::Vec(inner) => inner.validate(),
            _ => Ok(()),
        }
    }

    /// Returns the Rust spelling used for this type.
    ///
    /// Wrappers recurse into their inner type. `Custom` and `Named` are emitted
    /// verbatim, and an `Enum` becomes `Enum` followed by its variant names
    /// concatenated without a separator.
    pub fn to_rust_type(&self) -> String {
        match self {
            Type::String => "String".to_string(),
            Type::Integer => "i64".to_string(),
            Type::Float => "f64".to_string(),
            Type::Boolean => "bool".to_string(),
            Type::Timestamp => "u64".to_string(),
            Type::EntityId => "EntityId".to_string(),
            Type::Enum(variants) => format!("Enum{}", variants.join("")),
            Type::Option(inner) => format!("Option<{}>", inner.to_rust_type()),
            Type::Vec(inner) => format!("Vec<{}>", inner.to_rust_type()),
            Type::Custom(name) | Type::Named(name) => name.clone(),
            Type::JsonValue => "serde_json::Value".to_string(),
            Type::Vector => "Vec<f32>".to_string(),
            Type::Unit => "()".to_string(),
        }
    }

    /// Parses a type expression.
    ///
    /// Surrounding whitespace is ignored and a trailing `!` is stripped without
    /// affecting the result. Recognised forms are `Vec<T>`, `Option<T>`,
    /// `Enum(a, b, ...)` (variant names may be wrapped in double quotes) and the
    /// built-in names `String`, `Integer`, `Float`, `Boolean`, `Timestamp`, `EntityId`,
    /// `JsonValue`, and `Vector`. Anything else becomes `Type::Custom` holding the
    /// trimmed text.
    ///
    /// Empty entries in an `Enum(...)` list are dropped, so `Enum()` parses to an enum
    /// with no variants (which `validate` then rejects) rather than to an enum with a
    /// single empty-named variant, and a trailing comma is harmless.
    ///
    /// # Errors
    ///
    /// Currently always returns `Ok`; the `Result` only carries errors propagated from
    /// parsing a nested type, none of which can fail at present.
    pub fn from_str(s: &str) -> Result<Self> {
        let s = s.trim();

        if let Some(without_marker) = s.strip_suffix('!') {
            return Type::from_str(without_marker);
        }
        if let Some(inner) = s.strip_prefix("Vec<").and_then(|rest| rest.strip_suffix('>')) {
            return Ok(Type::Vec(Box::new(Type::from_str(inner)?)));
        }
        if let Some(inner) = s
            .strip_prefix("Option<")
            .and_then(|rest| rest.strip_suffix('>'))
        {
            return Ok(Type::Option(Box::new(Type::from_str(inner)?)));
        }
        if let Some(list) = s.strip_prefix("Enum(").and_then(|rest| rest.strip_suffix(')')) {
            let variants: Vec<String> = list
                .split(',')
                .map(|variant| variant.trim().trim_matches('"'))
                // `"".split(',')` yields one empty piece; without this filter `Enum()`
                // would produce a nameless variant that slips past `validate`.
                .filter(|variant| !variant.is_empty())
                .map(str::to_owned)
                .collect();
            return Ok(Type::Enum(variants));
        }

        Ok(match s {
            "String" => Type::String,
            "Integer" => Type::Integer,
            "Float" => Type::Float,
            "Boolean" => Type::Boolean,
            "Timestamp" => Type::Timestamp,
            "EntityId" => Type::EntityId,
            "JsonValue" => Type::JsonValue,
            "Vector" => Type::Vector,
            other => Type::Custom(other.to_string()),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A new schema starts without fields, and `add_field` appends exactly one.
    #[test]
    fn test_schema_creation() {
        let mut schema = Schema::new(String::from("User"), SchemaFormat::Native);
        assert_eq!(schema.name, "User");
        assert_eq!(schema.fields.len(), 0);

        let id_field = Field::new(String::from("id"), Type::EntityId);
        schema.add_field(id_field);
        assert_eq!(schema.fields.len(), 1);
    }

    /// Two fields sharing a name make schema validation fail.
    #[test]
    fn test_schema_validation_duplicate_fields() {
        let mut schema = Schema::new(String::from("User"), SchemaFormat::Native);
        for field_type in vec![Type::EntityId, Type::String] {
            schema.add_field(Field::new(String::from("id"), field_type));
        }
        assert!(schema.validate().is_err());
    }

    /// Built-in names and the generic wrappers parse to the matching variants.
    #[test]
    fn test_type_from_str() {
        let cases = vec![
            ("String", Type::String),
            ("Integer", Type::Integer),
            ("Vec<String>", Type::Vec(Box::new(Type::String))),
            ("Option<Integer>", Type::Option(Box::new(Type::Integer))),
        ];
        for (input, expected) in cases {
            assert_eq!(Type::from_str(input).unwrap(), expected);
        }
    }

    /// Types render to the expected Rust spellings, including nested wrappers.
    #[test]
    fn test_type_to_rust_type() {
        let cases = vec![
            (Type::String, "String"),
            (Type::Integer, "i64"),
            (Type::Vec(Box::new(Type::String)), "Vec<String>"),
            (Type::Option(Box::new(Type::Integer)), "Option<i64>"),
        ];
        for (ty, expected) in cases {
            assert_eq!(ty.to_rust_type(), expected);
        }
    }

    /// An enum without variants is invalid; one with variants is valid.
    #[test]
    fn test_enum_type_validation() {
        let empty_enum = Type::Enum(Vec::new());
        assert!(empty_enum.validate().is_err());

        let variants: Vec<String> = ["admin", "user"].iter().map(|v| v.to_string()).collect();
        let valid_enum = Type::Enum(variants);
        assert!(valid_enum.validate().is_ok());
    }

    /// A field cannot be both unique and private.
    #[test]
    fn test_field_validation_conflicting_attributes() {
        let mut field = Field::new(String::from("password"), Type::String);
        field
            .attributes
            .extend(vec![FieldAttribute::Unique, FieldAttribute::Private]);
        assert!(field.validate().is_err());
    }

    /// Embedding configs are accepted or rejected according to model, source, and dimension.
    #[test]
    fn test_embedding_config_validation() {
        let model = || String::from("bge-base-en-v1.5");
        let source = || String::from("content");

        let accepted = vec![
            EmbeddingConfig::new(model(), source()),
            EmbeddingConfig::with_dimension(model(), source(), 768),
        ];
        for config in &accepted {
            assert!(config.validate().is_ok());
        }

        let rejected = vec![
            // Empty model name.
            EmbeddingConfig::new(String::new(), source()),
            // Empty source field.
            EmbeddingConfig::new(model(), String::new()),
            // Zero dimension.
            EmbeddingConfig::with_dimension(model(), source(), 0),
        ];
        for config in &rejected {
            assert!(config.validate().is_err());
        }
    }

    /// Paradigm names and aliases parse; unknown names yield `None`.
    #[test]
    fn test_embedding_paradigm_parsing() {
        let cases = vec![
            ("dense", Some(EmbeddingParadigm::Dense)),
            ("multi-vector", Some(EmbeddingParadigm::MultiVector)),
            ("colbert", Some(EmbeddingParadigm::MultiVector)),
            ("sparse", Some(EmbeddingParadigm::Sparse)),
            ("invalid", None),
        ];
        for (input, expected) in cases {
            assert_eq!(EmbeddingParadigm::from_str(input), expected);
        }
    }

    /// `Vector` renders as `Vec<f32>` and parses from its own name.
    #[test]
    fn test_vector_type() {
        let vector = Type::Vector;
        assert_eq!(vector.to_rust_type(), "Vec<f32>");
        assert_eq!(Type::from_str("Vector").unwrap(), vector);
    }
}