use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use crate::types::{value::FieldValue, MeruError, Result};
/// Storage type of a column, as recorded in a [`ColumnDef`].
///
/// NOTE(review): the variant names look like Parquet physical types — confirm
/// against the reader/writer code that consumes this enum.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum ColumnType {
Boolean,
Int32,
Int64,
Float,
Double,
ByteArray,
/// Carries an `i32` payload; presumably the fixed length in bytes — TODO
/// confirm. Nothing in this file checks that the value is positive.
FixedLenByteArray(i32),
}
/// Definition of a single table column.
///
/// The three optional fields are marked `#[serde(default)]`, so serialized
/// definitions that omit them deserialize with `None`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct ColumnDef {
/// Column name; `TableSchema::column_by_name` looks columns up by this value.
pub name: String,
/// Storage type of the column.
pub col_type: ColumnType,
/// Nullability flag. `TableSchema::validate_readonly` rejects a primary-key
/// column that has this set to `true`.
pub nullable: bool,
/// Numeric ID of the column, or `None` when not yet assigned.
/// `TableSchema::normalize_field_ids` fills in missing IDs.
#[serde(default)]
pub field_id: Option<u32>,
/// Optional default value.
/// NOTE(review): name matches Iceberg's `initial-default` (value used when
/// reading rows written before the column existed) — confirm.
#[serde(default)]
pub initial_default: Option<FieldValue>,
/// Optional default value.
/// NOTE(review): name matches Iceberg's `write-default` (value used when a
/// writer supplies none) — confirm.
#[serde(default)]
pub write_default: Option<FieldValue>,
}
impl Default for ColumnDef {
    /// Produces an unnamed, non-nullable `Int64` column with no field ID and
    /// no default values.
    ///
    /// Mainly useful as the base of a struct-update expression
    /// (`ColumnDef { name, ..Default::default() }`).
    fn default() -> Self {
        // The builder's starting state already is "non-nullable, nothing
        // optional set", so the default is simply an untouched builder.
        Self::builder(String::new(), ColumnType::Int64).build()
    }
}
impl ColumnDef {
    /// Begins building a column definition with the given name and type.
    ///
    /// The returned builder starts out non-nullable, with no field ID and no
    /// default values; call [`ColumnDefBuilder::build`] to obtain the
    /// finished [`ColumnDef`].
    pub fn builder(name: impl Into<String>, col_type: ColumnType) -> ColumnDefBuilder {
        let name: String = name.into();
        let (field_id, initial_default, write_default) = (None, None, None);
        ColumnDefBuilder {
            name,
            col_type,
            nullable: false,
            field_id,
            initial_default,
            write_default,
        }
    }
}
/// Fluent builder for [`ColumnDef`], obtained from [`ColumnDef::builder`].
///
/// Every setter takes the builder by value and hands it back, so calls are
/// meant to be chained and finished with `build()`. The type is marked
/// `#[must_use]` because discarding the value returned by a setter silently
/// loses the configuration.
#[must_use = "a builder does nothing until `build()` is called"]
#[derive(Clone, Debug)]
pub struct ColumnDefBuilder {
    /// Column name, fixed when the builder is created.
    name: String,
    /// Column storage type, fixed when the builder is created.
    col_type: ColumnType,
    /// Nullability flag; starts as `false`.
    nullable: bool,
    /// Explicit field ID, if one was supplied.
    field_id: Option<u32>,
    /// Value for `ColumnDef::initial_default`, if one was supplied.
    initial_default: Option<FieldValue>,
    /// Value for `ColumnDef::write_default`, if one was supplied.
    write_default: Option<FieldValue>,
}
impl ColumnDefBuilder {
    /// Sets the nullability flag of the column being built.
    pub fn nullable(self, v: bool) -> Self {
        Self { nullable: v, ..self }
    }

    /// Gives the column an explicit field ID.
    pub fn field_id(self, id: u32) -> Self {
        Self {
            field_id: Some(id),
            ..self
        }
    }

    /// Sets the column's `initial_default` value.
    pub fn initial_default(self, v: FieldValue) -> Self {
        Self {
            initial_default: Some(v),
            ..self
        }
    }

    /// Sets the column's `write_default` value.
    pub fn write_default(self, v: FieldValue) -> Self {
        Self {
            write_default: Some(v),
            ..self
        }
    }

    /// Consumes the builder and produces the finished [`ColumnDef`].
    ///
    /// Performs no validation; the fields are moved over as they are.
    pub fn build(self) -> ColumnDef {
        let Self {
            name,
            col_type,
            nullable,
            field_id,
            initial_default,
            write_default,
        } = self;
        ColumnDef {
            name,
            col_type,
            nullable,
            field_id,
            initial_default,
            write_default,
        }
    }
}
/// Schema of a table: its name, ordered columns and primary key.
///
/// `schema_id` and `last_column_id` are marked `#[serde(default)]`, so
/// serialized schemas that omit them deserialize with both set to 0.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct TableSchema {
/// Name of the table.
pub table_name: String,
/// Column definitions, in order. `primary_key` refers to positions in this vector.
pub columns: Vec<ColumnDef>,
/// Indices into `columns` that make up the primary key. Validation requires
/// the list to be non-empty, in bounds, free of duplicates, and to reference
/// only non-nullable columns.
pub primary_key: Vec<usize>,
/// Identifier of this schema. Not read by any code in this file.
/// NOTE(review): presumably a schema-version number — confirm with callers.
#[serde(default)]
pub schema_id: u32,
/// Upper bound on the field IDs in use: validation rejects any column whose
/// `field_id` is greater than this value, and `normalize_field_ids` advances
/// it when it assigns new IDs.
#[serde(default)]
pub last_column_id: u32,
}
impl TableSchema {
    /// Starts a [`TableSchemaBuilder`] for a table called `table_name`.
    ///
    /// The builder begins with no columns, an empty primary key, and
    /// `schema_id` / `last_column_id` both set to 0.
    pub fn builder(table_name: impl Into<String>) -> TableSchemaBuilder {
        TableSchemaBuilder {
            table_name: table_name.into(),
            columns: Vec::new(),
            primary_key: Vec::new(),
            schema_id: 0,
            last_column_id: 0,
        }
    }

    /// Assigns any missing field IDs, then checks the schema.
    ///
    /// This is [`Self::normalize_field_ids`] followed by
    /// [`Self::validate_readonly`]. The normalization is not rolled back when
    /// validation fails, so `self` may have been modified even if an error is
    /// returned.
    ///
    /// # Errors
    ///
    /// Returns the same errors as [`Self::validate_readonly`].
    pub fn validate(&mut self) -> Result<()> {
        self.normalize_field_ids();
        self.validate_readonly()
    }

    /// Checks the schema without modifying it.
    ///
    /// # Errors
    ///
    /// Returns `MeruError::InvalidArgument` for the first problem found, in
    /// this order:
    ///
    /// 1. there are no columns;
    /// 2. the primary key is empty;
    /// 3. a primary-key index is out of bounds, is repeated, or refers to a
    ///    nullable column;
    /// 4. two columns share a `field_id`, or a `field_id` is greater than
    ///    `last_column_id`.
    ///
    /// Columns whose `field_id` is `None` are skipped by the field-ID checks.
    pub fn validate_readonly(&self) -> Result<()> {
        if self.columns.is_empty() {
            return Err(MeruError::InvalidArgument(
                "schema must have at least one column".into(),
            ));
        }
        if self.primary_key.is_empty() {
            return Err(MeruError::InvalidArgument(
                "primary key must have at least one column".into(),
            ));
        }

        let mut seen = HashSet::new();
        for &idx in &self.primary_key {
            // The bounds check must come first: the nullable check below
            // indexes into `columns`.
            if idx >= self.columns.len() {
                return Err(MeruError::InvalidArgument(format!(
                    "primary_key index {idx} out of bounds (columns len={})",
                    self.columns.len()
                )));
            }
            if !seen.insert(idx) {
                return Err(MeruError::InvalidArgument(format!(
                    "duplicate primary_key column index {idx}"
                )));
            }
            if self.columns[idx].nullable {
                return Err(MeruError::InvalidArgument(format!(
                    "primary key column '{}' must be non-nullable",
                    self.columns[idx].name
                )));
            }
        }

        let mut field_ids_seen: HashSet<u32> = HashSet::new();
        for col in &self.columns {
            if let Some(id) = col.field_id {
                if !field_ids_seen.insert(id) {
                    return Err(MeruError::InvalidArgument(format!(
                        "duplicate field_id {id} on column '{}'",
                        col.name
                    )));
                }
                if id > self.last_column_id {
                    return Err(MeruError::InvalidArgument(format!(
                        "field_id {id} on column '{}' exceeds last_column_id {}",
                        col.name, self.last_column_id
                    )));
                }
            }
        }
        Ok(())
    }

    /// Gives every column that lacks a `field_id` a fresh one and advances
    /// `last_column_id` accordingly.
    ///
    /// A schema in which every column already has an ID is left completely
    /// untouched (including `last_column_id`), which makes the call
    /// idempotent.
    ///
    /// New IDs are handed out in column order, starting just above the larger
    /// of `last_column_id` and the highest ID already present on a column.
    /// Taking the explicit IDs into account matters for partially numbered
    /// schemas: starting from `last_column_id + 1` alone could hand out an ID
    /// that another column already carries, which would then surface as a
    /// confusing "duplicate field_id" error on a column the caller never
    /// numbered.
    pub fn normalize_field_ids(&mut self) {
        if self.columns.iter().all(|c| c.field_id.is_some()) {
            return;
        }

        // Highest ID that must not be reused: the recorded high-water mark or
        // any explicit ID above it.
        let highest_in_use = self
            .columns
            .iter()
            .filter_map(|c| c.field_id)
            .fold(self.last_column_id, |highest, id| highest.max(id));

        let mut next_id = highest_in_use.saturating_add(1);
        for col in &mut self.columns {
            if col.field_id.is_none() {
                col.field_id = Some(next_id);
                next_id = next_id.saturating_add(1);
            }
        }
        // `next_id` is one past the last ID handed out.
        self.last_column_id = next_id.saturating_sub(1);
    }

    /// Finds the first column named `name`, returning its position in
    /// `columns` together with the definition, or `None` if there is no such
    /// column.
    pub fn column_by_name(&self, name: &str) -> Option<(usize, &ColumnDef)> {
        self.columns
            .iter()
            .enumerate()
            .find(|(_, c)| c.name == name)
    }

    /// Number of columns in the primary key.
    pub fn pk_len(&self) -> usize {
        self.primary_key.len()
    }
}
/// Fluent builder for [`TableSchema`], obtained from [`TableSchema::builder`].
///
/// Every setter takes the builder by value and hands it back, so calls are
/// meant to be chained and finished with `build()`. The type is marked
/// `#[must_use]` because discarding the value returned by a setter silently
/// loses the configuration.
#[must_use = "a builder does nothing until `build()` is called"]
#[derive(Clone, Debug)]
pub struct TableSchemaBuilder {
    /// Table name, fixed when the builder is created.
    table_name: String,
    /// Columns added so far, in insertion order.
    columns: Vec<ColumnDef>,
    /// Primary-key column indices; empty until set.
    primary_key: Vec<usize>,
    /// Schema ID; starts at 0.
    schema_id: u32,
    /// Value for `TableSchema::last_column_id`; starts at 0.
    last_column_id: u32,
}
impl TableSchemaBuilder {
    /// Appends `col` after the columns added so far.
    pub fn add_column(mut self, col: ColumnDef) -> Self {
        self.columns.push(col);
        self
    }

    /// Replaces the primary key with `pk`, a list of indices into the
    /// column list.
    pub fn primary_key(self, pk: Vec<usize>) -> Self {
        Self {
            primary_key: pk,
            ..self
        }
    }

    /// Sets the schema ID.
    pub fn schema_id(self, id: u32) -> Self {
        Self {
            schema_id: id,
            ..self
        }
    }

    /// Sets the `last_column_id` the schema starts out with.
    pub fn last_column_id(self, id: u32) -> Self {
        Self {
            last_column_id: id,
            ..self
        }
    }

    /// Consumes the builder and produces the [`TableSchema`].
    ///
    /// Nothing is validated here; call `TableSchema::validate` on the result.
    pub fn build(self) -> TableSchema {
        let Self {
            table_name,
            columns,
            primary_key,
            schema_id,
            last_column_id,
        } = self;
        TableSchema {
            table_name,
            columns,
            primary_key,
            schema_id,
            last_column_id,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: table `t` with an `Int64` key column `id` (nullability chosen
    /// by the caller) and a nullable `ByteArray` column `val`. Neither column
    /// has a field ID; the primary key is column 0.
    fn make_schema(pk_nullable: bool) -> TableSchema {
        let key_col = ColumnDef {
            name: "id".into(),
            col_type: ColumnType::Int64,
            nullable: pk_nullable,
            ..Default::default()
        };
        let value_col = ColumnDef {
            name: "val".into(),
            col_type: ColumnType::ByteArray,
            nullable: true,
            ..Default::default()
        };
        TableSchema {
            table_name: "t".into(),
            columns: vec![key_col, value_col],
            primary_key: vec![0],
            ..Default::default()
        }
    }

    #[test]
    fn valid_schema_passes() {
        let mut schema = make_schema(false);
        schema.validate().unwrap();
    }

    #[test]
    fn nullable_pk_rejected() {
        let outcome = make_schema(true).validate();
        assert!(outcome.is_err());
    }

    #[test]
    fn empty_pk_rejected() {
        let mut schema = make_schema(false);
        schema.primary_key = Vec::new();
        assert!(schema.validate().is_err());
    }

    #[test]
    fn out_of_bounds_pk_rejected() {
        let mut schema = make_schema(false);
        schema.primary_key = vec![99];
        assert!(schema.validate().is_err());
    }

    #[test]
    fn duplicate_pk_col_rejected() {
        let mut schema = make_schema(false);
        schema.primary_key = vec![0, 0];
        assert!(schema.validate().is_err());
    }

    #[test]
    fn validate_normalizes_field_ids() {
        let mut schema = make_schema(false);
        // Precondition: the fixture carries no field IDs at all.
        assert!(!schema.columns.iter().any(|c| c.field_id.is_some()));

        schema.validate().unwrap();

        let assigned: Vec<Option<u32>> = schema.columns.iter().map(|c| c.field_id).collect();
        assert_eq!(assigned, vec![Some(1), Some(2)]);
        assert_eq!(schema.last_column_id, 2);
    }

    #[test]
    fn validate_is_idempotent() {
        let mut schema = make_schema(false);
        schema.validate().unwrap();
        let after_first = schema.clone();

        schema.validate().unwrap();
        assert_eq!(schema, after_first);
    }

    #[test]
    fn builder_round_trips() {
        let id_col = ColumnDef::builder("id", ColumnType::Int64)
            .nullable(false)
            .build();
        let payload_col = ColumnDef::builder("payload", ColumnType::ByteArray)
            .nullable(true)
            .build();

        let mut schema = TableSchema::builder("events")
            .add_column(id_col)
            .add_column(payload_col)
            .primary_key(vec![0])
            .build();
        schema.validate().unwrap();

        assert_eq!(schema.columns.len(), 2);
        assert_eq!(schema.columns[0].field_id, Some(1));
    }

    #[test]
    fn duplicate_field_ids_rejected() {
        let mut schema = make_schema(false);
        // Give both fixture columns the same ID, and raise last_column_id so
        // that the duplicate check (not the upper-bound check) is what fires.
        for col in &mut schema.columns {
            col.field_id = Some(1);
        }
        schema.last_column_id = 1;
        assert!(schema.validate_readonly().is_err());
    }

    #[test]
    fn serde_legacy_json_defaults_new_fields() {
        // JSON that omits field_id, schema_id and last_column_id.
        let legacy_json = r#"{
"table_name": "t",
"columns": [
{"name": "id", "col_type": "Int64", "nullable": false},
{"name": "val", "col_type": "ByteArray", "nullable": true}
],
"primary_key": [0]
}"#;
        let parsed: TableSchema = serde_json::from_str(legacy_json).unwrap();

        assert_eq!(parsed.columns[0].field_id, None);
        assert_eq!(parsed.columns[1].field_id, None);
        assert_eq!(parsed.schema_id, 0);
        assert_eq!(parsed.last_column_id, 0);
    }
}