use std::collections::{HashMap, HashSet};
use serde::{Deserialize, Serialize};
use serde_json::Value;
/// Top-level schema configuration: an optional owner, an optional version,
/// an auto-migrate flag, and one [`TableConfig`] per managed table.
///
/// The container-level `#[serde(default)]` means any key missing from the
/// input is filled from `SchemaConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct SchemaConfig {
/// Optional owner name; checked as an identifier by `validate`.
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub owner: Option<String>,
/// Optional schema version number. Omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub version: Option<u32>,
/// Auto-migrate flag. When the key is absent, the value comes from
/// `default_auto_migrate`, which consults the `DB_AUTO_MIGRATE` environment variable.
#[serde(default = "default_auto_migrate")]
pub auto_migrate: bool,
/// Settings for the `conversations` table.
pub conversations: TableConfig,
/// Settings for the `responses` table.
pub responses: TableConfig,
/// Settings for the `conversation_items` table.
pub conversation_items: TableConfig,
/// Settings for the `conversation_item_links` table.
pub conversation_item_links: TableConfig,
}
/// Per-table settings: the table name, column-name overrides, additional
/// columns, and the set of columns to skip.
///
/// Because of the container-level `#[serde(default)]`, a table object that
/// omits the `table` key deserializes with an empty `table` string.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct TableConfig {
/// Table name; must pass identifier validation.
pub table: String,
/// Maps a logical field name to the column name returned by `col`.
/// Omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub columns: HashMap<String, String>,
/// Additional columns keyed by column name. Omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub extra_columns: HashMap<String, ColumnDef>,
/// Field names for which `is_skipped` returns `true`.
/// Omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "HashSet::is_empty")]
pub skip_columns: HashSet<String>,
}
/// Definition of one entry in `TableConfig::extra_columns`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ColumnDef {
/// SQL type text for the column; required when deserializing and checked
/// by `validate_sql_type` during validation.
pub sql_type: String,
/// Optional JSON value; omitted from serialized output when `None`.
/// NOTE(review): presumably the column's default value — how it is applied
/// is not visible in this file.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub default_value: Option<Value>,
}
/// Computes the default for `SchemaConfig::auto_migrate` from the environment.
///
/// Returns `true` only when `DB_AUTO_MIGRATE` is readable and equals `1` or
/// `true` (ASCII case-insensitive). Any other value, or a variable that
/// cannot be read, yields `false`.
fn default_auto_migrate() -> bool {
    match std::env::var("DB_AUTO_MIGRATE") {
        Ok(raw) => raw == "1" || raw.eq_ignore_ascii_case("true"),
        Err(_) => false,
    }
}
impl Default for SchemaConfig {
    /// Builds the baseline configuration: no owner, no version, the
    /// environment-derived auto-migrate flag, and each table named after
    /// its own field.
    fn default() -> Self {
        let auto_migrate = default_auto_migrate();
        let named = TableConfig::with_table;
        Self {
            owner: None,
            version: None,
            auto_migrate,
            conversations: named("conversations"),
            responses: named("responses"),
            conversation_items: named("conversation_items"),
            conversation_item_links: named("conversation_item_links"),
        }
    }
}
impl TableConfig {
    /// Creates a config for the table called `name`, leaving every other
    /// field at its default (empty) value.
    pub fn with_table(name: &str) -> Self {
        Self {
            table: name.to_owned(),
            ..Self::default()
        }
    }

    /// Resolves a logical field name to its column name.
    ///
    /// Returns the override stored in `columns` when one exists, otherwise
    /// `field` itself.
    pub fn col<'a>(&'a self, field: &'a str) -> &'a str {
        match self.columns.get(field) {
            Some(physical) => physical.as_str(),
            None => field,
        }
    }

    /// Reports whether `field` is listed in `skip_columns`.
    pub fn is_skipped(&self, field: &str) -> bool {
        self.skip_columns.contains(field)
    }

    /// Renders the table reference.
    ///
    /// With an owner the result is `owner."table"` (table name in double
    /// quotes); without one it is the bare table name.
    pub fn qualified_table(&self, owner: Option<&str>) -> String {
        owner.map_or_else(
            || self.table.clone(),
            |schema| format!("{schema}.\"{}\"", self.table),
        )
    }
}
impl SchemaConfig {
    /// Upper-cases (ASCII) every table name, every column-override value and
    /// every `extra_columns` key, in place.
    ///
    /// Override keys (logical field names) and `skip_columns` entries are
    /// left untouched.
    pub fn uppercase_for_oracle(&mut self) {
        let tables = [
            &mut self.conversations,
            &mut self.responses,
            &mut self.conversation_items,
            &mut self.conversation_item_links,
        ];
        for table_cfg in tables {
            table_cfg.table.make_ascii_uppercase();
            table_cfg
                .columns
                .values_mut()
                .for_each(|physical| physical.make_ascii_uppercase());

            // Map keys cannot be edited in place, so each entry is pulled out
            // and re-inserted under its upper-cased name.
            let old_keys: Vec<String> = table_cfg.extra_columns.keys().cloned().collect();
            for old_key in old_keys {
                let new_key = old_key.to_ascii_uppercase();
                if let Some(def) = table_cfg.extra_columns.remove(&old_key) {
                    table_cfg.extra_columns.insert(new_key, def);
                }
            }
        }
    }

    /// Validates the owner (when present) and then each of the four tables.
    ///
    /// # Errors
    ///
    /// Returns the first problem found as a human-readable message prefixed
    /// with the offending config path.
    pub fn validate(&self) -> Result<(), String> {
        if let Some(owner) = self.owner.as_deref() {
            if let Err(e) = validate_identifier(owner) {
                return Err(format!("owner: {e}"));
            }
        }

        let tables = [
            ("conversations", &self.conversations),
            ("responses", &self.responses),
            ("conversation_items", &self.conversation_items),
            ("conversation_item_links", &self.conversation_item_links),
        ];
        for (label, table_cfg) in tables {
            Self::validate_table(label, table_cfg)?;
        }
        Ok(())
    }

    /// Validates one table's configuration; `label` names the table in error
    /// messages and selects its core / primary-key column lists.
    ///
    /// Checks run in this order: table name, column overrides, extra-column
    /// names and SQL types, extra columns shadowing core columns,
    /// case-insensitive collisions among extra columns, then skip columns.
    fn validate_table(label: &str, tc: &TableConfig) -> Result<(), String> {
        if let Err(e) = validate_identifier(&tc.table) {
            // An empty name usually means the `table` key was left out, so
            // that case gets a more helpful message.
            let message = if tc.table.is_empty() {
                format!("{label}.table: table name is required (got empty string — did you omit the 'table' key in your config?)")
            } else {
                format!("{label}.table: {e}")
            };
            return Err(message);
        }

        for (logical, physical) in &tc.columns {
            if let Err(e) = validate_identifier(logical) {
                return Err(format!("{label}.columns key '{logical}': {e}"));
            }
            if let Err(e) = validate_identifier(physical) {
                return Err(format!("{label}.columns value '{physical}': {e}"));
            }
        }

        for (name, def) in &tc.extra_columns {
            if let Err(e) = validate_identifier(name) {
                return Err(format!("{label}.extra_columns key '{name}': {e}"));
            }
            if def.sql_type.is_empty() {
                return Err(format!(
                    "{label}.extra_columns['{name}']: sql_type must not be empty"
                ));
            }
            if let Err(e) = validate_sql_type(&def.sql_type) {
                return Err(format!(
                    "{label}.extra_columns['{name}'].sql_type: {e}"
                ));
            }
        }

        // An extra column may not reuse a core column name, ignoring ASCII case.
        let core = core_columns_for(label);
        let shadowing = tc
            .extra_columns
            .keys()
            .find(|name| core.iter().any(|c| c.eq_ignore_ascii_case(name.as_str())));
        if let Some(name) = shadowing {
            return Err(format!(
                "{label}.extra_columns: '{name}' shadows a core column name"
            ));
        }

        // Two extra columns differing only by ASCII case are rejected.
        let mut seen_upper: HashSet<String> = HashSet::new();
        for name in tc.extra_columns.keys() {
            let is_new = seen_upper.insert(name.to_ascii_uppercase());
            if !is_new {
                return Err(format!(
                    "{label}.extra_columns: case-insensitive collision on '{name}' \
                     (Oracle normalizes identifiers to uppercase)"
                ));
            }
        }

        let primary_key = primary_key_columns_for(label);
        for name in &tc.skip_columns {
            if let Err(e) = validate_identifier(name) {
                return Err(format!("{label}.skip_columns '{name}': {e}"));
            }
            let column = name.as_str();
            if primary_key.contains(&column) {
                return Err(format!(
                    "{label}.skip_columns: cannot skip '{name}' — it is part of the primary key"
                ));
            }
            if !core.contains(&column) {
                return Err(format!(
                    "{label}.skip_columns: '{name}' is not a recognized column \
                     (known: {core:?})"
                ));
            }
        }

        Ok(())
    }
}
/// Returns the primary-key column(s) of the table identified by `label`.
///
/// These columns may never appear in `skip_columns`. The link table
/// `conversation_item_links` has no `id` column, so its key is taken to be
/// the (`conversation_id`, `item_id`) pair. Every other label, including
/// unknown ones, keeps the single-column `id` key.
fn primary_key_columns_for(label: &str) -> &'static [&'static str] {
    match label {
        "conversation_item_links" => &["conversation_id", "item_id"],
        _ => &["id"],
    }
}
/// Returns the built-in (core) column names of the table identified by
/// `label`, or an empty slice for an unrecognized label.
fn core_columns_for(label: &str) -> &'static [&'static str] {
    const CONVERSATIONS: &[&str] = &["id", "created_at", "metadata"];
    const RESPONSES: &[&str] = &[
        "id",
        "conversation_id",
        "previous_response_id",
        "input",
        "created_at",
        "safety_identifier",
        "model",
        "raw_response",
    ];
    const CONVERSATION_ITEMS: &[&str] = &[
        "id",
        "response_id",
        "item_type",
        "role",
        "content",
        "status",
        "created_at",
    ];
    const CONVERSATION_ITEM_LINKS: &[&str] = &["conversation_id", "item_id", "added_at"];
    const UNKNOWN: &[&str] = &[];

    match label {
        "conversations" => CONVERSATIONS,
        "responses" => RESPONSES,
        "conversation_items" => CONVERSATION_ITEMS,
        "conversation_item_links" => CONVERSATION_ITEM_LINKS,
        _ => UNKNOWN,
    }
}
/// Longest identifier accepted by `validate_identifier`, measured in bytes.
const MAX_IDENTIFIER_LEN: usize = 128;

/// Checks that `name` is usable as a SQL identifier.
///
/// # Errors
///
/// Fails, in this order, when `name` is empty, when it is longer than
/// `MAX_IDENTIFIER_LEN`, or when it contains anything other than ASCII
/// letters, ASCII digits and underscores.
fn validate_identifier(name: &str) -> Result<(), String> {
    // Any non-ASCII character contributes bytes >= 0x80, which fail this
    // predicate, so a byte-wise scan is enough.
    let is_allowed = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    if name.is_empty() {
        Err("identifier must not be empty".to_string())
    } else if name.len() > MAX_IDENTIFIER_LEN {
        Err(format!(
            "identifier '{name}' exceeds maximum length of {MAX_IDENTIFIER_LEN} characters"
        ))
    } else if name.bytes().any(|b| !is_allowed(b)) {
        Err(format!(
            "invalid identifier '{name}' — only ASCII alphanumeric and underscores allowed"
        ))
    } else {
        Ok(())
    }
}
/// Longest SQL type string accepted by `validate_sql_type`, measured in bytes.
const MAX_SQL_TYPE_LEN: usize = 64;

/// Checks that `sql_type` looks like a plain SQL type expression.
///
/// # Errors
///
/// Fails, in this order, when the text is empty or whitespace-only, when it
/// is longer than `MAX_SQL_TYPE_LEN`, or when it contains a character
/// outside ASCII letters/digits, `_`, space, `(`, `)`, `,` and `.`.
fn validate_sql_type(sql_type: &str) -> Result<(), String> {
    /// Characters permitted inside a SQL type expression.
    fn is_allowed(c: char) -> bool {
        matches!(
            c,
            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | ' ' | '(' | ')' | ',' | '.'
        )
    }

    if sql_type.chars().all(char::is_whitespace) {
        return Err("sql_type must not be whitespace-only".to_string());
    }
    if sql_type.len() > MAX_SQL_TYPE_LEN {
        return Err(format!(
            "sql_type '{sql_type}' exceeds maximum length of {MAX_SQL_TYPE_LEN} characters"
        ));
    }
    match sql_type.chars().find(|&c| !is_allowed(c)) {
        Some(_) => Err(format!(
            "invalid sql_type '{sql_type}' — only ASCII alphanumeric, underscore, space, \
             parentheses, comma, and period are allowed"
        )),
        None => Ok(()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an extra-column definition with the given SQL type and no default value.
    fn column(sql_type: &str) -> ColumnDef {
        ColumnDef {
            sql_type: sql_type.to_string(),
            default_value: None,
        }
    }

    /// Serializes `cfg` to JSON and parses it back.
    fn roundtrip(cfg: &SchemaConfig) -> SchemaConfig {
        let json = serde_json::to_string(cfg).expect("serialize");
        serde_json::from_str(&json).expect("deserialize")
    }

    /// Asserts that validation fails with a message containing every fragment.
    #[track_caller]
    fn assert_rejected(cfg: &SchemaConfig, fragments: &[&str]) {
        let err = cfg.validate().unwrap_err();
        assert!(
            fragments.iter().all(|fragment| err.contains(fragment)),
            "unexpected: {err}"
        );
    }

    // ---- defaults and TableConfig helpers ----

    #[test]
    fn default_config_matches_hardcoded_names() {
        let cfg = SchemaConfig::default();
        let tables = [
            (&cfg.conversations, "conversations"),
            (&cfg.responses, "responses"),
            (&cfg.conversation_items, "conversation_items"),
            (&cfg.conversation_item_links, "conversation_item_links"),
        ];
        for (table_cfg, expected) in tables {
            assert_eq!(table_cfg.table, expected);
        }
        assert!(cfg.owner.is_none());
    }

    #[test]
    fn default_config_validates_successfully() {
        let cfg = SchemaConfig::default();
        cfg.validate().expect("default config should be valid");
    }

    #[test]
    fn col_returns_field_name_when_no_override() {
        let tc = TableConfig::with_table("t");
        for field in ["id", "created_at"] {
            assert_eq!(tc.col(field), field);
        }
    }

    #[test]
    fn col_returns_override_when_configured() {
        let mut tc = TableConfig::with_table("t");
        tc.columns.insert("id".into(), "CONVERSATION_ID".into());
        assert_eq!(tc.col("id"), "CONVERSATION_ID");
        assert_eq!(tc.col("created_at"), "created_at");
    }

    #[test]
    fn qualified_table_without_owner() {
        let tc = TableConfig::with_table("conversations");
        assert_eq!(tc.qualified_table(None), "conversations");
    }

    #[test]
    fn qualified_table_with_owner() {
        let tc = TableConfig::with_table("CONVERSATIONS");
        assert_eq!(tc.qualified_table(Some("ADMIN")), "ADMIN.\"CONVERSATIONS\"");
    }

    // ---- uppercase_for_oracle ----

    #[test]
    fn uppercase_for_oracle_converts_defaults() {
        let mut cfg = SchemaConfig::default();
        cfg.uppercase_for_oracle();
        let tables = [
            (&cfg.conversations, "CONVERSATIONS"),
            (&cfg.responses, "RESPONSES"),
            (&cfg.conversation_items, "CONVERSATION_ITEMS"),
            (&cfg.conversation_item_links, "CONVERSATION_ITEM_LINKS"),
        ];
        for (table_cfg, expected) in tables {
            assert_eq!(table_cfg.table, expected);
        }
        cfg.validate().expect("uppercased config should be valid");
    }

    #[test]
    fn uppercase_for_oracle_converts_custom_table_and_columns() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations.table = "my_convos".into();
        cfg.conversations.columns.insert("id".into(), "conv_id".into());
        cfg.uppercase_for_oracle();
        assert_eq!(cfg.conversations.table, "MY_CONVOS");
        assert_eq!(cfg.conversations.col("id"), "CONV_ID");
    }

    // ---- identifier validation ----

    #[test]
    fn validate_accepts_valid_identifiers() {
        let mut cfg = SchemaConfig {
            owner: Some("ADMIN_01".into()),
            ..Default::default()
        };
        cfg.conversations.table = "MY_CONVERSATIONS".into();
        cfg.conversations.columns.insert("id".into(), "CONV_ID".into());
        cfg.validate().expect("should be valid");
    }

    #[test]
    fn validate_rejects_empty_table_name_with_helpful_message() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations.table = String::new();
        assert_rejected(&cfg, &["conversations.table", "table name is required"]);
    }

    #[test]
    fn validate_rejects_overly_long_identifier() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations.table = "a".repeat(129);
        assert_rejected(&cfg, &["exceeds maximum length"]);
    }

    #[test]
    fn validate_rejects_special_characters() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations.table = "table;DROP".into();
        assert_rejected(&cfg, &["conversations.table", "invalid identifier"]);
    }

    #[test]
    fn validate_rejects_dots_in_owner() {
        let cfg = SchemaConfig {
            owner: Some("ADMIN.SCHEMA".into()),
            ..Default::default()
        };
        assert_rejected(&cfg, &["owner", "invalid identifier"]);
    }

    #[test]
    fn validate_rejects_invalid_column_override_key() {
        let mut cfg = SchemaConfig::default();
        cfg.responses.columns.insert("bad;col".into(), "GOOD_COL".into());
        assert_rejected(&cfg, &["columns key", "invalid identifier"]);
    }

    #[test]
    fn validate_rejects_invalid_column_override_value() {
        let mut cfg = SchemaConfig::default();
        cfg.responses.columns.insert("id".into(), "bad column!".into());
        assert_rejected(&cfg, &["columns value", "invalid identifier"]);
    }

    // ---- serde behaviour ----

    #[test]
    fn serde_roundtrip_default() {
        let cfg = SchemaConfig::default();
        assert_eq!(cfg, roundtrip(&cfg));
    }

    #[test]
    fn serde_roundtrip_custom() {
        let mut cfg = SchemaConfig {
            owner: Some("ADMIN".into()),
            ..Default::default()
        };
        cfg.conversations.table = "CONVERSATIONS".into();
        cfg.conversations
            .columns
            .insert("id".into(), "CONVERSATION_ID".into());
        assert_eq!(cfg, roundtrip(&cfg));
    }

    #[test]
    fn serde_deserialize_empty_object_uses_defaults() {
        let parsed: SchemaConfig = serde_json::from_str("{}").expect("deserialize empty");
        assert_eq!(parsed, SchemaConfig::default());
    }

    #[test]
    fn serde_roundtrip_with_version_and_auto_migrate() {
        let cfg = SchemaConfig {
            version: Some(3),
            auto_migrate: false,
            ..Default::default()
        };
        let restored = roundtrip(&cfg);
        assert_eq!(restored.version, Some(3));
        assert!(!restored.auto_migrate);
    }

    #[test]
    fn serde_defaults_version_none_and_auto_migrate_false() {
        let parsed: SchemaConfig = serde_json::from_str("{}").expect("deserialize empty");
        assert_eq!(parsed.version, None);
        assert!(!parsed.auto_migrate);
    }

    #[test]
    fn serde_version_none_is_omitted_from_json() {
        let json = serde_json::to_string(&SchemaConfig::default()).expect("serialize");
        assert!(
            !json.contains("version"),
            "version:None should be skipped: {json}"
        );
    }

    // ---- skip / extra columns ----

    #[test]
    fn is_skipped_returns_false_by_default() {
        let tc = TableConfig::with_table("t");
        for field in ["id", "safety_identifier"] {
            assert!(!tc.is_skipped(field));
        }
    }

    #[test]
    fn is_skipped_returns_true_for_configured_fields() {
        let mut tc = TableConfig::with_table("t");
        for field in ["safety_identifier", "raw_response"] {
            tc.skip_columns.insert(field.to_string());
        }
        assert!(tc.is_skipped("safety_identifier"));
        assert!(tc.is_skipped("raw_response"));
        assert!(!tc.is_skipped("id"));
    }

    #[test]
    fn extra_columns_default_is_empty() {
        assert!(TableConfig::with_table("t").extra_columns.is_empty());
    }

    #[test]
    fn validate_accepts_valid_extra_columns() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations
            .extra_columns
            .insert("EXPIRES_AT".into(), column("TIMESTAMP"));
        cfg.validate().expect("should be valid");
    }

    #[test]
    fn validate_rejects_invalid_extra_column_name() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations
            .extra_columns
            .insert("bad;col".into(), column("TEXT"));
        assert_rejected(&cfg, &["extra_columns key", "invalid identifier"]);
    }

    #[test]
    fn validate_rejects_empty_sql_type() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations
            .extra_columns
            .insert("MY_COL".into(), column(""));
        assert_rejected(&cfg, &["sql_type must not be empty"]);
    }

    #[test]
    fn validate_rejects_invalid_skip_column_name() {
        let mut cfg = SchemaConfig::default();
        cfg.responses.skip_columns.insert("bad;col".into());
        assert_rejected(&cfg, &["skip_columns", "invalid identifier"]);
    }

    #[test]
    fn validate_accepts_valid_skip_columns() {
        let mut cfg = SchemaConfig::default();
        cfg.responses.skip_columns.insert("safety_identifier".into());
        cfg.validate().expect("should be valid");
    }

    #[test]
    fn validate_rejects_skip_id() {
        let mut cfg = SchemaConfig::default();
        cfg.responses.skip_columns.insert("id".into());
        assert_rejected(&cfg, &["cannot skip 'id'", "primary key"]);
    }

    #[test]
    fn validate_accepts_valid_sql_types() {
        let mut cfg = SchemaConfig::default();
        let accepted = [
            "VARCHAR(128)",
            "TEXT",
            "TIMESTAMP",
            "TIMESTAMP WITH TIME ZONE",
            "NUMBER(10,2)",
            "VARCHAR2(256)",
            "BIGINT",
        ];
        for sql_type in accepted {
            // The same key is overwritten on every pass, so only one extra
            // column exists at a time.
            cfg.conversations
                .extra_columns
                .insert("TEST_COL".into(), column(sql_type));
            if let Err(e) = cfg.validate() {
                panic!("sql_type '{sql_type}' should be valid: {e}");
            }
        }
    }

    #[test]
    fn validate_rejects_sql_injection_in_sql_type() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations.extra_columns.insert(
            "MY_COL".into(),
            column("TEXT); DROP TABLE responses; --"),
        );
        assert_rejected(&cfg, &["sql_type", "invalid"]);
    }

    #[test]
    fn validate_rejects_overly_long_sql_type() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations
            .extra_columns
            .insert("MY_COL".into(), column(&"A".repeat(65)));
        assert_rejected(&cfg, &["sql_type", "exceeds maximum length"]);
    }

    #[test]
    fn uppercase_for_oracle_converts_extra_column_keys() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations
            .extra_columns
            .insert("expires_at".into(), column("TIMESTAMP"));
        cfg.uppercase_for_oracle();
        let extra = &cfg.conversations.extra_columns;
        assert!(extra.contains_key("EXPIRES_AT"));
        assert!(!extra.contains_key("expires_at"));
    }

    #[test]
    fn uppercase_for_oracle_preserves_skip_column_names_lowercase() {
        let mut cfg = SchemaConfig::default();
        cfg.responses.skip_columns.insert("safety_identifier".into());
        cfg.uppercase_for_oracle();
        let skipped = &cfg.responses.skip_columns;
        assert!(skipped.contains("safety_identifier"));
        assert!(!skipped.contains("SAFETY_IDENTIFIER"));
    }

    #[test]
    fn serde_roundtrip_with_extra_and_skip() {
        let mut cfg = SchemaConfig::default();
        cfg.conversations.extra_columns.insert(
            "EXPIRES_AT".into(),
            ColumnDef {
                sql_type: "TIMESTAMP".into(),
                default_value: Some(Value::String("2099-01-01".into())),
            },
        );
        cfg.responses.skip_columns.insert("safety_identifier".into());
        assert_eq!(cfg, roundtrip(&cfg));
    }

    #[test]
    fn validate_rejects_extra_column_shadowing_core_column() {
        let mut upper_case = SchemaConfig::default();
        upper_case
            .responses
            .extra_columns
            .insert("CREATED_AT".into(), column("TIMESTAMP"));
        assert_rejected(&upper_case, &["shadows a core column"]);

        let mut lower_case = SchemaConfig::default();
        lower_case
            .conversations
            .extra_columns
            .insert("metadata".into(), column("TEXT"));
        assert_rejected(&lower_case, &["shadows a core column"]);
    }

    #[test]
    fn validate_rejects_unknown_skip_column() {
        let mut cfg = SchemaConfig::default();
        // Deliberately misspelled so it matches no core column.
        cfg.responses.skip_columns.insert("safty_identifier".into());
        assert_rejected(&cfg, &["not a recognized column"]);
    }

    #[test]
    fn validate_rejects_case_colliding_extra_columns() {
        let mut cfg = SchemaConfig::default();
        for name in ["tenant_id", "TENANT_ID"] {
            cfg.conversations
                .extra_columns
                .insert(name.to_string(), column("TEXT"));
        }
        assert_rejected(&cfg, &["case-insensitive collision"]);
    }
}