use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// A collection of table schemas together with the relationships recorded between them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaFingerprint {
/// Table schemas keyed by the name they were registered under (see `add_table`).
pub tables: HashMap<String, TableSchema>,
/// Relationships between tables. Defaults to empty when the field is absent on
/// deserialization and is left out of serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub relationships: Vec<TableRelationship>,
}
impl SchemaFingerprint {
    /// Creates a fingerprint holding no tables and no relationships.
    pub fn new() -> Self {
        let tables = HashMap::new();
        let relationships = Vec::new();
        Self {
            tables,
            relationships,
        }
    }

    /// Stores `schema` under the key `name`.
    ///
    /// A schema already stored under the same key is replaced. The key is taken from
    /// `name` only; it is not compared with `schema.name`.
    pub fn add_table(&mut self, name: impl Into<String>, schema: TableSchema) {
        let key: String = name.into();
        self.tables.insert(key, schema);
    }

    /// Returns the number of columns summed across every stored table.
    pub fn total_columns(&self) -> usize {
        self.tables
            .values()
            .fold(0, |running, table| running + table.columns.len())
    }

    /// Looks up the schema stored under `name`, or `None` when no such key exists.
    pub fn get_table(&self, name: &str) -> Option<&TableSchema> {
        let tables = &self.tables;
        tables.get(name)
    }

    /// Returns `true` when no tables are stored; relationships are not considered.
    pub fn is_empty(&self) -> bool {
        let Self { tables, .. } = self;
        tables.is_empty()
    }
}
impl Default for SchemaFingerprint {
fn default() -> Self {
Self::new()
}
}
/// Schema description of a single table: its name, row count, columns and key information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableSchema {
/// Table name as stored on the schema itself. `SchemaFingerprint::add_table` keys on its
/// own `name` argument and does not check it against this field.
pub name: String,
/// Row count recorded for the table.
pub row_count: u64,
/// Column schemas in insertion order (`add_column` appends).
pub columns: Vec<FieldSchema>,
/// Primary-key entries. Presumably column names from `columns` — not validated in this
/// file; confirm against callers. Defaults to empty when absent on deserialization and is
/// left out of serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub primary_key: Vec<String>,
/// Free-form string key/value metadata. Defaults to empty when absent on deserialization
/// and is left out of serialized output when empty.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub metadata: HashMap<String, String>,
}
impl TableSchema {
    /// Creates a table schema with the given name and row count and with no columns,
    /// no primary key and no metadata.
    pub fn new(name: impl Into<String>, row_count: u64) -> Self {
        let name: String = name.into();
        Self {
            name,
            row_count,
            columns: Vec::new(),
            primary_key: Vec::new(),
            metadata: HashMap::new(),
        }
    }

    /// Appends `column` to the end of the column list. Duplicate names are not rejected.
    pub fn add_column(&mut self, column: FieldSchema) {
        let columns = &mut self.columns;
        columns.push(column);
    }

    /// Returns the first column whose name equals `name`, or `None` when there is none.
    pub fn get_column(&self, name: &str) -> Option<&FieldSchema> {
        let wanted = name;
        self.columns
            .iter()
            .find(|candidate| candidate.name.as_str() == wanted)
    }
}
/// Schema description of a single column.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldSchema {
/// Column name; `TableSchema::get_column` matches on this value.
pub name: String,
/// Data type of the column.
pub data_type: DataType,
/// Whether the column may hold nulls. `with_nullable` sets this to `null_rate > 0.0`.
pub nullable: bool,
/// Null rate for the column, stored as given to `with_nullable`.
/// NOTE(review): the unit (fraction vs. percentage) is not established in this file — confirm.
pub null_rate: f64,
/// Cardinality recorded for the column — presumably the number of distinct values; confirm.
pub cardinality: u64,
/// Set by `as_primary_key`. Defaults to `false` when absent on deserialization.
#[serde(default)]
pub is_primary_key: bool,
/// Set by `as_foreign_key`. Defaults to `false` when absent on deserialization.
#[serde(default)]
pub is_foreign_key: bool,
/// Target of the foreign key, set together with `is_foreign_key` by `as_foreign_key`.
/// Left out of serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub foreign_key_ref: Option<ForeignKeyRef>,
/// Optional free-form semantic type label. Left out of serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub semantic_type: Option<String>,
/// Free-form string key/value metadata. Defaults to empty when absent on deserialization
/// and is left out of serialized output when empty.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub metadata: HashMap<String, String>,
}
impl FieldSchema {
    /// Creates a field with the given name and data type.
    ///
    /// All other properties start at their neutral values: not nullable, null rate `0.0`,
    /// cardinality `0`, neither primary nor foreign key, no semantic type, empty metadata.
    pub fn new(name: impl Into<String>, data_type: DataType) -> Self {
        Self {
            name: name.into(),
            data_type,
            nullable: false,
            null_rate: 0.0,
            cardinality: 0,
            is_primary_key: false,
            is_foreign_key: false,
            foreign_key_ref: None,
            semantic_type: None,
            metadata: HashMap::new(),
        }
    }

    /// Sets the null rate and derives `nullable` from it (`nullable` is `true` exactly when
    /// the stored rate is greater than zero).
    ///
    /// A NaN, infinite, or negative `null_rate` is not a meaningful rate and is stored as
    /// `0.0` with `nullable == false`. This keeps the two fields consistent and keeps
    /// non-finite floats out of the struct, which JSON cannot represent as numbers.
    /// No upper bound is enforced.
    #[must_use = "builder methods return the updated field instead of mutating in place"]
    pub fn with_nullable(mut self, null_rate: f64) -> Self {
        // `is_finite` rejects NaN and both infinities; the comparison rejects negatives.
        let rate = if null_rate.is_finite() && null_rate > 0.0 {
            null_rate
        } else {
            0.0
        };
        self.nullable = rate > 0.0;
        self.null_rate = rate;
        self
    }

    /// Sets the column's cardinality.
    #[must_use = "builder methods return the updated field instead of mutating in place"]
    pub fn with_cardinality(mut self, cardinality: u64) -> Self {
        self.cardinality = cardinality;
        self
    }

    /// Marks the column as a primary key.
    #[must_use = "builder methods return the updated field instead of mutating in place"]
    pub fn as_primary_key(mut self) -> Self {
        self.is_primary_key = true;
        self
    }

    /// Marks the column as a foreign key pointing at `reference`.
    #[must_use = "builder methods return the updated field instead of mutating in place"]
    pub fn as_foreign_key(mut self, reference: ForeignKeyRef) -> Self {
        self.is_foreign_key = true;
        self.foreign_key_ref = Some(reference);
        self
    }

    /// Attaches a semantic type label, replacing any previous one.
    #[must_use = "builder methods return the updated field instead of mutating in place"]
    pub fn with_semantic_type(mut self, semantic_type: impl Into<String>) -> Self {
        self.semantic_type = Some(semantic_type.into());
        self
    }
}
/// Data type of a column.
///
/// Variants are (de)serialized under snake_case names (for example `Int32` as `"int32"`).
/// See the `is_*` methods for how variants are grouped.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DataType {
Boolean,
Int32,
Int64,
Float32,
Float64,
Decimal,
String,
Date,
Timestamp,
Time,
Uuid,
Binary,
Json,
// Presumably the fallback for types that could not be classified — confirm with producers.
Unknown,
}
impl DataType {
    /// Returns `true` for `Int32`, `Int64`, `Float32`, `Float64` and `Decimal`.
    pub fn is_numeric(&self) -> bool {
        match self {
            Self::Int32 | Self::Int64 | Self::Float32 | Self::Float64 | Self::Decimal => true,
            Self::Boolean
            | Self::String
            | Self::Date
            | Self::Timestamp
            | Self::Time
            | Self::Uuid
            | Self::Binary
            | Self::Json
            | Self::Unknown => false,
        }
    }

    /// Returns `true` for `Date`, `Timestamp` and `Time`.
    pub fn is_temporal(&self) -> bool {
        match self {
            Self::Date | Self::Timestamp | Self::Time => true,
            Self::Boolean
            | Self::Int32
            | Self::Int64
            | Self::Float32
            | Self::Float64
            | Self::Decimal
            | Self::String
            | Self::Uuid
            | Self::Binary
            | Self::Json
            | Self::Unknown => false,
        }
    }

    /// Returns `true` for `String` and `Uuid`.
    pub fn is_string(&self) -> bool {
        match self {
            Self::String | Self::Uuid => true,
            Self::Boolean
            | Self::Int32
            | Self::Int64
            | Self::Float32
            | Self::Float64
            | Self::Decimal
            | Self::Date
            | Self::Timestamp
            | Self::Time
            | Self::Binary
            | Self::Json
            | Self::Unknown => false,
        }
    }

    /// Returns `true` for `Boolean` and `String`.
    pub fn is_categorical(&self) -> bool {
        match self {
            Self::Boolean | Self::String => true,
            Self::Int32
            | Self::Int64
            | Self::Float32
            | Self::Float64
            | Self::Decimal
            | Self::Date
            | Self::Timestamp
            | Self::Time
            | Self::Uuid
            | Self::Binary
            | Self::Json
            | Self::Unknown => false,
        }
    }
}
/// Target of a foreign key: a table and a column within it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForeignKeyRef {
/// Name of the referenced table.
pub table: String,
/// Name of the referenced column.
pub column: String,
}
impl ForeignKeyRef {
    /// Builds a reference to `column` of `table`, converting both arguments into owned strings.
    pub fn new(table: impl Into<String>, column: impl Into<String>) -> Self {
        let table: String = table.into();
        let column: String = column.into();
        Self { table, column }
    }
}
/// A relationship linking a column of one table to a column of another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableRelationship {
/// Name of the table on the "from" side.
pub from_table: String,
/// Name of the column on the "from" side.
pub from_column: String,
/// Name of the table on the "to" side.
pub to_table: String,
/// Name of the column on the "to" side.
pub to_column: String,
/// Cardinality of the relationship — presumably read in the from → to direction; confirm.
pub cardinality: RelationshipCardinality,
/// Confidence score for the relationship.
/// NOTE(review): range is not enforced here — presumably 0.0..=1.0; confirm with producers.
pub confidence: f64,
}
/// Cardinality of a relationship between two tables.
///
/// Variants are (de)serialized under snake_case names (for example `OneToMany` as
/// `"one_to_many"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RelationshipCardinality {
OneToOne,
OneToMany,
ManyToOne,
ManyToMany,
}