use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// Aggregates foreign-key definitions, cardinality statistics, unique constraints and
/// check constraints into one serializable record.
///
/// Serde behaviour: `unique_constraints` and `check_constraints` fall back to an empty
/// list when missing from the input and are left out of the output when empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntegrityFingerprint {
/// Foreign-key definitions.
pub foreign_keys: Vec<ForeignKeyDef>,
/// Cardinality statistics, looked up by string key.
/// NOTE(review): the key presumably names a relationship or foreign key — confirm with callers.
pub cardinality_stats: HashMap<String, CardinalityStats>,
/// Unique constraints; optional on input, skipped on output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub unique_constraints: Vec<UniqueConstraint>,
/// Check constraints; optional on input, skipped on output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub check_constraints: Vec<CheckConstraint>,
}
impl IntegrityFingerprint {
    /// Creates a fingerprint with no foreign keys, no cardinality statistics and no
    /// constraints. Equivalent to [`Default::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `fk` to the end of `foreign_keys`.
    pub fn add_foreign_key(&mut self, fk: ForeignKeyDef) {
        self.foreign_keys.push(fk);
    }

    /// Stores `stats` under `key`, replacing whatever was previously stored under the
    /// same key.
    pub fn add_cardinality(&mut self, key: impl Into<String>, stats: CardinalityStats) {
        let owned_key: String = key.into();
        self.cardinality_stats.insert(owned_key, stats);
    }
}

impl Default for IntegrityFingerprint {
    /// Builds the empty fingerprint: every collection starts out empty.
    fn default() -> Self {
        Self {
            foreign_keys: Vec::new(),
            cardinality_stats: HashMap::new(),
            unique_constraints: Vec::new(),
            check_constraints: Vec::new(),
        }
    }
}
/// Describes one foreign key between two tables, together with quality metrics.
///
/// `ForeignKeyDef::new` produces a key with `inferred = false`, `confidence = 1.0`,
/// `coverage = 1.0`, `has_orphans = false` and `orphan_rate = 0.0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForeignKeyDef {
/// Name of the foreign key.
pub name: String,
/// Table on the "from" side of the key.
/// NOTE(review): presumably the referencing (child) table — confirm.
pub from_table: String,
/// Columns of `from_table` that make up the key.
pub from_columns: Vec<String>,
/// Table on the "to" side of the key.
/// NOTE(review): presumably the referenced (parent) table — confirm.
pub to_table: String,
/// Columns of `to_table` that make up the key.
pub to_columns: Vec<String>,
/// `false` for keys built with `new`; set to `true` by `as_inferred`.
pub inferred: bool,
/// Confidence attached to the key: `1.0` from `new`, otherwise the value passed to
/// `as_inferred`. No range is enforced by this type.
pub confidence: f64,
/// Coverage metric; `new` initialises it to `1.0`.
/// NOTE(review): presumably the fraction of "from" rows with a matching "to" row — confirm.
pub coverage: f64,
/// Orphan flag; `new` initialises it to `false`.
/// NOTE(review): presumably true when some "from" rows have no matching "to" row — confirm.
pub has_orphans: bool,
/// Orphan rate; `new` initialises it to `0.0`.
/// NOTE(review): presumably a fraction in `[0, 1]` — not enforced here, confirm.
pub orphan_rate: f64,
}
impl ForeignKeyDef {
    /// Builds a non-inferred foreign key from its name, tables and column lists.
    ///
    /// The quality metrics start at their "clean" values: `confidence` and `coverage`
    /// are `1.0`, `has_orphans` is `false` and `orphan_rate` is `0.0`.
    pub fn new(
        name: impl Into<String>,
        from_table: impl Into<String>,
        from_columns: Vec<String>,
        to_table: impl Into<String>,
        to_columns: Vec<String>,
    ) -> Self {
        let name = name.into();
        let from_table = from_table.into();
        let to_table = to_table.into();
        Self {
            name,
            from_table,
            from_columns,
            to_table,
            to_columns,
            inferred: false,
            confidence: 1.0,
            coverage: 1.0,
            has_orphans: false,
            orphan_rate: 0.0,
        }
    }

    /// Consumes the key and returns it marked as inferred with the given `confidence`.
    ///
    /// All other fields are carried over unchanged; `confidence` is stored as given.
    pub fn as_inferred(self, confidence: f64) -> Self {
        Self {
            inferred: true,
            confidence,
            ..self
        }
    }
}
/// Summary statistics about child counts for a relationship.
///
/// `CardinalityStats::new` fills in the minimum, maximum, mean and median and leaves
/// `std_dev_children` and `one_to_one_rate` at `0.0` and `child_count_distribution`
/// at `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CardinalityStats {
/// Smallest child count.
pub min_children: u64,
/// Largest child count.
pub max_children: u64,
/// Mean child count.
pub mean_children: f64,
/// Median child count.
pub median_children: f64,
/// Standard deviation of the child count; `0.0` unless set after construction.
pub std_dev_children: f64,
/// Optional bucketed distribution of child counts; omitted from serialized output
/// when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub child_count_distribution: Option<Vec<CardinalityBucket>>,
/// Rate compared against `0.8` by `infer_relationship_type`.
/// NOTE(review): presumably the fraction of parents with exactly one child — confirm.
pub one_to_one_rate: f64,
}
impl CardinalityStats {
    /// `one_to_one_rate` above which an optional relationship that occasionally has
    /// several children is still classified as zero-or-one.
    const ZERO_OR_ONE_RATE_THRESHOLD: f64 = 0.8;

    /// Creates statistics from the minimum, maximum, mean and median child counts.
    ///
    /// `std_dev_children` and `one_to_one_rate` start at `0.0` and
    /// `child_count_distribution` at `None`; set them afterwards if known.
    pub fn new(min: u64, max: u64, mean: f64, median: f64) -> Self {
        Self {
            min_children: min,
            max_children: max,
            mean_children: mean,
            median_children: median,
            std_dev_children: 0.0,
            child_count_distribution: None,
            one_to_one_rate: 0.0,
        }
    }

    /// Classifies the relationship from the observed child counts.
    ///
    /// Decision table, evaluated top to bottom on `(min_children, max_children)`:
    ///
    /// | min | max      | extra condition          | result       |
    /// |-----|----------|--------------------------|--------------|
    /// | 0   | 0 or 1   |                          | `ZeroOrOne`  |
    /// | any | 1        |                          | `OneToOne`   |
    /// | 0   | anything | `one_to_one_rate > 0.8`  | `ZeroOrOne`  |
    /// | 0   | anything |                          | `ZeroToMany` |
    /// | otherwise      |                          | `OneToMany`  |
    ///
    /// A minimum of zero means some parents have no child, so such relationships are
    /// never reported as `OneToOne` or `OneToMany`.
    pub fn infer_relationship_type(&self) -> RelationshipType {
        match (self.min_children, self.max_children) {
            // Some parents have no child and none has more than one.
            (0, 0..=1) => RelationshipType::ZeroOrOne,
            (_, 1) => RelationshipType::OneToOne,
            // Optional relationship that is overwhelmingly single-child.
            (0, _) if self.one_to_one_rate > Self::ZERO_OR_ONE_RATE_THRESHOLD => {
                RelationshipType::ZeroOrOne
            }
            (0, _) => RelationshipType::ZeroToMany,
            _ => RelationshipType::OneToMany,
        }
    }
}
/// One bucket of a child-count distribution (see
/// `CardinalityStats::child_count_distribution`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CardinalityBucket {
/// Lower bound of the bucket.
/// NOTE(review): whether the bound is inclusive is not visible here — confirm.
pub lower: u64,
/// Upper bound of the bucket, if any.
/// NOTE(review): `None` presumably marks an open-ended last bucket — confirm.
pub upper: Option<u64>,
/// Proportion associated with this bucket.
/// NOTE(review): presumably a fraction in `[0, 1]` of parents falling in the bucket — confirm.
pub proportion: f64,
}
/// Classification of a parent/child relationship.
///
/// Variants are serialized in snake_case because of `rename_all = "snake_case"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RelationshipType {
/// Serialized as `one_to_one`.
OneToOne,
/// Serialized as `zero_or_one`.
ZeroOrOne,
/// Serialized as `one_to_many`.
OneToMany,
/// Serialized as `zero_to_many`.
ZeroToMany,
}
/// A uniqueness constraint over one or more columns of a table, with the result of
/// checking it.
///
/// `UniqueConstraint::new` starts with `is_satisfied = true` and `duplicate_groups = 0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UniqueConstraint {
/// Table the constraint belongs to.
pub table: String,
/// Columns covered by the constraint.
pub columns: Vec<String>,
/// Whether the constraint holds; `true` after `new`.
pub is_satisfied: bool,
/// Number of duplicate groups; `0` after `new`.
/// NOTE(review): presumably counts distinct key values that occur more than once — confirm.
pub duplicate_groups: u64,
}
impl UniqueConstraint {
    /// Creates a constraint on `columns` of `table` that is initially recorded as
    /// satisfied, with zero duplicate groups.
    pub fn new(table: impl Into<String>, columns: Vec<String>) -> Self {
        let table = table.into();
        Self {
            table,
            columns,
            is_satisfied: true,
            duplicate_groups: 0,
        }
    }
}
/// A named check constraint on a table, with the rate at which it is satisfied.
///
/// `CheckConstraint::new` starts with an empty `columns` list and
/// `satisfaction_rate = 1.0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckConstraint {
/// Table the constraint belongs to.
pub table: String,
/// Name of the constraint.
pub name: String,
/// Constraint expression, stored as text.
/// NOTE(review): presumably a SQL boolean expression — the dialect is not visible here.
pub expression: String,
/// Columns associated with the constraint; empty after `new`.
/// NOTE(review): presumably the columns referenced by `expression` — confirm.
pub columns: Vec<String>,
/// Satisfaction rate; `1.0` after `new`.
/// NOTE(review): presumably the fraction of rows passing the check, in `[0, 1]` — confirm.
pub satisfaction_rate: f64,
}
impl CheckConstraint {
    /// Creates a check constraint for `table` with the given `name` and `expression`.
    ///
    /// The column list starts empty and `satisfaction_rate` starts at `1.0`.
    pub fn new(
        table: impl Into<String>,
        name: impl Into<String>,
        expression: impl Into<String>,
    ) -> Self {
        let (table, name, expression) = (table.into(), name.into(), expression.into());
        Self {
            table,
            name,
            expression,
            columns: Vec::new(),
            satisfaction_rate: 1.0,
        }
    }
}