use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use serde::Serialize;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqlDialect {
Mysql,
Postgresql,
Mssql,
Oracle,
Sqlite,
}
impl SqlDialect {
pub fn from_str_loose(s: &str) -> Option<Self> {
match s.to_ascii_lowercase().as_str() {
"mysql" | "mariadb" => Some(Self::Mysql),
"postgresql" | "postgres" | "pg" => Some(Self::Postgresql),
"mssql" | "sqlserver" | "sql-server" => Some(Self::Mssql),
"oracle" => Some(Self::Oracle),
"sqlite" => Some(Self::Sqlite),
_ => None,
}
}
}
impl std::fmt::Display for SqlDialect {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Mysql => write!(f, "MySQL"),
Self::Postgresql => write!(f, "PostgreSQL"),
Self::Mssql => write!(f, "SQL Server"),
Self::Oracle => write!(f, "Oracle"),
Self::Sqlite => write!(f, "SQLite"),
}
}
}
#[derive(Debug, Clone)]
pub struct Config {
pub input_path: PathBuf,
pub out_dir: PathBuf,
pub force: bool,
pub dialect: Option<SqlDialect>,
pub schema_only: bool,
pub data_only: bool,
pub tables: Option<HashSet<String>>,
pub workers: usize,
pub shard_rows: Option<usize>,
pub postgres_ddl: bool,
pub dry_run: bool,
pub null_marker: String,
pub tsv: bool,
pub delimiter: u8,
}
impl Default for Config {
fn default() -> Self {
Self {
input_path: PathBuf::new(),
out_dir: PathBuf::from("out"),
force: false,
dialect: None,
schema_only: false,
data_only: false,
tables: None,
workers: num_workers(),
shard_rows: None,
postgres_ddl: true,
dry_run: false,
null_marker: "\\N".to_string(),
tsv: false,
delimiter: b',',
}
}
}
impl Config {
pub fn effective_delimiter(&self) -> u8 {
if self.tsv { b'\t' } else { self.delimiter }
}
pub fn data_extension(&self) -> &str {
if self.tsv { "tsv" } else { "csv" }
}
}
pub fn num_workers() -> usize {
std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(4)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StatementKind {
CreateTable,
DropTable,
InsertInto,
CopyData,
SetVariable,
LockTable,
UnlockTables,
Comment,
GoBatch,
Other,
}
#[derive(Debug, Clone)]
pub struct StatementEntry {
pub kind: StatementKind,
pub table_name: Option<String>,
pub byte_offset: u64,
pub byte_length: u64,
pub approx_line: u64,
}
pub struct StatementIndex {
pub entries: Vec<StatementEntry>,
pub tables_seen: HashSet<String>,
}
#[derive(Debug, Clone)]
pub struct TableDef {
pub name: String,
pub columns: Vec<ColumnDef>,
pub primary_key: Option<Vec<String>>,
pub indexes: Vec<IndexDef>,
}
#[derive(Debug, Clone)]
pub struct ColumnDef {
pub name: String,
pub pg_type: String,
pub nullable: bool,
pub default: Option<String>,
pub is_identity: bool,
}
#[derive(Debug, Clone)]
pub struct IndexDef {
pub name: String,
pub columns: Vec<String>,
pub unique: bool,
}
#[derive(Debug, Clone, PartialEq)]
pub enum SqlValue {
Null,
Integer(i64),
Float(f64),
String(String),
HexString(Vec<u8>),
BitLiteral(u64),
}
#[derive(Debug, Serialize)]
pub struct Summary {
pub tables_processed: usize,
pub total_rows: u64,
pub per_table: HashMap<String, TableSummary>,
pub errors: u64,
pub warnings: u64,
}
#[derive(Debug, Serialize)]
pub struct TableSummary {
pub row_count: u64,
pub csv_path: PathBuf,
}