use clap::{Parser, Subcommand};
use laminate::schema::{ExternalConstraint, InferenceConfig, InferredSchema, JsonType};
use serde_json::Value;
use std::io::Read;
// Top-level command-line arguments for the `laminate` binary, parsed with
// clap's derive API. Plain `//` comments are used here on purpose: clap turns
// `///` doc comments on derive items into help text, which would change what
// the program prints for `--help`.
#[derive(Parser)]
#[command(name = "laminate", version, about)]
struct Cli {
// The subcommand selected on the command line; `main` dispatches on it.
#[command(subcommand)]
command: Commands,
}
// Subcommands accepted by the CLI. `main` forwards each variant's fields to
// the matching `cmd_*` function. Plain `//` comments are used because clap
// would turn `///` doc comments into help text.
#[derive(Subcommand)]
enum Commands {
// Infer a schema from JSON input and print it (handled by `cmd_infer`).
Infer {
// Path of the JSON input; the value "-" makes `read_source` read stdin.
#[arg(short, long)]
source: String,
// Forwarded to `InferenceConfig::required_threshold`; defaults to 1.0.
// NOTE(review): presumably the fill rate a field needs to count as
// required -- confirm against the laminate library documentation.
#[arg(long, default_value = "1.0")]
required_threshold: f64,
// Output format: "json" prints a JSON document; any other value
// (including the default "table") prints the schema's text summary.
#[arg(short, long, default_value = "table")]
format: String,
},
// Audit JSON input against a schema and print a report (handled by
// `cmd_audit`).
Audit {
// Path of the JSON input; the value "-" makes `read_source` read stdin.
#[arg(short, long)]
source: String,
// Optional path to a JSON schema file whose "fields" object supplies
// per-field constraints; when omitted only the inferred schema is used.
#[arg(long)]
schema: Option<String>,
// Forwarded to `InferenceConfig::required_threshold`; defaults to 1.0.
#[arg(long, default_value = "1.0")]
required_threshold: f64,
// When set, every violation in the report is printed after the summary.
#[arg(short, long)]
verbose: bool,
},
// Parse JSON input and print a value from it (handled by `cmd_inspect`).
Inspect {
// Path of the JSON input; the value "-" makes `read_source` read stdin.
#[arg(short, long)]
source: String,
// Optional path expression passed to `FlexValue::at` to select a nested
// value; when omitted the whole document is the target.
#[arg(short, long)]
path: Option<String>,
// Optional target type for extraction. `cmd_inspect` recognises
// "string", "number"/"int"/"i64", "float"/"f64" and "bool"; any other
// value prints the target unconverted. Uses `-t` as the short flag.
#[arg(short = 't', long)]
as_type: Option<String>,
},
}
fn main() {
let cli = Cli::parse();
match cli.command {
Commands::Infer {
source,
required_threshold,
format,
} => cmd_infer(&source, required_threshold, &format),
Commands::Audit {
source,
schema,
required_threshold,
verbose,
} => cmd_audit(&source, schema.as_deref(), required_threshold, verbose),
Commands::Inspect {
source,
path,
as_type,
} => cmd_inspect(&source, path.as_deref(), as_type.as_deref()),
}
}
/// Reads the whole input named by `source` into a `String`.
///
/// * `source` - a file path, or `"-"` to read standard input until EOF.
///
/// On any read failure (missing file, permission error, input that is not
/// valid UTF-8, ...) an error message is written to stderr and the process
/// exits with status 1. Both the stdin and the file path report failures the
/// same way, so the function never panics on bad input.
fn read_source(source: &str) -> String {
    if source == "-" {
        let mut buf = String::new();
        match std::io::stdin().read_to_string(&mut buf) {
            Ok(_) => buf,
            Err(e) => {
                // Report stdin failures like file failures instead of panicking.
                eprintln!("Error reading stdin: {e}");
                std::process::exit(1);
            }
        }
    } else {
        match std::fs::read_to_string(source) {
            Ok(contents) => contents,
            Err(e) => {
                eprintln!("Error reading {source}: {e}");
                std::process::exit(1);
            }
        }
    }
}
/// Parses `json` and returns it as a list of rows.
///
/// A top-level array yields its elements; a top-level object yields a
/// one-element list holding that object. A parse error or any other top-level
/// value prints a message to stderr and exits the process with status 1.
fn parse_rows(json: &str) -> Vec<Value> {
    let parsed: Value = match serde_json::from_str(json) {
        Ok(v) => v,
        Err(e) => {
            eprintln!("Error parsing JSON: {e}");
            std::process::exit(1);
        }
    };

    match parsed {
        Value::Array(items) => items,
        // A lone object is treated as a single-row data set.
        single @ Value::Object(_) => vec![single],
        _ => {
            eprintln!("Expected a JSON array or object");
            std::process::exit(1);
        }
    }
}
fn cmd_infer(source: &str, required_threshold: f64, format: &str) {
let json = read_source(source);
let rows = parse_rows(&json);
let config = InferenceConfig {
required_threshold,
..Default::default()
};
let schema = InferredSchema::from_values_with_config(&rows, &config);
match format {
"json" => {
let mut fields = serde_json::Map::new();
for name in &schema.field_order {
if let Some(defn) = schema.fields.get(name) {
let mut field_obj = serde_json::Map::new();
field_obj.insert(
"type".into(),
Value::String(
defn.dominant_type
.map(|t| t.to_string())
.unwrap_or_else(|| "null".into()),
),
);
field_obj.insert("fill_rate".into(), serde_json::json!(defn.fill_rate()));
field_obj.insert("null_count".into(), serde_json::json!(defn.null_count));
field_obj.insert(
"present_count".into(),
serde_json::json!(defn.present_count),
);
field_obj.insert("absent_count".into(), serde_json::json!(defn.absent_count));
field_obj.insert("mixed_type".into(), serde_json::json!(defn.is_mixed_type()));
field_obj.insert(
"consistency".into(),
serde_json::json!(defn.type_consistency()),
);
if !defn.sample_values.is_empty() {
field_obj.insert(
"sample_values".into(),
serde_json::json!(defn.sample_values),
);
}
fields.insert(name.clone(), Value::Object(field_obj));
}
}
let output = serde_json::json!({
"total_records": schema.total_records,
"fields": fields,
});
println!("{}", serde_json::to_string_pretty(&output).unwrap());
}
_ => {
println!("Schema inferred from {} records:\n", schema.total_records);
println!("{}", schema.summary());
}
}
}
/// Implements the `audit` subcommand: audits the rows in `source` against a
/// schema and prints the report summary to stdout.
///
/// * `source` - input path, or `"-"` for stdin (see `read_source`).
/// * `schema_path` - optional path to a JSON schema file. Its top-level
///   `"fields"` object maps field names to objects that may carry `"type"`
///   (see `parse_json_type`), `"required"` (default `false`), `"nullable"`
///   (default: the opposite of `required`) and `"max_length"`. These become
///   external constraints layered on the schema inferred from the data. A
///   file without a `"fields"` object contributes no constraints.
/// * `required_threshold` - copied into `InferenceConfig::required_threshold`.
/// * `verbose` - when `true`, each violation is printed after the summary.
///
/// Failing to read or parse the schema file prints an error to stderr and
/// exits with status 1.
fn cmd_audit(source: &str, schema_path: Option<&str>, required_threshold: f64, verbose: bool) {
    let json = read_source(source);
    let rows = parse_rows(&json);
    let config = InferenceConfig {
        required_threshold,
        ..Default::default()
    };

    let schema = if let Some(path) = schema_path {
        let schema_json = std::fs::read_to_string(path).unwrap_or_else(|e| {
            eprintln!("Error reading schema {path}: {e}");
            std::process::exit(1);
        });
        let schema_val: Value = serde_json::from_str(&schema_json).unwrap_or_else(|e| {
            eprintln!("Error parsing schema JSON: {e}");
            std::process::exit(1);
        });

        let mut constraints = std::collections::HashMap::new();
        if let Some(fields) = schema_val.get("fields").and_then(|f| f.as_object()) {
            for (name, defn) in fields {
                // An absent or unrecognised type name leaves the type unconstrained.
                let expected_type = defn
                    .get("type")
                    .and_then(|t| t.as_str())
                    .and_then(parse_json_type);
                let required = defn
                    .get("required")
                    .and_then(|r| r.as_bool())
                    .unwrap_or(false);
                // Unless stated otherwise, optional fields may be null and
                // required fields may not.
                let nullable = defn
                    .get("nullable")
                    .and_then(|n| n.as_bool())
                    .unwrap_or(!required);
                // Clamp before converting: a bare `as usize` would silently
                // wrap a large u64 on targets where usize is narrower than
                // 64 bits, yielding a wrong, smaller limit. After the clamp
                // the cast is lossless.
                let max_length = defn
                    .get("max_length")
                    .and_then(|m| m.as_u64())
                    .map(|m| m.min(usize::MAX as u64) as usize);

                constraints.insert(
                    name.clone(),
                    ExternalConstraint {
                        expected_type,
                        required,
                        nullable,
                        max_length,
                        ..Default::default()
                    },
                );
            }
        }
        InferredSchema::from_values_with_config(&rows, &config).with_constraints(constraints)
    } else {
        InferredSchema::from_values_with_config(&rows, &config)
    };

    let report = schema.audit(&rows);
    println!("{}", report.summary());
    if verbose {
        println!();
        for v in &report.violations {
            println!("  {v}");
        }
    }
}
/// Implements the `inspect` subcommand: parses the JSON in `source`,
/// optionally navigates to `path`, and prints the resulting value.
///
/// * `source` - input path, or `"-"` for stdin (see `read_source`).
/// * `path` - optional path expression handed to `FlexValue::at`.
/// * `as_type` - optional extraction type: `"string"`, `"number"`/`"int"`/
///   `"i64"`, `"float"`/`"f64"` or `"bool"`. Any other value (or `None`)
///   prints the target value as-is.
///
/// Parse, path and extraction failures each print an error to stderr and
/// exit with status 1.
fn cmd_inspect(source: &str, path: Option<&str>, as_type: Option<&str>) {
    let json = read_source(source);

    let root = match laminate::FlexValue::from_json(&json) {
        Ok(value) => value,
        Err(e) => {
            eprintln!("Error parsing JSON: {e}");
            std::process::exit(1);
        }
    };

    let target = match path {
        None => root,
        Some(p) => match root.at(p) {
            Ok(value) => value,
            Err(e) => {
                eprintln!("Path error: {e}");
                std::process::exit(1);
            }
        },
    };

    // Extracts `target` as the given type and prints it, or reports the
    // extraction error and exits. Shared by every typed arm below.
    macro_rules! print_as {
        ($ty:ty) => {{
            let val: $ty = target.extract_root().unwrap_or_else(|e| {
                eprintln!("Extract error: {e}");
                std::process::exit(1);
            });
            println!("{val}");
        }};
    }

    match as_type {
        Some("string") => print_as!(String),
        Some("number" | "int" | "i64") => print_as!(i64),
        Some("float" | "f64") => print_as!(f64),
        Some("bool") => print_as!(bool),
        _ => println!("{target}"),
    }
}
/// Maps a type name to a `JsonType`, ignoring letter case.
///
/// Accepts the JSON-style names as well as several SQL-style aliases (for
/// example `"varchar"`, `"bigint"`, `"jsonb"`). Returns `None` for any name
/// that is not recognised.
fn parse_json_type(s: &str) -> Option<JsonType> {
    let normalized = s.to_lowercase();
    let kind = match normalized.as_str() {
        "string" | "text" | "varchar" => JsonType::String,
        "integer" | "int" | "bigint" | "smallint" => JsonType::Integer,
        "float" | "double" | "real" | "numeric" | "decimal" => JsonType::Float,
        "boolean" | "bool" => JsonType::Bool,
        "array" => JsonType::Array,
        "object" | "json" | "jsonb" => JsonType::Object,
        "null" => JsonType::Null,
        _ => return None,
    };
    Some(kind)
}