use finetype_core::Taxonomy;
/// Per-column FineType annotations read from a JSON Schema document.
///
/// Derives `Debug` and `Clone` in addition to `Default` so the value can be
/// logged, inspected in test failures, and copied by callers.
#[derive(Debug, Clone, Default)]
pub struct SchemaExtensions {
    /// Maps a column (schema property) name to its `(label, confidence)` pair.
    ///
    /// Either element is `None` when the corresponding `x-finetype-label` /
    /// `x-finetype-confidence` keyword was absent or had an unexpected JSON type.
    pub by_column: std::collections::HashMap<String, (Option<String>, Option<f64>)>,
}
impl SchemaExtensions {
pub fn extract(schema: &serde_json::Value) -> Self {
let mut ext = SchemaExtensions::default();
if let Some(props) = schema.get("properties").and_then(|p| p.as_object()) {
for (col_name, col_schema) in props {
let label = col_schema
.get("x-finetype-label")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let confidence = col_schema
.get("x-finetype-confidence")
.and_then(|v| v.as_f64());
ext.by_column.insert(col_name.clone(), (label, confidence));
}
}
ext
}
pub fn get(&self, col_name: &str) -> (Option<String>, Option<f64>) {
self.by_column
.get(col_name)
.cloned()
.unwrap_or((None, None))
}
}
/// Wraps `name` in double quotes for use as a quoted SQL identifier.
///
/// Any double quote inside `name` is doubled (`"` becomes `""`) so the
/// resulting identifier stays well-formed.
pub fn format_column_name(name: &str) -> String {
    // Two extra bytes for the surrounding quotes; embedded quotes may grow it.
    let mut quoted = String::with_capacity(name.len() + 2);
    quoted.push('"');
    for ch in name.chars() {
        if ch == '"' {
            // Emit the escaping quote before the quote itself.
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
/// Builds a comma-separated projection list covering every column in
/// `headers`, in order.
///
/// Each column's expression comes from `build_transform_projection_one`; the
/// pieces are joined with `", "`. An empty `headers` slice yields an empty
/// string.
pub fn build_transform_projection(
    headers: &[String],
    extensions: &SchemaExtensions,
    taxonomy: &Taxonomy,
    try_wrap: bool,
) -> String {
    let mut columns = Vec::with_capacity(headers.len());
    for header in headers {
        columns.push(build_transform_projection_one(
            header, extensions, taxonomy, try_wrap,
        ));
    }
    columns.join(", ")
}
/// Builds the projection expression for a single column.
///
/// The column is passed through unchanged (as a quoted identifier) when any
/// of the following holds:
/// - `extensions` has no label for `header`,
/// - `taxonomy.ddl_info` has no entry for that label,
/// - the entry's `duckdb_type` is `VARCHAR`.
///
/// Otherwise the result is `<expr> AS <quoted column>`, where `<expr>` is:
/// - the entry's transform template with `{col}` replaced by the quoted
///   column, wrapped in `TRY(...)` when `try_wrap` is set; or
/// - if there is no template, `CAST(<col> AS <type>)`, or `TRY_CAST(...)`
///   when `try_wrap` is set.
pub fn build_transform_projection_one(
    header: &str,
    extensions: &SchemaExtensions,
    taxonomy: &Taxonomy,
    try_wrap: bool,
) -> String {
    let quoted = format_column_name(header);

    // The confidence value plays no part in choosing the expression.
    let (label, _) = extensions.get(header);
    let info = match label.as_ref().and_then(|name| taxonomy.ddl_info(name)) {
        Some(found) if found.duckdb_type != "VARCHAR" => found,
        // No label, unknown label, or already VARCHAR: nothing to convert.
        _ => return quoted,
    };

    match (info.transform.as_ref(), try_wrap) {
        (Some(template), true) => format!(
            "TRY({}) AS {}",
            template.replace("{col}", &quoted),
            quoted
        ),
        (Some(template), false) => {
            format!("{} AS {}", template.replace("{col}", &quoted), quoted)
        }
        (None, true) => format!(
            "TRY_CAST({} AS {}) AS {}",
            quoted, info.duckdb_type, quoted
        ),
        (None, false) => format!("CAST({} AS {}) AS {}", quoted, info.duckdb_type, quoted),
    }
}