use std::collections::HashMap;
use crate::{format, interpret, ChronoError, RenderZone};
/// An explicit request to convert one CSV column with a named format.
///
/// Derives the common value-type traits so callers can log, duplicate and
/// compare conversion requests (both fields are plain `String`s).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Conversion {
    /// Header name of the column to convert; `enrich` compares it for exact
    /// equality against the names in the CSV header row.
    pub column: String,
    /// Format identifier; `enrich` passes it to the crate's `format` lookup
    /// and also uses it as the suffix of the added column name.
    pub format: String,
}
/// Settings that control how `enrich` rewrites a CSV document.
pub struct EnrichOptions {
/// Explicit per-column conversions. Each one must name an existing header
/// column and a format accepted by `format`, otherwise `enrich` fails.
pub conversions: Vec<Conversion>,
/// When `true`, every column without an explicit conversion is passed to
/// `detect_column_format`, and columns with a detected format are converted too.
pub auto: bool,
/// When `true`, the rendered value overwrites the original cell (the original
/// text is kept if the cell cannot be rendered) and no column is added.
/// When `false`, the original cell is kept and a `<header>_<format>` column is
/// inserted right after it, left empty for cells that cannot be rendered.
pub replace: bool,
/// Zone handed to the renderer for every converted cell.
pub zone: RenderZone,
}
/// Smallest absolute value an integer cell may have during auto-detection:
/// `detect_column_format` rejects a column as soon as it meets a value whose
/// magnitude is below this threshold.
const AUTO_MIN_MAGNITUDE: i64 = 100_000_000;
/// Wraps a `csv` crate error in `ChronoError::Render`, prefixing its display
/// text with `csv: `.
fn csv_err(e: &csv::Error) -> ChronoError {
    let message = format!("csv: {e}");
    ChronoError::Render(message)
}
/// Renders selected columns of a CSV document and returns the rewritten CSV text.
///
/// The input is read with a header row and may contain rows of uneven length:
/// a missing cell is treated as the empty string, and cells beyond the header
/// width are not written to the output.
///
/// Columns to convert come from `opts.conversions` (matched by exact header
/// name) and, when `opts.auto` is set, from `detect_column_format` for every
/// column that has no explicit conversion.
///
/// With `opts.replace` the rendered text overwrites the cell, falling back to
/// the original text when the cell cannot be rendered. Otherwise the original
/// cell is kept and a `<header>_<format>` column is inserted right after it,
/// holding the rendered text or an empty string.
///
/// # Errors
///
/// * `ChronoError::Render` if the CSV cannot be read or written, or the
///   produced bytes are not valid UTF-8.
/// * `ChronoError::UnknownFormat` if a conversion names a column that is not
///   in the header row.
/// * Any error returned by `format` for a conversion's format identifier.
pub fn enrich(csv_text: &str, opts: &EnrichOptions) -> Result<String, ChronoError> {
    // --- Read the whole document up front: detection needs every row. ---
    let mut reader = csv::ReaderBuilder::new()
        .has_headers(true)
        .flexible(true)
        .from_reader(csv_text.as_bytes());

    let header_record = reader.headers().map_err(|e| csv_err(&e))?;
    let headers: Vec<String> = header_record.iter().map(str::to_owned).collect();

    let records = reader
        .records()
        .map(|parsed| parsed.map_err(|e| csv_err(&e)))
        .collect::<Result<Vec<csv::StringRecord>, ChronoError>>()?;

    // --- Build the conversion plan: column index -> format identifier. ---
    let mut plan: HashMap<usize, String> = HashMap::new();
    for conversion in &opts.conversions {
        let col = match headers.iter().position(|name| *name == conversion.column) {
            Some(found) => found,
            None => {
                return Err(ChronoError::UnknownFormat(format!(
                    "column not found: {}",
                    conversion.column
                )));
            }
        };
        // Validate the identifier now so a bad format fails the whole call
        // instead of silently producing unrendered cells later.
        format(&conversion.format)?;
        plan.insert(col, conversion.format.clone());
    }

    if opts.auto {
        for col in 0..headers.len() {
            // Explicit conversions always win over detection.
            if !plan.contains_key(&col) {
                if let Some(detected) = detect_column_format(&records, col) {
                    plan.insert(col, detected);
                }
            }
        }
    }

    // --- Write the output. ---
    let mut builder = csv::WriterBuilder::new();
    builder.terminator(csv::Terminator::Any(b'\n'));
    let mut wtr = builder.from_writer(Vec::new());

    let mut out_header: Vec<String> = Vec::new();
    for (col, h) in headers.iter().enumerate() {
        out_header.push(h.clone());
        if !opts.replace {
            if let Some(fmt) = plan.get(&col) {
                out_header.push(format!("{h}_{fmt}"));
            }
        }
    }
    wtr.write_record(&out_header).map_err(|e| csv_err(&e))?;

    for rec in &records {
        let mut row: Vec<String> = Vec::with_capacity(out_header.len());
        for col in 0..headers.len() {
            // Short rows are padded with empty cells.
            let cell = rec.get(col).unwrap_or("");
            match (plan.get(&col), opts.replace) {
                (Some(fmt), true) => {
                    let rendered = render_cell(cell, fmt, &opts.zone);
                    row.push(rendered.unwrap_or_else(|| cell.to_string()));
                }
                (Some(fmt), false) => {
                    let rendered = render_cell(cell, fmt, &opts.zone);
                    row.push(cell.to_string());
                    row.push(rendered.unwrap_or_default());
                }
                (None, _) => row.push(cell.to_string()),
            }
        }
        wtr.write_record(&row).map_err(|e| csv_err(&e))?;
    }

    let bytes = match wtr.into_inner() {
        Ok(buffer) => buffer,
        Err(e) => return Err(ChronoError::Render(format!("csv: {e}"))),
    };
    String::from_utf8(bytes).map_err(|e| ChronoError::Render(e.to_string()))
}
/// Renders a single cell with the format named `fmt`.
///
/// The trimmed cell is first tried as an `i64` through `decode_int`; if it
/// does not parse or the format rejects it, it is tried as an `f64` through
/// `decode_float`. Returns `None` when the format cannot be resolved, when
/// neither attempt decodes, or when rendering the decoded value yields `None`.
fn render_cell(cell: &str, fmt: &str, zone: &RenderZone) -> Option<String> {
    let decoder = format(fmt).ok()?;
    let text = cell.trim();

    let from_int = text
        .parse::<i64>()
        .ok()
        .and_then(|whole| decoder.decode_int(whole).ok());
    if let Some(instant) = from_int {
        return instant.render(zone);
    }

    text.parse::<f64>()
        .ok()
        .and_then(|fractional| decoder.decode_float(fractional).ok())
        .and_then(|instant| instant.render(zone))
}
/// Tries to find one format that explains every non-empty cell of column `idx`.
///
/// Empty (after trimming) and missing cells are skipped. The column is
/// rejected (`None`) when:
///
/// * any non-empty cell is not an `i64`,
/// * any value has a magnitude below `AUTO_MIN_MAGNITUDE`,
/// * the column has no non-empty cells at all,
/// * for any value the interpreter returns no candidates, or its first
///   candidate is flagged `sentinel` or has no `in_window` component with a
///   value above `0.0`,
/// * the first candidates of different values disagree on `format_id`.
///
/// Otherwise returns the `format_id` shared by all values.
fn detect_column_format(records: &[csv::StringRecord], idx: usize) -> Option<String> {
    let min_magnitude = AUTO_MIN_MAGNITUDE.unsigned_abs();

    let mut values: Vec<i64> = Vec::new();
    for rec in records {
        let cell = rec.get(idx).unwrap_or("").trim();
        if cell.is_empty() {
            continue;
        }
        let value: i64 = cell.parse().ok()?;
        // `unsigned_abs` rather than `abs`: `i64::MIN.abs()` overflows, which
        // panics in debug builds and wraps to a negative number in release.
        if value.unsigned_abs() < min_magnitude {
            return None;
        }
        values.push(value);
    }
    if values.is_empty() {
        return None;
    }

    // Every value is interpreted with the whole column as its neighbours.
    let ctx = interpret::InterpretContext {
        neighbours: &values,
        ..Default::default()
    };

    let mut chosen: Option<String> = None;
    for &value in &values {
        let candidates = interpret::interpret_int_with_context(value, &ctx);
        // Only the first candidate is considered.
        let top = candidates.first()?;
        if top.sentinel {
            return None;
        }
        let in_window = top
            .components
            .iter()
            .any(|(name, v)| *name == "in_window" && *v > 0.0);
        if !in_window {
            return None;
        }
        match &chosen {
            None => chosen = Some(top.format_id.to_string()),
            // A single disagreeing value makes the column ambiguous.
            Some(f) if f != top.format_id => return None,
            _ => {}
        }
    }
    chosen
}