csv_managed/
schema_cmd.rs1use std::collections::HashSet;
2use std::str::FromStr;
3
4use anyhow::{Context, Result, anyhow};
5use log::info;
6
7use crate::cli::SchemaArgs;
8use crate::schema::{ColumnMeta, ColumnType, Schema, ValueReplacement};
9
10pub fn execute(args: &SchemaArgs) -> Result<()> {
11 let mut columns = parse_columns(&args.columns)
12 .with_context(|| "Parsing --column definitions for schema creation".to_string())?;
13 apply_replacements(&mut columns, &args.replacements)
14 .with_context(|| "Parsing --replace definitions for schema creation".to_string())?;
15
16 let schema = Schema { columns };
17 schema
18 .save(&args.output)
19 .with_context(|| format!("Writing schema to {:?}", args.output))?;
20
21 info!(
22 "Defined schema with {} column(s) written to {:?}",
23 schema.columns.len(),
24 args.output
25 );
26
27 Ok(())
28}
29
30fn parse_columns(specs: &[String]) -> Result<Vec<ColumnMeta>> {
31 let mut columns = Vec::new();
32 let mut seen = HashSet::new();
33 let mut output_names = HashSet::new();
34
35 for raw in specs {
36 for token in raw.split(',') {
37 let token = token.trim();
38 if token.is_empty() {
39 continue;
40 }
41 let (name_part, type_part) = token.split_once(':').ok_or_else(|| {
42 anyhow!("Column definition '{token}' must use the form name:type")
43 })?;
44
45 let name = name_part.trim();
46 if name.is_empty() {
47 return Err(anyhow!(
48 "Column name cannot be empty in definition '{token}'"
49 ));
50 }
51 if !seen.insert(name.to_string()) {
52 return Err(anyhow!("Duplicate column name '{name}' provided"));
53 }
54
55 let (type_raw, rename_raw) = if let Some((ty, rename)) = type_part.split_once("->") {
56 (ty, Some(rename))
57 } else {
58 (type_part, None)
59 };
60
61 let column_type = ColumnType::from_str(type_raw.trim())
62 .map_err(|err| anyhow!("Column '{name}' has invalid type '{type_part}': {err}"))?;
63
64 let rename = rename_raw
65 .map(|value| value.trim())
66 .filter(|value| !value.is_empty())
67 .map(|value| value.to_string());
68
69 if let Some(ref alias) = rename {
70 if alias != name && seen.contains(alias) {
71 return Err(anyhow!(
72 "Output name '{alias}' conflicts with an existing column name"
73 ));
74 }
75 if !output_names.insert(alias.clone()) {
76 return Err(anyhow!("Duplicate output column name '{alias}' provided"));
77 }
78 }
79
80 if rename.is_none() {
81 output_names.insert(name.to_string());
82 }
83
84 columns.push(ColumnMeta {
85 name: name.to_string(),
86 datatype: column_type,
87 rename,
88 value_replacements: Vec::new(),
89 });
90 }
91 }
92
93 if columns.is_empty() {
94 return Err(anyhow!("At least one --column definition is required"));
95 }
96
97 Ok(columns)
98}
99
100fn apply_replacements(columns: &mut [ColumnMeta], specs: &[String]) -> Result<()> {
101 if specs.is_empty() {
102 return Ok(());
103 }
104 let mut lookup = HashSet::new();
105 for column in columns.iter() {
106 lookup.insert(column.name.clone());
107 }
108
109 for raw in specs {
110 let spec = raw.trim();
111 if spec.is_empty() {
112 continue;
113 }
114 let (column_name, mapping) = spec.split_once('=').ok_or_else(|| {
115 anyhow!("Replacement '{spec}' must use the form column=value->new_value")
116 })?;
117 let column_name = column_name.trim();
118 if column_name.is_empty() {
119 return Err(anyhow!("Replacement '{spec}' is missing a column name"));
120 }
121 if !lookup.contains(column_name) {
122 return Err(anyhow!(
123 "Replacement references unknown column '{column_name}'"
124 ));
125 }
126 let (from_raw, to_raw) = mapping.split_once("->").ok_or_else(|| {
127 anyhow!(
128 "Replacement '{spec}' must include '->' to separate original and replacement values"
129 )
130 })?;
131 let from = from_raw.trim().to_string();
132 let to = to_raw.trim().to_string();
133 let column = columns
134 .iter_mut()
135 .find(|c| c.name == column_name)
136 .expect("column should exist");
137 if let Some(existing) = column
138 .value_replacements
139 .iter()
140 .position(|r| r.from == from)
141 {
142 column.value_replacements.remove(existing);
143 }
144 column
145 .value_replacements
146 .push(ValueReplacement { from, to });
147 }
148
149 Ok(())
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned spec list the parsers take.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    // Definitions may be comma-separated within one spec and spread over
    // several specs; order is preserved.
    #[test]
    fn parse_columns_accepts_comma_and_repeats() {
        let parsed =
            parse_columns(&owned(&["id:integer,name:string", "amount:float"])).expect("parsed");

        let names: Vec<&str> = parsed.iter().map(|column| column.name.as_str()).collect();
        assert_eq!(names, ["id", "name", "amount"]);

        let expected = [ColumnType::Integer, ColumnType::String, ColumnType::Float];
        for (column, datatype) in parsed.iter().zip(expected.iter()) {
            assert_eq!(&column.datatype, datatype);
        }
    }

    // The same column name may not be defined twice.
    #[test]
    fn duplicate_columns_are_rejected() {
        let failure = parse_columns(&owned(&["id:integer,id:string"])).unwrap_err();
        assert!(failure.to_string().contains("Duplicate column name"));
    }

    // A definition without `:type` is malformed.
    #[test]
    fn missing_type_is_rejected() {
        let failure = parse_columns(&owned(&["id"])).unwrap_err();
        assert!(failure.to_string().contains("must use the form"));
    }

    // `name:type->alias` records the alias; plain definitions have none.
    #[test]
    fn parse_columns_supports_output_rename() {
        let parsed =
            parse_columns(&owned(&["id:integer->Identifier,name:string"])).expect("parsed");
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].rename.as_deref(), Some("Identifier"));
        assert!(parsed[1].rename.is_none());
    }

    // Two columns may not be renamed to the same output name.
    #[test]
    fn duplicate_output_names_are_rejected() {
        let input = owned(&["id:integer->Identifier", "code:string->Identifier"]);
        let failure = parse_columns(&input).unwrap_err();
        assert!(failure.to_string().contains("Duplicate output column name"));
    }

    // A well-formed replacement ends up on the named column.
    #[test]
    fn replacements_apply_to_columns() {
        let mut parsed = parse_columns(&owned(&["status:string"])).expect("parsed");
        apply_replacements(&mut parsed, &owned(&["status=pending->shipped"])).expect("applied");

        let rules = &parsed[0].value_replacements;
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].from, "pending");
        assert_eq!(rules[0].to, "shipped");
    }

    // Replacements must reference a column that was defined.
    #[test]
    fn replacements_validate_column_names() {
        let mut parsed = parse_columns(&owned(&["status:string"])).expect("parsed");
        let failure =
            apply_replacements(&mut parsed, &owned(&["missing=pending->shipped"])).unwrap_err();
        assert!(failure.to_string().contains("unknown column"));
    }
}