1use chrono::NaiveTime;
49use csv::StringRecord;
50use itertools::izip;
51use std::error::Error;
52use std::fmt;
53use std::fs;
54use std::io;
55use std::io::{BufRead, BufReader};
56use std::path::PathBuf;
57use structopt::StructOpt;
58
59#[derive(Debug, StructOpt)]
60#[structopt(name = "csvpsql", about = "Parse csv to sql tables.")]
61pub struct Opt {
62 #[structopt(long, help="Whenever the csv file has no header")]
63 pub no_header: bool,
64
65 #[structopt(long, help="To remove copy command")]
66 pub no_copy: bool,
67
68 #[structopt(long, help="To add drop query")]
69 pub drop: bool,
70
71 #[structopt(short, long, default_value = ",")]
72 pub delimiter: char,
73
74 #[structopt(
75 short,
76 long,
77 help = "Override column name. Separated by comma. Use the csv header or letters by default."
78 )]
79 pub columns: Option<String>,
80
81 #[structopt(
82 short,
83 long,
84 default_value = "",
85 help = "Empty string are null by default"
86 )]
87 pub null_as: String,
88
89 #[structopt(parse(from_os_str))]
90 pub file: Option<PathBuf>,
91
92 #[structopt(
93 short,
94 long,
95 required_unless = "file",
96 help = "File name is used as default"
97 )]
98 pub table_name: Option<String>,
99}
100
101#[derive(Debug, Clone, PartialEq, PartialOrd)]
103enum ColumnType {
104 Unknown,
105 Boolean,
106 Integer,
107 Numeric,
108 Date,
109 Timestamp,
110 Text,
111}
112
113impl fmt::Display for ColumnType {
114 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
115 write!(f, "{}", format!("{:?}", self).to_lowercase())
116 }
117}
118
119#[derive(Debug, Clone, PartialEq, PartialOrd)]
120enum ColumnConstraint {
121 Nullable,
122 NotNull,
123}
124
125impl fmt::Display for ColumnConstraint {
126 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
127 match *self {
128 ColumnConstraint::NotNull => write!(f, "not null"),
129 ColumnConstraint::Nullable => write!(f, ""),
130 }
131 }
132}
133
134struct Column {
135 name: String,
136 ctype: ColumnType,
137 constraint: ColumnConstraint,
138}
139
140impl fmt::Display for Column {
141 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
142 write!(f, "{} {} {}", self.name, self.ctype, self.constraint,)
143 }
144}
145
146type Columns = Vec<Column>;
147
148struct Table {
149 name: String,
150 columns: Columns,
151}
152
153impl fmt::Display for Table {
154 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155 writeln!(f, "create table {} (", self.name)?;
156 for column in &self.columns[0..self.columns.len() - 1] {
157 writeln!(f, " {},", column)?;
158 }
159 writeln!(f, " {}", self.columns[self.columns.len() - 1])?;
160 writeln!(f, ");")?;
161 Ok(())
162 }
163}
164
165fn try_parse_date(field: &str) -> Result<ColumnType, dtparse::ParseError> {
166 let (date, _) = dtparse::parse(field)?;
167 if date.time() == NaiveTime::from_hms(0, 0, 0) {
168 Ok(ColumnType::Date)
169 } else {
170 Ok(ColumnType::Timestamp)
171 }
172}
173
174fn find_type(xfield: &str) -> ColumnType {
175 let parsed_field = xfield.to_lowercase();
176 match parsed_field.as_str() {
177 field if field.is_empty() => ColumnType::Unknown,
178 "true" | "false" => ColumnType::Boolean,
179 field if field.parse::<isize>().is_ok() => ColumnType::Integer,
180 field if field.parse::<f64>().is_ok() => ColumnType::Numeric,
181 field => {
182 if let Ok(c) = try_parse_date(field) {
183 c
184 } else {
185 ColumnType::Text
186 }
187 }
188 }
189}
190
191fn find_constraint(field: &str, null_as: &str) -> ColumnConstraint {
192 if field == null_as {
193 ColumnConstraint::Nullable
194 } else {
195 ColumnConstraint::NotNull
196 }
197}
198
199fn get_table_name(table_name: Option<String>, file: Option<PathBuf>) -> String {
200 let table_name = match (&table_name, &file) {
201 (Some(name), _) => name,
202 (None, Some(file)) => file.file_stem().unwrap().to_str().unwrap(),
203 _ => "csvpsql", };
205 table_name.to_owned()
206}
207
208fn get_column_names(columns: Option<&str>, no_header: bool, header: &StringRecord) -> Vec<String> {
209 match (columns, no_header) {
210 (Some(names), _) => names.split(',').map(|x| x.to_string()).collect(),
211 (None, false) => header
212 .iter()
213 .map(|x| x.to_lowercase().replace(" ", "_"))
214 .collect(),
215 (None, true) => "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z"
216 .split(',')
217 .take(header.len())
218 .map(|x| x.to_string())
219 .collect(),
220 }
221}
222
223fn get_columns(
224 column_names: Vec<String>,
225 column_types: Vec<ColumnType>,
226 column_constraints: Vec<ColumnConstraint>,
227) -> Columns {
228 izip!(column_names, column_types, column_constraints)
229 .map(|(name, ctype, constraint)| Column {
230 name,
231 ctype,
232 constraint,
233 })
234 .collect()
235}
236
237pub fn run(opt: Opt) -> Result<(), Box<dyn Error>> {
238 let filename = opt.file.clone();
240 let reader: Box<dyn BufRead> = match opt.file.clone() {
241 None => Box::new(BufReader::new(io::stdin())),
242 Some(filename) => Box::new(BufReader::new(fs::File::open(filename).unwrap())),
243 };
244 let mut rdr = csv::ReaderBuilder::new()
245 .has_headers(!opt.no_header)
246 .delimiter(opt.delimiter as u8)
247 .from_reader(reader);
248
249 let number_of_columns = rdr.headers()?.len();
250
251 if rdr.records().peekable().peek().is_none() {
253 return Err(Box::from("csv file has no records."));
254 }
255 if let Some(names) = &opt.columns {
256 if names.len() != number_of_columns {
257 return Err(Box::from(
258 "There is more columns in the file than provided by columns option.",
259 ));
260 }
261 }
262
263 let mut column_types = vec![ColumnType::Unknown; number_of_columns];
265 let mut column_constraints = vec![ColumnConstraint::Nullable; number_of_columns];
266
267 for result in rdr.records() {
268 let record = result?;
269 for (i, field) in record.iter().enumerate() {
270 let field_type = find_type(field);
271 if field_type > column_types[i] {
272 column_types[i] = field_type
273 }
274 let field_constraint = find_constraint(field, &opt.null_as);
275 if field_constraint > column_constraints[i] {
276 column_constraints[i] = field_constraint
277 }
278 }
279 }
280
281 let column_types = column_types
282 .iter()
283 .map(|x| match x {
284 ColumnType::Unknown => ColumnType::Text,
285 a => a.clone(),
286 })
287 .collect();
288
289 let column_names = get_column_names(opt.columns.as_deref(), opt.no_header, rdr.headers()?);
291 let columns = get_columns(column_names, column_types, column_constraints);
292 let table_name = get_table_name(opt.table_name, opt.file);
293 let table = Table {
294 name: table_name,
295 columns,
296 };
297
298
299 if opt.drop {
300 println!("drop table if exists {};\n", table.name)
301 }
302
303 println!("{}", table);
304
305 let mut copy_cmd = format!(r"\copy {} from", table.name);
306 copy_cmd = match filename {
307 None => format!("{} stdin", copy_cmd),
308 Some(s) => format!("{} '{}'", copy_cmd, s.display()),
309 };
310 copy_cmd = format!("{} with csv delimiter '{}'", copy_cmd, opt.delimiter);
311 if !opt.no_header {
312 copy_cmd = format!("{} header", copy_cmd)
313 };
314 println!("{};", copy_cmd);
315
316 Ok(())
317}
318
319mod test {
320 #[allow(unused)]
321 use super::*;
322 #[allow(unused)]
323 use std::path::Path;
324
325 #[test]
326 fn test_find_type() {
327 assert_eq!(find_type("true"), ColumnType::Boolean);
328 assert_eq!(find_type("false"), ColumnType::Boolean);
329 assert_eq!(find_type("TRUE"), ColumnType::Boolean);
330 assert_eq!(find_type("0"), ColumnType::Integer);
331 assert_eq!(find_type("0.0"), ColumnType::Numeric);
332 }
333
334 #[test]
335 fn test_parse_date() {
336 assert_eq!(try_parse_date("2020-01-01"), Ok(ColumnType::Date));
337 assert_eq!(dtparse::parse(""), Ok(()));
338 assert_eq!(
339 try_parse_date("2020-01-01 18:30:04 +02:00"),
340 Ok(ColumnType::Timestamp)
341 );
342 }
343
344 #[test]
345 fn test_find_constraint() {
346 assert_eq!(find_constraint("", ""), ColumnConstraint::Nullable);
347 assert_eq!(find_constraint("smth", ""), ColumnConstraint::NotNull);
348 }
349
350 #[test]
351 fn test_get_table_name() {
352 assert_eq!(
353 get_table_name(
354 Option::from("t".to_owned()),
355 Option::from(PathBuf::from(Box::from(Path::new("f.csv"))))
356 ),
357 "t"
358 );
359 assert_eq!(
360 get_table_name(
361 None,
362 Option::from(PathBuf::from(Box::from(Path::new("f.csv"))))
363 ),
364 "f"
365 );
366 }
367}