csvpsql/
lib.rs

1//! `csvpsql` generate Postgres table from csv file.
2//!
3//! # Installation
4//!
5//! ```bash
6//! cargo install csvpsql
7//! ```
8//!
9//! # Usage
10//!
11//! ```bash
12//! USAGE:
13//!     csvpsql [FLAGS] [OPTIONS] --table-name <table-name> [file]
14//!
15//! FLAGS:
16//!         --drop          To drop the table if exists
17//!     -h, --help          Prints help information
18//!         --no-copy       To remove copy command
19//!         --no-header     Whenever the csv file has no header
20//!     -V, --version       Prints version information
21//!
22//! OPTIONS:
23//!     -c, --columns <columns>          Override column name. Separated by comma. Use the csv header or letters by default.
24//!     -d, --delimiter <delimiter>       [default: ,]
25//!     -n, --null-as <null-as>          Empty string are null by default [default: ]
26//!     -t, --table-name <table-name>    File name is used as default
27//!
28//! ARGS:
29//!     <file>
30//! ```
31//!
32//! # Example
33//!
34//! ```bash
35//! $ csvpsql --drop example.csv
36//! drop table if exists example;
37//!
38//! create table example (
39//!    city text not null,
40//!    region text not null,
41//!    country text not null,
42//!    population integer not null
43//!);
44//!
45//! \copy example from 'example.csv' with csv delimiter ',' header;
46//! ```
47
48use chrono::NaiveTime;
49use csv::StringRecord;
50use itertools::izip;
51use std::error::Error;
52use std::fmt;
53use std::fs;
54use std::io;
55use std::io::{BufRead, BufReader};
56use std::path::PathBuf;
57use structopt::StructOpt;
58
59#[derive(Debug, StructOpt)]
60#[structopt(name = "csvpsql", about = "Parse csv to sql tables.")]
61pub struct Opt {
62    #[structopt(long, help="Whenever the csv file has no header")]
63    pub no_header: bool,
64
65    #[structopt(long, help="To remove copy command")]
66    pub no_copy: bool,
67
68    #[structopt(long, help="To add drop query")]
69    pub drop: bool,
70
71    #[structopt(short, long, default_value = ",")]
72    pub delimiter: char,
73
74    #[structopt(
75        short,
76        long,
77        help = "Override column name. Separated by comma. Use the csv header or letters by default."
78    )]
79    pub columns: Option<String>,
80
81    #[structopt(
82        short,
83        long,
84        default_value = "",
85        help = "Empty string are null by default"
86    )]
87    pub null_as: String,
88
89    #[structopt(parse(from_os_str))]
90    pub file: Option<PathBuf>,
91
92    #[structopt(
93        short,
94        long,
95        required_unless = "file",
96        help = "File name is used as default"
97    )]
98    pub table_name: Option<String>,
99}
100
101// TODO: Add missing column types
102#[derive(Debug, Clone, PartialEq, PartialOrd)]
103enum ColumnType {
104    Unknown,
105    Boolean,
106    Integer,
107    Numeric,
108    Date,
109    Timestamp,
110    Text,
111}
112
113impl fmt::Display for ColumnType {
114    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
115        write!(f, "{}", format!("{:?}", self).to_lowercase())
116    }
117}
118
119#[derive(Debug, Clone, PartialEq, PartialOrd)]
120enum ColumnConstraint {
121    Nullable,
122    NotNull,
123}
124
125impl fmt::Display for ColumnConstraint {
126    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
127        match *self {
128            ColumnConstraint::NotNull => write!(f, "not null"),
129            ColumnConstraint::Nullable => write!(f, ""),
130        }
131    }
132}
133
134struct Column {
135    name: String,
136    ctype: ColumnType,
137    constraint: ColumnConstraint,
138}
139
140impl fmt::Display for Column {
141    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
142        write!(f, "{} {} {}", self.name, self.ctype, self.constraint,)
143    }
144}
145
146type Columns = Vec<Column>;
147
148struct Table {
149    name: String,
150    columns: Columns,
151}
152
153impl fmt::Display for Table {
154    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155        writeln!(f, "create table {} (", self.name)?;
156        for column in &self.columns[0..self.columns.len() - 1] {
157            writeln!(f, "    {},", column)?;
158        }
159        writeln!(f, "    {}", self.columns[self.columns.len() - 1])?;
160        writeln!(f, ");")?;
161        Ok(())
162    }
163}
164
165fn try_parse_date(field: &str) -> Result<ColumnType, dtparse::ParseError> {
166    let (date, _) = dtparse::parse(field)?;
167    if date.time() == NaiveTime::from_hms(0, 0, 0) {
168        Ok(ColumnType::Date)
169    } else {
170        Ok(ColumnType::Timestamp)
171    }
172}
173
174fn find_type(xfield: &str) -> ColumnType {
175    let parsed_field = xfield.to_lowercase();
176    match parsed_field.as_str() {
177        field if field.is_empty() => ColumnType::Unknown,
178        "true" | "false" => ColumnType::Boolean,
179        field if field.parse::<isize>().is_ok() => ColumnType::Integer,
180        field if field.parse::<f64>().is_ok() => ColumnType::Numeric,
181        field => {
182            if let Ok(c) = try_parse_date(field) {
183                c
184            } else {
185                ColumnType::Text
186            }
187        }
188    }
189}
190
191fn find_constraint(field: &str, null_as: &str) -> ColumnConstraint {
192    if field == null_as {
193        ColumnConstraint::Nullable
194    } else {
195        ColumnConstraint::NotNull
196    }
197}
198
199fn get_table_name(table_name: Option<String>, file: Option<PathBuf>) -> String {
200    let table_name = match (&table_name, &file) {
201        (Some(name), _) => name,
202        (None, Some(file)) => file.file_stem().unwrap().to_str().unwrap(),
203        _ => "csvpsql", // cannot happen due to structopt rules
204    };
205    table_name.to_owned()
206}
207
208fn get_column_names(columns: Option<&str>, no_header: bool, header: &StringRecord) -> Vec<String> {
209    match (columns, no_header) {
210        (Some(names), _) => names.split(',').map(|x| x.to_string()).collect(),
211        (None, false) => header
212            .iter()
213            .map(|x| x.to_lowercase().replace(" ", "_"))
214            .collect(),
215        (None, true) => "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z"
216            .split(',')
217            .take(header.len())
218            .map(|x| x.to_string())
219            .collect(),
220    }
221}
222
223fn get_columns(
224    column_names: Vec<String>,
225    column_types: Vec<ColumnType>,
226    column_constraints: Vec<ColumnConstraint>,
227) -> Columns {
228    izip!(column_names, column_types, column_constraints)
229        .map(|(name, ctype, constraint)| Column {
230            name,
231            ctype,
232            constraint,
233        })
234        .collect()
235}
236
237pub fn run(opt: Opt) -> Result<(), Box<dyn Error>> {
238    // Read from file or stdin
239    let filename = opt.file.clone();
240    let reader: Box<dyn BufRead> = match opt.file.clone() {
241        None => Box::new(BufReader::new(io::stdin())),
242        Some(filename) => Box::new(BufReader::new(fs::File::open(filename).unwrap())),
243    };
244    let mut rdr = csv::ReaderBuilder::new()
245        .has_headers(!opt.no_header)
246        .delimiter(opt.delimiter as u8)
247        .from_reader(reader);
248
249    let number_of_columns = rdr.headers()?.len();
250
251    // Error check
252    if rdr.records().peekable().peek().is_none() {
253        return Err(Box::from("csv file has no records."));
254    }
255    if let Some(names) = &opt.columns {
256        if names.len() != number_of_columns {
257            return Err(Box::from(
258                "There is more columns in the file than provided by columns option.",
259            ));
260        }
261    }
262
263    // Parse csv
264    let mut column_types = vec![ColumnType::Unknown; number_of_columns];
265    let mut column_constraints = vec![ColumnConstraint::Nullable; number_of_columns];
266
267    for result in rdr.records() {
268        let record = result?;
269        for (i, field) in record.iter().enumerate() {
270            let field_type = find_type(field);
271            if field_type > column_types[i] {
272                column_types[i] = field_type
273            }
274            let field_constraint = find_constraint(field, &opt.null_as);
275            if field_constraint > column_constraints[i] {
276                column_constraints[i] = field_constraint
277            }
278        }
279    }
280
281    let column_types = column_types
282        .iter()
283        .map(|x| match x {
284            ColumnType::Unknown => ColumnType::Text,
285            a => a.clone(),
286        })
287        .collect();
288
289    // Create table
290    let column_names = get_column_names(opt.columns.as_deref(), opt.no_header, rdr.headers()?);
291    let columns = get_columns(column_names, column_types, column_constraints);
292    let table_name = get_table_name(opt.table_name, opt.file);
293    let table = Table {
294        name: table_name,
295        columns,
296    };
297
298
299    if opt.drop {
300        println!("drop table if exists {};\n", table.name)
301    }
302
303    println!("{}", table);
304
305    let mut copy_cmd = format!(r"\copy {} from", table.name);
306    copy_cmd = match filename {
307        None => format!("{} stdin", copy_cmd),
308        Some(s) => format!("{} '{}'", copy_cmd, s.display()),
309    };
310    copy_cmd = format!("{} with csv delimiter '{}'", copy_cmd, opt.delimiter);
311    if !opt.no_header {
312        copy_cmd = format!("{} header", copy_cmd)
313    };
314    println!("{};", copy_cmd);
315
316    Ok(())
317}
318
319mod test {
320    #[allow(unused)]
321    use super::*;
322    #[allow(unused)]
323    use std::path::Path;
324
325    #[test]
326    fn test_find_type() {
327        assert_eq!(find_type("true"), ColumnType::Boolean);
328        assert_eq!(find_type("false"), ColumnType::Boolean);
329        assert_eq!(find_type("TRUE"), ColumnType::Boolean);
330        assert_eq!(find_type("0"), ColumnType::Integer);
331        assert_eq!(find_type("0.0"), ColumnType::Numeric);
332    }
333
334    #[test]
335    fn test_parse_date() {
336        assert_eq!(try_parse_date("2020-01-01"), Ok(ColumnType::Date));
337        assert_eq!(dtparse::parse(""), Ok(()));
338        assert_eq!(
339            try_parse_date("2020-01-01 18:30:04 +02:00"),
340            Ok(ColumnType::Timestamp)
341        );
342    }
343
344    #[test]
345    fn test_find_constraint() {
346        assert_eq!(find_constraint("", ""), ColumnConstraint::Nullable);
347        assert_eq!(find_constraint("smth", ""), ColumnConstraint::NotNull);
348    }
349
350    #[test]
351    fn test_get_table_name() {
352        assert_eq!(
353            get_table_name(
354                Option::from("t".to_owned()),
355                Option::from(PathBuf::from(Box::from(Path::new("f.csv"))))
356            ),
357            "t"
358        );
359        assert_eq!(
360            get_table_name(
361                None,
362                Option::from(PathBuf::from(Box::from(Path::new("f.csv"))))
363            ),
364            "f"
365        );
366    }
367}