use std::collections::{HashMap, HashSet};
use std::fmt;
use std::fs::File;
use std::io::Error;
use std::io::ErrorKind;
use std::io::Write;
use std::io::{BufRead, BufReader};
/// An in-memory CSV table: one header of column names plus the data rows.
///
/// All fields are public, so nothing prevents a caller from producing rows whose
/// length differs from the number of columns; `check_validity` reports that case.
pub struct CSVFile {
/// Character that separates two fields on a line, both when parsing and when
/// rendering the table back to text.
pub delimiter: char,
/// Column names, in header order.
pub columns: Vec<String>,
/// Data rows (the header is not included); each inner vector holds the fields
/// of one row, in column order.
pub rows: Vec<Vec<String>>,
}
/// Position of a single cell inside a `CSVFile`.
///
/// Both indices are zero-based and index into `CSVFile::rows`, so the header
/// line is not counted as a row.
#[derive(PartialEq)]
pub struct CSVCoords {
/// Index of the row inside `CSVFile::rows`.
pub row: usize,
/// Index of the field inside that row.
pub column: usize,
}
impl fmt::Display for CSVCoords {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "({}, {})", self.row, self.column)
}
}
impl fmt::Debug for CSVCoords {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"CSVCoords {{ row: {}, column: {} }}",
self.row, self.column
)
}
}
/// Renders the table as CSV text: the header line first, then one line per row.
///
/// Fields on a line are separated by `self.delimiter` and every line, including the
/// last one, is terminated by `'\n'`. Fields are written verbatim: no quoting or
/// escaping is applied, so a field that itself contains the delimiter is not
/// protected.
///
/// A row without any field is rendered as an empty line. (The previous
/// implementation unconditionally popped the last character after each row, which
/// for a zero-field row removed the preceding line's newline and made the row
/// disappear from the output.)
impl fmt::Display for CSVFile {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `join` needs a `&str` separator, so the delimiter is converted once up front.
        let delimiter = self.delimiter.to_string();
        writeln!(f, "{}", self.columns.join(delimiter.as_str()))?;
        for row in &self.rows {
            writeln!(f, "{}", row.join(delimiter.as_str()))?;
        }
        Ok(())
    }
}
impl fmt::Debug for CSVFile {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"CSVFile {{ delimiter: {}, columns: {:?}, rows: {:?} }}",
self.delimiter, self.columns, self.rows
)
}
}
impl CSVFile {
    /// Loads a table from the file at `file_name`.
    ///
    /// The first line is read as the header (column names) and every following line
    /// becomes one row; lines are split on `delimiter`. Row lengths are not compared
    /// against the header here, so a blank line in the file ends up as a short row;
    /// see [`CSVFile::check_validity`] and [`CSVFile::remove_empty_lines`].
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the file cannot be opened or read, the parser's error
    /// if a line cannot be parsed, and an `InvalidData` error if the file has no
    /// header line at all (an empty file used to panic).
    pub fn new(file_name: &String, delimiter: &char) -> Result<Self, Error> {
        let file = File::open(file_name)?;
        let mut lines = BufReader::new(&file).lines();
        let first_line = match lines.next() {
            Some(line) => line?,
            None => {
                return Err(Error::new(
                    ErrorKind::InvalidData,
                    format!("The file {} is empty", file_name),
                ));
            }
        };
        let columns = read_columns(&first_line, delimiter)?;
        let rows = read_rows(&mut lines, delimiter, columns.len())?;
        Ok(Self {
            delimiter: *delimiter,
            columns,
            rows,
        })
    }

    /// Builds a table from already-parsed data, cloning `columns` and `rows`.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error naming the first row whose number of fields
    /// differs from the number of columns.
    pub fn build(
        columns: &Vec<String>,
        rows: &Vec<Vec<String>>,
        delimiter: &char,
    ) -> Result<Self, Error> {
        let expected = columns.len();
        if let Some((index, row)) = rows
            .iter()
            .enumerate()
            .find(|(_, row)| row.len() != expected)
        {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!(
                    "Invalid number of fields for row of index {}, {} were given, but expected {}",
                    index,
                    row.len(),
                    expected
                ),
            ));
        }
        Ok(Self {
            delimiter: *delimiter,
            columns: columns.clone(),
            rows: rows.clone(),
        })
    }

    /// Applies `f` to every row, in order, and collects the results.
    pub fn map_rows<F, T>(&self, f: F) -> Vec<T>
    where
        F: Fn(&Vec<String>) -> T,
    {
        self.rows.iter().map(f).collect()
    }

    /// Creates a map with one empty vector per column name.
    fn map_columns<T>(&self) -> HashMap<String, Vec<T>> {
        self.columns
            .iter()
            .map(|column| (column.clone(), Vec::new()))
            .collect()
    }

    /// Converts the table into a map from column name to that column's values,
    /// each value being the result of `f` applied to the cell.
    ///
    /// Cells are visited row by row. Fields beyond the last column of a row are
    /// ignored (they used to cause an out-of-bounds panic), and a row that is shorter
    /// than the header simply contributes nothing to the missing columns.
    pub fn to_map<F, T>(&self, f: F) -> HashMap<String, Vec<T>>
    where
        F: Fn(&String) -> T,
    {
        let mut map: HashMap<String, Vec<T>> = self.map_columns();
        for row in &self.rows {
            for (column, field) in self.columns.iter().zip(row) {
                // `map_columns` inserted every column name, so the lookup always succeeds.
                if let Some(values) = map.get_mut(column) {
                    values.push(f(field));
                }
            }
        }
        map
    }

    /// Writes the table, rendered through its `Display` implementation, to `filename`.
    ///
    /// The file is created, or truncated if it already exists.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the file cannot be created or written.
    pub fn write(&self, filename: &String) -> Result<(), Error> {
        let mut file = File::create(filename)?;
        file.write_all(self.to_string().as_bytes())?;
        Ok(())
    }

    /// Returns the number of columns.
    pub fn len(&self) -> usize {
        self.columns.len()
    }

    /// Returns the number of rows (the header is not counted).
    pub fn count_rows(&self) -> usize {
        self.rows.len()
    }

    /// Returns `true` if a column named exactly `column_name` exists.
    pub fn has_column(&self, column_name: &String) -> bool {
        self.columns.contains(column_name)
    }

    /// Returns `true` if the table holds no rows.
    pub fn has_no_rows(&self) -> bool {
        self.rows.is_empty()
    }

    /// Returns `true` if the table holds no columns.
    pub fn has_no_columns(&self) -> bool {
        self.columns.is_empty()
    }

    /// Returns `true` if the table has neither rows nor columns.
    pub fn empty(&self) -> bool {
        self.has_no_rows() && self.has_no_columns()
    }

    /// Replaces the delimiter used when the table is rendered.
    pub fn set_delimiter(&mut self, new_delimiter: &char) {
        self.delimiter = *new_delimiter;
    }

    /// Returns the index of the first column named `column_name`, if any.
    pub fn get_column_idx(&self, column_name: &String) -> Option<usize> {
        self.columns.iter().position(|c| c == column_name)
    }

    /// Returns the cell at `coordinates`, or `None` if either index is out of range.
    pub fn get_cell(&self, coordinates: &CSVCoords) -> Option<&String> {
        self.rows.get(coordinates.row)?.get(coordinates.column)
    }

    /// Returns the coordinates of every cell that contains `text` as a substring,
    /// ordered by row and then by column. The header is not searched.
    pub fn find_text(&self, text: &String) -> Vec<CSVCoords> {
        let mut coords: Vec<CSVCoords> = Vec::new();
        for (i, row) in self.rows.iter().enumerate() {
            for (j, cell) in row.iter().enumerate() {
                if cell.contains(text) {
                    coords.push(CSVCoords { row: i, column: j });
                }
            }
        }
        coords
    }

    /// Returns `true` if all column names are distinct and every row has exactly
    /// one field per column.
    pub fn check_validity(&self) -> bool {
        let mut seen: HashSet<&str> = HashSet::with_capacity(self.columns.len());
        for column in &self.columns {
            // `insert` reports `false` when the name was already present.
            if !seen.insert(column.as_str()) {
                return false;
            }
        }
        let width = self.len();
        self.rows.iter().all(|row| row.len() == width)
    }

    /// Overwrites the column named `column_name` with `data`, one value per row.
    ///
    /// A row that is too short to hold the column is padded with empty fields first
    /// (this used to panic).
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error if the column does not exist or if `data` does
    /// not contain exactly one value per row. The table is left untouched in both
    /// cases.
    pub fn fill_column(&mut self, column_name: &String, data: &Vec<String>) -> Result<(), Error> {
        let column_idx = self.get_column_idx(column_name).ok_or_else(|| {
            Error::new(
                ErrorKind::InvalidData,
                format!("The column {} doesn't exist", column_name),
            )
        })?;
        if data.len() != self.count_rows() {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!(
                    "Invalid number of fields, {} were given, but expected {}",
                    data.len(),
                    self.count_rows()
                ),
            ));
        }
        for (row, value) in self.rows.iter_mut().zip(data) {
            if row.len() <= column_idx {
                row.resize(column_idx + 1, String::new());
            }
            row[column_idx] = value.clone();
        }
        Ok(())
    }

    /// Appends the columns of `other` to the right of this table.
    ///
    /// Rows are paired by index. If `other` has more rows, this table is extended with
    /// rows whose original columns are empty; if it has fewer, the remaining rows of
    /// this table receive empty fields for the new columns.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error, without modifying the table, if `other` has a
    /// column name that already exists here.
    pub fn merge(&mut self, other: &CSVFile) -> Result<(), Error> {
        if let Some(column) = other
            .columns
            .iter()
            .find(|column| self.columns.contains(*column))
        {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!("The column {} already exists", column),
            ));
        }
        // Both widths must be captured before the header is extended.
        let own_width = self.len();
        let other_width = other.len();
        self.columns.extend(other.columns.iter().cloned());
        if self.rows.len() < other.rows.len() {
            self.rows
                .resize(other.rows.len(), vec![String::new(); own_width]);
        }
        for (i, row) in self.rows.iter_mut().enumerate() {
            match other.rows.get(i) {
                Some(other_row) => row.extend(other_row.iter().cloned()),
                None => {
                    let padded_len = row.len() + other_width;
                    row.resize(padded_len, String::new());
                }
            }
        }
        Ok(())
    }

    /// Appends a copy of `data` as the last row.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error if `data` does not have one field per column.
    pub fn add_row(&mut self, data: &Vec<String>) -> Result<(), Error> {
        if data.len() != self.len() {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!(
                    "Invalid number of fields, {} were given, but expected {}",
                    data.len(),
                    self.len()
                ),
            ));
        }
        self.rows.push(data.clone());
        Ok(())
    }

    /// Appends a new column named `name`; every row receives an empty field.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error if a column with that name already exists.
    pub fn add_column(&mut self, name: &String) -> Result<(), Error> {
        if self.columns.contains(name) {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!("The column {} already exists", name),
            ));
        }
        self.columns.push(name.clone());
        for row in &mut self.rows {
            row.push(String::new());
        }
        Ok(())
    }

    /// Inserts a new column named `name` at position `column_idx`; every row receives
    /// an empty field at that position. `column_idx == self.len()` appends.
    ///
    /// A row that is shorter than `column_idx` is padded with empty fields first so
    /// that the new field lands in the right column (this used to panic).
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error if `column_idx` is greater than the number of
    /// columns, or if a column with that name already exists.
    pub fn insert_column(&mut self, name: &String, column_idx: usize) -> Result<(), Error> {
        if column_idx > self.len() {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!("The column index {} is out of range", column_idx),
            ));
        }
        if self.columns.contains(name) {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!("The column {} already exists", name),
            ));
        }
        self.columns.insert(column_idx, name.clone());
        for row in &mut self.rows {
            if row.len() < column_idx {
                row.resize(column_idx, String::new());
            }
            row.insert(column_idx, String::new());
        }
        Ok(())
    }

    /// Removes the column at `column_idx` from the header and from every row.
    ///
    /// Rows that are too short to contain that column are left unchanged (this used
    /// to panic).
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error if `column_idx` is not a valid column index.
    pub fn remove_column(&mut self, column_idx: usize) -> Result<(), Error> {
        if column_idx >= self.len() {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!("The column index {} is out of range", column_idx),
            ));
        }
        self.columns.remove(column_idx);
        for row in &mut self.rows {
            if column_idx < row.len() {
                row.remove(column_idx);
            }
        }
        Ok(())
    }

    /// Removes the row at `row_idx`.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error if `row_idx` is not a valid row index.
    pub fn remove_row(&mut self, row_idx: usize) -> Result<(), Error> {
        if row_idx >= self.rows.len() {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!("The row index {} is out of range", row_idx),
            ));
        }
        self.rows.remove(row_idx);
        Ok(())
    }

    /// Returns `true` if every field of `row` is empty (a row without fields counts
    /// as blank too).
    fn is_blank_row(row: &[String]) -> bool {
        row.iter().all(|field| field.is_empty())
    }

    /// Removes the blank rows at the end of the table.
    ///
    /// Does nothing when the table has no rows (this used to panic on the
    /// `len() - 1` underflow).
    pub fn trim_end(&mut self) {
        while self
            .rows
            .last()
            .map_or(false, |row| Self::is_blank_row(row))
        {
            self.rows.pop();
        }
    }

    /// Removes the blank rows at the start of the table.
    pub fn trim_start(&mut self) {
        let leading_blank = self
            .rows
            .iter()
            .take_while(|row| Self::is_blank_row(row))
            .count();
        // A single drain shifts the remaining rows once instead of once per removed row.
        self.rows.drain(..leading_blank);
    }

    /// Removes the blank rows at both ends of the table.
    pub fn trim(&mut self) {
        self.trim_start();
        self.trim_end();
    }

    /// Removes every blank row, wherever it is in the table.
    pub fn remove_empty_lines(&mut self) {
        self.rows.retain(|row| !Self::is_blank_row(row));
    }
}
/// Splits one line into fields, honouring double-quoted sections and backslash escapes.
///
/// * `delimiter` separates fields, except inside a quoted section.
/// * An unescaped `"` opens or closes a quoted section. A closing quote ends the
///   current field, and the single character that follows it (normally the
///   delimiter) is skipped.
/// * A backslash is dropped and marks the next character as escaped: an escaped `"`
///   or an escaped backslash is kept literally. Escaping has no effect on the
///   delimiter.
/// * `number_of_fields` is only used to pre-allocate the result.
///
/// When the closing quote is the last character of the line, the field has already
/// been stored and nothing more is appended. (Previously an extra empty field was
/// added in that case, so `a,"b"` produced three fields instead of two.)
///
/// # Errors
///
/// Returns an `InvalidData` error if the line ends inside a quoted section or right
/// after an unpaired backslash.
pub(crate) fn parse_line(
    line: &String,
    delimiter: &char,
    number_of_fields: Option<u32>,
) -> Result<Vec<String>, Error> {
    let mut fields: Vec<String> = match number_of_fields {
        Some(n) => Vec::with_capacity(n as usize),
        None => Vec::new(),
    };
    let mut chars = line.chars();
    let mut current_field = String::new();
    let mut is_in_quote = false;
    let mut is_escaped = false;
    // `while let` instead of `for`: the closing-quote branch pulls one more character
    // from the same iterator.
    while let Some(c) = chars.next() {
        if c == '\\' {
            // The second backslash of a pair is literal; the first only arms the escape.
            if is_escaped {
                current_field.push(c);
            }
            is_escaped = !is_escaped;
            continue;
        }
        if c == '"' {
            if is_escaped {
                current_field.push(c);
            } else {
                if is_in_quote {
                    fields.push(std::mem::take(&mut current_field));
                    // Skip the character after the closing quote. If there is none, the
                    // quoted field was the last one and the line is complete.
                    if chars.next().is_none() {
                        return Ok(fields);
                    }
                }
                is_in_quote = !is_in_quote;
            }
        } else if c == *delimiter && !is_in_quote {
            fields.push(std::mem::take(&mut current_field));
        } else {
            current_field.push(c);
        }
        is_escaped = false;
    }
    if is_escaped || is_in_quote {
        return Err(Error::new(
            ErrorKind::InvalidData,
            "Invalid escape sequence",
        ));
    }
    fields.push(current_field);
    Ok(fields)
}
/// Splits `line` on every occurrence of `delimiter`, without any quote or escape
/// handling, and returns the pieces as owned strings.
///
/// A line without the delimiter (including the empty line) yields a single field.
pub(crate) fn split_line(line: &String, delimiter: &char) -> Vec<String> {
    let separator = *delimiter;
    let mut pieces: Vec<String> = Vec::new();
    for piece in line.split(separator) {
        pieces.push(String::from(piece));
    }
    pieces
}
/// Parses the header line into column names.
///
/// A line without any `"` takes the plain `split_line` path; a line containing a
/// quote goes through `parse_line` (with no field-count hint), whose error is
/// returned unchanged.
pub(crate) fn read_columns(line: &String, delimiter: &char) -> Result<Vec<String>, Error> {
    let has_quotes = line.contains('"');
    if !has_quotes {
        return Ok(split_line(line, delimiter));
    }
    parse_line(line, delimiter, None)
}
/// Reads every remaining line from `lines` and parses each one into a row.
///
/// Lines containing a `"` go through `parse_line`, with `number_of_fields` passed
/// along as its field-count hint; all other lines take the plain `split_line` path.
/// The number of fields found on a line is not checked against `number_of_fields`.
///
/// # Errors
///
/// Stops at, and returns, the first I/O error raised while reading a line or the
/// first error returned by `parse_line`.
pub(crate) fn read_rows(
    lines: &mut std::io::Lines<BufReader<&File>>,
    delimiter: &char,
    number_of_fields: usize,
) -> Result<Vec<Vec<String>>, Error> {
    let field_hint = Some(number_of_fields as u32);
    lines
        .map(|line_result| {
            let text = line_result?;
            if text.contains('"') {
                parse_line(&text, delimiter, field_hint)
            } else {
                Ok(split_line(&text, delimiter))
            }
        })
        .collect()
}
mod tests;