1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
use std::fmt;
use std::collections::HashSet;
use std::fs::File;
use std::io::Error;
use std::io::{BufRead, BufReader};
use std::io::ErrorKind;
use std::io::Write;

/// An in-memory CSV table: a header, the data rows and the field delimiter.
pub struct CSVFile {
  /// Character separating the fields, both when a file is parsed and when the table is rendered.
  pub delimiter: char,
  /// Names of the columns (the header line).
  pub columns: Vec<String>,
  /// Data rows; each row is the list of its fields, in column order.
  pub rows: Vec<Vec<String>>,
}

/// Position of a cell among the rows of a `CSVFile` (the header line is not addressed).
pub struct CSVCoords {
  /// Zero-based index of the row.
  pub row: usize,
  /// Zero-based index of the field inside the row.
  pub column: usize,
}

/// Two coordinates are equal when they designate the same row and the same column.
impl PartialEq for CSVCoords {
  fn eq(&self, other: &Self) -> bool {
    (self.row, self.column) == (other.row, other.column)
  }
}

/// Renders the coordinates as `(row, column)`.
impl fmt::Display for CSVCoords {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    let Self { row, column } = self;
    f.write_fmt(format_args!("({}, {})", row, column))
  }
}

/// Renders the coordinates on a single line, as `CSVCoords { row: R, column: C }`.
impl fmt::Debug for CSVCoords {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    let Self { row, column } = self;
    f.write_fmt(format_args!("CSVCoords {{ row: {}, column: {} }}", row, column))
  }
}

/// Renders the table as CSV text: the header line first, then one line per row.
///
/// The fields of a line are joined with the delimiter and every line, including
/// the last one, ends with `\n`. Fields are written as they are stored: no quoting
/// or escaping is applied.
impl fmt::Display for CSVFile {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    // `join` needs a string separator; encode the delimiter without allocating.
    let mut buffer = [0u8; 4];
    let separator: &str = self.delimiter.encode_utf8(&mut buffer);

    writeln!(f, "{}", self.columns.join(separator))?;

    // Joining (rather than appending a delimiter and popping the last character)
    // keeps a row without any field on its own, empty, line.
    for row in &self.rows {
      writeln!(f, "{}", row.join(separator))?;
    }

    Ok(())
  }
}

/// Renders the table on a single line for diagnostics.
/// The delimiter is printed as the bare character; columns and rows use their `Debug` form.
impl fmt::Debug for CSVFile {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    let Self { delimiter, columns, rows } = self;
    f.write_fmt(format_args!(
      "CSVFile {{ delimiter: {}, columns: {:?}, rows: {:?} }}",
      delimiter, columns, rows
    ))
  }
}

impl CSVFile {
  /// Creates a new CSVFile from a file name and an optional delimiter (a comma by default).
  /// It reads the first line of the file to get the columns and the rest of the file to get the data.
  /// It may return an error if the file doesn't exist or if it can't be read properly.
  pub fn new(file_name: &String, delimiter: &char) -> Result<Self, Error> {
    let file = File::open(&file_name)?;
    let mut lines = BufReader::new(&file).lines();
    let first_line = lines.next().unwrap()?;
    let columns = read_columns(&first_line, delimiter)?;
    let rows = read_rows(&mut lines, delimiter, columns.len())?;

    Ok(
      Self {
        delimiter: *delimiter,
        columns,
        rows
      }
    )
  }

  /// Builds a CSVFile from already parsed columns and rows, which are copied.
  ///
  /// # Errors
  ///
  /// Returns an error naming the first row whose number of fields
  /// differs from the number of columns.
  ///
  /// # Example
  ///
  /// ```
  /// # use csv_tools::CSVFile;
  ///
  /// let columns = vec!["a".to_string(), "b".to_string(), "c".to_string()];
  /// let rows = vec![
  ///    vec!["1".to_string(), "2".to_string(), "3".to_string()],
  ///    vec!["4".to_string(), "5".to_string(), "6".to_string()],
  ///    vec!["7".to_string(), "8".to_string(), "9".to_string()],
  /// ];
  ///
  /// let file = CSVFile::build(&columns, &rows, &',').unwrap();
  /// assert_eq!(file.columns, columns);
  /// assert_eq!(file.rows, rows);
  /// ```
  pub fn build(columns: &Vec<String>, rows: &Vec<Vec<String>>, delimiter: &char) -> Result<Self, Error> {
    let expected = columns.len();
    let mismatch = rows
      .iter()
      .enumerate()
      .find(|(_, row)| row.len() != expected);

    if let Some((index, row)) = mismatch {
      return Err(Error::new(
        ErrorKind::InvalidData,
        format!("Invalid number of fields for row of index {}, {} were given, but expected {}", index, row.len(), expected))
      );
    }

    Ok(Self { delimiter: *delimiter, columns: columns.to_vec(), rows: rows.to_vec() })
  }

  /// Saves the table, rendered with its `Display` implementation, in the file `filename`.
  ///
  /// # Errors
  ///
  /// Returns an error if the file can't be created or written.
  pub fn write(&self, filename: &String) -> Result<(), Error> {
    File::create(filename)
      .and_then(|mut file| file.write_all(self.to_string().as_bytes()))
  }

  /// Returns the number of columns in the CSV file
  /// (use `count_rows` for the number of rows).
  pub fn len(&self) -> usize {
    self.columns.len()
  }

  /// Returns the number of rows in the CSV file.
  /// The header line isn't counted, as it is stored in the columns.
  pub fn count_rows(&self) -> usize {
    self.rows.len()
  }

  /// Tells whether one of the columns is named exactly `column_name`.
  pub fn has_column(&self, column_name: &String) -> bool {
    self.columns.iter().any(|existing| existing == column_name)
  }

  /// Returns `true` if the CSV file has no row (the header isn't a row).
  pub fn has_no_rows(&self) -> bool {
    self.rows.is_empty()
  }

  /// Returns `true` if the CSV file has no column.
  pub fn has_no_columns(&self) -> bool {
    self.columns.is_empty()
  }

  /// Tells whether the CSV file holds nothing at all: no row and no column.
  pub fn empty(&self) -> bool {
    self.rows.is_empty() && self.columns.is_empty()
  }

  /// Sets the delimiter of the CSV file.
  /// Only the stored delimiter changes; the columns and the rows are left untouched.
  pub fn set_delimiter(&mut self, new_delimiter: &char) {
    self.delimiter = *new_delimiter;
  }

  /// Looks up the position of the first column named `column_name`.
  /// It returns `None` if no column has that name.
  pub fn get_column_idx(&self, column_name: &String) -> Option<usize> {
    self.columns
      .iter()
      .enumerate()
      .find(|(_, name)| *name == column_name)
      .map(|(index, _)| index)
  }

  /// Gets a cell at given coordinates.
  /// It returns `None` if the coordinates are out of range.
  /// 
  /// # Example
  /// 
  /// ```
  /// # use csv_tools::{CSVFile, CSVCoords};
  /// let columns = vec!["a".to_string(), "b".to_string(), "c".to_string()];
  /// let rows = vec![
  ///    vec!["1".to_string(), "2".to_string(), "3".to_string()],
  ///    vec!["4".to_string(), "5".to_string(), "6".to_string()],
  ///    vec!["7".to_string(), "8".to_string(), "9".to_string()],
  /// ];
  /// 
  /// let file = CSVFile::build(&columns, &rows, &',').unwrap();
  /// 
  /// assert_eq!(file.get_cell(&CSVCoords { row: 0, column: 0 }), Some(&"1".to_string()));
  /// assert_eq!(file.get_cell(&CSVCoords { row: 1, column: 1 }), Some(&"5".to_string()));
  /// assert_eq!(file.get_cell(&CSVCoords { row: 2, column: 2 }), Some(&"9".to_string()));
  /// ```
  pub fn get_cell(&self, coordinates: &CSVCoords) -> Option<&String> {
    self.rows.get(coordinates.row)?.get(coordinates.column)
  }

  /// Collects the coordinates of every cell containing `text` as a substring.
  /// The cells are visited row by row, then field by field; the header isn't searched.
  pub fn find_text(&self, text: &String) -> Vec<CSVCoords> {
    self.rows
      .iter()
      .enumerate()
      .flat_map(|(row, cells)| {
        cells
          .iter()
          .enumerate()
          .filter(move |(_, cell)| cell.contains(text))
          .map(move |(column, _)| CSVCoords { row, column })
      })
      .collect()
  }

  /// Tells whether the CSV file is consistent: every column name is unique
  /// and every row has exactly one field per column.
  pub fn check_validity(&self) -> bool {
    // `insert` returns `false` for a name that was already seen
    let mut seen: HashSet<&str> = HashSet::new();
    let names_are_unique = self.columns.iter().all(|name| seen.insert(name.as_str()));
    if !names_are_unique {
      return false;
    }

    let width = self.len();
    self.rows.iter().all(|row| row.len() == width)
  }

  /// Replaces the content of a column: the field of the i-th row becomes a copy of `data[i]`.
  ///
  /// # Errors
  ///
  /// Returns an error, leaving the rows untouched, if:
  /// - the column doesn't exist,
  /// - the length of the data is different from the number of rows,
  /// - a row is too short to hold a field for that column.
  pub fn fill_column(&mut self, column_name: &String, data: &Vec<String>) -> Result<(), Error> {
    let column_idx = match self.columns.iter().position(|c| c == column_name) {
      Some(idx) => idx,
      None => {
        return Err(Error::new(
          ErrorKind::InvalidData,
          format!("The column {} doesn't exist", column_name))
        );
      }
    };

    if data.len() != self.count_rows() {
      return Err(Error::new(
        ErrorKind::InvalidData,
        format!("Invalid number of fields, {} were given, but expected {}", data.len(), self.count_rows()))
      );
    }

    // The rows are public, so nothing guarantees they all reach the column.
    // Check before writing anything: indexing a short row would panic.
    if let Some(row_idx) = self.rows.iter().position(|row| row.len() <= column_idx) {
      return Err(Error::new(
        ErrorKind::InvalidData,
        format!("The row of index {} has no field for the column {}", row_idx, column_name))
      );
    }

    for (row, value) in self.rows.iter_mut().zip(data) {
      row[column_idx] = value.clone();
    }

    Ok(())
  }

  /// Appends the columns of `other`, and their data, to the right of this CSV file.
  /// If the two files don't have the same number of rows, the missing fields
  /// are filled with empty strings.
  ///
  /// The other CSVFile instance is supposed to be valid.
  ///
  /// # Errors
  ///
  /// Returns an error, leaving this file untouched, if a column of `other` already exists here.
  pub fn merge(&mut self, other: &CSVFile) -> Result<(), Error> {
    let duplicate = other.columns.iter().find(|name| self.columns.contains(*name));
    if let Some(column) = duplicate {
      return Err(Error::new(
        ErrorKind::InvalidData,
        format!("The column {} already exists", column))
      );
    }

    // Widths are taken before the header grows.
    let own_width = self.len();
    let added_width = other.len();
    let other_rows = other.count_rows();

    self.columns.extend(other.columns.iter().cloned());

    // Fewer rows here than in other: add rows made of empty strings until the counts match.
    if self.rows.len() < other_rows {
      self.rows.resize_with(other_rows, || vec![String::new(); own_width]);
    }

    // More rows here than in other: the rows past the end of other have no counterpart,
    // so they receive one empty string per added column.
    for row in self.rows.iter_mut().skip(other_rows) {
      let padded_len = row.len() + added_width;
      row.resize(padded_len, String::new());
    }

    // Every row of other is appended to the row of the same index.
    for (row, extra) in self.rows.iter_mut().zip(&other.rows) {
      row.extend(extra.iter().cloned());
    }

    Ok(())
  }

  /// Appends a copy of `data` as the last row of the CSV file.
  ///
  /// # Errors
  ///
  /// Returns an error if the number of fields in `data`
  /// is different from the number of columns.
  pub fn add_row(&mut self, data: &Vec<String>) -> Result<(), Error> {
    let (given, expected) = (data.len(), self.len());

    if given == expected {
      self.rows.push(data.to_vec());
      Ok(())
    } else {
      Err(Error::new(
        ErrorKind::InvalidData,
        format!("Invalid number of fields, {} were given, but expected {}", given, expected))
      )
    }
  }

  /// Appends a column named `name` after the existing ones.
  /// Every row receives an empty string for the new column.
  ///
  /// # Errors
  ///
  /// Returns an error if a column with that name already exists.
  pub fn add_column(&mut self, name: &String) -> Result<(), Error> {
    let already_exists = self.columns.iter().any(|column| column == name);
    if already_exists {
      return Err(Error::new(
        ErrorKind::InvalidData,
        format!("The column {} already exists", name))
      );
    }

    self.columns.push(name.clone());
    self.rows
      .iter_mut()
      .for_each(|row| row.push(String::new()));

    Ok(())
  }

  /// Inserts a column named `name` so that it ends up at the index `column_idx`.
  /// Every row receives an empty string at that same index.
  /// An index equal to the number of columns appends the column at the end.
  ///
  /// # Errors
  ///
  /// Returns an error if the index is greater than the number of columns
  /// or if a column with that name already exists (the index is checked first).
  pub fn insert_column(&mut self, name: &String, column_idx: usize) -> Result<(), Error> {
    let out_of_range = column_idx > self.len();
    if out_of_range {
      return Err(Error::new(
        ErrorKind::InvalidData,
        format!("The column index {} is out of range", column_idx))
      );
    }

    let already_exists = self.columns.iter().any(|column| column == name);
    if already_exists {
      return Err(Error::new(
        ErrorKind::InvalidData,
        format!("The column {} already exists", name))
      );
    }

    self.columns.insert(column_idx, name.clone());
    self.rows
      .iter_mut()
      .for_each(|row| row.insert(column_idx, String::new()));

    Ok(())
  }

  /// Deletes the column located at `column_idx`, along with the matching field of every row.
  ///
  /// # Errors
  ///
  /// Returns an error if the column index is out of range.
  pub fn remove_column(&mut self, column_idx: usize) -> Result<(), Error> {
    if column_idx < self.len() {
      self.columns.remove(column_idx);
      self.rows.iter_mut().for_each(|row| {
        row.remove(column_idx);
      });

      Ok(())
    } else {
      Err(Error::new(
        ErrorKind::InvalidData,
        format!("The column index {} is out of range", column_idx))
      )
    }
  }

  /// Deletes the row located at `row_idx`; the following rows move up by one.
  ///
  /// # Errors
  ///
  /// Returns an error if the row index is out of range.
  pub fn remove_row(&mut self, row_idx: usize) -> Result<(), Error> {
    if row_idx < self.rows.len() {
      self.rows.remove(row_idx);
      Ok(())
    } else {
      Err(Error::new(
        ErrorKind::InvalidData,
        format!("The row index {} is out of range", row_idx))
      )
    }
  }

  /// Removes all the rows that are composed of empty strings only,
  /// starting at the very end and stopping as soon as a non-empty row is found.
  /// A row without any field counts as empty.
  ///
  /// If no empty row is found, or if there is no row at all, then nothing happens.
  pub fn trim_end(&mut self) {
    // `last()` is `None` once the rows run out, so an empty table
    // (or one made of empty rows only) simply ends the loop.
    while let Some(last) = self.rows.last() {
      if !last.iter().all(|s| s.is_empty()) {
        break;
      }
      self.rows.pop();
    }
  }

  /// Removes all the rows that are composed of empty strings only,
  /// starting at the very beginning and stopping as soon as a non-empty row is found.
  /// A row without any field counts as empty.
  ///
  /// If no empty row is found, then nothing happens.
  pub fn trim_start(&mut self) {
    let leading_empty = self.rows
      .iter()
      .take_while(|row| row.iter().all(|s| s.is_empty()))
      .count();

    // One drain shifts the remaining rows a single time,
    // whereas removing the rows one by one shifts them once per removal.
    self.rows.drain(..leading_empty);
  }

  /// Removes all the rows that are composed of empty strings only at the beginning and at the end.
  /// If every row is empty, or if there is no row at all, the CSV file ends up without rows.
  pub fn trim(&mut self) {
    self.trim_start();

    // When no row is left there is nothing to trim at the end; stopping here
    // keeps this method safe whatever `trim_end` does with a table without rows.
    if !self.rows.is_empty() {
      self.trim_end();
    }
  }

  /// Drops every row composed of empty strings only, wherever it is located.
  /// The rows holding at least one non-empty field keep their relative order.
  pub fn remove_empty_lines(&mut self) {
    self.rows.retain(|row| row.iter().any(|field| !field.is_empty()));
  }
}

/// Parses the line into a vector of strings, reading it character by character.
///
/// The point of this function is to avoid using the split method, as it would ignore quotes:
/// - a delimiter found outside double quotes ends the current field,
/// - a delimiter found inside double quotes belongs to the field,
/// - an unescaped double quote opens or closes a quoted section and isn't kept,
/// - a backslash escapes the next character: `\"` yields a literal quote and `\\` a literal
///   backslash. A backslash placed before any other character is dropped.
///
/// A closing quote ends the current field. The character that follows it is expected to be
/// the delimiter and is skipped without being inspected.
///
/// The "number_of_fields" parameter is used to pre-allocate the vector.
/// This is useful when we know the number of fields in advance.
///
/// # Errors
///
/// Returns an error if the line ends inside a quoted section or right after an escaping backslash.
pub(crate) fn parse_line(line: &String, delimiter: &char, number_of_fields: Option<u32>) -> Result<Vec<String>, Error> {
  let mut fields: Vec<String> = match number_of_fields {
    Some(n) => Vec::with_capacity(n as usize),
    None => Vec::new(),
  };

  let mut chars = line.chars();
  let mut current_field = String::new();
  let mut is_in_quote = false;
  let mut is_escaped = false;
  // Set when the line stops right after a closing quote: the last field
  // has already been stored at that point and must not be pushed twice.
  let mut ended_on_closing_quote = false;

  while let Some(c) = chars.next() {
    if c == '\\' {
      // The second backslash of a pair is the literal one
      if is_escaped {
        current_field.push(c);
      }
      is_escaped = !is_escaped;
      continue;
    }

    match c {
      '"' if is_escaped => current_field.push(c),
      '"' => {
        if is_in_quote {
          fields.push(std::mem::take(&mut current_field));
          // skip the next character because it should be
          // the delimiter (or the end of the line)
          ended_on_closing_quote = chars.next().is_none();
        }
        is_in_quote = !is_in_quote;
      }
      _ if c == *delimiter && !is_in_quote => fields.push(std::mem::take(&mut current_field)),
      _ => current_field.push(c),
    }

    // If the character immediately following a backslash
    // isn't another backslash, then make sure to be unescaped.
    is_escaped = false;
  }

  if is_escaped || is_in_quote {
    return Err(Error::new(ErrorKind::InvalidData, "Invalid escape sequence"));
  }

  // Push the last field, unless a closing quote already did
  if !ended_on_closing_quote {
    fields.push(current_field);
  }

  Ok(fields)
}

/// Cuts the line at every occurrence of the delimiter and returns the pieces as owned strings.
/// Contrary to `parse_line`, quotes and backslashes get no special treatment.
/// An empty line yields a single empty field.
pub(crate) fn split_line(line: &String, delimiter: &char) -> Vec<String> {
  let mut fields = Vec::new();
  for piece in line.split(*delimiter) {
    fields.push(String::from(piece));
  }
  fields
}

/// Extracts the column names from the header line.
/// A line without any double quote is simply split on the delimiter with `split_line`.
/// Otherwise the line goes through `parse_line`, which handles quotes and escapes.
///
/// It returns a Result because the parsing fails if the line
/// contains an invalid escape sequence or an unclosed quote.
pub(crate) fn read_columns(line: &String, delimiter: &char) -> Result<Vec<String>, Error> {
  if !line.contains('"') {
    return Ok(split_line(line, delimiter));
  }

  parse_line(line, delimiter, None)
}

/// Turns the remaining lines of the file into rows, one row per line.
/// A line without any double quote is simply split on the delimiter with `split_line`.
/// Otherwise the line goes through `parse_line`, which handles quotes and escapes.
///
/// It returns a Result because it fails, at the first faulty line, if a line can't be read
/// or if it contains an invalid escape sequence or an unclosed quote.
///
/// The "number_of_fields" parameter is only a hint used to pre-allocate
/// the rows going through `parse_line`: the number of fields of a row isn't checked.
pub(crate) fn read_rows(lines: &mut std::io::Lines<BufReader<&File>>, delimiter: &char, number_of_fields: usize) -> Result<Vec<Vec<String>>, Error> {
  lines
    .map(|line| -> Result<Vec<String>, Error> {
      let line = line?;
      if line.contains('"') {
        parse_line(&line, delimiter, Some(number_of_fields as u32))
      } else {
        Ok(split_line(&line, delimiter))
      }
    })
    .collect()
}

mod tests;