csv_lib/
lib.rs

1//! # Csv_lib Crate
2//!
3//! A Rust library to read/write CSV files in the fastest way I know.
4//! 
5//! For further information, and complete docs, you can check the repo [here](https://github.com/PTechSoftware/csv_lib)
6//!
7//! ## 3rd Party Crates Used:
8//!
9//! | Crate | Link |
10//! | :---- | :---- |
11//! | Memmap2 | [memmap2 crate](https://docs.rs/memmap2/latest/memmap2/) |
12//! | Memchr | [memchr crate](https://docs.rs/memchr/latest/memchr/) |
13//! | num_cpus | [num_cpus crate](https://docs.rs/num_cpus/latest/num_cpus/) |
14//!
15//! ## Features
16//! - Zero-copy parsing
17//! - Custom delimiters support
18//! - Escape string support
19//! - Direct mapping from memory
20//! - Multi-core processing
21//! - Low RAM usage, even on big files
22//!
23//!
24//! ## Performance
25//! This library is designed to process large CSV files.  
26//! Successfully tested on a CSV file with 1 billion lines.
27//! To measure performance, build and run in `release` mode; it is much faster than a debug build.
28//!
29//! ## Contact
30//! If you have any questions, contact me on [LinkedIn](https://www.linkedin.com/in/ignacio-p%C3%A9rez-panizza-322844165/)
31
32
33extern crate core;
34
35pub mod models;
36pub mod io;
37pub mod helpers;
38pub mod csv;
39pub mod extensions;
40pub mod features;
41pub mod decoders;
42pub mod encoders;
43pub mod parallel;
44pub mod macros;
45
#[cfg(test)]
mod test {
    use std::collections::HashSet;
    use std::sync::{Arc, Mutex};
    use crate::csv::csv_reader::CsvReaderWithMap;
    use crate::decoders::decoders::Encoding;
    use crate::{get_bool, get_f64, get_i32, get_str};
    use crate::models::csv_config::CsvConfig;
    use crate::models::row::Row;
    use crate::models::shared::Shared;
    use crate::parallel::parallel_reader::parallel_processing_csv;
    use crate::parallel::row_parallel::RowParallel;

    /// Reads `data.csv` row by row through `CsvReaderWithMap` and collects the
    /// distinct values found in the field at index 6.
    ///
    /// The test opens `data.csv` by relative path and panics with the reader's
    /// error if the file cannot be opened. It passes when at least one distinct
    /// value was collected.
    #[test]
    fn read_csv_one_core() {
        // Reader configuration.
        let cfg = CsvConfig::new(b',', 0u8, b'\n', Encoding::Windows1252, false);

        // Open the file; any error aborts the test with the error's message.
        let mut reader = CsvReaderWithMap::open("data.csv", &cfg)
            .unwrap_or_else(|e| panic!("{}", e));

        // Accumulator for the distinct values of field 6, pre-sized for 195 entries.
        // NOTE(review): the original comment spoke of countries while the code
        // called these "cities" — confirm what column 6 actually holds.
        let mut seen: HashSet<String> = HashSet::with_capacity(195);

        while let Some(row) = reader.next_raw() {
            // Field at index 6 (indices start at 0).
            let field = row.get_index(6);
            // View the raw bytes of the field as a `&str`.
            let name = field.get_utf8_as_str();

            // The calls below only exercise the accessor API; their results are
            // intentionally unused (hence the underscore-prefixed bindings).
            let _ = get_bool!(row, 1);
            let _as_i8 = field.get_i8();
            let _as_i32 = get_i32!(row, 2);
            let _decoded = row.get_index(6).get_as_cow_decoded(Encoding::Windows1252);
            // Same field read through the macro.
            let _via_macro = get_str!(row, 6, Encoding::Windows1252);

            // Allocate an owned `String` only for values not seen before.
            if !seen.contains(name) {
                seen.insert(name.to_string());
            }
        }

        assert_ne!(seen.len(), 0);
    }

    /// Runs `parallel_processing_csv` over the bytes of `data.csv`, incrementing
    /// a shared `i32` counter once per closure invocation, then prints the count.
    ///
    /// The test opens `data.csv` by relative path and panics with the reader's
    /// error if the file cannot be opened.
    ///
    /// NOTE(review): the count is only printed, never asserted — confirm whether
    /// an assertion is wanted here.
    #[test]
    fn read_csv_multicore() {
        // Reader configuration (used only to open the file).
        let cfg = CsvConfig::new(b',', 0u8, b'\n', Encoding::Windows1252, false);

        // Open the file; any error aborts the test with the error's message.
        let reader = CsvReaderWithMap::open("data.csv", &cfg)
            .unwrap_or_else(|e| panic!("{}", e));

        // Byte slice handed to the parallel processor.
        let bytes = reader.get_slice();

        // Shared counter; its inner type must match the `Arc<Mutex<i32>>`
        // parameter of the closure below.
        let shared = Shared::<i32>::default();

        // Work executed for each row.
        let per_row = |row: &mut RowParallel<'_>, id_thread: usize, target: Arc<Mutex<i32>>| {
            // The thread id is available but not needed here.
            let _ = id_thread;
            // Current row.
            let _actual = row.get_row();
            // Look ahead at the following row.
            let _next = row.peek_next();
            // ... real per-row work would go here ...

            // Count this invocation.
            let mut counter = target.lock().unwrap();
            *counter += 1;
        };

        // NOTE(review): the config above is built with b',' while b';' is passed
        // here — confirm which argument is the field delimiter and whether the
        // mismatch is intended.
        parallel_processing_csv(
            bytes,
            b'\n',
            b';',
            b'"',
            false,
            per_row,
            shared.arc(),
        );

        println!("Iterated Lines: {:.2}", shared.lock());
    }
}
146}