csv_lib/lib.rs
1//! # Csv_lib Crate
2//!
3//! A Rust library to read/write CSV files in the fastest way I know.
4//!
5//! For further information, and complete docs, you can check the repo [here](https://github.com/PTechSoftware/csv_lib)
6//!
7//! ## 3rd Party Crates Used:
8//!
9//! | Crate | Link |
10//! | :---- | :---- |
11//! | Memmap2 | [memmap2 crate](https://docs.rs/memmap2/latest/memmap2/) |
12//! | Memchr | [memchr crate](https://docs.rs/memchr/latest/memchr/) |
13//! | num_cpus | [num_cpus crate](https://docs.rs/num_cpus/latest/num_cpus/) |
14//!
15//! ## Features
16//! - Zero-copy parsing
17//! - Custom delimiters support
18//! - Escape string support
19//! - Direct mapping from memory
20//! - Multicore Process
21//! - Low Ram Usage, even on big files
22//!
23//!
24//! ## Performance
25//! This library is designed to process large CSV files.
26//! Successfully tested on a CSV file with 1 billion lines.
27//! To measure performance, build with the `release` profile; it improves performance considerably.
28//!
29//! ## Contact
30//! If you have any questions, contact me on [LinkedIn](https://www.linkedin.com/in/ignacio-p%C3%A9rez-panizza-322844165/)
31
32extern crate core;
33
34pub mod models;
35pub mod io;
36pub mod helpers;
37pub mod csv;
38pub mod extensions;
39pub mod features;
40pub mod decoders;
41pub mod encoders;
42pub mod parallel;
43pub mod macros;
44
#[cfg(test)]
mod test {
    use crate::csv::csv_reader::CsvReaderWithMap;
    use crate::decoders::decoders::Encoding;
    use crate::models::csv_config::CsvConfig;
    use crate::models::shared::Shared;
    use crate::parallel::parallel_reader::parallel_processing_csv;
    use crate::parallel::row_parallel::RowParallel;
    use crate::{get_bool, get_i32, get_str};
    use std::collections::HashSet;
    use std::sync::{Arc, Mutex};

    /// Opens `data.csv`, walks it with `next_raw`, and gathers the distinct
    /// values of field 6 into a set. The test passes when at least one value
    /// was collected.
    ///
    /// The loop also calls several field accessors and the `get_*!` macros;
    /// their results are not inspected.
    #[test]
    // Several bindings below exist only to exercise the API and are never read.
    #[allow(dead_code, unused_assignments, unused_variables)]
    fn read_csv_one_core() {
        // Reader configuration.
        // NOTE(review): the argument meanings are defined by `CsvConfig::new`,
        // which is not visible here — confirm before changing any of them.
        let cfg = CsvConfig::new(b',', 0u8, b'\n', Encoding::Windows1252, false);

        // Open the file; any error aborts the test with its message.
        let mut reader =
            CsvReaderWithMap::open("data.csv", &cfg).unwrap_or_else(|e| panic!("{}", e));

        // Accumulator for the distinct values seen in field 6.
        let mut seen = HashSet::<String>::with_capacity(195);

        while let Some(record) = reader.next_raw() {
            // Field at index 6 (indices start at 0).
            let field = record.get_index(6);
            // View the field as a `&str`.
            let name = field.get_utf8_as_str();

            // Typed accessors, called only to exercise them.
            let _ = get_bool!(record, 1);
            let small = field.get_i8();
            let int_value = get_i32!(record, 2);

            // The same field decoded with an explicit encoding, first through
            // the method and then through the macro.
            let decoded = record
                .get_index(6)
                .get_as_cow_decoded(Encoding::Windows1252);
            let decoded_via_macro = get_str!(record, 6, Encoding::Windows1252);

            // Only allocate an owned `String` for values not stored yet.
            let already_seen = seen.contains(name);
            if !already_seen {
                seen.insert(name.to_string());
            }
        }

        assert_ne!(seen.len(), 0);
    }

    /// Opens `data.csv`, hands its byte slice to `parallel_processing_csv`
    /// together with a closure that increments a shared `i32` once per
    /// invocation, then prints the resulting counter.
    #[test]
    // `upcoming` is bound only to exercise `peek_next` and is never read.
    #[allow(dead_code, unused_assignments, unused_variables)]
    fn read_csv_multicore() {
        // Reader configuration (same values as the single-core test).
        let cfg = CsvConfig::new(b',', 0u8, b'\n', Encoding::Windows1252, false);

        // Open the file; any error aborts the test with its message.
        let reader =
            CsvReaderWithMap::open("data.csv", &cfg).unwrap_or_else(|e| panic!("{}", e));

        // Slice handed to the parallel processor.
        let bytes = reader.get_slice();

        // Counter shared with the closure; its `Arc<Mutex<_>>` payload type
        // must match the closure's third parameter.
        let counter = Shared::<i32>::default();

        // Closure passed to `parallel_processing_csv`.
        let per_row = |row: &mut RowParallel<'_>, id_thread: usize, target: Arc<Mutex<i32>>| {
            // The thread id is available here but not needed.
            let _ = id_thread;
            // Current row.
            let _actual = row.get_row();
            // Look ahead at the following row.
            let upcoming = row.peek_next();

            // Count this invocation.
            *target.lock().unwrap() += 1;
        };

        // Run the parallel pass.
        // NOTE(review): `b';'` is passed here while the config above uses
        // `b','` — confirm which delimiter `data.csv` actually uses.
        parallel_processing_csv(bytes, b'\n', b';', b'"', false, per_row, counter.arc());

        println!("Iterated Lines: {:.2}", counter.lock())
    }
}
146}