/*!
`csvsc` is a library for building csv file processors.

Imagine you have N csv files with the same structure and you want to use them
to make other M csv files whose information depends in some way on the
original files. This is what csvsc is for. With this tool you can build a
processing chain that will modify each of the input files and generate new
output files with the modifications.

# Getting started

Start a new binary project with cargo:

```text
$ cargo new --bin mycsvprocessor
```

Add `csvsc` and `encoding` as dependencies in `Cargo.toml`:

```toml
[dependencies]
csvsc = { git = "https://github.com/categulario/csvsc-rs.git" }
encoding = "*"
```

Now build your processing chain. In this example, a processing chain is built
with the following characteristics:

1. It takes files `1.csv` and `2.csv` as input with `UTF-8` encoding,
2. adds a virtual column `_target` which will define the output file and uses
   the `a` column of both input files in its definition,
3. eliminates column `b`.

```rust
use csvsc::ColSpec;
use csvsc::InputStream;
use csvsc::ReaderSource;
use csvsc::RowStream;
use csvsc::FlushTarget;

use encoding::all::UTF_8;

fn main() {
    let filenames = vec!["test/assets/1.csv", "test/assets/2.csv"];

    let mut chain = InputStream::from_readers(
            filenames
                .iter()
                .map(|f| ReaderSource::from_path(f).unwrap()),
            UTF_8,
        )
        .add(ColSpec::Mix {
            colname: "_target".to_string(),
            coldef: "output/{a}.csv".to_string(),
        }).unwrap()
        .del(vec!["b"])
        .flush(FlushTarget::Column("_target".to_string())).unwrap()
        .into_iter();

    while let Some(item) = chain.next() {
        if let Err(e) = item {
            eprintln!("failed {:?}", e);
        }
    }
}
```

Executing this project will lead to an `output/` folder being created and
inside there will be as many files as there are different values in
column `a`.

Another example, but for a single file whose name is passed as the first
command line argument:

```rust
use std::env;

use csvsc::ColSpec;
use csvsc::InputStream;
use csvsc::ReaderSource;
use csvsc::RowStream;
use csvsc::FlushTarget;

use encoding::all::UTF_8;

fn main() {
    // args().next() would be the program name; the filename is the
    // first real argument, hence nth(1).
    let filename = env::args().nth(1).unwrap();

    let reader_source = ReaderSource::from_path(filename).unwrap();

    let mut chain = InputStream::new(
            reader_source,
            UTF_8,
        )
        .add(ColSpec::Mix {
            colname: "_target".to_string(),
            coldef: "output/{a}.csv".to_string(),
        }).unwrap()
        .del(vec!["b"])
        .flush(FlushTarget::Column("_target".to_string())).unwrap()
        .into_iter();

    while let Some(item) = chain.next() {
        if let Err(e) = item {
            eprintln!("failed {:?}", e);
        }
    }
}
```

To know which methods are available in a processing chain, go to the
[RowStream](trait.RowStream.html) documentation.

Columns with names that start with an underscore will not be written to the
output files.
*/

mod add;
mod add_with;
mod adjacent_group;
mod del;
mod error;
mod flush;
mod group;
mod headers;
mod input;
mod inspect;
mod map;
mod mock;
mod reduce;
mod rename;
mod row_stream;

pub mod col;

pub use add::{Add, ColSpec};
pub use add_with::AddWith;
pub use adjacent_group::AdjacentGroup;
pub use del::Del;
pub use error::{Error, RowResult};
pub use flush::{Flush, FlushTarget};
pub use group::Group;
pub use headers::Headers;
pub use input::{InputStream, ReaderSource};
pub use inspect::Inspect;
pub use map::{MapCol, MapRow};
pub use mock::MockStream;
pub use reduce::{aggregate, Reduce};
pub use rename::Rename;
pub use row_stream::RowStream;

/// Type alias of csv::StringRecord. Represents a row of data.
pub type Row = csv::StringRecord;

/// A column with this name will be added to each record. The column will
/// have as a value the absolute path to the input file and serves to extract
/// information that may be contained, for example, in the file name.
/// It is useful in combination with the processor [Add](struct.Add.html).
// `'static` is implied on consts; spelling it out triggers clippy's
// `redundant_static_lifetimes` lint.
pub const SOURCE_FIELD: &str = "_source";

/// Things that could go wrong while building a group or adjacent group
#[derive(Debug)]
pub enum GroupBuildError {
    /// The grouping key definition could not be applied; carries a
    /// description of the offending key.
    GroupingKeyError(String),
}