csv_pipeline/lib.rs
1//! CSV processing library inspired by [csvsc](https://crates.io/crates/csvsc)
2//!
3//! ## Get started
4//!
5//! The first thing you need is to create a [`Pipeline`]. This can be done by calling [`Pipeline::from_reader`] with a [`csv::Reader`], or [`Pipeline::from_path`] with a path.
6//!
7//! Once you have a pipeline, there are various methods available which let you add your desired processing steps. Check the [`Pipeline`] for more details and examples.
8//!
9//! In the end, you may want to write the result somewhere. To do that, you can [flush](Pipeline::flush) into a [`Target`].
10//!
11//! Finally, you probably want to run the pipeline. There are a few options:
12//! - [`Pipeline::build`] gives you a [`PipelineIter`] which you can iterate through
13//! - [`Pipeline::run`] runs through the pipeline until it finds an error, or the end
14//! - [`Pipeline::collect_into_string`] runs the pipeline and returns the csv as a `Result<String, Error>`. Can be a convenient alternative to flushing to a [`StringTarget`](target::StringTarget).
15//!
16//! ## Basic Example
17//!
18//! ```
19//! use csv_pipeline::{Pipeline, Transformer};
20//!
21//! // First create a pipeline from a CSV file path
22//! let csv = Pipeline::from_path("test/Countries.csv")
23//! .unwrap()
24//! // Add a column with values computed from a closure
25//! .add_col("Language", |headers, row| {
26//! match headers.get_field(row, "Country") {
27//! Some("Norway") => Ok("Norwegian".into()),
28//! _ => Ok("Unknown".into()),
29//! }
30//! })
31//! // Make the "Country" column uppercase
32//! .rename_col("Country", "COUNTRY")
33//! .map_col("COUNTRY", |id_str| Ok(id_str.to_uppercase()))
34//! // Collect the csv into a string
35//! .collect_into_string()
36//! .unwrap();
37//!
38//! assert_eq!(
39//! csv,
40//! "ID,COUNTRY,Language\n\
41//! 1,NORWAY,Norwegian\n\
42//! 2,TUVALU,Unknown\n"
43//! );
44//! ```
45//!
46//! ## Transform Example
47//! ```
48//! use csv_pipeline::{Pipeline, Transformer};
49//!
50//! let source = "\
51//! Person,Score\n\
52//! A,1\n\
53//! A,8\n\
54//! B,3\n\
55//! B,4\n";
56//! let reader = csv::Reader::from_reader(source.as_bytes());
57//! let csv = Pipeline::from_reader(reader)
58//! .unwrap()
59//! .map(|_headers, row| Ok(row))
60//! // Transform into a new csv
61//! .transform_into(|| {
62//! vec![
63//! // Keep every Person
64//! Transformer::new("Person").keep_unique(),
65//! // Sum the scores into a "Total score" column
66//! Transformer::new("Total score").from_col("Score").sum(0),
67//! ]
68//! })
69//! .collect_into_string()
70//! .unwrap();
71//!
72//! assert_eq!(
73//! csv,
74//! "Person,Total score\n\
75//! A,9\n\
76//! B,7\n"
77//! );
78//! ```
79//!
80
81use std::path::PathBuf;
82
83mod headers;
84mod pipeline;
85mod pipeline_iterators;
86mod transform;
87
88pub use headers::Headers;
89pub use pipeline::{Pipeline, PipelineIter};
90pub use transform::{Transform, Transformer};
91
92pub mod target;
93/// Helper for building a target to flush data into
94pub struct Target {}
95impl Target {
96 pub fn path<P: Into<PathBuf>>(path: P) -> target::PathTarget {
97 target::PathTarget::new(path)
98 }
99 pub fn stdout() -> target::StdoutTarget {
100 target::StdoutTarget::new()
101 }
102 pub fn stderr() -> target::StderrTarget {
103 target::StderrTarget::new()
104 }
105 pub fn string<'a>(s: &'a mut String) -> target::StringTarget {
106 target::StringTarget::new(s)
107 }
108}
109
110/// Alias of [`csv::StringRecord`]
111pub type Row = csv::StringRecord;
112/// Alias of `Result<Row, Error>`
113pub type RowResult = Result<Row, PlError>;
114
115/// Error originating from the specified pipeline source index
116#[derive(Debug)]
117pub struct PlError {
118 pub error: Error,
119 pub source: usize,
120}
121
122#[derive(Debug)]
123pub enum Error {
124 /// CSV and IO errors are in here.
125 Csv(csv::Error),
126 /// The column of this name is missing.
127 MissingColumn(String),
128 /// This column name appears twice.
129 DuplicateColumn(String),
130 /// This field has an invalid format.
131 InvalidField(String),
132 /// Two pipeline sources don't have the same headers.
133 MismatchedHeaders(Row, Row),
134}
135impl Error {
136 pub fn at_source(self, source: usize) -> PlError {
137 PlError {
138 error: self,
139 source,
140 }
141 }
142}