
//! CSV processing library inspired by [csvsc](https://crates.io/crates/csvsc)
//!
//! ## Get started
//!
//! The first thing you need is to create a [`Pipeline`]. This can be done by calling [`Pipeline::from_reader`] with a [`csv::Reader`], or [`Pipeline::from_path`] with a path.
//!
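//! For example, both constructors can be used like this (the file path is the one used in the Basic Example below, and the inline CSV is illustrative):
//!
//! ```
//! use csv_pipeline::Pipeline;
//!
//! // From a file path
//! let from_path = Pipeline::from_path("test/Countries.csv").unwrap();
//!
//! // From any `csv::Reader`
//! let reader = csv::Reader::from_reader("ID,Country\n1,Norway\n".as_bytes());
//! let from_reader = Pipeline::from_reader(reader).unwrap();
//! ```
//!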
//! Once you have a pipeline, there are various methods that let you add your desired processing steps. Check the [`Pipeline`] docs for more details and examples.
//!
//! In the end, you may want to write the result somewhere. To do that, you can [flush](Pipeline::flush) into a [`Target`].
//!
//! Finally, you probably want to run the pipeline. There are a few options (see the sketch after this list):
//! - [`Pipeline::build`] gives you a [`PipelineIter`] which you can iterate through
//! - [`Pipeline::run`] runs through the whole pipeline, stopping at the first error
//! - [`Pipeline::collect_into_string`] runs the pipeline and returns the CSV as a `Result<String, Error>`. This can be a convenient alternative to flushing into a [`StringTarget`](target::StringTarget).
//!
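//! For example, flushing into a [`StringTarget`](target::StringTarget) and then running could look roughly like the sketch below. This is only a sketch: it assumes [`Pipeline::flush`] accepts the value returned by the [`Target`] helpers and can be chained, and that [`Pipeline::run`] returns a `Result`.
//!
//! ```ignore
//! use csv_pipeline::{Pipeline, Target};
//!
//! let mut output = String::new();
//! Pipeline::from_path("test/Countries.csv")
//!   .unwrap()
//!   // Assumption: `flush` takes the target returned by `Target::string`
//!   .flush(Target::string(&mut output))
//!   // Assumption: `run` returns a `Result`, so errors surface here
//!   .run()
//!   .unwrap();
//!
//! assert!(output.starts_with("ID,Country"));
//! ```
//!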
//! ## Basic Example
//!
//! ```
//! use csv_pipeline::Pipeline;
//!
//! // First create a pipeline from a CSV file path
//! let csv = Pipeline::from_path("test/Countries.csv")
//!   .unwrap()
//!   // Add a column with values computed from a closure
//!   .add_col("Language", |headers, row| {
//!     match headers.get_field(row, "Country") {
//!       Some("Norway") => Ok("Norwegian".into()),
//!       _ => Ok("Unknown".into()),
//!     }
//!   })
//!   // Rename the "Country" header to uppercase...
//!   .rename_col("Country", "COUNTRY")
//!   // ...and uppercase its values too
//!   .map_col("COUNTRY", |country| Ok(country.to_uppercase()))
//!   // Collect the CSV into a string
//!   .collect_into_string()
//!   .unwrap();
//!
//! assert_eq!(
//!   csv,
//!   "ID,COUNTRY,Language\n\
//!     1,NORWAY,Norwegian\n\
//!     2,TUVALU,Unknown\n"
//! );
//! ```
//!
//! ## Transform Example
//! ```
//! use csv_pipeline::{Pipeline, Transformer};
//!
//! let source = "\
//!   Person,Score\n\
//!   A,1\n\
//!   A,8\n\
//!   B,3\n\
//!   B,4\n";
//! let reader = csv::Reader::from_reader(source.as_bytes());
//! let csv = Pipeline::from_reader(reader)
//!   .unwrap()
//!   // `map` lets you inspect or modify rows; this one passes them through unchanged
//!   .map(|_headers, row| Ok(row))
//!   // Transform into a new CSV
//!   .transform_into(|| {
//!     vec![
//!       // Keep each unique Person
//!       Transformer::new("Person").keep_unique(),
//!       // Sum the scores into a "Total score" column
//!       Transformer::new("Total score").from_col("Score").sum(0),
//!     ]
//!   })
//!   .collect_into_string()
//!   .unwrap();
//!
//! assert_eq!(
//!   csv,
//!   "Person,Total score\n\
//!     A,9\n\
//!     B,7\n"
//! );
//! ```
//!

use std::path::PathBuf;

mod headers;
mod pipeline;
mod pipeline_iterators;
mod transform;

pub use headers::Headers;
pub use pipeline::{Pipeline, PipelineIter};
pub use transform::{Transform, Transformer};

pub mod target;
/// Helper for building a target to flush data into
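///
/// Each helper below is a thin constructor for the matching type in the
/// [`target`] module. A small illustration (`output.csv` is just an example path):
///
/// ```no_run
/// use csv_pipeline::Target;
///
/// let mut out = String::new();
/// let string_target = Target::string(&mut out); // write into a `String`
/// let path_target = Target::path("output.csv"); // write into a file
/// let stdout_target = Target::stdout();         // write to standard output
/// ```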
pub struct Target {}
impl Target {
	/// Target the file at the given path
	pub fn path<P: Into<PathBuf>>(path: P) -> target::PathTarget {
		target::PathTarget::new(path)
	}
	/// Target standard output
	pub fn stdout() -> target::StdoutTarget {
		target::StdoutTarget::new()
	}
	/// Target standard error
	pub fn stderr() -> target::StderrTarget {
		target::StderrTarget::new()
	}
	/// Target the given `String` buffer
	pub fn string(s: &mut String) -> target::StringTarget {
		target::StringTarget::new(s)
	}
}

/// Alias of [`csv::StringRecord`]
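///
/// Since [`Row`] is just [`csv::StringRecord`], rows can be built directly from
/// vectors of fields, e.g. when producing new rows in a [`Pipeline::map`] step:
///
/// ```
/// use csv_pipeline::Row;
///
/// let row = Row::from(vec!["1", "Norway"]);
/// assert_eq!(row.get(1), Some("Norway"));
/// ```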
pub type Row = csv::StringRecord;
/// Alias of `Result<Row, PlError>`
pub type RowResult = Result<Row, PlError>;

/// An [`Error`] together with the index of the pipeline source it originated from
#[derive(Debug)]
pub struct PlError {
	pub error: Error,
	pub source: usize,
}

#[derive(Debug)]
pub enum Error {
	/// A CSV or IO error from the underlying [`csv`] crate.
	Csv(csv::Error),
	/// The column with this name is missing.
	MissingColumn(String),
	/// This column name appears twice.
	DuplicateColumn(String),
	/// This field has an invalid format.
	InvalidField(String),
	/// Two pipeline sources don't have the same headers.
	MismatchedHeaders(Row, Row),
}
impl Error {
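	/// Attach the index of the pipeline source this error came from, turning it
	/// into a [`PlError`]. For example:
	///
	/// ```
	/// use csv_pipeline::Error;
	///
	/// let err = Error::MissingColumn("Country".into()).at_source(0);
	/// assert_eq!(err.source, 0);
	/// ```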
	pub fn at_source(self, source: usize) -> PlError {
		PlError {
			error: self,
			source,
		}
	}
}