Skip to main content

rustsv/
lib.rs

1#![warn(missing_docs)]
2#![warn(missing_doc_code_examples)]
3
4//! ## What is RustSV?
5//! <p>
6//! RustSV (referred to as RSV) is a CSV parser, built for the modern age.
7//!
8//! It focuses on usability, and has the advantage of not requiring the use of [Serde](https://github.com/fatalcenturion/RSV#reference-2-serde-free-serialization) to parse your files into a programmatically readable structure.
9//!
10//! See the source code [here](https://github.com/fatalcenturion/RSV)
11//!
12//! Found a bug? [report it!](https://github.com/fatalcenturion/RSV/issues/new)
13//!
14//! ## Basic usage:
15//!
16//! ### Parsing a string:
17//! ```
18//! use rustsv::prelude::*;
19//! // Create our input data
20//! let input: &str = "surname,initial,address,phone number\n\
21//! Smith,A,\"14 Made up Drive, Made up City, Ohio\",216-235-3744\n\
22//! Doe,J,\"15 Fake Street, Phonyville, Texas\",210-214-5737";
23//!
24//! // Parse the `input` into `Content`
25//! // The parameters are as follows:
26//! // 1. Input: String   - The text you wish to parse
27//! // 2. Delimiter: Char - The character to delimit by
28//! // 3. Headers: Bool   - If the parser should use the first row in the file as headers
29//! let content: Content = parse(input, ',', true);
30//! ```
31//! The above method will provide an instance of [`Content`](structs/struct.Content.html)
32//!
33//! ### Parsing a file:
34//! > Note: this code is correct at the time of documentation, it has the `no_run` tag to ensure the doc tests do not fail due to an unavoidable IO Error
35//! ```no_run
36//! use rustsv::prelude::*;
37//!
38//! // Parse the `path`'s content into `Content`
39//! // The parameters are as follows:
//! // 1. Path: String   - The path of the file you wish to parse
41//! // 2. Delimiter: Char - The character to delimit by
42//! // 3. Headers: Bool   - If the parser should use the first row in the file as headers
//! let content: Content = read("path/to/file.csv", ',', true)?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
44//! ```
45//! The above method will provide a result containing an error, or [`Content`](structs/struct.Content.html)
46//!
47//! ### Parsing a remote file, from a URL:
//! #### This method requires the `http` feature to be enabled.
49//!  > Note: this code is correct at the time of documentation, it has the `no_run` tag to ensure the doc tests do not fail due to an unavoidable IO Error
50//! ```no_run
51//! use rustsv::prelude::*;
52//! // Parse the `URL`'s content into `Content`
53//! // The parameters are as follows:
//! // 1. URL: String   - The URL of the remote file you wish to fetch and parse
55//! // 2. Delimiter: Char - The character to delimit by
56//! // 3. Headers: Bool   - If the parser should use the first row in the file as headers
//! let content: Content = fetch("https://domain.tld/path/to/file", ',', true)?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
58//! ```
59//! The above method will provide a result containing an error, or [`Content`](structs/struct.Content.html)
60use std::error::Error;
61
62
63mod tokenizer;
64
65mod parser;
66
67pub mod structs;
68
69pub mod prelude;
70
71/// Parses the provided String into an instance of [`Content`](structs/struct.Content.html)
72///
73/// The method will deconstruct the provided data, turning it into a special, serialization free structure [`Content`](structs/struct.Content.html)
74///
75/// `content` the CSV data to parse
76/// `delimiter` The delimiter used in the data, for example a pipe (`|`) or a tab (`   `)
77/// `has_headers` If the data's first line contains the titles of each column or not
78///
79/// # Examples
80///
81/// Basic usage:
82/// ```
83/// use rustsv::prelude::*;
84/// // Create our input data
85/// let input: &str = "surname,initial,address,phone number\n\
86/// Smith,A,\"14 Made up Drive, Made up City, Ohio\",216-235-3744\n\
87/// Doe,J,\"15 Fake Street, Phonyville, Texas\",210-214-5737";
88///
89/// // Parse the `input` into `Content`
90/// let content: Content = parse(input, ',', true);
91///
92/// assert_eq!(content[0]["surname"], String::from("Smith"))
93/// ```
94pub fn parse<A>(content: A, delimiter: char, has_headers: bool) -> structs::Content where A: Into<String> {
95    let tree = tokenizer::tokenize(delimiter, content.into());
96    let body = parser::parse(tree, has_headers);
97    body
98}
99
100/// Reads a file and parses it into an instance of [`Content`](structs/struct.Content.html)
101///
102/// The method takes a path to a file, and then deconstructs the data, turning it into a special, serialization free structure [`Content`](structs/struct.Content.html)
103///
104/// `path` the path to the file
105/// `delimiter` The delimiter used in the data, for example a pipe (`|`) or a tab (`   `)
106/// `has_headers` If the data's first line contains the titles of each column or not
107///
108/// # Examples
109///  > Note: this code is correct at the time of documentation, it has the `no_run` tag to ensure the doc tests do not fail due to an unavoidable IO Error
110/// Basic usage:
111/// ```no_run
112/// use rustsv::prelude::*;
113/// // Parse the `input` into `Content`
114/// let content: Content = read("./path/to/file.csv", ',', true)?;
115/// ```
116pub fn read<A>(path: A, delimiter: char, has_headers: bool) -> Result<structs::Content, Box<dyn Error>> where A: Into<String> {
117    let file = std::fs::read_to_string(path.into());
118    return if file.is_ok() {
119        let c = file.unwrap();
120        Ok(parse(c, delimiter, has_headers))
121    } else {
122        Err(Box::new(file.unwrap_err()))
123    };
124}
125
126
#[cfg(feature = "http")]
/// Fetches a URL and parses it into an instance of [`Content`](structs/struct.Content.html) (Requires the `http` feature)
///
/// The method performs a blocking HTTP GET request for `url`, reads the response
/// body as text, and then passes it to `parse`, which deconstructs the data into
/// the serialization free structure [`Content`](structs/struct.Content.html).
///
/// Note that the HTTP status code of the response is not inspected here; whatever
/// body is returned gets parsed.
///
/// # Parameters
///
/// * `url` - the URL to fetch (anything convertible into a `String`)
/// * `delimiter` - the delimiter used in the data, for example a pipe (`|`) or a tab (`\t`)
/// * `has_headers` - whether the data's first line contains the titles of each column
///
/// # Errors
///
/// Returns the boxed `reqwest` error when the request itself fails, or when the
/// response body cannot be read as text.
///
/// # Examples
///
/// > Note: this example carries the `no_run` tag so that the doc tests do not
/// > fail due to an unavoidable IO error.
///
/// Basic usage:
/// ```no_run
/// use rustsv::prelude::*;
/// // Parse the remote file's content into `Content`
/// let content: Content = fetch("https://domain.tld/path/to/file", ',', true)?;
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub fn fetch<A>(
    url: A,
    delimiter: char,
    has_headers: bool,
) -> Result<structs::Content, Box<dyn Error>>
where
    A: Into<String>,
{
    let address: String = url.into();
    // Both the request and the body read are fallible; propagate either failure
    // to the caller instead of panicking on a body that cannot be read.
    let text = reqwest::blocking::get(&address)?.text()?;
    Ok(parse(text, delimiter, has_headers))
}
155
156pub use crate::structs::{Entry, Content};
157
#[cfg(test)]
mod tests {
    use crate::prelude::*;

    // CSV sample file is provided free of charge by EForExcel (http://eforexcel.com/wp/downloads-18-sample-csv-files-data-sets-for-testing-sales/)
    // Shared by the parsing tests so the fixture only lives in one place.
    const SALES_CSV: &str = "Region,Country,Item Type,Sales Channel,Order Priority,Order Date,Order ID,Ship Date,Units Sold,Unit Price,Unit Cost,Total Revenue,Total Cost,Total Profit\nAustralia and Oceania,Tuvalu,Baby Food,Offline,H,5/28/2010,669165933,6/27/2010,9925,255.28,159.42,2533654.00,1582243.50,951410.50\nCentral America and the Caribbean,Grenada,Cereal,Online,C,8/22/2012,963881480,9/15/2012,2804,205.70,117.11,576782.80,328376.44,248406.36";

    /// Parses the sample data with headers and checks a field of the first row.
    #[test]
    fn test_parsing() {
        let csv = crate::parse(SALES_CSV, ',', true);
        assert_eq!(csv[0]["Region"], String::from("Australia and Oceania"));
    }

    /// Checks a field of the first row, then walks every entry and every
    /// key/value pair to exercise the iterator implementations.
    #[test]
    fn test_parsing_iter() {
        let csv = crate::parse(SALES_CSV, ',', true);
        assert_eq!(csv[0]["Region"], String::from("Australia and Oceania"));
        for entry in csv {
            for reference in entry {
                println!("{}: {}", reference.0, reference.1);
            }
        }
    }

    /// Fetches a remote CSV file and checks a field of the first row.
    ///
    /// `crate::fetch` only exists when the `http` feature is enabled, so this
    /// test is gated on the same feature; without the gate the whole test
    /// module fails to compile in a default build. Requires network access.
    #[cfg(feature = "http")]
    #[test]
    fn test_fetch_parse() {
        let url = "https://www.stats.govt.nz/assets/Uploads/Business-price-indexes/Business-price-indexes-June-2020-quarter/Download-data/business-price-indexes-june-2020-quarter-corrections-to-previously-published-statistics.csv";
        let csv = crate::fetch(url, ',', true).unwrap();
        assert_eq!(csv[0]["Revised"], String::from("1434"))
    }

    /// Smoke test: only verifies that parsing an owned `String`, calling `get`
    /// and indexing the first row do not panic. The final assertion is a
    /// placeholder and checks nothing about the parsed data.
    #[test]
    fn test_random_func() {
        let input: String = "first,middle,last\ntom,bob,scott".to_string();
        let content: Content = parse(input, ',', true);
        println!("Hello, world! content = {:?} content2 = {:?}", content.get(2).is_some(), content[0]);
        assert_eq!(1, 1);
    }
}