csv_slice/lib.rs
1// src/lib.rs
2// This library provides utilities for extracting specific rows and columns from CSV files.
3// It offers two main functions:
4// 1. extract_rows: Extracts a range of rows from a CSV file
5// 2. extract_columns: Extracts specific columns from a CSV file by column name
6
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// CSV fixture shared by the happy-path tests: a header plus three data rows.
    const PEOPLE_CSV: &str = "Name,Age\nAlice,30\nBob,25\nCharlie,40\n";

    /// Writes `contents` to a fresh temporary file and hands the file back.
    ///
    /// The returned handle must stay alive for as long as its path is in use,
    /// because the file is removed when the handle is dropped.
    fn temp_csv(contents: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents.as_bytes()).unwrap();
        file
    }

    /// `extract_rows` with the range `0..2` yields exactly the first two data rows.
    #[test]
    fn test_extract_rows() {
        let fixture = temp_csv(PEOPLE_CSV);

        let rows = extract_rows(fixture.path(), 0, 2).unwrap();

        // Only Alice and Bob fall inside the requested range.
        assert_eq!(rows.len(), 2);
        let first_fields: Vec<Option<&str>> = rows.iter().map(|row| row.get(0)).collect();
        assert_eq!(first_fields, [Some("Alice"), Some("Bob")]);
    }

    /// `extract_columns` returns one entry per data row for a column that exists.
    #[test]
    fn test_extract_columns() {
        let fixture = temp_csv(PEOPLE_CSV);

        let columns = extract_columns(fixture.path(), &["Name"]).unwrap();

        // One output row per data row, each holding the single requested value.
        assert_eq!(columns.len(), 3);
        let names: Vec<&str> = columns.iter().map(|row| row[0].as_str()).collect();
        assert_eq!(names, ["Alice", "Bob", "Charlie"]);
    }

    /// Requesting a column that is absent from the header is reported as an error.
    #[test]
    fn test_extract_columns_not_found() {
        let fixture = temp_csv("Name,Age\nAlice,30\n");

        // "Email" does not appear in the header row.
        let outcome = extract_columns(fixture.path(), &["Email"]);

        assert!(outcome.is_err());
    }
}
97// END TESTS
98// Import required dependencies
99use csv::StringRecord; // For handling CSV records
100use std::fs::File; // For file operations
101use std::io::BufReader; // For buffered reading from files
102mod error; // Import the error module
103pub use crate::error::CsvSliceError; // Re-export the CsvSliceError type
104
105/// Extracts a range of rows from a CSV file.
106///
107/// # Parameters
108/// * `path` - Path to the CSV file. Can be any type that can be converted to a Path.
109/// * `start` - The index of the first row to extract (0-based, excluding header).
110/// * `end` - The index after the last row to extract (exclusive).
111///
112/// # Returns
113/// * `Result<Vec<StringRecord>, CsvSliceError>` - A vector of StringRecords on success,
114/// or a CsvSliceError on failure.
115///
116/// # Example
117/// ```
118/// use csv_slice::extract_rows;
119/// let rows = extract_rows("data.csv", 0, 5).unwrap();
120/// // This extracts the first 5 rows from data.csv
121/// ```
122pub fn extract_rows<P: AsRef<std::path::Path>>(
123 path: P,
124 start: usize,
125 end: usize,
126) -> Result<Vec<StringRecord>, CsvSliceError> {
127 // Open the file at the specified path
128 let file = File::open(path)?;
129
130 // Create a CSV reader with buffered IO for better performance
131 let mut rdr = csv::Reader::from_reader(BufReader::new(file));
132
133 // Initialize an empty vector to store the results
134 let mut result = Vec::new();
135
136 // Iterate through all records in the CSV file
137 for (i, record) in rdr.records().enumerate() {
138 // Parse the record, propagating any errors
139 let record = record?;
140
141 // If the current index is within our desired range, add it to the results
142 if i >= start && i < end {
143 result.push(record);
144 }
145
146 // If we've reached the end of our desired range, stop processing
147 if i >= end {
148 break;
149 }
150 }
151
152 // Return the collected results
153 Ok(result)
154}
155
156/// Extracts specific columns from a CSV file by column name.
157///
158/// # Parameters
159/// * `path` - Path to the CSV file. Can be any type that can be converted to a Path.
160/// * `columns` - Array of column names to extract.
161///
162/// # Returns
163/// * `Result<Vec<Vec<String>>, CsvSliceError>` - A vector of vectors containing the
164/// extracted column data on success, or a CsvSliceError on failure.
165///
166/// # Example
167/// ```
168/// use csv_slice::extract_columns;
169/// let data = extract_columns("data.csv", &["Name", "Email"]).unwrap();
170/// // This extracts the Name and Email columns from data.csv
171/// ```
172pub fn extract_columns<P: AsRef<std::path::Path>>(
173 path: P,
174 columns: &[&str],
175) -> Result<Vec<Vec<String>>, CsvSliceError> {
176 // Open the file at the specified path
177 let file = File::open(path)?;
178
179 // Create a CSV reader with buffered IO for better performance
180 let mut rdr = csv::Reader::from_reader(BufReader::new(file));
181
182 // Get the headers from the CSV file and clone them for later use
183 let headers = rdr.headers()?.clone();
184
185 // Find the indices of the requested columns in the header row
186 let indices: Vec<_> = columns
187 .iter()
188 .map(|&col| headers.iter().position(|h| h == col)
189 // If a column is not found, return a ColumnNotFound error
190 .ok_or_else(|| CsvSliceError::ColumnNotFound(col.to_string())))
191 .collect::<Result<_, _>>()?;
192
193 // Initialize an empty vector to store the results
194 let mut result = Vec::new();
195
196 // Process each record in the CSV file
197 for record in rdr.records() {
198 // Parse the record, propagating any errors
199 let record = record?;
200
201 // Extract the values from the requested columns for this record
202 let row: Vec<String> = indices.iter()
203 .map(|&i| record.get(i).unwrap_or("").to_string())
204 .collect();
205
206 // Add the extracted values to the result
207 result.push(row);
208 }
209
210 // Return the collected results
211 Ok(result)
212}