Skip to main content

csv_slice/
lib.rs

1// src/lib.rs
2// This library provides utilities for extracting specific rows and columns from CSV files.
3// It offers two main functions:
4// 1. extract_rows: Extracts a range of rows from a CSV file
5// 2. extract_columns: Extracts specific columns from a CSV file by column name
6
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Header line plus three data rows, shared by the row and column tests.
    const PEOPLE_CSV: &str = "Name,Age\nAlice,30\nBob,25\nCharlie,40\n";

    /// Writes `contents` into a fresh temporary file and hands back its handle.
    ///
    /// The caller must keep the returned handle alive for as long as the file
    /// is being read: the temporary file goes away when the handle is dropped.
    fn csv_fixture(contents: &str) -> NamedTempFile {
        let mut handle = NamedTempFile::new().unwrap();
        handle.write_all(contents.as_bytes()).unwrap();
        handle
    }

    /// `extract_rows` with the range 0..2 yields the first two data rows
    /// (the header line is not counted).
    #[test]
    fn test_extract_rows() {
        let fixture = csv_fixture(PEOPLE_CSV);

        let rows = extract_rows(fixture.path(), 0, 2).unwrap();

        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0].get(0), Some("Alice"));
        assert_eq!(rows[1].get(0), Some("Bob"));
    }

    /// `extract_columns` returns one entry per data row, each holding the
    /// value of the requested column.
    #[test]
    fn test_extract_columns() {
        let fixture = csv_fixture(PEOPLE_CSV);

        let columns = extract_columns(fixture.path(), &["Name"]).unwrap();

        assert_eq!(columns.len(), 3);
        assert_eq!(columns[0][0], "Alice");
        assert_eq!(columns[1][0], "Bob");
        assert_eq!(columns[2][0], "Charlie");
    }

    /// Asking for a column name that is absent from the header is an error.
    #[test]
    fn test_extract_columns_not_found() {
        let fixture = csv_fixture("Name,Age\nAlice,30\n");

        let outcome = extract_columns(fixture.path(), &["Email"]);

        assert!(outcome.is_err());
    }
}
97// END TESTS
98// Import required dependencies
99use csv::StringRecord;  // For handling CSV records
100use std::fs::File;      // For file operations
101use std::io::BufReader; // For buffered reading from files
102mod error;              // Import the error module
103pub use crate::error::CsvSliceError; // Re-export the CsvSliceError type
104
105/// Extracts a range of rows from a CSV file.
106///
107/// # Parameters
108/// * `path` - Path to the CSV file. Can be any type that can be converted to a Path.
109/// * `start` - The index of the first row to extract (0-based, excluding header).
110/// * `end` - The index after the last row to extract (exclusive).
111///
112/// # Returns
113/// * `Result<Vec<StringRecord>, CsvSliceError>` - A vector of StringRecords on success,
114///   or a CsvSliceError on failure.
115///
116/// # Example
117/// ```
118/// use csv_slice::extract_rows;
119/// let rows = extract_rows("data.csv", 0, 5).unwrap();
120/// // This extracts the first 5 rows from data.csv
121/// ```
122pub fn extract_rows<P: AsRef<std::path::Path>>(
123    path: P,
124    start: usize,
125    end: usize,
126) -> Result<Vec<StringRecord>, CsvSliceError> {
127    // Open the file at the specified path
128    let file = File::open(path)?;
129    
130    // Create a CSV reader with buffered IO for better performance
131    let mut rdr = csv::Reader::from_reader(BufReader::new(file));
132    
133    // Initialize an empty vector to store the results
134    let mut result = Vec::new();
135
136    // Iterate through all records in the CSV file
137    for (i, record) in rdr.records().enumerate() {
138        // Parse the record, propagating any errors
139        let record = record?;
140        
141        // If the current index is within our desired range, add it to the results
142        if i >= start && i < end {
143            result.push(record);
144        }
145        
146        // If we've reached the end of our desired range, stop processing
147        if i >= end {
148            break;
149        }
150    }
151    
152    // Return the collected results
153    Ok(result)
154}
155
156/// Extracts specific columns from a CSV file by column name.
157///
158/// # Parameters
159/// * `path` - Path to the CSV file. Can be any type that can be converted to a Path.
160/// * `columns` - Array of column names to extract.
161///
162/// # Returns
163/// * `Result<Vec<Vec<String>>, CsvSliceError>` - A vector of vectors containing the
164///   extracted column data on success, or a CsvSliceError on failure.
165///
166/// # Example
167/// ```
168/// use csv_slice::extract_columns;
169/// let data = extract_columns("data.csv", &["Name", "Email"]).unwrap();
170/// // This extracts the Name and Email columns from data.csv
171/// ```
172pub fn extract_columns<P: AsRef<std::path::Path>>(
173    path: P,
174    columns: &[&str],
175) -> Result<Vec<Vec<String>>, CsvSliceError> {
176    // Open the file at the specified path
177    let file = File::open(path)?;
178    
179    // Create a CSV reader with buffered IO for better performance
180    let mut rdr = csv::Reader::from_reader(BufReader::new(file));
181    
182    // Get the headers from the CSV file and clone them for later use
183    let headers = rdr.headers()?.clone();
184
185    // Find the indices of the requested columns in the header row
186    let indices: Vec<_> = columns
187        .iter()
188        .map(|&col| headers.iter().position(|h| h == col)
189            // If a column is not found, return a ColumnNotFound error
190            .ok_or_else(|| CsvSliceError::ColumnNotFound(col.to_string())))
191        .collect::<Result<_, _>>()?;
192
193    // Initialize an empty vector to store the results
194    let mut result = Vec::new();
195    
196    // Process each record in the CSV file
197    for record in rdr.records() {
198        // Parse the record, propagating any errors
199        let record = record?;
200        
201        // Extract the values from the requested columns for this record
202        let row: Vec<String> = indices.iter()
203            .map(|&i| record.get(i).unwrap_or("").to_string())
204            .collect();
205            
206        // Add the extracted values to the result
207        result.push(row);
208    }
209    
210    // Return the collected results
211    Ok(result)
212}