excelstream/csv_writer.rs
1//! CSV file writing with streaming support and compression
2
3use crate::csv::{CompressionMethod, CsvEncoder};
4use crate::error::{ExcelError, Result};
5use crate::fast_writer::StreamingZipWriter;
6use crate::types::CellValue;
7use std::fs::File;
8use std::io::{BufWriter, Write};
9use std::path::Path;
10
11/// CSV file writer with streaming capabilities and compression support
12///
13/// Writes CSV files row by row, streaming data directly to disk or compressed ZIP.
14/// Memory usage is constant (~5MB or less) regardless of dataset size.
15///
16/// # Examples
17///
18/// ```no_run
19/// use excelstream::csv_writer::CsvWriter;
20///
21/// let mut writer = CsvWriter::new("output.csv").unwrap();
22/// writer.write_row(&["Name", "Age", "City"]).unwrap();
23/// writer.write_row(&["Alice", "30", "NYC"]).unwrap();
24/// writer.save().unwrap();
25/// ```
26///
27/// # Compression
28///
29/// Auto-detects compression from file extension:
30/// - `.csv` → Uncompressed
31/// - `.csv.zst` or `.csv.zip` → Zstd compression (level 3)
32/// - `.csv.gz` → Deflate/Gzip compression (level 6)
33///
34/// ```no_run
35/// use excelstream::csv_writer::CsvWriter;
36/// use excelstream::csv::CompressionMethod;
37///
38/// // Auto-detect from extension
39/// let mut writer = CsvWriter::new("data.csv.zst").unwrap();
40///
41/// // Or explicit compression
42/// let mut writer = CsvWriter::with_compression(
43/// "data.csv.zst",
44/// CompressionMethod::Zstd,
45/// 3
46/// ).unwrap();
47/// ```
48pub struct CsvWriter {
49 // Dual-mode output
50 zip_writer: Option<StreamingZipWriter<File>>,
51 direct_writer: Option<BufWriter<File>>,
52
53 // State
54 row_count: u64,
55 buffer: Vec<u8>,
56
57 // Configuration
58 delimiter: u8,
59 quote_char: u8,
60 line_ending: &'static [u8],
61}
62
63impl CsvWriter {
64 /// Create a new CSV writer - auto-detects compression from file extension
65 ///
66 /// # File Extensions
67 /// - `.csv` → Uncompressed
68 /// - `.csv.zst` or `.csv.zip` → Zstd compression (level 3)
69 /// - `.csv.gz` → Deflate compression (level 6)
70 ///
71 /// # Examples
72 ///
73 /// ```no_run
74 /// use excelstream::csv_writer::CsvWriter;
75 ///
76 /// // Plain CSV
77 /// let mut writer = CsvWriter::new("data.csv").unwrap();
78 ///
79 /// // Zstd compressed
80 /// let mut writer = CsvWriter::new("data.csv.zst").unwrap();
81 ///
82 /// // Gzip compressed
83 /// let mut writer = CsvWriter::new("data.csv.gz").unwrap();
84 /// ```
85 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
86 let path_ref = path.as_ref();
87 let path_str = path_ref.to_str().unwrap_or("");
88
89 if path_str.ends_with(".csv.zst") || path_str.ends_with(".csv.zip") {
90 Self::with_compression(path_ref, CompressionMethod::Zstd, 3)
91 } else if path_str.ends_with(".csv.gz") {
92 Self::with_compression(path_ref, CompressionMethod::Deflate, 6)
93 } else {
94 // Plain CSV - direct file write
95 let file = File::create(path_ref)
96 .map_err(|e| ExcelError::WriteError(format!("Failed to create CSV file: {}", e)))?;
97
98 Ok(CsvWriter {
99 zip_writer: None,
100 direct_writer: Some(BufWriter::new(file)),
101 row_count: 0,
102 buffer: Vec::with_capacity(4096),
103 delimiter: b',',
104 quote_char: b'"',
105 line_ending: b"\n",
106 })
107 }
108 }
109
110 /// Create a writer with explicit compression method and level
111 ///
112 /// # Arguments
113 /// * `path` - Output file path
114 /// * `method` - Compression method (Zstd or Deflate)
115 /// * `level` - Compression level:
116 /// - Zstd: 1-21 (recommend 3 for balanced)
117 /// - Deflate: 0-9 (recommend 6 for balanced)
118 ///
119 /// # Examples
120 ///
121 /// ```no_run
122 /// use excelstream::csv_writer::CsvWriter;
123 /// use excelstream::csv::CompressionMethod;
124 ///
125 /// // Maximum Zstd compression
126 /// let mut writer = CsvWriter::with_compression(
127 /// "data.csv.zst",
128 /// CompressionMethod::Zstd,
129 /// 9
130 /// ).unwrap();
131 /// ```
132 pub fn with_compression<P: AsRef<Path>>(
133 path: P,
134 method: CompressionMethod,
135 level: u32,
136 ) -> Result<Self> {
137 let path_ref = path.as_ref();
138
139 // Create ZIP with single CSV entry
140 let mut zip = StreamingZipWriter::with_method(path_ref, method, level)
141 .map_err(|e| ExcelError::WriteError(format!("Failed to create ZIP writer: {}", e)))?;
142
143 // Entry name: extract from path or use "data.csv"
144 let entry_name = path_ref
145 .file_stem()
146 .and_then(|s| s.to_str())
147 .map(|s| {
148 // Remove .zip/.zst/.gz extension if present
149 let clean = s
150 .trim_end_matches(".csv")
151 .trim_end_matches(".zst")
152 .trim_end_matches(".gz");
153 format!("{}.csv", clean)
154 })
155 .unwrap_or_else(|| "data.csv".to_string());
156
157 zip.start_entry(&entry_name)
158 .map_err(|e| ExcelError::WriteError(format!("Failed to start ZIP entry: {}", e)))?;
159
160 Ok(CsvWriter {
161 zip_writer: Some(zip),
162 direct_writer: None,
163 row_count: 0,
164 buffer: Vec::with_capacity(4096),
165 delimiter: b',',
166 quote_char: b'"',
167 line_ending: b"\n",
168 })
169 }
170
171 /// Set custom delimiter (builder pattern)
172 ///
173 /// # Examples
174 ///
175 /// ```no_run
176 /// use excelstream::csv_writer::CsvWriter;
177 ///
178 /// let mut writer = CsvWriter::new("data.csv")
179 /// .unwrap()
180 /// .delimiter(b';');
181 /// ```
182 pub fn delimiter(mut self, delim: u8) -> Self {
183 self.delimiter = delim;
184 self
185 }
186
187 /// Set custom quote character (builder pattern)
188 pub fn quote_char(mut self, quote: u8) -> Self {
189 self.quote_char = quote;
190 self
191 }
192
193 /// Write a row of strings
194 ///
195 /// # Examples
196 ///
197 /// ```no_run
198 /// use excelstream::csv_writer::CsvWriter;
199 ///
200 /// let mut writer = CsvWriter::new("data.csv").unwrap();
201 /// writer.write_row(&["Name", "Age", "City"]).unwrap();
202 /// writer.write_row(&["Alice", "30", "NYC"]).unwrap();
203 /// writer.save().unwrap();
204 /// ```
205 pub fn write_row<I, S>(&mut self, data: I) -> Result<()>
206 where
207 I: IntoIterator<Item = S>,
208 S: AsRef<str>,
209 {
210 // Reuse buffer
211 self.buffer.clear();
212
213 // Encode row using CSV encoder
214 let encoder = CsvEncoder::new(self.delimiter, self.quote_char);
215 let fields: Vec<String> = data.into_iter().map(|s| s.as_ref().to_string()).collect();
216 let refs: Vec<&str> = fields.iter().map(|s| s.as_str()).collect();
217
218 encoder.encode_row(&refs, &mut self.buffer);
219 self.buffer.extend_from_slice(self.line_ending);
220
221 // Write to output
222 if let Some(ref mut zip) = self.zip_writer {
223 zip.write_data(&self.buffer)
224 .map_err(|e| ExcelError::WriteError(format!("Failed to write to ZIP: {}", e)))?;
225 } else if let Some(ref mut writer) = self.direct_writer {
226 writer
227 .write_all(&self.buffer)
228 .map_err(|e| ExcelError::WriteError(format!("Failed to write to file: {}", e)))?;
229 }
230
231 self.row_count += 1;
232 Ok(())
233 }
234
235 /// Write a row of typed values
236 ///
237 /// Converts CellValue types to strings before writing.
238 ///
239 /// # Examples
240 ///
241 /// ```no_run
242 /// use excelstream::csv_writer::CsvWriter;
243 /// use excelstream::types::CellValue;
244 ///
245 /// let mut writer = CsvWriter::new("data.csv").unwrap();
246 /// writer.write_row_typed(&[
247 /// CellValue::String("Alice".to_string()),
248 /// CellValue::Int(30),
249 /// CellValue::Float(75.5),
250 /// ]).unwrap();
251 /// ```
252 pub fn write_row_typed(&mut self, cells: &[CellValue]) -> Result<()> {
253 let strings: Vec<String> = cells.iter().map(|c| c.as_string()).collect();
254 let refs: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
255 self.write_row(refs)
256 }
257
258 /// Write multiple rows at once
259 ///
260 /// # Examples
261 ///
262 /// ```no_run
263 /// use excelstream::csv_writer::CsvWriter;
264 ///
265 /// let mut writer = CsvWriter::new("data.csv").unwrap();
266 /// let rows = vec![
267 /// vec!["Alice", "30"],
268 /// vec!["Bob", "25"],
269 /// ];
270 /// writer.write_rows_batch(rows).unwrap();
271 /// ```
272 pub fn write_rows_batch<I, R, S>(&mut self, rows: I) -> Result<()>
273 where
274 I: IntoIterator<Item = R>,
275 R: IntoIterator<Item = S>,
276 S: AsRef<str>,
277 {
278 for row_data in rows {
279 self.write_row(row_data)?;
280 }
281 Ok(())
282 }
283
284 /// Get the number of rows written
285 pub fn row_count(&self) -> u64 {
286 self.row_count
287 }
288
289 /// Finalize and save the CSV file
290 ///
291 /// This must be called to properly close the file.
292 /// Consumes the writer.
293 ///
294 /// # Examples
295 ///
296 /// ```no_run
297 /// use excelstream::csv_writer::CsvWriter;
298 ///
299 /// let mut writer = CsvWriter::new("data.csv").unwrap();
300 /// writer.write_row(&["Name", "Age"]).unwrap();
301 /// writer.save().unwrap();
302 /// ```
303 pub fn save(mut self) -> Result<()> {
304 if let Some(zip) = self.zip_writer.take() {
305 zip.finish()
306 .map_err(|e| ExcelError::WriteError(format!("Failed to finish ZIP: {}", e)))?;
307 } else if let Some(mut writer) = self.direct_writer.take() {
308 writer
309 .flush()
310 .map_err(|e| ExcelError::WriteError(format!("Failed to flush file: {}", e)))?;
311 }
312 Ok(())
313 }
314}
315
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Build a per-process scratch path in the system temp directory so the
    /// tests do not write into the current working directory.
    fn scratch_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("excelstream_{}_{}", std::process::id(), name))
    }

    /// Read the file back and delete it immediately, so a failing assertion in
    /// the caller cannot leave the file behind.
    fn read_and_remove(path: &Path) -> Result<String> {
        let content = std::fs::read_to_string(path);
        std::fs::remove_file(path).ok();
        Ok(content?)
    }

    /// Plain rows come out comma-separated and unquoted.
    #[test]
    fn test_plain_csv() -> Result<()> {
        let path = scratch_path("plain.csv");

        let mut writer = CsvWriter::new(&path)?;
        writer.write_row(["Name", "Age", "City"])?;
        writer.write_row(["Alice", "30", "NYC"])?;
        writer.save()?;

        let content = read_and_remove(&path)?;
        assert!(content.contains("Name,Age,City"));
        assert!(content.contains("Alice,30,NYC"));
        Ok(())
    }

    /// Typed cells are rendered to their string form before writing.
    #[test]
    fn test_typed_values() -> Result<()> {
        let path = scratch_path("typed.csv");

        let mut writer = CsvWriter::new(&path)?;
        writer.write_row_typed(&[
            CellValue::String("Test".to_string()),
            CellValue::Int(42),
            CellValue::Float(3.15),
        ])?;
        writer.save()?;

        let content = read_and_remove(&path)?;
        assert!(content.contains("Test,42,3.15"));
        Ok(())
    }

    /// Fields containing the delimiter or quotes are quoted and escaped.
    #[test]
    fn test_edge_cases() -> Result<()> {
        let path = scratch_path("edge.csv");

        let mut writer = CsvWriter::new(&path)?;
        writer.write_row(["a,b", r#"Say "Hi""#, "Line1\nLine2"])?;
        writer.save()?;

        let content = read_and_remove(&path)?;
        assert!(content.contains(r#""a,b""#));
        assert!(content.contains(r#""Say ""Hi""""#));
        Ok(())
    }
}