Skip to main content

excelstream/
csv_writer.rs

//! CSV file writing with streaming support and optional compression
2
3use crate::csv::{CompressionMethod, CsvEncoder};
4use crate::error::{ExcelError, Result};
5use crate::fast_writer::StreamingZipWriter;
6use crate::types::CellValue;
7use std::fs::File;
8use std::io::{BufWriter, Write};
9use std::path::Path;
10
11/// CSV file writer with streaming capabilities and compression support
12///
13/// Writes CSV files row by row, streaming data directly to disk or compressed ZIP.
14/// Memory usage is constant (~5MB or less) regardless of dataset size.
15///
16/// # Examples
17///
18/// ```no_run
19/// use excelstream::csv_writer::CsvWriter;
20///
21/// let mut writer = CsvWriter::new("output.csv").unwrap();
22/// writer.write_row(&["Name", "Age", "City"]).unwrap();
23/// writer.write_row(&["Alice", "30", "NYC"]).unwrap();
24/// writer.save().unwrap();
25/// ```
26///
27/// # Compression
28///
29/// Auto-detects compression from file extension:
30/// - `.csv` → Uncompressed
31/// - `.csv.zst` or `.csv.zip` → Zstd compression (level 3)
32/// - `.csv.gz` → Deflate/Gzip compression (level 6)
33///
34/// ```no_run
35/// use excelstream::csv_writer::CsvWriter;
36/// use excelstream::csv::CompressionMethod;
37///
38/// // Auto-detect from extension
39/// let mut writer = CsvWriter::new("data.csv.zst").unwrap();
40///
41/// // Or explicit compression
42/// let mut writer = CsvWriter::with_compression(
43///     "data.csv.zst",
44///     CompressionMethod::Zstd,
45///     3
46/// ).unwrap();
47/// ```
48pub struct CsvWriter {
49    // Dual-mode output
50    zip_writer: Option<StreamingZipWriter<File>>,
51    direct_writer: Option<BufWriter<File>>,
52
53    // State
54    row_count: u64,
55    buffer: Vec<u8>,
56
57    // Configuration
58    delimiter: u8,
59    quote_char: u8,
60    line_ending: &'static [u8],
61}
62
63impl CsvWriter {
64    /// Create a new CSV writer - auto-detects compression from file extension
65    ///
66    /// # File Extensions
67    /// - `.csv` → Uncompressed
68    /// - `.csv.zst` or `.csv.zip` → Zstd compression (level 3)
69    /// - `.csv.gz` → Deflate compression (level 6)
70    ///
71    /// # Examples
72    ///
73    /// ```no_run
74    /// use excelstream::csv_writer::CsvWriter;
75    ///
76    /// // Plain CSV
77    /// let mut writer = CsvWriter::new("data.csv").unwrap();
78    ///
79    /// // Zstd compressed
80    /// let mut writer = CsvWriter::new("data.csv.zst").unwrap();
81    ///
82    /// // Gzip compressed
83    /// let mut writer = CsvWriter::new("data.csv.gz").unwrap();
84    /// ```
85    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
86        let path_ref = path.as_ref();
87        let path_str = path_ref.to_str().unwrap_or("");
88
89        if path_str.ends_with(".csv.zst") || path_str.ends_with(".csv.zip") {
90            Self::with_compression(path_ref, CompressionMethod::Zstd, 3)
91        } else if path_str.ends_with(".csv.gz") {
92            Self::with_compression(path_ref, CompressionMethod::Deflate, 6)
93        } else {
94            // Plain CSV - direct file write
95            let file = File::create(path_ref)
96                .map_err(|e| ExcelError::WriteError(format!("Failed to create CSV file: {}", e)))?;
97
98            Ok(CsvWriter {
99                zip_writer: None,
100                direct_writer: Some(BufWriter::new(file)),
101                row_count: 0,
102                buffer: Vec::with_capacity(4096),
103                delimiter: b',',
104                quote_char: b'"',
105                line_ending: b"\n",
106            })
107        }
108    }
109
110    /// Create a writer with explicit compression method and level
111    ///
112    /// # Arguments
113    /// * `path` - Output file path
114    /// * `method` - Compression method (Zstd or Deflate)
115    /// * `level` - Compression level:
116    ///   - Zstd: 1-21 (recommend 3 for balanced)
117    ///   - Deflate: 0-9 (recommend 6 for balanced)
118    ///
119    /// # Examples
120    ///
121    /// ```no_run
122    /// use excelstream::csv_writer::CsvWriter;
123    /// use excelstream::csv::CompressionMethod;
124    ///
125    /// // Maximum Zstd compression
126    /// let mut writer = CsvWriter::with_compression(
127    ///     "data.csv.zst",
128    ///     CompressionMethod::Zstd,
129    ///     9
130    /// ).unwrap();
131    /// ```
132    pub fn with_compression<P: AsRef<Path>>(
133        path: P,
134        method: CompressionMethod,
135        level: u32,
136    ) -> Result<Self> {
137        let path_ref = path.as_ref();
138
139        // Create ZIP with single CSV entry
140        let mut zip = StreamingZipWriter::with_method(path_ref, method, level)
141            .map_err(|e| ExcelError::WriteError(format!("Failed to create ZIP writer: {}", e)))?;
142
143        // Entry name: extract from path or use "data.csv"
144        let entry_name = path_ref
145            .file_stem()
146            .and_then(|s| s.to_str())
147            .map(|s| {
148                // Remove .zip/.zst/.gz extension if present
149                let clean = s
150                    .trim_end_matches(".csv")
151                    .trim_end_matches(".zst")
152                    .trim_end_matches(".gz");
153                format!("{}.csv", clean)
154            })
155            .unwrap_or_else(|| "data.csv".to_string());
156
157        zip.start_entry(&entry_name)
158            .map_err(|e| ExcelError::WriteError(format!("Failed to start ZIP entry: {}", e)))?;
159
160        Ok(CsvWriter {
161            zip_writer: Some(zip),
162            direct_writer: None,
163            row_count: 0,
164            buffer: Vec::with_capacity(4096),
165            delimiter: b',',
166            quote_char: b'"',
167            line_ending: b"\n",
168        })
169    }
170
171    /// Set custom delimiter (builder pattern)
172    ///
173    /// # Examples
174    ///
175    /// ```no_run
176    /// use excelstream::csv_writer::CsvWriter;
177    ///
178    /// let mut writer = CsvWriter::new("data.csv")
179    ///     .unwrap()
180    ///     .delimiter(b';');
181    /// ```
182    pub fn delimiter(mut self, delim: u8) -> Self {
183        self.delimiter = delim;
184        self
185    }
186
187    /// Set custom quote character (builder pattern)
188    pub fn quote_char(mut self, quote: u8) -> Self {
189        self.quote_char = quote;
190        self
191    }
192
193    /// Write a row of strings
194    ///
195    /// # Examples
196    ///
197    /// ```no_run
198    /// use excelstream::csv_writer::CsvWriter;
199    ///
200    /// let mut writer = CsvWriter::new("data.csv").unwrap();
201    /// writer.write_row(&["Name", "Age", "City"]).unwrap();
202    /// writer.write_row(&["Alice", "30", "NYC"]).unwrap();
203    /// writer.save().unwrap();
204    /// ```
205    pub fn write_row<I, S>(&mut self, data: I) -> Result<()>
206    where
207        I: IntoIterator<Item = S>,
208        S: AsRef<str>,
209    {
210        // Reuse buffer
211        self.buffer.clear();
212
213        // Encode row using CSV encoder
214        let encoder = CsvEncoder::new(self.delimiter, self.quote_char);
215        let fields: Vec<String> = data.into_iter().map(|s| s.as_ref().to_string()).collect();
216        let refs: Vec<&str> = fields.iter().map(|s| s.as_str()).collect();
217
218        encoder.encode_row(&refs, &mut self.buffer);
219        self.buffer.extend_from_slice(self.line_ending);
220
221        // Write to output
222        if let Some(ref mut zip) = self.zip_writer {
223            zip.write_data(&self.buffer)
224                .map_err(|e| ExcelError::WriteError(format!("Failed to write to ZIP: {}", e)))?;
225        } else if let Some(ref mut writer) = self.direct_writer {
226            writer
227                .write_all(&self.buffer)
228                .map_err(|e| ExcelError::WriteError(format!("Failed to write to file: {}", e)))?;
229        }
230
231        self.row_count += 1;
232        Ok(())
233    }
234
235    /// Write a row of typed values
236    ///
237    /// Converts CellValue types to strings before writing.
238    ///
239    /// # Examples
240    ///
241    /// ```no_run
242    /// use excelstream::csv_writer::CsvWriter;
243    /// use excelstream::types::CellValue;
244    ///
245    /// let mut writer = CsvWriter::new("data.csv").unwrap();
246    /// writer.write_row_typed(&[
247    ///     CellValue::String("Alice".to_string()),
248    ///     CellValue::Int(30),
249    ///     CellValue::Float(75.5),
250    /// ]).unwrap();
251    /// ```
252    pub fn write_row_typed(&mut self, cells: &[CellValue]) -> Result<()> {
253        let strings: Vec<String> = cells.iter().map(|c| c.as_string()).collect();
254        let refs: Vec<&str> = strings.iter().map(|s| s.as_str()).collect();
255        self.write_row(refs)
256    }
257
258    /// Write multiple rows at once
259    ///
260    /// # Examples
261    ///
262    /// ```no_run
263    /// use excelstream::csv_writer::CsvWriter;
264    ///
265    /// let mut writer = CsvWriter::new("data.csv").unwrap();
266    /// let rows = vec![
267    ///     vec!["Alice", "30"],
268    ///     vec!["Bob", "25"],
269    /// ];
270    /// writer.write_rows_batch(rows).unwrap();
271    /// ```
272    pub fn write_rows_batch<I, R, S>(&mut self, rows: I) -> Result<()>
273    where
274        I: IntoIterator<Item = R>,
275        R: IntoIterator<Item = S>,
276        S: AsRef<str>,
277    {
278        for row_data in rows {
279            self.write_row(row_data)?;
280        }
281        Ok(())
282    }
283
284    /// Get the number of rows written
285    pub fn row_count(&self) -> u64 {
286        self.row_count
287    }
288
289    /// Finalize and save the CSV file
290    ///
291    /// This must be called to properly close the file.
292    /// Consumes the writer.
293    ///
294    /// # Examples
295    ///
296    /// ```no_run
297    /// use excelstream::csv_writer::CsvWriter;
298    ///
299    /// let mut writer = CsvWriter::new("data.csv").unwrap();
300    /// writer.write_row(&["Name", "Age"]).unwrap();
301    /// writer.save().unwrap();
302    /// ```
303    pub fn save(mut self) -> Result<()> {
304        if let Some(zip) = self.zip_writer.take() {
305            zip.finish()
306                .map_err(|e| ExcelError::WriteError(format!("Failed to finish ZIP: {}", e)))?;
307        } else if let Some(mut writer) = self.direct_writer.take() {
308            writer
309                .flush()
310                .map_err(|e| ExcelError::WriteError(format!("Failed to flush file: {}", e)))?;
311        }
312        Ok(())
313    }
314}
315
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Build a per-process scratch path in the system temp directory so the tests do
    /// not write into (or leave files behind in) the current working directory.
    fn scratch_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("excelstream_{}_{}", std::process::id(), name))
    }

    /// Read the file back and remove it *before* returning, so a failing assertion in
    /// the caller cannot leave the scratch file behind.
    fn read_then_remove(path: &Path) -> Result<String> {
        let content = std::fs::read_to_string(path);
        std::fs::remove_file(path).ok();
        Ok(content?)
    }

    /// Plain string rows are written comma-separated, one per line.
    #[test]
    fn test_plain_csv() -> Result<()> {
        let path = scratch_path("plain.csv");
        {
            let mut writer = CsvWriter::new(&path)?;
            writer.write_row(["Name", "Age", "City"])?;
            writer.write_row(["Alice", "30", "NYC"])?;
            writer.save()?;
        }

        let content = read_then_remove(&path)?;
        assert!(content.contains("Name,Age,City"));
        assert!(content.contains("Alice,30,NYC"));
        Ok(())
    }

    /// Typed cells are converted to their string form before being written.
    #[test]
    fn test_typed_values() -> Result<()> {
        let path = scratch_path("typed.csv");
        {
            let mut writer = CsvWriter::new(&path)?;
            writer.write_row_typed(&[
                CellValue::String("Test".to_string()),
                CellValue::Int(42),
                CellValue::Float(3.15),
            ])?;
            writer.save()?;
        }

        let content = read_then_remove(&path)?;
        assert!(content.contains("Test,42,3.15"));
        Ok(())
    }

    /// Fields containing the delimiter or quotes are quoted, with quotes doubled.
    #[test]
    fn test_edge_cases() -> Result<()> {
        let path = scratch_path("edge.csv");
        {
            let mut writer = CsvWriter::new(&path)?;
            writer.write_row(["a,b", r#"Say "Hi""#, "Line1\nLine2"])?;
            writer.save()?;
        }

        let content = read_then_remove(&path)?;
        assert!(content.contains(r#""a,b""#));
        assert!(content.contains(r#""Say ""Hi""""#));
        Ok(())
    }
}
384}