json_archive/
compression_writer.rs

1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025  Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22//! Unified writer abstraction for compressed and uncompressed output.
23//!
24//! This module provides `CompressionWriter`, an enum that wraps different
25//! compression encoders behind a common interface implementing `std::io::Write`.
26//!
27//! The goal is to simplify write logic by allowing callers to write to any
28//! compression format using the same API, with proper error handling that
29//! produces user-friendly diagnostics.
30
31use std::fs::File;
32use std::io::{BufWriter, Write};
33use std::path::Path;
34
35use crate::detection::CompressionFormat;
36use crate::diagnostics::{Diagnostic, DiagnosticCode};
37
/// An output sink that hides whether, and how, the bytes are compressed.
///
/// Each variant wraps one concrete encoder. The type implements `Write`, so
/// callers use the same calls for every format, and it offers a consuming
/// `finish()` that flushes or finalizes the underlying stream.
///
/// # Example
///
/// ```ignore
/// use json_archive::compression_writer::CompressionWriter;
/// use json_archive::detection::CompressionFormat;
/// use std::io::Write;
///
/// let mut writer = CompressionWriter::create(path, CompressionFormat::Gzip)?;
/// writeln!(writer, "some data")?;
/// writer.finish()?;
/// ```
pub enum CompressionWriter {
    /// Uncompressed output - uses BufWriter since File has no internal buffering
    Plain(BufWriter<File>),
    /// Compression encoders write directly to File - they do their own internal buffering
    #[cfg(feature = "compression")]
    Gzip(flate2::write::GzEncoder<File>),
    #[cfg(feature = "compression")]
    Zlib(flate2::write::ZlibEncoder<File>),
    #[cfg(feature = "compression")]
    Zstd(zstd::stream::write::Encoder<'static, File>),
    #[cfg(feature = "compression")]
    Brotli(brotli::CompressorWriter<File>),
}

// `Debug` is written by hand rather than derived because not every wrapped
// encoder type implements `Debug`. Only the variant name is printed; the
// encoder state is elided as `(..)`.
impl std::fmt::Debug for CompressionWriter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let variant = match self {
            Self::Plain(_) => "Plain",
            #[cfg(feature = "compression")]
            Self::Gzip(_) => "Gzip",
            #[cfg(feature = "compression")]
            Self::Zlib(_) => "Zlib",
            #[cfg(feature = "compression")]
            Self::Zstd(_) => "Zstd",
            #[cfg(feature = "compression")]
            Self::Brotli(_) => "Brotli",
        };
        write!(f, "CompressionWriter::{}(..)", variant)
    }
}
68
69impl CompressionWriter {
70    /// Open a file for writing with the specified compression format.
71    ///
72    /// # Errors
73    ///
74    /// Returns a diagnostic explaining:
75    /// - What file we tried to create
76    /// - What compression format was requested
77    /// - Why it failed (permissions, disk full, unsupported format, etc.)
78    pub fn create(path: &Path, format: CompressionFormat) -> Result<Self, Vec<Diagnostic>> {
79        let file = File::create(path).map_err(|e| {
80            vec![Diagnostic::fatal(
81                DiagnosticCode::PathNotFound,
82                format!(
83                    "I couldn't create the output file '{}': {}",
84                    path.display(),
85                    describe_io_error(&e)
86                ),
87            )
88            .with_advice(advice_for_create_error(&e, path))]
89        })?;
90
91        match format {
92            // Plain needs BufWriter since File has no internal buffering
93            CompressionFormat::None => Ok(Self::Plain(BufWriter::new(file))),
94
95            // Compression encoders do their own buffering, write directly to File
96            #[cfg(feature = "compression")]
97            CompressionFormat::Gzip => {
98                use flate2::write::GzEncoder;
99                use flate2::Compression;
100                Ok(Self::Gzip(GzEncoder::new(file, Compression::default())))
101            }
102
103            #[cfg(feature = "compression")]
104            CompressionFormat::Zlib => {
105                use flate2::write::ZlibEncoder;
106                use flate2::Compression;
107                Ok(Self::Zlib(ZlibEncoder::new(file, Compression::default())))
108            }
109
110            #[cfg(feature = "compression")]
111            CompressionFormat::Deflate => {
112                // Deflate is a raw compression algorithm, not a container format.
113                // We can read deflate data, but when writing we need to pick a
114                // container (gzip or zlib) that provides headers and checksums.
115                Err(vec![Diagnostic::fatal(
116                    DiagnosticCode::UnsupportedVersion,
117                    "I can't write raw deflate format because it's not a container format.".to_string(),
118                )
119                .with_advice(
120                    "Deflate is a compression algorithm, not a file format. When writing, \
121                     you need to choose a container format that wraps deflate data:\n\
122                     \n  - Use .gz (gzip) for general-purpose compression\n  \
123                     - Use .zlib for zlib-wrapped deflate\n\
124                     \nIf you're appending to an existing deflate file, consider converting \
125                     it to gzip first.".to_string()
126                )])
127            }
128
129            #[cfg(feature = "compression")]
130            CompressionFormat::Zstd => {
131                let encoder = zstd::stream::write::Encoder::new(file, 0).map_err(|e| {
132                    vec![Diagnostic::fatal(
133                        DiagnosticCode::PathNotFound,
134                        format!(
135                            "I couldn't initialize zstd compression for '{}': {}",
136                            path.display(),
137                            e
138                        ),
139                    )]
140                })?;
141                Ok(Self::Zstd(encoder))
142            }
143
144            #[cfg(feature = "compression")]
145            CompressionFormat::Brotli => {
146                // buffer_size=4096, quality=11 (max), lgwin=22 (default window)
147                Ok(Self::Brotli(brotli::CompressorWriter::new(
148                    file, 4096, 11, 22,
149                )))
150            }
151
152            #[cfg(not(feature = "compression"))]
153            _ => Err(vec![Diagnostic::fatal(
154                DiagnosticCode::UnsupportedVersion,
155                format!(
156                    "I can't write {} compressed files because this build doesn't include compression support.",
157                    format_name(format)
158                ),
159            )
160            .with_advice("Rebuild with: cargo build --features compression".to_string())]),
161        }
162    }
163
164    /// Finish writing and flush all buffers.
165    ///
166    /// For compressed formats, this finalizes the compression stream.
167    /// Must be called before dropping to ensure all data is written.
168    ///
169    /// # Errors
170    ///
171    /// Returns a diagnostic if flushing or finalizing fails.
172    ///
173    /// **Important**: This method does not clean up the output file on error.
174    /// If `finish()` fails, the caller is responsible for removing the
175    /// partially-written file themselves:
176    ///
177    /// ```ignore
178    /// if let Err(diagnostics) = writer.finish() {
179    ///     let _ = std::fs::remove_file(&path);
180    ///     return Err(diagnostics);
181    /// }
182    /// ```
183    pub fn finish(self) -> Result<(), Vec<Diagnostic>> {
184        match self {
185            Self::Plain(mut w) => w.flush().map_err(|e| {
186                vec![Diagnostic::fatal(
187                    DiagnosticCode::PathNotFound,
188                    format!(
189                        "I couldn't flush the output file: {}",
190                        describe_io_error(&e)
191                    ),
192                )]
193            }),
194
195            #[cfg(feature = "compression")]
196            Self::Gzip(encoder) => {
197                encoder.finish().map_err(|e| {
198                    vec![Diagnostic::fatal(
199                        DiagnosticCode::PathNotFound,
200                        format!(
201                            "I couldn't finalize gzip compression: {}",
202                            describe_io_error(&e)
203                        ),
204                    )]
205                })?;
206                Ok(())
207            }
208
209            #[cfg(feature = "compression")]
210            Self::Zlib(encoder) => {
211                encoder.finish().map_err(|e| {
212                    vec![Diagnostic::fatal(
213                        DiagnosticCode::PathNotFound,
214                        format!(
215                            "I couldn't finalize zlib compression: {}",
216                            describe_io_error(&e)
217                        ),
218                    )]
219                })?;
220                Ok(())
221            }
222
223            #[cfg(feature = "compression")]
224            Self::Zstd(encoder) => {
225                encoder.finish().map_err(|e| {
226                    vec![Diagnostic::fatal(
227                        DiagnosticCode::PathNotFound,
228                        format!(
229                            "I couldn't finalize zstd compression: {}",
230                            describe_io_error(&e)
231                        ),
232                    )]
233                })?;
234                Ok(())
235            }
236
237            #[cfg(feature = "compression")]
238            Self::Brotli(mut encoder) => {
239                // Brotli uses a different API - no finish() method
240                // Flush the encoder (brotli auto-flushes on drop, but we flush explicitly)
241                encoder.flush().map_err(|e| {
242                    vec![Diagnostic::fatal(
243                        DiagnosticCode::PathNotFound,
244                        format!(
245                            "I couldn't finalize brotli compression: {}",
246                            describe_io_error(&e)
247                        ),
248                    )]
249                })
250            }
251        }
252    }
253}
254
255impl Write for CompressionWriter {
256    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
257        match self {
258            Self::Plain(w) => w.write(buf),
259            #[cfg(feature = "compression")]
260            Self::Gzip(w) => w.write(buf),
261            #[cfg(feature = "compression")]
262            Self::Zlib(w) => w.write(buf),
263            #[cfg(feature = "compression")]
264            Self::Zstd(w) => w.write(buf),
265            #[cfg(feature = "compression")]
266            Self::Brotli(w) => w.write(buf),
267        }
268    }
269
270    fn flush(&mut self) -> std::io::Result<()> {
271        match self {
272            Self::Plain(w) => w.flush(),
273            #[cfg(feature = "compression")]
274            Self::Gzip(w) => w.flush(),
275            #[cfg(feature = "compression")]
276            Self::Zlib(w) => w.flush(),
277            #[cfg(feature = "compression")]
278            Self::Zstd(w) => w.flush(),
279            #[cfg(feature = "compression")]
280            Self::Brotli(w) => w.flush(),
281        }
282    }
283}
284
/// Translate an `io::Error` into a short, human-readable phrase.
///
/// A handful of error kinds get a plain-language description; anything else
/// falls back to the error's own `Display` text.
///
/// `IsADirectory` shares the "a directory with that name already exists"
/// wording with `AlreadyExists`, because that is the kind reported when the
/// output path names an existing directory.
fn describe_io_error(e: &std::io::Error) -> String {
    use std::io::ErrorKind;

    let phrase = match e.kind() {
        ErrorKind::NotFound => "the directory doesn't exist",
        ErrorKind::PermissionDenied => "permission denied",
        ErrorKind::AlreadyExists | ErrorKind::IsADirectory => {
            "a directory with that name already exists"
        }
        ErrorKind::StorageFull => "the disk is full",
        ErrorKind::ReadOnlyFilesystem => "the filesystem is read-only",
        // No friendlier wording available: surface the OS/library message.
        _ => return e.to_string(),
    };
    phrase.to_string()
}
298
/// Generate helpful advice for a failed attempt to create `path`.
///
/// The advice depends on the error kind:
/// - `NotFound`: suggests creating the parent directory, when there is one.
/// - `PermissionDenied`: suggests inspecting the parent directory, falling
///   back to `.` when the path has no directory component.
/// - `StorageFull`: suggests freeing space or writing elsewhere.
/// - anything else: a generic "check the path and permissions" hint.
fn advice_for_create_error(e: &std::io::Error, path: &Path) -> String {
    use std::io::ErrorKind;

    // `Path::parent` yields `Some("")` for a bare file name such as
    // `out.json`. An empty directory name would render as `''` in the
    // suggested commands, so treat it the same as "no parent".
    let parent = path.parent().filter(|dir| !dir.as_os_str().is_empty());

    match e.kind() {
        ErrorKind::NotFound => match parent {
            Some(dir) => format!(
                "The parent directory '{}' doesn't exist. Create it first with:\n  mkdir -p '{}'",
                dir.display(),
                dir.display()
            ),
            None => "Check that the path is valid.".to_string(),
        },
        ErrorKind::PermissionDenied => format!(
            "You don't have write permission for this location. Try:\n  ls -la '{}'",
            parent.map_or_else(|| ".".to_string(), |dir| dir.display().to_string())
        ),
        ErrorKind::StorageFull => {
            "Free up disk space or write to a different location.".to_string()
        }
        _ => "Check that the path is valid and you have write permission.".to_string(),
    }
}
327
328/// Get a human-readable name for a compression format.
329#[cfg(not(feature = "compression"))]
330fn format_name(format: CompressionFormat) -> &'static str {
331    match format {
332        CompressionFormat::Gzip => "gzip",
333        CompressionFormat::Zlib => "zlib",
334        CompressionFormat::Zstd => "zstd",
335        CompressionFormat::Brotli => "brotli",
336        CompressionFormat::Deflate => "deflate",
337        CompressionFormat::None => "uncompressed",
338    }
339}
340
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;
    use tempfile::NamedTempFile;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Writes `line` plus a newline to `path` through a `CompressionWriter`
    /// of the given format, then finishes the writer.
    fn write_line(path: &Path, format: CompressionFormat, line: &str) -> TestResult {
        let mut writer =
            CompressionWriter::create(path, format).map_err(|d| format!("{:?}", d))?;
        writeln!(writer, "{}", line).map_err(|e| format!("{}", e))?;
        writer.finish().map_err(|d| format!("{:?}", d))?;
        Ok(())
    }

    #[test]
    fn test_plain_writer() -> TestResult {
        let temp_file = NamedTempFile::new()?;

        write_line(temp_file.path(), CompressionFormat::None, "hello world")?;

        let written = std::fs::read_to_string(temp_file.path())?;
        assert_eq!(written, "hello world\n");
        Ok(())
    }

    #[test]
    #[cfg(feature = "compression")]
    fn test_gzip_writer() -> TestResult {
        use flate2::read::GzDecoder;

        let temp_file = NamedTempFile::new()?;

        write_line(temp_file.path(), CompressionFormat::Gzip, "hello gzip")?;

        // Round-trip: the decompressed bytes must match what was written.
        let mut decoded = String::new();
        GzDecoder::new(File::open(temp_file.path())?).read_to_string(&mut decoded)?;
        assert_eq!(decoded, "hello gzip\n");
        Ok(())
    }

    #[test]
    #[cfg(feature = "compression")]
    fn test_zstd_writer() -> TestResult {
        let temp_file = NamedTempFile::new()?;

        write_line(temp_file.path(), CompressionFormat::Zstd, "hello zstd")?;

        // Round-trip: the decompressed bytes must match what was written.
        let mut decoded = String::new();
        zstd::stream::read::Decoder::new(File::open(temp_file.path())?)?
            .read_to_string(&mut decoded)?;
        assert_eq!(decoded, "hello zstd\n");
        Ok(())
    }

    #[test]
    fn test_create_nonexistent_directory() {
        let target = Path::new("/nonexistent/directory/file.txt");

        let Err(diagnostics) = CompressionWriter::create(target, CompressionFormat::None) else {
            panic!("Expected error for nonexistent directory");
        };

        assert_eq!(diagnostics.len(), 1);
        // The error message should mention the path
        let description = &diagnostics[0].description;
        assert!(
            description.contains("/nonexistent/directory/file.txt"),
            "Expected path in error message, got: {}",
            description
        );
    }
}