json_archive/
write_strategy.rs

1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025  Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22//! Write strategy for archive operations.
23//!
24//! There are exactly two questions:
25//!   1. Where do we write? (dest_path)
26//!   2. Can we write there directly, or do we need to dance?
27//!
28//! The dance (temp file + atomic swap) is required when:
29//!   - source_path == dest_path, AND
30//!   - the file is compressed
31//!
32//! Why? Compressed streams don't support append. To add one
33//! record to a gzip file, you decompress everything, add the
34//! record, recompress everything. If you write to the same
35//! file you're reading, you corrupt it mid-operation.
36//!
37//! So: write to temp, swap when done. See atomic_file.rs.
38//!
39//! When source != dest, there is no conflict. Read from source,
40//! write to dest. Even if source is compressed. Even if dest
41//! is compressed. Even if they use different compression.
42//! The source is never modified.
43//!
44//! When source == dest AND uncompressed, just append. Seek to
45//! end, write new records. Simple.
46//!
47//! The output compression format is determined by dest_path's
48//! extension, not the source's format. That's a separate concern.
49//!
50//! ## Truth Table
51//!
52//! ```text
53//! INPUTS                    OUTPUT FLAG         STRATEGY
54//! ───────────────────────────────────────────────────────────────
55//! [A.json, B.json]          (none)              Create { out: A.json.archive, fmt: None }
56//! [A.json, B.json]          -o X.archive.gz     Create { out: X.archive.gz, fmt: Gzip }
57//!
58//! [A.archive, B.json]       (none)              Append { path: A.archive }
//! [A.archive, B.json]       -o X.archive        CopyOnWrite { in: A.archive/None, out: X.archive/None }
60//!
61//! [A.archive.gz, B.json]    (none)              AtomicSwap { path: A.archive.gz, fmt: Gzip, temp: .A.archive.gz.xxx }
62//! [A.archive.gz, B.json]    -o A.archive.gz     AtomicSwap { path: A.archive.gz, fmt: Gzip, temp: .A.archive.gz.xxx }
//! [A.archive.gz, B.json]    -o X.archive        CopyOnWrite { in: A.archive.gz/Gzip, out: X.archive/None }
//! [A.archive.gz, B.json]    -o X.archive.br     CopyOnWrite { in: A.archive.gz/Gzip, out: X.archive.br/Brotli }
65//! ```
66//!
67//! The rule:
68//! ```text
69//! if creating new archive:
70//!     Create
71//! else if source != dest:
//!     CopyOnWrite (read from source, write to dest, transcoding as needed)
73//! else if source == dest AND uncompressed:
74//!     Append (seek to end, write)
75//! else if source == dest AND compressed:
76//!     AtomicSwap (read all, write to temp, swap)
77//! ```
78
79use std::path::{Path, PathBuf};
80
81use crate::atomic_file::generate_temp_filename;
82use crate::detection::CompressionFormat;
83
84/// A path with its compression format.
85pub type CompressedPath = (PathBuf, CompressionFormat);
86
87/// Describes how to write archive data based on input/output paths and compression.
88#[derive(Debug, Clone)]
89pub enum WriteStrategy {
90    /// Create a new archive from scratch. No existing archive to read.
91    Create { output: CompressedPath },
92
93    /// Append to an existing uncompressed archive in-place.
94    /// Just seek to end and write new records.
95    Append { path: PathBuf },
96
97    /// Read from one location, write to another.
98    /// Handles transcoding between compression formats.
99    CopyOnWrite {
100        input: CompressedPath,
101        output: CompressedPath,
102    },
103
104    /// Read compressed archive, write to temp, atomic swap.
105    /// Required when source == dest AND compressed.
106    AtomicSwap {
107        /// The archive path (both input and output)
108        path: PathBuf,
109        /// Compression format (same for input and output in this case)
110        compression: CompressionFormat,
111        /// Temp file to write to before swapping
112        temp_path: PathBuf,
113    },
114}
115
116/// Determine compression format from file extension.
117///
118/// Returns `CompressionFormat::None` for uncompressed files.
119pub fn compression_from_extension(path: &Path) -> CompressionFormat {
120    let s = path.to_string_lossy();
121    if s.ends_with(".gz") {
122        CompressionFormat::Gzip
123    } else if s.ends_with(".br") {
124        CompressionFormat::Brotli
125    } else if s.ends_with(".zst") {
126        CompressionFormat::Zstd
127    } else if s.ends_with(".zlib") {
128        CompressionFormat::Zlib
129    } else {
130        CompressionFormat::None
131    }
132}
133
134/// Determine write strategy from parsed arguments.
135///
136/// # Arguments
137///
138/// * `source_archive` - Path to existing archive if appending, None if creating new
139/// * `dest_path` - Where to write the output
140/// * `source_compression` - Compression format of source (from magic bytes). Pass
141///   `CompressionFormat::None` if unknown or uncompressed.
142///
143/// # Returns
144///
145/// The appropriate `WriteStrategy` for this operation.
146pub fn determine_strategy(
147    source_archive: Option<&Path>,
148    dest_path: &Path,
149    source_compression: CompressionFormat,
150) -> WriteStrategy {
151    let dest_compression = compression_from_extension(dest_path);
152
153    // No source archive? Creating new.
154    let Some(source) = source_archive else {
155        return WriteStrategy::Create {
156            output: (dest_path.to_path_buf(), dest_compression),
157        };
158    };
159
160    // Check if source and dest are the same file
161    let same_file = match (source.canonicalize(), dest_path.canonicalize()) {
162        (Ok(s), Ok(d)) => s == d,
163        // dest doesn't exist yet, or other error - not same file
164        _ => false,
165    };
166
167    if !same_file {
168        // Different files: read from source, write to dest
169        let source_fmt = if source_compression == CompressionFormat::None {
170            compression_from_extension(source)
171        } else {
172            source_compression
173        };
174        return WriteStrategy::CopyOnWrite {
175            input: (source.to_path_buf(), source_fmt),
176            output: (dest_path.to_path_buf(), dest_compression),
177        };
178    }
179
180    // Same file - check if compressed
181    let compression = if source_compression == CompressionFormat::None {
182        compression_from_extension(source)
183    } else {
184        source_compression
185    };
186
187    match compression {
188        CompressionFormat::None => {
189            // Uncompressed: can append in-place
190            WriteStrategy::Append {
191                path: dest_path.to_path_buf(),
192            }
193        }
194        fmt => {
195            // Compressed: need atomic swap
196            WriteStrategy::AtomicSwap {
197                path: dest_path.to_path_buf(),
198                compression: fmt,
199                temp_path: generate_temp_filename(dest_path),
200            }
201        }
202    }
203}
204
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as IoWrite;
    use tempfile::NamedTempFile;

    /// Every recognised suffix maps to its format; anything else is `None`.
    #[test]
    fn test_compression_from_extension() {
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive.gz")),
            CompressionFormat::Gzip
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive.br")),
            CompressionFormat::Brotli
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive.zst")),
            CompressionFormat::Zstd
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive.zlib")),
            CompressionFormat::Zlib
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive")),
            CompressionFormat::None
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json")),
            CompressionFormat::None
        );
    }

    /// No source archive and an uncompressed destination yields `Create`.
    #[test]
    fn test_create_new_archive() {
        let dest = Path::new("/tmp/new.json.archive");
        let strategy = determine_strategy(None, dest, CompressionFormat::None);

        match strategy {
            WriteStrategy::Create { output } => {
                assert_eq!(output.0, PathBuf::from("/tmp/new.json.archive"));
                assert_eq!(output.1, CompressionFormat::None);
            }
            _ => panic!("Expected Create strategy"),
        }
    }

    /// The output format of a new archive comes from the destination extension.
    #[test]
    fn test_create_new_compressed_archive() {
        let dest = Path::new("/tmp/new.json.archive.gz");
        let strategy = determine_strategy(None, dest, CompressionFormat::None);

        match strategy {
            WriteStrategy::Create { output } => {
                assert_eq!(output.0, PathBuf::from("/tmp/new.json.archive.gz"));
                assert_eq!(output.1, CompressionFormat::Gzip);
            }
            _ => panic!("Expected Create strategy"),
        }
    }

    /// Same existing file, uncompressed: append in place.
    #[test]
    fn test_append_uncompressed_same_file() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp = NamedTempFile::with_suffix(".json.archive")?;
        writeln!(temp, "test")?;
        temp.flush()?;

        let path = temp.path();
        let strategy = determine_strategy(Some(path), path, CompressionFormat::None);

        match strategy {
            WriteStrategy::Append { path: p } => {
                assert_eq!(p, path);
            }
            _ => panic!("Expected Append strategy, got {:?}", strategy),
        }

        Ok(())
    }

    /// Same existing file, compressed: write to a temp file and swap.
    #[test]
    fn test_atomic_swap_compressed_same_file() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp = NamedTempFile::with_suffix(".json.archive.gz")?;
        writeln!(temp, "test")?;
        temp.flush()?;

        let path = temp.path();
        let strategy = determine_strategy(Some(path), path, CompressionFormat::Gzip);

        match strategy {
            WriteStrategy::AtomicSwap {
                path: p,
                compression,
                temp_path,
            } => {
                assert_eq!(p, path);
                assert_eq!(compression, CompressionFormat::Gzip);
                assert!(temp_path.to_string_lossy().contains(".json.archive.gz"));
            }
            _ => panic!("Expected AtomicSwap strategy, got {:?}", strategy),
        }

        Ok(())
    }

    /// When the caller passes `None` as the detected format, the source's
    /// extension decides whether the same-file case is compressed.
    #[test]
    fn test_atomic_swap_falls_back_to_extension() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp = NamedTempFile::with_suffix(".json.archive.gz")?;
        writeln!(temp, "test")?;
        temp.flush()?;

        let path = temp.path();
        let strategy = determine_strategy(Some(path), path, CompressionFormat::None);

        match strategy {
            WriteStrategy::AtomicSwap {
                path: p,
                compression,
                ..
            } => {
                assert_eq!(p, path);
                assert_eq!(compression, CompressionFormat::Gzip);
            }
            _ => panic!("Expected AtomicSwap strategy, got {:?}", strategy),
        }

        Ok(())
    }

    /// Different source and destination: read one, write the other.
    #[test]
    fn test_copy_on_write_different_files() -> Result<(), Box<dyn std::error::Error>> {
        let mut source = NamedTempFile::with_suffix(".json.archive")?;
        writeln!(source, "test")?;
        source.flush()?;

        let dest = Path::new("/tmp/different.json.archive");
        let strategy = determine_strategy(Some(source.path()), dest, CompressionFormat::None);

        match strategy {
            WriteStrategy::CopyOnWrite { input, output } => {
                assert_eq!(input.0, source.path());
                assert_eq!(input.1, CompressionFormat::None);
                assert_eq!(output.0, PathBuf::from("/tmp/different.json.archive"));
                assert_eq!(output.1, CompressionFormat::None);
            }
            _ => panic!("Expected CopyOnWrite strategy, got {:?}", strategy),
        }

        Ok(())
    }

    /// Source and destination may use different compression formats.
    #[test]
    fn test_copy_on_write_transcode_compression() -> Result<(), Box<dyn std::error::Error>> {
        let mut source = NamedTempFile::with_suffix(".json.archive.gz")?;
        writeln!(source, "test")?;
        source.flush()?;

        let dest = Path::new("/tmp/output.json.archive.br");
        let strategy = determine_strategy(Some(source.path()), dest, CompressionFormat::Gzip);

        match strategy {
            WriteStrategy::CopyOnWrite { input, output } => {
                assert_eq!(input.1, CompressionFormat::Gzip);
                assert_eq!(output.1, CompressionFormat::Brotli);
            }
            _ => panic!("Expected CopyOnWrite strategy, got {:?}", strategy),
        }

        Ok(())
    }

    /// With no detected format, the input format of a copy is taken from the
    /// source's extension.
    #[test]
    fn test_copy_on_write_source_format_from_extension() -> Result<(), Box<dyn std::error::Error>>
    {
        let mut source = NamedTempFile::with_suffix(".json.archive.gz")?;
        writeln!(source, "test")?;
        source.flush()?;

        let dest = Path::new("/tmp/output.json.archive");
        let strategy = determine_strategy(Some(source.path()), dest, CompressionFormat::None);

        match strategy {
            WriteStrategy::CopyOnWrite { input, output } => {
                assert_eq!(input.0, source.path());
                assert_eq!(input.1, CompressionFormat::Gzip);
                assert_eq!(output.1, CompressionFormat::None);
            }
            _ => panic!("Expected CopyOnWrite strategy, got {:?}", strategy),
        }

        Ok(())
    }
}
352}