Skip to main content

hexz_cli/cmd/data/
convert.rs

1//! Convert external data formats into Hexz archives.
2//!
3//! Supports:
4//! - **tar**: Pure Rust via the `tar` crate (streaming, no extraction)
5
6use crate::ui::progress::create_progress_bar;
7use anyhow::{Context, Result, bail};
8use hexz_core::algo::compression::create_compressor_from_str;
9use hexz_ops::archive_writer::ArchiveWriter;
10use std::io::Read;
11use std::path::Path;
12use std::sync::{Arc, Mutex};
13use colored::Colorize;
14
15/// Execute the convert command.
16#[allow(clippy::too_many_arguments)]
17pub fn run(
18    format: &str,
19    input: &Path,
20    output: &Path,
21    compression: &str,
22    block_size: u32,
23    silent: bool,
24) -> Result<()> {
25    if !silent {
26        println!("{} Converting {}", "╭".dimmed(), input.display().to_string().cyan());
27        println!("{} Format     {}", "│".dimmed(), format.bright_black());
28        println!("{} Output     {}", "╰".dimmed(), output.display().to_string().bright_black());
29        println!();
30    }
31    match format.to_lowercase().as_str() {
32        "tar" => convert_tar(input, output, compression, block_size, silent),
33        other => bail!("Unknown format: {other:?}. Supported formats: tar"),
34    }
35}
36
37/// Convert a tar archive to a Hexz archive using pure Rust.
38///
39/// Streams tar entries directly through the `ArchiveWriter` without
40/// extracting to disk. Stores a file manifest in archive metadata.
41fn convert_tar(
42    input: &Path,
43    output: &Path,
44    compression: &str,
45    block_size: u32,
46    silent: bool,
47) -> Result<()> {
48    // ... rest of convert_tar ...
49    // (I will replace the println! calls later or in a separate step if this is too large)
50    // Actually let's try to replace the whole file to be safe with all formatting.
51
52    // Calculate total size for progress bar
53    let total_size = std::fs::metadata(input)
54        .with_context(|| format!("Cannot read input file: {}", input.display()))?
55        .len();
56
57    // Set up progress bar
58    let pb = if silent {
59        None
60    } else {
61        let pb = create_progress_bar(total_size);
62        Some(Arc::new(Mutex::new(pb)))
63    };
64
65    // Create compressor and archive writer
66    let (compressor, compression_type) =
67        create_compressor_from_str(compression, None, None).map_err(|e| anyhow::anyhow!("{e}"))?;
68
69    let mut writer = ArchiveWriter::builder(output, compressor, compression_type)
70        .block_size(block_size)
71        .build()
72        .map_err(|e| anyhow::anyhow!("{e}"))?;
73
74    // Open tar archive (supports .tar, .tar.gz, .tar.bz2, .tar.xz)
75    let file = std::fs::File::open(input)
76        .with_context(|| format!("Cannot open tar file: {}", input.display()))?;
77
78    let mut archive = tar::Archive::new(file);
79
80    // Track file manifest for metadata
81    let mut source_files: Vec<serde_json::Value> = Vec::new();
82    let mut total_bytes: u64 = 0;
83    let mut bytes_from_archive: u64 = 0;
84
85    // Begin a main stream for the tar data
86    // We'll set total_size after reading all entries by using a two-pass approach,
87    // but for streaming we start with the tar file size as an estimate.
88    writer.begin_stream(true, total_size);
89
90    for entry_result in archive.entries()? {
91        let mut entry = entry_result?;
92        let header = entry.header();
93
94        // Skip non-file entries (directories, symlinks, etc.)
95        if !header.entry_type().is_file() {
96            continue;
97        }
98
99        let name = entry.path()?.to_string_lossy().to_string();
100        let size = header.size()?;
101
102        // Read the entry data and write in blocks
103        let mut remaining = size;
104        let mut buf = vec![0u8; block_size as usize];
105
106        while remaining > 0 {
107            let to_read = std::cmp::min(remaining as usize, buf.len());
108            entry.read_exact(&mut buf[..to_read])?;
109
110            writer
111                .write_data_block(&buf[..to_read])
112                .map_err(|e| anyhow::anyhow!("{e}"))?;
113
114            remaining -= to_read as u64;
115            bytes_from_archive += to_read as u64;
116
117            if let Some(ref pb) = pb {
118                if let Ok(pb) = pb.lock() {
119                    // Approximate progress based on bytes read from archive
120                    pb.set_position(std::cmp::min(bytes_from_archive, total_size));
121                }
122            }
123        }
124
125        source_files.push(serde_json::json!({
126            "name": name,
127            "size": size,
128            "offset": total_bytes,
129        }));
130        total_bytes += size;
131    }
132
133    writer.end_stream().map_err(|e| anyhow::anyhow!("{e}"))?;
134
135    // Build metadata JSON
136    let metadata = serde_json::json!({
137        "source": {
138            "format": "tar",
139            "original_path": input.file_name().unwrap_or_default().to_string_lossy(),
140            "total_files": source_files.len(),
141            "total_bytes": total_bytes,
142            "source_files": source_files,
143        }
144    });
145    let meta_bytes = serde_json::to_vec(&metadata)?;
146
147    writer
148        .finalize(Vec::new(), Some(&meta_bytes))
149        .map_err(|e| anyhow::anyhow!("{e}"))?;
150
151    if let Some(ref pb) = pb {
152        if let Ok(pb) = pb.lock() {
153            pb.finish_with_message("Done");
154        }
155    }
156
157    if !silent {
158        println!(
159            "\n  {} Converted {} files ({}) from tar archive",
160            "✓".green(),
161            source_files.len(),
162            indicatif::HumanBytes(total_bytes).to_string().bright_black(),
163        );
164    }
165
166    Ok(())
167}
168