Skip to main content

hexz_cli/cmd/data/
convert.rs

1//! Convert external data formats into Hexz archives.
2//!
3//! Supports:
4//! - **tar**: Pure Rust via the `tar` crate (streaming, no extraction)
5
6use crate::ui::progress::create_progress_bar;
7use anyhow::{Context, Result, bail};
8use colored::Colorize;
9use hexz_core::algo::compression::create_compressor_from_str;
10use hexz_ops::archive_writer::ArchiveWriter;
11use std::io::Read;
12use std::path::Path;
13use std::sync::{Arc, Mutex};
14
15/// Execute the convert command.
16#[allow(clippy::too_many_arguments)]
17pub fn run(
18    format: &str,
19    input: &Path,
20    output: &Path,
21    compression: &str,
22    block_size: u32,
23    silent: bool,
24) -> Result<()> {
25    if !silent {
26        println!(
27            "{} Converting {}",
28            "╭".dimmed(),
29            input.display().to_string().cyan()
30        );
31        println!("{} Format     {}", "│".dimmed(), format.bright_black());
32        println!(
33            "{} Output     {}",
34            "╰".dimmed(),
35            output.display().to_string().bright_black()
36        );
37        println!();
38    }
39    match format.to_lowercase().as_str() {
40        "tar" => convert_tar(input, output, compression, block_size, silent),
41        other => bail!("Unknown format: {other:?}. Supported formats: tar"),
42    }
43}
44
45/// Convert a tar archive to a Hexz archive using pure Rust.
46///
47/// Streams tar entries directly through the `ArchiveWriter` without
48/// extracting to disk. Stores a file manifest in archive metadata.
49fn convert_tar(
50    input: &Path,
51    output: &Path,
52    compression: &str,
53    block_size: u32,
54    silent: bool,
55) -> Result<()> {
56    // ... rest of convert_tar ...
57    // (I will replace the println! calls later or in a separate step if this is too large)
58    // Actually let's try to replace the whole file to be safe with all formatting.
59
60    // Calculate total size for progress bar
61    let total_size = std::fs::metadata(input)
62        .with_context(|| format!("Cannot read input file: {}", input.display()))?
63        .len();
64
65    // Set up progress bar
66    let pb = if silent {
67        None
68    } else {
69        let pb = create_progress_bar(total_size);
70        Some(Arc::new(Mutex::new(pb)))
71    };
72
73    // Create compressor and archive writer
74    let (compressor, compression_type) =
75        create_compressor_from_str(compression, None, None).map_err(|e| anyhow::anyhow!("{e}"))?;
76
77    let mut writer = ArchiveWriter::builder(output, compressor, compression_type)
78        .block_size(block_size)
79        .build()
80        .map_err(|e| anyhow::anyhow!("{e}"))?;
81
82    // Open tar archive (supports .tar, .tar.gz, .tar.bz2, .tar.xz)
83    let file = std::fs::File::open(input)
84        .with_context(|| format!("Cannot open tar file: {}", input.display()))?;
85
86    let mut archive = tar::Archive::new(file);
87
88    // Track file manifest for metadata
89    let mut source_files: Vec<serde_json::Value> = Vec::new();
90    let mut total_bytes: u64 = 0;
91    let mut bytes_from_archive: u64 = 0;
92
93    // Begin a main stream for the tar data
94    // We'll set total_size after reading all entries by using a two-pass approach,
95    // but for streaming we start with the tar file size as an estimate.
96    writer.begin_stream(true, total_size);
97
98    for entry_result in archive.entries()? {
99        let mut entry = entry_result?;
100        let header = entry.header();
101
102        // Skip non-file entries (directories, symlinks, etc.)
103        if !header.entry_type().is_file() {
104            continue;
105        }
106
107        let name = entry.path()?.to_string_lossy().to_string();
108        let size = header.size()?;
109
110        // Read the entry data and write in blocks
111        let mut remaining = size;
112        let mut buf = vec![0u8; block_size as usize];
113
114        while remaining > 0 {
115            let to_read = std::cmp::min(remaining as usize, buf.len());
116            entry.read_exact(&mut buf[..to_read])?;
117
118            writer
119                .write_data_block(&buf[..to_read])
120                .map_err(|e| anyhow::anyhow!("{e}"))?;
121
122            remaining -= to_read as u64;
123            bytes_from_archive += to_read as u64;
124
125            if let Some(ref pb) = pb {
126                if let Ok(pb) = pb.lock() {
127                    // Approximate progress based on bytes read from archive
128                    pb.set_position(std::cmp::min(bytes_from_archive, total_size));
129                }
130            }
131        }
132
133        source_files.push(serde_json::json!({
134            "name": name,
135            "size": size,
136            "offset": total_bytes,
137        }));
138        total_bytes += size;
139    }
140
141    writer.end_stream().map_err(|e| anyhow::anyhow!("{e}"))?;
142
143    // Build metadata JSON
144    let metadata = serde_json::json!({
145        "source": {
146            "format": "tar",
147            "original_path": input.file_name().unwrap_or_default().to_string_lossy(),
148            "total_files": source_files.len(),
149            "total_bytes": total_bytes,
150            "source_files": source_files,
151        }
152    });
153    let meta_bytes = serde_json::to_vec(&metadata)?;
154
155    writer
156        .finalize(Vec::new(), Some(&meta_bytes))
157        .map_err(|e| anyhow::anyhow!("{e}"))?;
158
159    if let Some(ref pb) = pb {
160        if let Ok(pb) = pb.lock() {
161            pb.finish_with_message("Done");
162        }
163    }
164
165    if !silent {
166        println!(
167            "\n  {} Converted {} files ({}) from tar archive",
168            "✓".green(),
169            source_files.len(),
170            indicatif::HumanBytes(total_bytes)
171                .to_string()
172                .bright_black(),
173        );
174    }
175
176    Ok(())
177}