use clap::{Parser, Subcommand};
use msy::compress::{Compression, decompress};
use msy::delta::{Delta, apply_delta, compute_checksums};
use msy::sparse::DataRegion;
use msy::sync::scanner::Scanner;
use serde::{Deserialize, Serialize};
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::PathBuf;
// Top-level command-line definition for the `sy-remote` binary.
//
// Plain `//` comments are used here on purpose: clap turns `///` doc comments
// into help text, and the help text is already set explicitly via `about`.
#[derive(Parser)]
#[command(
    name = "sy-remote",
    about = "Remote helper for sy - executes on remote hosts via SSH"
)]
struct Cli {
    // The subcommand selected on the command line.
    #[command(subcommand)]
    command: Commands,
}
// Subcommands understood by the remote helper.
//
// The `///` comments on the variants and their fields are picked up by clap
// and shown as `--help` text. The enum itself is documented with a plain `//`
// comment so the top-level `about` string stays exactly as declared on `Cli`.
#[derive(Subcommand)]
enum Commands {
    /// Scan a path and print the discovered entries as JSON
    Scan {
        /// Path to scan
        path: PathBuf,
        /// Do not apply .gitignore rules while scanning
        #[arg(long)]
        no_git_ignore: bool,
        /// Include the .git directory in the scan
        #[arg(long, default_value_t = false)]
        include_git: bool,
    },
    /// Compute block checksums for a file and print them as JSON
    Checksums {
        /// Path of the file to checksum
        path: PathBuf,
        /// Block size used when computing the checksums
        #[arg(long)]
        block_size: usize,
    },
    /// Compute a checksum over a whole file and print it as hex
    FileChecksum {
        /// Path of the file to checksum
        path: PathBuf,
        /// Kind of checksum to compute: 'fast' or 'cryptographic'
        #[arg(long, default_value = "fast")]
        checksum_type: String,
    },
    /// Apply a JSON delta read from stdin (optionally zstd-compressed) to a base file
    ApplyDelta {
        /// Existing file the delta is applied against
        base_file: PathBuf,
        /// Path the result of applying the delta is written to
        output_file: PathBuf,
    },
    /// Write data read from stdin (optionally zstd-compressed) to a file
    ReceiveFile {
        /// Destination path; missing parent directories are created
        output_path: PathBuf,
        /// Modification time to set, in seconds since the Unix epoch
        #[arg(long)]
        mtime: Option<u64>,
    },
    /// Write a sparse file whose data regions are read from stdin
    ReceiveSparseFile {
        /// Destination path; missing parent directories are created
        output_path: PathBuf,
        /// Length the output file is set to before any data is written
        #[arg(long)]
        total_size: u64,
        /// JSON array of data regions; their bytes are read from stdin in this order
        #[arg(long)]
        regions: String,
        /// Modification time to set, in seconds since the Unix epoch
        #[arg(long)]
        mtime: Option<u64>,
    },
}
/// Top-level JSON document printed by the `Scan` subcommand.
#[derive(Debug, Serialize, Deserialize)]
struct ScanOutput {
    /// One record per entry returned by the scanner.
    entries: Vec<FileEntryJson>,
}
/// JSON-friendly record describing one scanned entry.
///
/// Field order is part of the serialized output and is kept as declared.
#[derive(Debug, Serialize, Deserialize)]
struct FileEntryJson {
    /// Entry path, converted to a string lossily.
    path: String,
    /// Size reported by the scanner.
    size: u64,
    /// Modification time in whole seconds since the Unix epoch.
    mtime: i64,
    /// Whether the entry is a directory.
    is_dir: bool,
    /// Whether the entry is a symbolic link.
    is_symlink: bool,
    /// Link target (lossy string form), when the scanner reported one.
    symlink_target: Option<String>,
    /// Whether the scanner flagged the file as sparse.
    is_sparse: bool,
    /// Allocated size reported by the scanner.
    allocated_size: u64,
    /// Extended attributes as `(name, base64-encoded value)` pairs.
    #[serde(default)]
    xattrs: Option<Vec<(String, String)>>,
    /// Inode number, when the scanner reported one.
    inode: Option<u64>,
    /// Link count reported by the scanner.
    nlink: u64,
    /// ACL data as text; absent when there is none or it was not valid UTF-8.
    #[serde(default)]
    acls: Option<String>,
}
/// Leading bytes that identify a zstd frame; used to auto-detect compressed stdin.
const ZSTD_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];

/// Entry point: parses the command line and dispatches to the handler of the
/// selected subcommand.
///
/// Every handler prints its result to stdout (JSON, or a hex digest for
/// `FileChecksum`). Any error is returned, which makes the process exit with
/// a failure status.
fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();
    match cli.command {
        Commands::Scan { path, no_git_ignore, include_git } => {
            run_scan(path, no_git_ignore, include_git)
        }
        Commands::Checksums { path, block_size } => run_checksums(path, block_size),
        Commands::FileChecksum { path, checksum_type } => run_file_checksum(path, checksum_type),
        Commands::ApplyDelta { base_file, output_file } => run_apply_delta(base_file, output_file),
        Commands::ReceiveFile { output_path, mtime } => run_receive_file(output_path, mtime),
        Commands::ReceiveSparseFile { output_path, total_size, regions, mtime } => {
            run_receive_sparse_file(output_path, total_size, regions, mtime)
        }
    }
}

/// Scans `path` and prints the entries as a single-line `ScanOutput` JSON document.
fn run_scan(path: PathBuf, no_git_ignore: bool, include_git: bool) -> anyhow::Result<()> {
    use base64::{Engine as _, engine::general_purpose};

    let scanner = Scanner::new(&path)
        .respect_gitignore(!no_git_ignore)
        .include_git_dir(include_git);
    let entries = scanner.scan()?;

    let json_entries: Vec<FileEntryJson> = entries
        .into_iter()
        .map(|e| {
            // Timestamps before the epoch collapse to 0 rather than failing the scan.
            let mtime = e
                .modified
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs() as i64;
            // Attribute values are arbitrary bytes, so they are base64-encoded for JSON.
            let xattrs = e.xattrs.map(|xattrs_map| {
                xattrs_map
                    .into_iter()
                    .map(|(key, value)| (key, general_purpose::STANDARD.encode(&value)))
                    .collect()
            });
            // ACL bytes that are not valid UTF-8 are dropped (reported as absent).
            let acls = e.acls.and_then(|acl_bytes| String::from_utf8(acl_bytes).ok());
            FileEntryJson {
                path: e.path.to_string_lossy().into_owned(),
                size: e.size,
                mtime,
                is_dir: e.is_dir,
                is_symlink: e.is_symlink,
                symlink_target: e.symlink_target.map(|p| p.to_string_lossy().into_owned()),
                is_sparse: e.is_sparse,
                allocated_size: e.allocated_size,
                xattrs,
                inode: e.inode,
                nlink: e.nlink,
                acls,
            }
        })
        .collect();

    let output = ScanOutput { entries: json_entries };
    println!("{}", serde_json::to_string(&output)?);
    Ok(())
}

/// Computes block checksums of `path` and prints them as JSON.
fn run_checksums(path: PathBuf, block_size: usize) -> anyhow::Result<()> {
    let checksums = compute_checksums(&path, block_size)?;
    println!("{}", serde_json::to_string(&checksums)?);
    Ok(())
}

/// Computes a whole-file checksum of `path` and prints it as a hex string.
///
/// Fails when `checksum_type` is neither `fast` nor `cryptographic`.
fn run_file_checksum(path: PathBuf, checksum_type: String) -> anyhow::Result<()> {
    use msy::integrity::{ChecksumType, IntegrityVerifier};

    let csum_type = match checksum_type.as_str() {
        "fast" => ChecksumType::Fast,
        "cryptographic" => ChecksumType::Cryptographic,
        other => anyhow::bail!("Invalid checksum type: {}. Use 'fast' or 'cryptographic'", other),
    };
    let verifier = IntegrityVerifier::new(csum_type, false);
    let checksum = verifier.compute_file_checksum(&path)?;
    println!("{}", checksum.to_hex());
    Ok(())
}

/// Reads all of stdin and transparently decompresses it when it starts with the zstd magic.
fn read_stdin_payload() -> anyhow::Result<Vec<u8>> {
    let mut raw = Vec::new();
    std::io::stdin().read_to_end(&mut raw)?;
    if raw.starts_with(&ZSTD_MAGIC) {
        Ok(decompress(&raw, Compression::Zstd)?)
    } else {
        Ok(raw)
    }
}

/// Reads a JSON delta from stdin, applies it to `base_file` and prints the apply statistics.
fn run_apply_delta(base_file: PathBuf, output_file: PathBuf) -> anyhow::Result<()> {
    let payload = read_stdin_payload()?;
    let delta_json = String::from_utf8(payload)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
    let delta: Delta = serde_json::from_str(&delta_json)?;
    let stats = apply_delta(&base_file, &delta, &output_file)?;
    println!(
        "{{\"operations_count\": {}, \"literal_bytes\": {}}}",
        stats.operations_count, stats.literal_bytes
    );
    Ok(())
}

/// Sets the modification time of `path` to `mtime_secs` seconds after the Unix epoch.
///
/// This is deliberately best-effort: the data has already been written, so a
/// failure to set the timestamp is ignored. A value too large to represent as
/// a `SystemTime` is skipped instead of panicking on overflow.
fn set_mtime_best_effort(path: &std::path::Path, mtime_secs: Option<u64>) {
    use std::time::{Duration, UNIX_EPOCH};

    let when = mtime_secs.and_then(|secs| UNIX_EPOCH.checked_add(Duration::from_secs(secs)));
    if let Some(when) = when {
        let _ = filetime::set_file_mtime(path, filetime::FileTime::from_system_time(when));
    }
}

/// Writes the (possibly decompressed) stdin payload to `output_path` and prints the byte count.
fn run_receive_file(output_path: PathBuf, mtime: Option<u64>) -> anyhow::Result<()> {
    let file_data = read_stdin_payload()?;
    if let Some(parent) = output_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let mut output_file = std::fs::File::create(&output_path)?;
    output_file.write_all(&file_data)?;
    output_file.flush()?;
    set_mtime_best_effort(&output_path, mtime);
    println!("{{\"bytes_written\": {}}}", file_data.len());
    Ok(())
}

/// Copies each region's bytes from `reader` to its offset in `writer`.
///
/// Region data is consumed from `reader` back to back, in slice order. The
/// copy is streamed, so memory use does not grow with the region length.
/// Returns the total number of bytes written, or an `UnexpectedEof` error
/// when `reader` ends before a region is complete.
fn write_sparse_regions<R: Read, W: Write + Seek>(
    reader: &mut R,
    writer: &mut W,
    regions: &[DataRegion],
) -> std::io::Result<u64> {
    let mut total_bytes_written = 0u64;
    for region in regions {
        writer.seek(SeekFrom::Start(region.offset))?;
        let copied = std::io::copy(&mut reader.by_ref().take(region.length), &mut *writer)?;
        if copied != region.length {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                format!(
                    "input ended after {} of {} bytes for region at offset {}",
                    copied, region.length, region.offset
                ),
            ));
        }
        total_bytes_written += copied;
    }
    Ok(total_bytes_written)
}

/// Creates `output_path` with length `total_size`, fills the regions described by the
/// `regions` JSON with data from stdin, and prints a JSON summary.
fn run_receive_sparse_file(
    output_path: PathBuf,
    total_size: u64,
    regions: String,
    mtime: Option<u64>,
) -> anyhow::Result<()> {
    let data_regions: Vec<DataRegion> = serde_json::from_str(&regions)?;
    if let Some(parent) = output_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let mut output_file = std::fs::File::create(&output_path)?;
    // Size the file up front; ranges not covered by a region are never written.
    output_file.set_len(total_size)?;

    let stdin = std::io::stdin();
    let mut input = stdin.lock();
    let total_bytes_written = write_sparse_regions(&mut input, &mut output_file, &data_regions)?;

    output_file.flush()?;
    output_file.sync_all()?;
    set_mtime_best_effort(&output_path, mtime);
    println!(
        "{{\"bytes_written\": {}, \"file_size\": {}, \"regions\": {}}}",
        total_bytes_written,
        total_size,
        data_regions.len()
    );
    Ok(())
}
// Tests for the sparse-file write pattern and for `DataRegion` JSON round-tripping.
//
// NOTE(review): these tests re-create the seek-and-write loop locally; they do not
// drive the `ReceiveSparseFile` command itself, which reads its data from stdin.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Read, Seek, SeekFrom, Write};
    use tempfile::TempDir;

    /// Writes consecutive slices of `data` at each region's offset, in order.
    fn write_regions(file: &mut std::fs::File, regions: &[DataRegion], data: &[u8]) {
        let mut consumed = 0usize;
        for region in regions {
            let len = region.length as usize;
            file.seek(SeekFrom::Start(region.offset)).unwrap();
            file.write_all(&data[consumed..consumed + len]).unwrap();
            consumed += len;
        }
        file.flush().unwrap();
    }

    #[test]
    fn test_receive_sparse_file_basic() {
        let temp = TempDir::new().unwrap();
        let output_path = temp.path().join("sparse_output.dat");
        let regions = vec![
            DataRegion { offset: 0, length: 1024 },
            DataRegion { offset: 2048, length: 512 },
        ];
        let total_size = 4096;
        let regions_json = serde_json::to_string(&regions).unwrap();

        // Region payloads back to back, as they would arrive on the input stream.
        let mut input_data = Vec::new();
        input_data.extend(vec![b'A'; 1024]);
        input_data.extend(vec![b'B'; 512]);

        let mut output_file = std::fs::File::create(&output_path).unwrap();
        output_file.set_len(total_size).unwrap();
        let data_regions: Vec<DataRegion> = serde_json::from_str(&regions_json).unwrap();
        write_regions(&mut output_file, &data_regions, &input_data);
        drop(output_file);

        let result = std::fs::read(&output_path).unwrap();
        assert_eq!(result.len(), 4096);
        assert!(result[0..1024].iter().all(|&b| b == b'A'));
        assert!(result[1024..2048].iter().all(|&b| b == 0));
        assert!(result[2048..2560].iter().all(|&b| b == b'B'));
        assert!(result[2560..4096].iter().all(|&b| b == 0));

        #[cfg(unix)]
        {
            use std::os::unix::fs::MetadataExt;
            let metadata = std::fs::metadata(&output_path).unwrap();
            let allocated = metadata.blocks() * 512;
            // Computed but intentionally not asserted: whether holes stay
            // unallocated depends on the filesystem backing the temp dir.
            let _ = allocated < total_size;
        }
    }

    #[test]
    fn test_receive_sparse_file_single_region() {
        let temp = TempDir::new().unwrap();
        let output_path = temp.path().join("single_region.dat");
        let regions = vec![DataRegion { offset: 1024 * 1024, length: 100 }];
        let total_size = 1024 * 1024 + 200;
        let regions_json = serde_json::to_string(&regions).unwrap();
        let input_data = vec![b'X'; 100];

        let mut output_file = std::fs::File::create(&output_path).unwrap();
        output_file.set_len(total_size).unwrap();
        let data_regions: Vec<DataRegion> = serde_json::from_str(&regions_json).unwrap();
        write_regions(&mut output_file, &data_regions, &input_data);
        drop(output_file);

        let metadata = std::fs::metadata(&output_path).unwrap();
        assert_eq!(metadata.len(), total_size);

        // Read back only the data region that follows the leading 1 MiB hole.
        let mut file = std::fs::File::open(&output_path).unwrap();
        file.seek(SeekFrom::Start(1024 * 1024)).unwrap();
        let mut buffer = vec![0u8; 100];
        file.read_exact(&mut buffer).unwrap();
        assert!(buffer.iter().all(|&b| b == b'X'));
    }

    #[test]
    fn test_data_region_json_serialization() {
        let regions = vec![
            DataRegion { offset: 0, length: 1024 },
            DataRegion { offset: 4096, length: 2048 },
        ];
        let json = serde_json::to_string(&regions).unwrap();
        let deserialized: Vec<DataRegion> = serde_json::from_str(&json).unwrap();
        assert_eq!(regions.len(), deserialized.len());
        for (expected, actual) in regions.iter().zip(deserialized.iter()) {
            assert_eq!(expected.offset, actual.offset);
            assert_eq!(expected.length, actual.length);
        }
    }
}