xcp 0.8.0

xcp is a (partial) clone of the Unix `cp` command with some more user-friendly feedback and some optimisations.
/*
 * Copyright © 2018, Steve Smith <tarkasteve@gmail.com>
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 3 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

use anyhow::Error;
use rand::{Rng, RngCore, SeedableRng};
use rand_distr::{Alphanumeric, Pareto, Triangular};
use rand_xorshift::XorShiftRng;
use std::cmp;
use std::env::current_dir;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Read, Write, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use std::result;
use tempfile::{TempDir, tempdir_in};
use uuid::Uuid;
use walkdir::WalkDir;


/// Result alias for helpers and tests that produce no value on success
/// and report failure through the crate-wide `Error` type.
pub type TResult = result::Result<(), Error>;

/// Build a fresh [`Command`] for the `xcp` executable.
///
/// The executable path comes from the `CARGO_BIN_EXE_xcp` environment
/// variable, which `env!` reads at compile time.
pub fn get_command() -> Result<Command, Error> {
    let xcp = Command::new(env!("CARGO_BIN_EXE_xcp"));
    Ok(xcp)
}

/// Run the command from `get_command` with `args`, wait for it to
/// finish, and return its captured [`Output`].
///
/// # Errors
///
/// Fails if the command cannot be built or spawned.
pub fn run(args: &[&str]) -> Result<Output, Error> {
    let mut cmd = get_command()?;
    cmd.args(args);
    let captured = cmd.output()?;
    Ok(captured)
}

/// Create a temporary directory under `<current dir>/target`.
///
/// # Errors
///
/// Fails if the current directory cannot be determined or the
/// temporary directory cannot be created.
pub fn tempdir() -> Result<TempDir, Error> {
    // /tmp is deliberately avoided: it might be tmpfs, which doesn't
    // support all VFS options (notably fiemap).
    let parent = current_dir()?.join("target");
    let dir = tempdir_in(parent)?;
    Ok(dir)
}

pub fn tempdir_rel() -> Result<PathBuf, Error> {
    let uuid = Uuid::new_v4();
    let dir = PathBuf::from("target/").join(uuid.to_string());
    create_dir_all(&dir)?;
    Ok(dir)
}

/// Create (or truncate) the file at `path` and write `text` into it.
///
/// # Errors
///
/// Fails if the file cannot be created or written.
pub fn create_file(path: &Path, text: &str) -> Result<(), Error> {
    let mut out = File::create(path)?;
    out.write_all(text.as_bytes())?;
    Ok(())
}

/// Create a sparse test file at `file` and return its nominal length.
///
/// The file is sized to `4096 * 4096 + 8 + tail` bytes, and the 8-byte
/// marker `c00lc0d3` is written at byte offsets `head`, `1024 * 4096`
/// and `4096 * 4096`; the remainder is left unwritten. An existing
/// file is resized rather than emptied first.
///
/// The returned value is the computed length; a `head` large enough to
/// place its marker beyond that length is not accounted for.
///
/// # Errors
///
/// Fails if the file cannot be opened, resized, or written.
#[cfg(any(target_os = "linux", target_os = "android"))]
pub fn create_sparse(file: &Path, head: u64, tail: u64) -> Result<u64, Error> {
    let data = "c00lc0d3";
    let len = 4096u64 * 4096 + data.len() as u64 + tail;

    // Size the file in-process instead of shelling out to a hard-coded
    // `/usr/bin/truncate`: no external binary is needed and paths that
    // are not valid UTF-8 are accepted.
    let mut fd = std::fs::OpenOptions::new()
        .write(true)
        .create(true)
        .open(file)?;
    fd.set_len(len)?;

    for &offset in &[head, 1024 * 4096, 4096 * 4096] {
        fd.seek(SeekFrom::Start(offset))?;
        fd.write_all(data.as_bytes())?;
    }

    Ok(len)
}

/// Report whether the file at `path` holds exactly `text`.
///
/// Despite the name this is a whole-content equality check, not a
/// substring search.
///
/// # Errors
///
/// Fails if the file cannot be opened or is not valid UTF-8.
pub fn file_contains(path: &Path, text: &str) -> Result<bool, Error> {
    let contents = std::fs::read_to_string(path)?;
    Ok(contents == text)
}

/// Compare two files byte-for-byte, returning `true` when they are
/// identical.
///
/// The lengths reported by metadata are compared first; the contents
/// are then streamed through two 1 MiB buffered readers.
///
/// # Panics
///
/// Panics if either file's metadata cannot be read, or if either file
/// cannot be opened or read.
pub fn files_match(a: &Path, b: &Path) -> bool {
    println!("Checking: {:?}", a);
    if a.metadata().unwrap().len() != b.metadata().unwrap().len() {
        return false;
    }
    let mut abr = BufReader::with_capacity(1024*1024, File::open(a).unwrap());
    let mut bbr = BufReader::with_capacity(1024*1024, File::open(b).unwrap());
    loop {
        let ab = abr.fill_buf().unwrap();
        let bb = bbr.fill_buf().unwrap();

        // End of either stream: equal only if both ended together.
        if ab.is_empty() || bb.is_empty() {
            return ab.is_empty() && bb.is_empty();
        }

        // The two readers are not guaranteed to hand back chunks of the
        // same size (short reads), so only the common prefix is compared
        // and consumed; the remainder stays buffered for the next pass.
        let common = ab.len().min(bb.len());
        if ab[..common] != bb[..common] {
            return false;
        }
        abr.consume(common);
        bbr.consume(common);
    }
}


// Sanity checks for `files_match`: identical files match, and files
// differing in length or in content do not.
#[test]
fn test_hasher() -> TResult {
    {
        // Identical contents.
        let dir = tempdir()?;
        let a = dir.path().join("source.txt");
        let b = dir.path().join("dest.txt");
        let text = "sd;lkjfasl;kjfa;sldkfjaslkjfa;jsdlfkjsdlfkajl";
        create_file(&a, text)?;
        create_file(&b, text)?;
        assert!(files_match(&a, &b));
    }
    {
        // Different lengths: rejected by the up-front size comparison.
        let dir = tempdir()?;
        let a = dir.path().join("source.txt");
        let b = dir.path().join("dest.txt");
        create_file(&a, "lskajdf;laksjdfl;askjdf;alksdj")?;
        create_file(&b, "29483793857398")?;
        assert!(!files_match(&a, &b));
    }
    {
        // Same length, different bytes: exercises the content
        // comparison rather than the size short-circuit.
        let dir = tempdir()?;
        let a = dir.path().join("source.txt");
        let b = dir.path().join("dest.txt");
        create_file(&a, "lskajdf;laksjdfl;askjdf;alksdj")?;
        create_file(&b, "lskajdf;laksjdfl;askjdf;alksdX")?;
        assert!(!files_match(&a, &b));
    }

    Ok(())
}


#[cfg(any(target_os = "linux", target_os = "android"))]
pub fn quickstat(file: &Path) -> Result<(i32, i32, i32), Error> {
    let out = Command::new("stat")
        .args(&["--format", "%s %b %B",
                file.to_str().unwrap()])
        .output()?;
    assert!(out.status.success());

    let stdout = String::from_utf8(out.stdout)?;
    let stats = stdout
        .split_whitespace()
        .map(|s| s.parse::<i32>().unwrap())
        .collect::<Vec<i32>>();
    let (size, blocks, blksize) = (stats[0], stats[1], stats[2]);

    Ok((size, blocks, blksize))
}

/// Heuristic sparseness check: a file is reported as sparse when
/// `quickstat` gives fewer blocks than `size / blksize` (integer
/// division).
#[cfg(any(target_os = "linux", target_os = "android"))]
pub fn probably_sparse(file: &Path) -> Result<bool, Error> {
    let (size, blocks, blksize) = quickstat(file)?;
    let expected_blocks = size / blksize;
    Ok(expected_blocks > blocks)
}
/// Fallback for targets other than Linux/Android: no file is ever
/// reported as sparse.
#[cfg(not(any(target_os = "linux", target_os = "android")))]
pub fn probably_sparse(file: &Path) -> Result<bool, Error> {
    // The path is intentionally not inspected on these targets.
    let _ = file;
    Ok(false)
}

/// Depth limit for `gen_subtree`: subdirectories are only generated
/// while the current depth is below this value.
const MAXDEPTH: u64 = 2;

/// Draw `len` samples from the `Alphanumeric` distribution using `rng`
/// and collect them into a `String`.
pub fn gen_file_name(rng: &mut dyn RngCore, len: u64) -> String {
    let wanted = len as usize;
    let chars = rng.sample_iter(Alphanumeric);
    chars.take(wanted).collect()
}

/// Create the file at `path` with `size` bytes of data drawn from `rng`.
///
/// Data is produced in chunks of up to 4096 bytes. When `sparse` is set,
/// a chunk whose first random byte is divisible by 3 is skipped with a
/// seek instead of being written, leaving a gap in the file.
///
/// # Errors
///
/// Fails if the file cannot be created, written, seeked, or resized.
pub fn gen_file(path: &Path, rng: &mut dyn RngCore, size: usize, sparse: bool) -> TResult {
    println!("Generating: {:?}", path);
    let mut fd = File::create(path)?;
    const BSIZE: usize = 4096;
    let mut buffer = [0; BSIZE];
    let mut left = size;

    while left > 0 {
        let blen = cmp::min(left, BSIZE);
        let b = &mut buffer[..blen];
        // The chunk is always filled, even when it ends up skipped, so
        // the amount of randomness consumed is the same either way.
        rng.fill(b);
        if sparse && b[0] % 3 == 0 {
            fd.seek(SeekFrom::Current(blen as i64))?;
        } else {
            // write_all: a plain write may accept fewer bytes than asked
            // for, and a zero-length write would never make progress.
            fd.write_all(b)?;
        }
        left -= blen;
    }

    if sparse {
        // Seeking past the end does not by itself grow the file, so a
        // skipped final chunk would leave it shorter than `size`.
        fd.set_len(size as u64)?;
    }

    Ok(())
}

/// Recursive random file-tree generator. The distributions have been
/// manually chosen to give a rough approximation of a working
/// project, with most files in the 10's of Ks, and a few larger
/// ones. With a seeded PRNG (see below) this will give a repeatable
/// tree depending on the seed.
///
/// `base` is created if needed and populated with random files; while
/// `depth` is below `MAXDEPTH`, random subdirectories are generated
/// recursively as well. When `with_sparse` is set, all files of a
/// directory are generated as sparse if that directory's file count is
/// divisible by 3.
pub fn gen_subtree(base: &Path, rng: &mut dyn RngCore, depth: u64, with_sparse: bool) -> TResult {
    create_dir_all(base)?;

    let file_count_dist = Triangular::new(0.0, 64.0, 64.0/5.0).unwrap();
    let count_dist = Triangular::new(1.0, 64.0, 64.0/5.0).unwrap();
    let file_size_dist = Pareto::new(50.0*1024.0, 1.0).unwrap();

    let nfiles = rng.sample(file_count_dist) as u64;
    // Depends only on the directory's file count, so decided once.
    let sparse = with_sparse && nfiles % 3 == 0;

    // NOTE: the order of draws from `rng` determines the generated tree
    // and must not be changed.
    for _ in 0..nfiles {
        let name_len = rng.sample(count_dist) as u64;
        let size = rng.sample(file_size_dist) as u64;
        let target = base.join(gen_file_name(rng, name_len));
        gen_file(&target, rng, size as usize, sparse)?;
    }

    if depth >= MAXDEPTH {
        return Ok(());
    }

    let ndirs = rng.sample(count_dist) as u64;
    for _ in 0..ndirs {
        let name_len = rng.sample(count_dist) as u64;
        let child = base.join(gen_file_name(rng, name_len));
        gen_subtree(&child, rng, depth + 1, with_sparse)?;
    }

    Ok(())
}

/// Generate a random file tree rooted at `base`, using an
/// `XorShiftRng` seeded with `seed` and starting at depth 0.
///
/// `with_sparse` is forwarded unchanged to `gen_subtree`.
pub fn gen_filetree(base: &Path, seed: u64, with_sparse: bool) -> TResult {
    let root_depth = 0;
    let mut prng = XorShiftRng::seed_from_u64(seed);
    gen_subtree(base, &mut prng, root_depth, with_sparse)
}

/// Walk `src` and assert that every entry has a matching counterpart at
/// the same relative location under `dest`.
///
/// For each entry the counterpart must exist, agree on being a
/// directory and on being a symlink, and, for regular files, agree on
/// `probably_sparse` and on contents (`files_match`).
///
/// # Errors
///
/// Fails if the walk or a metadata/stat lookup fails.
///
/// # Panics
///
/// Panics (assertion failure) on the first mismatch found.
pub fn compare_trees(src: &Path, dest: &Path) -> TResult {
    let pref = src.components().count();
    for entry in WalkDir::new(src) {
        let from = entry?.into_path();
        // Drop the `src` prefix to get the path relative to the root.
        let tail: PathBuf = from.components().skip(pref).collect();
        let to = dest.join(tail);

        assert!(to.exists(), "missing in destination: {:?}", to);
        assert_eq!(from.is_dir(), to.is_dir(),
                   "directory mismatch: {:?} vs {:?}", from, to);
        // symlink_metadata() describes the link itself; metadata()
        // follows links, which would make is_symlink() false on both
        // sides and the comparison meaningless.
        assert_eq!(from.symlink_metadata()?.file_type().is_symlink(),
                   to.symlink_metadata()?.file_type().is_symlink(),
                   "symlink mismatch: {:?} vs {:?}", from, to);

        if from.is_file() {
            // Compare source against destination (not the destination
            // against itself).
            assert_eq!(probably_sparse(&from)?, probably_sparse(&to)?,
                       "sparseness mismatch: {:?} vs {:?}", from, to);
            assert!(files_match(&from, &to),
                    "content mismatch: {:?} vs {:?}", from, to);
            // FIXME: Ideally we'd check sparse holes here, but
            // there's no guarantee they'll match exactly due to
            // low-level filesystem details (SEEK_HOLE behaviour,
            // tail-packing, compression, etc.)
        }

    }
    Ok(())
}