metadata-backup 0.1.0

Program to back up file system metadata.
Documentation
// Copyright 2019 metadata-backup Authors (see AUTHORS.md)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//     http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::io::Write;
use std::path::{Path, PathBuf};

use std::cell::RefCell;
use std::collections::BinaryHeap;
use std::sync::Mutex;

use rayon::prelude::*;
use walkdir::WalkDir;

use super::metadata;

/// Name of the archive entry that lists every recorded file path, one per line.
pub static MANIFEST_FILE_NAME: &str = "FILE_MANIFEST";

/// Writes a zip file with the metadata backup for all files under `file_root`
/// to `out_path`.
pub fn write_backup<P: AsRef<Path>>(file_root: P, out_path: P) -> Result<(), Error> {
    let file_root = file_root.as_ref();
    let out_path = out_path.as_ref();
    let zip_root = PathBuf::from("FILESYSTEM_ROOT");

    // This can panic, so we shold do it before we create the zip file
    let directories: Vec<PathBuf> = get_all_directories(file_root)?;

    let basic_options = zip::write::FileOptions::default().unix_permissions(0o755);
    let zip_file = std::fs::File::create(out_path)?;
    let zip_writer = Mutex::new(zip::ZipWriter::new(zip_file));
    let file_manifest = Mutex::new(BinaryHeap::new());

    // Now write the directories and the contents of all the directories
    thread_local! {
        static DATA_BUFFER : RefCell<Vec<u8>> = RefCell::new(Vec::new());
    };

    let write_directory = |directory_path: &PathBuf| -> Result<(), Error> {
        let base_dir_path = directory_path.strip_prefix(file_root)?;
        let dir_zpath = zip_root.join(&base_dir_path);
        let contents_path = dir_zpath.join("contents.csv");

        let records = match load_directory_metadata(&directory_path) {
            Ok(records) => records,
            Err(e) => {
                if is_skippable_error(&e) {
                    eprintln!("Warning: {} at path {}", e, directory_path.display());
                    return Ok(());
                } else {
                    return Err(e.into());
                }
            }
        };

        // Add the files we've seen to the manifest
        let to_manifest_listing = |md: &metadata::Metadata| -> String {
            base_dir_path
                .join(md.name.clone())
                .to_string_lossy()
                .into_owned()
        };

        let mut manifest = file_manifest.lock().unwrap();
        manifest.extend(records.iter().map(to_manifest_listing));

        // Write the data to the thread-local data buffer
        DATA_BUFFER.with(|buffer| -> Result<(), Error> {
            let mut buffer = buffer.borrow_mut();
            let mut csv_writer = csv::Writer::from_writer(&mut *buffer);
            for record in records {
                csv_writer.serialize(record)?;
            }
            Ok(())
        })?;
        // Acquire the lock while writing to the zip file
        let mut writer = zip_writer.lock().unwrap();

        writer.add_directory(
            dir_zpath
                .to_str()
                .ok_or(Error::new("Failed to convert directory to string"))?,
            basic_options,
        )?;

        writer.start_file_from_path(&contents_path, basic_options)?;
        DATA_BUFFER.with(|buffer| -> Result<(), Error> {
            let mut buffer = buffer.borrow_mut();
            writer.write_all(&mut *buffer)?;
            buffer.clear();
            Ok(())
        })?;

        Ok(())
    };

    directories.par_iter().try_for_each(write_directory)?;

    // Finally, write the FILE_MANIFEST of all the files we've seen.
    let file_manifest_vec = file_manifest.into_inner().unwrap().into_sorted_vec();
    let mut writer = zip_writer.lock().unwrap();
    writer.start_file_from_path(&PathBuf::from(MANIFEST_FILE_NAME), basic_options)?;
    for file_path_str in file_manifest_vec {
        writer.write_all(&file_path_str.as_bytes())?;
        writer.write_all(b"\n")?;
    }
    Ok(())
}

/// Builds one `metadata::Metadata` record for each entry directly inside
/// `dir_path`.
///
/// Stops at the first failure, whether it comes from opening the directory,
/// reading an entry, or constructing the record, and returns that I/O error.
fn load_directory_metadata<P: AsRef<Path>>(
    dir_path: P,
) -> Result<Vec<metadata::Metadata>, std::io::Error> {
    let mut records = Vec::new();
    for entry in std::fs::read_dir(dir_path.as_ref())? {
        let entry_path = entry?.path();
        records.push(metadata::Metadata::new(&entry_path)?);
    }
    Ok(records)
}

/// Returns `true` when `err` is a permission-denied failure, the only kind of
/// I/O error the backup treats as a warning rather than aborting.
fn is_skippable_error(err: &std::io::Error) -> bool {
    use std::io::ErrorKind;

    err.kind() == ErrorKind::PermissionDenied
}

/// Walks `base_path` recursively and returns every yielded path for which
/// `Path::is_dir` is true.
///
/// Walk errors are handled as follows:
///
/// * an I/O error accepted by `is_skippable_error` (permission denied) is
///   reported on stderr and the entry is skipped;
/// * an error with no underlying I/O error is also reported on stderr and
///   skipped (previously it was dropped without any message);
/// * any other I/O error aborts the walk and is returned as an [`Error`].
fn get_all_directories<P: AsRef<Path>>(base_path: P) -> Result<Vec<PathBuf>, Error> {
    WalkDir::new(base_path.as_ref())
        .into_iter()
        .filter_map(|entry| match entry {
            Ok(val) => {
                // NOTE(review): `is_dir` follows symlinks, so a symlink that
                // points at a directory is listed here as well — confirm this
                // is intended.
                let path = val.path();
                if path.is_dir() {
                    Some(Ok(path.to_path_buf()))
                } else {
                    None
                }
            }
            Err(e) => match e.io_error() {
                // A real I/O failure that we are not allowed to ignore.
                Some(io_err) if !is_skippable_error(io_err) => {
                    Some(Err(Error::new(e.to_string())))
                }
                // Skippable I/O failure, or a walk error without an I/O
                // cause: keep going, but never silently.
                _ => {
                    eprintln!("Warning: {}", e);
                    None
                }
            },
        })
        .collect()
}

/// Error type returned by the backup routines in this module.
///
/// It carries only a human-readable message; the underlying error is
/// flattened to its string form when it is converted.
///
/// `Debug` is derived so that `Result<_, Error>` can be used with `unwrap`,
/// `expect` and as the return type of `main`.
#[derive(Debug)]
pub struct Error {
    /// Human-readable description of what went wrong.
    message: String,
}

impl Error {
    pub fn new<T: Into<String>>(message: T) -> Error {
        Error {
            message: message.into(),
        }
    }
}

impl From<csv::Error> for Error {
    fn from(err: csv::Error) -> Error {
        Error::new(err.to_string())
    }
}

impl From<std::path::StripPrefixError> for Error {
    fn from(err: std::path::StripPrefixError) -> Error {
        Error::new(err.to_string())
    }
}
/// Converts a zip error by keeping only its display text.
impl From<zip::result::ZipError> for Error {
    fn from(source: zip::result::ZipError) -> Self {
        Self::new(source.to_string())
    }
}

impl From<std::io::Error> for Error {
    fn from(err: std::io::Error) -> Error {
        Error::new(err.to_string())
    }
}

impl From<walkdir::Error> for Error {
    fn from(err: walkdir::Error) -> Error {
        Error::new(err.to_string())
    }
}

/// Displays the stored message verbatim.
impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Written straight through, so width/fill flags of the outer
        // formatter are not applied to the message.
        f.write_str(&self.message)
    }
}