forge_backup 1.2.1

A program to back up all user home folders to an S3 bucket.
Documentation
use crate::error::{AppResult, S3CopyError};
use crate::{config::Config, error::BackupError};
use chrono::Local;
use indicatif::{ProgressBar, ProgressStyle};
use std::ffi::OsString;
use std::io::{self, Write};
use std::path::Path;
use std::process::Command;

/// Backs up every home directory found under `config.home_dir` and uploads
/// each archive to S3.
///
/// Directories whose names appear in `config.exclude_users` are skipped. The
/// temp folder is created if needed and stale archives are pruned before the
/// first backup starts.
///
/// # Returns
///
/// A pair `(successes, failures)`: the newline-joined success messages and the
/// newline-joined error messages, each in the order the homes were processed.
/// A failure for one home does not stop the remaining homes.
///
/// # Errors
///
/// Fails only if the home directory listing cannot be read or the temp folder
/// cannot be created; per-home failures are reported through the second
/// element of the returned tuple instead.
pub fn run(config: &Config) -> AppResult<(String, String)> {
    let mut homes = get_home_directories(&config.home_dir)?;
    homes.retain(|dir| !config.exclude_users.contains(dir));

    ensure_directory_exists(&config.temp_folder)?;
    cleanup_old_backups(&config.temp_folder);

    // NOTE(review): "%Y-%m%d" renders as e.g. `2024-0131` (no dash between
    // month and day). Existing uploads already use this layout, so it is left
    // unchanged here - confirm whether "%Y-%m-%d" was intended.
    let s3_folder = format!(
        "s3://{}/{}/{}/{}/",
        &config.s3_bucket,
        &config.s3_folder,
        &config.hostname,
        Local::now().format("%Y-%m%d")
    );

    let pb = ProgressBar::new(
        homes
            .len()
            .try_into()
            .expect("number of home directories fits in the progress bar length"),
    );
    let tmpl = ProgressStyle::with_template(
        "{spinner:.green} [{elapsed_precise}] [{bar:.cyan/blue}] {pos:>7}/{len:7} {msg}",
    )
    .expect("progress bar template is a valid constant")
    .progress_chars("#>-");
    pb.set_style(tmpl);

    let mut successes = Vec::with_capacity(homes.len());
    let mut failures = Vec::new();
    for home in &homes {
        // Show which home is currently being processed instead of a fixed label.
        pb.set_message(home.clone());
        pb.inc(1);

        match perform_backup_of_home(home, config, &s3_folder, true) {
            Ok(message) => successes.push(message),
            Err(e) => failures.push(e.to_string()),
        }
    }
    pb.finish_with_message("done");

    Ok((successes.join("\n"), failures.join("\n")))
}

/// Zips one home directory, uploads the archive to `s3_folder`, and removes
/// the local archive afterwards.
///
/// `home` is the directory name relative to `config.home_dir`. `_verbose` is
/// currently unused.
///
/// # Returns
///
/// A `SUCCESS: ...` message; when zip finished with tolerated warnings, the
/// warning summary is appended on a following line.
///
/// # Errors
///
/// Returns an error when the home directory does not exist, when a path is
/// not valid UTF-8, when zip fails, or when the S3 upload fails. A failure to
/// delete the temp archive after a successful upload is only logged.
fn perform_backup_of_home<P: AsRef<Path>>(
    home: P,
    config: &Config,
    s3_folder: &str,
    _verbose: bool,
) -> AppResult<String> {
    let home_path = Path::new(&config.home_dir).join(&home);

    // 1. make sure directory exists
    if !home_path.exists() {
        return Err(BackupError::MissingHomeError(home_path).into());
    }

    // 2. set backup file name
    let home_str: &str = home
        .as_ref()
        .to_str()
        .ok_or_else(|| BackupError::InvalidHomeError(home_path.clone()))?;
    // Flushing is cosmetic; a stdout hiccup must not abort the backup run.
    let _ = io::stdout().flush();

    // NOTE(review): "%Y-%m%d" renders as e.g. `2024-0131` (no dash between
    // month and day) - confirm whether "%Y-%m-%d" was intended.
    let file = format!("{}_backup_{}.zip", home_str, Local::now().format("%Y-%m%d"));
    let backup_file = Path::new(&config.temp_folder).join(file);
    let backup_file = backup_file.to_str().ok_or_else(|| {
        BackupError::MissingTempError(Path::new(&config.temp_folder).to_path_buf())
    })?;
    // The directory is known to exist at this point, so a failed conversion
    // means the path is not valid UTF-8: report it as invalid, not missing.
    let home_folder = home_path
        .to_str()
        .ok_or_else(|| BackupError::InvalidHomeError(home_path.clone()))?;

    // 3. archive, upload, and clean up
    let zip_warning = zip(backup_file, home_folder, &config.exclude_files)?;
    // NOTE(review): when the upload fails the archive stays in the temp folder
    // until the retention cleanup removes it - confirm that this is intended.
    copy_to_s3(backup_file, s3_folder, config.aws_profile.as_deref())?;
    if let Err(e) = delete_backup(backup_file) {
        eprintln!("WARNING: Failed to delete temp file '{backup_file}' after upload: {e}");
    }

    let mut message = format!("SUCCESS: {backup_file} backed up to: {s3_folder}");
    if let Some(warning) = zip_warning {
        message.push_str(&format!("\n  WARNING: {warning}"));
    }
    Ok(message)
}

/// Best-effort retention sweep: deletes `.zip` files directly inside
/// `temp_folder` whose modification time is more than two days in the past.
///
/// Nothing is returned; every problem (unreadable folder, unreadable metadata,
/// failed delete) is reported on stderr and the sweep carries on.
fn cleanup_old_backups(temp_folder: &str) {
    let cutoff = std::time::SystemTime::now()
        .checked_sub(std::time::Duration::from_secs(2 * 24 * 60 * 60))
        .unwrap();

    let listing = std::fs::read_dir(temp_folder).map_err(|e| {
        eprintln!("WARNING: Could not scan temp folder '{temp_folder}' for retention cleanup: {e}");
    });
    let Ok(listing) = listing else {
        return;
    };

    // Unreadable directory entries are skipped silently; only `.zip` files are candidates.
    let archives = listing
        .flatten()
        .map(|entry| (entry.path(), entry))
        .filter(|(path, _)| path.extension().and_then(|ext| ext.to_str()) == Some("zip"));

    for (path, entry) in archives {
        match entry.metadata().and_then(|meta| meta.modified()) {
            Err(e) => {
                eprintln!("WARNING: Could not read metadata for '{}': {e}", path.display());
            }
            Ok(mtime) if mtime < cutoff => match std::fs::remove_file(&path) {
                Ok(()) => println!("Retention cleanup: deleted stale backup '{}'", path.display()),
                Err(e) => eprintln!("WARNING: Failed to delete stale backup '{}': {e}", path.display()),
            },
            // Recent enough: keep it.
            Ok(_) => {}
        }
    }
}

/// Removes the temporary archive at `file`.
///
/// # Errors
///
/// Any removal failure is reported as `BackupError::DeleteTempError` carrying
/// the path; the underlying I/O error is not preserved.
fn delete_backup<P: AsRef<Path>>(file: P) -> AppResult<()> {
    let target = file.as_ref();
    match std::fs::remove_file(target) {
        Ok(()) => Ok(()),
        Err(_) => Err(BackupError::DeleteTempError(target.to_path_buf()).into()),
    }
}

/// Uploads `file` to the S3 location `folder` by running
/// `aws [--profile <profile>] s3 cp <file> <folder>`.
///
/// # Returns
///
/// A short "Copied file: ..." confirmation message.
///
/// # Errors
///
/// * `BackupError::S3InvalidFile` when `file` is not valid UTF-8.
/// * `BackupError::S3CopyError` when the `aws` command cannot be started (the
///   underlying I/O error is included in `std_err`) or exits unsuccessfully
///   (its captured stderr/stdout are included).
fn copy_to_s3<P: AsRef<Path>>(file: P, folder: &str, profile: Option<&str>) -> AppResult<String> {
    let file = file
        .as_ref()
        .to_str()
        .ok_or_else(|| BackupError::S3InvalidFile(file.as_ref().to_path_buf()))?;

    let mut command = Command::new("aws");
    // The global --profile option has to precede the `s3 cp` subcommand.
    if let Some(profile) = profile {
        command.arg("--profile").arg(profile);
    }
    command.args(["s3", "cp", file, folder]);

    // Both failure paths report the same source/destination pair.
    let copy_error = |std_err: String, std_out: String| {
        BackupError::S3CopyError(S3CopyError {
            src: file.to_owned(),
            dest: folder.to_owned(),
            std_err,
            std_out,
        })
    };

    // Keep the OS error (e.g. "No such file or directory" when `aws` is not
    // installed) so the failure can be diagnosed, matching what zip() reports.
    let output = command
        .output()
        .map_err(|e| copy_error(format!("Failed to execute command: {e}"), String::new()))?;

    if !output.status.success() {
        return Err(copy_error(
            String::from_utf8_lossy(&output.stderr).into_owned(),
            String::from_utf8_lossy(&output.stdout).into_owned(),
        )
        .into());
    }

    Ok(format!("Copied file: {file} to {folder}"))
}

/// Runs `zip -r <file> <folder>` with one `--exclude=<pattern>` argument per
/// entry of `exclude_list`.
///
/// # Returns
///
/// * `Ok(None)` when zip exits successfully.
/// * `Ok(Some(summary))` when zip exits with code 12 or 18 but a non-empty
///   archive exists at `file`; the summary lists the skipped paths and is also
///   written to stderr.
///
/// # Errors
///
/// `BackupError::ZipError` when the command cannot be started, or when it
/// fails in any other way (the exit status, stdout and stderr are included).
fn zip(file: &str, folder: &str, exclude_list: &[String]) -> AppResult<Option<String>> {
    // -q is intentionally NOT passed: in quiet mode some zip builds drop the
    // offending path from the "could not open for reading" warnings, and that
    // path is exactly what is needed to tune exclude_files. The per-file
    // "adding:" output goes to stdout, which is captured and only surfaced
    // when the run hard-fails.
    let mut command = Command::new("zip");
    command.args(["-r", file, folder]);
    command.args(exclude_list.iter().map(|exclude| {
        let mut exclude_arg = OsString::from("--exclude=");
        exclude_arg.push(exclude);
        exclude_arg
    }));

    let output = match command.output() {
        Ok(output) => output,
        Err(e) => {
            return Err(BackupError::ZipError(format!("Failed to execute command: {e}")).into());
        }
    };
    if output.status.success() {
        return Ok(None);
    }

    let code = output.status.code();
    let std_err = String::from_utf8_lossy(&output.stderr).into_owned();
    let std_out = String::from_utf8_lossy(&output.stdout).into_owned();

    // Exit codes 12 ("nothing to do") and 18 ("could not open a file for
    // reading") are expected on a live server, where files routinely vanish
    // between zip's scan and read passes. When a non-empty archive was still
    // produced, downgrade the failure to a warning so the upload can proceed,
    // and report which files were skipped.
    if let Some(exit @ (12 | 18)) = code {
        let archive_ok = matches!(std::fs::metadata(file), Ok(meta) if meta.len() > 0);
        if archive_ok {
            let summary = summarize_skipped(folder, &collect_skipped_files(&std_err), exit);
            // Mirror to stderr so the detail lands in the Forge/cron log too.
            eprintln!("{summary}");
            return Ok(Some(summary));
        }
    }

    Err(BackupError::ZipError(format!(
        "zip failed with status {code:?}\nstdout: \n{std_out}\nstderr:\n{std_err}"
    ))
    .into())
}

/// Gathers the distinct skipped entries reported in zip's stderr.
///
/// Every non-empty line containing the word "warning" contributes one entry:
/// the path isolated by `extract_path` when there is one, otherwise the whole
/// trimmed line. Duplicates are dropped and the result is sorted.
fn collect_skipped_files(std_err: &str) -> Vec<String> {
    let unique: std::collections::BTreeSet<String> = std_err
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && line.contains("warning"))
        .map(|line| extract_path(line).unwrap_or(line).to_string())
        .collect();
    unique.into_iter().collect()
}

/// Tries to isolate the file path in a zip warning line such as
/// `zip warning: could not open for reading: /path/to/file: No such file or directory`.
///
/// The line must start with `zip warning:`. The remainder is split on `": "`
/// and the longest segment beginning with `/` is returned (the later one when
/// two are equally long). Returns `None` when the prefix is missing or no
/// segment starts with `/`.
fn extract_path(line: &str) -> Option<&str> {
    let Some(rest) = line.strip_prefix("zip warning:") else {
        return None;
    };

    let mut longest: Option<&str> = None;
    for segment in rest.trim().split(": ") {
        let segment = segment.trim();
        if !segment.starts_with('/') {
            continue;
        }
        // `>=` so that, on a tie, the later candidate wins.
        if longest.map_or(true, |best| segment.len() >= best.len()) {
            longest = Some(segment);
        }
    }
    longest
}

/// Builds the human-readable warning for a zip run that exited with `code`
/// while archiving `folder`.
///
/// The first line states how many files were skipped. When `skipped` is not
/// empty, a bullet list of at most 40 paths follows, plus a trailing
/// "... and N more" line when the list was truncated.
fn summarize_skipped(folder: &str, skipped: &[String], code: i32) -> String {
    const MAX_LISTED: usize = 40;
    let total = skipped.len();

    let mut summary = format!(
        "zip completed with warnings (exit {code}) for {folder}: {} file(s) skipped because they could not be read (vanished or unreadable during backup).",
        total
    );

    if total > 0 {
        summary.push_str("\n  Skipped paths (tune exclude_files to silence these):");
        summary.extend(
            skipped
                .iter()
                .take(MAX_LISTED)
                .flat_map(|path| ["\n    - ", path.as_str()]),
        );
        if total > MAX_LISTED {
            let hidden = total - MAX_LISTED;
            summary.push_str(&format!("\n    ... and {} more", hidden));
        }
    }

    summary
}

/// Makes sure `path` exists as a directory, creating it and any missing
/// parents.
///
/// `create_dir_all` already succeeds when the directory is present, so no
/// separate existence check is made. This also means a `path` that exists but
/// is not a directory (for example a regular file) is reported here instead
/// of being accepted.
///
/// # Errors
///
/// `BackupError::MakeDirectoryError` carrying `path` when the directory
/// cannot be created.
fn ensure_directory_exists<P: AsRef<Path>>(path: P) -> AppResult<()> {
    let dir = path.as_ref();
    std::fs::create_dir_all(dir).map_err(|_| BackupError::MakeDirectoryError(dir.to_path_buf()))?;
    Ok(())
}
/// Lists the names of the directories located directly inside `path`.
///
/// Entries that cannot be read, entries that are not directories, and names
/// that are not valid UTF-8 are left out. Names are returned in the order the
/// directory listing yields them.
///
/// # Errors
///
/// `BackupError::DirectoryReadError` carrying `path` when the directory cannot
/// be listed.
fn get_home_directories<P: AsRef<Path> + Copy>(path: P) -> AppResult<Vec<String>> {
    let entries = match std::fs::read_dir(path) {
        Ok(entries) => entries,
        Err(_) => {
            return Err(BackupError::DirectoryReadError(path.as_ref().to_path_buf()).into());
        }
    };

    let mut homes = Vec::new();
    for entry in entries.flatten() {
        let entry_path = entry.path();
        if !entry_path.is_dir() {
            continue;
        }
        if let Some(name) = entry_path.file_name().and_then(|name| name.to_str()) {
            homes.push(name.to_string());
        }
    }
    Ok(homes)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A complete "could not open for reading" warning yields just the path.
    #[test]
    fn extracts_path_from_standard_warning() {
        let line = "zip warning: could not open for reading: /home/forge/app/storage/framework/sessions/abc123: No such file or directory";
        let found = extract_path(line);
        assert_eq!(
            found,
            Some("/home/forge/app/storage/framework/sessions/abc123")
        );
    }

    /// A warning without any absolute path segment yields nothing.
    #[test]
    fn returns_none_when_no_path_present() {
        let found = extract_path("zip warning: No such file or directory");
        assert!(found.is_none());
    }

    /// Repeated warnings collapse to one entry; a warning without a path is
    /// kept as the whole line.
    #[test]
    fn collect_dedupes_and_falls_back_to_full_line() {
        let std_err = "\tzip warning: could not open for reading: /home/forge/a/cache.tmp: No such file or directory\n\
             \tzip warning: could not open for reading: /home/forge/a/cache.tmp: No such file or directory\n\
             \tzip warning: No such file or directory\n";
        let skipped = collect_skipped_files(std_err);
        // One unique path + one path-less line that falls back to the whole line.
        assert_eq!(skipped.len(), 2);
        assert!(skipped.contains(&"/home/forge/a/cache.tmp".to_string()));
    }
}