avml 0.18.0

A portable volatile memory acquisition tool
Documentation
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

use crate::snapshot::{Error, Result};
use core::{mem::zeroed, num::NonZeroU64, ops::Range};
use std::{ffi::CString, os::unix::ffi::OsStrExt as _, path::Path};

/// Assume roughly 100k per block extra overhead
///
/// This should account for the header used by `LiME` chunks as well as
/// potential increase in storage from compression, as Snappy does not guarantee
/// compression results in fewer bytes.
const EXTRA_PADDING: u64 = 1024 * 100;
const EXCESSIVE_VALUE: u64 = 4_000_000_000_000_000_000;

#[derive(Debug)]
struct DiskUsage {
    pub total: u64,
    pub used: u64,
}

/// Check an *estimate* of the disk usage of a snapshot
///
/// This is a best effort attempt to estimate the disk usage of a snapshot and
/// validate the snapshot will fit in the configured parameters.
pub fn check(
    image_path: &Path,
    memory_ranges: &[Range<u64>],
    max_disk_usage: Option<NonZeroU64>,
    max_disk_usage_percentage: Option<f64>,
) -> Result<()> {
    let estimate_add = estimate(memory_ranges);

    if let Some(max_disk_usage) = max_disk_usage {
        check_max_usage(estimate_add, max_disk_usage)?;
    }

    if let Some(max_disk_usage_percentage) = max_disk_usage_percentage {
        let disk_usage = disk_usage(image_path)?;
        check_max_usage_percentage(estimate_add, &disk_usage, max_disk_usage_percentage)?;
    }

    Ok(())
}

fn check_max_usage(estimated: u64, max_disk_usage: NonZeroU64) -> Result<()> {
    // convert to MB
    let allowed = max_disk_usage
        .get()
        .saturating_mul(1024)
        .saturating_mul(1024);

    if estimated > allowed {
        return Err(Error::DiskUsageEstimateExceeded { estimated, allowed });
    }
    Ok(())
}

fn check_max_usage_percentage(
    estimated: u64,
    disk_usage: &DiskUsage,
    max_disk_usage_percentage: f64,
) -> Result<()> {
    let estimated_used = disk_usage.used.saturating_add(estimated);

    // assuming the disk was empty, how much could we use
    let max_allowed =
        f64_to_u64(u64_to_f64(disk_usage.total)? * (max_disk_usage_percentage / 100.0))?;

    if estimated_used > max_allowed {
        let allowed = max_allowed.saturating_sub(disk_usage.used);
        return Err(Error::DiskUsageEstimateExceeded { estimated, allowed });
    }

    Ok(())
}

/// Attempt to convert u64 into f64
///
/// Of note: The maximum value works out to be more than 4 exabytes, which is *way* more than any
/// memory or disk we're likely to see any time soon.
///
/// `TryInto<f64> for u64` is not implemented.  This tries to be mindful of the
/// following edge condition:
/// 1. The value must be less than or equal to the const `EXCESSIVE_VALUE`
#[expect(
    clippy::cast_precision_loss,
    clippy::as_conversions,
    reason = "value is bounded by EXCESSIVE_VALUE (4 EB) which is below f64 precision limits"
)]
fn u64_to_f64(value: u64) -> Result<f64> {
    if value > EXCESSIVE_VALUE {
        return Err(Error::Other {
            context: "unable to convert u64 to f64",
            detail: format!("value is too large to convert to f64: {value}"),
        });
    }
    Ok(value as f64)
}

/// Attempt to convert f64 into u64
///
/// `TryInto<u64> for f64` is not implemented.  This tries to be mindful of the
/// following edge conditions:
/// 1. The value must be a signed positive value
/// 2. The value is explicitly truncated and clamped to the integer value
#[expect(
    clippy::cast_sign_loss,
    clippy::cast_possible_truncation,
    clippy::as_conversions,
    reason = "sign guarded by is_sign_positive check; truncation is explicit"
)]
fn f64_to_u64(value: f64) -> Result<u64> {
    if !value.is_sign_positive() {
        return Err(Error::Other {
            context: "unable to convert f64 to u64",
            detail: format!("value is not a positive f64: {value}"),
        });
    }
    Ok(value.trunc() as u64)
}

/// Estimate potential disk usage for a given set of memory ranges
fn estimate(ranges: &[Range<u64>]) -> u64 {
    let mut total: u64 = 0;
    for range in ranges {
        let chunk_size = range.end.saturating_sub(range.start);
        total = total
            .saturating_add(chunk_size)
            .saturating_add(EXTRA_PADDING);
    }
    total
}

fn disk_usage(path: &Path) -> Result<DiskUsage> {
    let path_as_cstr = CString::new(path.as_os_str().as_bytes()).map_err(|e| Error::Other {
        context: "unable to convert path to CString",
        detail: e.to_string(),
    })?;

    // SAFETY: this is the only way to initialize the statfs64 struct
    let mut statfs: libc::statfs64 = unsafe { zeroed() };
    // SAFETY: calling statfs64 is ok as long as the path is valid
    let ret = unsafe { libc::statfs64(path_as_cstr.as_ptr(), &raw mut statfs) };

    if ret < 0 {
        return Err(Error::Disk(std::io::Error::last_os_error()));
    }

    let f_bsize: u64 = statfs.f_bsize.try_into().map_err(|e| Error::Other {
        context: "unable to identify block size",
        detail: format!("{e}"),
    })?;

    let total = statfs.f_blocks.saturating_mul(f_bsize);
    let free = statfs.f_bavail.saturating_mul(f_bsize);
    let used = total.saturating_sub(free);

    let result = DiskUsage { total, used };

    Ok(result)
}

#[cfg(test)]
mod tests {
    #![expect(
        clippy::assertions_on_result_states,
        reason = "tests intentionally assert on Result variants via is_err()"
    )]

    use super::*;

    const EXCESSIVE_VALUE_F64: f64 = 4_000_000_000_000_000_000.0;

    #[test]
    fn test_disk_usage() -> Result<()> {
        let current_exe = std::env::current_exe().map_err(|e| Error::Other {
            context: "unable to get current exe",
            detail: e.to_string(),
        })?;
        // check that we can get disk usage for at least one file system, here
        // we check file system that the current exe resides on
        let result = disk_usage(&current_exe)?;

        // We can't really test this, but we can at least make sure it's not zero
        assert!(result.total > 0);

        // Since we know that at least this executable should reside on this
        // disk, we can at least check we have some disk usage from that executable alone
        assert!(result.used > 0);

        Ok(())
    }

    // test f64<->u64
    //
    // This tests values up to 4EB, which is larger than we can reasonably
    // expect to see any time soon.
    #[test]
    fn test_conversion() -> Result<()> {
        // validate our assumptions that this should always reduce down to u64::MAX
        assert_eq!(f64_to_u64(f64::MAX - 1.0)?, u64::MAX);

        f64_to_u64(0.0)?;
        assert!(f64_to_u64(-0.1).is_err());
        assert_eq!(f64_to_u64(EXCESSIVE_VALUE_F64)?, EXCESSIVE_VALUE);

        // note: testing equality of floating point values is tricky.
        //
        // these follow the guidelines set by `clippy::float_cmp`
        assert!((u64_to_f64(0)? - 0.0).abs() < f64::EPSILON);
        assert!((u64_to_f64(EXCESSIVE_VALUE)? - EXCESSIVE_VALUE_F64).abs() < f64::EPSILON);

        Ok(())
    }

    #[test]
    fn test_estimates() {
        insta::assert_json_snapshot!(estimate(&[0..100, 100..200, 200..300]));
        insta::assert_json_snapshot!(estimate(&[
            0..1024 * 1024,
            (1024 * 1024) + 10..(1024 * 1024 * 1024)
        ]));
    }

    #[test]
    fn test_check_max_usable() -> Result<()> {
        let ten = NonZeroU64::new(10).ok_or(Error::Other {
            context: "unable to create NonZeroU64",
            detail: String::new(),
        })?;
        check_max_usage(1, ten)?;
        check_max_usage(10, ten)?;
        assert!(check_max_usage(11 * 1024 * 1024, ten).is_err());
        Ok(())
    }

    // Testing where we are using disk percentage is more difficult as we don't
    // know how much disk space is available on CICD apriori.
    //
    // Instead we have to provide pre-computed values for estimates and disk
    // usage to the underlying check function.
    #[test]
    fn test_check_max_usage_percentage() -> Result<()> {
        // usage should be well below allowed %
        check_max_usage_percentage(
            10,
            &DiskUsage {
                total: 1000,
                used: 0,
            },
            10.0,
        )?;

        // usage should just at the allowed value
        check_max_usage_percentage(
            1,
            &DiskUsage {
                total: 1000,
                used: 99,
            },
            10.0,
        )?;

        // disk is already past the max allowed, should fail even with a tiny addition
        assert!(
            check_max_usage_percentage(
                1,
                &DiskUsage {
                    total: 1000,
                    used: 910
                },
                10.0
            )
            .is_err()
        );

        Ok(())
    }
}