// file-with-meta 0.2.0
//
// store a file's metadata for caching purposes
// Documentation
#![warn(missing_docs)]
/*
 * Copyright (c) 2021, 2022  Peter Pentchev <roam@ringlet.net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
//! Store a file's metadata for caching purposes.
//!
//! The [`FileHttpMetadata`] structure may be serialized and
//! stored in a JSON file alongside the real file, e.g. one with
//! ".meta" appended to the file name. Then either the [`match_meta`]
//! function may be used directly, or the [`build_req`] one may be
//! used to modify an HTTP request, adding the necessary headers to
//! make sure that the file is not downloaded if there have been
//! no changes on the remote server.
//!
//! Example for checking whether a file needs to be downloaded:
//! ```rust
//! # use std::error::Error;
//! use std::fs::{self, File};
//! # use std::io::{self, BufWriter, Read, Write};
//! # use std::path::Path;
//!
//! # #[cfg(feature = "ureq")]
//! # fn main() -> Result<(), Box<dyn Error>> {
//! # let agent = ureq::agent();
//! # let tempd_obj = tempfile::tempdir()?;
//! # let destdir: &Path = tempd_obj.as_ref();
//! let dst = destdir.join("data.json");
//! let dst_meta = destdir.join("data.json.meta");
//! let (req, stored_meta) = file_with_meta::build_req(
//!     agent.get("https://example.com/"),
//!     &dst,
//!     &dst_meta,
//! )?;
//! let resp = req.call()?;
//! match resp.status() {
//!     304 => println!("Nothing was fetched"),
//!     _ => {
//!         println!("Storing the content");
//!         /* ... */
//! #         let mut reader = resp.into_reader();
//! #         let mut outfile = File::create(&dst)?;
//! #         let mut writer = BufWriter::new(&outfile);
//! #         loop {
//! #             let mut buf = [0; 8192];
//! #             let n = reader.read(&mut buf[..])?;
//! #             if n == 0 {
//! #                 break;
//! #             }
//! #             writer.write_all(&buf[..n])?;
//! #         }
//! #         writer.flush()?;
//! #         outfile.sync_all()?;
//!
//!         println!("Updating the file's metadata");
//!         let meta = file_with_meta::FileHttpMetadata::from_file(&dst)?;
//!         fs::write(&dst_meta, serde_json::to_string(&meta).unwrap())?;
//!     }
//! };
//! # Ok(())
//! # }
//! # #[cfg(not(feature = "ureq"))]
//! # fn main() {
//! # }
//! ```
//!
//! Example for checking whether a file has changed since its metadata
//! was last updated:
//! ```rust
//! let dst = "/path/to/file.dat";
//! let dst_meta = "/path/to/file.dat.meta";
//!
//! match file_with_meta::match_meta(&dst, &dst_meta)?.is_some() {
//!     true => println!("No change"),
//!     false => println!("Somebody touched our file, recreate it?"),
//! };
//! # Ok::<_, file_with_meta::Error>(())
//! ```
//!
//! The [`match_meta_with_source`] function may be used to additionally
//! make sure that a "source" file has not been modified since this file
//! was last generated from its data.

#![doc(html_root_url = "https://docs.rs/file-with-meta/0.2.0")]
use std::fs::{self, Metadata};
use std::io::Error as IoError;
use std::path::Path;
use std::time::SystemTime;

use serde::{Deserialize, Serialize};
use serde_json::Error as SJError;
use thiserror::Error;

#[cfg(feature = "ureq")]
use ureq::Request;

#[cfg(test)]
mod tests;

/// An error that occurred during processing the metadata.
///
/// Variants that wrap a lower-level error expose it through
/// [`std::error::Error::source`] so that callers may walk the error chain.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum Error {
    /// Could not examine a local file.
    ///
    /// Holds the displayed path of the file and the underlying I/O error.
    #[error("Could not examine {0}")]
    Examine(String, #[source] IoError),

    /// Unexpected format major version in the metadata JSON structure.
    ///
    /// Holds the major version number that was found in the metadata.
    #[error("Unsupported format major version {0}")]
    FormatVersionMajor(u32),

    /// Something went really, really wrong...
    ///
    /// Holds a human-readable description of the problem.
    #[error("file-with-meta internal error: {0}")]
    Internal(String),

    /// Could not parse the metadata JSON structure.
    ///
    /// Holds the underlying JSON parser error; it is marked as the source so
    /// that the parser's message is not lost when the error chain is reported.
    #[error("Could not parse the metadata")]
    Parse(#[source] SJError),
}

/// The version of the format of the serialized metadata.
///
/// The [`Default`] implementation yields the most recent version, 0.1.
/// [`FileHttpMetadata::parse`] refuses any metadata whose major version
/// is not 0.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct MetadataFormatVersion {
    /// The major version number; bumped when a field is removed or
    /// its type is changed.
    major: u32,
    /// The minor version number; bumped when a new field is added.
    minor: u32,
}

impl Default for MetadataFormatVersion {
    /// Produce the most recent format version, currently 0.1.
    fn default() -> Self {
        let (major, minor) = (0, 1);
        Self { major, minor }
    }
}

/// Information about the format of the JSON-serialized metadata.
///
/// Stored as the `format` member of [`FileHttpMetadata`].
#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct MetadataFormat {
    /// The version of the metadata format, currently 0.x.
    version: MetadataFormatVersion,
}

/// A minimal view of the metadata JSON object that only contains
/// its `format` member.
///
/// [`FileHttpMetadata::parse`] deserializes into this structure first so
/// that the format version may be checked before any attempt is made to
/// parse the rest of the fields.
#[derive(Debug, Serialize, Deserialize)]
struct MetadataTopLevelFormatOnly {
    /// The format information, including the version to be checked.
    format: MetadataFormat,
}

/// Information about a single file's last modification time and,
/// if specified, some relevant HTTP headers returned by the server
/// that the file was fetched from.
///
/// The structure is meant to be serialized to JSON and later read back
/// using [`FileHttpMetadata::parse`].
#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub struct FileHttpMetadata {
    /// The version of the metadata as stored in a JSON string.
    pub format: MetadataFormat,
    /// The size of the file.
    pub file_size: u64,
    /// The modification time of the file as a Unix timestamp
    /// (whole seconds since the Unix epoch).
    pub file_mtime: u64,
    /// The "Last-Modified" header as returned by an HTTP server.
    pub hdr_last_modified: Option<String>,
    /// The "ETag" header as returned by an HTTP server.
    pub hdr_etag: Option<String>,
    /// The size of the source file if applicable.
    pub source_file_size: Option<u64>,
    /// The modification time of the source file if applicable,
    /// as a Unix timestamp.
    pub source_file_mtime: Option<u64>,
    /// A hook for external users to store information about whether
    /// the file's contents have been validated.
    pub verified: bool,
}

impl FileHttpMetadata {
    /// Examine an existing file and return a metadata structure
    /// recording its size and last modification time.
    ///
    /// # Errors
    ///
    /// [`Error::Examine`] if the file cannot be examined.
    pub fn from_file<P>(path: P) -> Result<Self, Error>
    where
        P: AsRef<Path>,
    {
        match fs::metadata(&path) {
            Ok(meta) => Ok(Self {
                file_size: meta.len(),
                file_mtime: mtime_to_unix(&meta)?,
                ..Self::default()
            }),
            Err(err) => Err(Error::Examine(path.as_ref().display().to_string(), err)),
        }
    }

    /// Examine an existing file and return a metadata structure
    /// recording its size and last modification time, as well as
    /// that of the specified "source" file.
    ///
    /// # Errors
    ///
    /// Propagates errors from [`Self::from_file`] and [`mtime_to_unix`].
    /// [`Error::Examine`] if the source file cannot be examined.
    pub fn from_file_with_source<P1, P2>(path: P1, src: P2) -> Result<Self, Error>
    where
        P1: AsRef<Path>,
        P2: AsRef<Path>,
    {
        let meta = Self::from_file(path)?;
        match fs::metadata(&src) {
            Ok(src_meta) => Ok(Self {
                source_file_size: Some(src_meta.len()),
                source_file_mtime: Some(mtime_to_unix(&src_meta)?),
                ..meta
            }),
            Err(err) => Err(Error::Examine(src.as_ref().display().to_string(), err)),
        }
    }

    /// Examine an existing file and return a metadata structure
    /// recording its size and last modification time, as well as
    /// the previously-stored one for a "source" file.
    ///
    /// # Errors
    ///
    /// Propagates errors from [`Self::from_file`].
    pub fn from_file_with_source_meta<P>(path: P, src_meta: &Self) -> Result<Self, Error>
    where
        P: AsRef<Path>,
    {
        let meta = Self::from_file(path)?;
        Ok(Self {
            source_file_size: Some(src_meta.file_size),
            source_file_mtime: Some(src_meta.file_mtime),
            ..meta
        })
    }

    /// Parse a metadata structure from the supplied JSON string.
    /// Verify the version specified in the "format" element, do not
    /// even attempt to parse unknown versions.
    ///
    /// # Errors
    ///
    /// [`Error::Parse`] if the JSON data cannot be parsed.
    /// [`Error::FormatVersionMajor`] on unexpected format.version.major values.
    pub fn parse(contents: &str) -> Result<Self, Error> {
        let header =
            serde_json::from_str::<MetadataTopLevelFormatOnly>(contents).map_err(Error::Parse)?;
        match header.format.version.major {
            0 => serde_json::from_str::<Self>(contents).map_err(Error::Parse),
            _ => Err(Error::FormatVersionMajor(header.format.version.major)),
        }
    }
}

/// Unwrap a [`Metadata`] object's last modified timestamp,
/// assume it may be converted to a Unix timestamp, and return
/// the number of seconds since the Unix epoch.
///
/// # Errors
///
/// [`Error::Internal`] if the mtime cannot be fetche out of the metadata or
/// it cannot be converted to a duration since the Unix epoch.
pub fn mtime_to_unix(metadata: &Metadata) -> Result<u64, Error> {
    Ok(metadata
        .modified()
        .map_err(|err| {
            Error::Internal(format!(
                "Could not get the mtime from {:?}: {}",
                metadata, err
            ))
        })?
        .duration_since(SystemTime::UNIX_EPOCH)
        .map_err(|err| {
            Error::Internal(format!(
                "Could not get a Unix epoch timestamp from the 'modified' time in {:?}: {}",
                metadata, err
            ))
        })?
        .as_secs())
}

/// Check whether a file still matches the metadata stored for it.
///
/// Returns the parsed metadata if `dst` is a regular file whose size and
/// last modification time are the same as the ones recorded in `dst_meta`.
/// Returns `None` if either file cannot be examined or read, if
/// the metadata cannot be parsed, or if anything differs.
///
/// # Errors
///
/// Propagates errors from [`mtime_to_unix`].
pub fn match_meta<P1, P2>(dst: P1, dst_meta: P2) -> Result<Option<FileHttpMetadata>, Error>
where
    P1: AsRef<Path>,
    P2: AsRef<Path>,
{
    // Any failure to get at the file or at its stored metadata simply
    // means "no match"; it is not reported as an error.
    let file_meta = match fs::metadata(&dst) {
        Ok(value) => value,
        Err(_) => return Ok(None),
    };
    let contents = match fs::read_to_string(&dst_meta) {
        Ok(value) => value,
        Err(_) => return Ok(None),
    };
    let stored = match FileHttpMetadata::parse(&contents) {
        Ok(value) => value,
        Err(_) => return Ok(None),
    };

    // Only look at the modification time if the cheaper checks pass.
    if !file_meta.is_file() || file_meta.len() != stored.file_size {
        return Ok(None);
    }
    if mtime_to_unix(&file_meta)? == stored.file_mtime {
        Ok(Some(stored))
    } else {
        Ok(None)
    }
}

/// Check whether a file still matches its stored metadata and whether
/// the "source" file at the `src` local path also matches the source
/// information recorded there. Useful when e.g. uncompressing or
/// otherwise processing downloaded files.
///
/// If the source file can be examined, each recorded source field (size,
/// modification time) must match it; a field that was not recorded is
/// treated as matching. If the source file cannot be examined,
/// the metadata only matches if no source information was recorded at all.
///
/// # Errors
///
/// Propagates errors from [`match_meta`] and [`mtime_to_unix`].
pub fn match_meta_with_source<P1, P2, P3>(
    dst: P1,
    dst_meta: P2,
    src: P3,
) -> Result<Option<FileHttpMetadata>, Error>
where
    P1: AsRef<Path>,
    P2: AsRef<Path>,
    P3: AsRef<Path>,
{
    let meta = match match_meta(dst, dst_meta)? {
        Some(found) => found,
        None => return Ok(None),
    };

    let source_unchanged = match fs::metadata(src) {
        Ok(src_meta) => {
            let src_len = src_meta.len();
            let size_matches = meta.source_file_size.map_or(true, |size| size == src_len);
            if size_matches {
                // The modification time is only fetched once the size check passes.
                let src_mtime = mtime_to_unix(&src_meta)?;
                meta.source_file_mtime
                    .map_or(true, |mtime| mtime == src_mtime)
            } else {
                false
            }
        }
        Err(_) => meta.source_file_size.is_none() && meta.source_file_mtime.is_none(),
    };

    if source_unchanged {
        Ok(Some(meta))
    } else {
        Ok(None)
    }
}

#[cfg(feature = "ureq")]
#[allow(clippy::doc_markdown)]
/// Turn an HTTP request into a conditional one based on the metadata
/// stored for a previously-downloaded file.
///
/// If the file still matches its stored metadata and an "ETag" value was
/// recorded, the "If-None-Match" header is added. Otherwise, if
/// a "Last-Modified" value was recorded, the "If-Modified-Since" header
/// is added. If there is no matching metadata or neither value was
/// recorded, the request is returned unchanged.
///
/// The stored metadata, if any, is returned along with the request.
///
/// # Errors
///
/// Propagates errors from [`match_meta`].
pub fn build_req<P1, P2>(
    orig_req: Request,
    dst: P1,
    dst_meta: P2,
) -> Result<(Request, Option<FileHttpMetadata>), Error>
where
    P1: AsRef<Path>,
    P2: AsRef<Path>,
{
    let stored_meta = match_meta(dst, dst_meta)?;

    // Pick at most one conditional header; the ETag takes precedence.
    let conditional = stored_meta.as_ref().and_then(|meta| {
        meta.hdr_etag
            .as_deref()
            .map(|etag| ("If-None-Match", etag))
            .or_else(|| {
                meta.hdr_last_modified
                    .as_deref()
                    .map(|last_modified| ("If-Modified-Since", last_modified))
            })
    });

    let req = match conditional {
        Some((name, value)) => orig_req.set(name, value),
        None => orig_req,
    };
    Ok((req, stored_meta))
}