file_with_meta/
lib.rs

1#![warn(missing_docs)]
2// SPDX-FileCopyrightText: Peter Pentchev <roam@ringlet.net>
3// SPDX-License-Identifier: BSD-2-Clause
4//! Store a file's metadata for caching purposes.
5//!
6//! The [`FileHttpMetadata`] structure may be serialized and
7//! stored in a JSON file alongside the real file, e.g. one with
8//! ".meta" appended to the file name. Then either the [`match_meta`]
9//! function may be used directly, or the [`build_req`] one may be
10//! used to modify an HTTP request, adding the necessary headers to
11//! make sure that the file is not downloaded if there have been
12//! no changes on the remote server.
13//!
14//! Example for checking whether a file needs to be downloaded:
15//! ```rust
16//! # use std::error::Error;
17//! use std::fs::{self, File};
18//! # use std::io::{self, BufWriter, Read, Write};
19//! # use std::path::Path;
20//!
21//! # use anyhow::{Context as _, Result};
22//! # use camino::Utf8Path;
23//!
24//! # #[cfg(feature = "ureq")]
25//! # fn main() -> Result<()> {
26//! # let agent = ureq::agent();
27//! # let tempd_obj = tempfile::tempdir()?;
28//! # let destdir: &Utf8Path = tempd_obj
29//! #     .as_ref()
30//! #     .try_into()
31//! #     .with_context(|| format!("Could not represent {tempd_obj:?} as a UTF-8 path"))?;
32//! let dst = destdir.join("data.json");
33//! let dst_meta = destdir.join("data.json.meta");
34//! let (req, stored_meta) = file_with_meta::build_req(
35//!     agent.get("https://example.com/"),
36//!     &dst,
37//!     &dst_meta,
38//! )?;
39//! let resp = req.call()?;
40//! match resp.status() {
41//!     304 => println!("Nothing was fetched"),
42//!     _ => {
43//!         println!("Storing the content");
44//!         /* ... */
45//! #         let mut reader = resp.into_reader();
46//! #         let mut outfile = File::create(&dst)?;
47//! #         let mut writer = BufWriter::new(&outfile);
48//! #         loop {
49//! #             let mut buf = [0; 8192];
50//! #             let n = reader.read(&mut buf[..])?;
51//! #             if n == 0 {
52//! #                 break;
53//! #             }
54//! #             writer.write_all(&buf[..n])?;
55//! #         }
56//! #         writer.flush()?;
57//! #         outfile.sync_all()?;
58//!
59//!         println!("Updating the file's metadata");
60//!         let meta = file_with_meta::FileHttpMetadata::from_file(&dst)?;
61//!         fs::write(&dst_meta, serde_json::to_string(&meta).unwrap())?;
62//!     }
63//! };
64//! # Ok(())
65//! # }
66//! # #[cfg(not(feature = "ureq"))]
67//! # fn main() {
68//! # }
69//! ```
70//!
71//! Example for checking whether a file has changed since its metadata
72//! was last updated:
73//! ```rust
74//! let dst = "/path/to/file.dat";
75//! let dst_meta = "/path/to/file.dat.meta";
76//!
77//! match file_with_meta::match_meta(&dst, &dst_meta)?.is_some() {
78//!     true => println!("No change"),
79//!     false => println!("Somebody touched our file, recreate it?"),
80//! };
81//! # Ok::<_, file_with_meta::Error>(())
82//! ```
83//!
84//! The [`match_meta_with_source`] function may be used to additionally
85//! make sure that a "source" file has not been modified since this file
86//! was last generated from its data.
87
88#![doc(html_root_url = "https://docs.rs/file-with-meta/0.3.0")]
89use std::fs::{self, Metadata};
90use std::io::Error as IoError;
91use std::time::SystemTime;
92
93use camino::Utf8Path;
94use serde_derive::{Deserialize, Serialize};
95use serde_json::Error as SJError;
96use thiserror::Error;
97
98#[cfg(feature = "ureq")]
99use ureq::Request;
100
101#[cfg(test)]
102mod tests;
103
104/// An error that occurred during processing the metadata.
105#[derive(Debug, Error)]
106#[non_exhaustive]
107#[allow(clippy::error_impl_error)]
108pub enum Error {
109    /// Could not examine a local file.
110    #[error("Could not examine {0}")]
111    Examine(String, #[source] IoError),
112
113    /// Unexpected format major version in the metadata JSON structure.
114    #[error("Unsupported format major version {0}")]
115    FormatVersionMajor(u32),
116
117    /// Something went really, really wrong...
118    #[error("file-with-meta internal error: {0}")]
119    Internal(String),
120
121    /// Could not parse the metadata JSON structure.
122    #[error("Could not parse the metadata")]
123    Parse(SJError),
124}
125
/// The version of the format of the serialized metadata.
///
/// Stored as the `version` member of [`MetadataFormat`] and examined by
/// [`FileHttpMetadata::parse`] before the rest of the metadata is decoded.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct MetadataFormatVersion {
    /// The major version number; bumped when a field is removed or
    /// its type is changed.
    major: u32,
    /// The minor version number; bumped when a new field is added.
    minor: u32,
}
135
136impl Default for MetadataFormatVersion {
137    /// The default format version is the most recent one.
138    fn default() -> Self {
139        Self { major: 0, minor: 1 }
140    }
141}
142
/// Information about the format of the JSON-serialized metadata.
///
/// This is the value of the `format` member of the serialized
/// [`FileHttpMetadata`] structure.
#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct MetadataFormat {
    /// The version of the metadata format, currently 0.x.
    version: MetadataFormatVersion,
}
149
/// A minimal view of the serialized metadata that only contains
/// the `format` member.
///
/// Used by [`FileHttpMetadata::parse`] to look at the format version
/// before attempting to decode the full structure.
#[derive(Debug, Serialize, Deserialize)]
struct MetadataTopLevelFormatOnly {
    /// The format information found at the top level of the JSON data.
    format: MetadataFormat,
}
154
/// Information about a single file's last modification time and,
/// if specified, some relevant HTTP headers returned by the server
/// that the file was fetched from.
///
/// The structure may be serialized to JSON and later read back using
/// [`FileHttpMetadata::parse`].
#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub struct FileHttpMetadata {
    /// The version of the metadata as stored in a JSON string.
    pub format: MetadataFormat,
    /// The size of the file.
    pub file_size: u64,
    /// The modification time of the file as a Unix timestamp
    /// (whole seconds since the Unix epoch).
    pub file_mtime: u64,
    /// The `Last-Modified` header as returned by an HTTP server.
    pub hdr_last_modified: Option<String>,
    /// The `ETag` header as returned by an HTTP server.
    pub hdr_etag: Option<String>,
    /// The size of the source file if applicable.
    pub source_file_size: Option<u64>,
    /// The modification time of the source file if applicable,
    /// as a Unix timestamp (whole seconds since the Unix epoch).
    pub source_file_mtime: Option<u64>,
    /// A hook for external users to store information about whether
    /// the file's contents have been validated.
    pub verified: bool,
}
179
180impl FileHttpMetadata {
181    /// Examine an existing file and return a metadata structure
182    /// recording its size and last modification time.
183    ///
184    /// # Errors
185    ///
186    /// [`Error::Examine`] if the file cannot be examined.
187    pub fn from_file<P>(path: P) -> Result<Self, Error>
188    where
189        P: AsRef<Utf8Path>,
190    {
191        let meta = fs::metadata(path.as_ref())
192            .map_err(|err| Error::Examine(path.as_ref().to_string(), err))?;
193        Ok(Self {
194            file_size: meta.len(),
195            file_mtime: mtime_to_unix(&meta)?,
196            ..Self::default()
197        })
198    }
199
200    /// Examine an existing file and return a metadata structure
201    /// recording its size and last modification time, as well as
202    /// that of the specified "source" file.
203    ///
204    /// # Errors
205    ///
206    /// Propagates errors from [`Self::from_file`] and [`mtime_to_unix`].
207    /// [`Error::Examine`] if the source file cannot be examined.
208    pub fn from_file_with_source<P1, P2>(path: P1, src: P2) -> Result<Self, Error>
209    where
210        P1: AsRef<Utf8Path>,
211        P2: AsRef<Utf8Path>,
212    {
213        let meta = Self::from_file(path)?;
214        let src_meta = fs::metadata(src.as_ref())
215            .map_err(|err| Error::Examine(src.as_ref().to_string(), err))?;
216        Ok(Self {
217            source_file_size: Some(src_meta.len()),
218            source_file_mtime: Some(mtime_to_unix(&src_meta)?),
219            ..meta
220        })
221    }
222
223    /// Examine an existing file and return a metadata structure
224    /// recording its size and last modification time, as well as
225    /// the previously-stored one for a "source" file.
226    ///
227    /// # Errors
228    ///
229    /// Propagates errors from [`Self::from_file`].
230    pub fn from_file_with_source_meta<P>(path: P, src_meta: &Self) -> Result<Self, Error>
231    where
232        P: AsRef<Utf8Path>,
233    {
234        let meta = Self::from_file(path)?;
235        Ok(Self {
236            source_file_size: Some(src_meta.file_size),
237            source_file_mtime: Some(src_meta.file_mtime),
238            ..meta
239        })
240    }
241
242    /// Parse a metadata structure from the supplied JSON string.
243    /// Verify the version specified in the "format" element, do not
244    /// even attempt to parse unknown versions.
245    ///
246    /// # Errors
247    ///
248    /// [`Error::Parse`] if the JSON data cannot be parsed.
249    /// [`Error::FormatVersionMajor`] on unexpected format.version.major values.
250    pub fn parse(contents: &str) -> Result<Self, Error> {
251        let header =
252            serde_json::from_str::<MetadataTopLevelFormatOnly>(contents).map_err(Error::Parse)?;
253        match header.format.version.major {
254            0 => serde_json::from_str::<Self>(contents).map_err(Error::Parse),
255            _ => Err(Error::FormatVersionMajor(header.format.version.major)),
256        }
257    }
258}
259
260/// Unwrap a [`Metadata`] object's last modified timestamp,
261/// assume it may be converted to a Unix timestamp, and return
262/// the number of seconds since the Unix epoch.
263///
264/// # Errors
265///
266/// [`Error::Internal`] if the mtime cannot be fetche out of the metadata or
267/// it cannot be converted to a duration since the Unix epoch.
268pub fn mtime_to_unix(metadata: &Metadata) -> Result<u64, Error> {
269    Ok(metadata
270        .modified()
271        .map_err(|err| {
272            Error::Internal(format!("Could not get the mtime from {metadata:?}: {err}"))
273        })?
274        .duration_since(SystemTime::UNIX_EPOCH)
275        .map_err(|err| {
276            Error::Internal(format!(
277                "Could not get a Unix timestamp from the 'modified' time in {metadata:?}: {err}"
278            ))
279        })?
280        .as_secs())
281}
282
283/// Verify that a file has not been changed since the last time
284/// the metadata was stored.
285///
286/// # Errors
287///
288/// Propagates errors from [`mtime_to_unix`].
289// No need for MSRV 1.62
290pub fn match_meta<P1, P2>(dst: P1, dst_meta: P2) -> Result<Option<FileHttpMetadata>, Error>
291where
292    P1: AsRef<Utf8Path>,
293    P2: AsRef<Utf8Path>,
294{
295    if let Ok(file_meta) = fs::metadata(dst.as_ref()) {
296        if let Ok(contents) = fs::read_to_string(dst_meta.as_ref()) {
297            if let Ok(meta) = FileHttpMetadata::parse(&contents) {
298                return Ok((file_meta.is_file()
299                    && file_meta.len() == meta.file_size
300                    && mtime_to_unix(&file_meta)? == meta.file_mtime)
301                    .then_some(meta));
302            }
303        }
304    }
305    Ok(None)
306}
307
308/// Verify that a file has not been changed, and additionally verify
309/// that its source file, specified by the `src` local path, has
310/// also not been changed.
311///
312/// Useful when e.g. uncompressing or otherwise processing downloaded files.
313///
314/// # Errors
315///
316/// Propagates errors from [`match_meta`].
317// No need for MSRV 1.62
318pub fn match_meta_with_source<P1, P2, P3>(
319    dst: P1,
320    dst_meta: P2,
321    src: P3,
322) -> Result<Option<FileHttpMetadata>, Error>
323where
324    P1: AsRef<Utf8Path>,
325    P2: AsRef<Utf8Path>,
326    P3: AsRef<Utf8Path>,
327{
328    if let Some(meta) = match_meta(dst, dst_meta)? {
329        Ok(match fs::metadata(src.as_ref()) {
330            Ok(src_meta) => {
331                let src_len = src_meta.len();
332                if meta.source_file_size.unwrap_or(src_len) == src_len {
333                    let src_mtime = mtime_to_unix(&src_meta)?;
334                    (meta.source_file_mtime.unwrap_or(src_mtime) == src_mtime).then_some(meta)
335                } else {
336                    None
337                }
338            }
339            Err(_) => (meta.source_file_size.is_none() && meta.source_file_mtime.is_none())
340                .then_some(meta),
341        })
342    } else {
343        Ok(None)
344    }
345}
346
#[cfg(feature = "ureq")]
#[allow(clippy::doc_markdown)]
/// Add an `If-None-Match` or an `If-Modified-Since` header to
/// an HTTP request if needed.
///
/// A header is only added if the local file still matches its stored
/// metadata (see [`match_meta`]) and the metadata records the relevant
/// response header (`ETag` and `Last-Modified` respectively).
/// A recorded `ETag` value takes precedence: if it is present, only
/// `If-None-Match` is added; otherwise `If-Modified-Since` is added
/// if a `Last-Modified` value was recorded.
///
/// Returns the possibly modified request along with the stored
/// metadata, if any.
///
/// # Errors
///
/// Propagates errors from [`match_meta`].
pub fn build_req<P1, P2>(
    orig_req: Request,
    dst: P1,
    dst_meta: P2,
) -> Result<(Request, Option<FileHttpMetadata>), Error>
where
    P1: AsRef<Utf8Path>,
    P2: AsRef<Utf8Path>,
{
    let stored_meta = match_meta(dst, dst_meta)?;

    // Figure out which conditional header to send, if any at all.
    let conditional = match stored_meta.as_ref() {
        None => None,
        Some(meta) => match (
            meta.hdr_etag.as_deref(),
            meta.hdr_last_modified.as_deref(),
        ) {
            (Some(etag), _) => Some(("If-None-Match", etag)),
            (None, Some(last_modified)) => Some(("If-Modified-Since", last_modified)),
            (None, None) => None,
        },
    };

    let req = match conditional {
        Some((name, value)) => orig_req.set(name, value),
        None => orig_req,
    };
    Ok((req, stored_meta))
}
381}