file_with_meta/lib.rs
1#![warn(missing_docs)]
2// SPDX-FileCopyrightText: Peter Pentchev <roam@ringlet.net>
3// SPDX-License-Identifier: BSD-2-Clause
4//! Store a file's metadata for caching purposes.
5//!
6//! The [`FileHttpMetadata`] structure may be serialized and
7//! stored in a JSON file alongside the real file, e.g. one with
8//! ".meta" appended to the file name. Then either the [`match_meta`]
9//! function may be used directly, or the [`build_req`] one may be
10//! used to modify an HTTP request, adding the necessary headers to
11//! make sure that the file is not downloaded if there have been
12//! no changes on the remote server.
13//!
14//! Example for checking whether a file needs to be downloaded:
15//! ```rust
16//! # use std::error::Error;
17//! use std::fs::{self, File};
18//! # use std::io::{self, BufWriter, Read, Write};
19//! # use std::path::Path;
20//!
21//! # use anyhow::{Context as _, Result};
22//! # use camino::Utf8Path;
23//!
24//! # #[cfg(feature = "ureq")]
25//! # fn main() -> Result<()> {
26//! # let agent = ureq::agent();
27//! # let tempd_obj = tempfile::tempdir()?;
28//! # let destdir: &Utf8Path = tempd_obj
29//! # .as_ref()
30//! # .try_into()
31//! # .with_context(|| format!("Could not represent {tempd_obj:?} as a UTF-8 path"))?;
32//! let dst = destdir.join("data.json");
33//! let dst_meta = destdir.join("data.json.meta");
34//! let (req, stored_meta) = file_with_meta::build_req(
35//! agent.get("https://example.com/"),
36//! &dst,
37//! &dst_meta,
38//! )?;
39//! let resp = req.call()?;
40//! match resp.status() {
41//! 304 => println!("Nothing was fetched"),
42//! _ => {
43//! println!("Storing the content");
44//! /* ... */
45//! # let mut reader = resp.into_reader();
46//! # let mut outfile = File::create(&dst)?;
47//! # let mut writer = BufWriter::new(&outfile);
48//! # loop {
49//! # let mut buf = [0; 8192];
50//! # let n = reader.read(&mut buf[..])?;
51//! # if n == 0 {
52//! # break;
53//! # }
54//! # writer.write_all(&buf[..n])?;
55//! # }
56//! # writer.flush()?;
57//! # outfile.sync_all()?;
58//!
59//! println!("Updating the file's metadata");
60//! let meta = file_with_meta::FileHttpMetadata::from_file(&dst)?;
61//! fs::write(&dst_meta, serde_json::to_string(&meta).unwrap())?;
62//! }
63//! };
64//! # Ok(())
65//! # }
66//! # #[cfg(not(feature = "ureq"))]
67//! # fn main() {
68//! # }
69//! ```
70//!
71//! Example for checking whether a file has changed since its metadata
72//! was last updated:
73//! ```rust
74//! let dst = "/path/to/file.dat";
75//! let dst_meta = "/path/to/file.dat.meta";
76//!
77//! match file_with_meta::match_meta(&dst, &dst_meta)?.is_some() {
78//! true => println!("No change"),
79//! false => println!("Somebody touched our file, recreate it?"),
80//! };
81//! # Ok::<_, file_with_meta::Error>(())
82//! ```
83//!
84//! The [`match_meta_with_source`] function may be used to additionally
85//! make sure that a "source" file has not been modified since this file
86//! was last generated from its data.
87
88#![doc(html_root_url = "https://docs.rs/file-with-meta/0.3.0")]
89use std::fs::{self, Metadata};
90use std::io::Error as IoError;
91use std::time::SystemTime;
92
93use camino::Utf8Path;
94use serde_derive::{Deserialize, Serialize};
95use serde_json::Error as SJError;
96use thiserror::Error;
97
98#[cfg(feature = "ureq")]
99use ureq::Request;
100
101#[cfg(test)]
102mod tests;
103
104/// An error that occurred during processing the metadata.
105#[derive(Debug, Error)]
106#[non_exhaustive]
107#[allow(clippy::error_impl_error)]
108pub enum Error {
109 /// Could not examine a local file.
110 #[error("Could not examine {0}")]
111 Examine(String, #[source] IoError),
112
113 /// Unexpected format major version in the metadata JSON structure.
114 #[error("Unsupported format major version {0}")]
115 FormatVersionMajor(u32),
116
117 /// Something went really, really wrong...
118 #[error("file-with-meta internal error: {0}")]
119 Internal(String),
120
121 /// Could not parse the metadata JSON structure.
122 #[error("Could not parse the metadata")]
123 Parse(SJError),
124}
125
126/// The version of the format of the serialized metadata.
127#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
128pub struct MetadataFormatVersion {
129 /// The major version number; bumped when a field is removed or
130 /// its type is changed.
131 major: u32,
132 /// The minor version number; bumped when a new field is added.
133 minor: u32,
134}
135
136impl Default for MetadataFormatVersion {
137 /// The default format version is the most recent one.
138 fn default() -> Self {
139 Self { major: 0, minor: 1 }
140 }
141}
142
143/// Information about the format of the JSON-serialized metadata.
144#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
145pub struct MetadataFormat {
146 /// The version of the metadata format, currently 0.x.
147 version: MetadataFormatVersion,
148}
149
150#[derive(Debug, Serialize, Deserialize)]
151struct MetadataTopLevelFormatOnly {
152 format: MetadataFormat,
153}
154
155/// Information about a single file's last modification time and,
156/// if specified, some relevant HTTP headers returned by the server
157/// that the file was fetched from.
158#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
159#[non_exhaustive]
160pub struct FileHttpMetadata {
161 /// The version of the metadata as stored in a JSON string.
162 pub format: MetadataFormat,
163 /// The size of the file.
164 pub file_size: u64,
165 /// The modification time of the file as a Unix timestamp.
166 pub file_mtime: u64,
167 /// The `Last-Modified` header as returned by an HTTP server.
168 pub hdr_last_modified: Option<String>,
169 /// The `ETag` header as returned by an HTTP server.
170 pub hdr_etag: Option<String>,
171 /// The size of the source file if applicable.
172 pub source_file_size: Option<u64>,
173 /// The modification time of the source file if applicable.
174 pub source_file_mtime: Option<u64>,
175 /// A hook for external users to store information about whether
176 /// the file's contents has been validated.
177 pub verified: bool,
178}
179
180impl FileHttpMetadata {
181 /// Examine an existing file and return a metadata structure
182 /// recording its size and last modification time.
183 ///
184 /// # Errors
185 ///
186 /// [`Error::Examine`] if the file cannot be examined.
187 pub fn from_file<P>(path: P) -> Result<Self, Error>
188 where
189 P: AsRef<Utf8Path>,
190 {
191 let meta = fs::metadata(path.as_ref())
192 .map_err(|err| Error::Examine(path.as_ref().to_string(), err))?;
193 Ok(Self {
194 file_size: meta.len(),
195 file_mtime: mtime_to_unix(&meta)?,
196 ..Self::default()
197 })
198 }
199
200 /// Examine an existing file and return a metadata structure
201 /// recording its size and last modification time, as well as
202 /// that of the specified "source" file.
203 ///
204 /// # Errors
205 ///
206 /// Propagates errors from [`Self::from_file`] and [`mtime_to_unix`].
207 /// [`Error::Examine`] if the source file cannot be examined.
208 pub fn from_file_with_source<P1, P2>(path: P1, src: P2) -> Result<Self, Error>
209 where
210 P1: AsRef<Utf8Path>,
211 P2: AsRef<Utf8Path>,
212 {
213 let meta = Self::from_file(path)?;
214 let src_meta = fs::metadata(src.as_ref())
215 .map_err(|err| Error::Examine(src.as_ref().to_string(), err))?;
216 Ok(Self {
217 source_file_size: Some(src_meta.len()),
218 source_file_mtime: Some(mtime_to_unix(&src_meta)?),
219 ..meta
220 })
221 }
222
223 /// Examine an existing file and return a metadata structure
224 /// recording its size and last modification time, as well as
225 /// the previously-stored one for a "source" file.
226 ///
227 /// # Errors
228 ///
229 /// Propagates errors from [`Self::from_file`].
230 pub fn from_file_with_source_meta<P>(path: P, src_meta: &Self) -> Result<Self, Error>
231 where
232 P: AsRef<Utf8Path>,
233 {
234 let meta = Self::from_file(path)?;
235 Ok(Self {
236 source_file_size: Some(src_meta.file_size),
237 source_file_mtime: Some(src_meta.file_mtime),
238 ..meta
239 })
240 }
241
242 /// Parse a metadata structure from the supplied JSON string.
243 /// Verify the version specified in the "format" element, do not
244 /// even attempt to parse unknown versions.
245 ///
246 /// # Errors
247 ///
248 /// [`Error::Parse`] if the JSON data cannot be parsed.
249 /// [`Error::FormatVersionMajor`] on unexpected format.version.major values.
250 pub fn parse(contents: &str) -> Result<Self, Error> {
251 let header =
252 serde_json::from_str::<MetadataTopLevelFormatOnly>(contents).map_err(Error::Parse)?;
253 match header.format.version.major {
254 0 => serde_json::from_str::<Self>(contents).map_err(Error::Parse),
255 _ => Err(Error::FormatVersionMajor(header.format.version.major)),
256 }
257 }
258}
259
260/// Unwrap a [`Metadata`] object's last modified timestamp,
261/// assume it may be converted to a Unix timestamp, and return
262/// the number of seconds since the Unix epoch.
263///
264/// # Errors
265///
266/// [`Error::Internal`] if the mtime cannot be fetche out of the metadata or
267/// it cannot be converted to a duration since the Unix epoch.
268pub fn mtime_to_unix(metadata: &Metadata) -> Result<u64, Error> {
269 Ok(metadata
270 .modified()
271 .map_err(|err| {
272 Error::Internal(format!("Could not get the mtime from {metadata:?}: {err}"))
273 })?
274 .duration_since(SystemTime::UNIX_EPOCH)
275 .map_err(|err| {
276 Error::Internal(format!(
277 "Could not get a Unix timestamp from the 'modified' time in {metadata:?}: {err}"
278 ))
279 })?
280 .as_secs())
281}
282
283/// Verify that a file has not been changed since the last time
284/// the metadata was stored.
285///
286/// # Errors
287///
288/// Propagates errors from [`mtime_to_unix`].
289// No need for MSRV 1.62
290pub fn match_meta<P1, P2>(dst: P1, dst_meta: P2) -> Result<Option<FileHttpMetadata>, Error>
291where
292 P1: AsRef<Utf8Path>,
293 P2: AsRef<Utf8Path>,
294{
295 if let Ok(file_meta) = fs::metadata(dst.as_ref()) {
296 if let Ok(contents) = fs::read_to_string(dst_meta.as_ref()) {
297 if let Ok(meta) = FileHttpMetadata::parse(&contents) {
298 return Ok((file_meta.is_file()
299 && file_meta.len() == meta.file_size
300 && mtime_to_unix(&file_meta)? == meta.file_mtime)
301 .then_some(meta));
302 }
303 }
304 }
305 Ok(None)
306}
307
308/// Verify that a file has not been changed, and additionally verify
309/// that its source file, specified by the `src` local path, has
310/// also not been changed.
311///
312/// Useful when e.g. uncompressing or otherwise processing downloaded files.
313///
314/// # Errors
315///
316/// Propagates errors from [`match_meta`].
317// No need for MSRV 1.62
318pub fn match_meta_with_source<P1, P2, P3>(
319 dst: P1,
320 dst_meta: P2,
321 src: P3,
322) -> Result<Option<FileHttpMetadata>, Error>
323where
324 P1: AsRef<Utf8Path>,
325 P2: AsRef<Utf8Path>,
326 P3: AsRef<Utf8Path>,
327{
328 if let Some(meta) = match_meta(dst, dst_meta)? {
329 Ok(match fs::metadata(src.as_ref()) {
330 Ok(src_meta) => {
331 let src_len = src_meta.len();
332 if meta.source_file_size.unwrap_or(src_len) == src_len {
333 let src_mtime = mtime_to_unix(&src_meta)?;
334 (meta.source_file_mtime.unwrap_or(src_mtime) == src_mtime).then_some(meta)
335 } else {
336 None
337 }
338 }
339 Err(_) => (meta.source_file_size.is_none() && meta.source_file_mtime.is_none())
340 .then_some(meta),
341 })
342 } else {
343 Ok(None)
344 }
345}
346
#[cfg(feature = "ureq")]
#[allow(clippy::doc_markdown)]
/// Add the `If-Modified-Since` and/or `If-None-Match` headers to
/// an HTTP request if needed.
///
/// The stored metadata is looked up using [`match_meta`]; if the local
/// file is missing or has changed, the request is returned unmodified.
/// Otherwise each header is added independently of the other one if
/// the relevant field (`Last-Modified` and `ETag` respectively) has been
/// returned in the last response from the server when the file has been
/// downloaded. If both fields are stored, both headers are sent.
///
/// Returns the possibly modified request along with the stored metadata,
/// if any.
///
/// # Errors
///
/// Propagates errors from [`match_meta`].
pub fn build_req<P1, P2>(
    orig_req: Request,
    dst: P1,
    dst_meta: P2,
) -> Result<(Request, Option<FileHttpMetadata>), Error>
where
    P1: AsRef<Utf8Path>,
    P2: AsRef<Utf8Path>,
{
    let stored_meta = match_meta(dst, dst_meta)?;

    let req = match stored_meta {
        None => orig_req,
        Some(ref meta) => {
            let req_etag = match meta.hdr_etag {
                Some(ref etag) => orig_req.set("If-None-Match", etag),
                None => orig_req,
            };
            // Send the stored modification time even if an entity tag is
            // also sent, so that servers that only honor one of the two
            // validators may still skip the download.
            match meta.hdr_last_modified {
                Some(ref last_modified) => req_etag.set("If-Modified-Since", last_modified),
                None => req_etag,
            }
        }
    };
    Ok((req, stored_meta))
}