Skip to main content

doiget_core/store/
fs_store.rs

//! Filesystem-backed [`Store`] implementation.
//!
//! Binding spec: `docs/STORE.md` §§1-7. Re-stated as the implementation
//! contract:
//!
//! - **§1 Layout:** `<root>/<safekey>.pdf` and `<root>/.metadata/<safekey>.toml`,
//!   `.toml.lock` siblings for advisory locking.
//! - **§3 Schema version policy:** parse `<MAJOR>.<MINOR>`. Future `MAJOR`
//!   yields [`StoreError::SchemaTooNew`] on writes, warn-and-tolerate on
//!   reads. Future `MINOR` (same major) yields a `tracing::warn!`-then-OK on
//!   reads.
//! - **§4 Lock protocol:** `flock` (`fs2::FileExt`) on the SEPARATE
//!   `.toml.lock` file with a 5 s timeout polled via `try_lock_*`.
//! - **§5 Atomic write:** write `<safekey>.toml.tmp` → `sync_all` → `rename`
//!   → fsync parent dir (POSIX). On Windows `std::fs::rename` invokes
//!   `MoveFileEx` with `MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH`,
//!   so no extra parent-fsync syscall is required. Each file is atomic
//!   individually; there is no cross-file transaction. The PDF is
//!   therefore written BEFORE the metadata that references it (issue
//!   #122), so a crash between the two renames can only leave an orphan
//!   PDF or the prior consistent entry — never metadata pointing at a
//!   missing PDF.
//! - **§6 Coexistence with BiblioFetch.jl:** when re-writing an existing
//!   entry, reserved top-level fields previously present are NOT overwritten
//!   if the new value differs. The `[doiget]` table is updated freely.
//!   Unknown `other` keys are merged as a union in which a key already on
//!   disk wins over an incoming key of the same name (issue #123).
//! - **§7 Normalization:** alphabetical key order, `\n` line endings,
//!   trailing newline. Implemented through `BTreeMap`-backed re-serialization
//!   of the on-wire `toml::Value`.
30
31use std::fs::{File, OpenOptions};
32use std::io::{Read, Write};
33use std::time::{Duration, Instant};
34
35use camino::{Utf8Path, Utf8PathBuf};
36use fs2::FileExt;
37use tracing::warn;
38
39use super::metadata::{DoigetExtension, Metadata};
40use super::{EntryInfo, Store, StoreError};
41use crate::{Safekey, SCHEMA_VERSION};
42
/// Name of the directory beneath `<root>` that holds the metadata TOML files
/// together with their advisory-lock siblings (`docs/STORE.md` §1).
const METADATA_DIR: &str = ".metadata";

/// Maximum time to wait for a lock before giving up: 5 seconds, as required
/// by `docs/STORE.md` §4.
const LOCK_TIMEOUT: Duration = Duration::from_secs(5);

/// Pause between consecutive `try_lock_*` attempts. Kept small compared with
/// [`LOCK_TIMEOUT`] so that a waiting caller notices a released lock within
/// roughly 50 ms.
const LOCK_POLL_INTERVAL: Duration = Duration::from_millis(50);
54
55/// Filesystem-shaped [`Store`] implementation rooted at `<root>`.
56#[derive(Debug, Clone)]
57pub struct FsStore {
58    root: Utf8PathBuf,
59    metadata_dir: Utf8PathBuf,
60}
61
62impl FsStore {
63    /// Open or create a store at `root`.
64    ///
65    /// Creates `<root>/` and `<root>/.metadata/` if missing. On POSIX, both
66    /// directories are created with mode `0700` (owner-only). On Windows,
67    /// directory ACLs are inherited (no-op).
68    ///
69    /// # Errors
70    ///
71    /// Returns [`StoreError::Io`] if `root` exists but is not a directory,
72    /// or if directory creation fails.
73    pub fn new(root: Utf8PathBuf) -> Result<Self, StoreError> {
74        // Reject non-directory existing paths up front; `create_dir_all` on
75        // a regular-file path returns a confusing platform-dependent error.
76        if root.exists() && !root.is_dir() {
77            return Err(StoreError::Io(std::io::Error::new(
78                std::io::ErrorKind::AlreadyExists,
79                format!("store root {} exists but is not a directory", root),
80            )));
81        }
82        let metadata_dir = root.join(METADATA_DIR);
83
84        create_dir_secure(root.as_std_path())?;
85        create_dir_secure(metadata_dir.as_std_path())?;
86
87        Ok(Self { root, metadata_dir })
88    }
89
90    /// Returns the store root.
91    pub fn root(&self) -> &Utf8Path {
92        &self.root
93    }
94
95    /// Resolve the metadata-TOML path for `key`, with a defense-in-depth
96    /// path-traversal check.
97    ///
98    /// `Safekey` construction already restricts the inner string to
99    /// `[A-Za-z0-9._-]` per `docs/SAFEKEY.md`. The check below catches
100    /// hand-crafted `Safekey` values produced by in-crate `pub(crate)`
101    /// shortcuts (e.g. tests) and any future regression in the safekey
102    /// charset.
103    fn metadata_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
104        guard_safekey(key.as_str())?;
105        let p = self.metadata_dir.join(format!("{}.toml", key.as_str()));
106        // Final paranoia: parent must equal `metadata_dir`. After the charset
107        // check above this should always hold; if it ever does not, surface
108        // it as `PathTraversal` rather than panicking.
109        if p.parent() != Some(self.metadata_dir.as_path()) {
110            return Err(StoreError::PathTraversal { path: p });
111        }
112        Ok(p)
113    }
114
115    fn lock_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
116        guard_safekey(key.as_str())?;
117        Ok(self
118            .metadata_dir
119            .join(format!("{}.toml.lock", key.as_str())))
120    }
121
122    fn pdf_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
123        guard_safekey(key.as_str())?;
124        Ok(self.root.join(format!("{}.pdf", key.as_str())))
125    }
126}
127
128impl Store for FsStore {
129    fn read(&self, key: &Safekey) -> Result<Option<Metadata>, StoreError> {
130        let meta_path = self.metadata_path(key)?;
131        if !meta_path.exists() {
132            return Ok(None);
133        }
134
135        // Per `docs/STORE.md` §4 we MAY take a shared lock for reads. Use the
136        // sibling `.lock` file. Lock acquisition errors are surfaced as
137        // LockTimeout (5 s budget); locking is best-effort on platforms that
138        // implement it as a no-op.
139        let lock_path = self.lock_path(key)?;
140        let lock_file = open_or_create_lock_file(&lock_path)?;
141        acquire_lock(&lock_file, &lock_path, LockMode::Shared)?;
142
143        let raw = std::fs::read_to_string(meta_path.as_std_path())?;
144        // Drop the lock by closing the file handle; explicit unlock ensures
145        // determinism on platforms where Drop semantics differ. Disambiguate
146        // from `std::fs::File::unlock` (stabilized in 1.89) to keep MSRV
147        // at 1.86 — the `<File as FileExt>::…` form forces the `fs2` impl.
148        let _ = <File as FileExt>::unlock(&lock_file);
149
150        let metadata: Metadata = toml::from_str(&raw)?;
151        check_schema_version(&metadata.schema_version)?;
152        Ok(Some(metadata))
153    }
154
155    fn write(&self, key: &Safekey, m: &Metadata, pdf: Option<&Utf8Path>) -> Result<(), StoreError> {
156        let meta_path = self.metadata_path(key)?;
157        let lock_path = self.lock_path(key)?;
158        let lock_file = open_or_create_lock_file(&lock_path)?;
159        acquire_lock(&lock_file, &lock_path, LockMode::Exclusive)?;
160
161        // Re-read existing TOML (if any) so we can apply the §6 merge rule:
162        // never overwrite a reserved top-level field previously written by
163        // another tool. We DO let the new value win for the [doiget] table
164        // (doiget owns it per §6) and for `other` (preserve unknown tables
165        // on update; new contents replace prior contents).
166        let merged = if meta_path.exists() {
167            let raw = std::fs::read_to_string(meta_path.as_std_path())?;
168            let existing: Metadata = toml::from_str(&raw)?;
169            check_schema_version_for_write(&existing.schema_version)?;
170            merge_metadata(existing, m.clone())
171        } else {
172            m.clone()
173        };
174
175        // Serialize → normalize per §7. The normalizer enforces alphabetical
176        // key order within tables and a trailing `\n`.
177        let normalized = normalize_toml(&merged)?;
178
179        // Issue #122 — crash-consistent ordering: the PDF is written
180        // BEFORE the metadata that references it. A crash between the
181        // two atomic renames then leaves either the previous
182        // consistent entry or no metadata at all — NEVER metadata
183        // whose `pdf_path` points at a `.pdf` that does not exist
184        // yet. (The reverse order could publish a dangling pointer.)
185        // Worst case under the new order is an orphan `<safekey>.pdf`
186        // with stale/absent metadata, which list/search ignore (they
187        // key off metadata) and a re-fetch overwrites — strictly
188        // safer than a torn pointer. There is still no cross-file
189        // transaction; this ordering is the bounded MVP guarantee
190        // (documented in STORE.md §5).
191        if let Some(pdf_src) = pdf {
192            let pdf_dst = self.pdf_path(key)?;
193            let mut bytes = Vec::new();
194            File::open(pdf_src.as_std_path())?.read_to_end(&mut bytes)?;
195            // Same atomic dance as the metadata, byte-by-byte.
196            atomic_write(&pdf_dst, &bytes)?;
197        }
198
199        // Atomic write per §5: tmp → fsync → rename → fsync parent.
200        // Done LAST so the metadata only becomes visible once its PDF
201        // (if any) is already durably on disk.
202        atomic_write(&meta_path, normalized.as_bytes())?;
203
204        let _ = <File as FileExt>::unlock(&lock_file);
205        Ok(())
206    }
207
208    fn list_recent(&self, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
209        let mut entries = read_all_entries(&self.metadata_dir)?;
210        // Most-recent first by [doiget].fetched_at; entries with no
211        // `[doiget]` table sort last (None < Some via Reverse).
212        entries.sort_by_key(|e| std::cmp::Reverse(e.fetched_at));
213        entries.truncate(limit);
214        Ok(entries)
215    }
216
217    /// Phase 1 search is a linear scan over all metadata files. Phase 2 will
218    /// add a tantivy / sqlite-fts index when the corpus grows past the point
219    /// where O(N) per query becomes noticeable in CLI latency.
220    fn search(&self, query: &str, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
221        let q = query.to_lowercase();
222        let mut hits = Vec::new();
223        for path in metadata_files(&self.metadata_dir)? {
224            let raw = std::fs::read_to_string(path.as_std_path())?;
225            let Ok(md) = toml::from_str::<Metadata>(&raw) else {
226                // Malformed entries are skipped rather than failing the
227                // whole query. A future audit task will surface them.
228                continue;
229            };
230            let haystacks = [
231                md.title.to_lowercase(),
232                md.authors.join(" ").to_lowercase(),
233                md.venue.clone().unwrap_or_default().to_lowercase(),
234                md.publisher.clone().unwrap_or_default().to_lowercase(),
235            ];
236            if haystacks.iter().any(|h| h.contains(&q)) {
237                let safekey = safekey_from_metadata_filename(&path);
238                hits.push(EntryInfo {
239                    safekey,
240                    title: md.title,
241                    year: md.year,
242                    fetched_at: md.doiget.as_ref().map(|d| d.fetched_at),
243                });
244                if hits.len() >= limit {
245                    break;
246                }
247            }
248        }
249        Ok(hits)
250    }
251}
252
253// ---------------------------------------------------------------------------
254// Helpers
255// ---------------------------------------------------------------------------
256
257/// Reject any safekey containing path-traversal indicators. `Safekey`
258/// construction already enforces `[A-Za-z0-9._-]`-only chars per
259/// `docs/SAFEKEY.md`; this is defense-in-depth in case a hand-crafted
260/// `Safekey` (e.g. an in-crate `Safekey("...".into())` shortcut) is passed
261/// in.
262fn guard_safekey(s: &str) -> Result<(), StoreError> {
263    let bad = s.is_empty()
264        || s.contains('/')
265        || s.contains('\\')
266        || s.contains("..")
267        || s.contains('\0')
268        || s.starts_with('.')
269        || !s
270            .chars()
271            .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_');
272    if bad {
273        Err(StoreError::PathTraversal {
274            path: Utf8PathBuf::from(s),
275        })
276    } else {
277        Ok(())
278    }
279}
280
281/// Recover the safekey from a `<key>.toml` filename. Used only for surfacing
282/// list/search results; the safekey we emit here originated as a stored
283/// safekey, so it has already passed `guard_safekey` at write time.
284fn safekey_from_metadata_filename(p: &Utf8Path) -> Safekey {
285    Safekey(p.file_stem().unwrap_or("").to_string())
286}
287
/// Kind of advisory lock that [`acquire_lock`] should take.
#[derive(Clone, Copy, Debug)]
enum LockMode {
    /// Shared lock (`flock(LOCK_SH)`): several readers may hold it at once.
    Shared,
    /// Exclusive lock (`flock(LOCK_EX)`): held by a single writer.
    Exclusive,
}
296
297/// Open (or create) the advisory lock file. Lock files are never deleted
298/// during normal operation per `docs/STORE.md` §4.
299fn open_or_create_lock_file(path: &Utf8Path) -> Result<File, StoreError> {
300    let f = OpenOptions::new()
301        .create(true)
302        .read(true)
303        .write(true)
304        .truncate(false)
305        .open(path.as_std_path())?;
306    Ok(f)
307}
308
309/// Acquire `mode` on `lock_file`, polling `try_lock_*` until success or the
310/// 5 s budget expires per `docs/STORE.md` §4.
311fn acquire_lock(lock_file: &File, lock_path: &Utf8Path, mode: LockMode) -> Result<(), StoreError> {
312    let deadline = Instant::now() + LOCK_TIMEOUT;
313    loop {
314        // Disambiguate from `std::fs::File::try_lock_shared` (stabilized in
315        // 1.89), which is an inherent method on `File` and would otherwise
316        // shadow the trait method. The `<File as FileExt>::…` form forces
317        // the `fs2` impl; we want the cross-platform behavior that returns
318        // `std::io::Error` rather than the std `TryLockError` newtype.
319        let attempt = match mode {
320            LockMode::Shared => <File as FileExt>::try_lock_shared(lock_file),
321            LockMode::Exclusive => <File as FileExt>::try_lock_exclusive(lock_file),
322        };
323        match attempt {
324            Ok(()) => return Ok(()),
325            Err(e) => {
326                let contended = e.raw_os_error() == fs2::lock_contended_error().raw_os_error();
327                if !contended {
328                    // Not a "would-block" error — surface it directly.
329                    return Err(StoreError::Io(e));
330                }
331                if Instant::now() >= deadline {
332                    return Err(StoreError::LockTimeout {
333                        path: lock_path.to_owned(),
334                    });
335                }
336                std::thread::sleep(LOCK_POLL_INTERVAL);
337            }
338        }
339    }
340}
341
342/// Verify `schema_version` is acceptable for a read. Per `docs/STORE.md` §3,
343/// reads succeed with a `tracing::warn!` for ANY future schema_version
344/// (minor or major); the read-only mode is enforced at write time
345/// (see [`check_schema_version_for_write`]).
346fn check_schema_version(theirs: &str) -> Result<(), StoreError> {
347    let (their_major, their_minor) = parse_schema_version(theirs)?;
348    let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
349    if their_major > our_major {
350        warn!(
351            theirs = theirs,
352            ours = SCHEMA_VERSION,
353            "store entry uses a future-major schema_version; entering read-only mode \
354             for this entry (docs/STORE.md §3)"
355        );
356    } else if their_major == our_major && their_minor > our_minor {
357        warn!(
358            theirs = theirs,
359            ours = SCHEMA_VERSION,
360            "store entry uses a newer minor schema_version; reading in compatibility mode \
361             (docs/STORE.md §3 future-minor tolerance)"
362        );
363    }
364    Ok(())
365}
366
367/// Same as [`check_schema_version`] but used on the EXISTING file before a
368/// write merge: any `schema_version` strictly greater than ours (major or
369/// minor) refuses the write per `docs/STORE.md` §3 read-only-mode rule.
370fn check_schema_version_for_write(theirs: &str) -> Result<(), StoreError> {
371    let (their_major, their_minor) = parse_schema_version(theirs)?;
372    let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
373    if their_major > our_major || (their_major == our_major && their_minor > our_minor) {
374        return Err(StoreError::SchemaTooNew {
375            theirs: theirs.to_string(),
376            ours: SCHEMA_VERSION.to_string(),
377        });
378    }
379    Ok(())
380}
381
382fn parse_schema_version(s: &str) -> Result<(u32, u32), StoreError> {
383    let (maj, min) = s.split_once('.').ok_or(StoreError::MissingField {
384        field: "schema_version",
385    })?;
386    let maj: u32 = maj.parse().map_err(|_| StoreError::MissingField {
387        field: "schema_version",
388    })?;
389    let min: u32 = min.parse().map_err(|_| StoreError::MissingField {
390        field: "schema_version",
391    })?;
392    Ok((maj, min))
393}
394
395/// Apply the `docs/STORE.md` §6 merge rule: doiget MUST NOT modify reserved
396/// top-level fields written by another tool. Concretely: if `existing` has a
397/// reserved field set to a value different from `incoming`, KEEP existing.
398/// `[doiget]` is owned by doiget and is overwritten freely. `other` (unknown
399/// tables / fields like `[bibliofetch]`) is preserved through union: fields
400/// in `existing` not present in `incoming` are kept; otherwise `incoming`
401/// wins (callers usually leave `other` empty on a re-fetch, so existing
402/// fields survive intact).
403fn merge_metadata(existing: Metadata, incoming: Metadata) -> Metadata {
404    let mut out = incoming.clone();
405
406    // schema_version: never downgrade. The §6 exception explicitly allows a
407    // coordinated minor revision bump, so we take the max of the two.
408    if let (Ok((em, en)), Ok((im, in_))) = (
409        parse_schema_version(&existing.schema_version),
410        parse_schema_version(&incoming.schema_version),
411    ) {
412        if (em, en) > (im, in_) {
413            out.schema_version = existing.schema_version.clone();
414        }
415    }
416
417    // Reserved fields with non-Option String types: prefer existing if it
418    // differs from incoming (and is non-empty).
419    if !existing.title.is_empty() && existing.title != incoming.title {
420        warn!(
421            field = "title",
422            existing = existing.title.as_str(),
423            "preserving reserved field set by another tool (docs/STORE.md §6)"
424        );
425        out.title = existing.title;
426    }
427    if !existing.authors.is_empty() && existing.authors != incoming.authors {
428        warn!(
429            field = "authors",
430            "preserving reserved field set by another tool (docs/STORE.md §6)"
431        );
432        out.authors = existing.authors;
433    }
434
435    // Optional reserved fields: prefer existing Some over incoming Some-different.
436    macro_rules! merge_opt {
437        ($field:ident) => {
438            if existing.$field.is_some() && existing.$field != incoming.$field {
439                warn!(
440                    field = stringify!($field),
441                    "preserving reserved field set by another tool (docs/STORE.md §6)"
442                );
443                out.$field = existing.$field;
444            }
445        };
446    }
447    merge_opt!(year);
448    merge_opt!(doi);
449    merge_opt!(arxiv_id);
450    merge_opt!(abstract_);
451    merge_opt!(venue);
452    merge_opt!(volume);
453    merge_opt!(issue);
454    merge_opt!(pages);
455    merge_opt!(publisher);
456    merge_opt!(issn);
457    merge_opt!(isbn);
458    merge_opt!(type_);
459    merge_opt!(url);
460    merge_opt!(pdf_path);
461
462    // keywords (Vec<String>): prefer existing if non-empty and different.
463    if !existing.keywords.is_empty() && existing.keywords != incoming.keywords {
464        warn!(
465            field = "keywords",
466            "preserving reserved field set by another tool (docs/STORE.md §6)"
467        );
468        out.keywords = existing.keywords;
469    }
470
471    // [doiget]: doiget owns this table; incoming wins (already in `out`).
472    // If incoming has no [doiget] but existing did, keep the existing one
473    // so a metadata-only re-write doesn't silently drop a fetch record.
474    if out.doiget.is_none() && existing.doiget.is_some() {
475        out.doiget = existing.doiget;
476    }
477
478    // `other` (unknown tables / fields): union, prefer EXISTING on key
479    // collision (issue #123). STORE.md §6 forbids doiget overwriting a
480    // field/table another tool authored; an unknown key already on disk
481    // (e.g. a `[bibliofetch]` sub-key) must win over whatever doiget
482    // happens to carry in `other`. Doiget normally leaves `other`
483    // empty on a re-fetch, so this only changes behaviour in the
484    // (latent) case where both sides populate the same unknown key —
485    // there, "never overwrite" is the correct §6 resolution.
486    let mut merged_other = existing.other;
487    for (k, v) in out.other.iter() {
488        merged_other.entry(k.clone()).or_insert_with(|| v.clone());
489    }
490    out.other = merged_other;
491
492    out
493}
494
495/// Serialize `m` to TOML and apply `docs/STORE.md` §7 normalization:
496/// alphabetical key order within tables, `\n` line endings, trailing
497/// newline.
498///
499/// Implementation: serialize the [`Metadata`] to `toml::Value`, then walk
500/// the value tree and re-emit through `BTreeMap` for stable key order.
501fn normalize_toml(m: &Metadata) -> Result<String, StoreError> {
502    // Serialize to a Value to escape Rust-struct field order; tables are
503    // re-keyed alphabetically below.
504    let value = toml::Value::try_from(m)?;
505    let mut out = String::new();
506    write_normalized_toml(&value, &mut out)?;
507    if !out.ends_with('\n') {
508        out.push('\n');
509    }
510    Ok(out)
511}
512
513/// Walk the top-level table, emit reserved-vs-table keys in normalized
514/// order: `schema_version` first, then remaining scalar/array keys
515/// alphabetically, then sub-tables alphabetically. Within sub-tables, keys
516/// are alphabetical via `BTreeMap`.
517fn write_normalized_toml(value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
518    let table = match value {
519        toml::Value::Table(t) => t,
520        _ => {
521            return Err(StoreError::Serialize(
522                <toml::ser::Error as serde::ser::Error>::custom(
523                    "Metadata did not serialize to a TOML table",
524                ),
525            ));
526        }
527    };
528
529    // Partition into scalar/array keys (top-level) vs sub-tables (rendered
530    // as `[name]` blocks). `schema_version` is forced first per §7.
531    let mut top_keys: Vec<&String> = Vec::new();
532    let mut sub_table_keys: Vec<&String> = Vec::new();
533    for (k, v) in table.iter() {
534        if matches!(v, toml::Value::Table(_)) {
535            sub_table_keys.push(k);
536        } else {
537            top_keys.push(k);
538        }
539    }
540    top_keys.sort();
541    sub_table_keys.sort();
542
543    // schema_version always first.
544    if let Some(v) = table.get("schema_version") {
545        write_kv("schema_version", v, out)?;
546    }
547    for k in top_keys {
548        if k == "schema_version" {
549            continue;
550        }
551        if let Some(v) = table.get(k) {
552            write_kv(k, v, out)?;
553        }
554    }
555    for k in sub_table_keys {
556        if let Some(toml::Value::Table(sub)) = table.get(k) {
557            out.push('\n');
558            out.push('[');
559            out.push_str(k);
560            out.push_str("]\n");
561            // Within a sub-table, alphabetical order via BTreeMap.
562            let sorted: std::collections::BTreeMap<&String, &toml::Value> = sub.iter().collect();
563            for (sk, sv) in sorted {
564                write_kv(sk, sv, out)?;
565            }
566        }
567    }
568    Ok(())
569}
570
571/// Render a single `key = value` line. Uses `toml::to_string` on the value
572/// half so quoting / escaping matches the spec ("ASCII-safe single-line
573/// strings use `\"...\"`", §7).
574fn write_kv(key: &str, value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
575    out.push_str(key);
576    out.push_str(" = ");
577    let rendered = toml_value_inline(value)?;
578    out.push_str(&rendered);
579    out.push('\n');
580    Ok(())
581}
582
583/// Render a TOML value as a single-line inline expression. Tables are
584/// rejected (the caller emits them as `[name]` blocks instead).
585fn toml_value_inline(value: &toml::Value) -> Result<String, StoreError> {
586    let s = match value {
587        toml::Value::Table(_) => {
588            return Err(StoreError::Serialize(
589                <toml::ser::Error as serde::ser::Error>::custom(
590                    "nested tables not supported by inline writer",
591                ),
592            ));
593        }
594        // Defer to toml's own serializer for a single value via a one-key
595        // shim. `toml::to_string` on a value alone is not supported in
596        // toml 1.x, but wrapping it in a singleton table and slicing off
597        // the key is reliable.
598        v => {
599            let mut wrapper = toml::map::Map::new();
600            wrapper.insert("__v".to_string(), v.clone());
601            let rendered = toml::to_string(&toml::Value::Table(wrapper))?;
602            // Output looks like `__v = <value>\n`. Strip the prefix and
603            // trailing newline.
604            let body = rendered
605                .strip_prefix("__v = ")
606                .ok_or_else(|| {
607                    StoreError::Serialize(<toml::ser::Error as serde::ser::Error>::custom(
608                        "unexpected toml singleton format",
609                    ))
610                })?
611                .trim_end_matches('\n')
612                .to_string();
613            body
614        }
615    };
616    Ok(s)
617}
618
619/// Atomic write per `docs/STORE.md` §5: write `tmp` → `sync_all` → `rename`
620/// → fsync parent (POSIX). On Windows `std::fs::rename` already issues
621/// `MoveFileEx(.., MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)`,
622/// so the parent-fsync step is a no-op.
623///
624/// A crash mid-write leaves either the old file intact (if before the
625/// rename) or the new file fully written (if after). It never leaves a
626/// partially-visible new file.
627fn atomic_write(dst: &Utf8Path, bytes: &[u8]) -> std::io::Result<()> {
628    let file_name = dst.file_name().ok_or_else(|| {
629        std::io::Error::new(
630            std::io::ErrorKind::InvalidInput,
631            "destination path has no file name",
632        )
633    })?;
634    let mut tmp_path = dst.to_path_buf();
635    tmp_path.set_file_name(format!("{}.tmp", file_name));
636
637    {
638        let mut f = OpenOptions::new()
639            .create(true)
640            .write(true)
641            .truncate(true)
642            .open(tmp_path.as_std_path())?;
643        f.write_all(bytes)?;
644        f.sync_all()?;
645    }
646    std::fs::rename(tmp_path.as_std_path(), dst.as_std_path())?;
647
648    // Best-effort parent-dir fsync on POSIX. On Windows opening a directory
649    // for sync is not supported; the rename above already used
650    // MOVEFILE_WRITE_THROUGH semantics.
651    #[cfg(unix)]
652    {
653        if let Some(parent) = dst.parent() {
654            if let Ok(dir) = File::open(parent.as_std_path()) {
655                let _ = dir.sync_all();
656            }
657        }
658    }
659
660    Ok(())
661}
662
/// Create the directory `path` (and any missing parents) if it is not
/// already a directory. On POSIX, a directory created here has its mode set
/// to `0700`; a directory that already exists is left untouched.
///
/// # Errors
///
/// Returns the underlying I/O error when creation or the permission change
/// fails. This includes the case where `path` exists but is NOT a directory:
/// it is reported by `create_dir_all` rather than silently accepted.
fn create_dir_secure(path: &std::path::Path) -> std::io::Result<()> {
    // `is_dir`, not `exists`: a regular file sitting at `path` must not be
    // mistaken for an already-created directory.
    if path.is_dir() {
        return Ok(());
    }
    std::fs::create_dir_all(path)?;
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = std::fs::metadata(path)?.permissions();
        perms.set_mode(0o700);
        std::fs::set_permissions(path, perms)?;
    }
    Ok(())
}
678
679/// List metadata-TOML files (skipping `.tmp` artifacts and `.lock` siblings).
680///
681/// Non-UTF-8 entry names are skipped silently. Safekey-derived filenames are
682/// pure ASCII per `docs/SAFEKEY.md`, so this only filters out unrelated
683/// non-UTF-8 garbage that may have been dropped into the store directory by
684/// a third party.
685fn metadata_files(metadata_dir: &Utf8Path) -> std::io::Result<Vec<Utf8PathBuf>> {
686    let mut out = Vec::new();
687    if !metadata_dir.exists() {
688        return Ok(out);
689    }
690    for entry in std::fs::read_dir(metadata_dir.as_std_path())? {
691        let entry = entry?;
692        if !entry.file_type()?.is_file() {
693            continue;
694        }
695        let path = entry.path();
696        let utf8_path = match Utf8PathBuf::from_path_buf(path) {
697            Ok(p) => p,
698            Err(_) => continue,
699        };
700        let name = match utf8_path.file_name() {
701            Some(n) => n,
702            None => continue,
703        };
704        if name.ends_with(".toml") && !name.ends_with(".tmp") {
705            out.push(utf8_path);
706        }
707    }
708    Ok(out)
709}
710
711fn read_all_entries(metadata_dir: &Utf8Path) -> Result<Vec<EntryInfo>, StoreError> {
712    let mut out = Vec::new();
713    for path in metadata_files(metadata_dir)? {
714        let raw = std::fs::read_to_string(path.as_std_path())?;
715        let Ok(md) = toml::from_str::<Metadata>(&raw) else {
716            // Corrupt / future-major entries are skipped from list output.
717            continue;
718        };
719        let safekey = safekey_from_metadata_filename(&path);
720        out.push(EntryInfo {
721            safekey,
722            title: md.title,
723            year: md.year,
724            fetched_at: md.doiget.map(|d| d.fetched_at),
725        });
726    }
727    Ok(out)
728}
729
730// `DoigetExtension` is referenced in the test module below; this tiny shim
731// keeps the symbol live in non-test builds so rustdoc intra-doc linking
732// stays stable.
733#[allow(dead_code)]
734fn _doiget_extension_is_visible(d: DoigetExtension) -> DoigetExtension {
735    d
736}
737
738// ---------------------------------------------------------------------------
739// Tests
740// ---------------------------------------------------------------------------
741
742// `expect`/`unwrap` are idiomatic in tests where panics double as assertions.
743// Workspace lints deny them in production code; relax for the test module.
744#[cfg(test)]
745#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
746mod tests {
747    use super::*;
748    use std::collections::BTreeMap;
749    use std::sync::Arc;
750    use std::thread;
751
752    use chrono::TimeZone;
753    use tempfile::TempDir;
754
755    use crate::{Doi, Safekey, SCHEMA_VERSION};
756
757    fn tmp_dir_utf8(dir: &TempDir) -> Utf8PathBuf {
758        Utf8PathBuf::from_path_buf(dir.path().to_path_buf()).expect("temp dir path must be UTF-8")
759    }
760
761    fn sample_safekey() -> Safekey {
762        // In-crate `pub(crate)` shortcut; matches the pattern used in
763        // safekey vector tests in lib.rs.
764        Safekey("doi_10.1234_example".to_string())
765    }
766
767    fn sample_metadata() -> Metadata {
768        Metadata {
769            schema_version: SCHEMA_VERSION.to_string(),
770            title: "Sample Paper Title".to_string(),
771            authors: vec!["Alice Researcher".to_string(), "Bob Coauthor".to_string()],
772            year: Some(2026),
773            doi: Some(Doi("10.1234/example".to_string())),
774            arxiv_id: None,
775            abstract_: Some("A short abstract.".to_string()),
776            venue: Some("Phys. Rev. X".to_string()),
777            volume: Some("12".to_string()),
778            issue: Some("3".to_string()),
779            pages: Some("031001".to_string()),
780            publisher: Some("American Physical Society".to_string()),
781            issn: Some("2160-3308".to_string()),
782            isbn: None,
783            type_: Some("journal-article".to_string()),
784            keywords: vec!["physics".to_string(), "tdd".to_string()],
785            url: Some("https://example.test/paper".to_string()),
786            pdf_path: Some("doi_10.1234_example.pdf".to_string()),
787            doiget: Some(DoigetExtension {
788                fetched_at: chrono::Utc.with_ymd_and_hms(2026, 5, 6, 12, 0, 0).unwrap(),
789                source: "unpaywall".to_string(),
790                license: "CC-BY-4.0".to_string(),
791                size_bytes: 1234567,
792                mcp_call_id: Some("01JCKZ7Q0000000000000000AB".to_string()),
793            }),
794            other: BTreeMap::new(),
795        }
796    }
797
798    fn fresh_store(dir: &TempDir) -> FsStore {
799        let root = tmp_dir_utf8(dir).join("papers");
800        FsStore::new(root).expect("FsStore::new")
801    }
802
803    #[test]
804    fn roundtrip_reserved_fields() {
805        let dir = TempDir::new().expect("tmp");
806        let store = fresh_store(&dir);
807        let key = sample_safekey();
808        let m = sample_metadata();
809        store.write(&key, &m, None).expect("write");
810
811        let read = store.read(&key).expect("read").expect("Some");
812        assert_eq!(read.schema_version, m.schema_version);
813        assert_eq!(read.title, m.title);
814        assert_eq!(read.authors, m.authors);
815        assert_eq!(read.year, m.year);
816        assert_eq!(
817            read.doi.as_ref().map(|d| d.as_str()),
818            Some("10.1234/example")
819        );
820        assert_eq!(read.abstract_, m.abstract_);
821        assert_eq!(read.venue, m.venue);
822        assert_eq!(read.publisher, m.publisher);
823        assert_eq!(read.issn, m.issn);
824        assert_eq!(read.type_, m.type_);
825        assert_eq!(read.keywords, m.keywords);
826        assert_eq!(read.url, m.url);
827        assert_eq!(read.pdf_path, m.pdf_path);
828    }
829
830    #[test]
831    fn roundtrip_doiget_extension() {
832        let dir = TempDir::new().expect("tmp");
833        let store = fresh_store(&dir);
834        let key = sample_safekey();
835        let m = sample_metadata();
836        store.write(&key, &m, None).expect("write");
837
838        let read = store.read(&key).expect("read").expect("Some");
839        let d = read.doiget.expect("doiget table present");
840        let want = m.doiget.expect("input doiget");
841        assert_eq!(d.fetched_at, want.fetched_at);
842        assert_eq!(d.source, want.source);
843        assert_eq!(d.license, want.license);
844        assert_eq!(d.size_bytes, want.size_bytes);
845        assert_eq!(d.mcp_call_id, want.mcp_call_id);
846    }
847
848    #[test]
849    fn read_returns_none_for_missing_safekey() {
850        let dir = TempDir::new().expect("tmp");
851        let store = fresh_store(&dir);
852        let key = Safekey("nonexistent".to_string());
853        let res = store.read(&key).expect("read ok");
854        assert!(res.is_none(), "expected Ok(None), got {:?}", res);
855    }
856
857    #[test]
858    fn schema_too_new_blocks_writes_but_allows_reads() {
859        let dir = TempDir::new().expect("tmp");
860        let store = fresh_store(&dir);
861        let key = sample_safekey();
862
863        // Hand-craft a TOML with a future-major schema_version.
864        let meta_path = store.metadata_path(&key).expect("path");
865        std::fs::create_dir_all(meta_path.parent().expect("parent").as_std_path()).expect("mkdir");
866        let body = "schema_version = \"2.0\"\ntitle = \"Future\"\nauthors = []\n";
867        std::fs::write(meta_path.as_std_path(), body).expect("write");
868
869        // Read should succeed (read-only mode per §3, warn is best-effort).
870        let read = store.read(&key).expect("read ok");
871        assert!(read.is_some(), "future-major file must be readable");
872
873        // Write should refuse with SchemaTooNew.
874        let m = sample_metadata();
875        let err = store.write(&key, &m, None).expect_err("write must fail");
876        match err {
877            StoreError::SchemaTooNew { theirs, ours } => {
878                assert_eq!(theirs, "2.0");
879                assert_eq!(ours, SCHEMA_VERSION);
880            }
881            other => panic!("expected SchemaTooNew, got {:?}", other),
882        }
883    }
884
885    #[test]
886    fn concurrent_writers_serialize_via_flock() {
887        // Two threads writing to the same key with different [doiget].source
888        // values. The flock SHOULD make every write atomic from the on-disk
889        // perspective: at no point is the metadata file half-written, and
890        // every parse succeeds. We do not assert WHICH writer wins — only
891        // that the file remains valid TOML throughout.
892        let dir = TempDir::new().expect("tmp");
893        let store = Arc::new(fresh_store(&dir));
894        let key = sample_safekey();
895
896        // Pre-create so both threads exercise the merge path.
897        store.write(&key, &sample_metadata(), None).expect("seed");
898
899        let mut handles = Vec::new();
900        for source in ["unpaywall", "europepmc"] {
901            let store = Arc::clone(&store);
902            let key = key.clone();
903            handles.push(thread::spawn(move || {
904                let mut m = sample_metadata();
905                if let Some(d) = m.doiget.as_mut() {
906                    d.source = source.to_string();
907                }
908                store.write(&key, &m, None).expect("write");
909            }));
910        }
911        for h in handles {
912            h.join().expect("join");
913        }
914
915        // Every read after the two writers complete must succeed and produce
916        // a value whose `[doiget].source` is one of the two contenders.
917        let read = store.read(&key).expect("read").expect("Some");
918        let source = read.doiget.expect("doiget").source;
919        assert!(
920            source == "unpaywall" || source == "europepmc",
921            "winning source must be one of the contenders, got {}",
922            source
923        );
924    }
925
926    #[test]
927    fn list_recent_orders_by_fetched_at_desc() {
928        let dir = TempDir::new().expect("tmp");
929        let store = fresh_store(&dir);
930
931        for (idx, year_seed) in [(1, 2024_u32), (2, 2025), (3, 2026)] {
932            let key = Safekey(format!("doi_10.1234_entry{}", idx));
933            let mut m = sample_metadata();
934            m.title = format!("Entry {}", idx);
935            if let Some(d) = m.doiget.as_mut() {
936                d.fetched_at = chrono::Utc
937                    .with_ymd_and_hms(year_seed as i32, 5, 6, 12, 0, 0)
938                    .unwrap();
939            }
940            store.write(&key, &m, None).expect("write");
941        }
942
943        let recent = store.list_recent(10).expect("list");
944        assert_eq!(recent.len(), 3, "expected 3 entries, got {}", recent.len());
945        // Most-recent first: 2026, 2025, 2024.
946        assert_eq!(recent[0].title, "Entry 3");
947        assert_eq!(recent[1].title, "Entry 2");
948        assert_eq!(recent[2].title, "Entry 1");
949        for w in recent.windows(2) {
950            assert!(
951                w[0].fetched_at >= w[1].fetched_at,
952                "recent[].fetched_at must be non-increasing"
953            );
954        }
955    }
956
957    #[test]
958    fn search_finds_by_title_substring() {
959        let dir = TempDir::new().expect("tmp");
960        let store = fresh_store(&dir);
961
962        let key = Safekey("doi_10.1234_quantum".to_string());
963        let mut m = sample_metadata();
964        m.title = "Quantum Stuff and Other Topics".to_string();
965        store.write(&key, &m, None).expect("write");
966
967        let hits = store.search("quantum", 10).expect("search");
968        assert_eq!(hits.len(), 1, "expected 1 hit, got {}", hits.len());
969        assert_eq!(hits[0].title, "Quantum Stuff and Other Topics");
970
971        let empty = store.search("relativity", 10).expect("search");
972        assert!(empty.is_empty(), "expected no hits, got {:?}", empty);
973    }
974
975    #[test]
976    fn path_traversal_in_safekey_blocked() {
977        let dir = TempDir::new().expect("tmp");
978        let store = fresh_store(&dir);
979        let bad = Safekey("../etc/passwd".to_string());
980
981        match store.read(&bad) {
982            Err(StoreError::PathTraversal { .. }) => {}
983            other => panic!("expected PathTraversal, got {:?}", other),
984        }
985        let m = sample_metadata();
986        match store.write(&bad, &m, None) {
987            Err(StoreError::PathTraversal { .. }) => {}
988            other => panic!("expected PathTraversal, got {:?}", other),
989        }
990    }
991
992    #[test]
993    fn write_then_read_normalized_toml_alphabetizes_keys() {
994        // §7 normalization: schema_version first, then reserved fields
995        // alphabetically, then sub-tables alphabetically with alphabetical
996        // keys inside.
997        let dir = TempDir::new().expect("tmp");
998        let store = fresh_store(&dir);
999        let key = sample_safekey();
1000        store.write(&key, &sample_metadata(), None).expect("write");
1001
1002        let path = store.metadata_path(&key).expect("path");
1003        let raw = std::fs::read_to_string(path.as_std_path()).expect("read");
1004        // schema_version must be the first line.
1005        let first_line = raw.lines().next().expect("at least one line");
1006        assert!(
1007            first_line.starts_with("schema_version = "),
1008            "first line must be schema_version, got: {:?}",
1009            first_line
1010        );
1011        // EOF newline.
1012        assert!(raw.ends_with('\n'), "file must end with a newline");
1013        // No CR characters anywhere.
1014        assert!(!raw.contains('\r'), "no CR allowed; LF only");
1015        // Sub-table appears.
1016        assert!(raw.contains("\n[doiget]\n"), "doiget sub-table missing");
1017        // Within [doiget], `fetched_at` must precede `license` alphabetically.
1018        let doiget_idx = raw.find("[doiget]").expect("doiget block");
1019        let after = &raw[doiget_idx..];
1020        let fetched_at_idx = after
1021            .find("fetched_at = ")
1022            .expect("fetched_at key in doiget");
1023        let license_idx = after.find("license = ").expect("license key in doiget");
1024        assert!(
1025            fetched_at_idx < license_idx,
1026            "fetched_at must precede license within [doiget]"
1027        );
1028    }
1029
1030    #[test]
1031    fn write_preserves_unknown_table_from_existing_file() {
1032        // §6 + §8: if the existing file has a `[bibliofetch]` table, a
1033        // doiget rewrite must not silently drop it.
1034        let dir = TempDir::new().expect("tmp");
1035        let store = fresh_store(&dir);
1036        let key = sample_safekey();
1037        let meta_path = store.metadata_path(&key).expect("path");
1038
1039        let body = format!(
1040            "schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\n\
1041             [bibliofetch]\nharvest = \"2026-01-01\"\n",
1042            SCHEMA_VERSION
1043        );
1044        std::fs::write(meta_path.as_std_path(), body).expect("write");
1045
1046        let mut m = sample_metadata();
1047        m.title = "Doiget Wins?".to_string(); // would normally overwrite, but §6 keeps existing
1048        store.write(&key, &m, None).expect("write");
1049
1050        let read_raw = std::fs::read_to_string(meta_path.as_std_path()).expect("re-read");
1051        assert!(
1052            read_raw.contains("bibliofetch"),
1053            "[bibliofetch] table was dropped: {}",
1054            read_raw
1055        );
1056        assert!(
1057            read_raw.contains("title = \"Existing\""),
1058            "doiget overwrote a reserved field set by another tool: {}",
1059            read_raw
1060        );
1061    }
1062
1063    /// Issue #121: prove the BiblioFetch.jl coexistence contract
1064    /// end-to-end through the actual `read()` / `write()` API with
1065    /// TYPED values — not a raw-text substring check. Seeds a
1066    /// "BiblioFetch-authored" entry with reserved fields, a
1067    /// `[bibliofetch]` table carrying typed sub-keys (string / int /
1068    /// array) AND an unknown top-level scalar, then asserts a
1069    /// doiget read→mutate→write→read cycle preserves all of it and
1070    /// does not clobber the reserved field (STORE.md §6 + §8).
1071    #[test]
1072    fn bibliofetch_typed_table_and_unknown_scalar_survive_roundtrip() {
1073        let dir = TempDir::new().expect("tmp");
1074        let store = fresh_store(&dir);
1075        let key = sample_safekey();
1076        let meta_path = store.metadata_path(&key).expect("path");
1077
1078        // Written "by BiblioFetch.jl": reserved fields + a typed
1079        // [bibliofetch] table + an unknown top-level scalar.
1080        let body = format!(
1081            "schema_version = \"{}\"\n\
1082             title = \"Existing\"\n\
1083             authors = [\"Carol\"]\n\
1084             zotero_key = \"ABC123\"\n\n\
1085             [bibliofetch]\n\
1086             harvest = \"2026-02-03\"\n\
1087             count = 42\n\
1088             tags = [\"x\", \"y\"]\n",
1089            SCHEMA_VERSION
1090        );
1091        std::fs::write(meta_path.as_std_path(), body).expect("seed write");
1092
1093        // First read through the real API must surface the unknowns
1094        // in `other`.
1095        let m0 = store.read(&key).expect("read ok").expect("entry present");
1096        assert!(
1097            m0.other.contains_key("bibliofetch"),
1098            "[bibliofetch] not captured into `other` on read: {:?}",
1099            m0.other
1100        );
1101        assert_eq!(
1102            m0.other.get("zotero_key").and_then(|v| v.as_str()),
1103            Some("ABC123"),
1104            "unknown top-level scalar not captured: {:?}",
1105            m0.other
1106        );
1107
1108        // doiget rewrites (e.g. a re-fetch) with its own metadata.
1109        let mut m_doiget = sample_metadata();
1110        m_doiget.title = "Doiget Would Overwrite".to_string();
1111        store.write(&key, &m_doiget, None).expect("doiget write");
1112
1113        // Read again — everything BiblioFetch authored must still be
1114        // there, byte/value-identical, and the reserved field intact.
1115        let m1 = store
1116            .read(&key)
1117            .expect("re-read ok")
1118            .expect("entry present");
1119        assert_eq!(
1120            m1.title, "Existing",
1121            "STORE.md §6: doiget overwrote a reserved field"
1122        );
1123        let bf = m1
1124            .other
1125            .get("bibliofetch")
1126            .and_then(|v| v.as_table())
1127            .expect("[bibliofetch] table survived read->write->read");
1128        assert_eq!(
1129            bf.get("harvest").and_then(|v| v.as_str()),
1130            Some("2026-02-03")
1131        );
1132        assert_eq!(bf.get("count").and_then(|v| v.as_integer()), Some(42));
1133        let tags = bf
1134            .get("tags")
1135            .and_then(|v| v.as_array())
1136            .expect("tags array survived");
1137        let tags: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect();
1138        assert_eq!(tags, vec!["x", "y"]);
1139        assert_eq!(
1140            m1.other.get("zotero_key").and_then(|v| v.as_str()),
1141            Some("ABC123"),
1142            "unknown top-level scalar lost across the cycle"
1143        );
1144
1145        // STORE.md §7 normalization: trailing newline preserved.
1146        let raw = std::fs::read_to_string(meta_path.as_std_path()).expect("raw re-read");
1147        assert!(raw.ends_with('\n'), "missing trailing newline: {raw:?}");
1148    }
1149
1150    /// Issue #123: on an `other`-key collision the EXISTING on-disk
1151    /// value must win (STORE.md §6 "never overwrite"). Seeds a
1152    /// `zotero_key` "by another tool", then has doiget write an entry
1153    /// whose own `other` carries a different `zotero_key`; the disk
1154    /// value must survive.
1155    #[test]
1156    fn other_key_collision_prefers_existing() {
1157        let dir = TempDir::new().expect("tmp");
1158        let store = fresh_store(&dir);
1159        let key = sample_safekey();
1160        let meta_path = store.metadata_path(&key).expect("path");
1161
1162        let body = format!(
1163            "schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\
1164             zotero_key = \"FROM_BIBLIOFETCH\"\n",
1165            SCHEMA_VERSION
1166        );
1167        std::fs::write(meta_path.as_std_path(), body).expect("seed");
1168
1169        let mut m = sample_metadata();
1170        m.other.insert(
1171            "zotero_key".to_string(),
1172            toml::Value::String("FROM_DOIGET".to_string()),
1173        );
1174        store.write(&key, &m, None).expect("write");
1175
1176        let got = store.read(&key).expect("read").expect("present");
1177        assert_eq!(
1178            got.other.get("zotero_key").and_then(|v| v.as_str()),
1179            Some("FROM_BIBLIOFETCH"),
1180            "STORE.md §6: existing `other` value must win on collision"
1181        );
1182    }
1183
1184    #[test]
1185    fn pdf_is_copied_atomically_on_write() {
1186        let dir = TempDir::new().expect("tmp");
1187        let store = fresh_store(&dir);
1188        let key = sample_safekey();
1189
1190        // Write a small synthetic "PDF" file.
1191        let src_dir = TempDir::new().expect("tmp src");
1192        let src_path = Utf8PathBuf::from_path_buf(src_dir.path().to_path_buf())
1193            .expect("utf8 src dir")
1194            .join("input.pdf");
1195        std::fs::write(src_path.as_std_path(), b"%PDF-1.7 synthetic").expect("write src");
1196
1197        store
1198            .write(&key, &sample_metadata(), Some(&src_path))
1199            .expect("write");
1200
1201        let dst = store.pdf_path(&key).expect("pdf path");
1202        let bytes = std::fs::read(dst.as_std_path()).expect("read dst");
1203        assert_eq!(bytes, b"%PDF-1.7 synthetic");
1204    }
1205}