use std::fs::{File, OpenOptions};
use std::io::{Read, Write};
use std::time::{Duration, Instant};
use camino::{Utf8Path, Utf8PathBuf};
use fs2::FileExt;
use tracing::warn;
use super::metadata::{DoigetExtension, Metadata};
use super::{EntryInfo, Store, StoreError};
use crate::{Safekey, SCHEMA_VERSION};
/// Name of the sub-directory (under the store root) that holds the per-entry
/// `.toml` metadata files and their `.toml.lock` lock files.
const METADATA_DIR: &str = ".metadata";
/// Upper bound on how long `acquire_lock` keeps retrying a contended lock
/// before giving up with `StoreError::LockTimeout`.
const LOCK_TIMEOUT: Duration = Duration::from_secs(5);
/// Sleep between two consecutive non-blocking lock attempts in `acquire_lock`.
const LOCK_POLL_INTERVAL: Duration = Duration::from_millis(50);
/// Filesystem-backed implementation of [`Store`].
///
/// PDFs are placed directly under `root` as `<safekey>.pdf`; metadata lives in
/// `metadata_dir` as `<safekey>.toml`, next to a `<safekey>.toml.lock` file
/// used for advisory locking.
#[derive(Debug, Clone)]
pub struct FsStore {
/// Directory that contains the PDFs and the metadata sub-directory.
root: Utf8PathBuf,
/// `root` joined with `METADATA_DIR`; computed once in `FsStore::new`.
metadata_dir: Utf8PathBuf,
}
impl FsStore {
    /// Opens a store rooted at `root`, creating `root` and its metadata
    /// sub-directory when they do not exist yet.
    ///
    /// # Errors
    ///
    /// Returns `StoreError::Io` with kind `AlreadyExists` when `root` exists
    /// but is not a directory, and propagates any I/O error raised while
    /// creating the directories.
    pub fn new(root: Utf8PathBuf) -> Result<Self, StoreError> {
        if root.exists() && !root.is_dir() {
            return Err(StoreError::Io(std::io::Error::new(
                std::io::ErrorKind::AlreadyExists,
                format!("store root {} exists but is not a directory", root),
            )));
        }
        let metadata_dir = root.join(METADATA_DIR);
        create_dir_secure(root.as_std_path())?;
        create_dir_secure(metadata_dir.as_std_path())?;
        Ok(Self { root, metadata_dir })
    }

    /// Returns the root directory of the store.
    pub fn root(&self) -> &Utf8Path {
        &self.root
    }

    /// Builds `<dir>/<key><suffix>` after validating `key`.
    ///
    /// `guard_safekey` is the primary defence; the parent comparison is a
    /// second, independent check that the joined path is a direct child of
    /// `dir`. Every path helper goes through here so the three of them apply
    /// exactly the same checks.
    fn contained_path(
        dir: &Utf8Path,
        key: &Safekey,
        suffix: &str,
    ) -> Result<Utf8PathBuf, StoreError> {
        guard_safekey(key.as_str())?;
        let candidate = dir.join(format!("{}{}", key.as_str(), suffix));
        if candidate.parent() != Some(dir) {
            return Err(StoreError::PathTraversal { path: candidate });
        }
        Ok(candidate)
    }

    /// Path of the TOML metadata file for `key` (`<metadata_dir>/<key>.toml`).
    fn metadata_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
        Self::contained_path(&self.metadata_dir, key, ".toml")
    }

    /// Path of the lock file guarding `key` (`<metadata_dir>/<key>.toml.lock`).
    fn lock_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
        Self::contained_path(&self.metadata_dir, key, ".toml.lock")
    }

    /// Path of the stored PDF for `key` (`<root>/<key>.pdf`).
    fn pdf_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
        Self::contained_path(&self.root, key, ".pdf")
    }
}
impl Store for FsStore {
fn read(&self, key: &Safekey) -> Result<Option<Metadata>, StoreError> {
let meta_path = self.metadata_path(key)?;
if !meta_path.exists() {
return Ok(None);
}
let lock_path = self.lock_path(key)?;
let lock_file = open_or_create_lock_file(&lock_path)?;
acquire_lock(&lock_file, &lock_path, LockMode::Shared)?;
let raw = std::fs::read_to_string(meta_path.as_std_path())?;
let _ = <File as FileExt>::unlock(&lock_file);
let metadata: Metadata = toml::from_str(&raw)?;
check_schema_version(&metadata.schema_version)?;
Ok(Some(metadata))
}
fn write(&self, key: &Safekey, m: &Metadata, pdf: Option<&Utf8Path>) -> Result<(), StoreError> {
let meta_path = self.metadata_path(key)?;
let lock_path = self.lock_path(key)?;
let lock_file = open_or_create_lock_file(&lock_path)?;
acquire_lock(&lock_file, &lock_path, LockMode::Exclusive)?;
let merged = if meta_path.exists() {
let raw = std::fs::read_to_string(meta_path.as_std_path())?;
let existing: Metadata = toml::from_str(&raw)?;
check_schema_version_for_write(&existing.schema_version)?;
merge_metadata(existing, m.clone())
} else {
m.clone()
};
let normalized = normalize_toml(&merged)?;
if let Some(pdf_src) = pdf {
let pdf_dst = self.pdf_path(key)?;
let mut bytes = Vec::new();
File::open(pdf_src.as_std_path())?.read_to_end(&mut bytes)?;
atomic_write(&pdf_dst, &bytes)?;
}
atomic_write(&meta_path, normalized.as_bytes())?;
let _ = <File as FileExt>::unlock(&lock_file);
Ok(())
}
fn list_recent(&self, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
let mut entries = read_all_entries(&self.metadata_dir)?;
entries.sort_by_key(|e| std::cmp::Reverse(e.fetched_at));
entries.truncate(limit);
Ok(entries)
}
fn search(&self, query: &str, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
let q = query.to_lowercase();
let mut hits = Vec::new();
for path in metadata_files(&self.metadata_dir)? {
let raw = std::fs::read_to_string(path.as_std_path())?;
let Ok(md) = toml::from_str::<Metadata>(&raw) else {
continue;
};
let haystacks = [
md.title.to_lowercase(),
md.authors.join(" ").to_lowercase(),
md.venue.clone().unwrap_or_default().to_lowercase(),
md.publisher.clone().unwrap_or_default().to_lowercase(),
];
if haystacks.iter().any(|h| h.contains(&q)) {
let safekey = safekey_from_metadata_filename(&path);
hits.push(EntryInfo {
safekey,
title: md.title,
year: md.year,
fetched_at: md.doiget.as_ref().map(|d| d.fetched_at),
});
if hits.len() >= limit {
break;
}
}
}
Ok(hits)
}
}
/// Validates that `s` is safe to embed in a file name inside the store.
///
/// Accepted keys are non-empty, do not start with `.`, contain no `..`
/// sequence, no path separators or NUL, and consist solely of ASCII
/// alphanumerics plus `.`, `-` and `_`. Anything else is rejected with
/// `StoreError::PathTraversal` carrying the offending key.
fn guard_safekey(s: &str) -> Result<(), StoreError> {
    let allowed = |c: char| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_');
    let acceptable = !s.is_empty()
        && !s.contains(&['/', '\\', '\0'][..])
        && !s.contains("..")
        && !s.starts_with('.')
        && s.chars().all(allowed);
    if acceptable {
        return Ok(());
    }
    Err(StoreError::PathTraversal {
        path: Utf8PathBuf::from(s),
    })
}
/// Derives the safekey of an entry from the path of its metadata file by
/// taking the file stem; a path without a stem yields an empty key.
fn safekey_from_metadata_filename(p: &Utf8Path) -> Safekey {
    let stem = match p.file_stem() {
        Some(stem) => stem,
        None => "",
    };
    Safekey(stem.to_string())
}
/// Kind of advisory file lock requested from `acquire_lock`.
#[derive(Debug, Clone, Copy)]
enum LockMode {
/// Requested via `try_lock_shared`; `FsStore::read` uses this mode.
Shared,
/// Requested via `try_lock_exclusive`; `FsStore::write` uses this mode.
Exclusive,
}
/// Opens the lock file at `path` for reading and writing, creating it when
/// missing and never truncating existing content.
fn open_or_create_lock_file(path: &Utf8Path) -> Result<File, StoreError> {
    let mut options = OpenOptions::new();
    options.create(true).read(true).write(true).truncate(false);
    Ok(options.open(path.as_std_path())?)
}
/// Acquires a lock of the given `mode` on `lock_file`, polling with
/// non-blocking attempts until it succeeds or `LOCK_TIMEOUT` has elapsed.
///
/// # Errors
///
/// * `StoreError::Io` for any failure other than lock contention.
/// * `StoreError::LockTimeout` (carrying `lock_path`) when the lock is still
///   contended once the deadline has passed.
fn acquire_lock(lock_file: &File, lock_path: &Utf8Path, mode: LockMode) -> Result<(), StoreError> {
    let deadline = Instant::now() + LOCK_TIMEOUT;
    loop {
        let outcome = match mode {
            LockMode::Shared => <File as FileExt>::try_lock_shared(lock_file),
            LockMode::Exclusive => <File as FileExt>::try_lock_exclusive(lock_file),
        };
        let Err(err) = outcome else {
            return Ok(());
        };
        // Only "somebody else holds the lock" is worth retrying.
        if err.raw_os_error() != fs2::lock_contended_error().raw_os_error() {
            return Err(StoreError::Io(err));
        }
        if Instant::now() >= deadline {
            return Err(StoreError::LockTimeout {
                path: lock_path.to_owned(),
            });
        }
        std::thread::sleep(LOCK_POLL_INTERVAL);
    }
}
/// Read-side schema check: compares the on-disk `theirs` version against
/// `SCHEMA_VERSION` and logs a warning when the file is newer.
///
/// A newer version never fails the read; the only error source is a version
/// string that `parse_schema_version` rejects.
fn check_schema_version(theirs: &str) -> Result<(), StoreError> {
    use std::cmp::Ordering;

    let (their_major, their_minor) = parse_schema_version(theirs)?;
    let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
    match their_major.cmp(&our_major) {
        Ordering::Greater => {
            warn!(
                theirs = theirs,
                ours = SCHEMA_VERSION,
                "store entry uses a future-major schema_version; entering read-only mode \
                for this entry (docs/STORE.md §3)"
            );
        }
        Ordering::Equal if their_minor > our_minor => {
            warn!(
                theirs = theirs,
                ours = SCHEMA_VERSION,
                "store entry uses a newer minor schema_version; reading in compatibility mode \
                (docs/STORE.md §3 future-minor tolerance)"
            );
        }
        _ => {}
    }
    Ok(())
}
/// Write-side schema check: refuses to touch an entry whose on-disk version
/// `theirs` is newer (major or minor) than `SCHEMA_VERSION`.
///
/// # Errors
///
/// `StoreError::SchemaTooNew` when the file is newer, or whatever
/// `parse_schema_version` returns for a malformed version string.
fn check_schema_version_for_write(theirs: &str) -> Result<(), StoreError> {
    let on_disk = parse_schema_version(theirs)?;
    let supported = parse_schema_version(SCHEMA_VERSION)?;
    // `(major, minor)` tuples order lexicographically: major first, then minor.
    if on_disk <= supported {
        return Ok(());
    }
    Err(StoreError::SchemaTooNew {
        theirs: theirs.to_string(),
        ours: SCHEMA_VERSION.to_string(),
    })
}
/// Parses a `"<major>.<minor>"` version string into its numeric parts.
///
/// The string is split at the first `.`; both halves must parse as `u32`.
/// Every failure is reported as `StoreError::MissingField` for
/// `schema_version`.
fn parse_schema_version(s: &str) -> Result<(u32, u32), StoreError> {
    fn malformed() -> StoreError {
        StoreError::MissingField {
            field: "schema_version",
        }
    }

    let Some((major_text, minor_text)) = s.split_once('.') else {
        return Err(malformed());
    };
    let major = major_text.parse::<u32>().map_err(|_| malformed())?;
    let minor = minor_text.parse::<u32>().map_err(|_| malformed())?;
    Ok((major, minor))
}
/// Merges `incoming` metadata into what is already on disk (`existing`).
///
/// The result starts out as `incoming`, then:
///
/// * `schema_version` keeps the higher of the two versions when both parse;
/// * every reserved field that `existing` already populated (non-empty
///   string/list, or `Some`) and that differs from `incoming` is preserved
///   from `existing`, with a warning;
/// * the `doiget` table falls back to `existing` when `incoming` has none;
/// * `other` is the union of both maps, with `existing` winning on key
///   collisions.
///
/// Both arguments are owned, so values are moved rather than cloned.
fn merge_metadata(existing: Metadata, incoming: Metadata) -> Metadata {
    // `out` *is* the incoming record; each field is compared against
    // `existing` before it is (possibly) overwritten, so no copy is needed.
    let mut out = incoming;
    if let (Ok(existing_version), Ok(incoming_version)) = (
        parse_schema_version(&existing.schema_version),
        parse_schema_version(&out.schema_version),
    ) {
        if existing_version > incoming_version {
            out.schema_version = existing.schema_version;
        }
    }
    if !existing.title.is_empty() && existing.title != out.title {
        warn!(
            field = "title",
            existing = existing.title.as_str(),
            "preserving reserved field set by another tool (docs/STORE.md §6)"
        );
        out.title = existing.title;
    }
    if !existing.authors.is_empty() && existing.authors != out.authors {
        warn!(
            field = "authors",
            "preserving reserved field set by another tool (docs/STORE.md §6)"
        );
        out.authors = existing.authors;
    }
    // Same preservation rule for every optional reserved field.
    macro_rules! merge_opt {
        ($field:ident) => {
            if existing.$field.is_some() && existing.$field != out.$field {
                warn!(
                    field = stringify!($field),
                    "preserving reserved field set by another tool (docs/STORE.md §6)"
                );
                out.$field = existing.$field;
            }
        };
    }
    merge_opt!(year);
    merge_opt!(doi);
    merge_opt!(arxiv_id);
    merge_opt!(abstract_);
    merge_opt!(venue);
    merge_opt!(publisher);
    merge_opt!(issn);
    merge_opt!(isbn);
    merge_opt!(type_);
    merge_opt!(url);
    merge_opt!(pdf_path);
    if !existing.keywords.is_empty() && existing.keywords != out.keywords {
        warn!(
            field = "keywords",
            "preserving reserved field set by another tool (docs/STORE.md §6)"
        );
        out.keywords = existing.keywords;
    }
    if out.doiget.is_none() && existing.doiget.is_some() {
        out.doiget = existing.doiget;
    }
    // Unknown keys: start from what is on disk and only add incoming keys
    // that are not present yet, so values written by other tools survive.
    let mut merged_other = existing.other;
    for (key, value) in std::mem::take(&mut out.other) {
        merged_other.entry(key).or_insert(value);
    }
    out.other = merged_other;
    out
}
/// Serializes `m` into the store's normalized TOML text, guaranteeing that
/// the result ends with a newline.
fn normalize_toml(m: &Metadata) -> Result<String, StoreError> {
    let tree = toml::Value::try_from(m)?;
    let mut rendered = String::new();
    write_normalized_toml(&tree, &mut rendered)?;
    if rendered.ends_with('\n') {
        return Ok(rendered);
    }
    rendered.push('\n');
    Ok(rendered)
}
/// Writes `value` (which must be a table) to `out` in normalized form:
///
/// 1. `schema_version` first, when present;
/// 2. every other non-table top-level key in sorted order;
/// 3. each top-level sub-table, sorted by name, as a `[name]` section whose
///    keys are sorted as well.
///
/// Keys and section names that are not valid TOML bare keys are emitted as
/// quoted keys so that entries written by other tools round-trip unchanged.
///
/// # Errors
///
/// `StoreError::Serialize` when `value` is not a table or when a value cannot
/// be rendered inline (for example a table nested inside a sub-table).
fn write_normalized_toml(value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
    let table = match value {
        toml::Value::Table(t) => t,
        _ => {
            return Err(StoreError::Serialize(
                <toml::ser::Error as serde::ser::Error>::custom(
                    "Metadata did not serialize to a TOML table",
                ),
            ));
        }
    };
    let mut top_keys: Vec<&String> = Vec::new();
    let mut sub_table_keys: Vec<&String> = Vec::new();
    for (k, v) in table.iter() {
        if matches!(v, toml::Value::Table(_)) {
            sub_table_keys.push(k);
        } else {
            top_keys.push(k);
        }
    }
    top_keys.sort();
    sub_table_keys.sort();
    if let Some(v) = table.get("schema_version") {
        write_kv("schema_version", v, out)?;
    }
    for k in top_keys {
        // Already written as the very first line.
        if k == "schema_version" {
            continue;
        }
        if let Some(v) = table.get(k) {
            write_kv(k, v, out)?;
        }
    }
    for k in sub_table_keys {
        if let Some(toml::Value::Table(sub)) = table.get(k) {
            out.push('\n');
            out.push('[');
            push_toml_key(k, out);
            out.push_str("]\n");
            let sorted: std::collections::BTreeMap<&String, &toml::Value> = sub.iter().collect();
            for (sk, sv) in sorted {
                write_kv(sk, sv, out)?;
            }
        }
    }
    Ok(())
}

/// Appends one `key = value` line (terminated by `\n`) to `out`.
fn write_kv(key: &str, value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
    // Render first so that nothing is appended when the value is rejected.
    let rendered = toml_value_inline(value)?;
    push_toml_key(key, out);
    out.push_str(" = ");
    out.push_str(&rendered);
    out.push('\n');
    Ok(())
}

/// Appends `key` to `out` as a TOML key.
///
/// Non-empty keys made only of ASCII letters, digits, `_` and `-` are valid
/// bare keys and are written verbatim. Anything else is written as a basic
/// (double-quoted) string with `"`, `\` and control characters escaped.
fn push_toml_key(key: &str, out: &mut String) {
    let is_bare = !key.is_empty()
        && key
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
    if is_bare {
        out.push_str(key);
        return;
    }
    out.push('"');
    for c in key.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            c if c.is_control() => out.push_str(&format!("\\u{:04X}", c as u32)),
            c => out.push(c),
        }
    }
    out.push('"');
}
fn toml_value_inline(value: &toml::Value) -> Result<String, StoreError> {
let s = match value {
toml::Value::Table(_) => {
return Err(StoreError::Serialize(
<toml::ser::Error as serde::ser::Error>::custom(
"nested tables not supported by inline writer",
),
));
}
v => {
let mut wrapper = toml::map::Map::new();
wrapper.insert("__v".to_string(), v.clone());
let rendered = toml::to_string(&toml::Value::Table(wrapper))?;
let body = rendered
.strip_prefix("__v = ")
.ok_or_else(|| {
StoreError::Serialize(<toml::ser::Error as serde::ser::Error>::custom(
"unexpected toml singleton format",
))
})?
.trim_end_matches('\n')
.to_string();
body
}
};
Ok(s)
}
/// Replaces the file at `dst` with `bytes` via a staging file.
///
/// The data is written to `<dst file name>.tmp` in the same directory,
/// synced, and then renamed over `dst`. On Unix the parent directory is
/// synced afterwards on a best-effort basis. When writing, syncing or
/// renaming fails, the staging file is removed (best effort) so failed
/// attempts do not leave `.tmp` files behind.
///
/// # Errors
///
/// `InvalidInput` when `dst` has no file name; otherwise the first I/O error
/// encountered while staging or renaming.
fn atomic_write(dst: &Utf8Path, bytes: &[u8]) -> std::io::Result<()> {
    let file_name = dst.file_name().ok_or_else(|| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "destination path has no file name",
        )
    })?;
    let mut tmp_path = dst.to_path_buf();
    tmp_path.set_file_name(format!("{}.tmp", file_name));
    let staged = {
        let mut f = OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .open(tmp_path.as_std_path())?;
        f.write_all(bytes).and_then(|()| f.sync_all())
        // `f` is closed at the end of this block, before the rename below.
    };
    let published =
        staged.and_then(|()| std::fs::rename(tmp_path.as_std_path(), dst.as_std_path()));
    if let Err(err) = published {
        // Best effort: the original error is what the caller needs to see.
        let _ = std::fs::remove_file(tmp_path.as_std_path());
        return Err(err);
    }
    #[cfg(unix)]
    {
        if let Some(parent) = dst.parent() {
            if let Ok(dir) = File::open(parent.as_std_path()) {
                let _ = dir.sync_all();
            }
        }
    }
    Ok(())
}
/// Ensures that `path` is a directory, creating it (and any missing parents)
/// when necessary.
///
/// * An existing directory is left untouched, including its permissions.
/// * An existing non-directory is reported as an `AlreadyExists` error
///   instead of being silently accepted.
/// * On Unix a newly created leaf directory is created with mode `0o700`
///   directly, so it is never visible with wider permissions; the mode is
///   then set explicitly because the process umask may have cleared bits.
///   Missing parent directories are created with default permissions.
fn create_dir_secure(path: &std::path::Path) -> std::io::Result<()> {
    if let Ok(meta) = std::fs::metadata(path) {
        if meta.is_dir() {
            return Ok(());
        }
        return Err(std::io::Error::new(
            std::io::ErrorKind::AlreadyExists,
            format!("{} exists but is not a directory", path.display()),
        ));
    }
    #[cfg(unix)]
    {
        use std::os::unix::fs::{DirBuilderExt, PermissionsExt};
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        match std::fs::DirBuilder::new().mode(0o700).create(path) {
            Ok(()) => {}
            // Somebody else created the directory in the meantime.
            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists && path.is_dir() => {}
            Err(e) => return Err(e),
        }
        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700))?;
    }
    #[cfg(not(unix))]
    {
        std::fs::create_dir_all(path)?;
    }
    Ok(())
}
/// Lists the metadata files directly inside `metadata_dir`.
///
/// Only regular files whose UTF-8 name ends in `.toml` are returned; entries
/// with non-UTF-8 paths are skipped. A missing directory yields an empty
/// list, while errors from reading the directory are propagated.
fn metadata_files(metadata_dir: &Utf8Path) -> std::io::Result<Vec<Utf8PathBuf>> {
    let mut found = Vec::new();
    if !metadata_dir.exists() {
        return Ok(found);
    }
    for dir_entry in std::fs::read_dir(metadata_dir.as_std_path())? {
        let dir_entry = dir_entry?;
        if !dir_entry.file_type()?.is_file() {
            continue;
        }
        let Ok(candidate) = Utf8PathBuf::from_path_buf(dir_entry.path()) else {
            continue;
        };
        let is_metadata = matches!(
            candidate.file_name(),
            Some(name) if name.ends_with(".toml") && !name.ends_with(".tmp")
        );
        if is_metadata {
            found.push(candidate);
        }
    }
    Ok(found)
}
/// Loads a summary (`EntryInfo`) for every metadata file in `metadata_dir`.
///
/// Files that cannot be parsed as `Metadata` are skipped; a file that cannot
/// be read aborts the scan with the I/O error.
fn read_all_entries(metadata_dir: &Utf8Path) -> Result<Vec<EntryInfo>, StoreError> {
    let mut entries = Vec::new();
    for file in metadata_files(metadata_dir)? {
        let text = std::fs::read_to_string(file.as_std_path())?;
        if let Ok(md) = toml::from_str::<Metadata>(&text) {
            entries.push(EntryInfo {
                safekey: safekey_from_metadata_filename(&file),
                title: md.title,
                year: md.year,
                fetched_at: md.doiget.map(|d| d.fetched_at),
            });
        }
    }
    Ok(entries)
}
// Identity function that only exists to reference `DoigetExtension` outside
// of the test module. NOTE(review): presumably this keeps the `use` of
// `DoigetExtension` from being reported as unused in non-test builds —
// confirm before removing.
#[allow(dead_code)]
fn _doiget_extension_is_visible(d: DoigetExtension) -> DoigetExtension {
d
}
// Integration-style tests for `FsStore`; every test works inside its own
// temporary directory.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
use super::*;
use std::collections::BTreeMap;
use std::sync::Arc;
use std::thread;
use chrono::TimeZone;
use tempfile::TempDir;
use crate::{Doi, Safekey, SCHEMA_VERSION};
// Converts the temporary directory's path into a `Utf8PathBuf`.
fn tmp_dir_utf8(dir: &TempDir) -> Utf8PathBuf {
Utf8PathBuf::from_path_buf(dir.path().to_path_buf()).expect("temp dir path must be UTF-8")
}
// Safekey shared by most tests.
fn sample_safekey() -> Safekey {
Safekey("doi_10.1234_example".to_string())
}
// Fully populated metadata record (including the `doiget` table) used as the
// baseline input; individual tests tweak single fields.
fn sample_metadata() -> Metadata {
Metadata {
schema_version: SCHEMA_VERSION.to_string(),
title: "Sample Paper Title".to_string(),
authors: vec!["Alice Researcher".to_string(), "Bob Coauthor".to_string()],
year: Some(2026),
doi: Some(Doi("10.1234/example".to_string())),
arxiv_id: None,
abstract_: Some("A short abstract.".to_string()),
venue: Some("Phys. Rev. X".to_string()),
publisher: Some("American Physical Society".to_string()),
issn: Some("2160-3308".to_string()),
isbn: None,
type_: Some("journal-article".to_string()),
keywords: vec!["physics".to_string(), "tdd".to_string()],
url: Some("https://example.test/paper".to_string()),
pdf_path: Some("doi_10.1234_example.pdf".to_string()),
doiget: Some(DoigetExtension {
fetched_at: chrono::Utc.with_ymd_and_hms(2026, 5, 6, 12, 0, 0).unwrap(),
source: "unpaywall".to_string(),
license: "CC-BY-4.0".to_string(),
size_bytes: 1234567,
mcp_call_id: Some("01JCKZ7Q0000000000000000AB".to_string()),
}),
other: BTreeMap::new(),
}
}
// Creates a store rooted at `<tmp>/papers`.
fn fresh_store(dir: &TempDir) -> FsStore {
let root = tmp_dir_utf8(dir).join("papers");
FsStore::new(root).expect("FsStore::new")
}
// A write followed by a read returns every reserved field unchanged.
#[test]
fn roundtrip_reserved_fields() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = sample_safekey();
let m = sample_metadata();
store.write(&key, &m, None).expect("write");
let read = store.read(&key).expect("read").expect("Some");
assert_eq!(read.schema_version, m.schema_version);
assert_eq!(read.title, m.title);
assert_eq!(read.authors, m.authors);
assert_eq!(read.year, m.year);
assert_eq!(
read.doi.as_ref().map(|d| d.as_str()),
Some("10.1234/example")
);
assert_eq!(read.abstract_, m.abstract_);
assert_eq!(read.venue, m.venue);
assert_eq!(read.publisher, m.publisher);
assert_eq!(read.issn, m.issn);
assert_eq!(read.type_, m.type_);
assert_eq!(read.keywords, m.keywords);
assert_eq!(read.url, m.url);
assert_eq!(read.pdf_path, m.pdf_path);
}
// The `[doiget]` extension table survives a write/read cycle field by field.
#[test]
fn roundtrip_doiget_extension() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = sample_safekey();
let m = sample_metadata();
store.write(&key, &m, None).expect("write");
let read = store.read(&key).expect("read").expect("Some");
let d = read.doiget.expect("doiget table present");
let want = m.doiget.expect("input doiget");
assert_eq!(d.fetched_at, want.fetched_at);
assert_eq!(d.source, want.source);
assert_eq!(d.license, want.license);
assert_eq!(d.size_bytes, want.size_bytes);
assert_eq!(d.mcp_call_id, want.mcp_call_id);
}
// Reading a key that was never written yields `Ok(None)`, not an error.
#[test]
fn read_returns_none_for_missing_safekey() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = Safekey("nonexistent".to_string());
let res = store.read(&key).expect("read ok");
assert!(res.is_none(), "expected Ok(None), got {:?}", res);
}
// A hand-written file with schema_version "2.0" can be read, but writing to
// the same key fails with `SchemaTooNew`.
#[test]
fn schema_too_new_blocks_writes_but_allows_reads() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = sample_safekey();
let meta_path = store.metadata_path(&key).expect("path");
std::fs::create_dir_all(meta_path.parent().expect("parent").as_std_path()).expect("mkdir");
let body = "schema_version = \"2.0\"\ntitle = \"Future\"\nauthors = []\n";
std::fs::write(meta_path.as_std_path(), body).expect("write");
let read = store.read(&key).expect("read ok");
assert!(read.is_some(), "future-major file must be readable");
let m = sample_metadata();
let err = store.write(&key, &m, None).expect_err("write must fail");
match err {
StoreError::SchemaTooNew { theirs, ours } => {
assert_eq!(theirs, "2.0");
assert_eq!(ours, SCHEMA_VERSION);
}
other => panic!("expected SchemaTooNew, got {:?}", other),
}
}
// Two threads write the same key concurrently; both writes must succeed and
// the final `doiget.source` must be one of the two written values.
#[test]
fn concurrent_writers_serialize_via_flock() {
let dir = TempDir::new().expect("tmp");
let store = Arc::new(fresh_store(&dir));
let key = sample_safekey();
store.write(&key, &sample_metadata(), None).expect("seed");
let mut handles = Vec::new();
for source in ["unpaywall", "europepmc"] {
let store = Arc::clone(&store);
let key = key.clone();
handles.push(thread::spawn(move || {
let mut m = sample_metadata();
if let Some(d) = m.doiget.as_mut() {
d.source = source.to_string();
}
store.write(&key, &m, None).expect("write");
}));
}
for h in handles {
h.join().expect("join");
}
let read = store.read(&key).expect("read").expect("Some");
let source = read.doiget.expect("doiget").source;
assert!(
source == "unpaywall" || source == "europepmc",
"winning source must be one of the contenders, got {}",
source
);
}
// Three entries with increasing `fetched_at` come back newest first.
#[test]
fn list_recent_orders_by_fetched_at_desc() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
for (idx, year_seed) in [(1, 2024_u32), (2, 2025), (3, 2026)] {
let key = Safekey(format!("doi_10.1234_entry{}", idx));
let mut m = sample_metadata();
m.title = format!("Entry {}", idx);
if let Some(d) = m.doiget.as_mut() {
d.fetched_at = chrono::Utc
.with_ymd_and_hms(year_seed as i32, 5, 6, 12, 0, 0)
.unwrap();
}
store.write(&key, &m, None).expect("write");
}
let recent = store.list_recent(10).expect("list");
assert_eq!(recent.len(), 3, "expected 3 entries, got {}", recent.len());
assert_eq!(recent[0].title, "Entry 3");
assert_eq!(recent[1].title, "Entry 2");
assert_eq!(recent[2].title, "Entry 1");
for w in recent.windows(2) {
assert!(
w[0].fetched_at >= w[1].fetched_at,
"recent[].fetched_at must be non-increasing"
);
}
}
// A lower-case query matches a mixed-case title; an unrelated query finds
// nothing.
#[test]
fn search_finds_by_title_substring() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = Safekey("doi_10.1234_quantum".to_string());
let mut m = sample_metadata();
m.title = "Quantum Stuff and Other Topics".to_string();
store.write(&key, &m, None).expect("write");
let hits = store.search("quantum", 10).expect("search");
assert_eq!(hits.len(), 1, "expected 1 hit, got {}", hits.len());
assert_eq!(hits[0].title, "Quantum Stuff and Other Topics");
let empty = store.search("relativity", 10).expect("search");
assert!(empty.is_empty(), "expected no hits, got {:?}", empty);
}
// A safekey containing `../` is rejected by both `read` and `write`.
#[test]
fn path_traversal_in_safekey_blocked() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let bad = Safekey("../etc/passwd".to_string());
match store.read(&bad) {
Err(StoreError::PathTraversal { .. }) => {}
other => panic!("expected PathTraversal, got {:?}", other),
}
let m = sample_metadata();
match store.write(&bad, &m, None) {
Err(StoreError::PathTraversal { .. }) => {}
other => panic!("expected PathTraversal, got {:?}", other),
}
}
// Inspects the raw file: schema_version is the first line, the file is
// LF-terminated, and keys inside `[doiget]` appear in sorted order.
#[test]
fn write_then_read_normalized_toml_alphabetizes_keys() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = sample_safekey();
store.write(&key, &sample_metadata(), None).expect("write");
let path = store.metadata_path(&key).expect("path");
let raw = std::fs::read_to_string(path.as_std_path()).expect("read");
let first_line = raw.lines().next().expect("at least one line");
assert!(
first_line.starts_with("schema_version = "),
"first line must be schema_version, got: {:?}",
first_line
);
assert!(raw.ends_with('\n'), "file must end with a newline");
assert!(!raw.contains('\r'), "no CR allowed; LF only");
assert!(raw.contains("\n[doiget]\n"), "doiget sub-table missing");
let doiget_idx = raw.find("[doiget]").expect("doiget block");
let after = &raw[doiget_idx..];
let fetched_at_idx = after
.find("fetched_at = ")
.expect("fetched_at key in doiget");
let license_idx = after.find("license = ").expect("license key in doiget");
assert!(
fetched_at_idx < license_idx,
"fetched_at must precede license within [doiget]"
);
}
// A pre-existing file with an unknown `[bibliofetch]` table keeps that table
// and its own title after a write from this store.
#[test]
fn write_preserves_unknown_table_from_existing_file() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = sample_safekey();
let meta_path = store.metadata_path(&key).expect("path");
let body = format!(
"schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\n\
[bibliofetch]\nharvest = \"2026-01-01\"\n",
SCHEMA_VERSION
);
std::fs::write(meta_path.as_std_path(), body).expect("write");
let mut m = sample_metadata();
// Change the incoming title, then write over the seeded file.
m.title = "Doiget Wins?".to_string(); store.write(&key, &m, None).expect("write");
let read_raw = std::fs::read_to_string(meta_path.as_std_path()).expect("re-read");
assert!(
read_raw.contains("bibliofetch"),
"[bibliofetch] table was dropped: {}",
read_raw
);
assert!(
read_raw.contains("title = \"Existing\""),
"doiget overwrote a reserved field set by another tool: {}",
read_raw
);
}
// Full read -> write -> read cycle over a file that carries an unknown table
// with typed values and an unknown top-level scalar; all of them must
// survive, and the existing title must not be overwritten.
#[test]
fn bibliofetch_typed_table_and_unknown_scalar_survive_roundtrip() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = sample_safekey();
let meta_path = store.metadata_path(&key).expect("path");
let body = format!(
"schema_version = \"{}\"\n\
title = \"Existing\"\n\
authors = [\"Carol\"]\n\
zotero_key = \"ABC123\"\n\n\
[bibliofetch]\n\
harvest = \"2026-02-03\"\n\
count = 42\n\
tags = [\"x\", \"y\"]\n",
SCHEMA_VERSION
);
std::fs::write(meta_path.as_std_path(), body).expect("seed write");
let m0 = store.read(&key).expect("read ok").expect("entry present");
assert!(
m0.other.contains_key("bibliofetch"),
"[bibliofetch] not captured into `other` on read: {:?}",
m0.other
);
assert_eq!(
m0.other.get("zotero_key").and_then(|v| v.as_str()),
Some("ABC123"),
"unknown top-level scalar not captured: {:?}",
m0.other
);
let mut m_doiget = sample_metadata();
m_doiget.title = "Doiget Would Overwrite".to_string();
store.write(&key, &m_doiget, None).expect("doiget write");
let m1 = store
.read(&key)
.expect("re-read ok")
.expect("entry present");
assert_eq!(
m1.title, "Existing",
"STORE.md §6: doiget overwrote a reserved field"
);
let bf = m1
.other
.get("bibliofetch")
.and_then(|v| v.as_table())
.expect("[bibliofetch] table survived read->write->read");
assert_eq!(
bf.get("harvest").and_then(|v| v.as_str()),
Some("2026-02-03")
);
assert_eq!(bf.get("count").and_then(|v| v.as_integer()), Some(42));
let tags = bf
.get("tags")
.and_then(|v| v.as_array())
.expect("tags array survived");
let tags: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect();
assert_eq!(tags, vec!["x", "y"]);
assert_eq!(
m1.other.get("zotero_key").and_then(|v| v.as_str()),
Some("ABC123"),
"unknown top-level scalar lost across the cycle"
);
let raw = std::fs::read_to_string(meta_path.as_std_path()).expect("raw re-read");
assert!(raw.ends_with('\n'), "missing trailing newline: {raw:?}");
}
// When both the file on disk and the incoming record define the same unknown
// key, the value already on disk wins.
#[test]
fn other_key_collision_prefers_existing() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = sample_safekey();
let meta_path = store.metadata_path(&key).expect("path");
let body = format!(
"schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\
zotero_key = \"FROM_BIBLIOFETCH\"\n",
SCHEMA_VERSION
);
std::fs::write(meta_path.as_std_path(), body).expect("seed");
let mut m = sample_metadata();
m.other.insert(
"zotero_key".to_string(),
toml::Value::String("FROM_DOIGET".to_string()),
);
store.write(&key, &m, None).expect("write");
let got = store.read(&key).expect("read").expect("present");
assert_eq!(
got.other.get("zotero_key").and_then(|v| v.as_str()),
Some("FROM_BIBLIOFETCH"),
"STORE.md §6: existing `other` value must win on collision"
);
}
// Passing a source PDF to `write` places a byte-identical copy at the path
// returned by `pdf_path`.
#[test]
fn pdf_is_copied_atomically_on_write() {
let dir = TempDir::new().expect("tmp");
let store = fresh_store(&dir);
let key = sample_safekey();
let src_dir = TempDir::new().expect("tmp src");
let src_path = Utf8PathBuf::from_path_buf(src_dir.path().to_path_buf())
.expect("utf8 src dir")
.join("input.pdf");
std::fs::write(src_path.as_std_path(), b"%PDF-1.7 synthetic").expect("write src");
store
.write(&key, &sample_metadata(), Some(&src_path))
.expect("write");
let dst = store.pdf_path(&key).expect("pdf path");
let bytes = std::fs::read(dst.as_std_path()).expect("read dst");
assert_eq!(bytes, b"%PDF-1.7 synthetic");
}
}