1use std::fs::{File, OpenOptions};
32use std::io::{Read, Write};
33use std::time::{Duration, Instant};
34
35use camino::{Utf8Path, Utf8PathBuf};
36use fs2::FileExt;
37use tracing::warn;
38
39use super::metadata::{DoigetExtension, Metadata};
40use super::{EntryInfo, Store, StoreError};
41use crate::{Safekey, SCHEMA_VERSION};
42
/// Name of the sub-directory (joined onto the store root) that holds the
/// per-entry `<safekey>.toml` metadata files and their `.toml.lock` lock files.
const METADATA_DIR: &str = ".metadata";

/// Upper bound on how long `acquire_lock` keeps retrying a contended lock
/// before it gives up with `StoreError::LockTimeout`.
const LOCK_TIMEOUT: Duration = Duration::from_secs(5);

/// Pause between two consecutive lock attempts while the lock is contended.
const LOCK_POLL_INTERVAL: Duration = Duration::from_millis(50);
54
/// Filesystem-backed store rooted at a single directory.
///
/// PDFs are written directly under `root` as `<safekey>.pdf`; metadata files
/// and lock files are written under `metadata_dir`.
#[derive(Debug, Clone)]
pub struct FsStore {
    /// Root directory of the store.
    root: Utf8PathBuf,
    /// `root` joined with `METADATA_DIR`.
    metadata_dir: Utf8PathBuf,
}
61
62impl FsStore {
63 pub fn new(root: Utf8PathBuf) -> Result<Self, StoreError> {
74 if root.exists() && !root.is_dir() {
77 return Err(StoreError::Io(std::io::Error::new(
78 std::io::ErrorKind::AlreadyExists,
79 format!("store root {} exists but is not a directory", root),
80 )));
81 }
82 let metadata_dir = root.join(METADATA_DIR);
83
84 create_dir_secure(root.as_std_path())?;
85 create_dir_secure(metadata_dir.as_std_path())?;
86
87 Ok(Self { root, metadata_dir })
88 }
89
90 pub fn root(&self) -> &Utf8Path {
92 &self.root
93 }
94
95 fn metadata_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
104 guard_safekey(key.as_str())?;
105 let p = self.metadata_dir.join(format!("{}.toml", key.as_str()));
106 if p.parent() != Some(self.metadata_dir.as_path()) {
110 return Err(StoreError::PathTraversal { path: p });
111 }
112 Ok(p)
113 }
114
115 fn lock_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
116 guard_safekey(key.as_str())?;
117 Ok(self
118 .metadata_dir
119 .join(format!("{}.toml.lock", key.as_str())))
120 }
121
122 fn pdf_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
123 guard_safekey(key.as_str())?;
124 Ok(self.root.join(format!("{}.pdf", key.as_str())))
125 }
126}
127
128impl Store for FsStore {
129 fn read(&self, key: &Safekey) -> Result<Option<Metadata>, StoreError> {
130 let meta_path = self.metadata_path(key)?;
131 if !meta_path.exists() {
132 return Ok(None);
133 }
134
135 let lock_path = self.lock_path(key)?;
140 let lock_file = open_or_create_lock_file(&lock_path)?;
141 acquire_lock(&lock_file, &lock_path, LockMode::Shared)?;
142
143 let raw = std::fs::read_to_string(meta_path.as_std_path())?;
144 let _ = <File as FileExt>::unlock(&lock_file);
149
150 let metadata: Metadata = toml::from_str(&raw)?;
151 check_schema_version(&metadata.schema_version)?;
152 Ok(Some(metadata))
153 }
154
155 fn write(&self, key: &Safekey, m: &Metadata, pdf: Option<&Utf8Path>) -> Result<(), StoreError> {
156 let meta_path = self.metadata_path(key)?;
157 let lock_path = self.lock_path(key)?;
158 let lock_file = open_or_create_lock_file(&lock_path)?;
159 acquire_lock(&lock_file, &lock_path, LockMode::Exclusive)?;
160
161 let merged = if meta_path.exists() {
167 let raw = std::fs::read_to_string(meta_path.as_std_path())?;
168 let existing: Metadata = toml::from_str(&raw)?;
169 check_schema_version_for_write(&existing.schema_version)?;
170 merge_metadata(existing, m.clone())
171 } else {
172 m.clone()
173 };
174
175 let normalized = normalize_toml(&merged)?;
178
179 if let Some(pdf_src) = pdf {
192 let pdf_dst = self.pdf_path(key)?;
193 let mut bytes = Vec::new();
194 File::open(pdf_src.as_std_path())?.read_to_end(&mut bytes)?;
195 atomic_write(&pdf_dst, &bytes)?;
197 }
198
199 atomic_write(&meta_path, normalized.as_bytes())?;
203
204 let _ = <File as FileExt>::unlock(&lock_file);
205 Ok(())
206 }
207
208 fn list_recent(&self, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
209 let mut entries = read_all_entries(&self.metadata_dir)?;
210 entries.sort_by_key(|e| std::cmp::Reverse(e.fetched_at));
213 entries.truncate(limit);
214 Ok(entries)
215 }
216
217 fn search(&self, query: &str, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
221 let q = query.to_lowercase();
222 let mut hits = Vec::new();
223 for path in metadata_files(&self.metadata_dir)? {
224 let raw = std::fs::read_to_string(path.as_std_path())?;
225 let Ok(md) = toml::from_str::<Metadata>(&raw) else {
226 continue;
229 };
230 let haystacks = [
231 md.title.to_lowercase(),
232 md.authors.join(" ").to_lowercase(),
233 md.venue.clone().unwrap_or_default().to_lowercase(),
234 md.publisher.clone().unwrap_or_default().to_lowercase(),
235 ];
236 if haystacks.iter().any(|h| h.contains(&q)) {
237 let safekey = safekey_from_metadata_filename(&path);
238 hits.push(EntryInfo {
239 safekey,
240 title: md.title,
241 year: md.year,
242 fetched_at: md.doiget.as_ref().map(|d| d.fetched_at),
243 });
244 if hits.len() >= limit {
245 break;
246 }
247 }
248 }
249 Ok(hits)
250 }
251}
252
253fn guard_safekey(s: &str) -> Result<(), StoreError> {
263 let bad = s.is_empty()
264 || s.contains('/')
265 || s.contains('\\')
266 || s.contains("..")
267 || s.contains('\0')
268 || s.starts_with('.')
269 || !s
270 .chars()
271 .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_');
272 if bad {
273 Err(StoreError::PathTraversal {
274 path: Utf8PathBuf::from(s),
275 })
276 } else {
277 Ok(())
278 }
279}
280
281fn safekey_from_metadata_filename(p: &Utf8Path) -> Safekey {
285 Safekey(p.file_stem().unwrap_or("").to_string())
286}
287
/// Kind of file lock requested from `acquire_lock`.
#[derive(Debug, Clone, Copy)]
enum LockMode {
    /// Acquired via `try_lock_shared`; used by `read`.
    Shared,
    /// Acquired via `try_lock_exclusive`; used by `write`.
    Exclusive,
}
296
297fn open_or_create_lock_file(path: &Utf8Path) -> Result<File, StoreError> {
300 let f = OpenOptions::new()
301 .create(true)
302 .read(true)
303 .write(true)
304 .truncate(false)
305 .open(path.as_std_path())?;
306 Ok(f)
307}
308
309fn acquire_lock(lock_file: &File, lock_path: &Utf8Path, mode: LockMode) -> Result<(), StoreError> {
312 let deadline = Instant::now() + LOCK_TIMEOUT;
313 loop {
314 let attempt = match mode {
320 LockMode::Shared => <File as FileExt>::try_lock_shared(lock_file),
321 LockMode::Exclusive => <File as FileExt>::try_lock_exclusive(lock_file),
322 };
323 match attempt {
324 Ok(()) => return Ok(()),
325 Err(e) => {
326 let contended = e.raw_os_error() == fs2::lock_contended_error().raw_os_error();
327 if !contended {
328 return Err(StoreError::Io(e));
330 }
331 if Instant::now() >= deadline {
332 return Err(StoreError::LockTimeout {
333 path: lock_path.to_owned(),
334 });
335 }
336 std::thread::sleep(LOCK_POLL_INTERVAL);
337 }
338 }
339 }
340}
341
342fn check_schema_version(theirs: &str) -> Result<(), StoreError> {
347 let (their_major, their_minor) = parse_schema_version(theirs)?;
348 let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
349 if their_major > our_major {
350 warn!(
351 theirs = theirs,
352 ours = SCHEMA_VERSION,
353 "store entry uses a future-major schema_version; entering read-only mode \
354 for this entry (docs/STORE.md §3)"
355 );
356 } else if their_major == our_major && their_minor > our_minor {
357 warn!(
358 theirs = theirs,
359 ours = SCHEMA_VERSION,
360 "store entry uses a newer minor schema_version; reading in compatibility mode \
361 (docs/STORE.md §3 future-minor tolerance)"
362 );
363 }
364 Ok(())
365}
366
367fn check_schema_version_for_write(theirs: &str) -> Result<(), StoreError> {
371 let (their_major, their_minor) = parse_schema_version(theirs)?;
372 let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
373 if their_major > our_major || (their_major == our_major && their_minor > our_minor) {
374 return Err(StoreError::SchemaTooNew {
375 theirs: theirs.to_string(),
376 ours: SCHEMA_VERSION.to_string(),
377 });
378 }
379 Ok(())
380}
381
382fn parse_schema_version(s: &str) -> Result<(u32, u32), StoreError> {
383 let (maj, min) = s.split_once('.').ok_or(StoreError::MissingField {
384 field: "schema_version",
385 })?;
386 let maj: u32 = maj.parse().map_err(|_| StoreError::MissingField {
387 field: "schema_version",
388 })?;
389 let min: u32 = min.parse().map_err(|_| StoreError::MissingField {
390 field: "schema_version",
391 })?;
392 Ok((maj, min))
393}
394
395fn merge_metadata(existing: Metadata, incoming: Metadata) -> Metadata {
404 let mut out = incoming.clone();
405
406 if let (Ok((em, en)), Ok((im, in_))) = (
409 parse_schema_version(&existing.schema_version),
410 parse_schema_version(&incoming.schema_version),
411 ) {
412 if (em, en) > (im, in_) {
413 out.schema_version = existing.schema_version.clone();
414 }
415 }
416
417 if !existing.title.is_empty() && existing.title != incoming.title {
420 warn!(
421 field = "title",
422 existing = existing.title.as_str(),
423 "preserving reserved field set by another tool (docs/STORE.md §6)"
424 );
425 out.title = existing.title;
426 }
427 if !existing.authors.is_empty() && existing.authors != incoming.authors {
428 warn!(
429 field = "authors",
430 "preserving reserved field set by another tool (docs/STORE.md §6)"
431 );
432 out.authors = existing.authors;
433 }
434
435 macro_rules! merge_opt {
437 ($field:ident) => {
438 if existing.$field.is_some() && existing.$field != incoming.$field {
439 warn!(
440 field = stringify!($field),
441 "preserving reserved field set by another tool (docs/STORE.md §6)"
442 );
443 out.$field = existing.$field;
444 }
445 };
446 }
447 merge_opt!(year);
448 merge_opt!(doi);
449 merge_opt!(arxiv_id);
450 merge_opt!(abstract_);
451 merge_opt!(venue);
452 merge_opt!(volume);
453 merge_opt!(issue);
454 merge_opt!(pages);
455 merge_opt!(publisher);
456 merge_opt!(issn);
457 merge_opt!(isbn);
458 merge_opt!(type_);
459 merge_opt!(url);
460 merge_opt!(pdf_path);
461
462 if !existing.keywords.is_empty() && existing.keywords != incoming.keywords {
464 warn!(
465 field = "keywords",
466 "preserving reserved field set by another tool (docs/STORE.md §6)"
467 );
468 out.keywords = existing.keywords;
469 }
470
471 if out.doiget.is_none() && existing.doiget.is_some() {
475 out.doiget = existing.doiget;
476 }
477
478 let mut merged_other = existing.other;
487 for (k, v) in out.other.iter() {
488 merged_other.entry(k.clone()).or_insert_with(|| v.clone());
489 }
490 out.other = merged_other;
491
492 out
493}
494
495fn normalize_toml(m: &Metadata) -> Result<String, StoreError> {
502 let value = toml::Value::try_from(m)?;
505 let mut out = String::new();
506 write_normalized_toml(&value, &mut out)?;
507 if !out.ends_with('\n') {
508 out.push('\n');
509 }
510 Ok(out)
511}
512
513fn write_normalized_toml(value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
518 let table = match value {
519 toml::Value::Table(t) => t,
520 _ => {
521 return Err(StoreError::Serialize(
522 <toml::ser::Error as serde::ser::Error>::custom(
523 "Metadata did not serialize to a TOML table",
524 ),
525 ));
526 }
527 };
528
529 let mut top_keys: Vec<&String> = Vec::new();
532 let mut sub_table_keys: Vec<&String> = Vec::new();
533 for (k, v) in table.iter() {
534 if matches!(v, toml::Value::Table(_)) {
535 sub_table_keys.push(k);
536 } else {
537 top_keys.push(k);
538 }
539 }
540 top_keys.sort();
541 sub_table_keys.sort();
542
543 if let Some(v) = table.get("schema_version") {
545 write_kv("schema_version", v, out)?;
546 }
547 for k in top_keys {
548 if k == "schema_version" {
549 continue;
550 }
551 if let Some(v) = table.get(k) {
552 write_kv(k, v, out)?;
553 }
554 }
555 for k in sub_table_keys {
556 if let Some(toml::Value::Table(sub)) = table.get(k) {
557 out.push('\n');
558 out.push('[');
559 out.push_str(k);
560 out.push_str("]\n");
561 let sorted: std::collections::BTreeMap<&String, &toml::Value> = sub.iter().collect();
563 for (sk, sv) in sorted {
564 write_kv(sk, sv, out)?;
565 }
566 }
567 }
568 Ok(())
569}
570
571fn write_kv(key: &str, value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
575 out.push_str(key);
576 out.push_str(" = ");
577 let rendered = toml_value_inline(value)?;
578 out.push_str(&rendered);
579 out.push('\n');
580 Ok(())
581}
582
583fn toml_value_inline(value: &toml::Value) -> Result<String, StoreError> {
586 let s = match value {
587 toml::Value::Table(_) => {
588 return Err(StoreError::Serialize(
589 <toml::ser::Error as serde::ser::Error>::custom(
590 "nested tables not supported by inline writer",
591 ),
592 ));
593 }
594 v => {
599 let mut wrapper = toml::map::Map::new();
600 wrapper.insert("__v".to_string(), v.clone());
601 let rendered = toml::to_string(&toml::Value::Table(wrapper))?;
602 let body = rendered
605 .strip_prefix("__v = ")
606 .ok_or_else(|| {
607 StoreError::Serialize(<toml::ser::Error as serde::ser::Error>::custom(
608 "unexpected toml singleton format",
609 ))
610 })?
611 .trim_end_matches('\n')
612 .to_string();
613 body
614 }
615 };
616 Ok(s)
617}
618
619fn atomic_write(dst: &Utf8Path, bytes: &[u8]) -> std::io::Result<()> {
628 let file_name = dst.file_name().ok_or_else(|| {
629 std::io::Error::new(
630 std::io::ErrorKind::InvalidInput,
631 "destination path has no file name",
632 )
633 })?;
634 let mut tmp_path = dst.to_path_buf();
635 tmp_path.set_file_name(format!("{}.tmp", file_name));
636
637 {
638 let mut f = OpenOptions::new()
639 .create(true)
640 .write(true)
641 .truncate(true)
642 .open(tmp_path.as_std_path())?;
643 f.write_all(bytes)?;
644 f.sync_all()?;
645 }
646 std::fs::rename(tmp_path.as_std_path(), dst.as_std_path())?;
647
648 #[cfg(unix)]
652 {
653 if let Some(parent) = dst.parent() {
654 if let Ok(dir) = File::open(parent.as_std_path()) {
655 let _ = dir.sync_all();
656 }
657 }
658 }
659
660 Ok(())
661}
662
/// Ensure `path` exists as a directory, creating it (and any missing parents)
/// if necessary.
///
/// An already existing directory is left untouched, including its
/// permissions. A newly created directory gets mode `0o700` on Unix.
///
/// # Errors
///
/// * `AlreadyExists` when `path` exists but is not a directory.
/// * Any other I/O error from inspecting, creating or chmod-ing the path.
fn create_dir_secure(path: &std::path::Path) -> std::io::Result<()> {
    match std::fs::metadata(path) {
        Ok(existing) if existing.is_dir() => return Ok(()),
        // Something that is not a directory occupies the path; reporting
        // success here would only defer the failure to the first read/write.
        Ok(_) => {
            return Err(std::io::Error::new(
                std::io::ErrorKind::AlreadyExists,
                format!("{} exists but is not a directory", path.display()),
            ));
        }
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
        Err(err) => return Err(err),
    }

    std::fs::create_dir_all(path)?;
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Owner-only access for the freshly created directory.
        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700))?;
    }
    Ok(())
}
678
679fn metadata_files(metadata_dir: &Utf8Path) -> std::io::Result<Vec<Utf8PathBuf>> {
686 let mut out = Vec::new();
687 if !metadata_dir.exists() {
688 return Ok(out);
689 }
690 for entry in std::fs::read_dir(metadata_dir.as_std_path())? {
691 let entry = entry?;
692 if !entry.file_type()?.is_file() {
693 continue;
694 }
695 let path = entry.path();
696 let utf8_path = match Utf8PathBuf::from_path_buf(path) {
697 Ok(p) => p,
698 Err(_) => continue,
699 };
700 let name = match utf8_path.file_name() {
701 Some(n) => n,
702 None => continue,
703 };
704 if name.ends_with(".toml") && !name.ends_with(".tmp") {
705 out.push(utf8_path);
706 }
707 }
708 Ok(out)
709}
710
711fn read_all_entries(metadata_dir: &Utf8Path) -> Result<Vec<EntryInfo>, StoreError> {
712 let mut out = Vec::new();
713 for path in metadata_files(metadata_dir)? {
714 let raw = std::fs::read_to_string(path.as_std_path())?;
715 let Ok(md) = toml::from_str::<Metadata>(&raw) else {
716 continue;
718 };
719 let safekey = safekey_from_metadata_filename(&path);
720 out.push(EntryInfo {
721 safekey,
722 title: md.title,
723 year: md.year,
724 fetched_at: md.doiget.map(|d| d.fetched_at),
725 });
726 }
727 Ok(out)
728}
729
// Identity function that only names `DoigetExtension` in its signature.
// It is allowed to be unused (`allow(dead_code)`).
// NOTE(review): presumably this exists so the `DoigetExtension` import stays
// used outside the test module; confirm before removing.
#[allow(dead_code)]
fn _doiget_extension_is_visible(d: DoigetExtension) -> DoigetExtension {
    d
}
737
// Integration-style tests that exercise `FsStore` against a temporary directory.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::sync::Arc;
    use std::thread;

    use chrono::TimeZone;
    use tempfile::TempDir;

    use crate::{Doi, Safekey, SCHEMA_VERSION};

    /// Path of the temporary directory as a UTF-8 path; panics if it is not UTF-8.
    fn tmp_dir_utf8(dir: &TempDir) -> Utf8PathBuf {
        Utf8PathBuf::from_path_buf(dir.path().to_path_buf()).expect("temp dir path must be UTF-8")
    }

    /// Safekey shared by most tests.
    fn sample_safekey() -> Safekey {
        Safekey("doi_10.1234_example".to_string())
    }

    /// Fully populated metadata record (current schema version, with a
    /// `doiget` extension and an empty `other` map).
    fn sample_metadata() -> Metadata {
        Metadata {
            schema_version: SCHEMA_VERSION.to_string(),
            title: "Sample Paper Title".to_string(),
            authors: vec!["Alice Researcher".to_string(), "Bob Coauthor".to_string()],
            year: Some(2026),
            doi: Some(Doi("10.1234/example".to_string())),
            arxiv_id: None,
            abstract_: Some("A short abstract.".to_string()),
            venue: Some("Phys. Rev. X".to_string()),
            volume: Some("12".to_string()),
            issue: Some("3".to_string()),
            pages: Some("031001".to_string()),
            publisher: Some("American Physical Society".to_string()),
            issn: Some("2160-3308".to_string()),
            isbn: None,
            type_: Some("journal-article".to_string()),
            keywords: vec!["physics".to_string(), "tdd".to_string()],
            url: Some("https://example.test/paper".to_string()),
            pdf_path: Some("doi_10.1234_example.pdf".to_string()),
            doiget: Some(DoigetExtension {
                fetched_at: chrono::Utc.with_ymd_and_hms(2026, 5, 6, 12, 0, 0).unwrap(),
                source: "unpaywall".to_string(),
                license: "CC-BY-4.0".to_string(),
                size_bytes: 1234567,
                mcp_call_id: Some("01JCKZ7Q0000000000000000AB".to_string()),
            }),
            other: BTreeMap::new(),
        }
    }

    /// Create a store rooted at `<tmp>/papers` (a directory that does not exist yet).
    fn fresh_store(dir: &TempDir) -> FsStore {
        let root = tmp_dir_utf8(dir).join("papers");
        FsStore::new(root).expect("FsStore::new")
    }

    /// Reserved fields written to a fresh store are read back unchanged.
    #[test]
    fn roundtrip_reserved_fields() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = sample_safekey();
        let m = sample_metadata();
        store.write(&key, &m, None).expect("write");

        let read = store.read(&key).expect("read").expect("Some");
        assert_eq!(read.schema_version, m.schema_version);
        assert_eq!(read.title, m.title);
        assert_eq!(read.authors, m.authors);
        assert_eq!(read.year, m.year);
        assert_eq!(
            read.doi.as_ref().map(|d| d.as_str()),
            Some("10.1234/example")
        );
        assert_eq!(read.abstract_, m.abstract_);
        assert_eq!(read.venue, m.venue);
        assert_eq!(read.publisher, m.publisher);
        assert_eq!(read.issn, m.issn);
        assert_eq!(read.type_, m.type_);
        assert_eq!(read.keywords, m.keywords);
        assert_eq!(read.url, m.url);
        assert_eq!(read.pdf_path, m.pdf_path);
    }

    /// Every field of the `[doiget]` extension table survives a write/read cycle.
    #[test]
    fn roundtrip_doiget_extension() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = sample_safekey();
        let m = sample_metadata();
        store.write(&key, &m, None).expect("write");

        let read = store.read(&key).expect("read").expect("Some");
        let d = read.doiget.expect("doiget table present");
        let want = m.doiget.expect("input doiget");
        assert_eq!(d.fetched_at, want.fetched_at);
        assert_eq!(d.source, want.source);
        assert_eq!(d.license, want.license);
        assert_eq!(d.size_bytes, want.size_bytes);
        assert_eq!(d.mcp_call_id, want.mcp_call_id);
    }

    /// Reading a key that was never written yields `Ok(None)`, not an error.
    #[test]
    fn read_returns_none_for_missing_safekey() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = Safekey("nonexistent".to_string());
        let res = store.read(&key).expect("read ok");
        assert!(res.is_none(), "expected Ok(None), got {:?}", res);
    }

    /// A hand-seeded file declaring schema "2.0" can be read but not
    /// overwritten. NOTE(review): relies on "2.0" being newer than
    /// `SCHEMA_VERSION` — confirm when bumping the schema.
    #[test]
    fn schema_too_new_blocks_writes_but_allows_reads() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = sample_safekey();

        let meta_path = store.metadata_path(&key).expect("path");
        std::fs::create_dir_all(meta_path.parent().expect("parent").as_std_path()).expect("mkdir");
        let body = "schema_version = \"2.0\"\ntitle = \"Future\"\nauthors = []\n";
        std::fs::write(meta_path.as_std_path(), body).expect("write");

        let read = store.read(&key).expect("read ok");
        assert!(read.is_some(), "future-major file must be readable");

        let m = sample_metadata();
        let err = store.write(&key, &m, None).expect_err("write must fail");
        match err {
            StoreError::SchemaTooNew { theirs, ours } => {
                assert_eq!(theirs, "2.0");
                assert_eq!(ours, SCHEMA_VERSION);
            }
            other => panic!("expected SchemaTooNew, got {:?}", other),
        }
    }

    /// Two threads writing the same key both succeed, and the stored
    /// `doiget.source` is one of the two written values.
    #[test]
    fn concurrent_writers_serialize_via_flock() {
        let dir = TempDir::new().expect("tmp");
        let store = Arc::new(fresh_store(&dir));
        let key = sample_safekey();

        // Seed the entry so both writers take the merge path.
        store.write(&key, &sample_metadata(), None).expect("seed");

        let mut handles = Vec::new();
        for source in ["unpaywall", "europepmc"] {
            let store = Arc::clone(&store);
            let key = key.clone();
            handles.push(thread::spawn(move || {
                let mut m = sample_metadata();
                if let Some(d) = m.doiget.as_mut() {
                    d.source = source.to_string();
                }
                store.write(&key, &m, None).expect("write");
            }));
        }
        for h in handles {
            h.join().expect("join");
        }

        let read = store.read(&key).expect("read").expect("Some");
        let source = read.doiget.expect("doiget").source;
        assert!(
            source == "unpaywall" || source == "europepmc",
            "winning source must be one of the contenders, got {}",
            source
        );
    }

    /// `list_recent` returns entries newest-first by `doiget.fetched_at`.
    #[test]
    fn list_recent_orders_by_fetched_at_desc() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);

        // Three entries whose fetch year increases with the index.
        for (idx, year_seed) in [(1, 2024_u32), (2, 2025), (3, 2026)] {
            let key = Safekey(format!("doi_10.1234_entry{}", idx));
            let mut m = sample_metadata();
            m.title = format!("Entry {}", idx);
            if let Some(d) = m.doiget.as_mut() {
                d.fetched_at = chrono::Utc
                    .with_ymd_and_hms(year_seed as i32, 5, 6, 12, 0, 0)
                    .unwrap();
            }
            store.write(&key, &m, None).expect("write");
        }

        let recent = store.list_recent(10).expect("list");
        assert_eq!(recent.len(), 3, "expected 3 entries, got {}", recent.len());
        assert_eq!(recent[0].title, "Entry 3");
        assert_eq!(recent[1].title, "Entry 2");
        assert_eq!(recent[2].title, "Entry 1");
        for w in recent.windows(2) {
            assert!(
                w[0].fetched_at >= w[1].fetched_at,
                "recent[].fetched_at must be non-increasing"
            );
        }
    }

    /// `search` matches a lower-case query against a mixed-case title and
    /// returns nothing for a term that appears nowhere.
    #[test]
    fn search_finds_by_title_substring() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);

        let key = Safekey("doi_10.1234_quantum".to_string());
        let mut m = sample_metadata();
        m.title = "Quantum Stuff and Other Topics".to_string();
        store.write(&key, &m, None).expect("write");

        let hits = store.search("quantum", 10).expect("search");
        assert_eq!(hits.len(), 1, "expected 1 hit, got {}", hits.len());
        assert_eq!(hits[0].title, "Quantum Stuff and Other Topics");

        let empty = store.search("relativity", 10).expect("search");
        assert!(empty.is_empty(), "expected no hits, got {:?}", empty);
    }

    /// A key containing `../` is rejected with `PathTraversal` by both `read` and `write`.
    #[test]
    fn path_traversal_in_safekey_blocked() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let bad = Safekey("../etc/passwd".to_string());

        match store.read(&bad) {
            Err(StoreError::PathTraversal { .. }) => {}
            other => panic!("expected PathTraversal, got {:?}", other),
        }
        let m = sample_metadata();
        match store.write(&bad, &m, None) {
            Err(StoreError::PathTraversal { .. }) => {}
            other => panic!("expected PathTraversal, got {:?}", other),
        }
    }

    /// The on-disk file follows the normalized layout: `schema_version` first,
    /// LF line endings, trailing newline, and sorted keys inside `[doiget]`.
    #[test]
    fn write_then_read_normalized_toml_alphabetizes_keys() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = sample_safekey();
        store.write(&key, &sample_metadata(), None).expect("write");

        let path = store.metadata_path(&key).expect("path");
        let raw = std::fs::read_to_string(path.as_std_path()).expect("read");
        let first_line = raw.lines().next().expect("at least one line");
        assert!(
            first_line.starts_with("schema_version = "),
            "first line must be schema_version, got: {:?}",
            first_line
        );
        assert!(raw.ends_with('\n'), "file must end with a newline");
        assert!(!raw.contains('\r'), "no CR allowed; LF only");
        assert!(raw.contains("\n[doiget]\n"), "doiget sub-table missing");
        let doiget_idx = raw.find("[doiget]").expect("doiget block");
        // Only look at the text from the `[doiget]` header onwards.
        let after = &raw[doiget_idx..];
        let fetched_at_idx = after
            .find("fetched_at = ")
            .expect("fetched_at key in doiget");
        let license_idx = after.find("license = ").expect("license key in doiget");
        assert!(
            fetched_at_idx < license_idx,
            "fetched_at must precede license within [doiget]"
        );
    }

    /// Writing over a file seeded by another tool keeps its unknown
    /// `[bibliofetch]` table and its existing `title`.
    #[test]
    fn write_preserves_unknown_table_from_existing_file() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = sample_safekey();
        let meta_path = store.metadata_path(&key).expect("path");

        let body = format!(
            "schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\n\
             [bibliofetch]\nharvest = \"2026-01-01\"\n",
            SCHEMA_VERSION
        );
        std::fs::write(meta_path.as_std_path(), body).expect("write");

        let mut m = sample_metadata();
        m.title = "Doiget Wins?".to_string(); store.write(&key, &m, None).expect("write");

        let read_raw = std::fs::read_to_string(meta_path.as_std_path()).expect("re-read");
        assert!(
            read_raw.contains("bibliofetch"),
            "[bibliofetch] table was dropped: {}",
            read_raw
        );
        assert!(
            read_raw.contains("title = \"Existing\""),
            "doiget overwrote a reserved field set by another tool: {}",
            read_raw
        );
    }

    /// A foreign table with typed values (string, integer, array) and an
    /// unknown top-level scalar are captured into `other` on read and survive
    /// a subsequent write/read cycle; the existing title is preserved.
    #[test]
    fn bibliofetch_typed_table_and_unknown_scalar_survive_roundtrip() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = sample_safekey();
        let meta_path = store.metadata_path(&key).expect("path");

        let body = format!(
            "schema_version = \"{}\"\n\
             title = \"Existing\"\n\
             authors = [\"Carol\"]\n\
             zotero_key = \"ABC123\"\n\n\
             [bibliofetch]\n\
             harvest = \"2026-02-03\"\n\
             count = 42\n\
             tags = [\"x\", \"y\"]\n",
            SCHEMA_VERSION
        );
        std::fs::write(meta_path.as_std_path(), body).expect("seed write");

        // First read: unknown keys must land in `other`.
        let m0 = store.read(&key).expect("read ok").expect("entry present");
        assert!(
            m0.other.contains_key("bibliofetch"),
            "[bibliofetch] not captured into `other` on read: {:?}",
            m0.other
        );
        assert_eq!(
            m0.other.get("zotero_key").and_then(|v| v.as_str()),
            Some("ABC123"),
            "unknown top-level scalar not captured: {:?}",
            m0.other
        );

        // Overwrite attempt with a different title.
        let mut m_doiget = sample_metadata();
        m_doiget.title = "Doiget Would Overwrite".to_string();
        store.write(&key, &m_doiget, None).expect("doiget write");

        // Second read: everything seeded by the other tool must still be there.
        let m1 = store
            .read(&key)
            .expect("re-read ok")
            .expect("entry present");
        assert_eq!(
            m1.title, "Existing",
            "STORE.md §6: doiget overwrote a reserved field"
        );
        let bf = m1
            .other
            .get("bibliofetch")
            .and_then(|v| v.as_table())
            .expect("[bibliofetch] table survived read->write->read");
        assert_eq!(
            bf.get("harvest").and_then(|v| v.as_str()),
            Some("2026-02-03")
        );
        assert_eq!(bf.get("count").and_then(|v| v.as_integer()), Some(42));
        let tags = bf
            .get("tags")
            .and_then(|v| v.as_array())
            .expect("tags array survived");
        let tags: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect();
        assert_eq!(tags, vec!["x", "y"]);
        assert_eq!(
            m1.other.get("zotero_key").and_then(|v| v.as_str()),
            Some("ABC123"),
            "unknown top-level scalar lost across the cycle"
        );

        let raw = std::fs::read_to_string(meta_path.as_std_path()).expect("raw re-read");
        assert!(raw.ends_with('\n'), "missing trailing newline: {raw:?}");
    }

    /// When both the file on disk and the incoming record carry the same
    /// unknown key, the value already on disk wins.
    #[test]
    fn other_key_collision_prefers_existing() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = sample_safekey();
        let meta_path = store.metadata_path(&key).expect("path");

        let body = format!(
            "schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\
             zotero_key = \"FROM_BIBLIOFETCH\"\n",
            SCHEMA_VERSION
        );
        std::fs::write(meta_path.as_std_path(), body).expect("seed");

        let mut m = sample_metadata();
        m.other.insert(
            "zotero_key".to_string(),
            toml::Value::String("FROM_DOIGET".to_string()),
        );
        store.write(&key, &m, None).expect("write");

        let got = store.read(&key).expect("read").expect("present");
        assert_eq!(
            got.other.get("zotero_key").and_then(|v| v.as_str()),
            Some("FROM_BIBLIOFETCH"),
            "STORE.md §6: existing `other` value must win on collision"
        );
    }

    /// Passing a PDF to `write` copies its bytes to the store's PDF path for the key.
    #[test]
    fn pdf_is_copied_atomically_on_write() {
        let dir = TempDir::new().expect("tmp");
        let store = fresh_store(&dir);
        let key = sample_safekey();

        // Source PDF lives in a separate temporary directory.
        let src_dir = TempDir::new().expect("tmp src");
        let src_path = Utf8PathBuf::from_path_buf(src_dir.path().to_path_buf())
            .expect("utf8 src dir")
            .join("input.pdf");
        std::fs::write(src_path.as_std_path(), b"%PDF-1.7 synthetic").expect("write src");

        store
            .write(&key, &sample_metadata(), Some(&src_path))
            .expect("write");

        let dst = store.pdf_path(&key).expect("pdf path");
        let bytes = std::fs::read(dst.as_std_path()).expect("read dst");
        assert_eq!(bytes, b"%PDF-1.7 synthetic");
    }
}