doiget_core/store/metadata.rs
1//! Metadata struct matching `docs/STORE.md` §2 / `docs/PUBLIC_API.md` §3.
2//!
3//! The on-disk wire format is TOML, with the reserved top-level fields named
4//! by the spec and any tool-specific table (`[doiget]`, `[bibliofetch]`, ...)
5//! beneath. Per `docs/STORE.md` §8, both implementations MUST tolerate
6//! unknown top-level fields and unknown tables; this module captures unknown
7//! entries through the `other` field via `#[serde(flatten)]` so they
8//! survive a read/modify/write round-trip.
9
10use chrono::{DateTime, Utc};
11use serde::{Deserialize, Serialize};
12
13/// Metadata for a single stored entry.
14///
15/// Reserved top-level fields per `docs/STORE.md` §2. `schema_version` is a
16/// string of the form `<MAJOR>.<MINOR>`; the current version this build
17/// writes is [`crate::SCHEMA_VERSION`].
18///
19/// Unknown top-level fields and unknown tables are preserved verbatim
20/// through the `other` field, so reading-and-rewriting an entry produced
21/// by a future minor revision (or by BiblioFetch.jl) does not silently
22/// drop data.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct Metadata {
25 /// Schema version of the form `<MAJOR>.<MINOR>`. See `docs/STORE.md` §3.
26 pub schema_version: String,
27 /// Paper title.
28 pub title: String,
29 /// List of authors (preserve original ordering).
30 pub authors: Vec<String>,
31 /// Publication year, if known.
32 #[serde(skip_serializing_if = "Option::is_none", default)]
33 pub year: Option<i32>,
34 /// DOI, if any.
35 #[serde(skip_serializing_if = "Option::is_none", default)]
36 pub doi: Option<crate::Doi>,
37 /// arXiv id, if any.
38 #[serde(skip_serializing_if = "Option::is_none", default)]
39 pub arxiv_id: Option<crate::ArxivId>,
40 /// Abstract; serialized as the bare `abstract` key (Rust keyword).
41 #[serde(rename = "abstract", skip_serializing_if = "Option::is_none", default)]
42 pub abstract_: Option<String>,
43 /// Venue (e.g. journal or conference).
44 #[serde(skip_serializing_if = "Option::is_none", default)]
45 pub venue: Option<String>,
46 /// Publisher.
47 #[serde(skip_serializing_if = "Option::is_none", default)]
48 pub publisher: Option<String>,
49 /// ISSN (for journals).
50 #[serde(skip_serializing_if = "Option::is_none", default)]
51 pub issn: Option<String>,
52 /// ISBN (for books).
53 #[serde(skip_serializing_if = "Option::is_none", default)]
54 pub isbn: Option<String>,
55 /// Crossref-taxonomy type. Serialized as the bare `type` key.
56 #[serde(rename = "type", skip_serializing_if = "Option::is_none", default)]
57 pub type_: Option<String>,
58 /// Free-form keywords.
59 #[serde(skip_serializing_if = "Vec::is_empty", default)]
60 pub keywords: Vec<String>,
61 /// Canonical URL for the entry, if any.
62 #[serde(skip_serializing_if = "Option::is_none", default)]
63 pub url: Option<String>,
64 /// Path to the stored PDF, relative to the store root.
65 #[serde(skip_serializing_if = "Option::is_none", default)]
66 pub pdf_path: Option<String>,
67 /// doiget-specific extension table. BiblioFetch.jl ignores it.
68 #[serde(skip_serializing_if = "Option::is_none", default)]
69 pub doiget: Option<DoigetExtension>,
70 /// All other top-level keys and tables (e.g. `[bibliofetch]`).
71 ///
72 /// Per `docs/STORE.md` §8 we MUST tolerate unknown top-level fields and
73 /// unknown tables. Unknown entries are captured here so a read /
74 /// modify / write cycle does not silently drop them. Keys are stored in
75 /// a `BTreeMap` so re-serialization is alphabetically ordered, matching
76 /// the normalization rule in `docs/STORE.md` §7.
77 #[serde(flatten)]
78 pub other: std::collections::BTreeMap<String, toml::Value>,
79}
80
81/// doiget-specific extension table (`[doiget]`).
82///
83/// Per `docs/STORE.md` §6, doiget owns this table outright and may
84/// overwrite its contents on a re-fetch. BiblioFetch.jl ignores it.
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct DoigetExtension {
87 /// RFC3339 UTC timestamp of the fetch that produced this entry.
88 pub fetched_at: DateTime<Utc>,
89 /// Which `Source` produced this entry (e.g. `unpaywall`).
90 pub source: String,
91 /// OA license string, or the literal `"unknown"`.
92 pub license: String,
93 /// Size of the stored PDF in bytes.
94 pub size_bytes: u64,
95 /// ULID of the originating MCP call, if the fetch came in via MCP.
96 #[serde(skip_serializing_if = "Option::is_none", default)]
97 pub mcp_call_id: Option<String>,
98}