Skip to main content

doiget_core/store/
metadata.rs

1//! Metadata struct matching `docs/STORE.md` §2 / `docs/PUBLIC_API.md` §3.
2//!
3//! The on-disk wire format is TOML, with the reserved top-level fields named
4//! by the spec and any tool-specific table (`[doiget]`, `[bibliofetch]`, ...)
5//! beneath. Per `docs/STORE.md` §8, both implementations MUST tolerate
6//! unknown top-level fields and unknown tables; this module captures unknown
7//! entries through the `other` field via `#[serde(flatten)]` so they
8//! survive a read/modify/write round-trip.
9
10use chrono::{DateTime, Utc};
11use serde::{Deserialize, Serialize};
12
13/// Metadata for a single stored entry.
14///
15/// Reserved top-level fields per `docs/STORE.md` §2. `schema_version` is a
16/// string of the form `<MAJOR>.<MINOR>`; the current version this build
17/// writes is [`crate::SCHEMA_VERSION`].
18///
19/// Unknown top-level fields and unknown tables are preserved verbatim
20/// through the `other` field, so reading-and-rewriting an entry produced
21/// by a future minor revision (or by BiblioFetch.jl) does not silently
22/// drop data.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct Metadata {
25    /// Schema version of the form `<MAJOR>.<MINOR>`. See `docs/STORE.md` §3.
26    pub schema_version: String,
27    /// Paper title.
28    pub title: String,
29    /// List of authors (preserve original ordering).
30    pub authors: Vec<String>,
31    /// Publication year, if known.
32    #[serde(skip_serializing_if = "Option::is_none", default)]
33    pub year: Option<i32>,
34    /// DOI, if any.
35    #[serde(skip_serializing_if = "Option::is_none", default)]
36    pub doi: Option<crate::Doi>,
37    /// arXiv id, if any.
38    #[serde(skip_serializing_if = "Option::is_none", default)]
39    pub arxiv_id: Option<crate::ArxivId>,
40    /// Abstract; serialized as the bare `abstract` key (Rust keyword).
41    #[serde(rename = "abstract", skip_serializing_if = "Option::is_none", default)]
42    pub abstract_: Option<String>,
43    /// Venue (e.g. journal or conference).
44    #[serde(skip_serializing_if = "Option::is_none", default)]
45    pub venue: Option<String>,
46    /// Publisher.
47    #[serde(skip_serializing_if = "Option::is_none", default)]
48    pub publisher: Option<String>,
49    /// ISSN (for journals).
50    #[serde(skip_serializing_if = "Option::is_none", default)]
51    pub issn: Option<String>,
52    /// ISBN (for books).
53    #[serde(skip_serializing_if = "Option::is_none", default)]
54    pub isbn: Option<String>,
55    /// Crossref-taxonomy type. Serialized as the bare `type` key.
56    #[serde(rename = "type", skip_serializing_if = "Option::is_none", default)]
57    pub type_: Option<String>,
58    /// Free-form keywords.
59    #[serde(skip_serializing_if = "Vec::is_empty", default)]
60    pub keywords: Vec<String>,
61    /// Canonical URL for the entry, if any.
62    #[serde(skip_serializing_if = "Option::is_none", default)]
63    pub url: Option<String>,
64    /// Path to the stored PDF, relative to the store root.
65    #[serde(skip_serializing_if = "Option::is_none", default)]
66    pub pdf_path: Option<String>,
67    /// doiget-specific extension table. BiblioFetch.jl ignores it.
68    #[serde(skip_serializing_if = "Option::is_none", default)]
69    pub doiget: Option<DoigetExtension>,
70    /// All other top-level keys and tables (e.g. `[bibliofetch]`).
71    ///
72    /// Per `docs/STORE.md` §8 we MUST tolerate unknown top-level fields and
73    /// unknown tables. Unknown entries are captured here so a read /
74    /// modify / write cycle does not silently drop them. Keys are stored in
75    /// a `BTreeMap` so re-serialization is alphabetically ordered, matching
76    /// the normalization rule in `docs/STORE.md` §7.
77    #[serde(flatten)]
78    pub other: std::collections::BTreeMap<String, toml::Value>,
79}
80
81/// doiget-specific extension table (`[doiget]`).
82///
83/// Per `docs/STORE.md` §6, doiget owns this table outright and may
84/// overwrite its contents on a re-fetch. BiblioFetch.jl ignores it.
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct DoigetExtension {
87    /// RFC3339 UTC timestamp of the fetch that produced this entry.
88    pub fetched_at: DateTime<Utc>,
89    /// Which `Source` produced this entry (e.g. `unpaywall`).
90    pub source: String,
91    /// OA license string, or the literal `"unknown"`.
92    pub license: String,
93    /// Size of the stored PDF in bytes.
94    pub size_bytes: u64,
95    /// ULID of the originating MCP call, if the fetch came in via MCP.
96    #[serde(skip_serializing_if = "Option::is_none", default)]
97    pub mcp_call_id: Option<String>,
98}