Skip to main content

mnm_core/
provenance.rs

1//! `document.provenance` JSONB shape — the trust + verification metadata that
2//! drives the confidence-scoring blend in Phase 9 (US6, D24).
3//!
4//! The DB column is `jsonb` so unknown keys are tolerated at the storage layer;
5//! validation happens at the application boundary. The struct here is the
6//! canonical Rust mirror — see the data-model schema reference §"JSONB schemas"
7//! for the wire shape and US6 for the per-field scoring impact.
8
9use serde::{Deserialize, Serialize};
10use time::Date;
11
12/// Source attribution — drives the dominant trust-score multiplier (US6 §"Trust
13/// score computation").
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
15#[serde(rename_all = "snake_case")]
16pub enum Attribution {
17    /// Authored by the Midnight Foundation. Highest default trust.
18    Foundation,
19    /// Authored by a Midnight ecosystem partner under agreement.
20    Partner,
21    /// Third-party content endorsed for inclusion.
22    ThirdParty,
23    /// Community-contributed content with no explicit vetting.
24    Community,
25    /// Attribution not set — lowest default trust.
26    #[default]
27    Unknown,
28}
29
30/// Free-form content-type tag used in filters and scoring. New variants are
31/// additive; unknown wire values deserialize to [`ContentType::Other`].
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
33#[serde(rename_all = "snake_case")]
34pub enum ContentType {
35    /// Reference documentation, API surface.
36    Doc,
37    /// Step-by-step tutorial / walkthrough.
38    Tutorial,
39    /// Pure reference material (specs, RFCs).
40    Reference,
41    /// Worked example.
42    Example,
43    /// Compact contract source code.
44    ContractSource,
45    /// SDK source code (Rust / TypeScript).
46    SdkSource,
47    /// Test source.
48    Test,
49    /// README file at any level.
50    Readme,
51    /// Any other content type — preserved verbatim where possible.
52    #[default]
53    #[serde(other)]
54    Other,
55}
56
57/// Optional language target a document/chunk applies to. Used by US6's
58/// `version_match` scoring multiplier (e.g. "this is Compact ≥ 0.23 only").
59#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
60pub struct LanguageTarget {
61    /// Language name (e.g. `"compact"`, `"rust"`, `"typescript"`).
62    pub name: String,
63    /// Optional semver-style constraint, e.g. `">=0.23"`, `"^1.4.0"`.
64    #[serde(default, skip_serializing_if = "Option::is_none")]
65    pub version_constraint: Option<String>,
66}
67
68/// Declared SDK dependency a document/chunk targets — used in filter narrowing.
69#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
70pub struct SdkDependency {
71    /// Package ecosystem kind (e.g. `"npm"`, `"cargo"`).
72    pub kind: String,
73    /// Canonical package name.
74    pub name: String,
75    /// Optional semver constraint.
76    #[serde(default, skip_serializing_if = "Option::is_none")]
77    pub version_constraint: Option<String>,
78}
79
80/// Deprecation flag on a document/chunk. Triggers a configurable penalty
81/// multiplier (default ×0.3, US6 §"Trust score computation").
82#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
83pub struct Deprecation {
84    /// Whether the content is marked deprecated.
85    pub is_deprecated: bool,
86    /// Optional ISO date marking when the deprecation began.
87    #[serde(default, skip_serializing_if = "Option::is_none")]
88    pub since: Option<Date>,
89    /// Optional human-readable reason.
90    #[serde(default, skip_serializing_if = "Option::is_none")]
91    pub reason: Option<String>,
92}
93
94/// The full `document.provenance` JSONB shape mirrored as a typed Rust struct.
95///
96/// All fields are optional on the wire — old corpora may have sparse provenance,
97/// and scoring multipliers degrade to sensible defaults (see [`Attribution::Unknown`],
98/// `verified=false`, etc.).
99#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
100pub struct Provenance {
101    /// Author attribution (drives the dominant trust multiplier).
102    #[serde(default)]
103    pub attribution: Attribution,
104
105    /// Whether the content has been explicitly verified by an authorized
106    /// principal (Foundation, partner, or other vetted reviewer).
107    #[serde(default)]
108    pub verified: bool,
109    /// Who performed the verification, if known.
110    #[serde(default, skip_serializing_if = "Option::is_none")]
111    pub verified_by: Option<String>,
112    /// When the verification occurred (ISO date).
113    #[serde(default, skip_serializing_if = "Option::is_none")]
114    pub verified_at: Option<Date>,
115    /// Free-form notes captured during verification.
116    #[serde(default, skip_serializing_if = "Option::is_none")]
117    pub verification_notes: Option<String>,
118
119    /// Programming-language constraints the content applies to.
120    #[serde(default, skip_serializing_if = "Vec::is_empty")]
121    pub language_targets: Vec<LanguageTarget>,
122
123    /// SDK package constraints the content applies to.
124    #[serde(default, skip_serializing_if = "Vec::is_empty")]
125    pub sdk_dependencies: Vec<SdkDependency>,
126
127    /// Deprecation flag (with optional reason / since).
128    #[serde(default)]
129    pub deprecation: Deprecation,
130
131    /// Free-form taxonomy tags used in filter narrowing.
132    #[serde(default, skip_serializing_if = "Vec::is_empty")]
133    pub tags: Vec<String>,
134
135    /// Coarse content-type tag.
136    #[serde(default)]
137    pub content_type: ContentType,
138}
139
140impl Provenance {
141    /// Construct a `Provenance` carrying only the attribution. Convenience for
142    /// minimal-metadata test fixtures.
143    #[must_use]
144    pub fn attributed_to(attribution: Attribution) -> Self {
145        Self { attribution, ..Self::default() }
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// The all-defaults value must be the least-trusting one.
    #[test]
    fn default_is_safe() {
        let p = Provenance::default();
        assert_eq!(p.attribution, Attribution::Unknown);
        assert!(!p.verified);
        assert!(!p.deprecation.is_deprecated);
        assert_eq!(p.content_type, ContentType::Other);
        assert!(p.tags.is_empty());
    }

    /// `attributed_to` changes the attribution and nothing else.
    #[test]
    fn attributed_to_sets_only_attribution() {
        let p = Provenance::attributed_to(Attribution::Partner);
        let expected = Provenance {
            attribution: Attribution::Partner,
            ..Provenance::default()
        };
        assert_eq!(p, expected);
    }

    /// A value with every field populated survives a JSON round trip.
    #[test]
    fn round_trips_full_shape() {
        // `expect` rather than `.ok()`: a bad date must fail the test instead
        // of silently turning the field into `None`.
        let verified_at = Date::from_calendar_date(2026, time::Month::April, 1)
            .expect("2026-04-01 is a valid calendar date");
        let deprecated_since = Date::from_calendar_date(2026, time::Month::March, 15)
            .expect("2026-03-15 is a valid calendar date");

        let p = Provenance {
            attribution: Attribution::Foundation,
            verified: true,
            verified_by: Some("midnight-foundation".into()),
            verified_at: Some(verified_at),
            verification_notes: Some("reviewed against the current SDK".into()),
            language_targets: vec![LanguageTarget {
                name: "compact".into(),
                version_constraint: Some(">=0.23".into()),
            }],
            sdk_dependencies: vec![SdkDependency {
                kind: "npm".into(),
                name: "@midnight-ntwrk/midnight-js".into(),
                version_constraint: Some("^1.4.0".into()),
            }],
            deprecation: Deprecation {
                is_deprecated: true,
                since: Some(deprecated_since),
                reason: Some("superseded by a newer walkthrough".into()),
            },
            tags: vec!["quickstart".into(), "tutorial".into()],
            content_type: ContentType::Tutorial,
        };
        let v = serde_json::to_value(&p).unwrap();
        let back: Provenance = serde_json::from_value(v).unwrap();
        assert_eq!(p, back);
    }

    /// Empty lists and `None` options are omitted from the serialized form.
    #[test]
    fn empty_collections_elided() {
        let v = serde_json::to_value(Provenance::default()).unwrap();
        assert!(v.get("language_targets").is_none());
        assert!(v.get("sdk_dependencies").is_none());
        assert!(v.get("tags").is_none());
        assert!(v.get("verified_by").is_none());
        assert!(v.get("verified_at").is_none());
        assert!(v.get("verification_notes").is_none());
    }

    /// Sparse provenance: a payload with no `attribution` key at all falls
    /// back to the `Default` impl. This covers only the absent-key case; it
    /// does not exercise an unrecognised attribution string.
    #[test]
    fn absent_attribution_defaults_to_unknown() {
        let v = serde_json::json!({});
        let p: Provenance = serde_json::from_value(v).unwrap();
        assert_eq!(p.attribution, Attribution::Unknown);
    }

    #[test]
    fn unknown_content_type_falls_back_to_other() {
        let v = serde_json::json!({ "content_type": "blog_post_2026_meta" });
        let p: Provenance = serde_json::from_value(v).unwrap();
        assert_eq!(p.content_type, ContentType::Other);
    }

    /// The `Other` fallback is lossy: the unrecognised wire string is not
    /// retained, so the value serializes back as plain `"other"`.
    #[test]
    fn other_content_type_serializes_as_other() {
        let v = serde_json::to_value(ContentType::Other).unwrap();
        assert_eq!(v, serde_json::Value::String("other".into()));
    }

    #[test]
    fn attribution_serializes_snake_case() {
        let v = serde_json::to_value(Attribution::ThirdParty).unwrap();
        assert_eq!(v, serde_json::Value::String("third_party".into()));
    }
}