// xarf/model.rs

//! Core data model for XARF v4 reports.
//!
//! The design choice here is deliberate: rather than encode all 32 concrete
//! report subtypes as a Rust enum (which would force compile-time knowledge of
//! every category-specific field and lock the crate to one frozen version of
//! the spec), we model the report as a single [`Report`] struct with
//! strongly-typed *core* fields plus a `BTreeMap` of category-specific
//! "extra" fields preserved verbatim from JSON.
//!
//! This mirrors what a Go drop-in for XARF would feel like (`encoding/json`
//! unmarshalling with explicit fields + a catch-all map). It allows:
//!
//! * Forward compatibility — schemas evolve, new fields appear, the crate
//!   keeps round-tripping without churn.
//! * Correct strict-mode validation — all rules live in the bundled JSON
//!   Schemas, not in Rust types.
//! * Lossless re-serialization — every byte of input that wasn't malformed
//!   ends up back in the output.

use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};

25/// Contact information for the reporter or sender. Shared by both because the
26/// JSON shape is identical.
27#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
28pub struct Contact {
29    pub org: String,
30    pub contact: String,
31    pub domain: String,
32}
33
34impl Contact {
35    pub fn new(
36        org: impl Into<String>,
37        contact: impl Into<String>,
38        domain: impl Into<String>,
39    ) -> Self {
40        Self {
41            org: org.into(),
42            contact: contact.into(),
43            domain: domain.into(),
44        }
45    }
46}
47
48/// A single evidence item attached to a report.
49///
50/// `payload` is the base64-encoded body per RFC 4648. Use
51/// [`crate::create_evidence`] to compute the hash, size, and encoding from raw
52/// bytes.
53#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
54pub struct Evidence {
55    pub content_type: String,
56    pub payload: String,
57    #[serde(default, skip_serializing_if = "Option::is_none")]
58    pub description: Option<String>,
59    #[serde(default, skip_serializing_if = "Option::is_none")]
60    pub hash: Option<String>,
61    #[serde(default, skip_serializing_if = "Option::is_none")]
62    pub size: Option<u64>,
63}
64
/// The seven XARF v4 categories. `Other` is **not** a valid category — it
/// exists only so deserialization of malformed input can produce a structured
/// error rather than panicking. Validation always rejects [`Category::Other`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Category {
    Messaging,
    Connection,
    Content,
    Copyright,
    Vulnerability,
    Infrastructure,
    Reputation,
    /// Unknown / unrecognised value (kept for round-tripping malformed input).
    Other(String),
}

impl Category {
    /// Wire representation of the category.
    ///
    /// Known variants yield their lowercase name; [`Category::Other`] yields
    /// the stored string unchanged.
    #[must_use]
    pub fn as_str(&self) -> &str {
        match self {
            Self::Messaging => "messaging",
            Self::Connection => "connection",
            Self::Content => "content",
            Self::Copyright => "copyright",
            Self::Vulnerability => "vulnerability",
            Self::Infrastructure => "infrastructure",
            Self::Reputation => "reputation",
            Self::Other(s) => s.as_str(),
        }
    }

    /// Map a wire string onto a [`Category`]. Never fails.
    ///
    /// Matching is exact and case-sensitive: anything other than the seven
    /// lowercase names (including `""` or `"Messaging"`) is wrapped verbatim
    /// in [`Category::Other`], so `as_str` returns the input again.
    #[must_use]
    pub fn from_str_value(s: &str) -> Self {
        match s {
            "messaging" => Self::Messaging,
            "connection" => Self::Connection,
            "content" => Self::Content,
            "copyright" => Self::Copyright,
            "vulnerability" => Self::Vulnerability,
            "infrastructure" => Self::Infrastructure,
            "reputation" => Self::Reputation,
            other => Self::Other(other.to_string()),
        }
    }

    /// `true` for the seven canonical categories defined by the v4 spec.
    ///
    /// Only the variant is inspected: an `Other` holding a canonical name
    /// (constructed by hand rather than via `from_str_value`) is still
    /// reported as unknown.
    #[must_use]
    pub fn is_known(&self) -> bool {
        !matches!(self, Self::Other(_))
    }
}

114impl Serialize for Category {
115    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
116    where
117        S: serde::Serializer,
118    {
119        serializer.serialize_str(self.as_str())
120    }
121}
122
123impl<'de> Deserialize<'de> for Category {
124    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
125    where
126        D: serde::Deserializer<'de>,
127    {
128        let s = String::deserialize(deserializer)?;
129        Ok(Self::from_str_value(&s))
130    }
131}
132
133/// A XARF v4 report. Core spec fields are strongly typed; category-specific
134/// fields live in [`Report::extra`] keyed by the JSON property name.
135///
136/// `extra` is sorted (it's a `BTreeMap`) so re-serializations are
137/// deterministic.
138#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
139pub struct Report {
140    pub xarf_version: String,
141    pub report_id: String,
142    pub timestamp: String,
143    pub reporter: Contact,
144    pub sender: Contact,
145    pub source_identifier: String,
146    pub category: Category,
147    #[serde(rename = "type")]
148    pub type_: String,
149
150    // Recommended core fields
151    #[serde(default, skip_serializing_if = "Option::is_none")]
152    pub source_port: Option<u16>,
153    #[serde(default, skip_serializing_if = "Option::is_none")]
154    pub evidence_source: Option<String>,
155    #[serde(default, skip_serializing_if = "Option::is_none")]
156    pub evidence: Option<Vec<Evidence>>,
157    #[serde(default, skip_serializing_if = "Option::is_none")]
158    pub confidence: Option<f64>,
159
160    // Optional core fields
161    #[serde(default, skip_serializing_if = "Option::is_none")]
162    pub tags: Option<Vec<String>>,
163    #[serde(default, skip_serializing_if = "Option::is_none")]
164    pub description: Option<String>,
165    #[serde(default, skip_serializing_if = "Option::is_none")]
166    pub legacy_version: Option<String>,
167    /// Internal operational metadata. NEVER transmitted. Use
168    /// [`Report::strip_internal`] before emitting to another system.
169    #[serde(rename = "_internal", default, skip_serializing_if = "Option::is_none")]
170    pub internal: Option<Map<String, Value>>,
171
172    /// Category-specific and forward-compatible extra fields. Sorted for
173    /// deterministic serialization.
174    #[serde(flatten)]
175    pub extra: BTreeMap<String, Value>,
176}
177
178impl Report {
179    /// Remove the `_internal` block. Always do this before transmission.
180    pub fn strip_internal(&mut self) -> Option<Map<String, Value>> {
181        self.internal.take()
182    }
183
184    /// Look up an extra (category-specific) field by name.
185    pub fn extra(&self, key: &str) -> Option<&Value> {
186        self.extra.get(key)
187    }
188
189    /// Insert or replace an extra field. Returns the previous value.
190    pub fn set_extra(&mut self, key: impl Into<String>, value: Value) -> Option<Value> {
191        self.extra.insert(key.into(), value)
192    }
193
194    /// Convert the report to a plain `serde_json::Value` (object). The result
195    /// has the core fields first followed by category extras, but ordering of
196    /// the extras within the object follows `BTreeMap` order — that is,
197    /// alphabetic.
198    pub fn to_json_value(&self) -> Value {
199        serde_json::to_value(self)
200            .expect("XARF report serialization is infallible for valid Report values")
201    }
202}