Skip to main content

osproxy_spi/
rules.rs

1//! Declarative tenancy rules an implementer provides through [`TenancySpi`].
2//!
3//! These types are pure data: how to find the partition id, how to build the
4//! document `_id`, which fields to inject, and which to treat as sensitive. The
5//! [`crate::TenancySpi`] returns them; `osproxy-tenancy` interprets them. The
6//! interpretation is symmetric: a field injected on ingest is stripped on read
7//! (`docs/02` §2, `docs/03`).
8//!
9//! [`TenancySpi`]: crate::TenancySpi
10
11use osproxy_core::FieldName;
12use serde_json::Value as JsonValue;
13
/// A dotted path into a JSON document, e.g. `tenant_id` or `meta.tenant`.
///
/// A deliberately small subset of JSONPath: a sequence of object keys. Array
/// indexing and wildcards are not supported in M1, because the partition key is
/// a scalar field on the document root or on a nested object. The supported
/// grammar is version-tracked in `docs/specs/opensearch-endpoints.md`.
///
/// The string is stored exactly as given; this type performs no validation or
/// normalisation of its own.
///
/// # Examples
///
/// ```
/// use osproxy_spi::JsonPath;
///
/// let p = JsonPath::new("meta.tenant");
/// assert_eq!(p.segments().collect::<Vec<_>>(), ["meta", "tenant"]);
/// ```
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct JsonPath(String);

impl JsonPath {
    /// Constructs a path from a dotted string, stored verbatim.
    #[must_use]
    pub fn new(path: impl Into<String>) -> Self {
        Self(path.into())
    }

    /// The dotted path as written.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Iterates the path's object-key segments in order.
    ///
    /// Segments are produced by splitting on `.` and nothing else, so an empty
    /// path yields a single empty segment and consecutive dots (`a..b`) yield
    /// an empty segment between their neighbours.
    pub fn segments(&self) -> impl Iterator<Item = &str> {
        self.0.split('.')
    }
}
49
50/// How to find the partition id in a request.
51///
52/// Not `#[non_exhaustive]`: the resolver must handle every source kind, so a new
53/// source should force the resolver to be updated rather than silently fail to
54/// resolve.
55#[derive(Clone, PartialEq, Eq, Debug)]
56pub enum PartitionKeySpec {
57    /// A JSON path into the document body (ingest path).
58    BodyField(JsonPath),
59    /// A request header carries it (e.g. set by an upstream auth gateway).
60    Header(String),
61    /// Derived from a [`crate::Principal`] attribute of this name.
62    PrincipalAttr(String),
63    /// Try each in order until one resolves.
64    AnyOf(Vec<PartitionKeySpec>),
65}
66
/// The payload-free kind tag of a [`PartitionKeySpec`].
///
/// Carried by [`crate::SpiError::PartitionUnresolved`] to report *which*
/// sources were tried. It is shape-only telemetry: the values that were looked
/// for are never included.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum PartitionKeySpecKind {
    /// Kind tag for [`PartitionKeySpec::BodyField`].
    BodyField,
    /// Kind tag for [`PartitionKeySpec::Header`].
    Header,
    /// Kind tag for [`PartitionKeySpec::PrincipalAttr`].
    PrincipalAttr,
}
81
82/// Rule to construct a document `_id`.
83///
84/// In `SharedIndex` placement the partition id MUST appear in the template so
85/// ids cannot collide across tenants sharing one physical index (`docs/03`).
86/// `osproxy-tenancy` enforces this.
87///
88/// # Examples
89///
90/// ```
91/// use osproxy_spi::{DocIdRule, IdTemplate};
92///
93/// let rule = DocIdRule::new(IdTemplate::new("{partition}:{body.order_id}"))
94///     .with_routing(true);
95/// assert!(rule.set_routing);
96/// assert!(rule.template.references_partition());
97/// ```
98#[derive(Clone, PartialEq, Eq, Debug)]
99pub struct DocIdRule {
100    /// The id template, e.g. `{partition}:{body.natural_key}`.
101    pub template: IdTemplate,
102    /// Also set OpenSearch `_routing` to the partition id, so the document
103    /// lands on a deterministic shard for the partition.
104    pub set_routing: bool,
105}
106
107impl DocIdRule {
108    /// Constructs a rule from a template, with routing off.
109    #[must_use]
110    pub fn new(template: IdTemplate) -> Self {
111        Self {
112            template,
113            set_routing: false,
114        }
115    }
116
117    /// Sets `set_routing` (builder style).
118    #[must_use]
119    pub fn with_routing(mut self, set_routing: bool) -> Self {
120        self.set_routing = set_routing;
121        self
122    }
123}
124
/// A document-`_id` template with `{partition}` and `{body.<path>}` placeholders.
///
/// Interpretation lives in `osproxy-rewrite`; this type only holds the source
/// string, which is parsed on demand. `{partition}` expands to the resolved
/// partition id; `{body.<path>}` expands to a scalar pulled from the document
/// at `<path>`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct IdTemplate(String);

impl IdTemplate {
    /// Constructs a template from its source string, stored verbatim.
    #[must_use]
    pub fn new(template: impl Into<String>) -> Self {
        Self(template.into())
    }

    /// The template source.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Whether the template references the partition id placeholder. Used to
    /// reject a `SharedIndex` rule that would allow cross-tenant id collisions.
    ///
    /// This is a plain substring check for the literal text `{partition}`; the
    /// template is not parsed here, so variations such as `{ partition }` do
    /// not count.
    #[must_use]
    pub fn references_partition(&self) -> bool {
        self.0.contains("{partition}")
    }
}
152
153/// A field the proxy injects into every ingested document (and strips on read).
154///
155/// The field *name* is chosen by the implementer (per the requirement that the
156/// SPI decides injected field names). The value is computed per-document from
157/// [`InjectedValue`].
158#[derive(Clone, PartialEq, Eq, Debug)]
159pub struct InjectedField {
160    /// The name of the field to inject.
161    pub name: FieldName,
162    /// How to compute the field's value.
163    pub value: InjectedValue,
164}
165
166impl InjectedField {
167    /// Constructs an injected field.
168    #[must_use]
169    pub fn new(name: FieldName, value: InjectedValue) -> Self {
170        Self { name, value }
171    }
172}
173
174/// How an [`InjectedField`]'s value is computed for a document.
175///
176/// Not `#[non_exhaustive]`: the proxy must resolve every value kind to inject a
177/// concrete value, so a new kind should force the resolver to be updated.
178#[derive(Clone, PartialEq, Eq, Debug)]
179pub enum InjectedValue {
180    /// The resolved partition id. This is the **isolation** value: the read path
181    /// filters on it, so it must be deterministic (the partition), not
182    /// context-derived. Exactly the fields whose value is `PartitionId` drive
183    /// read isolation.
184    PartitionId,
185    /// A fixed JSON value, the same for every document.
186    Constant(JsonValue),
187    /// A named attribute of the authenticated principal, resolved per request.
188    /// A *decorative* value: injected on write and stripped on read, never used
189    /// as a read filter (its value can differ between the write and the read).
190    FromPrincipal(String),
191    /// A named request header, resolved per request. Decorative like
192    /// [`InjectedValue::FromPrincipal`]: injected and stripped, never filtered.
193    /// Lets injection be dynamic from request context (e.g. a `_region` field
194    /// taken from an `x-region` header set by an upstream gateway).
195    FromHeader(String),
196}
197
198/// Declares which document field *values* observability may capture.
199///
200/// Drives value-suppression so observability never leaks tenant values (NFR-S2).
201/// The model is **deny-by-default (opt-out)**: every field is treated as
202/// sensitive unless explicitly allow-listed as safe. A field added to your
203/// documents tomorrow is protected automatically, you opt specific, known-safe
204/// fields *out* of redaction rather than remembering to opt every risky one in.
205///
206/// Use [`SensitivitySpec::allowing`] to name the shape-only, non-tenant fields
207/// that are safe to capture; [`SensitivitySpec::all_sensitive`] (the default)
208/// redacts everything; [`SensitivitySpec::nothing_sensitive`] is the explicit
209/// opt-out for data that carries no tenant values at all (e.g. test fixtures).
210#[derive(Clone, PartialEq, Eq, Debug)]
211pub struct SensitivitySpec {
212    /// Fields explicitly safe to capture. Consulted only in deny-by-default mode.
213    safe: Vec<FieldName>,
214    /// When `true` (default), every field not in `safe` is sensitive. When
215    /// `false`, nothing is sensitive (the explicit opt-out).
216    deny_by_default: bool,
217}
218
219impl Default for SensitivitySpec {
220    fn default() -> Self {
221        Self::all_sensitive()
222    }
223}
224
225impl SensitivitySpec {
226    /// Deny by default: every field's value is sensitive. The safe default.
227    #[must_use]
228    pub fn all_sensitive() -> Self {
229        Self {
230            safe: Vec::new(),
231            deny_by_default: true,
232        }
233    }
234
235    /// Deny by default, except the `safe` fields (known shape-only / non-tenant
236    /// values) which observability may capture.
237    #[must_use]
238    pub fn allowing(safe: Vec<FieldName>) -> Self {
239        Self {
240            safe,
241            deny_by_default: true,
242        }
243    }
244
245    /// Treat nothing as sensitive. An explicit opt-out for data that carries no
246    /// tenant values (e.g. test fixtures); never appropriate for tenant payloads.
247    #[must_use]
248    pub fn nothing_sensitive() -> Self {
249        Self {
250            safe: Vec::new(),
251            deny_by_default: false,
252        }
253    }
254
255    /// Alias for [`SensitivitySpec::nothing_sensitive`].
256    #[must_use]
257    pub fn none() -> Self {
258        Self::nothing_sensitive()
259    }
260
261    /// Whether `field`'s value is sensitive (deny-by-default: sensitive unless
262    /// explicitly allow-listed as safe).
263    #[must_use]
264    pub fn is_sensitive(&self, field: &FieldName) -> bool {
265        if self.deny_by_default {
266            !self.safe.contains(field)
267        } else {
268            false
269        }
270    }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the segments of `path` into owned strings for comparison.
    fn segments_of(path: &str) -> Vec<String> {
        JsonPath::new(path).segments().map(String::from).collect()
    }

    /// Shorthand for building a [`FieldName`] from a literal.
    fn field(name: &'static str) -> FieldName {
        FieldName::from(name)
    }

    #[test]
    fn json_path_splits_into_segments() {
        assert_eq!(segments_of("a.b.c"), ["a", "b", "c"]);
        assert_eq!(segments_of("flat"), ["flat"]);
    }

    #[test]
    fn id_template_detects_partition_reference() {
        let with_partition = IdTemplate::new("{partition}:{body.k}");
        let without_partition = IdTemplate::new("{body.k}");
        assert!(with_partition.references_partition());
        assert!(!without_partition.references_partition());
    }

    #[test]
    fn sensitivity_is_deny_by_default_with_an_allow_list() {
        // Deny-by-default: a field that was never named is still sensitive.
        let allow_status = SensitivitySpec::allowing(vec![field("status")]);
        assert!(
            allow_status.is_sensitive(&field("ssn")),
            "unknown ⇒ sensitive"
        );
        assert!(allow_status.is_sensitive(&field("brand_new_field")));
        assert!(
            !allow_status.is_sensitive(&field("status")),
            "explicitly allow-listed ⇒ safe"
        );

        // `all_sensitive` is the default and redacts every field.
        let redact_all = SensitivitySpec::all_sensitive();
        assert!(redact_all.is_sensitive(&field("anything")));
        assert_eq!(SensitivitySpec::default(), redact_all);

        // The explicit opt-out (and its alias) treats nothing as sensitive.
        for opt_out in [SensitivitySpec::nothing_sensitive(), SensitivitySpec::none()] {
            assert!(!opt_out.is_sensitive(&field("ssn")));
        }
    }
}
315}