osproxy_spi/rules.rs
1//! Declarative tenancy rules an implementer provides through [`TenancySpi`].
2//!
3//! These types are pure data: how to find the partition id, how to build the
4//! document `_id`, which fields to inject, and which to treat as sensitive. The
5//! [`crate::TenancySpi`] returns them; `osproxy-tenancy` interprets them. The
//! interpretation is symmetric: a field injected on ingest is stripped on read
7//! (`docs/02` §2, `docs/03`).
8//!
9//! [`TenancySpi`]: crate::TenancySpi
10
11use osproxy_core::FieldName;
12use serde_json::Value as JsonValue;
13
/// A dotted path into a JSON document, e.g. `tenant_id` or `meta.tenant`.
///
/// A deliberately small subset of JSONPath: a sequence of object keys. Array
/// indexing and wildcards are not supported in M1; the partition key is a
/// scalar field on the document root or a nested object. The supported grammar
/// is version-tracked in `docs/specs/opensearch-endpoints.md`.
///
/// The path is stored exactly as written; construction performs no validation.
///
/// # Examples
///
/// ```
/// use osproxy_spi::JsonPath;
///
/// let p = JsonPath::new("meta.tenant");
/// assert_eq!(p.segments().collect::<Vec<_>>(), ["meta", "tenant"]);
/// ```
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct JsonPath(String);

impl JsonPath {
    /// Constructs a path from a dotted string, keeping it verbatim.
    #[must_use]
    pub fn new(path: impl Into<String>) -> Self {
        Self(path.into())
    }

    /// The dotted path as written.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Iterates the path's object-key segments in order.
    ///
    /// Segments are produced by splitting on `.`, so an empty path yields a
    /// single empty segment and consecutive dots yield an empty segment
    /// between them.
    // No bare `#[must_use]` here: the returned iterator is already must-use.
    pub fn segments(&self) -> impl Iterator<Item = &str> {
        self.0.split('.')
    }
}
49
50/// How to find the partition id in a request.
51///
52/// Not `#[non_exhaustive]`: the resolver must handle every source kind, so a new
53/// source should force the resolver to be updated rather than silently fail to
54/// resolve.
55#[derive(Clone, PartialEq, Eq, Debug)]
56pub enum PartitionKeySpec {
57 /// A JSON path into the document body (ingest path).
58 BodyField(JsonPath),
59 /// A request header carries it (e.g. set by an upstream auth gateway).
60 Header(String),
61 /// Derived from a [`crate::Principal`] attribute of this name.
62 PrincipalAttr(String),
63 /// Try each in order until one resolves.
64 AnyOf(Vec<PartitionKeySpec>),
65}
66
/// The payload-free kind tag of a [`PartitionKeySpec`].
///
/// Carried by [`crate::SpiError::PartitionUnresolved`] to report *which*
/// sources were tried. This is shape-only telemetry: it never includes the
/// values that were looked for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum PartitionKeySpecKind {
    /// Tag for [`PartitionKeySpec::BodyField`].
    BodyField,
    /// Tag for [`PartitionKeySpec::Header`].
    Header,
    /// Tag for [`PartitionKeySpec::PrincipalAttr`].
    PrincipalAttr,
}
81
82/// Rule to construct a document `_id`.
83///
84/// In `SharedIndex` placement the partition id MUST appear in the template so
85/// ids cannot collide across tenants sharing one physical index (`docs/03`).
86/// `osproxy-tenancy` enforces this.
87///
88/// # Examples
89///
90/// ```
91/// use osproxy_spi::{DocIdRule, IdTemplate};
92///
93/// let rule = DocIdRule::new(IdTemplate::new("{partition}:{body.order_id}"))
94/// .with_routing(true);
95/// assert!(rule.set_routing);
96/// assert!(rule.template.references_partition());
97/// ```
98#[derive(Clone, PartialEq, Eq, Debug)]
99pub struct DocIdRule {
100 /// The id template, e.g. `{partition}:{body.natural_key}`.
101 pub template: IdTemplate,
102 /// Also set OpenSearch `_routing` to the partition id, so the document
103 /// lands on a deterministic shard for the partition.
104 pub set_routing: bool,
105}
106
107impl DocIdRule {
108 /// Constructs a rule from a template, with routing off.
109 #[must_use]
110 pub fn new(template: IdTemplate) -> Self {
111 Self {
112 template,
113 set_routing: false,
114 }
115 }
116
117 /// Sets `set_routing` (builder style).
118 #[must_use]
119 pub fn with_routing(mut self, set_routing: bool) -> Self {
120 self.set_routing = set_routing;
121 self
122 }
123}
124
/// A document-`_id` template with `{partition}` and `{body.<path>}` placeholders.
///
/// Interpretation lives in `osproxy-rewrite`; this is just the parsed-on-demand
/// source string. `{partition}` expands to the resolved partition id;
/// `{body.<path>}` expands to a scalar pulled from the document at `<path>`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct IdTemplate(String);

impl IdTemplate {
    /// The placeholder that stands for the resolved partition id.
    const PARTITION_PLACEHOLDER: &'static str = "{partition}";

    /// Constructs a template from its source string, keeping it verbatim.
    #[must_use]
    pub fn new(template: impl Into<String>) -> Self {
        Self(template.into())
    }

    /// The template source.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Whether the template references the partition id placeholder. Used to
    /// reject a `SharedIndex` rule that would allow cross-tenant id collisions.
    ///
    /// This is a literal substring check for `{partition}` on the template
    /// source; the template is not parsed here.
    #[must_use]
    pub fn references_partition(&self) -> bool {
        self.0.contains(Self::PARTITION_PLACEHOLDER)
    }
}
152
153/// A field the proxy injects into every ingested document (and strips on read).
154///
155/// The field *name* is chosen by the implementer (per the requirement that the
156/// SPI decides injected field names). The value is computed per-document from
157/// [`InjectedValue`].
158#[derive(Clone, PartialEq, Eq, Debug)]
159pub struct InjectedField {
160 /// The name of the field to inject.
161 pub name: FieldName,
162 /// How to compute the field's value.
163 pub value: InjectedValue,
164}
165
166impl InjectedField {
167 /// Constructs an injected field.
168 #[must_use]
169 pub fn new(name: FieldName, value: InjectedValue) -> Self {
170 Self { name, value }
171 }
172}
173
174/// How an [`InjectedField`]'s value is computed for a document.
175///
176/// Not `#[non_exhaustive]`: the proxy must resolve every value kind to inject a
177/// concrete value, so a new kind should force the resolver to be updated.
178#[derive(Clone, PartialEq, Eq, Debug)]
179pub enum InjectedValue {
180 /// The resolved partition id. This is the **isolation** value: the read path
181 /// filters on it, so it must be deterministic (the partition), not
182 /// context-derived. Exactly the fields whose value is `PartitionId` drive
183 /// read isolation.
184 PartitionId,
185 /// A fixed JSON value, the same for every document.
186 Constant(JsonValue),
187 /// A named attribute of the authenticated principal, resolved per request.
188 /// A *decorative* value: injected on write and stripped on read, never used
189 /// as a read filter (its value can differ between the write and the read).
190 FromPrincipal(String),
191 /// A named request header, resolved per request. Decorative like
192 /// [`InjectedValue::FromPrincipal`]: injected and stripped, never filtered.
193 /// Lets injection be dynamic from request context (e.g. a `_region` field
194 /// taken from an `x-region` header set by an upstream gateway).
195 FromHeader(String),
196}
197
198/// Declares which document field *values* observability may capture.
199///
200/// Drives value-suppression so observability never leaks tenant values (NFR-S2).
201/// The model is **deny-by-default (opt-out)**: every field is treated as
202/// sensitive unless explicitly allow-listed as safe. A field added to your
203/// documents tomorrow is protected automatically, you opt specific, known-safe
204/// fields *out* of redaction rather than remembering to opt every risky one in.
205///
206/// Use [`SensitivitySpec::allowing`] to name the shape-only, non-tenant fields
207/// that are safe to capture; [`SensitivitySpec::all_sensitive`] (the default)
208/// redacts everything; [`SensitivitySpec::nothing_sensitive`] is the explicit
209/// opt-out for data that carries no tenant values at all (e.g. test fixtures).
210#[derive(Clone, PartialEq, Eq, Debug)]
211pub struct SensitivitySpec {
212 /// Fields explicitly safe to capture. Consulted only in deny-by-default mode.
213 safe: Vec<FieldName>,
214 /// When `true` (default), every field not in `safe` is sensitive. When
215 /// `false`, nothing is sensitive (the explicit opt-out).
216 deny_by_default: bool,
217}
218
219impl Default for SensitivitySpec {
220 fn default() -> Self {
221 Self::all_sensitive()
222 }
223}
224
225impl SensitivitySpec {
226 /// Deny by default: every field's value is sensitive. The safe default.
227 #[must_use]
228 pub fn all_sensitive() -> Self {
229 Self {
230 safe: Vec::new(),
231 deny_by_default: true,
232 }
233 }
234
235 /// Deny by default, except the `safe` fields (known shape-only / non-tenant
236 /// values) which observability may capture.
237 #[must_use]
238 pub fn allowing(safe: Vec<FieldName>) -> Self {
239 Self {
240 safe,
241 deny_by_default: true,
242 }
243 }
244
245 /// Treat nothing as sensitive. An explicit opt-out for data that carries no
246 /// tenant values (e.g. test fixtures); never appropriate for tenant payloads.
247 #[must_use]
248 pub fn nothing_sensitive() -> Self {
249 Self {
250 safe: Vec::new(),
251 deny_by_default: false,
252 }
253 }
254
255 /// Alias for [`SensitivitySpec::nothing_sensitive`].
256 #[must_use]
257 pub fn none() -> Self {
258 Self::nothing_sensitive()
259 }
260
261 /// Whether `field`'s value is sensitive (deny-by-default: sensitive unless
262 /// explicitly allow-listed as safe).
263 #[must_use]
264 pub fn is_sensitive(&self, field: &FieldName) -> bool {
265 if self.deny_by_default {
266 !self.safe.contains(field)
267 } else {
268 false
269 }
270 }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// A dotted path splits into its keys; a path without dots is one segment.
    #[test]
    fn json_path_splits_into_segments() {
        let nested = JsonPath::new("a.b.c");
        assert_eq!(nested.segments().collect::<Vec<_>>(), ["a", "b", "c"]);

        let flat = JsonPath::new("flat");
        assert_eq!(flat.segments().collect::<Vec<_>>(), ["flat"]);
    }

    /// Only a template containing `{partition}` counts as referencing it.
    #[test]
    fn id_template_detects_partition_reference() {
        let with_partition = IdTemplate::new("{partition}:{body.k}");
        let without_partition = IdTemplate::new("{body.k}");

        assert!(with_partition.references_partition());
        assert!(!without_partition.references_partition());
    }

    /// Exercises the allow-list, the default and the explicit opt-out.
    #[test]
    fn sensitivity_is_deny_by_default_with_an_allow_list() {
        let ssn = FieldName::from("ssn");

        // Deny-by-default: an unknown field is sensitive, even one never named.
        let allow_listed = SensitivitySpec::allowing(vec![FieldName::from("status")]);
        assert!(allow_listed.is_sensitive(&ssn), "unknown ⇒ sensitive");
        assert!(allow_listed.is_sensitive(&FieldName::from("brand_new_field")));
        assert!(
            !allow_listed.is_sensitive(&FieldName::from("status")),
            "explicitly allow-listed ⇒ safe"
        );

        // `all_sensitive` (the default) redacts everything.
        let everything = SensitivitySpec::all_sensitive();
        assert!(everything.is_sensitive(&FieldName::from("anything")));
        assert_eq!(SensitivitySpec::default(), everything);

        // The explicit opt-out treats nothing as sensitive.
        assert!(!SensitivitySpec::nothing_sensitive().is_sensitive(&ssn));
        assert!(!SensitivitySpec::none().is_sensitive(&ssn));
    }
}
315}