Skip to main content

osproxy_rewrite/
fields.rs

1//! Injecting tenancy fields on ingest and stripping them on read.
2//!
3//! The two operations are inverses: a field [`inject_fields`] adds is removed by
4//! [`strip_fields`]. This symmetry is the heart of the shared-index isolation
5//! model (`docs/03`) and is exercised by a round-trip property test.
6
7use osproxy_core::json::object_top_level;
8use osproxy_core::FieldName;
9use serde_json::{Map, Value};
10
11use crate::error::RewriteError;
12
13/// Inserts each `(name, value)` into the top-level object of `doc`.
14///
15/// A field that already exists is a [`RewriteError::ReservedFieldCollision`],
16/// not an overwrite: a client must not be able to pre-seed a tenancy field and
17/// defeat isolation (`docs/03`).
18///
19/// # Errors
20///
21/// Returns [`RewriteError::NotAnObject`] if `doc` is not a JSON object, or
22/// [`RewriteError::ReservedFieldCollision`] if a field is already present.
23///
24/// # Examples
25///
26/// ```
27/// use serde_json::{json, Value};
28/// use osproxy_core::FieldName;
29/// use osproxy_rewrite::inject_fields;
30///
31/// let mut doc = json!({ "msg": "hi" });
32/// inject_fields(&mut doc, &[(FieldName::from("_tenant"), Value::from("acme"))]).unwrap();
33/// assert_eq!(doc["_tenant"], json!("acme"));
34/// ```
35pub fn inject_fields(doc: &mut Value, fields: &[(FieldName, Value)]) -> Result<(), RewriteError> {
36    let obj = doc.as_object_mut().ok_or(RewriteError::NotAnObject)?;
37    // Pre-check collisions so injection is all-or-nothing: a partial inject
38    // would leave the document in a half-tenanted state.
39    for (name, _) in fields {
40        if obj.contains_key(name.as_str()) {
41            return Err(RewriteError::ReservedFieldCollision {
42                field: name.clone(),
43            });
44        }
45    }
46    for (name, value) in fields {
47        obj.insert(name.as_str().to_owned(), value.clone());
48    }
49    Ok(())
50}
51
52/// Splices `fields` into the top level of the JSON object in `body`, returning
53/// the new bytes, **without parsing `body` into a `Value` or re-serializing it**
54/// (ADR-014). The body is scanned once for its top-level keys (to reject a
55/// spoofed reserved field) and the injected fields are written right after the
56/// opening `{`; the rest of the document is copied verbatim. The byte-level twin
57/// of [`inject_fields`] for the streaming write path.
58///
59/// A field that already exists is a [`RewriteError::ReservedFieldCollision`], as
60/// in [`inject_fields`]: a client must not pre-seed a tenancy field and defeat
61/// isolation (`docs/03`). Escaped key names are decoded before the check, so the
62/// collision cannot be smuggled past as `"_tenant"`.
63///
64/// # Errors
65///
66/// [`RewriteError::NotAnObject`] if `body` is not a JSON object,
67/// [`RewriteError::InvalidJson`] if it is malformed, or
68/// [`RewriteError::ReservedFieldCollision`] if an injected field is already
69/// present.
70///
71/// # Examples
72///
73/// ```
74/// use serde_json::Value;
75/// use osproxy_core::FieldName;
76/// use osproxy_rewrite::inject_fields_bytes;
77///
78/// let out = inject_fields_bytes(
79///     br#"{"msg":"hi"}"#,
80///     &[(FieldName::from("_tenant"), Value::from("acme"))],
81/// ).unwrap();
82/// assert_eq!(out, br#"{"_tenant":"acme","msg":"hi"}"#);
83/// ```
84pub fn inject_fields_bytes(
85    body: &[u8],
86    fields: &[(FieldName, Value)],
87) -> Result<Vec<u8>, RewriteError> {
88    let top = object_top_level(body)?;
89    if fields.is_empty() {
90        return Ok(body.to_vec());
91    }
92    for (name, _) in fields {
93        if top.keys.iter().any(|k| k == name.as_str()) {
94            return Err(RewriteError::ReservedFieldCollision {
95                field: name.clone(),
96            });
97        }
98    }
99    let mut injected: Vec<u8> = Vec::new();
100    for (idx, (name, value)) in fields.iter().enumerate() {
101        if idx > 0 {
102            injected.push(b',');
103        }
104        // Serializing a `&str` key and an in-memory `Value` into a `Vec` is
105        // infallible (no I/O, no non-string map keys, no NaN); the error arms are
106        // unreachable but kept so the splice fails closed rather than panics.
107        serde_json::to_writer(&mut injected, name.as_str())
108            .map_err(|_| RewriteError::InvalidJson)?;
109        injected.push(b':');
110        serde_json::to_writer(&mut injected, value).map_err(|_| RewriteError::InvalidJson)?;
111    }
112    let mut out = Vec::with_capacity(body.len() + injected.len() + 1);
113    out.extend_from_slice(&body[..top.insert_at]);
114    out.extend_from_slice(&injected);
115    if !top.empty {
116        out.push(b',');
117    }
118    out.extend_from_slice(&body[top.insert_at..]);
119    Ok(out)
120}
121
122/// Injects the tenancy fields into the `doc` and `upsert` sub-objects of an
123/// `_update` body (`docs/04` ยง3).
124///
125/// An update never replaces a whole document, so the fields are stamped into
126/// whichever sub-documents are present: a partial `doc` (re-asserting the
127/// tenancy fields, harmless on an existing doc) and the `upsert` (so an upsert
128/// that *creates* the document still carries its isolation fields). A sub-key
129/// that is absent is skipped; a `script`-only update with no `upsert` injects
130/// nothing (the targeted document already carries the fields).
131///
132/// # Errors
133///
134/// Returns [`RewriteError::NotAnObject`] if `update` itself, or a present
135/// `doc`/`upsert`, is not a JSON object, or
136/// [`RewriteError::ReservedFieldCollision`] if a sub-document already contains an
137/// injected field (a client must not pre-seed a tenancy field, `docs/03`).
138pub fn inject_update(
139    update: &mut Value,
140    fields: &[(FieldName, Value)],
141) -> Result<(), RewriteError> {
142    let obj = update.as_object_mut().ok_or(RewriteError::NotAnObject)?;
143    for key in ["doc", "upsert"] {
144        if let Some(sub) = obj.get_mut(key) {
145            inject_fields(sub, fields)?;
146        }
147    }
148    Ok(())
149}
150
151/// Removes each named field from the top-level object of `doc`, if present.
152///
153/// The inverse of [`inject_fields`]. Lenient by design: stripping a field that
154/// is absent (or a non-object body) is a no-op, because the read path must
155/// never fail just because a document predates a tenancy field.
156///
157/// Returns the number of fields actually removed (for a strip/inject symmetry
158/// assertion and observability).
159pub fn strip_fields(doc: &mut Value, names: &[FieldName]) -> usize {
160    let Some(obj): Option<&mut Map<String, Value>> = doc.as_object_mut() else {
161        return 0;
162    };
163    names
164        .iter()
165        .filter(|name| obj.remove(name.as_str()).is_some())
166        .count()
167}
168
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The single-field injection most tests use: `_tenant = "acme"`.
    fn tenant_acme() -> [(FieldName, Value); 1] {
        [(FieldName::from("_tenant"), Value::from("acme"))]
    }

    /// The error expected whenever a body already carries `_tenant`.
    fn tenant_collision() -> RewriteError {
        RewriteError::ReservedFieldCollision {
            field: FieldName::from("_tenant"),
        }
    }

    #[test]
    fn inject_then_strip_restores_original() {
        let before = json!({ "msg": "hi", "n": 3 });
        let fields = [
            (FieldName::from("_tenant"), Value::from("acme")),
            (FieldName::from("_epoch"), Value::from(5)),
        ];

        let mut doc = before.clone();
        inject_fields(&mut doc, &fields).unwrap();
        assert_eq!(doc["_tenant"], json!("acme"));

        let names: Vec<FieldName> = fields.iter().map(|(name, _)| name.clone()).collect();
        let removed = strip_fields(&mut doc, &names);
        assert_eq!(removed, 2);
        assert_eq!(doc, before);
    }

    #[test]
    fn collision_is_rejected_and_leaves_doc_untouched() {
        let mut doc = json!({ "_tenant": "evil", "msg": "hi" });

        let outcome = inject_fields(&mut doc, &tenant_acme());

        assert_eq!(outcome.unwrap_err(), tenant_collision());
        // Untouched: the spoofed value is still there (caller rejects the request).
        assert_eq!(doc["_tenant"], json!("evil"));
    }

    #[test]
    fn inject_update_stamps_doc_and_upsert() {
        let mut update = json!({
            "doc": { "msg": "hi" },
            "upsert": { "msg": "new" },
        });

        inject_update(&mut update, &tenant_acme()).unwrap();

        assert_eq!(update["doc"]["_tenant"], json!("acme"));
        assert_eq!(update["upsert"]["_tenant"], json!("acme"));
    }

    #[test]
    fn inject_update_rejects_spoofed_tenancy_field() {
        let mut update = json!({ "upsert": { "_tenant": "evil" } });

        let outcome = inject_update(&mut update, &tenant_acme());

        assert_eq!(outcome.unwrap_err(), tenant_collision());
    }

    #[test]
    fn inject_update_is_a_noop_without_doc_or_upsert() {
        let mut update = json!({ "script": { "source": "ctx._source.n++" } });

        inject_update(&mut update, &tenant_acme()).unwrap();

        assert_eq!(update["script"]["source"], "ctx._source.n++");
    }

    #[test]
    fn inject_into_non_object_fails() {
        let mut doc = json!([1, 2, 3]);
        let fields = [(FieldName::from("x"), Value::from(1))];

        let outcome = inject_fields(&mut doc, &fields);

        assert_eq!(outcome.unwrap_err(), RewriteError::NotAnObject);
    }

    #[test]
    fn strip_is_lenient_on_absent_and_non_object() {
        // Object without the field: nothing to remove.
        let mut doc = json!({ "msg": "hi" });
        assert_eq!(strip_fields(&mut doc, &[FieldName::from("_tenant")]), 0);

        // Not an object at all: still a no-op rather than an error.
        let mut arr = json!([1]);
        assert_eq!(strip_fields(&mut arr, &[FieldName::from("x")]), 0);
    }
}