Skip to main content

osproxy_transport/
classify.rs

//! Classifying an OpenSearch REST path into an [`EndpointKind`].
//!
//! A small, explicit matcher over the path segments; the supported matrix is
//! version-tracked in `docs/specs/opensearch-endpoints.md`. M1 fully handles
//! single-document ingest (`_doc`/`_create`); other shapes are classified so the
//! pipeline can reject them with a precise reason rather than mis-handle them.
7
8use osproxy_core::EndpointKind;
9use osproxy_spi::HttpMethod;
10
11/// The result of classifying a request path.
12#[derive(Clone, PartialEq, Eq, Debug)]
13pub struct Classified {
14    /// The endpoint class.
15    pub endpoint: EndpointKind,
16    /// The logical index (first path segment), empty if none.
17    pub logical_index: String,
18    /// The document id, if the path carries one.
19    pub doc_id: Option<String>,
20}
21
22/// Classifies a `method` + `path` into an endpoint, logical index, and doc id.
23///
24/// The path's query string, if any, must already be stripped by the caller.
25#[must_use]
26pub fn classify(method: HttpMethod, path: &str) -> Classified {
27    // No classified endpoint has more than three meaningful path segments
28    // (`/{index}/{verb}/{id}`), and the `Admin` arm only inspects the first. So
29    // collect at most four segments onto the stack, the fourth's mere presence
30    // forces anything longer than a three-segment shape to the `Unknown`/`Admin`
31    // arms, exactly as a full `Vec` would, but without a per-request heap
32    // allocation (classify runs on every request).
33    let mut buf = [""; 4];
34    let mut count = 0usize;
35    for seg in path.split('/').filter(|s| !s.is_empty()) {
36        if count < buf.len() {
37            buf[count] = seg;
38        }
39        count += 1;
40    }
41    let segments = &buf[..count.min(buf.len())];
42    match segments {
43        // /{index}/_doc/{id} and /{index}/_create/{id}
44        [index, verb @ ("_doc" | "_create"), id] => Classified {
45            endpoint: by_id_endpoint(method, verb),
46            logical_index: (*index).to_owned(),
47            doc_id: Some((*id).to_owned()),
48        },
49        // /{index}/_doc (auto-id ingest)
50        [index, "_doc"] => Classified {
51            endpoint: doc_endpoint(method),
52            logical_index: (*index).to_owned(),
53            doc_id: None,
54        },
55        // Cursor lifecycle, scroll & PIT, bound to the cluster that created them
56        // (`docs/03` §6). These carry a wrapped cursor the engine unwraps to route
57        // to the pinned cluster; the path-form scroll id rides in `doc_id`.
58        //   /_search/scroll (body-form scroll continue/clear) and
59        //   /_search/point_in_time (PIT delete), both carry the wrapped cursor in
60        //   the body, no logical index. (OpenSearch's PIT endpoint is
61        //   `_search/point_in_time`, not Elasticsearch's `_pit`, see
62        //   `docs/specs/opensearch-endpoints.md`.)
63        ["_search", "scroll" | "point_in_time"] => classified(EndpointKind::Cursor, ""),
64        //   /_search/scroll/{scroll_id} (path-form continue/clear)
65        ["_search", "scroll", scroll_id] => Classified {
66            endpoint: EndpointKind::Cursor,
67            logical_index: String::new(),
68            doc_id: Some((*scroll_id).to_owned()),
69        },
70        //   /{index}/_search/point_in_time (PIT create, resolves the index's
71        //   cluster, wraps the returned `pit_id`).
72        [index, "_search", "point_in_time"] => classified(EndpointKind::Cursor, index),
73        // /_search with no index, a PIT search (the PIT defines the index set);
74        // the engine reads the `pit` in the body and routes to its pinned cluster.
75        ["_search"] => classified(EndpointKind::Search, ""),
76        // /{index}/_search and /{index}/_count
77        [index, "_search"] => classified(EndpointKind::Search, index),
78        [index, "_count"] => classified(EndpointKind::Count, index),
79        // /_mget and /{index}/_mget
80        ["_mget"] => classified(EndpointKind::MultiGet, ""),
81        [index, "_mget"] => classified(EndpointKind::MultiGet, index),
82        // /_msearch and /{index}/_msearch
83        ["_msearch"] => classified(EndpointKind::MultiSearch, ""),
84        [index, "_msearch"] => classified(EndpointKind::MultiSearch, index),
85        // /_bulk and /{index}/_bulk
86        ["_bulk"] => Classified {
87            endpoint: EndpointKind::IngestBulk,
88            logical_index: String::new(),
89            doc_id: None,
90        },
91        [index, "_bulk"] => classified(EndpointKind::IngestBulk, index),
92        // /{index}/_delete_by_query, only honorable in async fan-out mode, where
93        // the engine expands it to a delete per match; rejected otherwise
94        // (`docs/04` §9). `_update_by_query` is intentionally NOT classified, it
95        // needs a scripted read-modify-write the proxy cannot do, so it falls
96        // through to `Unknown` and is rejected.
97        [index, "_delete_by_query"] => classified(EndpointKind::DeleteByQuery, index),
98        // Administrative endpoints (`_cat/*`, `_cluster/*`, `_nodes/*`): no tenancy
99        // semantics, classified `Admin` so the engine can pass them through to an
100        // operator-allow-listed cluster, or reject (the default). The full path is
101        // forwarded verbatim, so no segment is captured (`docs/specs/
102        // opensearch-endpoints.md`). Placed last so it cannot shadow a tenancy path.
103        [first, ..] if matches!(*first, "_cat" | "_cluster" | "_nodes") => {
104            classified(EndpointKind::Admin, "")
105        }
106        _ => Classified {
107            endpoint: EndpointKind::Unknown,
108            logical_index: segments
109                .first()
110                .map(|s| (*s).to_owned())
111                .unwrap_or_default(),
112            doc_id: None,
113        },
114    }
115}
116
117/// Endpoint for `/{index}/_doc/{id}` / `_create/{id}`, by method.
118fn by_id_endpoint(method: HttpMethod, verb: &str) -> EndpointKind {
119    match method {
120        HttpMethod::Get | HttpMethod::Head => EndpointKind::GetById,
121        HttpMethod::Delete => EndpointKind::DeleteById,
122        // _create is always an ingest; _doc PUT/POST is ingest too.
123        HttpMethod::Put | HttpMethod::Post if verb == "_create" || verb == "_doc" => {
124            EndpointKind::IngestDoc
125        }
126        // PUT/POST of an unrecognized verb, or a future method: treat as
127        // unsupported rather than mis-routing.
128        _ => EndpointKind::Unknown,
129    }
130}
131
132/// Endpoint for `/{index}/_doc` (no id), by method.
133fn doc_endpoint(method: HttpMethod) -> EndpointKind {
134    match method {
135        HttpMethod::Post | HttpMethod::Put => EndpointKind::IngestDoc,
136        _ => EndpointKind::Unknown,
137    }
138}
139
140/// Helper for an endpoint that carries a logical index but no doc id.
141fn classified(endpoint: EndpointKind, index: &str) -> Classified {
142    Classified {
143        endpoint,
144        logical_index: index.to_owned(),
145        doc_id: None,
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Classifies `path` and returns only the endpoint kind.
    fn kind(method: HttpMethod, path: &str) -> EndpointKind {
        classify(method, path).endpoint
    }

    #[test]
    fn put_doc_with_id_is_ingest() {
        let got = classify(HttpMethod::Put, "/orders/_doc/acme:1");
        let want = Classified {
            endpoint: EndpointKind::IngestDoc,
            logical_index: "orders".to_owned(),
            doc_id: Some("acme:1".to_owned()),
        };
        assert_eq!(got, want);
    }

    #[test]
    fn post_doc_without_id_is_ingest() {
        let got = classify(HttpMethod::Post, "/orders/_doc");
        let want = Classified {
            endpoint: EndpointKind::IngestDoc,
            logical_index: "orders".to_owned(),
            doc_id: None,
        };
        assert_eq!(got, want);
    }

    #[test]
    fn get_and_delete_by_id_are_classified() {
        let cases = [
            (HttpMethod::Get, EndpointKind::GetById),
            (HttpMethod::Delete, EndpointKind::DeleteById),
        ];
        for (method, want) in cases {
            assert_eq!(kind(method, "/orders/_doc/1"), want);
        }
    }

    #[test]
    fn search_count_and_bulk() {
        let cases = [
            (HttpMethod::Post, "/orders/_search", EndpointKind::Search),
            (HttpMethod::Get, "/orders/_count", EndpointKind::Count),
            (HttpMethod::Post, "/_bulk", EndpointKind::IngestBulk),
            (HttpMethod::Post, "/_mget", EndpointKind::MultiGet),
            (HttpMethod::Post, "/orders/_mget", EndpointKind::MultiGet),
            (HttpMethod::Post, "/_msearch", EndpointKind::MultiSearch),
            (HttpMethod::Post, "/orders/_msearch", EndpointKind::MultiSearch),
            (HttpMethod::Post, "/orders/_bulk", EndpointKind::IngestBulk),
        ];
        for (method, path, want) in cases {
            assert_eq!(kind(method, path), want, "{path}");
        }
    }

    #[test]
    fn scroll_and_pit_paths_are_cursor() {
        // Scroll continue/clear: body form first, then path form.
        assert_eq!(
            kind(HttpMethod::Post, "/_search/scroll"),
            EndpointKind::Cursor
        );

        let path_form = classify(HttpMethod::Get, "/_search/scroll/c2Nyb2xs");
        assert_eq!(path_form.endpoint, EndpointKind::Cursor);
        assert_eq!(path_form.doc_id.as_deref(), Some("c2Nyb2xs"));

        let scroll_clear = classify(HttpMethod::Delete, "/_search/scroll");
        assert!(
            scroll_clear.logical_index.is_empty(),
            "scroll clear carries no logical index"
        );

        // PIT create resolves the named index's cluster; PIT delete does not.
        // OpenSearch's PIT endpoint is `_search/point_in_time`, not ES's `_pit`.
        let pit_create = classify(HttpMethod::Post, "/orders/_search/point_in_time");
        assert_eq!(pit_create.endpoint, EndpointKind::Cursor);
        assert_eq!(pit_create.logical_index, "orders");

        let pit_delete = classify(HttpMethod::Delete, "/_search/point_in_time");
        assert_eq!(pit_delete.endpoint, EndpointKind::Cursor);
        assert!(pit_delete.logical_index.is_empty());
    }

    #[test]
    fn a_no_index_search_classifies_as_search() {
        // `POST /_search` (no index) is a PIT search; the engine reads the body.
        let got = classify(HttpMethod::Post, "/_search");
        assert_eq!(got.endpoint, EndpointKind::Search);
        assert!(got.logical_index.is_empty());
    }

    #[test]
    fn a_real_search_is_not_mistaken_for_a_cursor() {
        // `_search` on an index is a normal search; only `_search/scroll` is a
        // cursor, so the cursor arms must not shadow the search arm.
        assert_eq!(
            kind(HttpMethod::Post, "/orders/_search"),
            EndpointKind::Search
        );
    }

    #[test]
    fn admin_endpoints_classify_as_admin() {
        for path in ["/_cat/indices", "/_cluster/health", "/_nodes/stats"] {
            let got = classify(HttpMethod::Get, path);
            assert_eq!(got.endpoint, EndpointKind::Admin, "{path}");
            assert!(
                got.logical_index.is_empty(),
                "{path} carries no logical index"
            );
        }
        // An index literally named `_catalog` is not an admin path (prefix-exact).
        assert_eq!(
            kind(HttpMethod::Post, "/_catalog/_search"),
            EndpointKind::Search
        );
    }

    #[test]
    fn unknown_paths_classify_as_unknown() {
        // `_cluster/*` is classified `Admin` (see `admin_endpoints_*`), so the
        // second case uses a genuinely unmatched path.
        for path in ["/", "/_sql"] {
            assert_eq!(kind(HttpMethod::Get, path), EndpointKind::Unknown, "{path}");
        }
    }

    #[test]
    fn create_verb_is_always_ingest() {
        assert_eq!(
            kind(HttpMethod::Put, "/orders/_create/1"),
            EndpointKind::IngestDoc
        );
    }
}