Skip to main content

omnigraph_server/
graph_id.rs

//! `GraphId` — registry-level identity for a graph in multi-graph mode (MR-668).
//!
//! Validation lives in `GraphId::try_from` (implemented for both `String` and
//! `&str`); the custom `Deserialize` impl delegates to it, so code outside this
//! module cannot obtain an unvalidated `GraphId`. The newtype prevents
//! `graph_id` strings from escaping the storage root via path traversal or
//! colliding with engine-reserved filenames.
//!
//! Regex: `^[a-zA-Z0-9-]{1,64}$`
//!
//! The engine reserves every filename starting with `_` at the graph root
//! (`_schema.pg`, `_schema.ir.json`, `__schema_state.json`, `__manifest/`,
//! `__recovery/`, etc.). The regex admits no underscores at all — which in
//! particular rules out a leading one — so a `graph_id` can never collide with
//! engine-managed files. Path traversal (`..`, `/`) is unrepresentable.
//!
//! A few names satisfy the regex but are reserved anyway (see `is_reserved`):
//! `policies` is a future-proofing measure for a potential
//! `/graphs/policies/...` cluster route, and `healthz`, `openapi`, and `graphs`
//! would collide with top-level route prefixes or endpoint names.
17
18use std::fmt;
19use std::sync::OnceLock;
20
21use color_eyre::eyre::{Result, bail};
22use regex::Regex;
23use serde::{Deserialize, Serialize};
24
/// Maximum length of a `GraphId` value.
///
/// `validate` rejects longer input with a dedicated error message. The same
/// bound is spelled out as `{1,64}` in the pattern compiled by `regex()`, so
/// the two must be changed together.
pub const GRAPH_ID_MAX_LEN: usize = 64;
27
28/// Validated registry-level identity for a graph.
29///
30/// Constructed only via `GraphId::try_from(String)` or
31/// `GraphId::try_from(&str)`. The inner `String` is private to enforce the
32/// validation contract.
33#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)]
34#[serde(transparent)]
35pub struct GraphId(String);
36
37impl GraphId {
38    /// View the validated identifier as `&str`.
39    pub fn as_str(&self) -> &str {
40        &self.0
41    }
42}
43
44impl fmt::Display for GraphId {
45    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46        f.write_str(&self.0)
47    }
48}
49
50impl AsRef<str> for GraphId {
51    fn as_ref(&self) -> &str {
52        &self.0
53    }
54}
55
56impl TryFrom<String> for GraphId {
57    type Error = color_eyre::eyre::Error;
58
59    fn try_from(value: String) -> Result<Self> {
60        validate(value.as_str())?;
61        Ok(Self(value))
62    }
63}
64
65impl TryFrom<&str> for GraphId {
66    type Error = color_eyre::eyre::Error;
67
68    fn try_from(value: &str) -> Result<Self> {
69        validate(value)?;
70        Ok(Self(value.to_string()))
71    }
72}
73
74// Custom Deserialize that re-runs validation. Otherwise a serde-derived impl
75// would accept any String, defeating the newtype's guarantee.
76impl<'de> Deserialize<'de> for GraphId {
77    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
78    where
79        D: serde::Deserializer<'de>,
80    {
81        let s = String::deserialize(deserializer)?;
82        Self::try_from(s).map_err(serde::de::Error::custom)
83    }
84}
85
86fn validate(value: &str) -> Result<()> {
87    if value.is_empty() {
88        bail!("graph_id must not be empty");
89    }
90    if value.len() > GRAPH_ID_MAX_LEN {
91        bail!(
92            "graph_id '{}' is {} chars; max {}",
93            value,
94            value.len(),
95            GRAPH_ID_MAX_LEN
96        );
97    }
98    if !regex().is_match(value) {
99        bail!(
100            "graph_id '{}' must match ^[a-zA-Z0-9-]{{1,64}}$ — \
101             no underscores (engine reserves them), no path separators, no unicode",
102            value
103        );
104    }
105    if is_reserved(value) {
106        bail!(
107            "graph_id '{}' is reserved (would collide with engine-managed names or \
108             future cluster routes)",
109            value
110        );
111    }
112    Ok(())
113}
114
115fn regex() -> &'static Regex {
116    static RE: OnceLock<Regex> = OnceLock::new();
117    RE.get_or_init(|| Regex::new(r"^[a-zA-Z0-9-]{1,64}$").expect("regex literal"))
118}
119
/// Tells whether `value` is a name that passes the regex but must still be
/// refused.
///
/// Engine-managed filenames (`_schema.pg`, `__manifest`, ...) all contain an
/// underscore and dot-containing names such as `openapi.json` contain a dot;
/// the regex `^[a-zA-Z0-9-]{1,64}$` (see `regex()`) rejects both shapes, so
/// they never reach this check.
///
/// What remains are route prefixes and top-level endpoint names spelled with
/// only regex-legal characters. The comparison is exact and case-sensitive.
fn is_reserved(value: &str) -> bool {
    const RESERVED: [&str; 4] = ["policies", "healthz", "openapi", "graphs"];
    RESERVED.contains(&value)
}
133
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn accepts_simple_alphanumeric_ids() {
        let valid_ids = ["alpha", "beta", "tenant-001", "A", "g", "X-9-z"];
        for candidate in valid_ids {
            assert!(
                GraphId::try_from(candidate).is_ok(),
                "expected accept: {candidate}"
            );
        }
    }

    #[test]
    fn accepts_64_char_max() {
        let longest_allowed = "a".repeat(64);
        GraphId::try_from(&*longest_allowed).expect("a 64-char id is within the limit");
    }

    #[test]
    fn rejects_empty() {
        let outcome = GraphId::try_from("");
        assert!(outcome.is_err());
    }

    #[test]
    fn rejects_over_64_chars() {
        let one_too_many = "a".repeat(65);
        let outcome = GraphId::try_from(&*one_too_many);
        assert!(outcome.is_err());
    }

    #[test]
    fn rejects_leading_underscore() {
        // Every `_*` filename at the graph root belongs to the engine.
        for engine_like in ["_internal", "__manifest"] {
            assert!(GraphId::try_from(engine_like).is_err());
        }
    }

    #[test]
    fn rejects_underscores_anywhere() {
        // `_` is absent from the regex's character class entirely, which is
        // what makes the no-leading-`_` rule cheap to enforce. Should the rule
        // ever be relaxed, "starts with `_`" and "contains `_`" would have to
        // be told apart.
        let outcome = GraphId::try_from("tenant_alpha");
        assert!(outcome.is_err());
    }

    #[test]
    fn rejects_path_separators() {
        let traversal_attempts = ["alpha/beta", "../etc", "..", "alpha\\beta"];
        for attempt in traversal_attempts {
            let outcome = GraphId::try_from(attempt);
            assert!(outcome.is_err(), "expected reject: {attempt}");
        }
    }

    #[test]
    fn rejects_unicode() {
        for non_ascii in ["αlpha", "graph-✨"] {
            assert!(GraphId::try_from(non_ascii).is_err());
        }
    }

    #[test]
    fn rejects_whitespace() {
        for spaced in [" alpha", "alpha ", "alpha beta", "\talpha"] {
            assert!(GraphId::try_from(spaced).is_err());
        }
    }

    #[test]
    fn rejects_dots() {
        // Keeps filename-looking ("extension"-shaped) ids out.
        for dotted in [".", "alpha.beta", "alpha."] {
            assert!(GraphId::try_from(dotted).is_err());
        }
    }

    #[test]
    fn rejects_reserved_route_names() {
        // These all satisfy the regex; they are refused because they would
        // collide with top-level route prefixes / endpoint names. Names that
        // contain a dot (e.g. `openapi.json`) are the regex's job, not this
        // list's — `rejects_dots` above covers them.
        let reserved = ["policies", "healthz", "openapi", "graphs"];
        for name in reserved {
            let outcome = GraphId::try_from(name);
            assert!(outcome.is_err(), "expected reject (reserved): {name}");
        }
    }

    #[test]
    fn display_returns_inner_string() {
        let id = GraphId::try_from("alpha").unwrap();
        let rendered = format!("{id}");
        assert_eq!(rendered, "alpha");
        assert_eq!(id.as_str(), "alpha");
    }

    #[test]
    fn serialize_round_trips_via_json() {
        let original = GraphId::try_from("tenant-007").unwrap();

        let encoded = serde_json::to_string(&original).unwrap();
        assert_eq!(encoded, "\"tenant-007\"");

        let decoded: GraphId = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn deserialize_runs_validation() {
        // A hostile payload must never come out as a GraphId.
        for payload in ["\"_evil\"", "\"../../etc\""] {
            let outcome = serde_json::from_str::<GraphId>(payload);
            assert!(outcome.is_err());
        }
    }

    #[test]
    fn hash_equality_works_for_use_as_map_key() {
        use std::collections::HashMap;

        let mut by_id = HashMap::new();
        by_id.insert(GraphId::try_from("alpha").unwrap(), 1u32);

        // A separately constructed but equal id must find the same entry.
        let lookup_key = GraphId::try_from("alpha").unwrap();
        assert_eq!(by_id.get(&lookup_key), Some(&1));
    }
}