Skip to main content

stoa_core/
id.rs

1//! Page-id parsing + routing (ARCHITECTURE §2).
2//!
3//! Stoa page ids are slug-style and carry a stable prefix that maps directly
4//! to a wiki sub-directory:
5//!
6//! | Prefix | Directory       |
7//! |--------|-----------------|
8//! | `ent-` | `wiki/entities/` |
9//! | `con-` | `wiki/concepts/` |
10//! | `syn-` | `wiki/synthesis/` |
11//!
12//! This prefix→dir mapping is the only routing rule in the system.
13
/// One of the three canonical wiki sub-directories a page can be routed to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PageDir {
    /// `wiki/entities/`
    Entities,
    /// `wiki/concepts/`
    Concepts,
    /// `wiki/synthesis/`
    Synthesis,
}

impl PageDir {
    /// Relative sub-directory name under `wiki/` (a bare name, no slashes).
    ///
    /// `const` so callers can build compile-time path tables from it.
    #[must_use]
    pub const fn as_subdir(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must fail to compile here
        // until it is given a directory name.
        match self {
            Self::Entities => "entities",
            Self::Concepts => "concepts",
            Self::Synthesis => "synthesis",
        }
    }

    /// All three directories in canonical order: entities, concepts,
    /// synthesis.
    ///
    /// `const` so the list can initialise `const`/`static` items.
    #[must_use]
    pub const fn all() -> [Self; 3] {
        [Self::Entities, Self::Concepts, Self::Synthesis]
    }
}
42
/// Max total id length in bytes, including the `xxx-` prefix. Prevents
/// pathologically long filenames (most filesystems cap a name at 255 bytes).
45const MAX_ID_LEN: usize = 128;
46
47/// A wiki page id with its routing dir resolved.
48///
49/// Construction is restricted to [`Id::parse`], which enforces the slug
50/// grammar `(ent|con|syn)-[a-z0-9]+(-[a-z0-9]+)*` — no `..`, `/`, `\`,
51/// uppercase, NUL, or any character outside `[a-z0-9-]`. This makes the
52/// id structurally safe to interpolate into a filesystem path.
53#[derive(Debug, Clone, PartialEq, Eq, Hash)]
54pub struct Id {
55    /// Full id including the prefix (e.g. `ent-redis`).
56    pub raw: String,
57    /// Directory derived from the id prefix.
58    pub dir: PageDir,
59}
60
61impl Id {
62    /// Parse an id, classifying by prefix and enforcing the slug grammar.
63    /// Returns `None` for unknown prefixes, invalid characters, empty
64    /// suffixes, or ids longer than 128 bytes.
65    #[must_use]
66    pub fn parse(raw: &str) -> Option<Self> {
67        if raw.len() > MAX_ID_LEN {
68            return None;
69        }
70        let (dir, suffix) = Self::split_prefix(raw)?;
71        if !is_valid_suffix(suffix) {
72            return None;
73        }
74        Some(Self { raw: raw.to_owned(), dir })
75    }
76
77    /// Map an id prefix to its canonical [`PageDir`]. Does **not** validate
78    /// the suffix — use [`Id::parse`] for full validation.
79    #[must_use]
80    pub fn dir_for(raw: &str) -> Option<PageDir> {
81        Self::split_prefix(raw).map(|(dir, _)| dir)
82    }
83
84    fn split_prefix(raw: &str) -> Option<(PageDir, &str)> {
85        raw.strip_prefix("ent-")
86            .map(|s| (PageDir::Entities, s))
87            .or_else(|| raw.strip_prefix("con-").map(|s| (PageDir::Concepts, s)))
88            .or_else(|| raw.strip_prefix("syn-").map(|s| (PageDir::Synthesis, s)))
89    }
90}
91
/// Report whether `suffix` is a well-formed slug tail, i.e. matches
/// `[a-z0-9]+(-[a-z0-9]+)*`.
///
/// Splitting on `-` turns every forbidden hyphen placement (leading,
/// trailing, doubled) — and the empty string — into an empty segment, so one
/// per-segment check covers the whole grammar. Non-ASCII input is rejected
/// because none of its UTF-8 bytes are ASCII lowercase letters or digits.
fn is_valid_suffix(suffix: &str) -> bool {
    suffix.split('-').all(|segment| {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit())
    })
}
108
109impl std::fmt::Display for Id {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        f.write_str(&self.raw)
112    }
113}
114
/// Max session-id length in bytes, matching [`MAX_ID_LEN`] for [`Id`]. Keeps
/// the `sessions/<id>.jsonl` file name well under the typical 255-byte cap.
const MAX_SESSION_ID_LEN: usize = 128;

/// A capture-pipeline session id (Claude Code / Cursor / etc).
///
/// Session ids come from upstream agent platforms (UUIDs, slugs, hex
/// strings, etc.) so the grammar is wider than [`Id`]'s: ASCII alphanumerics
/// plus `-`, `_`, `.` — but never `..`, `/`, `\`, NUL, a leading `.`, or any
/// non-ASCII byte. A value returned by [`SessionId::parse`] is therefore
/// structurally safe to interpolate into a filesystem path
/// (e.g. `sessions/<id>.jsonl`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SessionId {
    /// Full id as supplied by the agent platform.
    pub raw: String,
}

impl SessionId {
    /// Parse a session id.
    ///
    /// Returns `None` for ids that would be unsafe as a path component:
    /// empty, longer than 128 bytes, starting with `.`, containing `..`, or
    /// containing any character other than ASCII alphanumerics, `-`, `_`
    /// and `.` (which rules out `/`, `\`, NUL and all non-ASCII input).
    #[must_use]
    pub fn parse(raw: &str) -> Option<Self> {
        // `&&` short-circuits left to right, so the O(1) length test rejects
        // oversized input before any scan of its contents.
        let acceptable = !raw.is_empty()
            && raw.len() <= MAX_SESSION_ID_LEN
            && !raw.starts_with('.')
            && !raw.contains("..")
            && raw.chars().all(is_session_char);
        acceptable.then(|| Self {
            raw: raw.to_owned(),
        })
    }
}

/// Whether `c` belongs to the session-id alphabet `[A-Za-z0-9._-]`.
fn is_session_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.')
}

impl std::fmt::Display for SessionId {
    /// Write the raw session id.
    ///
    /// Uses [`std::fmt::Formatter::pad`] rather than `write_str` so that
    /// width, fill and alignment flags are honoured (`{:<40}` pads the id in
    /// aligned listings) and a precision truncates, exactly as for `str`.
    /// Plain `{}` output is unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(&self.raw)
    }
}
160
/// Unit tests for page-id and session-id parsing.
///
/// Besides the accept/reject cases, these pin the exact length boundary
/// (the limits are inclusive), prefix-only routing via `Id::dir_for`, the
/// `PageDir` name table, and the `Display` round-trip.
#[cfg(test)]
mod tests {
    use super::{Id, PageDir, SessionId, MAX_ID_LEN, MAX_SESSION_ID_LEN};

    #[test]
    fn parses_entity_prefix() {
        let id = Id::parse("ent-redis").unwrap();
        assert_eq!(id.dir, PageDir::Entities);
    }

    #[test]
    fn parses_concept_prefix() {
        let id = Id::parse("con-rag").unwrap();
        assert_eq!(id.dir, PageDir::Concepts);
    }

    #[test]
    fn parses_synthesis_prefix() {
        let id = Id::parse("syn-x").unwrap();
        assert_eq!(id.dir, PageDir::Synthesis);
    }

    #[test]
    fn rejects_unknown_prefix() {
        assert!(Id::parse("xxx-broken").is_none());
    }

    #[test]
    fn rejects_empty_suffix() {
        assert!(Id::parse("ent-").is_none());
        assert!(Id::parse("con-").is_none());
        assert!(Id::parse("syn-").is_none());
    }

    #[test]
    fn rejects_path_traversal_segments() {
        assert!(Id::parse("ent-..").is_none());
        assert!(Id::parse("ent-../../etc/passwd").is_none());
        assert!(Id::parse("ent-/abs/path").is_none());
        assert!(Id::parse("ent-..\\..\\etc").is_none());
        assert!(Id::parse("ent-foo/bar").is_none());
    }

    #[test]
    fn rejects_uppercase_and_unicode() {
        assert!(Id::parse("ent-Redis").is_none());
        assert!(Id::parse("ent-café").is_none());
        assert!(Id::parse("ent-foo\0bar").is_none());
    }

    #[test]
    fn rejects_hyphen_edges_and_doubles() {
        assert!(Id::parse("ent--foo").is_none());
        assert!(Id::parse("ent-foo-").is_none());
        assert!(Id::parse("ent-foo--bar").is_none());
    }

    #[test]
    fn rejects_overlong_id() {
        let long_suffix: String = "a".repeat(200);
        let raw = format!("ent-{long_suffix}");
        assert!(Id::parse(&raw).is_none());
    }

    #[test]
    fn id_length_limit_is_inclusive() {
        // Exactly MAX_ID_LEN bytes (prefix included) is the longest valid id.
        let at_limit = format!("ent-{}", "a".repeat(MAX_ID_LEN - "ent-".len()));
        assert_eq!(at_limit.len(), MAX_ID_LEN);
        assert!(Id::parse(&at_limit).is_some());

        // One byte more must be rejected.
        let over_limit = format!("{at_limit}a");
        assert!(Id::parse(&over_limit).is_none());
    }

    #[test]
    fn accepts_multi_segment_slug() {
        let id = Id::parse("syn-redis-vs-memcached").unwrap();
        assert_eq!(id.dir, PageDir::Synthesis);
        assert_eq!(id.raw, "syn-redis-vs-memcached");
    }

    #[test]
    fn accepts_digits_and_mixed() {
        assert!(Id::parse("ent-redis-7").is_some());
        assert!(Id::parse("con-rfc-3339").is_some());
    }

    #[test]
    fn dir_for_routes_on_prefix_only() {
        assert_eq!(Id::dir_for("ent-redis"), Some(PageDir::Entities));
        assert_eq!(Id::dir_for("con-rag"), Some(PageDir::Concepts));
        assert_eq!(Id::dir_for("syn-x"), Some(PageDir::Synthesis));

        // The suffix is deliberately not validated by `dir_for`.
        assert_eq!(Id::dir_for("ent-"), Some(PageDir::Entities));
        assert_eq!(Id::dir_for("ent-../x"), Some(PageDir::Entities));

        assert_eq!(Id::dir_for("xxx-broken"), None);
        assert_eq!(Id::dir_for("ent"), None);
        assert_eq!(Id::dir_for(""), None);
    }

    #[test]
    fn page_dir_subdirs_follow_canonical_order() {
        let names: Vec<&str> = PageDir::all().iter().map(|d| d.as_subdir()).collect();
        assert_eq!(names, ["entities", "concepts", "synthesis"]);
    }

    #[test]
    fn display_round_trips_raw() {
        assert_eq!(Id::parse("ent-redis").unwrap().to_string(), "ent-redis");
        assert_eq!(SessionId::parse("sess-001").unwrap().to_string(), "sess-001");
    }

    #[test]
    fn session_id_accepts_realistic_shapes() {
        assert!(SessionId::parse("sess-001").is_some());
        assert!(SessionId::parse("sess-A").is_some());
        assert!(SessionId::parse("550e8400-e29b-41d4-a716-446655440000").is_some());
        assert!(SessionId::parse("claude_2025_05_12").is_some());
        assert!(SessionId::parse("Session.001").is_some());
    }

    #[test]
    fn session_id_rejects_path_traversal() {
        assert!(SessionId::parse("../../etc/foo").is_none());
        assert!(SessionId::parse("..").is_none());
        assert!(SessionId::parse("foo..bar").is_none());
        assert!(SessionId::parse("/abs/path").is_none());
        assert!(SessionId::parse("a/b").is_none());
        assert!(SessionId::parse("a\\b").is_none());
    }

    #[test]
    fn session_id_rejects_hidden_and_empty() {
        assert!(SessionId::parse("").is_none());
        assert!(SessionId::parse(".hidden").is_none());
    }

    #[test]
    fn session_id_rejects_non_ascii_and_nul() {
        assert!(SessionId::parse("sess-café").is_none());
        assert!(SessionId::parse("sess\0bar").is_none());
    }

    #[test]
    fn session_id_rejects_whitespace() {
        assert!(SessionId::parse("a b").is_none());
        assert!(SessionId::parse("sess\n").is_none());
    }

    #[test]
    fn session_id_rejects_overlong() {
        let raw = "a".repeat(200);
        assert!(SessionId::parse(&raw).is_none());
    }

    #[test]
    fn session_id_length_limit_is_inclusive() {
        assert!(SessionId::parse(&"a".repeat(MAX_SESSION_ID_LEN)).is_some());
        assert!(SessionId::parse(&"a".repeat(MAX_SESSION_ID_LEN + 1)).is_none());
    }
}