1use std::fmt;
26
/// Scheme prefix, including the trailing colon, that `encode` writes at the
/// start of every URN and that `decode` requires before parsing anything else.
const SCHEME: &str = "reddb:";
28
/// What a [`Urn`] points at, plus any kind-specific payload.
///
/// Payload-carrying variants contribute a `#suffix` to the encoded URN; the
/// payload-free variants encode as just `collection/id`.
#[derive(Debug, Clone, PartialEq)]
pub enum UrnKind {
    /// A row; no suffix on the wire.
    Row,
    /// A key/value entry; no suffix on the wire.
    KvEntry,
    /// A graph node; no suffix on the wire.
    GraphNode,
    /// A vector search hit; the score is written as the suffix.
    VectorHit { score: f32 },
    /// A document; the fragment string is written as the suffix.
    Document { fragment: String },
    /// A graph edge; the edge id is written as the suffix.
    GraphEdge { edge_id: String },
}
40
41impl UrnKind {
42 fn suffix(&self) -> Option<String> {
43 match self {
44 UrnKind::Row | UrnKind::KvEntry | UrnKind::GraphNode => None,
45 UrnKind::VectorHit { score } => Some(format_score(*score)),
46 UrnKind::Document { fragment } => Some(fragment.clone()),
47 UrnKind::GraphEdge { edge_id } => Some(edge_id.clone()),
48 }
49 }
50
51 pub fn token(&self) -> &'static str {
52 match self {
53 UrnKind::Row => "row",
54 UrnKind::KvEntry => "kv",
55 UrnKind::GraphNode => "graph_node",
56 UrnKind::VectorHit { .. } => "vector_hit",
57 UrnKind::Document { .. } => "document",
58 UrnKind::GraphEdge { .. } => "graph_edge",
59 }
60 }
61}
62
/// A decoded URN: a collection name, an id within it, and the kind of thing
/// the pair refers to.
///
/// `collection` and `id` hold the raw (not percent-encoded) text; `encode`
/// applies the escaping when producing the wire form.
#[derive(Debug, Clone, PartialEq)]
pub struct Urn {
    /// Collection name, written before the `/` on the wire.
    pub collection: String,
    /// Identifier, written after the `/` on the wire.
    pub id: String,
    /// Kind of the referenced entity, including any suffix payload.
    pub kind: UrnKind,
}
69
70impl Urn {
71 pub fn row(collection: impl Into<String>, id: impl Into<String>) -> Self {
72 Self {
73 collection: collection.into(),
74 id: id.into(),
75 kind: UrnKind::Row,
76 }
77 }
78 pub fn vector_hit(collection: impl Into<String>, id: impl Into<String>, score: f32) -> Self {
79 Self {
80 collection: collection.into(),
81 id: id.into(),
82 kind: UrnKind::VectorHit { score },
83 }
84 }
85 pub fn document(
86 collection: impl Into<String>,
87 id: impl Into<String>,
88 fragment: impl Into<String>,
89 ) -> Self {
90 Self {
91 collection: collection.into(),
92 id: id.into(),
93 kind: UrnKind::Document {
94 fragment: fragment.into(),
95 },
96 }
97 }
98 pub fn graph_node(collection: impl Into<String>, id: impl Into<String>) -> Self {
99 Self {
100 collection: collection.into(),
101 id: id.into(),
102 kind: UrnKind::GraphNode,
103 }
104 }
105 pub fn graph_edge(
106 collection: impl Into<String>,
107 id: impl Into<String>,
108 edge_id: impl Into<String>,
109 ) -> Self {
110 Self {
111 collection: collection.into(),
112 id: id.into(),
113 kind: UrnKind::GraphEdge {
114 edge_id: edge_id.into(),
115 },
116 }
117 }
118 pub fn kv(collection: impl Into<String>, id: impl Into<String>) -> Self {
119 Self {
120 collection: collection.into(),
121 id: id.into(),
122 kind: UrnKind::KvEntry,
123 }
124 }
125}
126
/// Reasons `decode` can reject a URN string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UrnError {
    /// The input does not start with the `reddb:` scheme prefix.
    MissingScheme,
    /// There is no `/`, or nothing follows it. `decode` also returns this
    /// variant when the kind hint and the presence of a `#suffix` disagree.
    MissingId,
    /// A `%` escape is truncated, has a non-hex digit, or the decoded bytes
    /// are not valid UTF-8.
    InvalidPercent,
    /// The suffix of a vector-hit URN does not parse as an `f32`.
    InvalidScore,
}
134
135impl fmt::Display for UrnError {
136 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
137 match self {
138 UrnError::MissingScheme => write!(f, "URN missing reddb: scheme"),
139 UrnError::MissingId => write!(f, "URN missing /id segment"),
140 UrnError::InvalidPercent => write!(f, "URN has invalid percent-encoding"),
141 UrnError::InvalidScore => write!(f, "URN vector_hit suffix is not a score"),
142 }
143 }
144}
145
// Marker impl: `UrnError` carries no source error, so the trait defaults are used as-is.
impl std::error::Error for UrnError {}
147
148pub fn encode(urn: &Urn) -> String {
149 let mut s = String::with_capacity(SCHEME.len() + urn.collection.len() + urn.id.len() + 8);
150 s.push_str(SCHEME);
151 pct_encode_into(&urn.collection, &mut s);
152 s.push('/');
153 pct_encode_into(&urn.id, &mut s);
154 if let Some(suffix) = urn.kind.suffix() {
155 s.push('#');
156 pct_encode_into(&suffix, &mut s);
157 }
158 s
159}
160
/// Tells `decode` which [`UrnKind`] to produce.
///
/// The encoded string carries no kind token, so the caller supplies the
/// expected kind; `decode` then checks that a `#suffix` is present exactly
/// when that kind has a payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum KindHint {
    /// Expect a row (no suffix).
    Row,
    /// Expect a key/value entry (no suffix).
    KvEntry,
    /// Expect a graph node (no suffix).
    GraphNode,
    /// Expect a vector hit (suffix parsed as the score).
    VectorHit,
    /// Expect a document (suffix taken as the fragment).
    Document,
    /// Expect a graph edge (suffix taken as the edge id).
    GraphEdge,
}
171
172pub fn decode(s: &str, hint: KindHint) -> Result<Urn, UrnError> {
173 let rest = s.strip_prefix(SCHEME).ok_or(UrnError::MissingScheme)?;
174 let (head, suffix) = match rest.split_once('#') {
175 Some((h, s)) => (h, Some(pct_decode(s)?)),
176 None => (rest, None),
177 };
178 let (collection, id) = head.split_once('/').ok_or(UrnError::MissingId)?;
179 if id.is_empty() {
180 return Err(UrnError::MissingId);
181 }
182 let collection = pct_decode(collection)?;
183 let id = pct_decode(id)?;
184 let kind = match (hint, suffix) {
185 (KindHint::Row, None) => UrnKind::Row,
186 (KindHint::KvEntry, None) => UrnKind::KvEntry,
187 (KindHint::GraphNode, None) => UrnKind::GraphNode,
188 (KindHint::VectorHit, Some(sx)) => {
189 let score: f32 = sx.parse().map_err(|_| UrnError::InvalidScore)?;
190 UrnKind::VectorHit { score }
191 }
192 (KindHint::Document, Some(sx)) => UrnKind::Document { fragment: sx },
193 (KindHint::GraphEdge, Some(sx)) => UrnKind::GraphEdge { edge_id: sx },
194 _ => return Err(UrnError::MissingId),
195 };
196 Ok(Urn {
197 collection,
198 id,
199 kind,
200 })
201}
202
203fn pct_encode_into(input: &str, out: &mut String) {
204 for &b in input.as_bytes() {
205 if needs_pct(b) {
206 out.push('%');
207 out.push(hex_high(b));
208 out.push(hex_low(b));
209 } else {
210 out.push(b as char);
211 }
212 }
213}
214
215fn pct_decode(input: &str) -> Result<String, UrnError> {
216 let bytes = input.as_bytes();
217 let mut out = Vec::with_capacity(bytes.len());
218 let mut i = 0;
219 while i < bytes.len() {
220 if bytes[i] == b'%' {
221 if i + 2 >= bytes.len() {
222 return Err(UrnError::InvalidPercent);
223 }
224 let hi = hex_value(bytes[i + 1]).ok_or(UrnError::InvalidPercent)?;
225 let lo = hex_value(bytes[i + 2]).ok_or(UrnError::InvalidPercent)?;
226 out.push((hi << 4) | lo);
227 i += 3;
228 } else {
229 out.push(bytes[i]);
230 i += 1;
231 }
232 }
233 String::from_utf8(out).map_err(|_| UrnError::InvalidPercent)
234}
235
/// Reports whether byte `b` must be percent-encoded.
///
/// True for `%`, `/`, `#`, space, and every byte outside `0x21..=0x7E`
/// (control bytes, DEL, and all non-ASCII bytes).
fn needs_pct(b: u8) -> bool {
    match b {
        // Reserved by the URN syntax itself, plus space.
        b'%' | b'/' | b'#' | b' ' => true,
        // Remaining printable ASCII passes through.
        0x20..=0x7E => false,
        // Control bytes, DEL and anything >= 0x80.
        _ => true,
    }
}
239
/// Returns the uppercase hex digit for the high nibble (top four bits) of `b`.
fn hex_high(b: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // `b >> 4` is at most 15, so the lookup is always in range.
    char::from(DIGITS[usize::from(b >> 4)])
}
248
/// Returns the uppercase hex digit for the low nibble (bottom four bits) of `b`.
fn hex_low(b: u8) -> char {
    let nibble = b & 0x0F;
    // Offset that maps 0..=9 onto '0'..='9' and 10..=15 onto 'A'..='F'.
    let base = match nibble {
        0..=9 => b'0',
        _ => b'A' - 10,
    };
    char::from(base + nibble)
}
257
/// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`.
///
/// Returns `None` for any other byte.
fn hex_value(b: u8) -> Option<u8> {
    // Find the byte that maps to zero in the digit's range, then subtract it.
    let zero_point = match b {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(b - zero_point)
}
266
/// Renders `score` with up to six decimal places, dropping trailing zeros
/// and a dangling decimal point (`0.5`, `1`, `0.123456`).
///
/// Output without a decimal point (such as the text for non-finite values)
/// is returned as formatted.
fn format_score(score: f32) -> String {
    let fixed = format!("{:.6}", score);
    if !fixed.contains('.') {
        return fixed;
    }
    // The '.' stops the zero trim, so integer-part zeros are never removed.
    let without_zeros = fixed.trim_end_matches('0');
    let trimmed = without_zeros.strip_suffix('.').unwrap_or(without_zeros);
    trimmed.to_string()
}
279
// Unit tests for the URN codec: round trips per kind, escaping rules,
// rejection of malformed input, and a small input matrix.
#[cfg(test)]
mod tests {
    use super::*;

    // Row URNs encode as `reddb:<collection>/<id>` and decode back unchanged.
    #[test]
    fn row_round_trip() {
        let u = Urn::row("incidents", "42");
        assert_eq!(encode(&u), "reddb:incidents/42");
        assert_eq!(decode("reddb:incidents/42", KindHint::Row).unwrap(), u);
    }

    // Key/value URNs survive encode followed by decode.
    #[test]
    fn kv_round_trip() {
        let u = Urn::kv("settings", "ask.cache.enabled");
        assert_eq!(decode(&encode(&u), KindHint::KvEntry).unwrap(), u);
    }

    // Graph-node URNs survive encode followed by decode.
    #[test]
    fn graph_node_round_trip() {
        let u = Urn::graph_node("hosts", "n-7");
        assert_eq!(decode(&encode(&u), KindHint::GraphNode).unwrap(), u);
    }

    // Vector-hit URNs keep collection and id exactly; the score is compared
    // with a tolerance because it travels as decimal text.
    #[test]
    fn vector_hit_round_trip() {
        let u = Urn::vector_hit("docs", "doc-9", 0.87125);
        let s = encode(&u);
        let back = decode(&s, KindHint::VectorHit).unwrap();
        assert_eq!(back.collection, "docs");
        assert_eq!(back.id, "doc-9");
        match back.kind {
            UrnKind::VectorHit { score } => assert!((score - 0.87125).abs() < 1e-5),
            _ => panic!("wrong kind"),
        }
    }

    // Pins the textual score format: trailing zeros and a bare '.' are dropped.
    #[test]
    fn vector_hit_score_format_stable() {
        assert_eq!(format_score(0.5), "0.5");
        assert_eq!(format_score(1.0), "1");
        assert_eq!(format_score(0.0), "0");
        assert_eq!(format_score(0.123456), "0.123456");
    }

    // Document URNs carry the fragment after '#'.
    #[test]
    fn document_round_trip_with_fragment() {
        let u = Urn::document("manuals", "m-1", "chunk-7");
        assert_eq!(encode(&u), "reddb:manuals/m-1#chunk-7");
        assert_eq!(decode(&encode(&u), KindHint::Document).unwrap(), u);
    }

    // Graph-edge URNs carry the edge id after '#'.
    #[test]
    fn graph_edge_round_trip() {
        let u = Urn::graph_edge("hosts", "n-1", "e-77");
        assert_eq!(encode(&u), "reddb:hosts/n-1#e-77");
        assert_eq!(decode(&encode(&u), KindHint::GraphEdge).unwrap(), u);
    }

    // '/' and '#' inside the collection are escaped so they cannot be
    // mistaken for separators.
    #[test]
    fn percent_encodes_separators_in_collection() {
        let u = Urn::row("we/ird#name", "id");
        assert_eq!(encode(&u), "reddb:we%2Fird%23name/id");
        assert_eq!(decode(&encode(&u), KindHint::Row).unwrap(), u);
    }

    // Same escaping applies inside the id.
    #[test]
    fn percent_encodes_separators_in_id() {
        let u = Urn::row("col", "a/b#c");
        assert_eq!(encode(&u), "reddb:col/a%2Fb%23c");
        assert_eq!(decode(&encode(&u), KindHint::Row).unwrap(), u);
    }

    // Space becomes %20 and a literal '%' becomes %25.
    #[test]
    fn percent_encodes_space_and_percent() {
        let u = Urn::row("col with space", "100%");
        assert_eq!(encode(&u), "reddb:col%20with%20space/100%25");
        assert_eq!(decode(&encode(&u), KindHint::Row).unwrap(), u);
    }

    // Control bytes (newline, tab) are escaped with uppercase hex.
    #[test]
    fn percent_encodes_control_bytes() {
        let u = Urn::row("col\nname", "id\t");
        let s = encode(&u);
        assert!(s.contains("%0A"));
        assert!(s.contains("%09"));
        assert_eq!(decode(&s, KindHint::Row).unwrap(), u);
    }

    // Non-ASCII text is escaped byte by byte, so the wire form is pure ASCII.
    #[test]
    fn utf8_round_trips_via_pct_encoding() {
        let u = Urn::row("日本語", "café");
        let s = encode(&u);
        assert!(s.is_ascii(), "wire URN must be ASCII: {s}");
        assert_eq!(decode(&s, KindHint::Row).unwrap(), u);
    }

    // Separators and spaces inside the fragment are escaped as well.
    #[test]
    fn fragment_with_special_chars_round_trips() {
        let u = Urn::document("docs", "d-1", "section/2#a b");
        assert_eq!(decode(&encode(&u), KindHint::Document).unwrap(), u);
    }

    // Input without the `reddb:` prefix is rejected.
    #[test]
    fn missing_scheme_rejected() {
        assert_eq!(
            decode("not-a-urn/x", KindHint::Row),
            Err(UrnError::MissingScheme)
        );
    }

    // Both a missing '/' and an empty id after '/' are rejected.
    #[test]
    fn missing_id_rejected() {
        assert_eq!(
            decode("reddb:colonly", KindHint::Row),
            Err(UrnError::MissingId)
        );
        assert_eq!(
            decode("reddb:col/", KindHint::Row),
            Err(UrnError::MissingId)
        );
    }

    // An escape with a missing or non-hex digit is rejected.
    #[test]
    fn invalid_percent_rejected() {
        assert_eq!(
            decode("reddb:col%2/id", KindHint::Row),
            Err(UrnError::InvalidPercent)
        );
        assert_eq!(
            decode("reddb:col/id%ZZ", KindHint::Row),
            Err(UrnError::InvalidPercent)
        );
    }

    // A vector-hit suffix that is not a number is rejected.
    #[test]
    fn vector_hit_invalid_score_rejected() {
        assert_eq!(
            decode("reddb:docs/d-1#nope", KindHint::VectorHit),
            Err(UrnError::InvalidScore)
        );
    }

    // A hint that disagrees with the presence of a suffix is an error
    // (only `is_err` is checked, not the specific variant).
    #[test]
    fn hint_mismatch_rejected() {
        let s = encode(&Urn::row("col", "id"));
        assert!(decode(&s, KindHint::VectorHit).is_err());
        let s = encode(&Urn::vector_hit("col", "id", 0.5));
        assert!(decode(&s, KindHint::Row).is_err());
    }

    // Pins the token string of every kind.
    #[test]
    fn token_is_stable() {
        assert_eq!(UrnKind::Row.token(), "row");
        assert_eq!(UrnKind::KvEntry.token(), "kv");
        assert_eq!(UrnKind::GraphNode.token(), "graph_node");
        assert_eq!(UrnKind::VectorHit { score: 0.0 }.token(), "vector_hit");
        assert_eq!(
            UrnKind::Document {
                fragment: "x".into()
            }
            .token(),
            "document"
        );
        assert_eq!(
            UrnKind::GraphEdge {
                edge_id: "e".into()
            }
            .token(),
            "graph_edge"
        );
    }

    // Round-trips every (collection, id) pair from a matrix of awkward inputs
    // through every kind. For vector hits only collection and id are compared.
    #[test]
    fn property_round_trip_byte_matrix() {
        let collections = [
            "simple",
            "with/slash",
            "with#hash",
            "with%pct",
            "with space",
            "with\ttab",
            "with\nnewline",
            "café",
            "日本語",
            "mixed/ # %",
        ];
        let ids = ["1", "abc", "uuid-1234", "with/slash", "deep/path#frag"];
        for c in collections {
            for i in ids {
                // Payload-free kinds: full equality after the round trip.
                for hint in [KindHint::Row, KindHint::KvEntry, KindHint::GraphNode] {
                    let u = match hint {
                        KindHint::Row => Urn::row(c, i),
                        KindHint::KvEntry => Urn::kv(c, i),
                        KindHint::GraphNode => Urn::graph_node(c, i),
                        _ => unreachable!(),
                    };
                    let s = encode(&u);
                    assert_eq!(decode(&s, hint).unwrap(), u, "mismatch for {s}");
                }
                let v = Urn::vector_hit(c, i, 0.42);
                let back = decode(&encode(&v), KindHint::VectorHit).unwrap();
                assert_eq!(back.collection, v.collection);
                assert_eq!(back.id, v.id);
                let d = Urn::document(c, i, "frag/with#stuff");
                assert_eq!(decode(&encode(&d), KindHint::Document).unwrap(), d);
                let e = Urn::graph_edge(c, i, "edge%01");
                assert_eq!(decode(&encode(&e), KindHint::GraphEdge).unwrap(), e);
            }
        }
    }
}