use std::collections::HashMap;
use std::sync::RwLock;
/// Sentinel id meaning "no label": `LabelId::is_unset` is true for it and
/// `LabelRegistry::resolve` always returns `None` for it.
pub const UNSET_LABEL_ID: LabelId = LabelId(0);
/// First id handed out by `LabelRegistry::intern` on a legacy-seeded or decoded
/// registry. The built-in legacy labels all use ids below this value.
/// (A registry built with `LabelRegistry::empty` starts numbering at 1 instead.)
pub const FIRST_USER_LABEL_ID: u32 = 64;
/// Compact numeric handle for an interned label.
///
/// The raw value `0` is reserved as the "unset" sentinel (see [`LabelId::is_unset`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct LabelId(pub u32);

impl LabelId {
    /// Wraps a raw `u32` in a `LabelId` without any validation.
    #[inline]
    pub const fn new(id: u32) -> Self {
        LabelId(id)
    }

    /// Returns the raw numeric value of this id.
    #[inline]
    pub const fn as_u32(self) -> u32 {
        self.0
    }

    /// Reports whether this id is the reserved "unset" value (raw `0`).
    #[inline]
    pub const fn is_unset(self) -> bool {
        self.as_u32() == 0
    }
}
/// The label space an entry belongs to; the same text may be interned once per namespace.
///
/// The explicit `u8` discriminants are the values written by `LabelRegistry::encode`
/// and read back through [`Namespace::from_u8`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Namespace {
    /// Labels attached to nodes.
    Node = 0,
    /// Labels attached to edges.
    Edge = 1,
}

impl Namespace {
    /// Converts a raw discriminant byte back into a namespace.
    ///
    /// Returns `None` for any byte that does not match a known variant.
    fn from_u8(v: u8) -> Option<Self> {
        if v == Namespace::Node as u8 {
            Some(Namespace::Node)
        } else if v == Namespace::Edge as u8 {
            Some(Namespace::Edge)
        } else {
            None
        }
    }
}
/// Errors reported by the label registry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LabelRegistryError {
    /// A label was `len` bytes long, exceeding the allowed `max`.
    LabelTooLong { len: usize, max: usize },
    /// Serialized registry data could not be decoded; `offset` is a byte position
    /// in the input and `reason` a short static description.
    Malformed { offset: usize, reason: &'static str },
    /// The registry's internal lock was poisoned.
    LockPoisoned,
}

impl std::fmt::Display for LabelRegistryError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            LabelRegistryError::LabelTooLong { len, max } => {
                f.write_fmt(format_args!("label too long: {} bytes (max {})", len, max))
            }
            LabelRegistryError::Malformed { offset, reason } => f.write_fmt(format_args!(
                "malformed registry at offset {}: {}",
                offset, reason
            )),
            LabelRegistryError::LockPoisoned => f.write_str("label registry lock poisoned"),
        }
    }
}

impl std::error::Error for LabelRegistryError {}
/// Maximum label length, in bytes, accepted when a label is interned or decoded;
/// longer labels are rejected with `LabelRegistryError::LabelTooLong`.
pub const MAX_LABEL_LEN: usize = 512;
/// Two-way mapping between `(Namespace, label)` pairs and `LabelId`s.
///
/// All state sits behind one `RwLock`, which is why every method takes `&self`.
#[derive(Debug)]
pub struct LabelRegistry {
// Lookup tables and the id counter, guarded together by a single lock.
inner: RwLock<RegistryInner>,
}
/// Lock-protected state of a `LabelRegistry`.
#[derive(Debug)]
struct RegistryInner {
// Forward index: (namespace, label text) -> id.
by_label: HashMap<(Namespace, String), LabelId>,
// Reverse index: the slot at position `id` holds that id's (namespace, label).
// `LabelRegistry::empty` creates it with a single vacant slot at index 0.
by_id: Vec<Option<(Namespace, String)>>,
// Id that the next call to `LabelRegistry::intern` will hand out for a new label.
next_id: u32,
}
impl LabelRegistry {
    /// Size in bytes of the little-endian `u32` entry count that starts an encoded registry.
    const HEADER_LEN: usize = 4;
    /// Size in bytes of the fixed part of an encoded entry: `u32` id, `u8` namespace and
    /// `u16` label length.
    const ENTRY_HEADER_LEN: usize = 7;

    /// Creates a registry pre-populated with the built-in legacy node and edge labels.
    ///
    /// Legacy node labels get ids `1 + discriminant`, legacy edge labels `10 + discriminant`,
    /// and the first label interned afterwards receives [`FIRST_USER_LABEL_ID`].
    ///
    /// # Panics
    ///
    /// Panics if the built-in legacy tables cannot be seeded, which would mean the tables
    /// themselves are inconsistent.
    pub fn with_legacy_seed() -> Self {
        let reg = Self::empty();
        for (raw, name) in LEGACY_NODE_LABELS {
            reg.intern_with_id(Namespace::Node, name, LabelId(legacy_node_id(*raw)))
                .expect("legacy node label seed");
        }
        for (raw, name) in LEGACY_EDGE_LABELS {
            reg.intern_with_id(Namespace::Edge, name, LabelId(legacy_edge_id(*raw)))
                .expect("legacy edge label seed");
        }
        // Leave the rest of the reserved range unused so user labels start at a fixed id.
        if let Ok(mut g) = reg.inner.write() {
            g.next_id = FIRST_USER_LABEL_ID;
        }
        reg
    }

    /// Creates a registry with no labels; the first interned label receives id `1`.
    pub fn empty() -> Self {
        Self {
            inner: RwLock::new(RegistryInner {
                by_label: HashMap::new(),
                // Slot 0 corresponds to the unset sentinel and stays vacant.
                by_id: vec![None],
                next_id: 1,
            }),
        }
    }

    /// Returns the id of `label` within `ns`, assigning the next free id on first use.
    ///
    /// Calling this again with the same namespace and label returns the same id.
    ///
    /// # Errors
    ///
    /// * [`LabelRegistryError::LabelTooLong`] if `label` exceeds [`MAX_LABEL_LEN`] bytes.
    /// * [`LabelRegistryError::LockPoisoned`] if the internal lock is poisoned.
    ///
    /// # Panics
    ///
    /// Panics if the `u32` id space is exhausted.
    pub fn intern(&self, ns: Namespace, label: &str) -> Result<LabelId, LabelRegistryError> {
        if label.len() > MAX_LABEL_LEN {
            return Err(LabelRegistryError::LabelTooLong {
                len: label.len(),
                max: MAX_LABEL_LEN,
            });
        }
        // Built once and reused for both the lookup and the insertion.
        let key = (ns, label.to_string());
        let mut g = self
            .inner
            .write()
            .map_err(|_| LabelRegistryError::LockPoisoned)?;
        if let Some(&id) = g.by_label.get(&key) {
            return Ok(id);
        }
        let id = LabelId(g.next_id);
        g.next_id = g
            .next_id
            .checked_add(1)
            .expect("LabelId u32 space exhausted (>4B labels)");
        let idx = id.0 as usize;
        if g.by_id.len() <= idx {
            g.by_id.resize(idx + 1, None);
        }
        g.by_id[idx] = Some(key.clone());
        g.by_label.insert(key, id);
        Ok(id)
    }

    /// Returns the id already assigned to `label` in `ns`, without interning it.
    ///
    /// Returns `None` if the label is unknown or the internal lock is poisoned.
    pub fn lookup(&self, ns: Namespace, label: &str) -> Option<LabelId> {
        let g = self.inner.read().ok()?;
        g.by_label.get(&(ns, label.to_string())).copied()
    }

    /// Returns the namespace and label text registered for `id`.
    ///
    /// Returns `None` for the unset id, for ids that were never assigned, and when the
    /// internal lock is poisoned.
    pub fn resolve(&self, id: LabelId) -> Option<(Namespace, String)> {
        if id.is_unset() {
            return None;
        }
        let g = self.inner.read().ok()?;
        g.by_id.get(id.0 as usize).cloned().flatten()
    }

    /// Returns the label text for `id`, but only if `id` belongs to namespace `ns`.
    pub fn label_of(&self, ns: Namespace, id: LabelId) -> Option<String> {
        self.resolve(id)
            .filter(|(found_ns, _)| *found_ns == ns)
            .map(|(_, l)| l)
    }

    /// Returns the number of registered labels (`0` if the internal lock is poisoned).
    pub fn len(&self) -> usize {
        self.inner.read().map(|g| g.by_label.len()).unwrap_or(0)
    }

    /// Returns `true` when [`LabelRegistry::len`] is zero.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the reserved id of the legacy node label with discriminant `disc`, or
    /// [`UNSET_LABEL_ID`] when `disc` is outside the legacy node table.
    pub fn legacy_node_label_id(disc: u8) -> LabelId {
        if (disc as usize) < LEGACY_NODE_LABELS.len() {
            LabelId(legacy_node_id(disc))
        } else {
            UNSET_LABEL_ID
        }
    }

    /// Returns the reserved id of the legacy edge label with discriminant `disc`, or
    /// [`UNSET_LABEL_ID`] when `disc` is outside the legacy edge table.
    pub fn legacy_edge_label_id(disc: u8) -> LabelId {
        if (disc as usize) < LEGACY_EDGE_LABELS.len() {
            LabelId(legacy_edge_id(disc))
        } else {
            UNSET_LABEL_ID
        }
    }

    /// Serializes the registry.
    ///
    /// Layout (all integers little-endian): a `u32` entry count, then for every registered
    /// id in ascending order a `u32` id, a `u8` namespace discriminant, a `u16` label length
    /// and the label's UTF-8 bytes.
    ///
    /// # Errors
    ///
    /// Returns [`LabelRegistryError::LockPoisoned`] if the internal lock is poisoned.
    pub fn encode(&self) -> Result<Vec<u8>, LabelRegistryError> {
        let g = self
            .inner
            .read()
            .map_err(|_| LabelRegistryError::LockPoisoned)?;
        let entries: Vec<(u32, Namespace, &str)> = g
            .by_id
            .iter()
            .enumerate()
            .filter_map(|(i, slot)| {
                slot.as_ref()
                    .map(|(ns, label)| (i as u32, *ns, label.as_str()))
            })
            .collect();
        // Exact output size, so the buffer is allocated once.
        let payload: usize = entries
            .iter()
            .map(|(_, _, label)| Self::ENTRY_HEADER_LEN + label.len())
            .sum();
        let mut buf = Vec::with_capacity(Self::HEADER_LEN + payload);
        buf.extend_from_slice(&(entries.len() as u32).to_le_bytes());
        for (id, ns, label) in entries {
            buf.extend_from_slice(&id.to_le_bytes());
            buf.push(ns as u8);
            // Labels are capped at MAX_LABEL_LEN bytes when stored, so the length fits in u16.
            buf.extend_from_slice(&(label.len() as u16).to_le_bytes());
            buf.extend_from_slice(label.as_bytes());
        }
        Ok(buf)
    }

    /// Rebuilds a registry from bytes produced by [`LabelRegistry::encode`].
    ///
    /// After decoding, the next interned label gets an id one past the largest decoded id,
    /// but never below [`FIRST_USER_LABEL_ID`]. Bytes after the last declared entry are
    /// ignored.
    ///
    /// The input is treated as untrusted: an entry is rejected when its id is the unset
    /// sentinel, when its id is too large for the number of entries the input can hold, or
    /// when it conflicts with an earlier entry.
    ///
    /// # Errors
    ///
    /// * [`LabelRegistryError::Malformed`] with the byte offset of the problem when the
    ///   input is truncated, names an unknown namespace, holds non-UTF-8 label bytes, or
    ///   fails one of the checks above.
    /// * [`LabelRegistryError::LabelTooLong`] if a label exceeds [`MAX_LABEL_LEN`] bytes.
    /// * [`LabelRegistryError::LockPoisoned`] if the internal lock is poisoned.
    pub fn decode(data: &[u8]) -> Result<Self, LabelRegistryError> {
        if data.len() < Self::HEADER_LEN {
            return Err(LabelRegistryError::Malformed {
                offset: 0,
                reason: "header truncated",
            });
        }
        let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;

        // `by_id` is indexed by id, so an unchecked id from the input would size the table.
        // `intern` hands out ids from FIRST_USER_LABEL_ID upwards consecutively and nothing
        // is ever removed, so a well-formed snapshot with N entries has no id at or above
        // FIRST_USER_LABEL_ID + N. N is additionally capped by what the input can hold,
        // which keeps the allocation proportional to the input size.
        let max_entries = count.min((data.len() - Self::HEADER_LEN) / Self::ENTRY_HEADER_LEN);
        let id_limit = FIRST_USER_LABEL_ID as usize + max_entries;

        let reg = Self::empty();
        let mut off = Self::HEADER_LEN;
        let mut max_id = 0u32;
        for _ in 0..count {
            let entry_off = off;
            // `off` never exceeds `data.len()`, so these subtractions cannot underflow.
            if data.len() - off < Self::ENTRY_HEADER_LEN {
                return Err(LabelRegistryError::Malformed {
                    offset: off,
                    reason: "entry header truncated",
                });
            }
            let id = u32::from_le_bytes([data[off], data[off + 1], data[off + 2], data[off + 3]]);
            let ns = Namespace::from_u8(data[off + 4]).ok_or(LabelRegistryError::Malformed {
                offset: off + 4,
                reason: "unknown namespace",
            })?;
            let len = u16::from_le_bytes([data[off + 5], data[off + 6]]) as usize;
            if id as usize >= id_limit {
                return Err(LabelRegistryError::Malformed {
                    offset: entry_off,
                    reason: "label id out of range for entry count",
                });
            }
            off += Self::ENTRY_HEADER_LEN;
            if data.len() - off < len {
                return Err(LabelRegistryError::Malformed {
                    offset: off,
                    reason: "label bytes truncated",
                });
            }
            let label = std::str::from_utf8(&data[off..off + len]).map_err(|_| {
                LabelRegistryError::Malformed {
                    offset: off,
                    reason: "label not utf8",
                }
            })?;
            // `intern_with_id` has no notion of input position; attach this entry's offset.
            reg.intern_with_id(ns, label, LabelId(id))
                .map_err(|err| match err {
                    LabelRegistryError::Malformed { reason, .. } => {
                        LabelRegistryError::Malformed {
                            offset: entry_off,
                            reason,
                        }
                    }
                    other => other,
                })?;
            max_id = max_id.max(id);
            off += len;
        }

        let next_id = max_id.saturating_add(1).max(FIRST_USER_LABEL_ID);
        {
            let mut g = reg
                .inner
                .write()
                .map_err(|_| LabelRegistryError::LockPoisoned)?;
            g.next_id = next_id;
        }
        Ok(reg)
    }

    /// Registers `label` in `ns` under a caller-chosen `id`; does not touch `next_id`.
    ///
    /// Registering the exact same `(ns, label, id)` triple again is a no-op. All checks run
    /// before any table is modified, so a rejected call leaves the registry unchanged.
    ///
    /// # Errors
    ///
    /// * [`LabelRegistryError::LabelTooLong`] if `label` exceeds [`MAX_LABEL_LEN`] bytes.
    /// * [`LabelRegistryError::Malformed`] (with `offset` 0) if `id` is the unset sentinel,
    ///   if `id` already belongs to a different label, or if the label already has a
    ///   different id.
    /// * [`LabelRegistryError::LockPoisoned`] if the internal lock is poisoned.
    fn intern_with_id(
        &self,
        ns: Namespace,
        label: &str,
        id: LabelId,
    ) -> Result<(), LabelRegistryError> {
        if label.len() > MAX_LABEL_LEN {
            return Err(LabelRegistryError::LabelTooLong {
                len: label.len(),
                max: MAX_LABEL_LEN,
            });
        }
        if id.is_unset() {
            // Otherwise `lookup` could hand out an id that `resolve` refuses to resolve.
            return Err(LabelRegistryError::Malformed {
                offset: 0,
                reason: "label bound to reserved unset id",
            });
        }
        let key = (ns, label.to_string());
        let idx = id.0 as usize;
        let mut g = self
            .inner
            .write()
            .map_err(|_| LabelRegistryError::LockPoisoned)?;
        if let Some(existing) = g.by_id.get(idx).and_then(|slot| slot.as_ref()) {
            if *existing != key {
                return Err(LabelRegistryError::Malformed {
                    offset: 0,
                    reason: "id collision with different label",
                });
            }
            return Ok(());
        }
        if g.by_label.contains_key(&key) {
            // The slot for `id` is free, so the label must already live under another id;
            // inserting would leave a stale reverse entry that no lookup can reach.
            return Err(LabelRegistryError::Malformed {
                offset: 0,
                reason: "label bound to more than one id",
            });
        }
        if g.by_id.len() <= idx {
            g.by_id.resize(idx + 1, None);
        }
        g.by_id[idx] = Some(key.clone());
        g.by_label.insert(key, id);
        Ok(())
    }
}
impl Default for LabelRegistry {
fn default() -> Self {
Self::with_legacy_seed()
}
}
// Built-in node labels as (legacy discriminant, label text) pairs.
// `legacy_node_id` turns the discriminant into the reserved label id.
// `LabelRegistry::legacy_node_label_id` validates a discriminant by comparing it with this
// table's length, so the discriminants here must stay contiguous starting at 0.
const LEGACY_NODE_LABELS: &[(u8, &str)] = &[
(0, "host"),
(1, "service"),
(2, "credential"),
(3, "vulnerability"),
(4, "endpoint"),
(5, "technology"),
(6, "user"),
(7, "domain"),
(8, "certificate"),
];
// Built-in edge labels as (legacy discriminant, label text) pairs.
// `legacy_edge_id` turns the discriminant into the reserved label id.
// `LabelRegistry::legacy_edge_label_id` validates a discriminant by comparing it with this
// table's length, so the discriminants here must stay contiguous starting at 0.
const LEGACY_EDGE_LABELS: &[(u8, &str)] = &[
(0, "has_service"),
(1, "has_endpoint"),
(2, "uses_tech"),
(3, "auth_access"),
(4, "affected_by"),
(5, "contains"),
(6, "connects_to"),
(7, "related_to"),
(8, "has_user"),
(9, "has_cert"),
];
/// Maps a legacy node discriminant to its reserved label id (`disc + 1`), so that
/// discriminant `0` does not land on the raw id `0`.
fn legacy_node_id(disc: u8) -> u32 {
    u32::from(disc) + 1
}
/// Maps a legacy edge discriminant to its reserved label id (`disc + 10`).
fn legacy_edge_id(disc: u8) -> u32 {
    u32::from(disc) + 10
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh empty registry reports no entries and resolves nothing.
    #[test]
    fn empty_registry_has_no_entries_but_sentinel_resolves_to_none() {
        let r = LabelRegistry::empty();
        assert!(r.is_empty());
        assert_eq!(r.resolve(UNSET_LABEL_ID), None);
        assert_eq!(r.lookup(Namespace::Node, "anything"), None);
    }

    /// Interning the same label twice yields the same id and a single entry.
    #[test]
    fn intern_is_idempotent() {
        let r = LabelRegistry::empty();
        let a = r.intern(Namespace::Node, "order").unwrap();
        let b = r.intern(Namespace::Node, "order").unwrap();
        assert_eq!(a, b);
        assert_eq!(r.len(), 1);
    }

    /// The same text in the node and edge namespaces gets two distinct ids.
    #[test]
    fn namespaces_are_independent() {
        let r = LabelRegistry::empty();
        let n = r.intern(Namespace::Node, "host").unwrap();
        let e = r.intern(Namespace::Edge, "host").unwrap();
        assert_ne!(
            n, e,
            "same label in different namespaces must get distinct ids"
        );
        assert_eq!(r.label_of(Namespace::Node, n).as_deref(), Some("host"));
        assert_eq!(r.label_of(Namespace::Edge, e).as_deref(), Some("host"));
        assert_eq!(r.label_of(Namespace::Node, e), None);
    }

    /// Legacy labels land on their reserved ids and agree with the discriminant helpers.
    #[test]
    fn legacy_seed_populates_reserved_range() {
        let r = LabelRegistry::with_legacy_seed();
        let host_id = r.lookup(Namespace::Node, "host").unwrap();
        assert_eq!(host_id, LabelId(1));
        assert_eq!(LabelRegistry::legacy_node_label_id(0), host_id);
        let edge_id = r.lookup(Namespace::Edge, "has_service").unwrap();
        assert_eq!(edge_id, LabelId(10));
        assert_eq!(LabelRegistry::legacy_edge_label_id(0), edge_id);
    }

    /// On a seeded registry, user labels are numbered from `FIRST_USER_LABEL_ID` upwards.
    #[test]
    fn user_labels_start_at_first_user_id() {
        let r = LabelRegistry::with_legacy_seed();
        let id = r.intern(Namespace::Node, "order").unwrap();
        assert_eq!(id, LabelId(FIRST_USER_LABEL_ID));
        let id2 = r.intern(Namespace::Node, "product").unwrap();
        assert_eq!(id2, LabelId(FIRST_USER_LABEL_ID + 1));
    }

    /// Encoding then decoding preserves ids, and new labels continue after the old maximum.
    #[test]
    fn round_trip_encode_decode() {
        let r = LabelRegistry::with_legacy_seed();
        r.intern(Namespace::Node, "order").unwrap();
        r.intern(Namespace::Node, "product").unwrap();
        r.intern(Namespace::Edge, "purchased").unwrap();
        let bytes = r.encode().unwrap();
        let restored = LabelRegistry::decode(&bytes).unwrap();
        assert_eq!(restored.len(), r.len());
        assert_eq!(
            restored.lookup(Namespace::Node, "order"),
            r.lookup(Namespace::Node, "order")
        );
        assert_eq!(
            restored.lookup(Namespace::Edge, "purchased"),
            r.lookup(Namespace::Edge, "purchased")
        );
        let new_id = restored.intern(Namespace::Node, "shipment").unwrap();
        let prior_max = r.lookup(Namespace::Node, "product").unwrap();
        assert!(new_id.0 > prior_max.0);
    }

    /// Input shorter than the 4-byte header is reported as malformed.
    #[test]
    fn decode_rejects_truncated_input() {
        let bad = vec![0xff, 0xff, 0xff];
        assert!(matches!(
            LabelRegistry::decode(&bad),
            Err(LabelRegistryError::Malformed { .. })
        ));
    }

    /// An entry whose namespace byte is not a known discriminant is reported as malformed.
    #[test]
    fn decode_rejects_invalid_namespace() {
        let mut bad = Vec::new();
        bad.extend_from_slice(&1u32.to_le_bytes());
        bad.extend_from_slice(&64u32.to_le_bytes());
        bad.push(99);
        bad.extend_from_slice(&4u16.to_le_bytes());
        bad.extend_from_slice(b"test");
        let err = LabelRegistry::decode(&bad).unwrap_err();
        assert!(matches!(err, LabelRegistryError::Malformed { .. }));
    }

    /// A label one byte over the limit is refused.
    #[test]
    fn label_too_long_is_rejected() {
        let r = LabelRegistry::empty();
        let big = "x".repeat(MAX_LABEL_LEN + 1);
        assert!(matches!(
            r.intern(Namespace::Node, &big),
            Err(LabelRegistryError::LabelTooLong { .. })
        ));
    }

    /// Sixteen threads intern four overlapping label sets. Every thread must observe the
    /// id the registry finally holds for each label, and distinct labels must get
    /// distinct ids.
    #[test]
    fn concurrent_intern_yields_consistent_ids() {
        use std::sync::Arc;
        use std::thread;
        let r = Arc::new(LabelRegistry::empty());
        let handles: Vec<_> = (0..16)
            .map(|i| {
                let r = Arc::clone(&r);
                thread::spawn(move || {
                    let mut ids = Vec::new();
                    for j in 0..50 {
                        let label = format!("label_{}_{}", i % 4, j);
                        ids.push(r.intern(Namespace::Node, &label).unwrap());
                    }
                    ids
                })
            })
            .collect();
        // Handles are in spawn order, so the enumerate index is the thread's `i`.
        for (i, h) in handles.into_iter().enumerate() {
            let ids = h.join().unwrap();
            assert_eq!(ids.len(), 50);
            for (j, id) in ids.into_iter().enumerate() {
                let label = format!("label_{}_{}", i % 4, j);
                assert_eq!(
                    r.lookup(Namespace::Node, &label),
                    Some(id),
                    "thread {} observed a different id for {}",
                    i,
                    label
                );
            }
        }
        let mut seen_ids = std::collections::HashSet::new();
        for i in 0..4 {
            for j in 0..50 {
                let label = format!("label_{}_{}", i, j);
                let id = r.lookup(Namespace::Node, &label).unwrap();
                assert!(seen_ids.insert(id), "duplicate id {:?} for {}", id, label);
            }
        }
        assert_eq!(seen_ids.len(), 200);
    }

    /// The unset sentinel never resolves, even on a seeded registry.
    #[test]
    fn unset_id_never_resolves() {
        let r = LabelRegistry::with_legacy_seed();
        assert!(UNSET_LABEL_ID.is_unset());
        assert_eq!(r.resolve(UNSET_LABEL_ID), None);
        assert_eq!(r.label_of(Namespace::Node, UNSET_LABEL_ID), None);
    }
}