use std::collections::HashMap;
use std::sync::Arc;
use dashmap::DashMap;
use sonic_rs::JsonContainerTrait as _;
/// Interns field-name strings so that equal names share a single `Arc<str>`.
///
/// Handles returned for equal names are clones of the same allocation, so
/// they can be compared with [`Arc::ptr_eq`] and cloned without copying the
/// string data.
#[derive(Debug)]
pub struct FieldInterner {
    /// Set of interned names. Only the keys carry information; the unit value
    /// exists because the map type requires one.
    table: DashMap<Arc<str>, ()>,
}
impl FieldInterner {
    /// Creates an interner with no names registered.
    #[must_use]
    pub fn new() -> Self {
        Self {
            table: DashMap::new(),
        }
    }

    /// Creates an interner with every name in `fields` already registered.
    ///
    /// Names that occur more than once in `fields` are registered once.
    #[must_use]
    pub fn with_known_fields(fields: &[&str]) -> Self {
        let interner = Self::new();
        for &field in fields {
            // Only the registration side effect matters here; the handle is dropped.
            let _ = interner.intern(field);
        }
        interner
    }

    /// Returns the canonical `Arc<str>` for `name`, registering it on first use.
    ///
    /// Every call made with an equal `name` returns a clone of the same
    /// allocation, so results can be compared with [`Arc::ptr_eq`].
    #[inline]
    #[must_use]
    pub fn intern(&self, name: &str) -> Arc<str> {
        // Fast path: a name that is already registered costs one lookup and no
        // allocation.
        if let Some(existing) = self.table.get(name) {
            return Arc::clone(existing.key());
        }
        // Slow path: allocate a candidate key and insert it unless some other
        // caller registered `name` in the meantime. The reference handed back by
        // `or_insert` points at the entry actually stored in the table, so its
        // key is the canonical `Arc` in both cases and no further lookup is
        // needed.
        let slot = self.table.entry(Arc::from(name)).or_insert(());
        Arc::clone(slot.key())
    }

    /// Collects the members of a JSON object whose keys are registered names.
    ///
    /// The returned map is keyed by the canonical interned `Arc<str>` and holds
    /// a clone of each matching member's value. Members whose key has not been
    /// interned are skipped, and nothing is registered by this call. If `value`
    /// is not a JSON object the result is empty.
    #[must_use]
    pub fn extract_known(&self, value: &sonic_rs::Value) -> HashMap<Arc<str>, sonic_rs::Value> {
        let mut extracted = HashMap::new();
        if let Some(obj) = value.as_object() {
            for (key, val) in obj {
                // Copy the canonical key out before cloning the value so the
                // table guard is not held during a potentially large clone.
                let canonical = self.table.get(key).map(|entry| Arc::clone(entry.key()));
                if let Some(field) = canonical {
                    extracted.insert(field, val.clone());
                }
            }
        }
        extracted
    }

    /// Returns the number of distinct names currently registered.
    #[must_use]
    pub fn len(&self) -> usize {
        self.table.len()
    }

    /// Returns `true` when no name has been registered yet.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.table.is_empty()
    }
}
impl Default for FieldInterner {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use std::thread;

    use sonic_rs::JsonValueTrait as _;

    use super::*;

    /// Two lookups of one name must hand back the very same allocation.
    #[test]
    fn intern_returns_same_arc_for_same_string() {
        let interner = FieldInterner::new();
        let first = interner.intern("_table");
        let second = interner.intern("_table");
        assert!(
            Arc::ptr_eq(&first, &second),
            "expected same Arc for '_table'"
        );
    }

    /// Distinct names get distinct allocations that keep their own contents.
    #[test]
    fn intern_returns_different_arcs_for_different_strings() {
        let interner = FieldInterner::new();
        let table = interner.intern("_table");
        let timestamp = interner.intern("_timestamp");
        assert!(!Arc::ptr_eq(&table, &timestamp));
        assert_eq!(&*table, "_table");
        assert_eq!(&*timestamp, "_timestamp");
    }

    /// Names passed to the constructor are registered up front.
    #[test]
    fn with_known_fields_prepopulates_table() {
        let interner = FieldInterner::with_known_fields(&["_table", "_timestamp", "host"]);
        assert_eq!(interner.len(), 3);
        let first = interner.intern("_table");
        let second = interner.intern("_table");
        assert!(Arc::ptr_eq(&first, &second));
    }

    /// Registered keys are copied out of an object; unregistered ones are not.
    #[test]
    fn extract_known_extracts_matching_fields() {
        let interner = FieldInterner::with_known_fields(&["_table", "host"]);
        let document: sonic_rs::Value =
            sonic_rs::from_str(r#"{"_table": "events", "host": "web1", "unknown": 42}"#).unwrap();
        let extracted = interner.extract_known(&document);
        assert_eq!(extracted.len(), 2);

        let table_key: Arc<str> = interner.intern("_table");
        let host_key: Arc<str> = interner.intern("host");
        let table_value = extracted.get(&table_key).and_then(|v| v.as_str());
        assert_eq!(table_value, Some("events"));
        let host_value = extracted.get(&host_key).and_then(|v| v.as_str());
        assert_eq!(host_value, Some("web1"));

        let unknown_key: Arc<str> = Arc::from("unknown");
        assert!(!extracted.contains_key(&unknown_key));
    }

    /// An object containing only unregistered keys yields nothing.
    #[test]
    fn extract_known_ignores_unknown_fields() {
        let interner = FieldInterner::with_known_fields(&["_table"]);
        let document: sonic_rs::Value = sonic_rs::from_str(r#"{"foo": 1, "bar": 2}"#).unwrap();
        let extracted = interner.extract_known(&document);
        assert!(extracted.is_empty(), "no known fields should be extracted");
    }

    /// A JSON value that is not an object yields nothing.
    #[test]
    fn extract_known_on_non_object_returns_empty() {
        let interner = FieldInterner::with_known_fields(&["_table"]);
        let document: sonic_rs::Value = sonic_rs::from_str("[1, 2, 3]").unwrap();
        let extracted = interner.extract_known(&document);
        assert!(extracted.is_empty());
    }

    /// Threads racing to intern one name must all receive the same allocation.
    #[test]
    fn concurrent_interning_deduplicates_correctly() {
        let interner = Arc::new(FieldInterner::new());
        let field = "_table";
        let num_threads = 8;

        let mut handles = Vec::new();
        for _ in 0..num_threads {
            let shared = Arc::clone(&interner);
            handles.push(thread::spawn(move || shared.intern(field)));
        }
        let arcs: Vec<Arc<str>> = handles
            .into_iter()
            .map(|handle| handle.join().unwrap())
            .collect();

        let first = &arcs[0];
        for other in arcs.iter().skip(1) {
            assert!(
                Arc::ptr_eq(first, other),
                "concurrent interning produced different Arc instances"
            );
        }
        assert_eq!(interner.len(), 1);
    }

    /// `len` counts distinct names only and `is_empty` tracks it.
    #[test]
    fn len_and_is_empty() {
        let interner = FieldInterner::new();
        assert!(interner.is_empty());

        let _ = interner.intern("a");
        assert_eq!(interner.len(), 1);
        assert!(!interner.is_empty());

        let _ = interner.intern("b");
        assert_eq!(interner.len(), 2);

        // Re-interning an existing name must not grow the table.
        let _ = interner.intern("a");
        assert_eq!(interner.len(), 2);
    }
}