use crate::ids::NameId;
use std::cell::RefCell;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
#[derive(Debug)]
struct Entry {
hash: u64,
next: i32,
text: Box<str>,
}
#[derive(Debug)]
pub struct NameTable {
entries: RefCell<Vec<Entry>>,
buckets: RefCell<Vec<i32>>,
}
impl NameTable {
const INITIAL_BUCKETS: usize = 256;
pub fn new() -> Self {
let table = Self {
entries: RefCell::new(Vec::with_capacity(Self::INITIAL_BUCKETS)),
buckets: RefCell::new(vec![-1; Self::INITIAL_BUCKETS]),
};
table.add("");
table.add(XS_NAMESPACE);
table.add(XSI_NAMESPACE);
table.add(XML_NAMESPACE);
table.add(XMLNS_NAMESPACE);
table.add("xs");
table.add("xsd");
table.add("xsi");
table.add("xml");
table.add("xmlns");
table.add("type");
table.add("nil");
table.add("schemaLocation");
table.add("noNamespaceSchemaLocation");
table
}
pub fn add(&self, value: &str) -> NameId {
let hash = Self::hash_str(value);
if let Some(id) = self.find(value, hash) {
return id;
}
self.insert(value, hash)
}
pub fn get(&self, value: &str) -> Option<NameId> {
let hash = Self::hash_str(value);
self.find(value, hash)
}
pub fn resolve_ref(&self, id: NameId) -> &str {
unsafe { &(&(*self.entries.as_ptr()))[id.0 as usize].text }
}
pub fn try_resolve_ref(&self, id: NameId) -> Option<&str> {
unsafe {
(&(*self.entries.as_ptr()))
.get(id.0 as usize)
.map(|e| e.text.as_ref())
}
}
pub fn resolve(&self, id: NameId) -> String {
self.resolve_ref(id).to_string()
}
pub fn try_resolve(&self, id: NameId) -> Option<String> {
self.try_resolve_ref(id).map(|s| s.to_string())
}
pub fn len(&self) -> usize {
self.entries.borrow().len()
}
pub fn is_empty(&self) -> bool {
self.entries.borrow().is_empty()
}
fn hash_str(value: &str) -> u64 {
let mut hasher = DefaultHasher::new();
value.hash(&mut hasher);
hasher.finish()
}
fn find(&self, value: &str, hash: u64) -> Option<NameId> {
let entries = self.entries.borrow();
let buckets = self.buckets.borrow();
let bucket_idx = (hash as usize) % buckets.len();
let mut entry_idx = buckets[bucket_idx];
while entry_idx >= 0 {
let entry = &entries[entry_idx as usize];
if entry.hash == hash && entry.text.as_ref() == value {
return Some(NameId(entry_idx as u32));
}
entry_idx = entry.next;
}
None
}
fn insert(&self, value: &str, hash: u64) -> NameId {
let mut entries = self.entries.borrow_mut();
let mut buckets = self.buckets.borrow_mut();
if entries.len() >= buckets.len() {
Self::rehash_internal(&mut entries, &mut buckets);
}
let id = NameId(entries.len() as u32);
let bucket_idx = (hash as usize) % buckets.len();
let next = buckets[bucket_idx];
entries.push(Entry {
hash,
next,
text: value.into(),
});
buckets[bucket_idx] = id.0 as i32;
id
}
#[allow(clippy::ptr_arg)]
fn rehash_internal(entries: &mut Vec<Entry>, buckets: &mut Vec<i32>) {
let new_size = buckets.len() * 2;
*buckets = vec![-1; new_size];
for (idx, entry) in entries.iter_mut().enumerate() {
let bucket_idx = (entry.hash as usize) % new_size;
entry.next = buckets[bucket_idx];
buckets[bucket_idx] = idx as i32;
}
}
}
impl Default for NameTable {
fn default() -> Self {
Self::new()
}
}
pub const XS_NAMESPACE: &str = "http://www.w3.org/2001/XMLSchema";
pub const XSI_NAMESPACE: &str = "http://www.w3.org/2001/XMLSchema-instance";
pub const XML_NAMESPACE: &str = "http://www.w3.org/XML/1998/namespace";
pub const XMLNS_NAMESPACE: &str = "http://www.w3.org/2000/xmlns/";
pub mod well_known {
use crate::ids::NameId;
pub const EMPTY: NameId = NameId(0);
pub const XS_NAMESPACE: NameId = NameId(1);
pub const XSI_NAMESPACE: NameId = NameId(2);
pub const XML_NAMESPACE: NameId = NameId(3);
pub const XMLNS_NAMESPACE: NameId = NameId(4);
pub const XS_PREFIX: NameId = NameId(5);
pub const XSD_PREFIX: NameId = NameId(6);
pub const XSI_PREFIX: NameId = NameId(7);
pub const XML_PREFIX: NameId = NameId(8);
pub const XMLNS_PREFIX: NameId = NameId(9);
pub const XSI_TYPE: NameId = NameId(10);
pub const XSI_NIL: NameId = NameId(11);
pub const XSI_SCHEMA_LOCATION: NameId = NameId(12);
pub const XSI_NO_NAMESPACE_SCHEMA_LOCATION: NameId = NameId(13);
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty_string_is_id_zero() {
let table = NameTable::new();
assert_eq!(table.resolve(NameId(0)), "");
}
#[test]
fn test_add_and_resolve() {
let table = NameTable::new();
let id = table.add("hello");
assert_eq!(table.resolve(id), "hello");
}
#[test]
fn test_deduplication() {
let table = NameTable::new();
let id1 = table.add("hello");
let id2 = table.add("hello");
assert_eq!(id1, id2);
}
#[test]
fn test_different_strings_different_ids() {
let table = NameTable::new();
let id1 = table.add("hello");
let id2 = table.add("world");
assert_ne!(id1, id2);
}
#[test]
fn test_get_existing() {
let table = NameTable::new();
let id = table.add("test");
assert_eq!(table.get("test"), Some(id));
}
#[test]
fn test_get_nonexistent() {
let table = NameTable::new();
assert_eq!(table.get("nonexistent"), None);
}
#[test]
fn test_well_known_namespaces() {
let table = NameTable::new();
assert_eq!(table.resolve(well_known::XS_NAMESPACE), XS_NAMESPACE);
assert_eq!(table.resolve(well_known::XSI_NAMESPACE), XSI_NAMESPACE);
assert_eq!(table.resolve(well_known::XML_NAMESPACE), XML_NAMESPACE);
assert_eq!(table.resolve(well_known::XMLNS_NAMESPACE), XMLNS_NAMESPACE);
}
#[test]
fn test_well_known_prefixes() {
let table = NameTable::new();
assert_eq!(table.resolve(well_known::XS_PREFIX), "xs");
assert_eq!(table.resolve(well_known::XSD_PREFIX), "xsd");
assert_eq!(table.resolve(well_known::XSI_PREFIX), "xsi");
assert_eq!(table.resolve(well_known::XML_PREFIX), "xml");
}
#[test]
fn test_well_known_xsi_local_names() {
let table = NameTable::new();
assert_eq!(table.resolve(well_known::XSI_TYPE), "type");
assert_eq!(table.resolve(well_known::XSI_NIL), "nil");
assert_eq!(
table.resolve(well_known::XSI_SCHEMA_LOCATION),
"schemaLocation"
);
assert_eq!(
table.resolve(well_known::XSI_NO_NAMESPACE_SCHEMA_LOCATION),
"noNamespaceSchemaLocation"
);
}
#[test]
fn test_rehashing() {
let table = NameTable::new();
for i in 0..1000 {
let s = format!("string_{}", i);
table.add(&s);
}
for i in 0..1000 {
let s = format!("string_{}", i);
assert!(table.get(&s).is_some(), "Failed to find: {}", s);
}
}
#[test]
fn test_unicode_strings() {
let table = NameTable::new();
let id1 = table.add("日本語");
let id2 = table.add("日本語");
assert_eq!(id1, id2);
assert_eq!(table.resolve(id1), "日本語");
}
#[test]
fn test_interior_mutability() {
let table = NameTable::new();
let table_ref = &table;
let id1 = table_ref.add("through_ref");
let id2 = table_ref.add("through_ref");
assert_eq!(id1, id2);
assert_eq!(table.resolve(id1), "through_ref");
}
#[test]
fn test_resolve_ref_basic() {
let table = NameTable::new();
let id = table.add("hello");
assert_eq!(table.resolve_ref(id), "hello");
assert_eq!(table.resolve_ref(NameId(0)), "");
}
#[test]
fn test_try_resolve_ref_basic() {
let table = NameTable::new();
let id = table.add("test");
assert_eq!(table.try_resolve_ref(id), Some("test"));
assert_eq!(table.try_resolve_ref(NameId(9999)), None);
}
#[test]
fn test_resolve_ref_stability_across_add() {
let table = NameTable::new();
let id = table.add("stable_string");
let resolved: &str = table.resolve_ref(id);
for i in 0..500 {
table.add(&format!("extra_{i}"));
}
assert_eq!(resolved, "stable_string");
}
#[test]
fn test_try_resolve_ref_stability_across_add() {
let table = NameTable::new();
let id = table.add("test_str");
let resolved: Option<&str> = table.try_resolve_ref(id);
for i in 0..500 {
table.add(&format!("filler_{i}"));
}
assert_eq!(resolved, Some("test_str"));
}
}