use crate::ids::NameId;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
#[derive(Clone, Copy, Debug)]
pub struct QNameAtom {
pub local_name: NameId,
pub namespace_uri: NameId,
pub prefix: NameId,
pub local_name_hash: u32,
pub qualified_name_idx: u32,
}
impl PartialEq for QNameAtom {
fn eq(&self, other: &Self) -> bool {
self.local_name == other.local_name
&& self.namespace_uri == other.namespace_uri
&& self.prefix == other.prefix
&& self.local_name_hash == other.local_name_hash
}
}
impl Eq for QNameAtom {}
impl Hash for QNameAtom {
fn hash<H: Hasher>(&self, state: &mut H) {
self.local_name.hash(state);
self.namespace_uri.hash(state);
}
}
pub const EMPTY_QNAME: QNameAtom = QNameAtom {
local_name: NameId(0),
namespace_uri: NameId(0),
prefix: NameId(0),
local_name_hash: 0,
qualified_name_idx: 0,
};
pub struct QNameTable {
atoms: Vec<QNameAtom>,
nexts: Vec<i32>,
buckets: Vec<i32>,
}
impl QNameTable {
const INITIAL_BUCKETS: usize = 64;
pub fn new() -> Self {
let mut atoms = Vec::with_capacity(Self::INITIAL_BUCKETS);
let mut nexts = Vec::with_capacity(Self::INITIAL_BUCKETS);
atoms.push(EMPTY_QNAME);
nexts.push(-1);
Self {
atoms,
nexts,
buckets: vec![-1; Self::INITIAL_BUCKETS],
}
}
pub fn atomize(&mut self, qname: QNameAtom) -> u32 {
let hash = Self::hash_atom(&qname);
let bucket_idx = (hash as usize) % self.buckets.len();
let mut entry_idx = self.buckets[bucket_idx];
while entry_idx >= 0 {
if self.atoms[entry_idx as usize] == qname {
return entry_idx as u32;
}
entry_idx = self.nexts[entry_idx as usize];
}
if self.atoms.len() >= self.buckets.len() {
self.rehash();
}
let new_idx = self.atoms.len() as u32;
let bucket_idx = (hash as usize) % self.buckets.len();
let head = self.buckets[bucket_idx];
self.atoms.push(qname);
self.nexts.push(head);
self.buckets[bucket_idx] = new_idx as i32;
new_idx
}
#[inline]
pub fn get(&self, idx: u32) -> QNameAtom {
self.atoms[idx as usize]
}
fn hash_atom(qname: &QNameAtom) -> u64 {
let mut hasher = DefaultHasher::new();
qname.local_name.hash(&mut hasher);
qname.namespace_uri.hash(&mut hasher);
qname.prefix.hash(&mut hasher);
qname.local_name_hash.hash(&mut hasher);
hasher.finish()
}
fn rehash(&mut self) {
let new_size = self.buckets.len() * 2;
self.buckets = vec![-1; new_size];
for n in self.nexts.iter_mut() {
*n = -1;
}
for idx in 1..self.atoms.len() {
let hash = Self::hash_atom(&self.atoms[idx]);
let bucket_idx = (hash as usize) % new_size;
self.nexts[idx] = self.buckets[bucket_idx];
self.buckets[bucket_idx] = idx as i32;
}
}
}
impl Default for QNameTable {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_qname(local: u32, ns: u32, prefix: u32, hash: u32) -> QNameAtom {
QNameAtom {
local_name: NameId(local),
namespace_uri: NameId(ns),
prefix: NameId(prefix),
local_name_hash: hash,
qualified_name_idx: 0,
}
}
#[test]
fn empty_qname_at_index_zero() {
let table = QNameTable::new();
assert_eq!(table.get(0), EMPTY_QNAME);
}
#[test]
fn dedup_identical_qnames() {
let mut table = QNameTable::new();
let q = make_qname(1, 2, 3, 100);
let idx1 = table.atomize(q);
let idx2 = table.atomize(q);
assert_eq!(idx1, idx2);
assert_eq!(idx1, 1); }
#[test]
fn different_qnames_different_indices() {
let mut table = QNameTable::new();
let q1 = make_qname(1, 2, 3, 100);
let q2 = make_qname(4, 5, 6, 200);
let idx1 = table.atomize(q1);
let idx2 = table.atomize(q2);
assert_ne!(idx1, idx2);
}
#[test]
fn different_prefix_different_entry() {
let mut table = QNameTable::new();
let q1 = make_qname(1, 2, 3, 100);
let q2 = make_qname(1, 2, 99, 100); let idx1 = table.atomize(q1);
let idx2 = table.atomize(q2);
assert_ne!(idx1, idx2, "Different prefix must produce a distinct entry");
}
#[test]
fn get_round_trip() {
let mut table = QNameTable::new();
let q = make_qname(10, 20, 30, 42);
let idx = table.atomize(q);
assert_eq!(table.get(idx), q);
}
#[test]
fn many_entries_trigger_rehash() {
let mut table = QNameTable::new();
let count = 1024;
let mut indices = Vec::with_capacity(count);
for i in 0..count as u32 {
let q = make_qname(i, i + 1000, i % 5, i.wrapping_mul(2654435761));
indices.push(table.atomize(q));
}
for i in 0..count as u32 {
let q = make_qname(i, i + 1000, i % 5, i.wrapping_mul(2654435761));
assert_eq!(table.get(indices[i as usize]), q);
}
for i in 0..count as u32 {
let q = make_qname(i, i + 1000, i % 5, i.wrapping_mul(2654435761));
assert_eq!(table.atomize(q), indices[i as usize]);
}
}
#[test]
fn dedup_ignores_qualified_name_idx() {
let mut table = QNameTable::new();
let q1 = QNameAtom {
local_name: NameId(1),
namespace_uri: NameId(2),
prefix: NameId(3),
local_name_hash: 100,
qualified_name_idx: 10,
};
let q2 = QNameAtom {
local_name: NameId(1),
namespace_uri: NameId(2),
prefix: NameId(3),
local_name_hash: 100,
qualified_name_idx: 99, };
let idx1 = table.atomize(q1);
let idx2 = table.atomize(q2);
assert_eq!(
idx1, idx2,
"Same identity fields must dedup despite different qualified_name_idx"
);
assert_eq!(table.get(idx1).qualified_name_idx, 10);
}
#[test]
fn hash_trait_excludes_prefix() {
let q1 = make_qname(1, 2, 3, 100);
let q2 = make_qname(1, 2, 99, 100);
let hash1 = {
let mut h = DefaultHasher::new();
q1.hash(&mut h);
h.finish()
};
let hash2 = {
let mut h = DefaultHasher::new();
q2.hash(&mut h);
h.finish()
};
assert_eq!(
hash1, hash2,
"Hash impl must ignore prefix (XML namespace identity)"
);
}
#[test]
fn hash_trait_differs_for_different_names() {
let q1 = make_qname(1, 2, 3, 100);
let q2 = make_qname(1, 99, 3, 100);
let hash1 = {
let mut h = DefaultHasher::new();
q1.hash(&mut h);
h.finish()
};
let hash2 = {
let mut h = DefaultHasher::new();
q2.hash(&mut h);
h.finish()
};
assert_ne!(
hash1, hash2,
"Hash should differ when namespace_uri differs"
);
}
}