use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::hash::Hash;
use crate::position::TermPos;
// Declares the `GeneratedCounter` type. `Ident::fresh` calls `GeneratedCounter::next()` to obtain
// the numeric suffix of each identifier it creates.
// NOTE(review): presumably `next()` yields a different `usize` on every call — confirm against
// the `simple_counter` crate.
simple_counter::generate_counter!(GeneratedCounter, usize);
/// Global string interner holding the label of every `Ident`; built on first access via `Lazy`.
static INTERNER: Lazy<interner::Interner> = Lazy::new(interner::Interner::new);
/// An identifier: an interned label plus the source position it is attached to.
///
/// The label text itself lives in the global `INTERNER`; this struct only stores the symbol
/// returned by the interner. Serialization and deserialization go through `String`, as requested
/// by the `serde` attribute below.
///
/// The manual `PartialEq`, `Hash` and `Ord` implementations in this file look at the label only:
/// `pos` and `generated` take no part in comparisons or hashing.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
#[serde(into = "String", from = "String")]
pub struct Ident {
// Handle to the label text stored in `INTERNER`.
symbol: interner::Symbol,
/// Source position associated with this identifier (`TermPos::None` when built with `new`).
pub pos: TermPos,
// Set at construction time when the label starts with `GEN_PREFIX`.
generated: bool,
}
impl Ident {
pub fn new_with_pos(label: impl AsRef<str>, pos: TermPos) -> Self {
let generated = label.as_ref().starts_with(GEN_PREFIX);
Self {
symbol: INTERNER.intern(label),
pos,
generated,
}
}
pub fn new(label: impl AsRef<str>) -> Self {
Self::new_with_pos(label, TermPos::None)
}
pub fn fresh() -> Self {
Self::new(format!("{}{}", GEN_PREFIX, GeneratedCounter::next()))
}
pub fn label(&self) -> &str {
INTERNER.lookup(self.symbol)
}
pub fn into_label(self) -> String {
self.label().to_owned()
}
}
/// Character that starts the label of every identifier created by `Ident::fresh`. Any identifier
/// whose label starts with this character is flagged as generated at construction time.
pub const GEN_PREFIX: char = '%';
/// Identifiers are totally ordered, so the partial order simply wraps the total one.
impl PartialOrd for Ident {
    /// Always returns `Some`: delegates to `Ord::cmp`, which compares the labels.
    ///
    /// Delegating (rather than repeating the comparison here) guarantees that `PartialOrd` and
    /// `Ord` can never disagree if the ordering is changed later.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
/// Identifiers are ordered by comparing their labels as strings.
impl Ord for Ident {
    /// Compares the label of `self` with the label of `other`.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let (lhs, rhs) = (self.label(), other.label());
        lhs.cmp(rhs)
    }
}
/// Two identifiers are equal when they carry the same interner symbol; the position and the
/// `generated` flag are not compared.
impl PartialEq for Ident {
    /// Returns `true` when both identifiers hold the same symbol.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.symbol, other.symbol);
        lhs == rhs
    }
}
// Marker impl: equality on `Ident` is the equality of its symbol (see the `PartialEq` impl).
impl Eq for Ident {}
/// Hashing uses the interner symbol only, matching the `PartialEq` implementation.
impl Hash for Ident {
    /// Feeds the symbol of this identifier into `state`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        Hash::hash(&self.symbol, state)
    }
}
/// An identifier is displayed as its label.
impl fmt::Display for Ident {
    /// Writes the label to `f` verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.label())
    }
}
impl<F> From<F> for Ident
where
String: From<F>,
{
fn from(val: F) -> Self {
Self::new(String::from(val))
}
}
// Clippy would rather see `From<Ident> for String`. That impl would make `String: From<Ident>`
// hold, so the blanket `From<F> for Ident` impl in this file would then apply to `F = Ident` and
// collide with the standard library's reflexive `From<T> for T`. Hence the hand-written `Into`.
#[allow(clippy::from_over_into)]
impl Into<String> for Ident {
    /// Converts the identifier into an owned copy of its label.
    fn into(self) -> String {
        self.label().to_owned()
    }
}
impl Ident {
pub fn is_generated(&self) -> bool {
self.generated
}
}
impl AsRef<str> for Ident {
fn as_ref(&self) -> &str {
self.label()
}
}
mod interner {
use std::collections::HashMap;
use std::sync::{Mutex, RwLock};
use typed_arena::Arena;
/// Handle to an interned string.
///
/// Wraps the index at which the string was stored in the interner's `vec` (see
/// `InnerInterner::intern` and `InnerInterner::lookup`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Symbol(u32);
/// String interner usable through a shared reference: an `InnerInterner` guarded by an `RwLock`.
/// `intern` takes the write lock, `lookup` takes the read lock.
pub(crate) struct Interner<'a>(RwLock<InnerInterner<'a>>);
impl<'a> Interner<'a> {
pub(crate) fn new() -> Self {
Self(RwLock::new(InnerInterner::new()))
}
pub(crate) fn intern(&self, string: impl AsRef<str>) -> Symbol {
self.0.write().unwrap().intern(string)
}
pub(crate) fn lookup(&self, sym: Symbol) -> &str {
unsafe { std::mem::transmute(self.0.read().unwrap().lookup(sym)) }
}
}
/// The actual interner state, without the outer lock.
///
/// `map` and `vec` hold `&'a str` slices that point into `arena`; `intern` creates them by
/// transmuting the lifetime of the freshly allocated string.
struct InnerInterner<'a> {
// Backing storage for the text of every interned string.
// NOTE(review): the `Mutex` is presumably there so the interner can live in a `static`
// (i.e. be `Sync`) — confirm against `typed_arena::Arena`'s auto traits.
arena: Mutex<Arena<u8>>,
// Interned string -> its symbol; consulted first by `intern`.
map: HashMap<&'a str, Symbol>,
// Symbol index -> interned string; indexed by `lookup`.
vec: Vec<&'a str>,
}
impl<'a> InnerInterner<'a> {
    /// Creates an interner that contains no strings.
    fn new() -> Self {
        Self {
            arena: Mutex::new(Arena::new()),
            map: HashMap::new(),
            vec: Vec::new(),
        }
    }

    /// Returns the symbol of `string`, allocating a copy of it in the arena the first time it is
    /// seen. Interning the same text again returns the same symbol.
    ///
    /// # Panics
    ///
    /// Panics if the number of distinct interned strings no longer fits in a `u32`, or if the
    /// arena mutex is poisoned.
    fn intern(&mut self, string: impl AsRef<str>) -> Symbol {
        let text = string.as_ref();
        if let Some(&sym) = self.map.get(text) {
            return sym;
        }
        // The symbol is the position the string is about to take in `vec`. A plain `as u32` cast
        // would silently wrap past `u32::MAX` and make two different strings share a symbol, so
        // the conversion is checked (and done before anything is allocated).
        let index = <u32 as std::convert::TryFrom<usize>>::try_from(self.vec.len())
            .expect("interner is full: symbol index does not fit in u32");
        let sym = Symbol(index);
        // SAFETY: `alloc_str` copies `text` into the arena owned by `self`. The arena is never
        // cleared and, per `typed_arena`'s contract, does not move or free an allocation until it
        // is dropped, so the bytes stay valid for as long as `self` is alive. The `&mut str`
        // returned by the arena is only ever used as a shared `&str` from here on. The references
        // stored below must not be handed out for longer than `self` lives; `lookup` ties them to
        // a borrow of `self`.
        let in_string: &'a str = unsafe {
            std::mem::transmute::<&mut str, &'a str>(self.arena.lock().unwrap().alloc_str(text))
        };
        self.vec.push(in_string);
        self.map.insert(in_string, sym);
        sym
    }

    /// Returns the string stored under `sym`.
    ///
    /// # Panics
    ///
    /// Panics if `sym` is not an index into this interner's storage, e.g. because it was
    /// produced by a different interner holding more strings.
    fn lookup(&self, sym: Symbol) -> &str {
        self.vec[sym.0 as usize]
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A string that has been interned can be read back through its symbol.
    #[test]
    fn test_intern_then_lookup() {
        let interner = Interner::new();
        let expected = "test_string";
        let symbol = interner.intern(expected);
        assert_eq!(interner.lookup(symbol), expected);
    }

    /// Interning the same text twice yields the same symbol.
    #[test]
    fn test_intern_twice_has_same_symbol() {
        let interner = Interner::new();
        let text = "test_string";
        let first = interner.intern(text);
        let second = interner.intern(text);
        assert_eq!(first, second);
    }

    /// Distinct strings get distinct symbols.
    #[test]
    fn test_intern_two_different_has_different_symbols() {
        let interner = Interner::new();
        let sym_a = interner.intern("a");
        let sym_b = interner.intern("b");
        assert_ne!(sym_a, sym_b);
    }

    /// Interning many strings works, and interning them all a second time adds no new entry.
    #[test]
    fn test_large_number_of_interns() {
        const COUNT: usize = 10000;
        let interner = Interner::new();
        // First pass fills the interner, second pass must only hit existing entries.
        for _pass in 0..2 {
            for n in 0..COUNT {
                let text = n.to_string();
                let sym = interner.intern(&text);
                assert_eq!(text, interner.lookup(sym));
            }
            // The read guard is released at the end of each pass, before the next `intern`.
            let inner = interner.0.read().unwrap();
            assert_eq!(COUNT, inner.map.len());
            assert_eq!(COUNT, inner.vec.len());
        }
    }
}
}