use lazy_static::lazy_static;
use std::collections::HashMap;
use std::sync::{Arc, Mutex, Once, RwLock};
lazy_static! {
/// Shared [`GlobalStringPool`] instance, built with `GlobalStringPool::new()` through
/// `lazy_static!`. `StringPool::from_strings` obtains its indices from this pool.
pub static ref GLOBAL_STRING_POOL: GlobalStringPool = GlobalStringPool::new();
}
/// String-interning table whose state sits behind an `RwLock`, so every method takes `&self`.
///
/// Each distinct string is stored once and identified by a `u32` index equal to its
/// position in insertion order.
#[derive(Debug)]
pub struct GlobalStringPool {
// Index-ordered string list plus the string -> index map, guarded by a single lock.
pool: RwLock<StringPoolMut>,
}
impl GlobalStringPool {
    /// Creates an empty pool.
    pub fn new() -> Self {
        Self {
            pool: RwLock::new(StringPoolMut {
                strings: Vec::new(),
                hash_map: HashMap::new(),
            }),
        }
    }

    /// Returns the index of `s`, interning it first if it is not in the pool yet.
    ///
    /// A new string receives the next free index (the number of strings stored so far).
    ///
    /// A poisoned lock is recovered instead of being reported as index `0`: `0` is also
    /// the index of the first interned string, so it cannot double as an error value.
    pub fn get_or_insert(&self, s: &str) -> u32 {
        // Fast path: look the string up under the shared lock. The inner scope makes
        // sure the read guard is released before the write lock is requested below.
        {
            let read_pool = self
                .pool
                .read()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            if let Some(&idx) = read_pool.hash_map.get(s) {
                return idx;
            }
        }

        let mut write_pool = self
            .pool
            .write()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        // Look again: the string may have been inserted between releasing the read
        // lock and acquiring the write lock.
        if let Some(&idx) = write_pool.hash_map.get(s) {
            return idx;
        }

        // NOTE: `as` truncates silently once the pool holds more than `u32::MAX` strings.
        let idx = write_pool.strings.len() as u32;
        // One allocation, shared by the index-ordered list and the lookup map.
        let arc_str: Arc<str> = Arc::from(s);
        write_pool.strings.push(Arc::clone(&arc_str));
        write_pool.hash_map.insert(arc_str, idx);
        idx
    }

    /// Returns an owned copy of the string stored at `index`, or `None` if `index` is
    /// out of range.
    pub fn get(&self, index: u32) -> Option<String> {
        let pool = self
            .pool
            .read()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        pool.strings.get(index as usize).map(|s| s.to_string())
    }

    /// Returns the number of strings currently stored.
    pub fn len(&self) -> usize {
        self.pool
            .read()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
            .strings
            .len()
    }

    /// Interns every string in `strings` and returns their indices in input order.
    ///
    /// Each string goes through [`get_or_insert`](Self::get_or_insert) separately, so
    /// the locks are taken per string rather than once for the whole batch.
    pub fn add_strings(&self, strings: &[String]) -> Vec<u32> {
        strings.iter().map(|s| self.get_or_insert(s)).collect()
    }
}
/// Immutable string table: `strings` maps index -> string, `hash_map` maps string -> index.
///
/// Both collections sit behind an `Arc`, so the derived `Clone` copies two
/// reference-counted pointers rather than the contents.
#[derive(Debug, Clone)]
pub struct StringPool {
// Strings addressed by their `u32` index.
strings: Arc<Vec<Arc<str>>>,
// Reverse lookup from string to index.
hash_map: Arc<HashMap<Arc<str>, u32>>,
}
impl StringPool {
    /// Creates an empty pool.
    pub fn new() -> Self {
        Self {
            strings: Arc::new(Vec::new()),
            hash_map: Arc::new(HashMap::new()),
        }
    }

    /// Builds a pool whose indices are the ones `GLOBAL_STRING_POOL` assigns to `strings`.
    ///
    /// Every input string is registered in the global pool first. Locally it is stored
    /// at its global index and recorded in the lookup map under that index.
    ///
    /// When an index lies beyond the current end of the local table, the table is grown
    /// to that index and every newly created slot is filled with the string that caused
    /// the growth. Slots never claimed by an input string therefore hold a copy of a
    /// neighbouring string; [`find`](Self::find) only knows the input strings.
    pub fn from_strings(strings: Vec<String>) -> Self {
        let indices = GLOBAL_STRING_POOL.add_strings(&strings);
        let mut pool = Self::new_mut();
        for (&idx, s) in indices.iter().zip(&strings) {
            let slot = idx as usize;
            // Single allocation, shared by the table slot(s) and the lookup map.
            let arc_str: Arc<str> = Arc::from(s.as_str());
            pool.hash_map.insert(Arc::clone(&arc_str), idx);
            if slot < pool.strings.len() {
                pool.strings[slot] = arc_str;
            } else {
                // NOTE(review): padding with the current string keeps the set of distinct
                // strings equal to the input set (which `merge` relies on), but it means
                // `get` on a padded slot does not return the string the global pool
                // holds at that index — confirm this is the intended contract.
                pool.strings.resize(slot + 1, arc_str);
            }
        }
        pool.freeze()
    }

    /// Builds a pool with purely local indices: strings are numbered in first-seen
    /// order starting at 0, and the global pool is not involved.
    pub fn from_strings_legacy(strings: Vec<String>) -> Self {
        let mut pool = Self::new_mut();
        for s in strings {
            pool.get_or_insert(&s);
        }
        pool.freeze()
    }

    /// Creates the empty mutable builder the constructors fill before freezing it.
    fn new_mut() -> StringPoolMut {
        StringPoolMut {
            strings: Vec::new(),
            hash_map: HashMap::new(),
        }
    }

    /// Returns the number of slots in the table.
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when the table has no slots.
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }

    /// Returns the string stored at `index`, or `None` if `index` is out of range.
    pub fn get(&self, index: u32) -> Option<&str> {
        self.strings.get(index as usize).map(|s| s.as_ref())
    }

    /// Returns the index recorded for `s`, or `None` if `s` is not in the lookup map.
    pub fn find(&self, s: &str) -> Option<u32> {
        self.hash_map.get(s).copied()
    }

    /// Returns every slot of the table, in index order.
    pub fn all_strings(&self) -> Vec<&str> {
        self.strings.iter().map(|s| s.as_ref()).collect()
    }

    /// Resolves each index to an owned string; an out-of-range index yields `""`.
    pub fn indices_to_strings(&self, indices: &[u32]) -> Vec<String> {
        indices
            .iter()
            .map(|&idx| self.get(idx).unwrap_or("").to_string())
            .collect()
    }

    /// Returns a new pool holding the distinct strings of `self` followed by those of
    /// `other`, renumbered from 0 in first-seen order.
    pub fn merge(&self, other: &Self) -> Self {
        let mut merged = Self::new_mut();
        // Walk the tables directly; no intermediate `Vec<&str>` is needed.
        for s in self.strings.iter().chain(other.strings.iter()) {
            merged.get_or_insert(s);
        }
        merged.freeze()
    }
}
/// Mutable form of the string table; `freeze` converts it into a `StringPool`.
#[derive(Debug)]
struct StringPoolMut {
// Strings addressed by their `u32` index.
strings: Vec<Arc<str>>,
// Reverse lookup from string to index.
hash_map: HashMap<Arc<str>, u32>,
}
impl StringPoolMut {
    /// Returns the index of `s`, appending it to the table first if it is not present.
    ///
    /// A new string receives the next free index, i.e. the current length of `strings`.
    fn get_or_insert(&mut self, s: &str) -> u32 {
        // `Arc<str>` keys can be queried with a plain `&str`, so a hit allocates nothing.
        if let Some(&index) = self.hash_map.get(s) {
            return index;
        }
        // NOTE: `as` truncates silently once the table holds more than `u32::MAX` strings.
        let index = self.strings.len() as u32;
        // One allocation, shared by the index-ordered list and the lookup map.
        let arc_str: Arc<str> = Arc::from(s);
        self.strings.push(Arc::clone(&arc_str));
        self.hash_map.insert(arc_str, index);
        index
    }

    /// Consumes the builder and wraps both collections in `Arc`s to form a [`StringPool`].
    fn freeze(self) -> StringPool {
        StringPool {
            strings: Arc::new(self.strings),
            hash_map: Arc::new(self.hash_map),
        }
    }
}