use std::{
fs::File,
io::{self, BufRead, BufReader},
path::Path,
sync::{Arc, LazyLock},
};
use bimap::BiHashMap;
use encoding_rs::SHIFT_JIS;
use parking_lot::Mutex;
use thiserror::Error;
use super::HashCode;
/// Bidirectional lookup table between hash codes and the labels they were computed from.
///
/// All labels held by one map are hashed under a single character encoding; see the
/// `extend_*` and `read_*` methods.
#[derive(Debug, Default)]
pub struct HashCodeMap {
/// Hash code <-> label pairs; each hash and each label appears at most once.
map: BiHashMap<HashCode, String>,
/// Encoding the map is bound to. `None` until an `extend_*`/`read_*` call sets it, after
/// which calls requesting a different encoding are rejected.
encoding: Option<Encoding>,
/// When `true`, `hash_of` only answers for labels that are present in the map; when
/// `false`, it falls back to computing a hash code for unknown labels.
strict: bool,
}
impl HashCodeMap {
pub fn get() -> Arc<Mutex<Self>> {
static INSTANCE: LazyLock<Arc<Mutex<HashCodeMap>>> =
LazyLock::new(|| Arc::new(Mutex::new(HashCodeMap::default())));
INSTANCE.clone()
}
pub fn label_of(&self, hash: HashCode, width: Option<u32>) -> Option<&String> {
match width {
None => self.map.get_by_left(&hash),
Some(width) => {
let mask = u32::MAX >> (u32::BITS - width);
let hash = hash.into_raw() & mask;
self.map
.iter()
.find(|(k, _)| k.into_raw() & mask == hash)
.map(|(_, v)| v)
}
}
}
pub fn hash_of(&self, label: &str) -> Option<HashCode> {
self.map
.get_by_right(label)
.copied()
.or_else(|| (!self.strict).then(|| HashCode::from(label)))
}
pub fn extend_shift_jis<I: IntoIterator<Item = String>>(
&mut self,
iter: I,
) -> Result<(), ParseLabelError> {
self.extend(iter, Encoding::ShiftJis, encode_shift_jis)
}
pub fn extend_utf8<I: IntoIterator<Item = String>>(
&mut self,
iter: I,
) -> Result<(), ParseLabelError> {
self.extend(iter, Encoding::Utf8, encode_utf8)
}
fn extend<I, F>(
&mut self,
iter: I,
encoding: Encoding,
encode: F,
) -> Result<(), ParseLabelError>
where
I: IntoIterator<Item = String>,
F: Fn(String) -> Result<(HashCode, String), ParseLabelError>,
{
if *self.encoding.get_or_insert(encoding) != encoding {
return Err(ParseLabelError::InconsistentEncoding);
}
for label in iter {
let (hash, label) = encode(label)?;
self.map.insert(hash, label);
}
Ok(())
}
pub fn read_shift_jis<P: AsRef<Path>>(&mut self, path: P) -> Result<(), ParseLabelError> {
self.read(path, Encoding::ShiftJis, encode_shift_jis)
}
pub fn read_utf8<P: AsRef<Path>>(&mut self, path: P) -> Result<(), ParseLabelError> {
self.read(path, Encoding::Utf8, encode_utf8)
}
fn read<P, F>(&mut self, path: P, encoding: Encoding, encode: F) -> Result<(), ParseLabelError>
where
P: AsRef<Path>,
F: Fn(String) -> Result<(HashCode, String), ParseLabelError>,
{
if *self.encoding.get_or_insert(encoding) != encoding {
return Err(ParseLabelError::InconsistentEncoding);
}
let reader = BufReader::new(File::open(path)?);
let map = reader
.lines()
.map(|l| encode(l?))
.collect::<Result<BiHashMap<_, _>, _>>()?;
self.map.extend(map);
Ok(())
}
pub fn set_strict(&mut self, strict: bool) {
self.strict = strict;
}
pub fn clear(&mut self) {
self.map.clear();
}
pub fn reset(&mut self) {
self.clear();
self.encoding = None;
self.strict = false;
}
}
/// Errors produced while adding labels to a `HashCodeMap`.
#[derive(Debug, Error)]
pub enum ParseLabelError {
/// The encoding requested by the call differs from the encoding the map is already using.
#[error("the requested character encoding does not match the current character encoding")]
InconsistentEncoding,
/// An underlying I/O operation failed; the original `io::Error` is passed through unchanged.
#[error(transparent)]
Io(#[from] io::Error),
/// The Shift JIS encoder reported an error while encoding a label.
#[error("label contains Shift JIS errors")]
EncodeShiftJis,
}
/// Character encoding that labels are converted to before their hash code is computed.
///
/// Equality on a field-less enum is total, so `Eq` is derived alongside `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Encoding {
    /// Labels are hashed from their Shift JIS byte representation.
    ShiftJis,
    /// Labels are hashed as UTF-8 strings.
    Utf8,
}
/// Pairs `label` with the hash code of its Shift JIS encoding.
///
/// Returns [`ParseLabelError::EncodeShiftJis`] when the encoder reports an error for `label`.
fn encode_shift_jis(label: String) -> Result<(HashCode, String), ParseLabelError> {
    let (bytes, _, had_errors) = SHIFT_JIS.encode(&label);
    if had_errors {
        Err(ParseLabelError::EncodeShiftJis)
    } else {
        // Hash the encoded bytes, but hand back the original (UTF-8) label.
        let hash = HashCode::from(bytes.as_ref());
        Ok((hash, label))
    }
}
/// Pairs `label` with the hash code computed from the label itself.
///
/// Never fails; the `Result` return type only mirrors `encode_shift_jis` so both can be
/// passed as the same kind of callback.
fn encode_utf8(label: String) -> Result<(HashCode, String), ParseLabelError> {
    let hash = HashCode::from(&label);
    Ok((hash, label))
}