moore-common 0.1.0

The common core of the moore compiler framework.
Documentation
// Copyright (c) 2016-2017 Fabian Schuiki

//! A name table that internalizes all names presented to it and allows for them
//! to be referred to by a lightweight tag. This structure is heavily inspired
//! by the interner used in the Rust compiler.

use std::borrow::Borrow;
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt;
use std::hash::Hash;
use std::ops::Deref;
use std::rc::Rc;
use rustc_serialize::{Encodable, Encoder, Decodable, Decoder};


/// A name is a lightweight 32 bit tag that refers to a string in a name table.
/// During parsing, encountered strings are inserted into the name table and
/// only the corresponding tag is kept in the token. Names which have their most
/// significant bit set represent case sensitive names, such as for extended
/// identifiers.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Name(pub u32);

impl Name {
	/// Check if the name is case sensitive.
	pub fn is_case_sensitive(&self) -> bool {
		self.0 & 1 == 1
	}

	/// Return the string representation of this name.
	pub fn as_str(self) -> RcStr {
		get_name_table().get(self)
	}
}

impl fmt::Debug for Name {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		write!(f, "{}({})", self, self.0)
	}
}

impl fmt::Display for Name {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		fmt::Display::fmt(&self.as_str(), f)
	}
}

impl Encodable for Name {
	fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
		s.emit_bool(self.is_case_sensitive())?;
		s.emit_str(self.as_str().borrow())?;
		Ok(())
	}
}

impl Decodable for Name {
	fn decode<S: Decoder>(s: &mut S) -> Result<Name, S::Error> {
		let case = s.read_bool()?;
		let name = s.read_str()?;
		Ok(get_name_table().intern(&name, case))
	}
}

impl Into<String> for Name {
	fn into(self) -> String {
		self.as_str().into()
	}
}



/// A reference-counted string that acts like a regular str slice, hiding the
/// fact that it is wrapped in Rc<>.
#[derive(Clone, PartialEq, Hash, PartialOrd)]
pub struct RcStr(Rc<String>);

impl RcStr {
	/// Create a new ref-counted string which is a copy of `value`.
	pub fn new(value: &str) -> RcStr {
		RcStr(Rc::new(value.to_string()))
	}

	/// Create a new ref-counted string that contains `value`, without
	/// allocating any new storage.
	pub fn from(value: String) -> RcStr {
		RcStr(Rc::new(value))
	}
}

impl Eq for RcStr {}

impl Ord for RcStr {
	fn cmp(&self, other: &RcStr) -> Ordering {
		self[..].cmp(&other[..])
	}
}

impl fmt::Debug for RcStr {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		self[..].fmt(f)
	}
}

impl fmt::Display for RcStr {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		self[..].fmt(f)
	}
}

impl Borrow<str> for RcStr {
	fn borrow(&self) -> &str {
		&self.0[..]
	}
}

impl Deref for RcStr {
	type Target = str;
	fn deref(&self) -> &str {
		&self.0[..]
	}
}

impl Into<String> for RcStr {
	fn into(self) -> String {
		(*self.0).clone()
	}
}



/// A lookup table of names. Internalizes strings either in a case sensitive or
/// case insensitive way. Allows for bidirectional lookup, i.e. by string or by
/// assigned name.
pub struct NameTable {
	map: RefCell<HashMap<RcStr, Name>>,
	vect: RefCell<Vec<RcStr>>,
}

impl NameTable {
	/// Create a new empty name table.
	pub fn new() -> NameTable {
		NameTable {
			map: RefCell::new(HashMap::new()),
			vect: RefCell::new(Vec::new()),
		}
	}

	/// Obtain a name for a string. This either inserts the string into the
	/// table and returns the new name, or returns the existing name if the
	/// string already exists in the table.
	pub fn intern(&self, value: &str, case_sensitive: bool) -> Name {
		let mut map = self.map.borrow_mut();
		if let Some(&idx) = map.get(value) {
			return idx;
		}

		// Since the name is not present in the table yet, we allocate a new idx
		// for it. Also, if it is a case-insensitive name, we insert both its
		// original form as well as its lowercase form into the lookup table.
		let mut vect = self.vect.borrow_mut();
		if case_sensitive {
			let new_idx = Name((vect.len() as u32) << 1 | 1);
			let v = RcStr::new(value);
			map.insert(v.clone(), new_idx);
			vect.push(v);
			new_idx
		} else {
			let new_idx = Name((vect.len() as u32) << 1 | 0);
			let lower = value.to_lowercase();
			if let Some(&idx) = map.get(lower.as_str()) {
				return idx;
			}
			let v = RcStr::new(value);
			map.insert(RcStr::from(lower), new_idx);
			map.insert(v.clone(), new_idx);
			vect.push(v);
			new_idx
		}
	}

	/// Retrieve the string given a name tag.
	pub fn get(&self, idx: Name) -> RcStr {
		(*self.vect.borrow())[(idx.0 >> 1) as usize].clone()
	}

	/// Try to find a string.
	pub fn find<Q: ?Sized>(&self, value: &Q) -> Option<Name>
	where RcStr: Borrow<Q>, Q: Eq + Hash {
		(*self.map.borrow()).get(value).map(|v| *v)
	}
}

/// Get this thread's current name table.
pub fn get_name_table() -> Rc<NameTable> {
	thread_local!(static TBL: Rc<NameTable> = {
		let nt = NameTable::new();
		// token::prefill_name_table(&mut nt);
		Rc::new(nt)
	});
	TBL.with(|x| x.clone())
}