sphinx/runtime/
strings.rs

1use core::fmt;
2use core::cmp;
3use core::hash::{Hash, Hasher};
4use crate::runtime::gc::{Gc, GcTrace};
5use crate::runtime::errors::ExecResult;
6
7pub mod intern;
8pub mod buffer;
9
10pub use intern::{StringSymbol, StringInterner, static_symbol, STRING_TABLE};
11pub use buffer::StrBuffer;
12
13use intern::StringTable;
14
15
16// need to build on a 32-bit machine to find out what will fit
17#[cfg(target_pointer_width = "32")]
18pub type InlineStr = StrBuffer<4>;
19
20#[cfg(target_pointer_width = "64")]
21pub type InlineStr = StrBuffer<14>;
22
23#[derive(Debug, Clone, Copy)]
24pub enum StringValue {
25    Intern(StringSymbol),
26    Inline(InlineStr),
27    Gc(Gc<str>),
28}
29
30unsafe impl GcTrace for str {
31    fn trace(&self) { }
32    
33    fn size_hint(&self) -> usize {
34        self.len()
35    }
36}
37
38impl From<StringSymbol> for StringValue {
39    fn from(symbol: StringSymbol) -> Self {
40        StringValue::Intern(symbol)
41    }
42}
43
44impl From<InlineStr> for StringValue {
45    fn from(inline: InlineStr) -> Self {
46        StringValue::Inline(inline)
47    }
48}
49
50impl From<Gc<str>> for StringValue {
51    fn from(gc_str: Gc<str>) -> Self {
52        StringValue::Gc(gc_str)
53    }
54}
55
56const INTERN_THRESHOLD: usize = 40;
57
58// different constructors for different interning policies
59impl StringValue {
60    pub fn new_maybe_interned<S>(s: S) -> Self where S: AsRef<str> {
61        let string = s.as_ref();
62        
63        // first, try inlining
64        if let Ok(inline) = InlineStr::try_from(string) {
65            return Self::Inline(inline);
66        }
67        
68        // always intern short-ish strings
69        if string.len() <= INTERN_THRESHOLD {
70            return Self::Intern(StringSymbol::intern(string))
71        }
72        
73        Self::Gc(Gc::from_box(string.to_string().into_boxed_str()))
74    }
75    
76    pub fn new_uninterned<S>(s: S) -> Self where S: AsRef<str> {
77        let string = s.as_ref();
78        
79        // first, try inlining
80        if let Ok(inline) = InlineStr::try_from(string) {
81            return Self::Inline(inline);
82        }
83        
84        Self::Gc(Gc::from_box(string.to_string().into_boxed_str()))
85    }
86    
87    pub fn new_interned<S>(s: S) -> Self where S: AsRef<str> {
88        Self::Intern(StringSymbol::from(s.as_ref()))
89    }
90}
91
92/// evaluates an expression using a StringValue, accessing the STRING_TABLE only if needed
93macro_rules! with_str {
94    ($strval:expr, $string:ident => $expr:expr) => {
95        match $strval.try_str() {
96            Ok($string) => $expr,
97            Err(symbol) => STRING_TABLE.with(|string_table| {
98                let string_table = string_table.borrow();
99                let $string = string_table.resolve(&symbol);
100                $expr
101            })
102        }
103    };
104}
105
106
107impl StringValue {
108    pub fn trace(&self) {
109        if let Self::Gc(gc_str) = self {
110            gc_str.mark_trace()
111        }
112    }
113    
114    pub fn write(&self, buf: &mut impl fmt::Write) -> fmt::Result {
115        match self {
116            Self::Intern(symbol) => symbol.write(buf),
117            Self::Inline(inline) => buf.write_str(&*inline),
118            Self::Gc(gc_str) => buf.write_str(&**gc_str),
119        }
120    }
121    
122    /// Interns the string, producing a StringSymbol
123    pub fn as_intern(&self) -> StringSymbol {
124        match self {
125            Self::Intern(symbol) => *symbol,
126            Self::Inline(inline) => StringSymbol::intern(&*inline),
127            Self::Gc(gc_str) => StringSymbol::intern(&**gc_str),
128        }
129    }
130    
131    /// Interns the string *in place*.
132    pub fn make_intern(&mut self) {
133        let symbol = self.as_intern();
134        *self = Self::Intern(symbol)
135    }
136    
137    fn is_intern(&self) -> bool {
138        matches!(self, Self::Intern(..))
139    }
140    
141    fn resolve_str<'s, 'h>(&'s self, string_table: &'h StringTable) -> &'s str where 'h: 's {
142        match self {
143            Self::Intern(symbol) => string_table.resolve(symbol),
144            Self::Inline(inline) => &*inline,
145            Self::Gc(gc_str) => &**gc_str,
146        }
147    }
148    
149    pub fn with_str<R>(&self, f: impl FnOnce(&str) -> R) -> R {
150        with_str!(self, s => f(s))
151    }
152    
153    fn try_str(&self) -> Result<&str, StringSymbol> {
154        match self {
155            Self::Inline(inline) => Ok(&*inline),
156            Self::Gc(gc_str) => Ok(&**gc_str),
157            Self::Intern(symbol) => Err(*symbol),
158        }
159    }
160    
161    pub fn len(&self) -> usize {
162        with_str!(self, s => s.len())
163    }
164    
165    pub fn char_count(&self) -> usize {
166        with_str!(self, s => s.chars().count())
167    }
168    
169    pub fn concat(&self, other: &StringValue) -> ExecResult<StringValue> {
170        // don't allocate when concatenating small strings
171        const BUFLEN: usize = 64;
172        if self.len() + other.len() <= BUFLEN {
173            let mut buf = StrBuffer::<BUFLEN>::new();
174            with_str!(self, s => buf.try_push_str(s).unwrap());
175            with_str!(other, s => buf.try_push_str(s).unwrap());
176            
177            Ok(StringValue::new_maybe_interned(buf))
178        } else {
179            let mut buf = String::new();
180            with_str!(self, s => buf.push_str(s));
181            with_str!(other, s => buf.push_str(s));
182            
183            Ok(StringValue::new_maybe_interned(buf.as_str()))
184        }
185    }
186}
187
188// It's important for strings to hash consistently regardless of representation
189// This means that a string "foo" should hash the same whether it is Intern, Inline, or Gc
190// Unfortunately Rust's hash API makes it VERY hard to have precise control of the hash output
191// So we either have to lookup and hash the underlying string data each time (which would kill the 
192// performance benefits of string interning) or double-hash.
193//
194// Double-hashing means that when we intern a string, we precompute a hash. Then, when we want to
195// hash an interned string, we lookup the precomputed hash and then feed that to the Hasher.
196// When hashing a non-interned string, we compute a hash *using the same hasher* that is used to
197// pre-compute interned string hashes and feed that to the Hasher (so the cost of hashing a string 
198// is only truly doubled when hashing non-interned strings). This ensures that hashes are consistent.
199impl Hash for StringValue {
200    fn hash<H: Hasher>(&self, state: &mut H) {
201        STRING_TABLE.with(|string_table| match self {
202            Self::Intern(symbol) =>
203                string_table.borrow().lookup_hash(symbol).hash(state),
204            
205            Self::Inline(inline) =>
206                string_table.borrow().hash_str(&*inline).hash(state),
207            
208            Self::Gc(gc_str) => 
209                string_table.borrow().hash_str(&**gc_str).hash(state),
210            
211        })
212    }
213}
214
215impl PartialEq for StringValue {
216    fn eq(&self, other: &StringValue) -> bool {
217        match (self, other) {
218            // both interned
219            (Self::Intern(a), Self::Intern(b)) => a == b,
220            
221            // one interned
222            (Self::Intern(symbol), strval) | (strval, Self::Intern(symbol)) => STRING_TABLE.with(|string_table| {
223                string_table.borrow().resolve(symbol) == strval.try_str().unwrap()
224            }),
225            
226            // neither interned
227            (a, b) => a.try_str().unwrap() == b.try_str().unwrap()
228        }
229    }
230}
231
232impl Eq for StringValue { }
233
234impl PartialOrd for StringValue {
235    fn partial_cmp(&self, other: &StringValue) -> Option<cmp::Ordering> {
236        if let (Ok(a), Ok(b)) = (self.try_str(), other.try_str()) {
237            return a.partial_cmp(b)
238        }
239        
240        STRING_TABLE.with(|string_table| {
241            let string_table = string_table.borrow();
242            self.resolve_str(&string_table)
243                .partial_cmp(other.resolve_str(&string_table))
244        })
245    }
246}
247
248impl Ord for StringValue {
249    fn cmp(&self, other: &Self) -> cmp::Ordering {
250        if let (Ok(a), Ok(b)) = (self.try_str(), other.try_str()) {
251            return a.cmp(b)
252        }
253        
254        STRING_TABLE.with(|string_table| {
255            let string_table = string_table.borrow();
256            self.resolve_str(&string_table)
257                .cmp(other.resolve_str(&string_table))
258        })
259    }
260}
261
262impl fmt::Display for StringValue {
263    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
264        self.write(fmt)
265    }
266}