sphinx/codegen/
chunk.rs

1use core::str;
2use core::ops::Range;
3use std::collections::HashMap;
4use string_interner::Symbol as _;
5use crate::language::InternSymbol;
6use crate::runtime::{DefaultBuildHasher, STRING_TABLE};
7use crate::runtime::strings::{StringInterner, StringSymbol};
8use crate::runtime::function::{Signature, Parameter};
9use crate::runtime::errors::ErrorKind;
10use crate::codegen::consts::{Constant, ConstID, StringID};
11use crate::codegen::funproto::{FunctionProto, UnloadedFunction, UnloadedSignature, UnloadedParam, FunctionID};
12use crate::codegen::errors::CompileResult;
13use crate::debug::DebugSymbol;
14
15
16
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
18pub enum Chunk {
19    Main,
20    Function(FunctionID),
21}
22
23impl From<FunctionID> for Chunk {
24    fn from(fun_id: FunctionID) -> Self {
25        Self::Function(fun_id)
26    }
27}
28
29
30#[derive(Debug, Clone)]
31pub enum ChunkInfo {
32    ModuleMain,
33    Function {
34        symbol: Option<DebugSymbol>,
35    },
36}
37
38/// A buffer used by ChunkBuilder
39#[derive(Debug)]
40pub struct ChunkBuf {
41    info: ChunkInfo,
42    bytes: Vec<u8>,
43}
44
45impl ChunkBuf {
46    pub fn new(info: ChunkInfo) -> Self {
47        Self {
48            info,
49            bytes: Vec::new(),
50        }
51    }
52    
53    // Bytes
54    
55    pub fn len(&self) -> usize {
56        self.bytes.len()
57    }
58    
59    pub fn is_empty(&self) -> bool {
60        self.bytes.is_empty()
61    }
62    
63    pub fn as_slice(&self) -> &[u8] {
64        self.bytes.as_slice()
65    }
66    
67    pub fn as_mut_slice(&mut self) -> &mut [u8] {
68        self.bytes.as_mut_slice()
69    }
70    
71    // using Into<u8> so that OpCodes can be accepted without extra fuss
72    pub fn push_byte(&mut self, byte: impl Into<u8>) {
73        self.bytes.push(byte.into());
74    }
75    
76    pub fn extend_bytes(&mut self, bytes: &[u8]) {
77        self.bytes.extend(bytes);
78    }
79    
80    pub fn patch_bytes(&mut self, offset: usize, patch: &[u8]) {
81        let patch_range = offset..(offset + patch.len());
82        let target = &mut self.bytes[patch_range];
83        target.copy_from_slice(patch);
84    }
85    
86    /// anything previously inside the patch is overwritten
87    pub fn resize_patch(&mut self, offset: usize, from_len: usize, to_len: usize) {
88        let patch_range = offset..(offset + from_len);
89        let patch = core::iter::repeat(u8::default()).take(to_len);
90        self.bytes.splice(patch_range, patch);
91    }
92}
93
94
95pub struct ChunkBuilder {
96    main: ChunkBuf,
97    chunks: Vec<ChunkBuf>,
98    consts: Vec<Constant>,
99    functions: Vec<Option<UnloadedFunction>>,
100    dedup: HashMap<Constant, ConstID, DefaultBuildHasher>,
101    strings: StringInterner,
102}
103
104impl Default for ChunkBuilder {
105    fn default() -> Self {
106        Self::new()
107    }
108}
109
110impl ChunkBuilder {
111    pub fn new() -> Self {
112        Self::with_strings(StringInterner::new())
113    }
114    
115    pub fn with_strings(strings: StringInterner) -> Self {
116        Self {
117            main: ChunkBuf::new(ChunkInfo::ModuleMain),
118            chunks: Vec::new(),
119            functions: Vec::new(),
120            consts: Vec::new(),
121            dedup: HashMap::with_hasher(DefaultBuildHasher::default()),
122            strings,
123        }
124    }
125    
126    // Bytecode
127    
128    pub fn new_chunk(&mut self, info: ChunkInfo) -> CompileResult<Chunk> {
129        let chunk_id = FunctionID::try_from(self.chunks.len())
130            .map_err(|_| "function count limit reached")?;
131        
132        self.chunks.push(ChunkBuf::new(info));
133        self.functions.push(None);
134        
135        Ok(chunk_id.into())
136    }
137
138    pub fn chunk(&self, chunk_id: Chunk) -> &ChunkBuf {
139        match chunk_id {
140            Chunk::Main => &self.main,
141            Chunk::Function(id) => &self.chunks[usize::from(id)],
142        }
143    }
144    
145    pub fn chunk_mut(&mut self, chunk_id: Chunk) -> &mut ChunkBuf { 
146        match chunk_id {
147            Chunk::Main => &mut self.main,
148            Chunk::Function(id) => &mut self.chunks[usize::from(id)],
149        }
150    }
151    
152    // Constants
153    
154    pub fn get_or_insert_const(&mut self, value: Constant) -> CompileResult<ConstID> {
155        if let Constant::String(index) = value {
156            let symbol = InternSymbol::try_from_usize(index);
157            debug_assert!(self.strings.resolve(symbol.unwrap()).is_some());
158        }
159        
160        if let Some(cid) = self.dedup.get(&value) {
161            Ok(*cid)
162        } else {
163            let cid = ConstID::try_from(self.consts.len())
164                .map_err(|_| "constant pool limit reached")?;
165            self.consts.push(value);
166            self.dedup.insert(value, cid);
167            Ok(cid)
168        }
169    }
170    
171    pub fn get_or_insert_str(&mut self, string: &str) -> StringID {
172        let symbol = self.strings.get_or_intern(string);
173        symbol.to_usize()
174    }
175    
176    pub fn get_or_insert_error(&mut self, error: ErrorKind, message: &str) -> CompileResult<ConstID> {
177        let message = self.get_or_insert_str(message);
178        self.get_or_insert_const(Constant::Error { error, message })
179    }
180    
181    pub fn insert_function(&mut self, fun_proto: UnloadedFunction) {
182        let fun_index = usize::from(fun_proto.fun_id);
183        while self.functions.len() <= fun_index {
184            self.functions.resize(fun_index + 1, None);
185        }
186        
187        self.functions[fun_index].replace(fun_proto);
188    }
189    
190    // Output
191    
192    pub fn build(self) -> UnloadedProgram {
193        let bytes_len = self.chunks.iter().map(|chunk| chunk.bytes.len()).sum();
194        let mut chunks = Vec::with_capacity(bytes_len);
195        let mut chunk_index = Vec::with_capacity(self.chunks.len());
196        
197        for chunk in self.chunks.into_iter() {
198            let offset = chunks.len();
199            let length = chunk.bytes.len();
200            chunks.extend(chunk.bytes);
201            
202            let index = ChunkIndex {
203                offset, length,
204                info: chunk.info,
205            };
206            chunk_index.push(index);
207        }
208        
209        let bytes_len = self.strings.into_iter().map(|(_, s)| s.len()).sum();
210        let mut strings = Vec::with_capacity(bytes_len);
211        let mut string_index = Vec::new();
212        string_index.resize_with(self.strings.len(), StringIndex::default);
213        
214        for (symbol, string) in self.strings.into_iter() {
215            let bytes = string.as_bytes();
216            let offset = strings.len();
217            let length = bytes.len();
218            strings.extend(bytes);
219            
220            let index = StringIndex {
221                offset, length
222            };
223            string_index.insert(symbol.to_usize(), index);
224        }
225        
226        // truncate trailing `None` values
227        let fun_len = self.functions.iter().enumerate().rev()
228            .find_map(|(idx, fun)| fun.as_ref().map(|_| idx + 1))
229            .unwrap_or(0);
230        
231        let functions = self.functions.into_iter().take(fun_len)
232            .map(|fun| fun.expect("function ids must be contiguous"))
233            .collect::<Vec<UnloadedFunction>>();
234        
235        UnloadedProgram {
236            main: self.main.bytes.into_boxed_slice(),
237            chunks: chunks.into_boxed_slice(),
238            chunk_index: chunk_index.into_boxed_slice(),
239            strings: strings.into_boxed_slice(),
240            string_index: string_index.into_boxed_slice(),
241            consts: self.consts.into_boxed_slice(),
242            functions: functions.into_boxed_slice(),
243        }
244    }
245}
246
247
248// TODO store all chunk bytes in a single array
249// TODO figure out how debug symbols will work, esp. at runtime
250
251#[derive(Debug, Clone)]
252pub struct ChunkIndex {
253    info: ChunkInfo,
254    offset: usize,
255    length: usize,
256}
257
258impl ChunkIndex {
259    pub fn as_range(&self) -> Range<usize> {
260        self.offset..(self.offset + self.length)
261    }
262    
263    pub fn info(&self) -> &ChunkInfo {
264        &self.info
265    }
266}
267
/// Locates one string inside a flat byte array. The default value describes
/// an empty range at offset zero.
#[derive(Debug, Default, Clone)]
pub struct StringIndex {
    /// Index of the string's first byte.
    offset: usize,
    /// Length of the string in bytes.
    length: usize,
}
273
274impl StringIndex {
275    pub fn as_range(&self) -> Range<usize> {
276        self.offset..(self.offset + self.length)
277    }
278}
279
280/// A program whose strings have not been yet been loaded into the thread-local string table
281/// This means that an `UnloadedProgram` cannot be executed. However, it also means that an
282/// `UnloadedProgram` is also self-contained, which is useful for exporting to a file or
283/// between threads.
284#[derive(Debug, Clone)]
285pub struct UnloadedProgram {
286    main: Box<[u8]>,
287    chunks: Box<[u8]>,
288    chunk_index: Box<[ChunkIndex]>,
289    strings: Box<[u8]>,
290    string_index: Box<[StringIndex]>,
291    consts: Box<[Constant]>,
292    functions: Box<[UnloadedFunction]>,
293}
294
295impl UnloadedProgram {
296    pub fn main(&self) -> &[u8] {
297        &self.main
298    }
299    
300    pub fn get_chunk(&self, fun_id: FunctionID) -> &[u8] {
301        let chunk_idx = &self.chunk_index[usize::from(fun_id)];
302        &self.chunks[chunk_idx.as_range()]
303    }
304    
305    pub fn chunk_info(&self, fun_id: FunctionID) -> &ChunkInfo {
306        let chunk_idx = &self.chunk_index[usize::from(fun_id)];
307        chunk_idx.info()
308    }
309    
310    pub fn iter_chunks(&self) -> impl Iterator<Item=(Chunk, &[u8])> {
311        self.chunk_index.iter()
312            .map(|index| &self.chunks[index.as_range()])
313            .enumerate()
314            .map(|(fun_id, chunk)| (Chunk::Function(fun_id.try_into().unwrap()), chunk))
315    }
316    
317    pub fn get_string(&self, string_id: StringID) -> &str {
318        let string_idx = &self.string_index[usize::from(string_id)];
319        str::from_utf8(&self.strings[string_idx.as_range()]).expect("invalid string")
320    }
321    
322    pub fn iter_strings(&self) -> impl Iterator<Item=(StringID, &str)> {
323        self.string_index.iter()
324            .map(|index| &self.strings[index.as_range()])
325            .map(|slice| str::from_utf8(slice).expect("invalid string"))
326            .enumerate()
327    }
328    
329    pub fn get_const(&self, index: ConstID) -> &Constant {
330        &self.consts[usize::from(index)]
331    }
332    
333    pub fn get_function(&self, index: FunctionID) -> &UnloadedFunction {
334        &self.functions[usize::from(index)]
335    }
336}
337
338
339/// Unlike `UnloadedProgram`, this is not `Send` (mainly because `StringSymbol` is not Send)
340#[derive(Debug)]
341pub struct ProgramData {
342    chunks: Box<[u8]>,
343    chunk_index: Box<[ChunkIndex]>,
344    strings: Box<[StringSymbol]>,
345    consts: Box<[Constant]>,
346    functions: Box<[FunctionProto]>,
347}
348
349impl ProgramData {
350
351    #[inline(always)]
352    pub fn get_chunk(&self, fun_id: FunctionID) -> &[u8] {
353        let index = &self.chunk_index[usize::from(fun_id)];
354        &self.chunks[index.as_range()]
355    }
356    
357    pub fn chunk_info(&self, fun_id: FunctionID) -> &ChunkInfo {
358        let index = &self.chunk_index[usize::from(fun_id)];
359        &index.info
360    }
361    
362    pub fn get_const(&self, index: ConstID) -> &Constant {
363        &self.consts[usize::from(index)]
364    }
365    
366    pub fn get_string(&self, index: StringID) -> &StringSymbol {
367        &self.strings[usize::from(index)]
368    }
369    
370    pub fn get_function(&self, index: FunctionID) -> &FunctionProto {
371        &self.functions[usize::from(index)]
372    }
373    
374}
375
376
377#[derive(Debug)]
378pub struct Program {
379    pub main: Box<[u8]>,
380    pub data: ProgramData,
381}
382
383impl Program {
384    pub fn load(program: UnloadedProgram) -> Self {
385        
386        // Convert strings to StringSymbols
387        let strings = STRING_TABLE.with(|string_table| {
388            let mut string_table = string_table.borrow_mut();
389            
390            let mut strings = Vec::with_capacity(program.strings.len());
391            for (_, string) in program.iter_strings() {
392                let symbol = string_table.get_or_intern(string);
393                strings.push(symbol);
394            }
395            strings
396        });
397        
398        let functions: Vec<FunctionProto> = program.functions.into_vec().into_iter()
399            .map(|function| {
400                let signature = Self::load_signature(function.signature, &program.consts, &strings);
401                FunctionProto::new(function.fun_id, signature, function.upvalues)
402            })
403            .collect();
404        
405        Self {
406            main: program.main,
407            data: ProgramData {
408                chunks: program.chunks,
409                chunk_index: program.chunk_index,
410                consts: program.consts,
411                functions: functions.into_boxed_slice(),
412                strings: strings.into_boxed_slice(),
413            },
414        }
415    }
416    
417    fn load_name(const_id: ConstID, consts: &[Constant], strings: &[StringSymbol]) -> StringSymbol {
418        let string_id = match consts[usize::from(const_id)] {
419            Constant::String(symbol) => symbol,
420            _ => panic!("invalid name constant")
421        };
422        strings[usize::from(string_id)]
423    }
424    
425    fn load_signature(signature: UnloadedSignature, consts: &[Constant], strings: &[StringSymbol]) -> Signature {
426        let name = signature.name.map(|const_id| Self::load_name(const_id, consts, strings));
427        
428        let required = signature.required.into_vec().into_iter()
429            .map(|param| Self::load_parameter(param, consts, strings)).collect();
430            
431        let default = signature.default.into_vec().into_iter()
432            .map(|param| Self::load_parameter(param, consts, strings)).collect();
433        
434        let variadic = signature.variadic
435            .map(|param| Self::load_parameter(param, consts, strings));
436        
437        Signature::new(name, required, default, variadic)
438    }
439    
440    fn load_parameter(param: UnloadedParam, consts: &[Constant], strings: &[StringSymbol]) -> Parameter {
441        let name = Self::load_name(param.name, consts, strings);
442        Parameter::new(name, param.mode)
443    }
444}
445
446