emacs_lsp_booster/
bytecode.rs

1use std::collections::BTreeMap;
2use std::str::FromStr;
3
4use anyhow::{Result, bail};
5use serde_json as json;
6use smallvec::smallvec;
7
8
9#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
10pub enum LispObject {
11    Symbol(String),
12    Keyword(String),
13    UnibyteStr(Vec<u8>),
14    Str(String),
15    Int(i64),
16    Float(String),  // use string for Eq and Ord
17    Nil,
18    T,
19    Vector(Vec<LispObject>),
20}
21
22impl FromStr for LispObject {
23    type Err = anyhow::Error;
24
25    fn from_str(s: &str) -> Result<Self> {
26        if s == "nil" {
27            Ok(Self::Nil)
28        } else if s == "t" {
29            Ok(Self::T)
30        } else if s.starts_with(":") {
31            Ok(Self::Keyword(s[1..].to_string()))
32        } else {
33            bail!("Supported LispObject: {}", s)
34        }
35    }
36}
37
38impl LispObject {
39    fn to_repl(&self) -> String {
40        match self {
41            LispObject::Symbol(s) => s.clone(),
42            LispObject::Keyword(s) => format!(":{}", s),
43            LispObject::Str(s) => {
44                let mut result = String::new();
45                result.reserve(s.len() * 2 + 2);
46                result.push('"');
47                for c in s.chars() {
48                    if c == '\"' || c == '\\' {
49                        result.push('\\');
50                        result.push(c);
51                    } else if (c as u32) < 32 || (c as u32) == 127 {
52                        // not printable
53                        // NOTE: cannot use escape for c in 128..=255, otherwise the string would become unibyte
54                        result += &format!("\\{:03o}", c as u32);
55                    } else {
56                        result.push(c);
57                    }
58                }
59                result.push('"');
60                result
61            },
62            LispObject::UnibyteStr(vec) => {
63                let mut result = String::new();
64                result.reserve(vec.len() * 4 + 2);
65                result.push('"');
66                let mut last_oct_escape_not_full = false;
67                for c in vec {
68                    let mut oct_escape_not_full = false;
69                    match *c {
70                        7 => result += "\\a",
71                        8 => result += "\\b",
72                        9 => result += "\\t",
73                        10 => result += "\\n",
74                        11 => result += "\\v",
75                        12 => result += "\\f",
76                        13 => result += "\\r",
77                        127 => result += "\\d",
78                        27 => result += "\\e",
79                        // NOTE: do not use 0..=7 in this branch, because it takes one more byte than the next branch
80                        8..=26 => {  // \^@ \^A \^B ... \^Z
81                            result += &format!("\\^{}", (*c as u32 + 64) as u8 as char);
82                        },
83                        0..=7 | 27..=31 | 128..=255 | 34 | 92 => {  // oct, for unprintable and '"' and '\\'
84                            let oct_s = format!("\\{:o}", *c as u32);
85                            if oct_s.len() < 4 {
86                                oct_escape_not_full = true;
87                            }
88                            result += &oct_s;
89                        },
90                        _ => {  // printable
91                            // https://www.gnu.org/software/emacs/manual/html_node/elisp/Non_002dASCII-in-St
92                            if last_oct_escape_not_full && ('0'..='7').contains(&(*c as char)) {
93                                result += "\\ ";
94                            }
95                            result.push(*c as char);
96                        },
97                    }
98                    last_oct_escape_not_full = oct_escape_not_full;
99                }
100                result.push('"');
101                result
102            },
103            LispObject::Int(i) => i.to_string(),
104            LispObject::Float(s) => s.clone(),
105            LispObject::Nil => "nil".into(),
106            LispObject::T => "t".into(),
107            LispObject::Vector(v) =>
108                format!("[{}]", v.iter().map(|x| x.to_repl()).collect::<Vec<_>>().join(" "))
109        }
110    }
111}
112
113// to support constants more than 65536 elements:
114// - for first 63536 slots, use it as normal
115// - in 63536-64536, put numbers 0-1000, for indexing
116// - for last 1000 slots (64536-65536), use two-level vector, 1000*1000, each is a 1000-element vector
117
118// cv: constant vector
119const CV_TWO_LEVEL_VECTOR_SIZE: u32 = 1000;
120const CV_NORMAL_SLOT_COUNT: u32 = (1 << 16) - CV_TWO_LEVEL_VECTOR_SIZE * 2;
121const CV_TWO_LEVEL_IDX_BEGIN: u32 = CV_NORMAL_SLOT_COUNT;
122const CV_TWO_LEVEL_DATA_BEGIN: u32 = CV_NORMAL_SLOT_COUNT + CV_TWO_LEVEL_VECTOR_SIZE;
123
124#[allow(dead_code)]
125enum Op {
126    PushConstant(u32),  // support more than u16, will expand to multiple ops
127    Call(u16),
128    StackRef(u16),
129    List(u8),
130    Discard,
131    ASet,
132    Add1,
133    Cons,
134    Return,
135}
136
137impl Op {
138    fn get_stack_delta(&self) -> i32 {
139        match self {
140            &Self::PushConstant(_) => 1,
141            &Self::Call(n) => -(n as i32 + 1) + 1,
142            &Self::StackRef(_) => 1,
143            &Self::List(n) => -(n as i32) + 1,
144            &Self::Discard => -1,
145            &Self::ASet => -3 + 1,
146            &Self::Add1 => 0,
147            &Self::Cons => -2 + 1,
148            &Self::Return => -1,
149        }
150    }
151
152    fn to_code(&self) -> Result<smallvec::SmallVec<[u8; 3]>> {
153        match self {
154            &Self::PushConstant(v) if v < 64 =>
155                Ok(smallvec![(192 + v) as u8]),
156            &Self::PushConstant(v) if v < CV_NORMAL_SLOT_COUNT =>
157                Ok(smallvec![129, (v & 0xff) as u8, (v >> 8) as u8]),
158            &Self::PushConstant(v) if v < (CV_NORMAL_SLOT_COUNT + CV_TWO_LEVEL_VECTOR_SIZE * CV_TWO_LEVEL_VECTOR_SIZE) => {
159                let mut result = smallvec![];
160                let two_level_i = (v - CV_NORMAL_SLOT_COUNT) / CV_TWO_LEVEL_VECTOR_SIZE;
161                let two_level_j = (v - CV_NORMAL_SLOT_COUNT) % CV_TWO_LEVEL_VECTOR_SIZE;
162
163                // get vector
164                let index_for_i = two_level_i + CV_TWO_LEVEL_DATA_BEGIN;
165                result.extend_from_slice(&[129, (index_for_i & 0xff) as u8, (index_for_i >> 8) as u8]);
166                // get index
167                let index_for_j = two_level_j + CV_TWO_LEVEL_IDX_BEGIN;
168                result.extend_from_slice(&[129, (index_for_j & 0xff) as u8, (index_for_j >> 8) as u8]);
169                // aref
170                result.push(72);
171
172                Ok(result)
173            },
174            &Self::PushConstant(v) =>
175                bail!("Too many constants! {}", v),
176            &Self::Call(v) if v <= 5 =>
177                Ok(smallvec![(32 + v) as u8]),
178            &Self::Call(v) if v < (1 << 8) =>
179                Ok(smallvec![(32 + 6) as u8, v as u8]),
180            &Self::Call(v) =>
181                Ok(smallvec![(32 + 7) as u8, (v & 0xff) as u8, (v >> 8) as u8]),
182            &Self::StackRef(v) if (1..=4).contains(&v) =>
183                Ok(smallvec![v as u8]),
184            &Self::StackRef(_) => unimplemented!(),
185            &Self::List(v) if v == 0 => unreachable!(),
186            &Self::List(v) if (1..=4).contains(&v) => Ok(smallvec![66 + v]),
187            &Self::List(v) => Ok(smallvec![175, v]),
188            &Self::Discard => Ok(smallvec![136]),
189            &Self::ASet => Ok(smallvec![73]),
190            &Self::Add1 => Ok(smallvec![84]),
191            &Self::Cons => Ok(smallvec![66]),
192            &Self::Return => Ok(smallvec![135]),
193        }
194    }
195}
196
197#[derive(Clone, Copy, Debug, PartialEq, Eq, clap::ValueEnum)]
198pub enum ObjectType {
199    Plist,
200    Hashtable,
201    Alist,
202}
203
204#[derive(Clone, Debug)]
205pub struct BytecodeOptions {
206    pub object_type: ObjectType,
207    // TODO: array_type
208    pub null_value: LispObject,
209    pub false_value: LispObject,
210}
211
212impl Default for BytecodeOptions {
213    fn default() -> Self {
214        Self {
215            object_type: ObjectType::Plist,
216            null_value: LispObject::Nil,
217            false_value: LispObject::Nil,
218        }
219    }
220}
221
222// Only for generating json. Sequential execution only.
223struct BytecodeCompiler {
224    options: BytecodeOptions,
225
226    ops: Vec<Op>,
227    constants: BTreeMap<LispObject, (u32, u32)>,  // (index, count)
228}
229
230impl BytecodeCompiler {
231    fn compile_constant_op(&mut self, obj: LispObject) {
232        let idx = {
233            if let Some(idx_and_count) = self.constants.get_mut(&obj) {
234                idx_and_count.1 += 1;
235                idx_and_count.0
236            } else {
237                let next_id = self.constants.len() as u32;
238                self.constants.insert(obj, (next_id, 1));
239                next_id
240            }
241        };
242        self.ops.push(Op::PushConstant(idx))
243    }
244
245    fn compile_value_array(&mut self, arr: &[json::Value]) {
246        if arr.is_empty() {
247            self.compile_constant_op(LispObject::Symbol("vector".into()));
248            self.ops.push(Op::Call(0));
249            return;
250        }
251
252        let chunks = arr.chunks((1 << 16) - 1);
253        let chunks_len = chunks.len();
254        assert!(chunks_len < (1 << 16));
255
256        if chunks_len > 1 {
257            // prepare a "vconcat" function, to concat multiple vectors
258            self.compile_constant_op(LispObject::Symbol("vconcat".into()));
259        }
260
261        for chunk in chunks {
262            self.compile_constant_op(LispObject::Symbol("vector".into()));
263            for value in chunk {
264                self.compile_value(value);
265            }
266            self.ops.push(Op::Call(chunk.len() as u16));
267        }
268
269        if chunks_len > 1 {
270            // call vconcat
271            self.ops.push(Op::Call(chunks_len as u16));
272        }
273    }
274
275    fn compile_value_map_plist_or_alist(&mut self, map: &json::Map<String, json::Value>, alist: bool) {
276        let list_len = if alist { map.len() } else { map.len() * 2 };
277        // see below
278        if list_len < (1 << 16) && list_len >= (1 << 8) {
279            self.compile_constant_op(LispObject::Symbol("list".into()));
280        }
281
282        for (key, value) in map {
283            if alist {
284                self.compile_constant_op(LispObject::Symbol(key.clone()));
285                self.compile_value(value);
286                self.ops.push(Op::Cons);
287            } else {
288                self.compile_constant_op(LispObject::Keyword(key.clone()));
289                self.compile_value(value);
290            }
291        }
292
293        // four modes: 0. (empty) just nil 1. list op; 2. list call; 3. recursive cons
294        if list_len == 0 {
295            self.compile_constant_op(LispObject::Nil);
296        } else if list_len < (1 << 8) {
297            self.ops.push(Op::List(list_len as u8));
298        } else if list_len < (1 << 16) {
299            self.ops.push(Op::Call(list_len as u16));
300        } else {
301            self.compile_constant_op(LispObject::Nil);
302            for _ in 0..list_len {
303                self.ops.push(Op::Cons);
304            }
305        }
306    }
307
308    fn compile_value_map_hashtable(&mut self, map: &json::Map<String, json::Value>) {
309        self.compile_constant_op(LispObject::Symbol("make-hash-table".into()));
310        self.compile_constant_op(LispObject::Keyword("test".into()));
311        self.compile_constant_op(LispObject::Symbol("equal".into()));
312        self.compile_constant_op(LispObject::Keyword("size".into()));
313        self.compile_constant_op(LispObject::Int(map.len() as i64));
314        self.ops.push(Op::Call(4));
315
316        for (key, value) in map {
317            self.compile_constant_op(LispObject::Symbol("puthash".into()));
318            self.compile_constant_op(LispObject::Str(key.clone()));
319            self.compile_value(value);
320            self.ops.push(Op::StackRef(3));
321            self.ops.push(Op::Call(3));
322            self.ops.push(Op::Discard);
323        }
324    }
325
326    fn compile_value(&mut self, value: &json::Value) {
327        match value {
328            &json::Value::Null => {
329                self.compile_constant_op(self.options.null_value.clone());
330            },
331            &json::Value::Bool(false) => {
332                self.compile_constant_op(self.options.false_value.clone());
333            },
334            &json::Value::Bool(true) => {
335                self.compile_constant_op(LispObject::T);
336            },
337            &json::Value::Number(ref num) => {
338                if num.is_f64() {
339                    self.compile_constant_op(LispObject::Float(num.to_string()));
340                } else {
341                    self.compile_constant_op(LispObject::Int(num.as_i64().unwrap()));
342                }
343            },
344            &json::Value::String(ref s) => {
345                self.compile_constant_op(LispObject::Str(s.clone()));
346            },
347            &json::Value::Array(ref arr) => {
348                self.compile_value_array(&arr);
349            },
350            &json::Value::Object(ref map) => {
351                match self.options.object_type {
352                    ObjectType::Plist => self.compile_value_map_plist_or_alist(&map, false),
353                    ObjectType::Alist => self.compile_value_map_plist_or_alist(&map, true),
354                    ObjectType::Hashtable => self.compile_value_map_hashtable(&map),
355                };
356            },
357        }
358    }
359
360    fn compile(&mut self, value: &json::Value) {
361        self.compile_value(value);
362        self.ops.push(Op::Return);
363    }
364
365    // return (code, constants, max_stack_size)
366    fn into_bytecode(self) -> Result<(Vec<u8>, Vec<LispObject>, i32)> {
367        // optimize constants vector, sort by usage
368        let mut constants_array = self.constants.into_iter().collect::<Vec<_>>();
369        constants_array.sort_by_key(
370            // if count is same, still sort by the old idx, to increase locality
371            |(_, idx_and_count)| (-(idx_and_count.1 as i32), idx_and_count.0));
372        let index_remap = constants_array.iter().enumerate()
373            .map(|(new_idx, (_, idx_and_count))| (idx_and_count.0, new_idx as u32))
374            .collect::<BTreeMap<_, _>>();
375
376        let mut constants_array = constants_array.into_iter().map(|(obj, _)| obj).collect::<Vec<_>>();
377        // rearrange constants
378        let mut two_level_vectors: Vec<LispObject> = Vec::new();
379        // collect two level vectors from the end (reverse order)
380        while constants_array.len() > CV_NORMAL_SLOT_COUNT as usize {
381            let len = {
382                let remaining = (constants_array.len() - CV_NORMAL_SLOT_COUNT as usize) % CV_TWO_LEVEL_VECTOR_SIZE as usize;
383                if remaining == 0 {
384                    CV_TWO_LEVEL_VECTOR_SIZE as usize
385                } else {
386                    remaining
387                }
388            };
389            let v = constants_array
390                .splice((constants_array.len() - len)..constants_array.len(), Vec::new())
391                .collect();
392            two_level_vectors.push(LispObject::Vector(v));
393        }
394        two_level_vectors.reverse();
395
396        if !two_level_vectors.is_empty() {
397            debug_assert_eq!(constants_array.len(), CV_NORMAL_SLOT_COUNT as usize);
398            for i in 0..CV_TWO_LEVEL_VECTOR_SIZE {
399                constants_array.push(LispObject::Int(i as i64));
400            }
401            constants_array.extend(two_level_vectors);
402        }
403
404        let mut code: Vec<u8> = Vec::new();
405        let mut current_stack_size = 0;
406        let mut max_stack_size = 0;
407        for op in self.ops {
408            let op = if let Op::PushConstant(v) = op {
409                Op::PushConstant(index_remap[&v])
410            } else {
411                op
412            };
413            code.extend(op.to_code()?);
414            current_stack_size += op.get_stack_delta();
415            debug_assert!(current_stack_size >= 0);
416            max_stack_size = i32::max(current_stack_size, max_stack_size);
417        }
418
419        // some buffer for max stack size (for the PushConstant variant)
420        Ok((code, constants_array, max_stack_size + 8))
421    }
422
423    fn into_repl(self) -> Result<String> {
424        let (code, constants, max_stack_size) = self.into_bytecode()?;
425        Ok(format!("#[0 {} {} {}]",
426                   LispObject::UnibyteStr(code).to_repl(),
427                   LispObject::Vector(constants).to_repl(),
428                   max_stack_size))
429    }
430}
431
432pub fn generate_bytecode_repl(value: &json::Value, options: BytecodeOptions) -> Result<String> {
433    let mut compiler = BytecodeCompiler {
434        options,
435        ops: Vec::new(),
436        constants: BTreeMap::new(),
437    };
438    compiler.compile(value);
439    compiler.into_repl()
440}
441
442
443#[test]
444fn test_string_repl() {
445    assert_eq!(LispObject::UnibyteStr("\x00".into()).to_repl(), r#""\0""#);
446    assert_eq!(LispObject::UnibyteStr("\x1a".into()).to_repl(), r#""\^Z""#);
447    assert_eq!(LispObject::UnibyteStr("\x20".into()).to_repl(), r#"" ""#);
448    assert_eq!(LispObject::UnibyteStr("\x7f".into()).to_repl(), r#""\d""#);
449    assert_eq!(LispObject::UnibyteStr(vec![0xff]).to_repl(), r#""\377""#);
450}