1use std::collections::BTreeMap;
2use std::str::FromStr;
3
4use anyhow::{Result, bail};
5use serde_json as json;
6use smallvec::smallvec;
7
8
9#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
10pub enum LispObject {
11 Symbol(String),
12 Keyword(String),
13 UnibyteStr(Vec<u8>),
14 Str(String),
15 Int(i64),
16 Float(String), Nil,
18 T,
19 Vector(Vec<LispObject>),
20}
21
22impl FromStr for LispObject {
23 type Err = anyhow::Error;
24
25 fn from_str(s: &str) -> Result<Self> {
26 if s == "nil" {
27 Ok(Self::Nil)
28 } else if s == "t" {
29 Ok(Self::T)
30 } else if s.starts_with(":") {
31 Ok(Self::Keyword(s[1..].to_string()))
32 } else {
33 bail!("Supported LispObject: {}", s)
34 }
35 }
36}
37
38impl LispObject {
39 fn to_repl(&self) -> String {
40 match self {
41 LispObject::Symbol(s) => s.clone(),
42 LispObject::Keyword(s) => format!(":{}", s),
43 LispObject::Str(s) => {
44 let mut result = String::new();
45 result.reserve(s.len() * 2 + 2);
46 result.push('"');
47 for c in s.chars() {
48 if c == '\"' || c == '\\' {
49 result.push('\\');
50 result.push(c);
51 } else if (c as u32) < 32 || (c as u32) == 127 {
52 result += &format!("\\{:03o}", c as u32);
55 } else {
56 result.push(c);
57 }
58 }
59 result.push('"');
60 result
61 },
62 LispObject::UnibyteStr(vec) => {
63 let mut result = String::new();
64 result.reserve(vec.len() * 4 + 2);
65 result.push('"');
66 let mut last_oct_escape_not_full = false;
67 for c in vec {
68 let mut oct_escape_not_full = false;
69 match *c {
70 7 => result += "\\a",
71 8 => result += "\\b",
72 9 => result += "\\t",
73 10 => result += "\\n",
74 11 => result += "\\v",
75 12 => result += "\\f",
76 13 => result += "\\r",
77 127 => result += "\\d",
78 27 => result += "\\e",
79 8..=26 => { result += &format!("\\^{}", (*c as u32 + 64) as u8 as char);
82 },
83 0..=7 | 27..=31 | 128..=255 | 34 | 92 => { let oct_s = format!("\\{:o}", *c as u32);
85 if oct_s.len() < 4 {
86 oct_escape_not_full = true;
87 }
88 result += &oct_s;
89 },
90 _ => { if last_oct_escape_not_full && ('0'..='7').contains(&(*c as char)) {
93 result += "\\ ";
94 }
95 result.push(*c as char);
96 },
97 }
98 last_oct_escape_not_full = oct_escape_not_full;
99 }
100 result.push('"');
101 result
102 },
103 LispObject::Int(i) => i.to_string(),
104 LispObject::Float(s) => s.clone(),
105 LispObject::Nil => "nil".into(),
106 LispObject::T => "t".into(),
107 LispObject::Vector(v) =>
108 format!("[{}]", v.iter().map(|x| x.to_repl()).collect::<Vec<_>>().join(" "))
109 }
110 }
111}
112
113const CV_TWO_LEVEL_VECTOR_SIZE: u32 = 1000;
120const CV_NORMAL_SLOT_COUNT: u32 = (1 << 16) - CV_TWO_LEVEL_VECTOR_SIZE * 2;
121const CV_TWO_LEVEL_IDX_BEGIN: u32 = CV_NORMAL_SLOT_COUNT;
122const CV_TWO_LEVEL_DATA_BEGIN: u32 = CV_NORMAL_SLOT_COUNT + CV_TWO_LEVEL_VECTOR_SIZE;
123
124#[allow(dead_code)]
125enum Op {
126 PushConstant(u32), Call(u16),
128 StackRef(u16),
129 List(u8),
130 Discard,
131 ASet,
132 Add1,
133 Cons,
134 Return,
135}
136
137impl Op {
138 fn get_stack_delta(&self) -> i32 {
139 match self {
140 &Self::PushConstant(_) => 1,
141 &Self::Call(n) => -(n as i32 + 1) + 1,
142 &Self::StackRef(_) => 1,
143 &Self::List(n) => -(n as i32) + 1,
144 &Self::Discard => -1,
145 &Self::ASet => -3 + 1,
146 &Self::Add1 => 0,
147 &Self::Cons => -2 + 1,
148 &Self::Return => -1,
149 }
150 }
151
152 fn to_code(&self) -> Result<smallvec::SmallVec<[u8; 3]>> {
153 match self {
154 &Self::PushConstant(v) if v < 64 =>
155 Ok(smallvec![(192 + v) as u8]),
156 &Self::PushConstant(v) if v < CV_NORMAL_SLOT_COUNT =>
157 Ok(smallvec![129, (v & 0xff) as u8, (v >> 8) as u8]),
158 &Self::PushConstant(v) if v < (CV_NORMAL_SLOT_COUNT + CV_TWO_LEVEL_VECTOR_SIZE * CV_TWO_LEVEL_VECTOR_SIZE) => {
159 let mut result = smallvec![];
160 let two_level_i = (v - CV_NORMAL_SLOT_COUNT) / CV_TWO_LEVEL_VECTOR_SIZE;
161 let two_level_j = (v - CV_NORMAL_SLOT_COUNT) % CV_TWO_LEVEL_VECTOR_SIZE;
162
163 let index_for_i = two_level_i + CV_TWO_LEVEL_DATA_BEGIN;
165 result.extend_from_slice(&[129, (index_for_i & 0xff) as u8, (index_for_i >> 8) as u8]);
166 let index_for_j = two_level_j + CV_TWO_LEVEL_IDX_BEGIN;
168 result.extend_from_slice(&[129, (index_for_j & 0xff) as u8, (index_for_j >> 8) as u8]);
169 result.push(72);
171
172 Ok(result)
173 },
174 &Self::PushConstant(v) =>
175 bail!("Too many constants! {}", v),
176 &Self::Call(v) if v <= 5 =>
177 Ok(smallvec![(32 + v) as u8]),
178 &Self::Call(v) if v < (1 << 8) =>
179 Ok(smallvec![(32 + 6) as u8, v as u8]),
180 &Self::Call(v) =>
181 Ok(smallvec![(32 + 7) as u8, (v & 0xff) as u8, (v >> 8) as u8]),
182 &Self::StackRef(v) if (1..=4).contains(&v) =>
183 Ok(smallvec![v as u8]),
184 &Self::StackRef(_) => unimplemented!(),
185 &Self::List(v) if v == 0 => unreachable!(),
186 &Self::List(v) if (1..=4).contains(&v) => Ok(smallvec![66 + v]),
187 &Self::List(v) => Ok(smallvec![175, v]),
188 &Self::Discard => Ok(smallvec![136]),
189 &Self::ASet => Ok(smallvec![73]),
190 &Self::Add1 => Ok(smallvec![84]),
191 &Self::Cons => Ok(smallvec![66]),
192 &Self::Return => Ok(smallvec![135]),
193 }
194 }
195}
196
197#[derive(Clone, Copy, Debug, PartialEq, Eq, clap::ValueEnum)]
198pub enum ObjectType {
199 Plist,
200 Hashtable,
201 Alist,
202}
203
204#[derive(Clone, Debug)]
205pub struct BytecodeOptions {
206 pub object_type: ObjectType,
207 pub null_value: LispObject,
209 pub false_value: LispObject,
210}
211
212impl Default for BytecodeOptions {
213 fn default() -> Self {
214 Self {
215 object_type: ObjectType::Plist,
216 null_value: LispObject::Nil,
217 false_value: LispObject::Nil,
218 }
219 }
220}
221
222struct BytecodeCompiler {
224 options: BytecodeOptions,
225
226 ops: Vec<Op>,
227 constants: BTreeMap<LispObject, (u32, u32)>, }
229
230impl BytecodeCompiler {
231 fn compile_constant_op(&mut self, obj: LispObject) {
232 let idx = {
233 if let Some(idx_and_count) = self.constants.get_mut(&obj) {
234 idx_and_count.1 += 1;
235 idx_and_count.0
236 } else {
237 let next_id = self.constants.len() as u32;
238 self.constants.insert(obj, (next_id, 1));
239 next_id
240 }
241 };
242 self.ops.push(Op::PushConstant(idx))
243 }
244
245 fn compile_value_array(&mut self, arr: &[json::Value]) {
246 if arr.is_empty() {
247 self.compile_constant_op(LispObject::Symbol("vector".into()));
248 self.ops.push(Op::Call(0));
249 return;
250 }
251
252 let chunks = arr.chunks((1 << 16) - 1);
253 let chunks_len = chunks.len();
254 assert!(chunks_len < (1 << 16));
255
256 if chunks_len > 1 {
257 self.compile_constant_op(LispObject::Symbol("vconcat".into()));
259 }
260
261 for chunk in chunks {
262 self.compile_constant_op(LispObject::Symbol("vector".into()));
263 for value in chunk {
264 self.compile_value(value);
265 }
266 self.ops.push(Op::Call(chunk.len() as u16));
267 }
268
269 if chunks_len > 1 {
270 self.ops.push(Op::Call(chunks_len as u16));
272 }
273 }
274
275 fn compile_value_map_plist_or_alist(&mut self, map: &json::Map<String, json::Value>, alist: bool) {
276 let list_len = if alist { map.len() } else { map.len() * 2 };
277 if list_len < (1 << 16) && list_len >= (1 << 8) {
279 self.compile_constant_op(LispObject::Symbol("list".into()));
280 }
281
282 for (key, value) in map {
283 if alist {
284 self.compile_constant_op(LispObject::Symbol(key.clone()));
285 self.compile_value(value);
286 self.ops.push(Op::Cons);
287 } else {
288 self.compile_constant_op(LispObject::Keyword(key.clone()));
289 self.compile_value(value);
290 }
291 }
292
293 if list_len == 0 {
295 self.compile_constant_op(LispObject::Nil);
296 } else if list_len < (1 << 8) {
297 self.ops.push(Op::List(list_len as u8));
298 } else if list_len < (1 << 16) {
299 self.ops.push(Op::Call(list_len as u16));
300 } else {
301 self.compile_constant_op(LispObject::Nil);
302 for _ in 0..list_len {
303 self.ops.push(Op::Cons);
304 }
305 }
306 }
307
308 fn compile_value_map_hashtable(&mut self, map: &json::Map<String, json::Value>) {
309 self.compile_constant_op(LispObject::Symbol("make-hash-table".into()));
310 self.compile_constant_op(LispObject::Keyword("test".into()));
311 self.compile_constant_op(LispObject::Symbol("equal".into()));
312 self.compile_constant_op(LispObject::Keyword("size".into()));
313 self.compile_constant_op(LispObject::Int(map.len() as i64));
314 self.ops.push(Op::Call(4));
315
316 for (key, value) in map {
317 self.compile_constant_op(LispObject::Symbol("puthash".into()));
318 self.compile_constant_op(LispObject::Str(key.clone()));
319 self.compile_value(value);
320 self.ops.push(Op::StackRef(3));
321 self.ops.push(Op::Call(3));
322 self.ops.push(Op::Discard);
323 }
324 }
325
326 fn compile_value(&mut self, value: &json::Value) {
327 match value {
328 &json::Value::Null => {
329 self.compile_constant_op(self.options.null_value.clone());
330 },
331 &json::Value::Bool(false) => {
332 self.compile_constant_op(self.options.false_value.clone());
333 },
334 &json::Value::Bool(true) => {
335 self.compile_constant_op(LispObject::T);
336 },
337 &json::Value::Number(ref num) => {
338 if num.is_f64() {
339 self.compile_constant_op(LispObject::Float(num.to_string()));
340 } else {
341 self.compile_constant_op(LispObject::Int(num.as_i64().unwrap()));
342 }
343 },
344 &json::Value::String(ref s) => {
345 self.compile_constant_op(LispObject::Str(s.clone()));
346 },
347 &json::Value::Array(ref arr) => {
348 self.compile_value_array(&arr);
349 },
350 &json::Value::Object(ref map) => {
351 match self.options.object_type {
352 ObjectType::Plist => self.compile_value_map_plist_or_alist(&map, false),
353 ObjectType::Alist => self.compile_value_map_plist_or_alist(&map, true),
354 ObjectType::Hashtable => self.compile_value_map_hashtable(&map),
355 };
356 },
357 }
358 }
359
360 fn compile(&mut self, value: &json::Value) {
361 self.compile_value(value);
362 self.ops.push(Op::Return);
363 }
364
365 fn into_bytecode(self) -> Result<(Vec<u8>, Vec<LispObject>, i32)> {
367 let mut constants_array = self.constants.into_iter().collect::<Vec<_>>();
369 constants_array.sort_by_key(
370 |(_, idx_and_count)| (-(idx_and_count.1 as i32), idx_and_count.0));
372 let index_remap = constants_array.iter().enumerate()
373 .map(|(new_idx, (_, idx_and_count))| (idx_and_count.0, new_idx as u32))
374 .collect::<BTreeMap<_, _>>();
375
376 let mut constants_array = constants_array.into_iter().map(|(obj, _)| obj).collect::<Vec<_>>();
377 let mut two_level_vectors: Vec<LispObject> = Vec::new();
379 while constants_array.len() > CV_NORMAL_SLOT_COUNT as usize {
381 let len = {
382 let remaining = (constants_array.len() - CV_NORMAL_SLOT_COUNT as usize) % CV_TWO_LEVEL_VECTOR_SIZE as usize;
383 if remaining == 0 {
384 CV_TWO_LEVEL_VECTOR_SIZE as usize
385 } else {
386 remaining
387 }
388 };
389 let v = constants_array
390 .splice((constants_array.len() - len)..constants_array.len(), Vec::new())
391 .collect();
392 two_level_vectors.push(LispObject::Vector(v));
393 }
394 two_level_vectors.reverse();
395
396 if !two_level_vectors.is_empty() {
397 debug_assert_eq!(constants_array.len(), CV_NORMAL_SLOT_COUNT as usize);
398 for i in 0..CV_TWO_LEVEL_VECTOR_SIZE {
399 constants_array.push(LispObject::Int(i as i64));
400 }
401 constants_array.extend(two_level_vectors);
402 }
403
404 let mut code: Vec<u8> = Vec::new();
405 let mut current_stack_size = 0;
406 let mut max_stack_size = 0;
407 for op in self.ops {
408 let op = if let Op::PushConstant(v) = op {
409 Op::PushConstant(index_remap[&v])
410 } else {
411 op
412 };
413 code.extend(op.to_code()?);
414 current_stack_size += op.get_stack_delta();
415 debug_assert!(current_stack_size >= 0);
416 max_stack_size = i32::max(current_stack_size, max_stack_size);
417 }
418
419 Ok((code, constants_array, max_stack_size + 8))
421 }
422
423 fn into_repl(self) -> Result<String> {
424 let (code, constants, max_stack_size) = self.into_bytecode()?;
425 Ok(format!("#[0 {} {} {}]",
426 LispObject::UnibyteStr(code).to_repl(),
427 LispObject::Vector(constants).to_repl(),
428 max_stack_size))
429 }
430}
431
432pub fn generate_bytecode_repl(value: &json::Value, options: BytecodeOptions) -> Result<String> {
433 let mut compiler = BytecodeCompiler {
434 options,
435 ops: Vec::new(),
436 constants: BTreeMap::new(),
437 };
438 compiler.compile(value);
439 compiler.into_repl()
440}
441
442
443#[test]
444fn test_string_repl() {
445 assert_eq!(LispObject::UnibyteStr("\x00".into()).to_repl(), r#""\0""#);
446 assert_eq!(LispObject::UnibyteStr("\x1a".into()).to_repl(), r#""\^Z""#);
447 assert_eq!(LispObject::UnibyteStr("\x20".into()).to_repl(), r#"" ""#);
448 assert_eq!(LispObject::UnibyteStr("\x7f".into()).to_repl(), r#""\d""#);
449 assert_eq!(LispObject::UnibyteStr(vec![0xff]).to_repl(), r#""\377""#);
450}