1use sim_codec::{DecodeBudget, Input, ReadCx};
2use sim_kernel::{
3 CodecId, Error, Expr, LocatedExprTree, NumberLiteral, Origin, Result, SourceId, Span, Symbol,
4};
5
6pub fn decode_cl_lite_tree(
11 cx: &mut ReadCx<'_>,
12 source_id: impl Into<String>,
13 input: Input,
14) -> Result<LocatedExprTree> {
15 let source = input_text(cx.codec, input)?;
16 let mut budget = DecodeBudget::new(cx.limits);
17 budget.check_input_bytes(cx.codec, source.len())?;
18 let source_id = SourceId(source_id.into());
19 cx.cx.sources_mut().intern_text(source_id.clone(), &source);
20 let tree = parse_cl_lite_source(cx.codec, source_id, &source, &mut budget)?;
21 budget.check_tokens(cx.codec, tree_size(&tree))?;
22 Ok(tree)
23}
24
25pub fn parse_cl_lite_source(
31 codec: CodecId,
32 source_id: SourceId,
33 source: &str,
34 budget: &mut DecodeBudget,
35) -> Result<LocatedExprTree> {
36 let mut parser = Parser {
37 codec,
38 source_id,
39 source,
40 bytes: source.as_bytes(),
41 index: 0,
42 budget,
43 };
44 let tree = parser.read_expr(0)?;
45 parser.skip_ws_and_comments();
46 if !parser.is_eof() {
47 return parser.err("expected exactly one top-level CL-lite expression");
48 }
49 Ok(tree)
50}
51
52struct Parser<'a, 'b> {
53 codec: CodecId,
54 source_id: SourceId,
55 source: &'a str,
56 bytes: &'a [u8],
57 index: usize,
58 budget: &'b mut DecodeBudget,
59}
60
61impl Parser<'_, '_> {
62 fn read_expr(&mut self, depth: usize) -> Result<LocatedExprTree> {
63 self.skip_ws_and_comments();
64 self.budget.enter_node(self.codec, depth)?;
65 let start = self.index;
66 let Some(byte) = self.peek() else {
67 return self.err("expected CL-lite expression");
68 };
69 match byte {
70 b'(' => self.read_list(depth, start),
71 b')' => self.err("unexpected close parenthesis"),
72 b'\'' => self.read_quote(depth, start),
73 b'"' => self.read_string(start),
74 _ => self.read_atom(start),
75 }
76 }
77
78 fn read_list(&mut self, depth: usize, start: usize) -> Result<LocatedExprTree> {
79 self.index += 1;
80 let mut children = Vec::new();
81 loop {
82 self.skip_ws_and_comments();
83 match self.peek() {
84 Some(b')') => {
85 self.index += 1;
86 break;
87 }
88 Some(_) => children.push(self.read_expr(depth + 1)?),
89 None => return self.err("unterminated CL-lite list"),
90 }
91 }
92 self.budget
93 .check_collection_len(self.codec, children.len())?;
94 let expr = Expr::List(children.iter().map(|child| child.expr.clone()).collect());
95 Ok(self.tree(expr, start, self.index, children))
96 }
97
98 fn read_quote(&mut self, depth: usize, start: usize) -> Result<LocatedExprTree> {
99 self.index += 1;
100 let quoted = self.read_expr(depth + 1)?;
101 let quote = self.tree(
102 Expr::Symbol(Symbol::new("quote")),
103 start,
104 start + 1,
105 Vec::new(),
106 );
107 let end = quoted
108 .origin
109 .as_ref()
110 .map(|origin| origin.span.end)
111 .unwrap_or(self.index);
112 Ok(self.tree(
113 Expr::List(vec![quote.expr.clone(), quoted.expr.clone()]),
114 start,
115 end,
116 vec![quote, quoted],
117 ))
118 }
119
120 fn read_string(&mut self, start: usize) -> Result<LocatedExprTree> {
121 self.index += 1;
122 let mut out = String::new();
123 while let Some(byte) = self.peek() {
124 self.index += 1;
125 match byte {
126 b'"' => {
127 self.budget.check_string_bytes(self.codec, out.len())?;
128 return Ok(self.tree(Expr::String(out), start, self.index, Vec::new()));
129 }
130 b'\\' => out.push(self.read_escape()?),
131 other => out.push(other as char),
132 }
133 }
134 self.err("unterminated CL-lite string")
135 }
136
137 fn read_escape(&mut self) -> Result<char> {
138 let Some(escaped) = self.peek() else {
139 return self.err("unterminated CL-lite string escape");
140 };
141 self.index += 1;
142 Ok(match escaped {
143 b'n' => '\n',
144 b'r' => '\r',
145 b't' => '\t',
146 b'"' => '"',
147 b'\\' => '\\',
148 other => other as char,
149 })
150 }
151
152 fn read_atom(&mut self, start: usize) -> Result<LocatedExprTree> {
153 let atom = self.take_atom();
154 if atom.is_empty() {
155 return self.err("expected CL-lite atom");
156 }
157 let expr = match atom.as_str() {
158 "nil" | "NIL" => Expr::Nil,
159 "t" | "T" => Expr::Bool(true),
160 _ => number_literal(&atom)
161 .map(Expr::Number)
162 .unwrap_or_else(|| Expr::Symbol(symbol_atom(&atom))),
163 };
164 Ok(self.tree(expr, start, self.index, Vec::new()))
165 }
166
167 fn take_atom(&mut self) -> String {
168 let start = self.index;
169 while let Some(byte) = self.peek() {
170 if byte.is_ascii_whitespace() || matches!(byte, b'(' | b')' | b'"' | b';') {
171 break;
172 }
173 self.index += 1;
174 }
175 self.source[start..self.index].to_owned()
176 }
177
178 fn skip_ws_and_comments(&mut self) {
179 loop {
180 while self.peek().is_some_and(|byte| byte.is_ascii_whitespace()) {
181 self.index += 1;
182 }
183 if self.peek() != Some(b';') {
184 return;
185 }
186 while let Some(byte) = self.peek() {
187 self.index += 1;
188 if byte == b'\n' {
189 break;
190 }
191 }
192 }
193 }
194
195 fn tree(
196 &self,
197 expr: Expr,
198 start: usize,
199 end: usize,
200 children: Vec<LocatedExprTree>,
201 ) -> LocatedExprTree {
202 LocatedExprTree {
203 expr,
204 origin: Some(Origin {
205 codec: self.codec,
206 source: self.source_id.clone(),
207 span: Span { start, end },
208 trivia: Vec::new(),
209 }),
210 children,
211 }
212 }
213
214 fn peek(&self) -> Option<u8> {
215 self.bytes.get(self.index).copied()
216 }
217
218 fn is_eof(&self) -> bool {
219 self.index >= self.bytes.len()
220 }
221
222 fn err<T>(&self, message: impl Into<String>) -> Result<T> {
223 Err(Error::CodecError {
224 codec: self.codec,
225 message: message.into(),
226 })
227 }
228}
229
230fn symbol_atom(atom: &str) -> Symbol {
231 if let Some(keyword) = atom.strip_prefix(':') {
232 return Symbol::qualified("keyword", keyword.to_owned());
233 }
234 if let Some((package, name)) = atom.split_once("::") {
235 return Symbol::qualified(package.to_owned(), name.to_owned());
236 }
237 Symbol::new(atom.to_owned())
238}
239
240fn number_literal(raw: &str) -> Option<NumberLiteral> {
241 let is_integer = raw
242 .strip_prefix(['+', '-'])
243 .unwrap_or(raw)
244 .chars()
245 .all(|ch| ch.is_ascii_digit());
246 if !is_integer || raw == "+" || raw == "-" {
247 return None;
248 }
249 Some(NumberLiteral {
250 domain: Symbol::qualified("numbers", "i64"),
251 canonical: raw.to_owned(),
252 })
253}
254
255fn tree_size(tree: &LocatedExprTree) -> usize {
256 1 + tree.children.iter().map(tree_size).sum::<usize>()
257}
258
259fn input_text(codec: CodecId, input: Input) -> Result<String> {
260 match input {
261 Input::Text(text) => Ok(text),
262 Input::Bytes(bytes) => String::from_utf8(bytes).map_err(|err| Error::CodecError {
263 codec,
264 message: format!("CL-lite input is not valid UTF-8: {err}"),
265 }),
266 }
267}