1use sim_codec::{DecodeBudget, Input, ReadCx};
2use sim_kernel::{
3 CodecId, Error, Expr, LocatedExprTree, NumberLiteral, Origin, Result, SourceId, Span, Symbol,
4};
5
6pub fn decode_clojure_edn_tree(
11 cx: &mut ReadCx<'_>,
12 source_id: impl Into<String>,
13 input: Input,
14) -> Result<LocatedExprTree> {
15 let source = input_text(cx.codec, input)?;
16 let mut budget = DecodeBudget::new(cx.limits);
17 budget.check_input_bytes(cx.codec, source.len())?;
18 let source_id = SourceId(source_id.into());
19 cx.cx.sources_mut().intern_text(source_id.clone(), &source);
20 let tree = parse_clojure_edn_source(cx.codec, source_id, &source, &mut budget)?;
21 budget.check_tokens(cx.codec, tree_size(&tree))?;
22 Ok(tree)
23}
24
25pub fn parse_clojure_edn_source(
30 codec: CodecId,
31 source_id: SourceId,
32 source: &str,
33 budget: &mut DecodeBudget,
34) -> Result<LocatedExprTree> {
35 let mut parser = Parser {
36 codec,
37 source_id,
38 source,
39 bytes: source.as_bytes(),
40 index: 0,
41 budget,
42 };
43 let tree = parser.read_expr(0)?;
44 parser.skip_ws_commas_and_comments();
45 if !parser.is_eof() {
46 return parser.err("expected exactly one top-level EDN value");
47 }
48 Ok(tree)
49}
50
51struct Parser<'a, 'b> {
52 codec: CodecId,
53 source_id: SourceId,
54 source: &'a str,
55 bytes: &'a [u8],
56 index: usize,
57 budget: &'b mut DecodeBudget,
58}
59
60impl Parser<'_, '_> {
61 fn read_expr(&mut self, depth: usize) -> Result<LocatedExprTree> {
62 self.skip_ws_commas_and_comments();
63 self.budget.enter_node(self.codec, depth)?;
64 let start = self.index;
65 let Some(byte) = self.peek() else {
66 return self.err("expected EDN value");
67 };
68 match byte {
69 b'(' => self.read_sequence(depth, start, b')', Expr::List),
70 b'[' => self.read_sequence(depth, start, b']', Expr::Vector),
71 b'{' => self.read_map(depth, start),
72 b'"' => self.read_string(start),
73 b'#' => self.read_dispatch(depth, start),
74 b')' | b']' | b'}' => self.err("unexpected EDN delimiter"),
75 _ => self.read_atom(start),
76 }
77 }
78
79 fn read_sequence(
80 &mut self,
81 depth: usize,
82 start: usize,
83 close: u8,
84 make_expr: fn(Vec<Expr>) -> Expr,
85 ) -> Result<LocatedExprTree> {
86 self.index += 1;
87 let mut children = Vec::new();
88 loop {
89 self.skip_ws_commas_and_comments();
90 match self.peek() {
91 Some(byte) if byte == close => {
92 self.index += 1;
93 break;
94 }
95 Some(_) => children.push(self.read_expr(depth + 1)?),
96 None => return self.err("unterminated EDN sequence"),
97 }
98 }
99 self.budget
100 .check_collection_len(self.codec, children.len())?;
101 let expr = make_expr(children.iter().map(|child| child.expr.clone()).collect());
102 Ok(self.tree(expr, start, self.index, children))
103 }
104
105 fn read_map(&mut self, depth: usize, start: usize) -> Result<LocatedExprTree> {
106 self.index += 1;
107 let mut children = Vec::new();
108 let mut entries = Vec::new();
109 loop {
110 self.skip_ws_commas_and_comments();
111 match self.peek() {
112 Some(b'}') => {
113 self.index += 1;
114 break;
115 }
116 Some(_) => {
117 let key = self.read_expr(depth + 1)?;
118 self.skip_ws_commas_and_comments();
119 if self.peek() == Some(b'}') || self.is_eof() {
120 return self.err("EDN map expects an even number of forms");
121 }
122 let value = self.read_expr(depth + 1)?;
123 entries.push((key.expr.clone(), value.expr.clone()));
124 children.push(key);
125 children.push(value);
126 }
127 None => return self.err("unterminated EDN map"),
128 }
129 }
130 self.budget
131 .check_collection_len(self.codec, entries.len())?;
132 Ok(self.tree(Expr::Map(entries), start, self.index, children))
133 }
134
135 fn read_dispatch(&mut self, depth: usize, start: usize) -> Result<LocatedExprTree> {
136 self.index += 1;
137 match self.peek() {
138 Some(b'{') => self.read_set(depth, start),
139 _ => self.err("unsupported EDN dispatch token"),
140 }
141 }
142
143 fn read_set(&mut self, depth: usize, start: usize) -> Result<LocatedExprTree> {
144 self.index += 1;
145 let mut children = Vec::new();
146 loop {
147 self.skip_ws_commas_and_comments();
148 match self.peek() {
149 Some(b'}') => {
150 self.index += 1;
151 break;
152 }
153 Some(_) => children.push(self.read_expr(depth + 1)?),
154 None => return self.err("unterminated EDN set"),
155 }
156 }
157 self.budget
158 .check_collection_len(self.codec, children.len())?;
159 let expr = Expr::Set(children.iter().map(|child| child.expr.clone()).collect());
160 Ok(self.tree(expr, start, self.index, children))
161 }
162
163 fn read_string(&mut self, start: usize) -> Result<LocatedExprTree> {
164 self.index += 1;
165 let mut out = String::new();
166 while let Some(byte) = self.peek() {
167 self.index += 1;
168 match byte {
169 b'"' => {
170 self.budget.check_string_bytes(self.codec, out.len())?;
171 return Ok(self.tree(Expr::String(out), start, self.index, Vec::new()));
172 }
173 b'\\' => out.push(self.read_escape()?),
174 other => out.push(other as char),
175 }
176 }
177 self.err("unterminated EDN string")
178 }
179
180 fn read_escape(&mut self) -> Result<char> {
181 let Some(escaped) = self.peek() else {
182 return self.err("unterminated EDN string escape");
183 };
184 self.index += 1;
185 Ok(match escaped {
186 b'n' => '\n',
187 b'r' => '\r',
188 b't' => '\t',
189 b'"' => '"',
190 b'\\' => '\\',
191 other => other as char,
192 })
193 }
194
195 fn read_atom(&mut self, start: usize) -> Result<LocatedExprTree> {
196 let atom = self.take_atom();
197 if atom.is_empty() {
198 return self.err("expected EDN atom");
199 }
200 let expr = match atom.as_str() {
201 "nil" => Expr::Nil,
202 "true" => Expr::Bool(true),
203 "false" => Expr::Bool(false),
204 _ => number_literal(&atom)
205 .map(Expr::Number)
206 .unwrap_or_else(|| Expr::Symbol(symbol_atom(&atom))),
207 };
208 Ok(self.tree(expr, start, self.index, Vec::new()))
209 }
210
211 fn take_atom(&mut self) -> String {
212 let start = self.index;
213 while let Some(byte) = self.peek() {
214 if byte.is_ascii_whitespace()
215 || matches!(
216 byte,
217 b',' | b'(' | b')' | b'[' | b']' | b'{' | b'}' | b'"' | b';'
218 )
219 {
220 break;
221 }
222 self.index += 1;
223 }
224 self.source[start..self.index].to_owned()
225 }
226
227 fn skip_ws_commas_and_comments(&mut self) {
228 loop {
229 while self
230 .peek()
231 .is_some_and(|byte| byte.is_ascii_whitespace() || byte == b',')
232 {
233 self.index += 1;
234 }
235 if self.peek() != Some(b';') {
236 return;
237 }
238 while let Some(byte) = self.peek() {
239 self.index += 1;
240 if byte == b'\n' {
241 break;
242 }
243 }
244 }
245 }
246
247 fn tree(
248 &self,
249 expr: Expr,
250 start: usize,
251 end: usize,
252 children: Vec<LocatedExprTree>,
253 ) -> LocatedExprTree {
254 LocatedExprTree {
255 expr,
256 origin: Some(Origin {
257 codec: self.codec,
258 source: self.source_id.clone(),
259 span: Span { start, end },
260 trivia: Vec::new(),
261 }),
262 children,
263 }
264 }
265
266 fn peek(&self) -> Option<u8> {
267 self.bytes.get(self.index).copied()
268 }
269
270 fn is_eof(&self) -> bool {
271 self.index >= self.bytes.len()
272 }
273
274 fn err<T>(&self, message: impl Into<String>) -> Result<T> {
275 Err(Error::CodecError {
276 codec: self.codec,
277 message: message.into(),
278 })
279 }
280}
281
282fn symbol_atom(atom: &str) -> Symbol {
283 if let Some(keyword) = atom.strip_prefix(':') {
284 return Symbol::qualified("keyword", keyword.to_owned());
285 }
286 if let Some((namespace, name)) = atom.split_once('/') {
287 return Symbol::qualified(namespace.to_owned(), name.to_owned());
288 }
289 Symbol::new(atom.to_owned())
290}
291
292fn number_literal(raw: &str) -> Option<NumberLiteral> {
293 let is_integer = raw
294 .strip_prefix(['+', '-'])
295 .unwrap_or(raw)
296 .chars()
297 .all(|ch| ch.is_ascii_digit());
298 if !is_integer || raw == "+" || raw == "-" {
299 return None;
300 }
301 Some(NumberLiteral {
302 domain: Symbol::qualified("numbers", "i64"),
303 canonical: raw.to_owned(),
304 })
305}
306
307fn tree_size(tree: &LocatedExprTree) -> usize {
308 1 + tree.children.iter().map(tree_size).sum::<usize>()
309}
310
311fn input_text(codec: CodecId, input: Input) -> Result<String> {
312 match input {
313 Input::Text(text) => Ok(text),
314 Input::Bytes(bytes) => String::from_utf8(bytes).map_err(|err| Error::CodecError {
315 codec,
316 message: format!("EDN input is not valid UTF-8: {err}"),
317 }),
318 }
319}