Skip to main content

sim_lib_lang_cl/
reader.rs

1use sim_codec::{DecodeBudget, Input, ReadCx};
2use sim_kernel::{
3    CodecId, Error, Expr, LocatedExprTree, NumberLiteral, Origin, Result, SourceId, Span, Symbol,
4};
5
6/// Decodes CL-lite surface text into a located `Expr` tree under codec budgets.
7///
8/// Interns the source, enforces the decode limits, and returns the single
9/// top-level form as a [`LocatedExprTree`].
10pub fn decode_cl_lite_tree(
11    cx: &mut ReadCx<'_>,
12    source_id: impl Into<String>,
13    input: Input,
14) -> Result<LocatedExprTree> {
15    let source = input_text(cx.codec, input)?;
16    let mut budget = DecodeBudget::new(cx.limits);
17    budget.check_input_bytes(cx.codec, source.len())?;
18    let source_id = SourceId(source_id.into());
19    cx.cx.sources_mut().intern_text(source_id.clone(), &source);
20    let tree = parse_cl_lite_source(cx.codec, source_id, &source, &mut budget)?;
21    budget.check_tokens(cx.codec, tree_size(&tree))?;
22    Ok(tree)
23}
24
25/// Parses one top-level CL-lite form from source text into a located tree.
26///
27/// Lower-level entry point behind [`decode_cl_lite_tree`]; the caller supplies
28/// the codec id, interned source id, and a [`DecodeBudget`]. Errors if the input
29/// holds more than one top-level expression.
30pub fn parse_cl_lite_source(
31    codec: CodecId,
32    source_id: SourceId,
33    source: &str,
34    budget: &mut DecodeBudget,
35) -> Result<LocatedExprTree> {
36    let mut parser = Parser {
37        codec,
38        source_id,
39        source,
40        bytes: source.as_bytes(),
41        index: 0,
42        budget,
43    };
44    let tree = parser.read_expr(0)?;
45    parser.skip_ws_and_comments();
46    if !parser.is_eof() {
47        return parser.err("expected exactly one top-level CL-lite expression");
48    }
49    Ok(tree)
50}
51
52struct Parser<'a, 'b> {
53    codec: CodecId,
54    source_id: SourceId,
55    source: &'a str,
56    bytes: &'a [u8],
57    index: usize,
58    budget: &'b mut DecodeBudget,
59}
60
61impl Parser<'_, '_> {
62    fn read_expr(&mut self, depth: usize) -> Result<LocatedExprTree> {
63        self.skip_ws_and_comments();
64        self.budget.enter_node(self.codec, depth)?;
65        let start = self.index;
66        let Some(byte) = self.peek() else {
67            return self.err("expected CL-lite expression");
68        };
69        match byte {
70            b'(' => self.read_list(depth, start),
71            b')' => self.err("unexpected close parenthesis"),
72            b'\'' => self.read_quote(depth, start),
73            b'"' => self.read_string(start),
74            _ => self.read_atom(start),
75        }
76    }
77
78    fn read_list(&mut self, depth: usize, start: usize) -> Result<LocatedExprTree> {
79        self.index += 1;
80        let mut children = Vec::new();
81        loop {
82            self.skip_ws_and_comments();
83            match self.peek() {
84                Some(b')') => {
85                    self.index += 1;
86                    break;
87                }
88                Some(_) => children.push(self.read_expr(depth + 1)?),
89                None => return self.err("unterminated CL-lite list"),
90            }
91        }
92        self.budget
93            .check_collection_len(self.codec, children.len())?;
94        let expr = Expr::List(children.iter().map(|child| child.expr.clone()).collect());
95        Ok(self.tree(expr, start, self.index, children))
96    }
97
98    fn read_quote(&mut self, depth: usize, start: usize) -> Result<LocatedExprTree> {
99        self.index += 1;
100        let quoted = self.read_expr(depth + 1)?;
101        let quote = self.tree(
102            Expr::Symbol(Symbol::new("quote")),
103            start,
104            start + 1,
105            Vec::new(),
106        );
107        let end = quoted
108            .origin
109            .as_ref()
110            .map(|origin| origin.span.end)
111            .unwrap_or(self.index);
112        Ok(self.tree(
113            Expr::List(vec![quote.expr.clone(), quoted.expr.clone()]),
114            start,
115            end,
116            vec![quote, quoted],
117        ))
118    }
119
120    fn read_string(&mut self, start: usize) -> Result<LocatedExprTree> {
121        self.index += 1;
122        let mut out = String::new();
123        while let Some(byte) = self.peek() {
124            self.index += 1;
125            match byte {
126                b'"' => {
127                    self.budget.check_string_bytes(self.codec, out.len())?;
128                    return Ok(self.tree(Expr::String(out), start, self.index, Vec::new()));
129                }
130                b'\\' => out.push(self.read_escape()?),
131                other => out.push(other as char),
132            }
133        }
134        self.err("unterminated CL-lite string")
135    }
136
137    fn read_escape(&mut self) -> Result<char> {
138        let Some(escaped) = self.peek() else {
139            return self.err("unterminated CL-lite string escape");
140        };
141        self.index += 1;
142        Ok(match escaped {
143            b'n' => '\n',
144            b'r' => '\r',
145            b't' => '\t',
146            b'"' => '"',
147            b'\\' => '\\',
148            other => other as char,
149        })
150    }
151
152    fn read_atom(&mut self, start: usize) -> Result<LocatedExprTree> {
153        let atom = self.take_atom();
154        if atom.is_empty() {
155            return self.err("expected CL-lite atom");
156        }
157        let expr = match atom.as_str() {
158            "nil" | "NIL" => Expr::Nil,
159            "t" | "T" => Expr::Bool(true),
160            _ => number_literal(&atom)
161                .map(Expr::Number)
162                .unwrap_or_else(|| Expr::Symbol(symbol_atom(&atom))),
163        };
164        Ok(self.tree(expr, start, self.index, Vec::new()))
165    }
166
167    fn take_atom(&mut self) -> String {
168        let start = self.index;
169        while let Some(byte) = self.peek() {
170            if byte.is_ascii_whitespace() || matches!(byte, b'(' | b')' | b'"' | b';') {
171                break;
172            }
173            self.index += 1;
174        }
175        self.source[start..self.index].to_owned()
176    }
177
178    fn skip_ws_and_comments(&mut self) {
179        loop {
180            while self.peek().is_some_and(|byte| byte.is_ascii_whitespace()) {
181                self.index += 1;
182            }
183            if self.peek() != Some(b';') {
184                return;
185            }
186            while let Some(byte) = self.peek() {
187                self.index += 1;
188                if byte == b'\n' {
189                    break;
190                }
191            }
192        }
193    }
194
195    fn tree(
196        &self,
197        expr: Expr,
198        start: usize,
199        end: usize,
200        children: Vec<LocatedExprTree>,
201    ) -> LocatedExprTree {
202        LocatedExprTree {
203            expr,
204            origin: Some(Origin {
205                codec: self.codec,
206                source: self.source_id.clone(),
207                span: Span { start, end },
208                trivia: Vec::new(),
209            }),
210            children,
211        }
212    }
213
214    fn peek(&self) -> Option<u8> {
215        self.bytes.get(self.index).copied()
216    }
217
218    fn is_eof(&self) -> bool {
219        self.index >= self.bytes.len()
220    }
221
222    fn err<T>(&self, message: impl Into<String>) -> Result<T> {
223        Err(Error::CodecError {
224            codec: self.codec,
225            message: message.into(),
226        })
227    }
228}
229
230fn symbol_atom(atom: &str) -> Symbol {
231    if let Some(keyword) = atom.strip_prefix(':') {
232        return Symbol::qualified("keyword", keyword.to_owned());
233    }
234    if let Some((package, name)) = atom.split_once("::") {
235        return Symbol::qualified(package.to_owned(), name.to_owned());
236    }
237    Symbol::new(atom.to_owned())
238}
239
240fn number_literal(raw: &str) -> Option<NumberLiteral> {
241    let is_integer = raw
242        .strip_prefix(['+', '-'])
243        .unwrap_or(raw)
244        .chars()
245        .all(|ch| ch.is_ascii_digit());
246    if !is_integer || raw == "+" || raw == "-" {
247        return None;
248    }
249    Some(NumberLiteral {
250        domain: Symbol::qualified("numbers", "i64"),
251        canonical: raw.to_owned(),
252    })
253}
254
255fn tree_size(tree: &LocatedExprTree) -> usize {
256    1 + tree.children.iter().map(tree_size).sum::<usize>()
257}
258
259fn input_text(codec: CodecId, input: Input) -> Result<String> {
260    match input {
261        Input::Text(text) => Ok(text),
262        Input::Bytes(bytes) => String::from_utf8(bytes).map_err(|err| Error::CodecError {
263            codec,
264            message: format!("CL-lite input is not valid UTF-8: {err}"),
265        }),
266    }
267}