sonatina_parser/
parser.rs

1// TODO: Refactor and refactor and refactor!!!
2use std::collections::HashSet;
3
4use cranelift_entity::SecondaryMap;
5use smallvec::smallvec;
6
7use sonatina_ir::{
8    builder::ModuleBuilder,
9    func_cursor::{CursorLocation, FuncCursor},
10    global_variable::ConstantValue,
11    insn::{BinaryOp, CastOp, DataLocationKind, JumpOp, UnaryOp},
12    isa::IsaBuilder,
13    module::{FuncRef, ModuleCtx},
14    Block, BlockData, Function, GlobalVariableData, Immediate, Insn, InsnData, Linkage, Module,
15    Signature, Type, Value, ValueData, I256, U256,
16};
17use sonatina_triple::TargetTriple;
18
19use super::{
20    lexer::{Code, Lexer, Token, WithLoc},
21    Error, ErrorKind, Result,
22};
23
/// Entry point for turning IR source text into a [`ParsedModule`].
///
/// The struct holds no state; construct it with `Parser::default()` and call
/// [`Parser::parse`].
#[derive(Default)]
pub struct Parser {}
26
// Consumes the next token only if it matches the pattern `$token`.
//
// Expands to a `Result<Option<_>>` expression: `Ok(Some(token))` when the
// peeked token matched (the token is then taken from the lexer), `Ok(None)`
// when it did not match, in which case nothing is consumed. Lexer failures
// are propagated with `?`, so the macro may only be used inside a function
// that returns this module's `Result`.
macro_rules! eat_token {
    ($lexer:expr, $token:pat) => {
        match $lexer.peek_token()? {
            Some(WithLoc { item: $token, .. }) => Ok(Some($lexer.next_token()?.unwrap().item)),
            _ => Ok(None),
        }
    };
}
36
// Requires the next token to match the pattern `$token`.
//
// Expands to a `Result<_>` expression holding the consumed token on success.
// On a mismatch the offending token is consumed and a `SyntaxError` is
// produced that names `$expected` and the token actually found (or `EOF` when
// the input is exhausted), together with the line it was found on.
macro_rules! expect_token {
    ($lexer:expr, $token:pat, $expected:expr) => {
        match eat_token!($lexer, $token)? {
            Some(matched) => Ok(matched),
            None => {
                let (found, line) = match $lexer.next_token()? {
                    Some(unexpected) => (unexpected.item.to_string(), unexpected.line),
                    None => ("EOF".to_string(), $lexer.line()),
                };
                Err(Error::new(
                    ErrorKind::SyntaxError(format!(
                        "expected `{}`, but got `{}`",
                        $expected, found
                    )),
                    line,
                ))
            }
        }
    };
}
53
54impl Parser {
55    pub fn parse(self, input: &str) -> Result<ParsedModule> {
56        let mut lexer = Lexer::new(input);
57
58        // Parse comments.
59        let mut module_comments = Vec::new();
60        while let Some(WithLoc {
61            item: Token::ModuleComment(comment),
62            ..
63        }) = lexer.peek_token()?
64        {
65            module_comments.push(comment.to_string());
66            lexer.next_token()?;
67        }
68
69        // Parse target triple.
70        let triple = self.parse_target_triple(&mut lexer)?;
71        let isa = IsaBuilder::new(triple).build();
72        let ctx = ModuleCtx::new(isa);
73
74        let mut module_builder = ModuleBuilder::new(ctx);
75
76        // Parse declared struct types.
77        while eat_token!(lexer, Token::Type)?.is_some() {
78            let name = expect_token!(lexer, Token::Ident(_), "type name")?.string();
79            expect_token!(lexer, Token::Eq, "=")?;
80            let packed = eat_token!(lexer, Token::LAngleBracket)?.is_some();
81            expect_token!(lexer, Token::LBrace, "{")?;
82
83            let mut fields = vec![];
84            if eat_token!(lexer, Token::RBrace)?.is_none() {
85                loop {
86                    let ty = expect_ty(&module_builder.ctx, &mut lexer)?;
87                    fields.push(ty);
88                    if eat_token!(lexer, Token::RBrace)?.is_some() {
89                        break;
90                    }
91                    expect_token!(lexer, Token::Comma, ",")?;
92                }
93            }
94            if packed {
95                expect_token!(lexer, Token::RAngleBracket, ">")?;
96            }
97            expect_token!(lexer, Token::SemiColon, ";")?;
98
99            module_builder.declare_struct_type(name, &fields, packed);
100        }
101
102        // Parse global variables.
103        while eat_token!(lexer, Token::Gv)?.is_some() {
104            let linkage = expect_linkage(&mut lexer)?;
105            let is_const = eat_token!(lexer, Token::Const)?.is_some();
106            let symbol = expect_token!(lexer, Token::Ident(_), "global variable name")?.string();
107            expect_token!(lexer, Token::Colon, ":")?;
108            let ty = expect_ty(&module_builder.ctx, &mut lexer)?;
109
110            let init = eat_token!(lexer, Token::Eq)?
111                .map(|_| {
112                    let init = expect_constant(&module_builder.ctx, &mut lexer, ty)?;
113                    Ok(init)
114                })
115                .transpose()?;
116
117            expect_token!(lexer, Token::SemiColon, ";")?;
118            let gv_data = GlobalVariableData::new(symbol.to_string(), ty, linkage, is_const, init);
119            module_builder.make_global(gv_data);
120        }
121
122        // Parse declared functions.
123        while eat_token!(lexer, Token::Declare)?.is_some() {
124            let sig = self.parse_declared_func_sig(&module_builder.ctx, &mut lexer)?;
125            expect_token!(lexer, Token::SemiColon, ";")?;
126            module_builder.declare_function(sig);
127        }
128
129        // Parse functions.
130        let mut func_comments = SecondaryMap::default();
131        while let Some(parsed_func) = FuncParser::new(&mut lexer, &mut module_builder).parse()? {
132            let func_ref = parsed_func.func_ref;
133            func_comments[func_ref] = parsed_func.comments;
134        }
135
136        Ok(ParsedModule {
137            module: module_builder.build(),
138            module_comments,
139            func_comments,
140        })
141    }
142
143    fn parse_target_triple(&self, lexer: &mut Lexer) -> Result<TargetTriple> {
144        expect_token!(lexer, Token::Target, "target")?;
145        expect_token!(lexer, Token::Eq, "=")?;
146        let triple = expect_token!(lexer, Token::String(..), "target triple")?.string();
147
148        TargetTriple::parse(triple)
149            .map_err(|e| Error::new(ErrorKind::SemanticError(format!("{}", e)), lexer.line()))
150    }
151
152    fn parse_declared_func_sig(&self, ctx: &ModuleCtx, lexer: &mut Lexer) -> Result<Signature> {
153        let linkage = expect_linkage(lexer)?;
154        let name = expect_token!(lexer, Token::Ident(..), "func name")?.string();
155
156        // Parse argument types.
157        expect_token!(lexer, Token::LParen, "(")?;
158        let mut args = vec![];
159        if eat_token!(lexer, Token::RParen)?.is_none() {
160            let ty = expect_ty(ctx, lexer)?;
161            args.push(ty);
162            while eat_token!(lexer, Token::RParen)?.is_none() {
163                expect_token!(lexer, Token::Comma, ",")?;
164                let ty = expect_ty(ctx, lexer)?;
165                args.push(ty);
166            }
167        }
168
169        // Parse return type.
170        expect_token!(lexer, Token::RArrow, "->")?;
171        let ret_ty = expect_ty(ctx, lexer)?;
172
173        Ok(Signature::new(name, linkage, &args, ret_ty))
174    }
175}
176
/// Result of [`Parser::parse`]: the built module plus the comments found in the source.
pub struct ParsedModule {
    /// The module produced by the module builder.
    pub module: Module,
    /// Module-level comments collected from the top of the input, in source order.
    pub module_comments: Vec<String>,
    /// Comments collected in front of each parsed function, keyed by its `FuncRef`.
    pub func_comments: SecondaryMap<FuncRef, Vec<String>>,
}
182
/// Outcome of parsing a single function definition.
struct ParsedFunction {
    /// Reference under which the function was registered in the module builder.
    func_ref: FuncRef,
    /// Function comments collected immediately before the `func` keyword.
    comments: Vec<String>,
}
187
/// Parser for one function definition.
///
/// Borrows the shared lexer and the module builder for the duration of the parse.
struct FuncParser<'a, 'b> {
    /// Token source shared with the module-level parser.
    lexer: &'b mut Lexer<'a>,
    /// Builder that receives the parsed function and resolves callee names.
    module_builder: &'b mut ModuleBuilder,
}
192
193impl<'a, 'b> FuncParser<'a, 'b> {
194    fn new(lexer: &'b mut Lexer<'a>, module_builder: &'b mut ModuleBuilder) -> Self {
195        Self {
196            lexer,
197            module_builder,
198        }
199    }
200
201    fn parse(&mut self) -> Result<Option<ParsedFunction>> {
202        if self.lexer.peek_token()?.is_none() {
203            return Ok(None);
204        }
205
206        let comments = self.parse_comment()?;
207        expect_token!(self.lexer, Token::Func, "func")?;
208        let linkage = expect_linkage(self.lexer)?;
209
210        let fn_name = expect_token!(self.lexer, Token::Ident(..), "func name")?.string();
211
212        expect_token!(self.lexer, Token::LParen, "(")?;
213        // Use `Void` for dummy return type.
214        let sig = Signature::new(fn_name, linkage, &[], Type::Void);
215        let mut func = Function::new(&self.module_builder.ctx, sig);
216        let mut inserter = InsnInserter::new(&mut func);
217
218        if let Some(value) = eat_token!(self.lexer, Token::Value(..))? {
219            let value = Value(value.id());
220            inserter.def_value(value, self.lexer.line())?;
221            expect_token!(self.lexer, Token::Dot, "dot")?;
222            let ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
223            inserter.append_arg_value(value, ty);
224
225            while eat_token!(self.lexer, Token::Comma)?.is_some() {
226                let value = Value(expect_token!(self.lexer, Token::Value(..), "value")?.id());
227                inserter.def_value(value, self.lexer.line())?;
228                expect_token!(self.lexer, Token::Dot, "dot")?;
229                let ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
230                inserter.append_arg_value(value, ty);
231            }
232        }
233        expect_token!(self.lexer, Token::RParen, ")")?;
234
235        // Parse return type.
236        expect_token!(self.lexer, Token::RArrow, "->")?;
237        let ret_ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
238        inserter.func.sig.set_ret_ty(ret_ty);
239        expect_token!(self.lexer, Token::Colon, ":")?;
240
241        self.parse_body(&mut inserter)?;
242
243        let func_ref = self.module_builder.declare_function(func.sig.clone());
244        std::mem::swap(&mut self.module_builder.funcs[func_ref], &mut func);
245        Ok(Some(ParsedFunction { func_ref, comments }))
246    }
247
248    fn parse_body(&mut self, inserter: &mut InsnInserter) -> Result<()> {
249        while let Some(id) = eat_token!(self.lexer, Token::Block(..))? {
250            expect_token!(self.lexer, Token::Colon, ":")?;
251            self.parse_block_body(inserter, Block(id.id()))?;
252        }
253
254        Ok(())
255    }
256
257    fn parse_block_body(&mut self, inserter: &mut InsnInserter, block: Block) -> Result<()> {
258        inserter.def_block(block, self.lexer.line(), BlockData::default())?;
259        inserter.append_block(block);
260        inserter.set_loc(CursorLocation::BlockTop(block));
261
262        loop {
263            if let Some(value) = eat_token!(self.lexer, Token::Value(..))? {
264                expect_token!(self.lexer, Token::Dot, ".")?;
265                let ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
266                expect_token!(self.lexer, Token::Eq, "=")?;
267                let opcode = expect_token!(self.lexer, Token::OpCode(..), "opcode")?.opcode();
268                let insn = opcode.make_insn(self, inserter, Some(ty))?;
269                let value = Value(value.id());
270                inserter.def_value(value, self.lexer.line())?;
271                let result = inserter.func.dfg.make_result(insn).unwrap();
272                inserter.func.dfg.values[value] = result;
273                inserter.func.dfg.attach_result(insn, value);
274            } else if let Some(opcode) = eat_token!(self.lexer, Token::OpCode(..))? {
275                opcode.opcode().make_insn(self, inserter, None)?;
276            } else {
277                break;
278            }
279        }
280
281        Ok(())
282    }
283
284    fn expect_insn_arg(
285        &mut self,
286        inserter: &mut InsnInserter,
287        idx: usize,
288        undefs: &mut Vec<usize>,
289    ) -> Result<Value> {
290        if let Some(value) = eat_token!(self.lexer, Token::Value(..))? {
291            let value = Value(value.id());
292            if !inserter.defined_values.contains(&value) {
293                undefs.push(idx);
294            }
295            Ok(value)
296        } else if let Some(ident) = eat_token!(self.lexer, Token::Ident(..))? {
297            let gv = inserter
298                .func()
299                .dfg
300                .ctx
301                .with_gv_store(|s| s.gv_by_symbol(ident.string()))
302                .unwrap();
303            Ok(inserter.func_mut().dfg.make_global_value(gv))
304        } else {
305            let number =
306                expect_token!(self.lexer, Token::Integer(..), "immediate or value")?.string();
307            expect_token!(self.lexer, Token::Dot, "type annotation for immediate")?;
308            let ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
309            let imm = build_imm_data(number, &ty, self.lexer.line())?;
310            Ok(inserter.def_imm(imm))
311        }
312    }
313
314    fn expect_block(&mut self) -> Result<Block> {
315        let id = expect_token!(self.lexer, Token::Block(..), "block")?.id();
316        Ok(Block(id))
317    }
318
319    fn expect_data_loc_kind(&mut self) -> Result<DataLocationKind> {
320        let token = expect_token!(self.lexer, Token::DataLocationKind(..), "data location")?;
321
322        match token {
323            Token::DataLocationKind(loc) => Ok(loc),
324            _ => unreachable!(),
325        }
326    }
327
328    fn parse_comment(&mut self) -> Result<Vec<String>> {
329        let mut comments = Vec::new();
330        while let Some(line) = eat_token!(self.lexer, Token::FuncComment(..))? {
331            comments.push(line.string().to_string());
332        }
333        Ok(comments)
334    }
335}
336
337fn expect_ty(ctx: &ModuleCtx, lexer: &mut Lexer) -> Result<Type> {
338    if let Some(ty) = eat_token!(lexer, Token::BaseTy(..))?.map(|tok| tok.ty()) {
339        return Ok(ty);
340    };
341
342    if eat_token!(lexer, Token::LBracket)?.is_some() {
343        // Try parse array element type.
344        let elem_ty = expect_ty(ctx, lexer)?;
345        expect_token!(lexer, Token::SemiColon, ";")?;
346        // Try parse array length.
347        let len = expect_token!(lexer, Token::Integer(..), " or value")?
348            .string()
349            .parse()
350            .map_err(|err| Error::new(ErrorKind::SyntaxError(format!("{}", err)), lexer.line()))?;
351        expect_token!(lexer, Token::RBracket, "]")?;
352        Ok(ctx.with_ty_store_mut(|s| s.make_array(elem_ty, len)))
353    } else if eat_token!(lexer, Token::Star)?.is_some() {
354        // Try parse ptr base type.
355        let elem_ty = expect_ty(ctx, lexer)?;
356        Ok(ctx.with_ty_store_mut(|s| s.make_ptr(elem_ty)))
357    } else if let Some(tok) = eat_token!(lexer, Token::Ident(..))? {
358        let name = tok.string();
359        ctx.with_ty_store(|s| s.struct_type_by_name(name))
360            .ok_or_else(|| {
361                Error::new(
362                    ErrorKind::SemanticError(format!("type `{name}` is not declared")),
363                    lexer.line(),
364                )
365            })
366    } else {
367        Err(Error::new(
368            ErrorKind::SyntaxError("invalid type".into()),
369            lexer.line(),
370        ))
371    }
372}
373
374fn expect_linkage(lexer: &mut Lexer) -> Result<Linkage> {
375    let token = expect_token!(lexer, Token::Linkage { .. }, "linkage")?;
376    match token {
377        Token::Linkage(linkage) => Ok(linkage),
378        _ => unreachable!(),
379    }
380}
381
382fn expect_constant(ctx: &ModuleCtx, lexer: &mut Lexer, ty: Type) -> Result<ConstantValue> {
383    if let Some(number) = eat_token!(lexer, Token::Integer(..))? {
384        if !ty.is_integral() {
385            return Err(Error::new(
386                ErrorKind::SemanticError("expected integral type".to_string()),
387                lexer.line(),
388            ));
389        }
390
391        let data = build_imm_data(number.string(), &ty, lexer.line())?;
392        Ok(ConstantValue::Immediate(data))
393    } else if eat_token!(lexer, Token::LBracket)?.is_some() {
394        let (elem_ty, mut len) = ctx.with_ty_store(|s| s.array_def(ty)).ok_or_else(|| {
395            Error::new(
396                ErrorKind::SemanticError("expcted array type".into()),
397                lexer.line(),
398            )
399        })?;
400
401        let mut data = Vec::with_capacity(len);
402        while len > 0 {
403            let elem = expect_constant(ctx, lexer, elem_ty)?;
404            data.push(elem);
405            if len > 1 {
406                expect_token!(lexer, Token::Comma, ",")?;
407            }
408            len -= 1;
409        }
410
411        expect_token!(lexer, Token::RBracket, "]")?;
412        Ok(ConstantValue::Array(data))
413    } else if eat_token!(lexer, Token::LBrace)?.is_some() {
414        let fields = ctx
415            .with_ty_store(|s| s.struct_def(ty).map(|def| def.fields.clone()))
416            .ok_or_else(|| {
417                Error::new(
418                    ErrorKind::SemanticError("expected struct type".into()),
419                    lexer.line(),
420                )
421            })?;
422
423        let mut data = Vec::with_capacity(fields.len());
424        let field_len = fields.len();
425        for (i, field_ty) in fields.into_iter().enumerate() {
426            let field = expect_constant(ctx, lexer, field_ty)?;
427            data.push(field);
428            if i < field_len - 1 {
429                expect_token!(lexer, Token::Comma, ",")?;
430            }
431        }
432        expect_token!(lexer, Token::RBrace, "}")?;
433        Ok(ConstantValue::Struct(data))
434    } else {
435        Err(Error::new(
436            ErrorKind::SyntaxError("invalid constant".into()),
437            lexer.line(),
438        ))
439    }
440}
441
/// Cursor used while parsing a function body.
///
/// Inserts instructions into `func` and tracks which values and blocks the
/// source text has defined so far.
struct InsnInserter<'a> {
    /// Function being populated.
    func: &'a mut Function,
    /// Current cursor position; starts at `CursorLocation::NoWhere`.
    loc: CursorLocation,
    /// Values that have been defined through `def_value`.
    defined_values: HashSet<Value>,
    /// Blocks that have been defined through `def_block`.
    defined_blocks: HashSet<Block>,
    /// Values that were created for immediates through `def_imm`.
    defined_imms: HashSet<Value>,
    /// `(instruction, argument index)` pairs whose argument named a value that
    /// was not yet defined when the instruction was parsed.
    undefs: HashSet<(Insn, usize)>,
}
450
451impl<'a> InsnInserter<'a> {
452    fn new(func: &'a mut Function) -> Self {
453        Self {
454            func,
455            loc: CursorLocation::NoWhere,
456            defined_values: HashSet::new(),
457            defined_blocks: HashSet::new(),
458            defined_imms: HashSet::new(),
459            undefs: HashSet::new(),
460        }
461    }
462
463    fn def_value(&mut self, value: Value, line: u32) -> Result<()> {
464        if self.defined_values.contains(&value) {
465            return Err(Error::new(
466                ErrorKind::SemanticError(format!("v{} is already defined", value.0)),
467                line,
468            ));
469        }
470        self.defined_values.insert(value);
471
472        let value_len = self.func.dfg.values.len();
473        let value_id = value.0 as usize;
474
475        if value_len <= value_id {
476            self.func.dfg.values.reserve(value_id);
477            for _ in 0..(value_id - value_len + 1) {
478                // Make dummy value.
479                self.func.dfg.values.push(ValueData::Arg {
480                    ty: Type::I8,
481                    idx: usize::MAX,
482                });
483            }
484        }
485
486        if self.defined_imms.contains(&value) {
487            let imm_data = self.func.dfg.value_data(value).clone();
488            let new_imm_value = self.func.dfg.make_value(imm_data);
489            let mut must_replace = vec![];
490            for &user in self.func.dfg.users(value) {
491                for (idx, &arg) in self.func.dfg.insn_args(user).iter().enumerate() {
492                    if arg == value && !self.undefs.contains(&(user, idx)) {
493                        must_replace.push((user, idx));
494                    }
495                }
496            }
497
498            for (insn, idx) in must_replace {
499                self.func.dfg.replace_insn_arg(insn, new_imm_value, idx);
500            }
501
502            let imm = self.func.dfg.value_imm(new_imm_value).unwrap();
503            self.func.dfg.immediates.insert(imm, new_imm_value);
504            self.defined_imms.remove(&value);
505            self.defined_imms.insert(new_imm_value);
506        }
507
508        Ok(())
509    }
510
511    fn def_imm(&mut self, imm: Immediate) -> Value {
512        let value = self.func.dfg.make_imm_value(imm);
513        self.defined_imms.insert(value);
514        value
515    }
516
517    fn def_block(&mut self, block: Block, line: u32, block_data: BlockData) -> Result<()> {
518        if self.defined_blocks.contains(&block) {
519            return Err(Error::new(
520                ErrorKind::SemanticError(format!("block{} is already defined", block.0)),
521                line,
522            ));
523        }
524        self.defined_blocks.insert(block);
525
526        let block_id = block.0 as usize;
527        let block_len = self.func.dfg.blocks.len();
528
529        if block_len <= block_id {
530            self.func.dfg.blocks.reserve(block_id);
531            for _ in 0..(block_id - block_len + 1) {
532                // Make dummy block.
533                self.func.dfg.blocks.push(BlockData::default());
534            }
535        }
536
537        self.func.dfg.blocks[block] = block_data;
538        Ok(())
539    }
540
541    fn insert_insn_data(&mut self, insn_data: InsnData) -> Insn {
542        let insn = self.func.dfg.make_insn(insn_data);
543        self.insert_insn(insn);
544        self.set_loc(CursorLocation::At(insn));
545        insn
546    }
547
548    fn append_arg_value(&mut self, value: Value, ty: Type) {
549        let idx = self.func.arg_values.len();
550
551        let value_data = self.func.dfg.make_arg_value(ty, idx);
552        self.func.sig.append_arg(ty);
553        self.func.dfg.values[value] = value_data;
554        self.func.arg_values.push(value);
555    }
556}
557
558impl<'a> FuncCursor for InsnInserter<'a> {
559    fn set_loc(&mut self, loc: CursorLocation) {
560        self.loc = loc;
561    }
562
563    fn func(&self) -> &Function {
564        self.func
565    }
566
567    fn func_mut(&mut self) -> &mut Function {
568        self.func
569    }
570
571    fn loc(&self) -> CursorLocation {
572        self.loc
573    }
574}
575
// Parses `<arg>;` and builds `InsnData::Unary` with the given op code.
// Forward references are reported through `$undefs` under argument index 0.
macro_rules! make_unary {
    ($parser:ident, $inserter:ident, $code:path, $undefs:expr) => {{
        let operand = $parser.expect_insn_arg($inserter, 0, $undefs)?;
        expect_token!($parser.lexer, Token::SemiColon, ";")?;
        InsnData::Unary {
            code: $code,
            args: [operand],
        }
    }};
}
586
// Parses `<arg> <arg>;` and builds `InsnData::Binary` with the given op code.
// Forward references are reported through `$undefs` under argument index 0 or 1.
macro_rules! make_binary {
    ($parser:ident, $inserter:ident, $code:path, $undefs:expr) => {{
        let left = $parser.expect_insn_arg($inserter, 0, $undefs)?;
        let right = $parser.expect_insn_arg($inserter, 1, $undefs)?;
        expect_token!($parser.lexer, Token::SemiColon, ";")?;
        InsnData::Binary {
            code: $code,
            args: [left, right],
        }
    }};
}
598
// Parses `<arg>;` and builds `InsnData::Cast` that converts the argument to
// `$cast_to` using the given op code. `$cast_to` is evaluated after the
// argument and the terminating `;` have been parsed.
macro_rules! make_cast {
    ($parser:ident, $inserter:ident, $cast_to:expr, $code:path, $undefs:expr) => {{
        let operand = $parser.expect_insn_arg($inserter, 0, $undefs)?;
        expect_token!($parser.lexer, Token::SemiColon, ";")?;
        InsnData::Cast {
            code: $code,
            args: [operand],
            ty: $cast_to,
        }
    }};
}
610
// Parses `<block>;` and builds `InsnData::Jump` to that block with the given op code.
macro_rules! make_jump {
    ($parser:ident, $code:path) => {{
        let target = $parser.expect_block()?;
        expect_token!($parser.lexer, Token::SemiColon, ";")?;
        InsnData::Jump {
            code: $code,
            dests: [target],
        }
    }};
}
621
622impl Code {
623    /// Read args and create insn data.
624    fn make_insn(
625        self,
626        parser: &mut FuncParser,
627        inserter: &mut InsnInserter,
628        ret_ty: Option<Type>,
629    ) -> Result<Insn> {
630        let mut undefs = vec![];
631        let insn_data = match self {
632            Self::Not => make_unary!(parser, inserter, UnaryOp::Not, &mut undefs),
633            Self::Neg => make_unary!(parser, inserter, UnaryOp::Neg, &mut undefs),
634            Self::Add => make_binary!(parser, inserter, BinaryOp::Add, &mut undefs),
635            Self::Sub => make_binary!(parser, inserter, BinaryOp::Sub, &mut undefs),
636            Self::Mul => make_binary!(parser, inserter, BinaryOp::Mul, &mut undefs),
637            Self::Udiv => make_binary!(parser, inserter, BinaryOp::Udiv, &mut undefs),
638            Self::Sdiv => make_binary!(parser, inserter, BinaryOp::Sdiv, &mut undefs),
639            Self::Lt => make_binary!(parser, inserter, BinaryOp::Lt, &mut undefs),
640            Self::Gt => make_binary!(parser, inserter, BinaryOp::Gt, &mut undefs),
641            Self::Slt => make_binary!(parser, inserter, BinaryOp::Slt, &mut undefs),
642            Self::Sgt => make_binary!(parser, inserter, BinaryOp::Sgt, &mut undefs),
643            Self::Le => make_binary!(parser, inserter, BinaryOp::Le, &mut undefs),
644            Self::Ge => make_binary!(parser, inserter, BinaryOp::Ge, &mut undefs),
645            Self::Sle => make_binary!(parser, inserter, BinaryOp::Sle, &mut undefs),
646            Self::Sge => make_binary!(parser, inserter, BinaryOp::Sge, &mut undefs),
647            Self::Eq => make_binary!(parser, inserter, BinaryOp::Eq, &mut undefs),
648            Self::Ne => make_binary!(parser, inserter, BinaryOp::Ne, &mut undefs),
649            Self::And => make_binary!(parser, inserter, BinaryOp::And, &mut undefs),
650            Self::Or => make_binary!(parser, inserter, BinaryOp::Or, &mut undefs),
651            Self::Xor => make_binary!(parser, inserter, BinaryOp::Xor, &mut undefs),
652            Self::Sext => make_cast!(parser, inserter, ret_ty.unwrap(), CastOp::Sext, &mut undefs),
653            Self::Zext => make_cast!(parser, inserter, ret_ty.unwrap(), CastOp::Zext, &mut undefs),
654            Self::BitCast => make_cast!(
655                parser,
656                inserter,
657                ret_ty.unwrap(),
658                CastOp::BitCast,
659                &mut undefs
660            ),
661            Self::Trunc => make_cast!(
662                parser,
663                inserter,
664                ret_ty.unwrap(),
665                CastOp::Trunc,
666                &mut undefs
667            ),
668
669            Self::Load => {
670                let loc = parser.expect_data_loc_kind()?;
671                let arg = parser.expect_insn_arg(inserter, 0, &mut undefs)?;
672                expect_token!(parser.lexer, Token::SemiColon, ";")?;
673                InsnData::Load { args: [arg], loc }
674            }
675            Self::Store => {
676                let loc = parser.expect_data_loc_kind()?;
677                let lhs = parser.expect_insn_arg(inserter, 0, &mut undefs)?;
678                let rhs = parser.expect_insn_arg(inserter, 1, &mut undefs)?;
679                expect_token!(parser.lexer, Token::SemiColon, ";")?;
680                InsnData::Store {
681                    args: [lhs, rhs],
682                    loc,
683                }
684            }
685
686            Self::Call => {
687                let func_name =
688                    expect_token!(parser.lexer, Token::Ident(..), "func name")?.string();
689                let mut args = smallvec![];
690                let mut idx = 0;
691                while eat_token!(parser.lexer, Token::SemiColon)?.is_none() {
692                    let arg = parser.expect_insn_arg(inserter, idx, &mut undefs)?;
693                    args.push(arg);
694                    idx += 1;
695                }
696
697                let func = parser
698                    .module_builder
699                    .get_func_ref(func_name)
700                    .ok_or_else(|| {
701                        Error::new(
702                            ErrorKind::SemanticError(format!("%{} is not declared", func_name)),
703                            parser.lexer.line(),
704                        )
705                    })?;
706                let sig = parser.module_builder.get_sig(func).clone();
707                let ret_ty = sig.ret_ty();
708                inserter.func_mut().callees.insert(func, sig);
709                InsnData::Call { func, args, ret_ty }
710            }
711
712            Self::Jump => make_jump!(parser, JumpOp::Jump),
713            Self::FallThrough => make_jump!(parser, JumpOp::FallThrough),
714
715            Self::Br => {
716                let cond = parser.expect_insn_arg(inserter, 0, &mut undefs)?;
717                let then = parser.expect_block()?;
718                let else_ = parser.expect_block()?;
719                expect_token!(parser.lexer, Token::SemiColon, ";")?;
720                InsnData::Branch {
721                    args: [cond],
722                    dests: [then, else_],
723                }
724            }
725            Self::BrTable => {
726                let mut arg_idx = 0;
727                let mut args = smallvec![];
728                let cond = parser.expect_insn_arg(inserter, arg_idx, &mut undefs)?;
729                args.push(cond);
730                arg_idx += 1;
731
732                let default = if eat_token!(parser.lexer, Token::Undef)?.is_some() {
733                    None
734                } else {
735                    Some(parser.expect_block()?)
736                };
737
738                let mut table = smallvec![];
739                while eat_token!(parser.lexer, Token::LParen)?.is_some() {
740                    let value = parser.expect_insn_arg(inserter, arg_idx, &mut undefs)?;
741                    args.push(value);
742                    let block = parser.expect_block()?;
743                    table.push(block);
744                    expect_token!(parser.lexer, Token::RParen, ")")?;
745                    arg_idx += 1;
746                }
747                expect_token!(parser.lexer, Token::SemiColon, ";")?;
748                InsnData::BrTable {
749                    args,
750                    default,
751                    table,
752                }
753            }
754
755            Self::Gep => {
756                let mut args = smallvec![];
757                let mut idx = 0;
758                while eat_token!(parser.lexer, Token::SemiColon)?.is_none() {
759                    let arg = parser.expect_insn_arg(inserter, idx, &mut undefs)?;
760                    args.push(arg);
761                    idx += 1;
762                }
763
764                InsnData::Gep { args }
765            }
766
767            Self::Alloca => {
768                let ty = expect_ty(&parser.module_builder.ctx, parser.lexer)?;
769                expect_token!(parser.lexer, Token::SemiColon, ";")?;
770                InsnData::Alloca { ty }
771            }
772
773            Self::Return => {
774                if eat_token!(parser.lexer, Token::SemiColon)?.is_some() {
775                    InsnData::Return { args: None }
776                } else {
777                    let value = parser.expect_insn_arg(inserter, 0, &mut undefs)?;
778                    expect_token!(parser.lexer, Token::SemiColon, ";")?;
779                    InsnData::Return { args: Some(value) }
780                }
781            }
782
783            Self::Phi => {
784                let mut values = smallvec![];
785                let mut blocks = smallvec![];
786                let mut idx = 0;
787                while eat_token!(parser.lexer, Token::LParen)?.is_some() {
788                    let value = parser.expect_insn_arg(inserter, idx, &mut undefs)?;
789                    values.push(value);
790                    let block = parser.expect_block()?;
791                    blocks.push(block);
792                    expect_token!(parser.lexer, Token::RParen, ")")?;
793                    idx += 1;
794                }
795                expect_token!(parser.lexer, Token::SemiColon, ";")?;
796                InsnData::Phi {
797                    values,
798                    blocks,
799                    ty: ret_ty.unwrap(),
800                }
801            }
802        };
803
804        let insn = inserter.insert_insn_data(insn_data);
805        for undef in undefs {
806            inserter.undefs.insert((insn, undef));
807        }
808
809        Ok(insn)
810    }
811}
812
813fn build_imm_data(number: &str, ty: &Type, line: u32) -> Result<Immediate> {
814    match ty {
815        Type::I1 => number
816            .parse::<i8>()
817            .map(|val| Immediate::I1(val != 0))
818            .map_err(|err| parse_imm_error(err, line)),
819
820        Type::I8 => number
821            .parse::<i8>()
822            .or_else(|_| number.parse::<u8>().map(|v| v as i8))
823            .map(Into::into)
824            .map_err(|err| parse_imm_error(err, line)),
825
826        Type::I16 => number
827            .parse::<i16>()
828            .or_else(|_| number.parse::<u16>().map(|v| v as i16))
829            .map(Into::into)
830            .map_err(|err| parse_imm_error(err, line)),
831
832        Type::I32 => number
833            .parse::<i32>()
834            .or_else(|_| number.parse::<u32>().map(|v| v as i32))
835            .map(Into::into)
836            .map_err(|err| parse_imm_error(err, line)),
837
838        Type::I64 => number
839            .parse::<i64>()
840            .or_else(|_| number.parse::<u64>().map(|v| v as i64))
841            .map(Into::into)
842            .map_err(|err| parse_imm_error(err, line)),
843
844        Type::I128 => number
845            .parse::<i128>()
846            .or_else(|_| number.parse::<u128>().map(|v| v as i128))
847            .map(Into::into)
848            .map_err(|err| parse_imm_error(err, line)),
849
850        Type::I256 => {
851            let number = number.to_string();
852            let is_negative = number.as_bytes()[0] as char == '-';
853            let number = if is_negative { &number[1..] } else { &number };
854            let mut i256: I256 = U256::from_str_radix(number, 10)
855                .map(Into::into)
856                .map_err(|err| parse_imm_error(err, line))?;
857
858            if is_negative {
859                i256 = I256::zero().overflowing_sub(i256).0;
860            }
861
862            Ok(Immediate::I256(i256))
863        }
864
865        _ => Err(Error::new(
866            ErrorKind::SemanticError("can't use non integral types for immediates".into()),
867            line,
868        )),
869    }
870}
871
872fn parse_imm_error(err: impl std::fmt::Display, line: u32) -> Error {
873    Error::new(
874        ErrorKind::SemanticError(format!("failed to parse immediate: {}", err)),
875        line,
876    )
877}
878
#[cfg(test)]
mod tests {
    use super::*;

    use sonatina_ir::ir_writer::FuncWriter;

    /// Round-trip helper: parses `input` as a single function with `FuncParser`, dumps the
    /// built function with `FuncWriter`, and reports whether the dump matches `input`
    /// (both sides trimmed of leading/trailing whitespace).
    ///
    /// Panics through `unwrap` if the target triple, the parse, or the dump fails.
    fn test_func_parser(input: &str) -> bool {
        let mut lexer = Lexer::new(input);
        let triple = TargetTriple::parse("evm-ethereum-london").unwrap();
        let isa = IsaBuilder::new(triple).build();
        let mut module_builder = ModuleBuilder::new(ModuleCtx::new(isa));
        let parsed_func = FuncParser::new(&mut lexer, &mut module_builder)
            .parse()
            .unwrap()
            .unwrap();
        let module = module_builder.build();
        let mut writer = FuncWriter::new(&module.funcs[parsed_func.func_ref]);

        // The comparison is textual, so fixtures must be written exactly as the writer
        // prints them; only surrounding whitespace is ignored.
        input.trim() == writer.dump_string().unwrap().trim()
    }

    // Round-trips a function whose only instruction returns an immediate.
    #[test]
    fn parser_with_return() {
        assert!(test_func_parser(
            "func private %test_func() -> i32:
    block0:
        return 311.i32;"
        ));
    }

    // Round-trips a function with typed arguments that are used by later instructions,
    // ending in a `return` without a value.
    #[test]
    fn test_with_arg() {
        assert!(test_func_parser(
            "func public %test_func(v0.i32, v1.i64) -> void:
    block0:
        v2.i64 = sext v0;
        v3.i64 = mul v2 v1;
        return;
"
        ));
    }

    // Round-trips a function whose block labels are not contiguous and not ascending:
    // `block64` comes first and jumps to `block1`.
    #[test]
    fn parser_with_non_continuous_value() {
        assert!(test_func_parser(
            "func private %test_func() -> i32:
    block64:
        jump block1;

    block1:
        return 311.i32;"
        ));
    }

    // Round-trips a loop with phi instructions. The phi in `block1` refers to `v5`, which is
    // only defined later in `block5`, so this exercises use-before-definition of a value.
    #[test]
    fn parser_with_phi() {
        assert!(test_func_parser(
            "func private %test_func() -> void:
    block0:
        jump block1;

    block1:
        v4.i32 = phi (1.i32 block0) (v5 block5);
        br 1.i32 block6 block2;

    block2:
        br 1.i32 block4 block3;

    block3:
        jump block5;

    block4:
        jump block5;

    block5:
        v5.i32 = phi (2.i32 block3) (v4 block4);
        jump block1;

    block6:
        v3.i32 = add v4 v4;
        return;
        "
        ));
    }

    // Round-trips immediates used directly as operands, including negative literals and the
    // `i8` boundary values `127` and `-128`.
    #[test]
    fn parser_with_immediate() {
        assert!(test_func_parser(
            "func private %test_func() -> i8:
    block64:
        v0.i8 = add -1.i8 127.i8;
        v1.i8 = add v0 3.i8;
        jump block1;

    block1:
        v2.i16 = zext -128.i8;
        return v1;"
        ));
    }

    // Parses a whole module and checks that `#!` lines end up in `module_comments` and that
    // the `#` lines preceding each function end up in `func_comments` for that function.
    #[test]
    fn test_with_module_comment() {
        let input = "
            #! Module comment 1
            #! Module comment 2

            target = \"evm-ethereum-london\"

            # f1 start 1
            # f1 start 2
            func private %f1() -> i32:
                block0:
                    return 311.i32;

            # f2 start 1
            # f2 start 2
            func public %f2() -> i32:
                block0:
                    return 311.i32;
            ";

        let parser = Parser::default();
        let parsed_module = parser.parse(input).unwrap();
        let module_comments = parsed_module.module_comments;
        // The stored text excludes the comment marker but keeps the space that follows it.
        assert_eq!(module_comments[0], " Module comment 1");
        assert_eq!(module_comments[1], " Module comment 2");

        let module = parsed_module.module;
        // The assertions below rely on `iter_functions` yielding `%f1` before `%f2`.
        let mut funcs = module.iter_functions();
        let func1 = funcs.next().unwrap();
        let func1_comment = &parsed_module.func_comments[func1];
        assert_eq!(func1_comment[0], " f1 start 1");
        assert_eq!(func1_comment[1], " f1 start 2");

        let func2 = funcs.next().unwrap();
        let func2_comment = &parsed_module.func_comments[func2];
        assert_eq!(func2_comment[0], " f2 start 1");
        assert_eq!(func2_comment[1], " f2 start 2");
    }

    // Parses `type` declarations and checks the resulting struct definitions in the type
    // store: `{..}` yields a non-packed struct, `<{..}>` a packed one.
    #[test]
    fn test_with_struct_type() {
        let input = "
            target = \"evm-ethereum-london\"
            
            type %s1 = {i32, i64};
            type %s2_packed = <{i32, i64, *%s1}>;

            func public %test(v0.*%s1, v1.*%s2_packed) -> i32:
                block0:
                    return 311.i32;
            ";

        let parser = Parser::default();
        let module = parser.parse(input).unwrap().module;

        module.ctx.with_ty_store(|s| {
            let ty = s.struct_type_by_name("s1").unwrap();
            let def = s.struct_def(ty).unwrap();
            assert_eq!(def.fields.len(), 2);
            assert_eq!(def.fields[0], Type::I32);
            assert_eq!(def.fields[1], Type::I64);
            assert!(!def.packed);
        });

        // Obtain the `*%s1` pointer type through the mutable store accessor so it can be
        // compared against the third field of `%s2_packed` below.
        let s1_ptr_ty = module.ctx.with_ty_store_mut(|s| {
            let ty = s.struct_type_by_name("s1").unwrap();
            s.make_ptr(ty)
        });
        module.ctx.with_ty_store(|s| {
            let ty = s.struct_type_by_name("s2_packed").unwrap();
            let def = s.struct_def(ty).unwrap();
            assert_eq!(def.fields.len(), 3);
            assert_eq!(def.fields[0], Type::I32);
            assert_eq!(def.fields[1], Type::I64);
            assert_eq!(def.fields[2], s1_ptr_ty);
            assert!(def.packed);
        });
    }

    // Parses `gv` declarations and checks the data recorded in the global-variable store:
    // one public constant with an initializer, one external global without.
    #[test]
    fn test_with_gv() {
        let input = "
            target = \"evm-ethereum-london\"
            
            gv public const %CONST_PUBLIC: i32 = 1;
            gv external %GLOBAL_EXTERNAL: i32;

            func public %test() -> i32:
                block0:
                    v2.i32 =  add %CONST_PUBLIC %GLOBAL_EXTERNAL;
                    return v2;
            ";

        let parser = Parser::default();
        let module = parser.parse(input).unwrap().module;

        module.ctx.with_gv_store(|s| {
            // Symbols are looked up without the `%` sigil used in the source text.
            let symbol = "CONST_PUBLIC";
            let gv = s.gv_by_symbol(symbol).unwrap();
            let data = s.gv_data(gv);
            assert_eq!(data.symbol, symbol);
            assert_eq!(data.ty, Type::I32);
            assert_eq!(data.linkage, Linkage::Public);
            assert!(data.is_const);
            assert_eq!(data.data, Some(ConstantValue::make_imm(1i32)));
        });

        module.ctx.with_gv_store(|s| {
            let symbol = "GLOBAL_EXTERNAL";
            let gv = s.gv_by_symbol(symbol).unwrap();
            let data = s.gv_data(gv);
            assert_eq!(data.symbol, symbol);
            assert_eq!(data.ty, Type::I32);
            assert_eq!(data.linkage, Linkage::External);
            assert!(!data.is_const);
            // No initializer was given in the declaration.
            assert_eq!(data.data, None)
        });
    }
}