1use std::collections::HashSet;
3
4use cranelift_entity::SecondaryMap;
5use smallvec::smallvec;
6
7use sonatina_ir::{
8 builder::ModuleBuilder,
9 func_cursor::{CursorLocation, FuncCursor},
10 global_variable::ConstantValue,
11 insn::{BinaryOp, CastOp, DataLocationKind, JumpOp, UnaryOp},
12 isa::IsaBuilder,
13 module::{FuncRef, ModuleCtx},
14 Block, BlockData, Function, GlobalVariableData, Immediate, Insn, InsnData, Linkage, Module,
15 Signature, Type, Value, ValueData, I256, U256,
16};
17use sonatina_triple::TargetTriple;
18
19use super::{
20 lexer::{Code, Lexer, Token, WithLoc},
21 Error, ErrorKind, Result,
22};
23
/// Entry point for parsing the textual form of a whole module.
///
/// The struct carries no state of its own; the lexer and module builder used
/// while parsing are created inside [`Parser::parse`].
#[derive(Default)]
pub struct Parser {}
26
// Consumes the next token only if it matches the pattern `$token`.
//
// Expands to an expression that evaluates to `Ok(Some(token))` when the
// peeked token matches (the lexer is advanced past it), and to `Ok(None)`
// otherwise (the lexer is left untouched). Errors from the lexer itself are
// propagated with `?`, so the macro can only be used inside functions that
// return a compatible `Result`.
macro_rules! eat_token {
    ($lexer:expr, $token:pat) => {
        if matches!($lexer.peek_token()?, Some(WithLoc { item: $token, .. })) {
            // The peek above guarantees a token is available, so `unwrap` cannot fail.
            Ok(Some($lexer.next_token()?.unwrap().item))
        } else {
            Ok(None)
        }
    };
}
36
// Requires the next token to match the pattern `$token`.
//
// Expands to an expression that evaluates to `Ok(token)` on a match. On a
// mismatch the offending token is consumed and a `SyntaxError` is produced
// whose message names `$expected` and the token actually found, tagged with
// that token's line. When the input is exhausted the found token is reported
// as `EOF` and the lexer's current line is used.
macro_rules! expect_token {
    ($lexer:expr, $token:pat, $expected:expr) => {
        if let Some(tok) = eat_token!($lexer, $token)? {
            Ok(tok)
        } else {
            // Pull the unexpected token so it can be shown in the diagnostic.
            let (tok, line) = match $lexer.next_token()? {
                Some(tok) => ((tok.item.to_string(), tok.line)),
                None => (("EOF".to_string(), $lexer.line())),
            };
            Err(Error::new(
                ErrorKind::SyntaxError(format!("expected `{}`, but got `{}`", $expected, tok)),
                line,
            ))
        }
    };
}
53
54impl Parser {
55 pub fn parse(self, input: &str) -> Result<ParsedModule> {
56 let mut lexer = Lexer::new(input);
57
58 let mut module_comments = Vec::new();
60 while let Some(WithLoc {
61 item: Token::ModuleComment(comment),
62 ..
63 }) = lexer.peek_token()?
64 {
65 module_comments.push(comment.to_string());
66 lexer.next_token()?;
67 }
68
69 let triple = self.parse_target_triple(&mut lexer)?;
71 let isa = IsaBuilder::new(triple).build();
72 let ctx = ModuleCtx::new(isa);
73
74 let mut module_builder = ModuleBuilder::new(ctx);
75
76 while eat_token!(lexer, Token::Type)?.is_some() {
78 let name = expect_token!(lexer, Token::Ident(_), "type name")?.string();
79 expect_token!(lexer, Token::Eq, "=")?;
80 let packed = eat_token!(lexer, Token::LAngleBracket)?.is_some();
81 expect_token!(lexer, Token::LBrace, "{")?;
82
83 let mut fields = vec![];
84 if eat_token!(lexer, Token::RBrace)?.is_none() {
85 loop {
86 let ty = expect_ty(&module_builder.ctx, &mut lexer)?;
87 fields.push(ty);
88 if eat_token!(lexer, Token::RBrace)?.is_some() {
89 break;
90 }
91 expect_token!(lexer, Token::Comma, ",")?;
92 }
93 }
94 if packed {
95 expect_token!(lexer, Token::RAngleBracket, ">")?;
96 }
97 expect_token!(lexer, Token::SemiColon, ";")?;
98
99 module_builder.declare_struct_type(name, &fields, packed);
100 }
101
102 while eat_token!(lexer, Token::Gv)?.is_some() {
104 let linkage = expect_linkage(&mut lexer)?;
105 let is_const = eat_token!(lexer, Token::Const)?.is_some();
106 let symbol = expect_token!(lexer, Token::Ident(_), "global variable name")?.string();
107 expect_token!(lexer, Token::Colon, ":")?;
108 let ty = expect_ty(&module_builder.ctx, &mut lexer)?;
109
110 let init = eat_token!(lexer, Token::Eq)?
111 .map(|_| {
112 let init = expect_constant(&module_builder.ctx, &mut lexer, ty)?;
113 Ok(init)
114 })
115 .transpose()?;
116
117 expect_token!(lexer, Token::SemiColon, ";")?;
118 let gv_data = GlobalVariableData::new(symbol.to_string(), ty, linkage, is_const, init);
119 module_builder.make_global(gv_data);
120 }
121
122 while eat_token!(lexer, Token::Declare)?.is_some() {
124 let sig = self.parse_declared_func_sig(&module_builder.ctx, &mut lexer)?;
125 expect_token!(lexer, Token::SemiColon, ";")?;
126 module_builder.declare_function(sig);
127 }
128
129 let mut func_comments = SecondaryMap::default();
131 while let Some(parsed_func) = FuncParser::new(&mut lexer, &mut module_builder).parse()? {
132 let func_ref = parsed_func.func_ref;
133 func_comments[func_ref] = parsed_func.comments;
134 }
135
136 Ok(ParsedModule {
137 module: module_builder.build(),
138 module_comments,
139 func_comments,
140 })
141 }
142
143 fn parse_target_triple(&self, lexer: &mut Lexer) -> Result<TargetTriple> {
144 expect_token!(lexer, Token::Target, "target")?;
145 expect_token!(lexer, Token::Eq, "=")?;
146 let triple = expect_token!(lexer, Token::String(..), "target triple")?.string();
147
148 TargetTriple::parse(triple)
149 .map_err(|e| Error::new(ErrorKind::SemanticError(format!("{}", e)), lexer.line()))
150 }
151
152 fn parse_declared_func_sig(&self, ctx: &ModuleCtx, lexer: &mut Lexer) -> Result<Signature> {
153 let linkage = expect_linkage(lexer)?;
154 let name = expect_token!(lexer, Token::Ident(..), "func name")?.string();
155
156 expect_token!(lexer, Token::LParen, "(")?;
158 let mut args = vec![];
159 if eat_token!(lexer, Token::RParen)?.is_none() {
160 let ty = expect_ty(ctx, lexer)?;
161 args.push(ty);
162 while eat_token!(lexer, Token::RParen)?.is_none() {
163 expect_token!(lexer, Token::Comma, ",")?;
164 let ty = expect_ty(ctx, lexer)?;
165 args.push(ty);
166 }
167 }
168
169 expect_token!(lexer, Token::RArrow, "->")?;
171 let ret_ty = expect_ty(ctx, lexer)?;
172
173 Ok(Signature::new(name, linkage, &args, ret_ty))
174 }
175}
176
/// Result of [`Parser::parse`]: the built module plus the comments found in
/// the source text.
pub struct ParsedModule {
    /// The module built from the parsed declarations and functions.
    pub module: Module,
    /// Module-level comments collected before the target triple, in source order.
    pub module_comments: Vec<String>,
    /// Comments that immediately precede each function definition, keyed by function.
    pub func_comments: SecondaryMap<FuncRef, Vec<String>>,
}
182
/// Outcome of parsing a single function definition.
struct ParsedFunction {
    /// Reference to the function inside the module builder.
    func_ref: FuncRef,
    /// Function comments that preceded the `func` keyword.
    comments: Vec<String>,
}
187
/// Parser for one function definition; borrows the lexer and the module
/// builder of the enclosing module parse.
struct FuncParser<'a, 'b> {
    /// Token source shared with the module-level parser.
    lexer: &'b mut Lexer<'a>,
    /// Builder that receives the parsed function and provides the module context.
    module_builder: &'b mut ModuleBuilder,
}
192
193impl<'a, 'b> FuncParser<'a, 'b> {
194 fn new(lexer: &'b mut Lexer<'a>, module_builder: &'b mut ModuleBuilder) -> Self {
195 Self {
196 lexer,
197 module_builder,
198 }
199 }
200
201 fn parse(&mut self) -> Result<Option<ParsedFunction>> {
202 if self.lexer.peek_token()?.is_none() {
203 return Ok(None);
204 }
205
206 let comments = self.parse_comment()?;
207 expect_token!(self.lexer, Token::Func, "func")?;
208 let linkage = expect_linkage(self.lexer)?;
209
210 let fn_name = expect_token!(self.lexer, Token::Ident(..), "func name")?.string();
211
212 expect_token!(self.lexer, Token::LParen, "(")?;
213 let sig = Signature::new(fn_name, linkage, &[], Type::Void);
215 let mut func = Function::new(&self.module_builder.ctx, sig);
216 let mut inserter = InsnInserter::new(&mut func);
217
218 if let Some(value) = eat_token!(self.lexer, Token::Value(..))? {
219 let value = Value(value.id());
220 inserter.def_value(value, self.lexer.line())?;
221 expect_token!(self.lexer, Token::Dot, "dot")?;
222 let ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
223 inserter.append_arg_value(value, ty);
224
225 while eat_token!(self.lexer, Token::Comma)?.is_some() {
226 let value = Value(expect_token!(self.lexer, Token::Value(..), "value")?.id());
227 inserter.def_value(value, self.lexer.line())?;
228 expect_token!(self.lexer, Token::Dot, "dot")?;
229 let ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
230 inserter.append_arg_value(value, ty);
231 }
232 }
233 expect_token!(self.lexer, Token::RParen, ")")?;
234
235 expect_token!(self.lexer, Token::RArrow, "->")?;
237 let ret_ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
238 inserter.func.sig.set_ret_ty(ret_ty);
239 expect_token!(self.lexer, Token::Colon, ":")?;
240
241 self.parse_body(&mut inserter)?;
242
243 let func_ref = self.module_builder.declare_function(func.sig.clone());
244 std::mem::swap(&mut self.module_builder.funcs[func_ref], &mut func);
245 Ok(Some(ParsedFunction { func_ref, comments }))
246 }
247
248 fn parse_body(&mut self, inserter: &mut InsnInserter) -> Result<()> {
249 while let Some(id) = eat_token!(self.lexer, Token::Block(..))? {
250 expect_token!(self.lexer, Token::Colon, ":")?;
251 self.parse_block_body(inserter, Block(id.id()))?;
252 }
253
254 Ok(())
255 }
256
257 fn parse_block_body(&mut self, inserter: &mut InsnInserter, block: Block) -> Result<()> {
258 inserter.def_block(block, self.lexer.line(), BlockData::default())?;
259 inserter.append_block(block);
260 inserter.set_loc(CursorLocation::BlockTop(block));
261
262 loop {
263 if let Some(value) = eat_token!(self.lexer, Token::Value(..))? {
264 expect_token!(self.lexer, Token::Dot, ".")?;
265 let ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
266 expect_token!(self.lexer, Token::Eq, "=")?;
267 let opcode = expect_token!(self.lexer, Token::OpCode(..), "opcode")?.opcode();
268 let insn = opcode.make_insn(self, inserter, Some(ty))?;
269 let value = Value(value.id());
270 inserter.def_value(value, self.lexer.line())?;
271 let result = inserter.func.dfg.make_result(insn).unwrap();
272 inserter.func.dfg.values[value] = result;
273 inserter.func.dfg.attach_result(insn, value);
274 } else if let Some(opcode) = eat_token!(self.lexer, Token::OpCode(..))? {
275 opcode.opcode().make_insn(self, inserter, None)?;
276 } else {
277 break;
278 }
279 }
280
281 Ok(())
282 }
283
284 fn expect_insn_arg(
285 &mut self,
286 inserter: &mut InsnInserter,
287 idx: usize,
288 undefs: &mut Vec<usize>,
289 ) -> Result<Value> {
290 if let Some(value) = eat_token!(self.lexer, Token::Value(..))? {
291 let value = Value(value.id());
292 if !inserter.defined_values.contains(&value) {
293 undefs.push(idx);
294 }
295 Ok(value)
296 } else if let Some(ident) = eat_token!(self.lexer, Token::Ident(..))? {
297 let gv = inserter
298 .func()
299 .dfg
300 .ctx
301 .with_gv_store(|s| s.gv_by_symbol(ident.string()))
302 .unwrap();
303 Ok(inserter.func_mut().dfg.make_global_value(gv))
304 } else {
305 let number =
306 expect_token!(self.lexer, Token::Integer(..), "immediate or value")?.string();
307 expect_token!(self.lexer, Token::Dot, "type annotation for immediate")?;
308 let ty = expect_ty(&self.module_builder.ctx, self.lexer)?;
309 let imm = build_imm_data(number, &ty, self.lexer.line())?;
310 Ok(inserter.def_imm(imm))
311 }
312 }
313
314 fn expect_block(&mut self) -> Result<Block> {
315 let id = expect_token!(self.lexer, Token::Block(..), "block")?.id();
316 Ok(Block(id))
317 }
318
319 fn expect_data_loc_kind(&mut self) -> Result<DataLocationKind> {
320 let token = expect_token!(self.lexer, Token::DataLocationKind(..), "data location")?;
321
322 match token {
323 Token::DataLocationKind(loc) => Ok(loc),
324 _ => unreachable!(),
325 }
326 }
327
328 fn parse_comment(&mut self) -> Result<Vec<String>> {
329 let mut comments = Vec::new();
330 while let Some(line) = eat_token!(self.lexer, Token::FuncComment(..))? {
331 comments.push(line.string().to_string());
332 }
333 Ok(comments)
334 }
335}
336
337fn expect_ty(ctx: &ModuleCtx, lexer: &mut Lexer) -> Result<Type> {
338 if let Some(ty) = eat_token!(lexer, Token::BaseTy(..))?.map(|tok| tok.ty()) {
339 return Ok(ty);
340 };
341
342 if eat_token!(lexer, Token::LBracket)?.is_some() {
343 let elem_ty = expect_ty(ctx, lexer)?;
345 expect_token!(lexer, Token::SemiColon, ";")?;
346 let len = expect_token!(lexer, Token::Integer(..), " or value")?
348 .string()
349 .parse()
350 .map_err(|err| Error::new(ErrorKind::SyntaxError(format!("{}", err)), lexer.line()))?;
351 expect_token!(lexer, Token::RBracket, "]")?;
352 Ok(ctx.with_ty_store_mut(|s| s.make_array(elem_ty, len)))
353 } else if eat_token!(lexer, Token::Star)?.is_some() {
354 let elem_ty = expect_ty(ctx, lexer)?;
356 Ok(ctx.with_ty_store_mut(|s| s.make_ptr(elem_ty)))
357 } else if let Some(tok) = eat_token!(lexer, Token::Ident(..))? {
358 let name = tok.string();
359 ctx.with_ty_store(|s| s.struct_type_by_name(name))
360 .ok_or_else(|| {
361 Error::new(
362 ErrorKind::SemanticError(format!("type `{name}` is not declared")),
363 lexer.line(),
364 )
365 })
366 } else {
367 Err(Error::new(
368 ErrorKind::SyntaxError("invalid type".into()),
369 lexer.line(),
370 ))
371 }
372}
373
374fn expect_linkage(lexer: &mut Lexer) -> Result<Linkage> {
375 let token = expect_token!(lexer, Token::Linkage { .. }, "linkage")?;
376 match token {
377 Token::Linkage(linkage) => Ok(linkage),
378 _ => unreachable!(),
379 }
380}
381
382fn expect_constant(ctx: &ModuleCtx, lexer: &mut Lexer, ty: Type) -> Result<ConstantValue> {
383 if let Some(number) = eat_token!(lexer, Token::Integer(..))? {
384 if !ty.is_integral() {
385 return Err(Error::new(
386 ErrorKind::SemanticError("expected integral type".to_string()),
387 lexer.line(),
388 ));
389 }
390
391 let data = build_imm_data(number.string(), &ty, lexer.line())?;
392 Ok(ConstantValue::Immediate(data))
393 } else if eat_token!(lexer, Token::LBracket)?.is_some() {
394 let (elem_ty, mut len) = ctx.with_ty_store(|s| s.array_def(ty)).ok_or_else(|| {
395 Error::new(
396 ErrorKind::SemanticError("expcted array type".into()),
397 lexer.line(),
398 )
399 })?;
400
401 let mut data = Vec::with_capacity(len);
402 while len > 0 {
403 let elem = expect_constant(ctx, lexer, elem_ty)?;
404 data.push(elem);
405 if len > 1 {
406 expect_token!(lexer, Token::Comma, ",")?;
407 }
408 len -= 1;
409 }
410
411 expect_token!(lexer, Token::RBracket, "]")?;
412 Ok(ConstantValue::Array(data))
413 } else if eat_token!(lexer, Token::LBrace)?.is_some() {
414 let fields = ctx
415 .with_ty_store(|s| s.struct_def(ty).map(|def| def.fields.clone()))
416 .ok_or_else(|| {
417 Error::new(
418 ErrorKind::SemanticError("expected struct type".into()),
419 lexer.line(),
420 )
421 })?;
422
423 let mut data = Vec::with_capacity(fields.len());
424 let field_len = fields.len();
425 for (i, field_ty) in fields.into_iter().enumerate() {
426 let field = expect_constant(ctx, lexer, field_ty)?;
427 data.push(field);
428 if i < field_len - 1 {
429 expect_token!(lexer, Token::Comma, ",")?;
430 }
431 }
432 expect_token!(lexer, Token::RBrace, "}")?;
433 Ok(ConstantValue::Struct(data))
434 } else {
435 Err(Error::new(
436 ErrorKind::SyntaxError("invalid constant".into()),
437 lexer.line(),
438 ))
439 }
440}
441
/// Cursor over a function under construction that also tracks which values,
/// blocks, and immediates the parser has seen so far.
struct InsnInserter<'a> {
    /// The function being filled in.
    func: &'a mut Function,
    /// Current cursor location used when inserting instructions.
    loc: CursorLocation,
    /// Values that have been explicitly defined (arguments and instruction results).
    defined_values: HashSet<Value>,
    /// Blocks whose label has been parsed.
    defined_blocks: HashSet<Block>,
    /// Values that were created for immediates.
    defined_imms: HashSet<Value>,
    /// `(instruction, argument index)` pairs whose argument referred to a
    /// value that was not yet defined when the instruction was parsed.
    undefs: HashSet<(Insn, usize)>,
}
450
451impl<'a> InsnInserter<'a> {
452 fn new(func: &'a mut Function) -> Self {
453 Self {
454 func,
455 loc: CursorLocation::NoWhere,
456 defined_values: HashSet::new(),
457 defined_blocks: HashSet::new(),
458 defined_imms: HashSet::new(),
459 undefs: HashSet::new(),
460 }
461 }
462
463 fn def_value(&mut self, value: Value, line: u32) -> Result<()> {
464 if self.defined_values.contains(&value) {
465 return Err(Error::new(
466 ErrorKind::SemanticError(format!("v{} is already defined", value.0)),
467 line,
468 ));
469 }
470 self.defined_values.insert(value);
471
472 let value_len = self.func.dfg.values.len();
473 let value_id = value.0 as usize;
474
475 if value_len <= value_id {
476 self.func.dfg.values.reserve(value_id);
477 for _ in 0..(value_id - value_len + 1) {
478 self.func.dfg.values.push(ValueData::Arg {
480 ty: Type::I8,
481 idx: usize::MAX,
482 });
483 }
484 }
485
486 if self.defined_imms.contains(&value) {
487 let imm_data = self.func.dfg.value_data(value).clone();
488 let new_imm_value = self.func.dfg.make_value(imm_data);
489 let mut must_replace = vec![];
490 for &user in self.func.dfg.users(value) {
491 for (idx, &arg) in self.func.dfg.insn_args(user).iter().enumerate() {
492 if arg == value && !self.undefs.contains(&(user, idx)) {
493 must_replace.push((user, idx));
494 }
495 }
496 }
497
498 for (insn, idx) in must_replace {
499 self.func.dfg.replace_insn_arg(insn, new_imm_value, idx);
500 }
501
502 let imm = self.func.dfg.value_imm(new_imm_value).unwrap();
503 self.func.dfg.immediates.insert(imm, new_imm_value);
504 self.defined_imms.remove(&value);
505 self.defined_imms.insert(new_imm_value);
506 }
507
508 Ok(())
509 }
510
511 fn def_imm(&mut self, imm: Immediate) -> Value {
512 let value = self.func.dfg.make_imm_value(imm);
513 self.defined_imms.insert(value);
514 value
515 }
516
517 fn def_block(&mut self, block: Block, line: u32, block_data: BlockData) -> Result<()> {
518 if self.defined_blocks.contains(&block) {
519 return Err(Error::new(
520 ErrorKind::SemanticError(format!("block{} is already defined", block.0)),
521 line,
522 ));
523 }
524 self.defined_blocks.insert(block);
525
526 let block_id = block.0 as usize;
527 let block_len = self.func.dfg.blocks.len();
528
529 if block_len <= block_id {
530 self.func.dfg.blocks.reserve(block_id);
531 for _ in 0..(block_id - block_len + 1) {
532 self.func.dfg.blocks.push(BlockData::default());
534 }
535 }
536
537 self.func.dfg.blocks[block] = block_data;
538 Ok(())
539 }
540
541 fn insert_insn_data(&mut self, insn_data: InsnData) -> Insn {
542 let insn = self.func.dfg.make_insn(insn_data);
543 self.insert_insn(insn);
544 self.set_loc(CursorLocation::At(insn));
545 insn
546 }
547
548 fn append_arg_value(&mut self, value: Value, ty: Type) {
549 let idx = self.func.arg_values.len();
550
551 let value_data = self.func.dfg.make_arg_value(ty, idx);
552 self.func.sig.append_arg(ty);
553 self.func.dfg.values[value] = value_data;
554 self.func.arg_values.push(value);
555 }
556}
557
558impl<'a> FuncCursor for InsnInserter<'a> {
559 fn set_loc(&mut self, loc: CursorLocation) {
560 self.loc = loc;
561 }
562
563 fn func(&self) -> &Function {
564 self.func
565 }
566
567 fn func_mut(&mut self) -> &mut Function {
568 self.func
569 }
570
571 fn loc(&self) -> CursorLocation {
572 self.loc
573 }
574}
575
// Parses `<arg>;` and builds an `InsnData::Unary` with opcode `$code`.
macro_rules! make_unary {
    ($parser:ident, $inserter:ident, $code:path, $undefs:expr) => {{
        let operand = $parser.expect_insn_arg($inserter, 0, $undefs)?;
        expect_token!($parser.lexer, Token::SemiColon, ";")?;
        let args = [operand];
        InsnData::Unary { code: $code, args }
    }};
}
586
// Parses `<lhs> <rhs>;` and builds an `InsnData::Binary` with opcode `$code`.
macro_rules! make_binary {
    ($parser:ident, $inserter:ident, $code:path, $undefs:expr) => {{
        let left = $parser.expect_insn_arg($inserter, 0, $undefs)?;
        let right = $parser.expect_insn_arg($inserter, 1, $undefs)?;
        expect_token!($parser.lexer, Token::SemiColon, ";")?;
        let args = [left, right];
        InsnData::Binary { code: $code, args }
    }};
}
598
// Parses `<arg>;` and builds an `InsnData::Cast` with opcode `$code` whose
// target type is `$cast_to`. `$cast_to` is evaluated after the argument and
// the terminating semicolon have been parsed.
macro_rules! make_cast {
    ($parser:ident, $inserter:ident, $cast_to:expr, $code:path, $undefs:expr) => {{
        let source = $parser.expect_insn_arg($inserter, 0, $undefs)?;
        expect_token!($parser.lexer, Token::SemiColon, ";")?;
        InsnData::Cast {
            code: $code,
            args: [source],
            ty: $cast_to,
        }
    }};
}
610
// Parses `<block>;` and builds an `InsnData::Jump` with opcode `$code`.
macro_rules! make_jump {
    ($parser:ident, $code:path) => {{
        let target = $parser.expect_block()?;
        expect_token!($parser.lexer, Token::SemiColon, ";")?;
        let dests = [target];
        InsnData::Jump { code: $code, dests }
    }};
}
621
622impl Code {
623 fn make_insn(
625 self,
626 parser: &mut FuncParser,
627 inserter: &mut InsnInserter,
628 ret_ty: Option<Type>,
629 ) -> Result<Insn> {
630 let mut undefs = vec![];
631 let insn_data = match self {
632 Self::Not => make_unary!(parser, inserter, UnaryOp::Not, &mut undefs),
633 Self::Neg => make_unary!(parser, inserter, UnaryOp::Neg, &mut undefs),
634 Self::Add => make_binary!(parser, inserter, BinaryOp::Add, &mut undefs),
635 Self::Sub => make_binary!(parser, inserter, BinaryOp::Sub, &mut undefs),
636 Self::Mul => make_binary!(parser, inserter, BinaryOp::Mul, &mut undefs),
637 Self::Udiv => make_binary!(parser, inserter, BinaryOp::Udiv, &mut undefs),
638 Self::Sdiv => make_binary!(parser, inserter, BinaryOp::Sdiv, &mut undefs),
639 Self::Lt => make_binary!(parser, inserter, BinaryOp::Lt, &mut undefs),
640 Self::Gt => make_binary!(parser, inserter, BinaryOp::Gt, &mut undefs),
641 Self::Slt => make_binary!(parser, inserter, BinaryOp::Slt, &mut undefs),
642 Self::Sgt => make_binary!(parser, inserter, BinaryOp::Sgt, &mut undefs),
643 Self::Le => make_binary!(parser, inserter, BinaryOp::Le, &mut undefs),
644 Self::Ge => make_binary!(parser, inserter, BinaryOp::Ge, &mut undefs),
645 Self::Sle => make_binary!(parser, inserter, BinaryOp::Sle, &mut undefs),
646 Self::Sge => make_binary!(parser, inserter, BinaryOp::Sge, &mut undefs),
647 Self::Eq => make_binary!(parser, inserter, BinaryOp::Eq, &mut undefs),
648 Self::Ne => make_binary!(parser, inserter, BinaryOp::Ne, &mut undefs),
649 Self::And => make_binary!(parser, inserter, BinaryOp::And, &mut undefs),
650 Self::Or => make_binary!(parser, inserter, BinaryOp::Or, &mut undefs),
651 Self::Xor => make_binary!(parser, inserter, BinaryOp::Xor, &mut undefs),
652 Self::Sext => make_cast!(parser, inserter, ret_ty.unwrap(), CastOp::Sext, &mut undefs),
653 Self::Zext => make_cast!(parser, inserter, ret_ty.unwrap(), CastOp::Zext, &mut undefs),
654 Self::BitCast => make_cast!(
655 parser,
656 inserter,
657 ret_ty.unwrap(),
658 CastOp::BitCast,
659 &mut undefs
660 ),
661 Self::Trunc => make_cast!(
662 parser,
663 inserter,
664 ret_ty.unwrap(),
665 CastOp::Trunc,
666 &mut undefs
667 ),
668
669 Self::Load => {
670 let loc = parser.expect_data_loc_kind()?;
671 let arg = parser.expect_insn_arg(inserter, 0, &mut undefs)?;
672 expect_token!(parser.lexer, Token::SemiColon, ";")?;
673 InsnData::Load { args: [arg], loc }
674 }
675 Self::Store => {
676 let loc = parser.expect_data_loc_kind()?;
677 let lhs = parser.expect_insn_arg(inserter, 0, &mut undefs)?;
678 let rhs = parser.expect_insn_arg(inserter, 1, &mut undefs)?;
679 expect_token!(parser.lexer, Token::SemiColon, ";")?;
680 InsnData::Store {
681 args: [lhs, rhs],
682 loc,
683 }
684 }
685
686 Self::Call => {
687 let func_name =
688 expect_token!(parser.lexer, Token::Ident(..), "func name")?.string();
689 let mut args = smallvec![];
690 let mut idx = 0;
691 while eat_token!(parser.lexer, Token::SemiColon)?.is_none() {
692 let arg = parser.expect_insn_arg(inserter, idx, &mut undefs)?;
693 args.push(arg);
694 idx += 1;
695 }
696
697 let func = parser
698 .module_builder
699 .get_func_ref(func_name)
700 .ok_or_else(|| {
701 Error::new(
702 ErrorKind::SemanticError(format!("%{} is not declared", func_name)),
703 parser.lexer.line(),
704 )
705 })?;
706 let sig = parser.module_builder.get_sig(func).clone();
707 let ret_ty = sig.ret_ty();
708 inserter.func_mut().callees.insert(func, sig);
709 InsnData::Call { func, args, ret_ty }
710 }
711
712 Self::Jump => make_jump!(parser, JumpOp::Jump),
713 Self::FallThrough => make_jump!(parser, JumpOp::FallThrough),
714
715 Self::Br => {
716 let cond = parser.expect_insn_arg(inserter, 0, &mut undefs)?;
717 let then = parser.expect_block()?;
718 let else_ = parser.expect_block()?;
719 expect_token!(parser.lexer, Token::SemiColon, ";")?;
720 InsnData::Branch {
721 args: [cond],
722 dests: [then, else_],
723 }
724 }
725 Self::BrTable => {
726 let mut arg_idx = 0;
727 let mut args = smallvec![];
728 let cond = parser.expect_insn_arg(inserter, arg_idx, &mut undefs)?;
729 args.push(cond);
730 arg_idx += 1;
731
732 let default = if eat_token!(parser.lexer, Token::Undef)?.is_some() {
733 None
734 } else {
735 Some(parser.expect_block()?)
736 };
737
738 let mut table = smallvec![];
739 while eat_token!(parser.lexer, Token::LParen)?.is_some() {
740 let value = parser.expect_insn_arg(inserter, arg_idx, &mut undefs)?;
741 args.push(value);
742 let block = parser.expect_block()?;
743 table.push(block);
744 expect_token!(parser.lexer, Token::RParen, ")")?;
745 arg_idx += 1;
746 }
747 expect_token!(parser.lexer, Token::SemiColon, ";")?;
748 InsnData::BrTable {
749 args,
750 default,
751 table,
752 }
753 }
754
755 Self::Gep => {
756 let mut args = smallvec![];
757 let mut idx = 0;
758 while eat_token!(parser.lexer, Token::SemiColon)?.is_none() {
759 let arg = parser.expect_insn_arg(inserter, idx, &mut undefs)?;
760 args.push(arg);
761 idx += 1;
762 }
763
764 InsnData::Gep { args }
765 }
766
767 Self::Alloca => {
768 let ty = expect_ty(&parser.module_builder.ctx, parser.lexer)?;
769 expect_token!(parser.lexer, Token::SemiColon, ";")?;
770 InsnData::Alloca { ty }
771 }
772
773 Self::Return => {
774 if eat_token!(parser.lexer, Token::SemiColon)?.is_some() {
775 InsnData::Return { args: None }
776 } else {
777 let value = parser.expect_insn_arg(inserter, 0, &mut undefs)?;
778 expect_token!(parser.lexer, Token::SemiColon, ";")?;
779 InsnData::Return { args: Some(value) }
780 }
781 }
782
783 Self::Phi => {
784 let mut values = smallvec![];
785 let mut blocks = smallvec![];
786 let mut idx = 0;
787 while eat_token!(parser.lexer, Token::LParen)?.is_some() {
788 let value = parser.expect_insn_arg(inserter, idx, &mut undefs)?;
789 values.push(value);
790 let block = parser.expect_block()?;
791 blocks.push(block);
792 expect_token!(parser.lexer, Token::RParen, ")")?;
793 idx += 1;
794 }
795 expect_token!(parser.lexer, Token::SemiColon, ";")?;
796 InsnData::Phi {
797 values,
798 blocks,
799 ty: ret_ty.unwrap(),
800 }
801 }
802 };
803
804 let insn = inserter.insert_insn_data(insn_data);
805 for undef in undefs {
806 inserter.undefs.insert((insn, undef));
807 }
808
809 Ok(insn)
810 }
811}
812
813fn build_imm_data(number: &str, ty: &Type, line: u32) -> Result<Immediate> {
814 match ty {
815 Type::I1 => number
816 .parse::<i8>()
817 .map(|val| Immediate::I1(val != 0))
818 .map_err(|err| parse_imm_error(err, line)),
819
820 Type::I8 => number
821 .parse::<i8>()
822 .or_else(|_| number.parse::<u8>().map(|v| v as i8))
823 .map(Into::into)
824 .map_err(|err| parse_imm_error(err, line)),
825
826 Type::I16 => number
827 .parse::<i16>()
828 .or_else(|_| number.parse::<u16>().map(|v| v as i16))
829 .map(Into::into)
830 .map_err(|err| parse_imm_error(err, line)),
831
832 Type::I32 => number
833 .parse::<i32>()
834 .or_else(|_| number.parse::<u32>().map(|v| v as i32))
835 .map(Into::into)
836 .map_err(|err| parse_imm_error(err, line)),
837
838 Type::I64 => number
839 .parse::<i64>()
840 .or_else(|_| number.parse::<u64>().map(|v| v as i64))
841 .map(Into::into)
842 .map_err(|err| parse_imm_error(err, line)),
843
844 Type::I128 => number
845 .parse::<i128>()
846 .or_else(|_| number.parse::<u128>().map(|v| v as i128))
847 .map(Into::into)
848 .map_err(|err| parse_imm_error(err, line)),
849
850 Type::I256 => {
851 let number = number.to_string();
852 let is_negative = number.as_bytes()[0] as char == '-';
853 let number = if is_negative { &number[1..] } else { &number };
854 let mut i256: I256 = U256::from_str_radix(number, 10)
855 .map(Into::into)
856 .map_err(|err| parse_imm_error(err, line))?;
857
858 if is_negative {
859 i256 = I256::zero().overflowing_sub(i256).0;
860 }
861
862 Ok(Immediate::I256(i256))
863 }
864
865 _ => Err(Error::new(
866 ErrorKind::SemanticError("can't use non integral types for immediates".into()),
867 line,
868 )),
869 }
870}
871
872fn parse_imm_error(err: impl std::fmt::Display, line: u32) -> Error {
873 Error::new(
874 ErrorKind::SemanticError(format!("failed to parse immediate: {}", err)),
875 line,
876 )
877}
878
#[cfg(test)]
mod tests {
    use super::*;

    use sonatina_ir::ir_writer::FuncWriter;

    // Parses a single function from `input`, writes it back with `FuncWriter`,
    // and reports whether the output equals the input once surrounding
    // whitespace is trimmed from both.
    fn test_func_parser(input: &str) -> bool {
        let mut lexer = Lexer::new(input);
        let triple = TargetTriple::parse("evm-ethereum-london").unwrap();
        let isa = IsaBuilder::new(triple).build();
        let mut module_builder = ModuleBuilder::new(ModuleCtx::new(isa));
        let parsed_func = FuncParser::new(&mut lexer, &mut module_builder)
            .parse()
            .unwrap()
            .unwrap();
        let module = module_builder.build();
        let mut writer = FuncWriter::new(&module.funcs[parsed_func.func_ref]);

        input.trim() == writer.dump_string().unwrap().trim()
    }

    // Round-trips a function whose only instruction returns an immediate.
    #[test]
    fn parser_with_return() {
        assert!(test_func_parser(
            "func private %test_func() -> i32:
    block0:
        return 311.i32;"
        ));
    }

    // Round-trips a function that takes arguments and uses them as operands.
    #[test]
    fn test_with_arg() {
        assert!(test_func_parser(
            "func public %test_func(v0.i32, v1.i64) -> void:
    block0:
        v2.i64 = sext v0;
        v3.i64 = mul v2 v1;
        return;
"
        ));
    }

    // Round-trips a function whose block ids are not contiguous and not in
    // ascending order.
    #[test]
    fn parser_with_non_continuous_value() {
        assert!(test_func_parser(
            "func private %test_func() -> i32:
    block64:
        jump block1;

    block1:
        return 311.i32;"
        ));
    }

    // Round-trips a function with phi instructions, including a phi that
    // refers to a value defined later in the text.
    #[test]
    fn parser_with_phi() {
        assert!(test_func_parser(
            "func private %test_func() -> void:
    block0:
        jump block1;

    block1:
        v4.i32 = phi (1.i32 block0) (v5 block5);
        br 1.i32 block6 block2;

    block2:
        br 1.i32 block4 block3;

    block3:
        jump block5;

    block4:
        jump block5;

    block5:
        v5.i32 = phi (2.i32 block3) (v4 block4);
        jump block1;

    block6:
        v3.i32 = add v4 v4;
        return;
        "
        ));
    }

    // Round-trips a function that mixes immediates (including negative ones)
    // with explicitly numbered values.
    #[test]
    fn parser_with_immediate() {
        assert!(test_func_parser(
            "func private %test_func() -> i8:
    block64:
        v0.i8 = add -1.i8 127.i8;
        v1.i8 = add v0 3.i8;
        jump block1;

    block1:
        v2.i16 = zext -128.i8;
        return v1;"
        ));
    }

    // Checks that module comments and per-function comments are collected and
    // attached to the right function.
    #[test]
    fn test_with_module_comment() {
        let input = "
            #! Module comment 1
            #! Module comment 2

            target = \"evm-ethereum-london\"

            # f1 start 1
            # f1 start 2
            func private %f1() -> i32:
                block0:
                    return 311.i32;

            # f2 start 1
            # f2 start 2
            func public %f2() -> i32:
                block0:
                    return 311.i32;
            ";

        let parser = Parser::default();
        let parsed_module = parser.parse(input).unwrap();
        let module_comments = parsed_module.module_comments;
        assert_eq!(module_comments[0], " Module comment 1");
        assert_eq!(module_comments[1], " Module comment 2");

        let module = parsed_module.module;
        let mut funcs = module.iter_functions();
        let func1 = funcs.next().unwrap();
        let func1_comment = &parsed_module.func_comments[func1];
        assert_eq!(func1_comment[0], " f1 start 1");
        assert_eq!(func1_comment[1], " f1 start 2");

        let func2 = funcs.next().unwrap();
        let func2_comment = &parsed_module.func_comments[func2];
        assert_eq!(func2_comment[0], " f2 start 1");
        assert_eq!(func2_comment[1], " f2 start 2");
    }

    // Checks that plain and packed struct type declarations are registered
    // with the expected fields and packing flag.
    #[test]
    fn test_with_struct_type() {
        let input = "
            target = \"evm-ethereum-london\"

            type %s1 = {i32, i64};
            type %s2_packed = <{i32, i64, *%s1}>;

            func public %test(v0.*%s1, v1.*%s2_packed) -> i32:
                block0:
                    return 311.i32;
            ";

        let parser = Parser::default();
        let module = parser.parse(input).unwrap().module;

        module.ctx.with_ty_store(|s| {
            let ty = s.struct_type_by_name("s1").unwrap();
            let def = s.struct_def(ty).unwrap();
            assert_eq!(def.fields.len(), 2);
            assert_eq!(def.fields[0], Type::I32);
            assert_eq!(def.fields[1], Type::I64);
            assert!(!def.packed);
        });

        let s1_ptr_ty = module.ctx.with_ty_store_mut(|s| {
            let ty = s.struct_type_by_name("s1").unwrap();
            s.make_ptr(ty)
        });
        module.ctx.with_ty_store(|s| {
            let ty = s.struct_type_by_name("s2_packed").unwrap();
            let def = s.struct_def(ty).unwrap();
            assert_eq!(def.fields.len(), 3);
            assert_eq!(def.fields[0], Type::I32);
            assert_eq!(def.fields[1], Type::I64);
            assert_eq!(def.fields[2], s1_ptr_ty);
            assert!(def.packed);
        });
    }

    // Checks that global variable declarations, with and without an
    // initializer, are registered with the expected attributes.
    #[test]
    fn test_with_gv() {
        let input = "
            target = \"evm-ethereum-london\"

            gv public const %CONST_PUBLIC: i32 = 1;
            gv external %GLOBAL_EXTERNAL: i32;

            func public %test() -> i32:
                block0:
                    v2.i32 = add %CONST_PUBLIC %GLOBAL_EXTERNAL;
                    return v2;
            ";

        let parser = Parser::default();
        let module = parser.parse(input).unwrap().module;

        module.ctx.with_gv_store(|s| {
            let symbol = "CONST_PUBLIC";
            let gv = s.gv_by_symbol(symbol).unwrap();
            let data = s.gv_data(gv);
            assert_eq!(data.symbol, symbol);
            assert_eq!(data.ty, Type::I32);
            assert_eq!(data.linkage, Linkage::Public);
            assert!(data.is_const);
            assert_eq!(data.data, Some(ConstantValue::make_imm(1i32)));
        });

        module.ctx.with_gv_store(|s| {
            let symbol = "GLOBAL_EXTERNAL";
            let gv = s.gv_by_symbol(symbol).unwrap();
            let data = s.gv_data(gv);
            assert_eq!(data.symbol, symbol);
            assert_eq!(data.ty, Type::I32);
            assert_eq!(data.linkage, Linkage::External);
            assert!(!data.is_const);
            assert_eq!(data.data, None)
        });
    }
}