lucia_lang/compiler/
analyzer.rs

1//! The semantic analyzer.
2//!
3//! Lowers the AST to `Vec<Function>` and add semantic information.
4//!
5//! Functions in the AST will be identified and processed.
6//! The kind of names within the namespace will be determined.
7
8use std::{fmt, hash::Hash, mem};
9
10use indexmap::IndexSet;
11
12use super::{
13    ast::*,
14    code::ConstValue,
15    opcode::{JumpTarget, OpCode},
16};
17
18/// Global name information.
19#[derive(Debug, Clone)]
20pub struct GlobalNameInfo {
21    pub name: String,
22    pub is_writable: bool,
23}
24
25impl Hash for GlobalNameInfo {
26    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
27        self.name.hash(state);
28    }
29}
30
31impl PartialEq for GlobalNameInfo {
32    fn eq(&self, other: &Self) -> bool {
33        self.name == other.name
34    }
35}
36
37impl Eq for GlobalNameInfo {}
38
39impl From<String> for GlobalNameInfo {
40    fn from(value: String) -> Self {
41        GlobalNameInfo {
42            name: value,
43            is_writable: false,
44        }
45    }
46}
47
48/// Upvalue name information.
49#[derive(Debug, Clone)]
50pub struct UpvalueNameInfo {
51    pub name: String,
52    pub func_count: usize,
53    pub upvalue_id: usize,
54}
55
56impl Hash for UpvalueNameInfo {
57    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
58        self.name.hash(state);
59    }
60}
61
62impl PartialEq for UpvalueNameInfo {
63    fn eq(&self, other: &Self) -> bool {
64        self.name == other.name
65    }
66}
67
68impl Eq for UpvalueNameInfo {}
69
70impl From<String> for UpvalueNameInfo {
71    fn from(value: String) -> Self {
72        UpvalueNameInfo {
73            name: value,
74            func_count: 0,
75            upvalue_id: 0,
76        }
77    }
78}
79
80impl From<UpvalueNameInfo> for (String, usize, usize) {
81    fn from(value: UpvalueNameInfo) -> Self {
82        (value.name, value.func_count, value.upvalue_id)
83    }
84}
85
86/// A Function.
87#[derive(Debug, Clone)]
88pub struct Function {
89    /// Function id.
90    pub func_id: usize,
91    /// Kind of Function.
92    pub kind: FunctionKind,
93    /// Name of parameters.
94    pub params: Vec<String>,
95    /// Name of variadic parameter.
96    pub variadic: Option<String>,
97    /// AST of the function.
98    pub body: Box<Block>,
99    /// The base function.
100    pub base_function: Option<usize>,
101
102    /// Local names.
103    pub local_names: IndexSet<String>,
104    /// Global names.
105    pub global_names: IndexSet<GlobalNameInfo>,
106    /// Upvalue names.
107    pub upvalue_names: IndexSet<UpvalueNameInfo>,
108
109    /// The count of upvalues defined in the function.
110    pub def_upvalue_count: usize,
111
112    // used by codegen
113    pub(crate) code: Vec<OpCode>,
114    pub(crate) consts: Vec<ConstValue>,
115    pub(crate) jump_target_count: usize,
116    pub(crate) continue_stack: Vec<JumpTarget>,
117    pub(crate) break_stack: Vec<JumpTarget>,
118}
119
120impl fmt::Display for Function {
121    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122        writeln!(f, "func_id: {}", self.func_id)?;
123        writeln!(f, "kind: {}", self.kind)?;
124        if let Some(v) = &self.variadic {
125            writeln!(f, "params: ({}), *{}", self.params.join(", "), v)?;
126        } else {
127            writeln!(f, "params: ({})", self.params.join(", "))?;
128        }
129        writeln!(f, "base_function: {:?}", self.base_function)?;
130        writeln!(f, "def_upvalue_count: {}", self.def_upvalue_count)?;
131        writeln!(f, "body: {}", self.body)?;
132
133        writeln!(f, "local_names: {:?}", self.local_names)?;
134        writeln!(f, "global_names: {:?}", self.global_names)?;
135        writeln!(f, "upvalue_names: {:?}", self.upvalue_names)?;
136        Ok(())
137    }
138}
139
140impl Function {
141    pub fn new(
142        func_id: usize,
143        kind: FunctionKind,
144        params: Vec<Ident>,
145        variadic: Option<Box<Ident>>,
146        body: Box<Block>,
147        base_function: Option<usize>,
148    ) -> Self {
149        Function {
150            func_id,
151            kind,
152            params: params.into_iter().map(|x| x.name).collect(),
153            variadic: variadic.map(|x| x.name),
154            body,
155            base_function,
156            local_names: IndexSet::new(),
157            global_names: IndexSet::new(),
158            upvalue_names: IndexSet::new(),
159            def_upvalue_count: 0,
160            code: Vec::new(),
161            consts: vec![ConstValue::Null],
162            jump_target_count: 0,
163            continue_stack: Vec::new(),
164            break_stack: Vec::new(),
165        }
166    }
167}
168
169/// Semantic Analyze. Lowers the AST to `Vec<Function>`.
170pub fn analyze(ast: Box<Block>) -> Vec<Function> {
171    let mut func = Function::new(0, FunctionKind::Function, Vec::new(), None, ast, None);
172    let mut analyzer = SemanticAnalyzer::new(&mut func);
173    analyzer.func_list.insert(0, func);
174    analyzer.analyze_name();
175    analyzer.func_list
176}
177
178#[derive(Debug, Clone)]
179struct SemanticAnalyzer {
180    func_id: usize,
181    pub func_list: Vec<Function>,
182}
183
184impl SemanticAnalyzer {
185    fn new(func: &mut Function) -> Self {
186        let mut analyzer = SemanticAnalyzer {
187            func_id: func.func_id,
188            func_list: Vec::new(),
189        };
190        analyzer.build_block(&mut func.body);
191        analyzer
192    }
193
194    fn build_block(&mut self, ast_node: &mut Block) {
195        for stmt in &mut ast_node.body {
196            self.build_stmt(stmt);
197        }
198    }
199
200    fn build_stmt(&mut self, ast_node: &mut Stmt) {
201        match &mut ast_node.kind {
202            StmtKind::If {
203                test,
204                consequent,
205                alternate,
206            } => {
207                self.build_expr(test);
208                self.build_block(consequent);
209                if let Some(alternate) = alternate {
210                    self.build_stmt(alternate);
211                }
212            }
213            StmtKind::Loop { body } => self.build_block(body),
214            StmtKind::While { test, body } => {
215                self.build_expr(test);
216                self.build_block(body);
217            }
218            StmtKind::For {
219                left: _,
220                right,
221                body,
222            } => {
223                self.build_expr(right);
224                self.build_block(body);
225            }
226            StmtKind::Break => (),
227            StmtKind::Continue => (),
228            StmtKind::Return { argument } => self.build_expr(argument),
229            StmtKind::Throw { argument } => self.build_expr(argument),
230            StmtKind::Global { arguments: _ } => (),
231            StmtKind::Import { path: _, kind: _ } => (),
232            StmtKind::Assign { left, right } => {
233                self.build_expr(left);
234                self.build_expr(right);
235            }
236            StmtKind::AssignOp {
237                operator: _,
238                left,
239                right,
240            } => {
241                self.build_expr(left);
242                self.build_expr(right);
243            }
244            StmtKind::AssignUnpack { left, right } => {
245                for left in left {
246                    self.build_expr(left);
247                }
248                self.build_expr(right);
249            }
250            StmtKind::AssignMulti { left, right } => {
251                for left in left {
252                    self.build_expr(left);
253                }
254                for right in right {
255                    self.build_expr(right);
256                }
257            }
258            StmtKind::Block(block) => self.build_block(block),
259            StmtKind::Expr(expr) => self.build_expr(expr),
260        }
261    }
262
263    fn build_expr(&mut self, ast_node: &mut Expr) {
264        match &mut ast_node.kind {
265            ExprKind::Lit(_) => (),
266            ExprKind::Ident(_) => (),
267            ExprKind::Function { .. } => {
268                let func_id = self.func_id + self.func_list.len() + 1;
269                if let ExprKind::Function {
270                    kind,
271                    params,
272                    variadic,
273                    body,
274                } = mem::replace(&mut ast_node.kind, ExprKind::FunctionId(func_id))
275                {
276                    let mut func =
277                        Function::new(func_id, kind, params, variadic, body, Some(self.func_id));
278                    let mut analyzer = SemanticAnalyzer::new(&mut func);
279                    self.func_list.push(func);
280                    self.func_list.append(&mut analyzer.func_list);
281                }
282            }
283            ExprKind::FunctionId(_) => (),
284            ExprKind::Table { properties } => {
285                for TableProperty { key, value, .. } in properties {
286                    self.build_expr(key);
287                    self.build_expr(value);
288                }
289            }
290            ExprKind::List { items } => {
291                for item in items {
292                    self.build_expr(item);
293                }
294            }
295            ExprKind::Unary {
296                operator: _,
297                argument,
298            } => self.build_expr(argument),
299            ExprKind::Binary {
300                operator: _,
301                left,
302                right,
303            } => {
304                self.build_expr(left);
305                self.build_expr(right);
306            }
307            ExprKind::Member {
308                table,
309                property,
310                kind: _,
311                safe: _,
312            } => {
313                self.build_expr(table);
314                self.build_expr(property);
315            }
316            ExprKind::MetaMember { table, safe: _ } => self.build_expr(table),
317            ExprKind::Call {
318                callee,
319                arguments,
320                propagating_error: _,
321            } => {
322                self.build_expr(callee);
323                for arg in arguments {
324                    self.build_expr(arg);
325                }
326            }
327        }
328    }
329
330    pub fn analyze_name(&mut self) {
331        for i in 0..self.func_list.len() {
332            for param in self.func_list[i].params.clone() {
333                self.func_list[i].local_names.insert(param);
334            }
335            if let Some(variadic) = self.func_list[i].variadic.clone() {
336                self.func_list[i].local_names.insert(variadic);
337            }
338            let mut t = self.func_list[i].body.clone();
339            self.analyze_name_block(i, &mut t);
340        }
341    }
342
343    fn analyze_name_block(&mut self, func_id: usize, ast_node: &mut Block) {
344        for stmt in &mut ast_node.body {
345            self.analyze_name_stmt(func_id, stmt);
346        }
347    }
348
349    fn analyze_name_stmt(&mut self, func_id: usize, ast_node: &mut Stmt) {
350        match &mut ast_node.kind {
351            StmtKind::If {
352                test,
353                consequent,
354                alternate,
355            } => {
356                self.analyze_name_expr(func_id, test);
357                self.analyze_name_block(func_id, consequent);
358                if let Some(alternate) = alternate {
359                    self.analyze_name_stmt(func_id, alternate);
360                }
361            }
362            StmtKind::Loop { body } => self.analyze_name_block(func_id, body),
363            StmtKind::While { test, body } => {
364                self.analyze_name_expr(func_id, test);
365                self.analyze_name_block(func_id, body);
366            }
367            StmtKind::For { left, right, body } => {
368                for left in left {
369                    self.store_name(func_id, &left.name);
370                }
371                self.analyze_name_expr(func_id, right);
372                self.analyze_name_block(func_id, body);
373            }
374            StmtKind::Break => (),
375            StmtKind::Continue => (),
376            StmtKind::Return { argument } => self.analyze_name_expr(func_id, argument),
377            StmtKind::Throw { argument } => self.analyze_name_expr(func_id, argument),
378            StmtKind::Global { arguments } => {
379                for arg in arguments {
380                    self.func_list[func_id]
381                        .global_names
382                        .replace(GlobalNameInfo {
383                            name: arg.name.clone(),
384                            is_writable: true,
385                        });
386                }
387            }
388            StmtKind::Import { path: _, kind } => match kind {
389                ImportKind::Simple(alias) => {
390                    self.func_list[func_id]
391                        .global_names
392                        .insert(GlobalNameInfo::from(alias.name.clone()));
393                }
394                ImportKind::Nested(items) => {
395                    for (_, alias) in items {
396                        self.func_list[func_id]
397                            .global_names
398                            .insert(GlobalNameInfo::from(alias.name.clone()));
399                    }
400                }
401                ImportKind::Glob => (),
402            },
403            StmtKind::Assign { left, right } => {
404                if let ExprKind::Ident(ident) = &left.kind {
405                    self.store_name(func_id, &ident.name);
406                } else {
407                    self.analyze_name_expr(func_id, left);
408                }
409                self.analyze_name_expr(func_id, right);
410            }
411            StmtKind::AssignOp {
412                operator: _,
413                left,
414                right,
415            } => {
416                if let ExprKind::Ident(ident) = &left.kind {
417                    self.store_name(func_id, &ident.name);
418                } else {
419                    self.analyze_name_expr(func_id, left);
420                }
421                self.analyze_name_expr(func_id, left);
422                self.analyze_name_expr(func_id, right);
423            }
424            StmtKind::AssignUnpack { left, right } => {
425                for left in left {
426                    if let ExprKind::Ident(ident) = &left.kind {
427                        self.store_name(func_id, &ident.name);
428                    } else {
429                        self.analyze_name_expr(func_id, left);
430                    }
431                }
432                self.analyze_name_expr(func_id, right);
433            }
434            StmtKind::AssignMulti { left, right } => {
435                for left in left {
436                    if let ExprKind::Ident(ident) = &left.kind {
437                        self.store_name(func_id, &ident.name);
438                    } else {
439                        self.analyze_name_expr(func_id, left);
440                    }
441                }
442                for right in right {
443                    self.analyze_name_expr(func_id, right);
444                }
445            }
446            StmtKind::Block(block) => self.analyze_name_block(func_id, block),
447            StmtKind::Expr(expr) => self.analyze_name_expr(func_id, expr),
448        }
449    }
450
451    fn analyze_name_expr(&mut self, func_id: usize, ast_node: &mut Expr) {
452        match &mut ast_node.kind {
453            ExprKind::Lit(_) => (),
454            ExprKind::Ident(ident) => self.load_name(func_id, &ident.name),
455            ExprKind::Function { .. } => {
456                let func_id = self.func_id + self.func_list.len() + 1;
457                if let ExprKind::Function {
458                    kind,
459                    params,
460                    variadic,
461                    body,
462                } = mem::replace(&mut ast_node.kind, ExprKind::FunctionId(func_id))
463                {
464                    let mut func =
465                        Function::new(func_id, kind, params, variadic, body, Some(self.func_id));
466                    let mut analyzer = SemanticAnalyzer::new(&mut func);
467                    self.func_list.push(func);
468                    self.func_list.append(&mut analyzer.func_list);
469                }
470            }
471            ExprKind::FunctionId(_) => (),
472            ExprKind::Table { properties } => {
473                for TableProperty { key, value, .. } in properties {
474                    self.analyze_name_expr(func_id, key);
475                    self.analyze_name_expr(func_id, value);
476                }
477            }
478            ExprKind::List { items } => {
479                for item in items {
480                    self.analyze_name_expr(func_id, item);
481                }
482            }
483            ExprKind::Unary {
484                operator: _,
485                argument,
486            } => self.analyze_name_expr(func_id, argument),
487            ExprKind::Binary {
488                operator: _,
489                left,
490                right,
491            } => {
492                self.analyze_name_expr(func_id, left);
493                self.analyze_name_expr(func_id, right);
494            }
495            ExprKind::Member {
496                table,
497                property,
498                kind: _,
499                safe: _,
500            } => {
501                self.analyze_name_expr(func_id, table);
502                self.analyze_name_expr(func_id, property);
503            }
504            ExprKind::MetaMember { table, safe: _ } => self.analyze_name_expr(func_id, table),
505            ExprKind::Call {
506                callee,
507                arguments,
508                propagating_error: _,
509            } => {
510                self.analyze_name_expr(func_id, callee);
511                for arg in arguments {
512                    self.analyze_name_expr(func_id, arg);
513                }
514            }
515        }
516    }
517
518    fn load_name(&mut self, func_id: usize, name: &str) {
519        if self.func_list[func_id].local_names.contains(name)
520            || self.func_list[func_id]
521                .global_names
522                .contains(&GlobalNameInfo::from(name.to_owned()))
523            || self.func_list[func_id]
524                .upvalue_names
525                .contains(&UpvalueNameInfo::from(name.to_owned()))
526        {
527            return;
528        }
529        if self.func_list[func_id].kind != FunctionKind::Closure {
530            self.func_list[func_id]
531                .global_names
532                .insert(GlobalNameInfo::from(name.to_owned()));
533        } else {
534            let mut base_func_count = 0;
535            let mut base_func = &mut self.func_list[func_id];
536            loop {
537                if let Some(func) = base_func.base_function {
538                    base_func = &mut self.func_list[func];
539                    base_func_count += 1;
540                } else {
541                    self.func_list[func_id]
542                        .global_names
543                        .insert(GlobalNameInfo::from(name.to_owned()));
544                    break;
545                }
546                if let Some(upvalue_name) = base_func
547                    .upvalue_names
548                    .get(&UpvalueNameInfo::from(name.to_owned()))
549                {
550                    let func_count = upvalue_name.func_count + base_func_count;
551                    let upvalue_id = upvalue_name.upvalue_id;
552                    self.func_list[func_id]
553                        .upvalue_names
554                        .insert(UpvalueNameInfo {
555                            name: name.to_owned(),
556                            func_count,
557                            upvalue_id,
558                        });
559                    break;
560                }
561                if base_func.local_names.contains(name) {
562                    base_func.local_names.remove(name);
563                    base_func.upvalue_names.insert(UpvalueNameInfo {
564                        name: name.to_owned(),
565                        func_count: 0,
566                        upvalue_id: base_func.def_upvalue_count,
567                    });
568                    let upvalue_id = base_func.def_upvalue_count;
569                    base_func.def_upvalue_count += 1;
570                    self.func_list[func_id]
571                        .upvalue_names
572                        .insert(UpvalueNameInfo {
573                            name: name.to_owned(),
574                            func_count: base_func_count,
575                            upvalue_id,
576                        });
577                    break;
578                }
579                if base_func.kind != FunctionKind::Closure {
580                    self.func_list[func_id]
581                        .global_names
582                        .insert(GlobalNameInfo::from(name.to_owned()));
583                    break;
584                }
585            }
586        }
587    }
588
589    fn store_name(&mut self, func_id: usize, name: &str) {
590        if self.func_list[func_id].local_names.contains(name)
591            || self.func_list[func_id]
592                .upvalue_names
593                .contains(&UpvalueNameInfo::from(name.to_owned()))
594        {
595            return;
596        } else if let Some(name_info) = self.func_list[func_id]
597            .global_names
598            .get(&GlobalNameInfo::from(name.to_owned()))
599        {
600            if name_info.is_writable {
601                return;
602            }
603        }
604        if self.func_list[func_id].kind != FunctionKind::Closure {
605            self.func_list[func_id].local_names.insert(name.to_owned());
606        } else {
607            let mut base_func_count = 0;
608            let mut base_func = &mut self.func_list[func_id];
609            loop {
610                if let Some(func) = base_func.base_function {
611                    base_func = &mut self.func_list[func];
612                    base_func_count += 1;
613                } else {
614                    self.func_list[func_id].local_names.insert(name.to_owned());
615                    break;
616                }
617                if let Some(upvalue_name) = base_func
618                    .upvalue_names
619                    .get(&UpvalueNameInfo::from(name.to_owned()))
620                {
621                    let func_count = upvalue_name.func_count + base_func_count;
622                    let upvalue_id = upvalue_name.upvalue_id;
623                    self.func_list[func_id]
624                        .upvalue_names
625                        .insert(UpvalueNameInfo {
626                            name: name.to_owned(),
627                            func_count,
628                            upvalue_id,
629                        });
630                    break;
631                }
632                if base_func.local_names.contains(name) {
633                    base_func.local_names.remove(name);
634                    base_func.upvalue_names.insert(UpvalueNameInfo {
635                        name: name.to_owned(),
636                        func_count: 0,
637                        upvalue_id: base_func.def_upvalue_count,
638                    });
639                    let upvalue_id = base_func.def_upvalue_count;
640                    base_func.def_upvalue_count += 1;
641                    self.func_list[func_id]
642                        .upvalue_names
643                        .insert(UpvalueNameInfo {
644                            name: name.to_owned(),
645                            func_count: base_func_count,
646                            upvalue_id,
647                        });
648                    break;
649                }
650                if base_func.kind != FunctionKind::Closure {
651                    self.func_list[func_id].local_names.insert(name.to_owned());
652                    break;
653                }
654            }
655        }
656    }
657}