mod_language 0.1.22

A work-in-progress (WIP) WebAssembly scripting language
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
//! The Item Parser function and its dependencies

use crate::{
  util::{ Either, },
  source::{ SourceRegion, SOURCE_MANAGER, },
  common::{ Keyword::*, Operator::*, ITEM_KEYWORDS, Identifier, },
  token::{ Token, TokenData, },
  ast::{ Item, ItemData, ExportData, Path, LocalDeclaration, },
  lexer::{ Lexer, },
};

use super::{ Parser, ParseletPredicate, ParseletFunction, type_expression, expression, block, path, sync, };


/// Parse a single Item
pub fn item (parser: &mut Parser) -> Option<Item> {
  let curr_tok = parser.curr_tok()?;

  let parselet: Option<ParseletFunction<Item>> = match curr_tok.data {
    TokenData::Keyword(Import) => Some(itm_import),
    TokenData::Keyword(Export) => Some(itm_export),
    _ => ItemParselet::get_function(curr_tok)
  };

  if let Some(parselet_function) = parselet {
    parselet_function(parser)
  } else {
    parser.error("No syntactic match for this token in the context of a top level item".to_owned());

    None
  }
}


/// Parse an optional `as new_name` alias clause
///
/// Returns:
/// - `Ok(None)` when the current token is not the `as` operator (no alias present)
/// - `Ok(Some((name, region)))` when a full alias clause was consumed
/// - `Err(())` when `as` was present but not followed by an identifier (an error is reported)
fn get_new_name_and_origin (parser: &mut Parser) -> Result<Option<(Identifier, SourceRegion)>, ()> {
  // No `as` operator means no alias: this is not an error
  let as_origin = match parser.curr_tok() {
    Some(&Token { data: TokenData::Operator(As), origin }) => origin,
    _ => return Ok(None)
  };

  parser.advance();

  match parser.curr_tok() {
    Some(&Token { data: TokenData::Identifier(ref new_name), origin: new_name_origin }) => {
      let new_name = new_name.clone();

      parser.advance();

      Ok(Some((new_name, SourceRegion::merge(as_origin, new_name_origin))))
    },

    _ => {
      parser.error("Expected identifier to follow aliasing keyword `as`".to_owned());

      Err(())
    }
  }
}

/// Parse an optional `as new_name` alias clause, discarding its source region
///
/// Same contract as `get_new_name_and_origin`, but only the identifier is kept.
fn get_new_name (parser: &mut Parser) -> Result<Option<Identifier>, ()> {
  get_new_name_and_origin(parser).map(|opt| opt.map(|(new_name, _)| new_name))
}

/// Parse a single import reference: a path (or bare identifier) with an
/// optional `as alias` clause
///
/// Returns the (path, optional alias) pair together with the source region
/// covering the whole reference, or `None` on failure.
fn get_single_import (parser: &mut Parser) -> Option<((Path, Option<Identifier>), SourceRegion)> {
  let (path_or_ident, mut origin) = path(parser)?;

  // A bare identifier is promoted to a single-segment path
  let path = match path_or_ident {
    Either::A(path) => path,
    Either::B(base) => base.into()
  };

  let mut new_name = None;

  // `.ok()?` converts an alias-clause error into a failed parse
  if let Some((name, new_origin)) = get_new_name_and_origin(parser).ok()? {
    origin = SourceRegion::merge(origin, new_origin);
    new_name = Some(name);
  }

  Some(((path, new_name), origin))
}


/// Parse an `import` item
///
/// Accepts either a single import reference (path with optional `as` alias)
/// or a `{ ... }` block containing a comma-separated list of references.
/// On malformed input, attempts to synchronize to the next `,` or closing `}`;
/// returns `None` (after reporting an error) if recovery fails.
fn itm_import (parser: &mut Parser) -> Option<Item> {
  if let Some(&Token { data: TokenData::Keyword(Import), origin: start_region }) = parser.curr_tok() {
    parser.advance();

    // `terminal` is true for the block form and false for the single-ref form;
    // presumably it marks whether a trailing ; is required — TODO confirm against Item::requires_semi
    let (refs, end_region, terminal) = if let Some(Token { data: TokenData::Operator(LeftBracket), .. }) = parser.curr_tok() {
      parser.advance();

      let mut refs = Vec::new();

      // True at the start of the list and immediately after a separator comma;
      // false when another ref would need a comma first
      let mut ref_ok = true;

      let end = loop {
        match parser.curr_tok() {
          // Unexpected end of input
          None => {
            parser.error_at(SourceRegion::merge(start_region, parser.curr_region()), "Unexpected end of input, expected } to close block".to_owned());
            return None
          },
  
          // The end of the block
          Some(&Token { data: TokenData::Operator(RightBracket), origin }) => {
            parser.advance();
            break origin
          },
  
          // Refs
          _ => {
            if ref_ok {
              if let Some((imp, _)) = get_single_import(parser) {
                refs.push(imp);

                if let Some(Token { data: TokenData::Operator(Comma), .. }) = parser.curr_tok() {
                  parser.advance();
                } else {
                  // No comma: the next token must close the block,
                  // otherwise the next iteration reports a separator error
                  ref_ok = false;
                }

                continue
              }
            } else {
              parser.error("Expected , to separate import references or } to end block".to_owned());
            }
    
            // Error path: try to synchronize to a , (resume list) or } (end list)
            if parser.synchronize(sync::close_pair_or(sync::operator(LeftBracket), sync::operator(RightBracket), sync::operator(Comma))) {
              if let Some(&Token { data: TokenData::Operator(op), origin }) = parser.curr_tok() {
                parser.advance();

                if op == Comma { continue }
                else { break origin }
              }
            }
      
            // Could not recover
            return None
          }
        }
      };

      (refs, end, true)
    } else if let Some((imp, origin)) = get_single_import(parser) {
      (vec![ imp ], origin, false)
    } else {
      parser.error("Expected identifier or path, or a list of these, to follow `import` keyword".to_owned());

      return None
    };

    return Some(Item::new(ItemData::Import { data: refs, terminal }, SourceRegion::merge(start_region, end_region)))
  }

  unreachable!("Internal error, import item parselet called on non-import token");
}

/// Parse an `export` item
///
/// Accepts three forms:
/// - A `{ ... }` block containing a comma-separated list of identifier refs with optional `as` aliases
/// - A single identifier ref with an optional `as` alias
/// - An inline item (any item the `ItemParselet` table can parse), wrapped as an inline export
///
/// Returns `None` (after reporting an error) if the input is malformed and recovery fails.
fn itm_export (parser: &mut Parser) -> Option<Item> {
  if let Some(&Token { data: TokenData::Keyword(Export), origin: start_region }) = parser.curr_tok() {
    parser.advance();

    if let Some(&Token { data: TokenData::Operator(LeftBracket), .. }) = parser.curr_tok() {
      parser.advance();

      let mut refs = Vec::new();

      // True at the start of the list and immediately after a separator comma;
      // false when another ref would need a comma first
      let mut ref_ok = true;

      let end_region;

      loop {
        match parser.curr_tok() {
          // Unexpected end of input
          None => {
            parser.error_at(SourceRegion::merge(start_region, parser.curr_region()), "Unexpected end of input, expected } to close block".to_owned());
            return None
          },
  
          // The end of the block
          Some(&Token { data: TokenData::Operator(RightBracket), origin }) => {
            parser.advance();
            end_region = origin;
            break
          },
  
          // Refs
          _ => {
            if ref_ok {
              if let Some(Token { data: TokenData::Identifier(base), .. }) = parser.curr_tok() {
                let base = base.clone();
                
                parser.advance();
                                
                if let Ok(new_name) = get_new_name(parser) {
                  refs.push((base, new_name));

                  if let Some(Token { data: TokenData::Operator(Comma), .. }) = parser.curr_tok() {
                    parser.advance();
                    ref_ok = true;
                  } else {
                    // Fix: previously `ref_ok` was never cleared here, so a missing
                    // comma between export references was silently accepted and the
                    // separator error below was unreachable. Mirrors itm_import.
                    ref_ok = false;
                  }

                  continue
                }
              }
            } else {
              parser.error("Expected , to separate export references or } to end block".to_owned());
            }
    
            // Error path: try to synchronize to a , (resume list) or } (end list)
            if parser.synchronize(sync::close_pair_or(sync::operator(LeftBracket), sync::operator(RightBracket), sync::operator(Comma))) {
              if let Some(&Token { data: TokenData::Operator(op), origin }) = parser.curr_tok() {
                parser.advance();

                if op == Comma {
                  continue
                } else { 
                  end_region = origin;
                  break
                }
              }
            }
      
            // Could not recover
            return None
          }
        }
      }

      return Some(Item::new(ItemData::Export { data: ExportData::List(refs), terminal: true }, SourceRegion::merge(start_region, end_region)))
    } else if let Some(&Token { data: TokenData::Identifier(ref base), origin: base_end }) = parser.curr_tok() {
      let base = base.to_owned();

      parser.advance();

      let new_name_and_origin = if let Ok(new_name_and_origin) = get_new_name_and_origin(parser) { new_name_and_origin } else { return None };

      let (new_name, end) = if let Some((new_name, end)) = new_name_and_origin {
        (Some(new_name), end)
      } else {
        (None, base_end)
      };

      return Some(Item::new(ItemData::Export { data: ExportData::List(vec![ (base, new_name) ]), terminal: false }, SourceRegion::merge(start_region, end)))
    } else {
      let curr_tok = if let Some(tok) = parser.curr_tok() { tok } else {
        parser.error("Expected a list, alias, or inline item to follow `export` keyword".to_owned());
        return None
      };

      // Inline form: delegate to the regular item parselet table
      let inline = if let Some(parselet_function) = ItemParselet::get_function(curr_tok) {
        parselet_function(parser)
      } else {
        parser.error("No syntactic match for this token in the context of a top level item".to_owned());
    
        return None
      }?;

      let region = SourceRegion::merge(start_region, inline.origin);
      // The export is terminal when the wrapped item does not itself require a ;
      let terminal = !inline.requires_semi();

      return Some(Item::new(ItemData::Export { data: ExportData::Inline(box inline), terminal }, region))
    }
  }

  unreachable!("Internal error, export item parselet called on non-export token");
}


fn itm_module (parser: &mut Parser) -> Option<Item> {
  if let Some(&Token { data: TokenData::Keyword(Module), origin: start_region }) = parser.curr_tok() {
    parser.advance();

    if let Some(&Token { data: TokenData::Identifier(ref identifier), origin: end_region }) = parser.curr_tok() {
      let identifier = identifier.clone();

      parser.advance();

      if let Some(&Token { data: TokenData::Operator(LeftBracket), .. }) = parser.curr_tok() {
        parser.advance();

        let mut items = Vec::new();

        let mut itm_ok = true;

        loop {
          match parser.curr_tok() {
            // The end of the stream
            None => {
              parser.error("Unexpected end of input while parsing module".to_owned());
            },

            // The end of the block
            Some(&Token { data: TokenData::Operator(RightBracket), origin: end_region }) => {
              parser.advance();
              return Some(Item::new(ItemData::Module { identifier, items, inline: true }, SourceRegion::merge(start_region, end_region)));
            },

            // Items
            _ => {
              if itm_ok {
                if let Some(item) = item(parser) {
                  if item.requires_semi() {
                    if let Some(&Token { data: TokenData::Operator(Semi), .. }) = parser.curr_tok() {
                      parser.advance();
                      itm_ok = true;
                    } else {
                      itm_ok = false;
                    }
                  }
                  
                  items.push(item);

                  continue
                } // else { Error message already provided by item }
              } else {
                parser.error("Expected a ; to separate items or } to end module".to_owned());
              }

              // If we reach here there was some kind of error, either we didnt have a semi after the last item, or our item call had an error,
              // so we need to try and synchronize to the end of the block or the next semi or keyword
              
              if parser.synchronize(sync::close_pair_or(sync::operator(LeftBracket), sync::operator(RightBracket), sync::or(sync::operator(Semi), sync::any_keyword_of(ITEM_KEYWORDS)))) {
                match parser.curr_tok().unwrap() {
                  Token { data: TokenData::Operator(Semi), .. } => {
                    parser.advance();
                    itm_ok = true;
                  },
                  Token { data: TokenData::Keyword(_), .. } => {
                    itm_ok = true;
                  },
                  Token { data: TokenData::Operator(RightBracket), .. } => continue, // The next iteration will handle the closing bracket
                  _ => unreachable!("Internal error, unexpected parser state post synchronization")
                }
              } else {
                // Cannot recover state locally
                return None
              }
            }
          }
        }
      } else {
        let curr_source_key = start_region.source.expect("Internal error: Module item has no source origin");
        let curr_source = SOURCE_MANAGER.get(curr_source_key).expect("Internal error: Module item has invalid source origin");

        let curr_path = &curr_source.path;
        let curr_dir = curr_path.parent().expect("Internal error: Source file path has no directory");

        let sub_mod_path: std::path::PathBuf = [ curr_dir, identifier.as_ref().as_ref() ].iter().collect::<std::path::PathBuf>();
        let sub_dir_mod_path: std::path::PathBuf = sub_mod_path.join("mod.ms");
        let sub_file_mod_path: std::path::PathBuf = sub_mod_path.with_extension("ms");

        let dir_exists = sub_dir_mod_path.exists();
        let file_exists = sub_file_mod_path.exists();

        let local_region = SourceRegion::merge(start_region, end_region);

        let local_error = |msg| parser.error_at(local_region, format!("Cannot import submodule `{}`: {}", identifier, msg));

        let sub_mod_path = if dir_exists && !file_exists {
          sub_dir_mod_path
        } else if file_exists && !dir_exists {
          sub_file_mod_path
        } else {
          if file_exists && dir_exists {
            local_error(format!(
              "A file exists at both [{}] and [{}], please remove one to resolve the ambiguity",
              sub_dir_mod_path.display(), sub_file_mod_path.display()
            ))
          } else {
            local_error(format!(
              "Expected a file at either [{}] or [{}], but neither exists",
              sub_dir_mod_path.display(), sub_file_mod_path.display()
            ))
          }

          return None
        };

        let sub_source_key = match SOURCE_MANAGER.load(&sub_mod_path) {
          Ok(key) => key,
          Err(e) => {
            if e.kind() == std::io::ErrorKind::AlreadyExists {
              local_error(format!(
                "File [{}] has already been loaded during this session, it cannot be imported twice",
                sub_mod_path.display()
              ))
            } else {
              local_error(format!(
                "Unexpected error loading file [{}] from disk: {}",
                sub_mod_path.display(), e
              ))
            }

            return None
          }
        };

        let mut sub_lexer = Lexer::new(sub_source_key);
        let sub_stream = sub_lexer.lex_stream();
        let mut sub_parser = Parser::new(&sub_stream);

        return Some(Item::new(ItemData::Module { identifier, items: sub_parser.parse_ast(), inline: false }, local_region));
      }
    }
  }

  None
}


/// Parse a global variable item: an identifier, a required `: type` annotation,
/// and an optional `= initializer` expression
///
/// Returns `None` (after reporting an error) when the identifier or type
/// annotation is missing, or when a sub-expression fails to parse.
fn itm_global (parser: &mut Parser) -> Option<Item> {
  // Synchronization should be handled by higher level parselet

  if let Some(&Token { data: TokenData::Keyword(Global), origin: start_region }) = parser.curr_tok() {
    parser.advance();

    if let Some(&Token { data: TokenData::Identifier(ref identifier), origin: mut end_region }) = parser.curr_tok() {
      let identifier = identifier.clone();

      parser.advance();
      
      // The : and type expression are required for globals
      let explicit_type = if let Some(&Token { data: TokenData::Operator(Colon), .. }) = parser.curr_tok() {
        parser.advance();

        let texpr = type_expression(parser)?;

        end_region = texpr.origin;

        texpr
      } else {
        parser.error("Expected : and a type expression to follow global identifier".to_owned());
        return None
      };

      // The = and initializer expression are optional
      let initializer = if let Some(&Token { data: TokenData::Operator(Assign), .. }) = parser.curr_tok() {
        parser.advance();

        let expr = expression(parser)?;

        end_region = expr.origin;

        Some(expr)
      } else {
        None
      };

      return Some(Item::new(
        ItemData::Global { identifier, explicit_type, initializer },
        SourceRegion::merge(start_region, end_region)
      ))
    } else {
      // Fix: previously this error path fell through to the unreachable! below,
      // panicking on a recoverable parse failure; also corrected the message,
      // which wrongly referred to the `let` keyword
      parser.error("Expected identifier for global to follow global keyword".to_owned());
      return None
    }
  }

  unreachable!("Internal error, global item parselet called on non-global token");
}


/// Parse a function item: an identifier, an optional parenthesized parameter
/// list (`name: type` pairs), an optional `-> return_type`, and an optional
/// `{ ... }` body (absent body presumably declares an extern/forward
/// function — TODO confirm against ItemData::Function consumers)
///
/// On malformed parameters, attempts to synchronize to the next `,` or `)`;
/// returns `None` (after reporting an error) if recovery fails.
fn itm_function (parser: &mut Parser) -> Option<Item> {
  // end_region starts at the keyword and is pushed forward as each
  // optional section (params, return type, body) is consumed
  let (start_region, mut end_region) = if let Some(&Token { data: TokenData::Keyword(Function), origin }) = parser.curr_tok() {
    parser.advance();
    (origin, origin)
  } else {
    unreachable!("Internal error, function parselet called on non-fn token");
  };

  let identifier = if let Some(&Token { data: TokenData::Identifier(ref identifier), .. }) = parser.curr_tok() {
    let identifier = identifier.clone();

    parser.advance();

    identifier
  } else {
    parser.error("Expected identifier for function to follow fn keyword".to_owned());
    return None;
  };

  let mut parameters = Vec::new();

  // Parameter list is optional; only entered when a ( follows the identifier
  if let Some(&Token { data: TokenData::Operator(LeftParen), .. }) = parser.curr_tok() {
    parser.advance();

    loop {
      if let Some(&Token { data: TokenData::Identifier(ref param_ident), origin: param_start }) = parser.curr_tok() {
        let parameter_name = param_ident.clone();

        parser.advance();

        if let Some(&Token { data: TokenData::Operator(Colon), .. }) = parser.curr_tok() {
          parser.advance();

          if let Some(parameter_type) = type_expression(parser) {
            if let Some(&Token { data: TokenData::Operator(op), origin: param_end }) = parser.curr_tok() {
              // The parameter is recorded before checking the separator, so it
              // survives even if the following token turns out to be invalid
              parameters.push(LocalDeclaration::new(parameter_name, parameter_type, SourceRegion::merge(param_start, param_end)));
              
              if op == Comma {
                parser.advance();
                
                continue
              } else if op == RightParen {
                parser.advance();

                end_region = param_end;

                break;
              }
            }

            parser.error("Expected , to separate parameters or ) to end parameter list".to_owned());
          } // else { Error has already been issued by type_expression, fall through to synchronization }
        } else {
          parser.error("Expected : and a type expression to follow parameter name".to_owned());
        }
      }

      // Error path: try to synchronize to a , (resume list) or ) (end list)
      if parser.synchronize(sync::close_pair_or(sync::operator(LeftParen), sync::operator(RightParen), sync::operator(Comma))) {
        if let Some(&Token { data: TokenData::Operator(op), .. }) = parser.curr_tok() {
          if op == Comma {
            parser.advance();
            continue;
          } else {
            parser.advance();
            break;
          }
        }
      }

      // Could not recover
      return None
    }
  }

  // Optional -> return type annotation
  let return_type = if let Some(&Token { data: TokenData::Operator(RightArrow), .. }) = parser.curr_tok() {
    parser.advance();

    if let Some(texpr) = type_expression(parser) {
      end_region = texpr.origin;
      Some(texpr)
    } else {
      // type_expression should have already provided an error message
      // Synchronization should be handled by higher level parselet
      return None
    }
  } else {
    None
  };

  // Optional { ... } body
  let body = if let Some(&Token { data: TokenData::Operator(LeftBracket), .. }) = parser.curr_tok() {
    if let Some(blk) = block(parser) {
      end_region = blk.origin;
      Some(blk)
    } else {
      // block should have already provided an error message
      // Synchronization should be handled by higher level parselet
      return None
    }
  } else {
    None
  };

  Some(Item::new(
    ItemData::Function { identifier, parameters, return_type, body },
    SourceRegion::merge(start_region, end_region)
  ))
}


/// Pairs a token predicate with the parselet function to run when it matches
struct ItemParselet {
  predicate: ParseletPredicate,
  function: ParseletFunction<Item>,
}

/// Build a constant slice of `ItemParselet`s from `predicate => function` pairs
macro_rules! itm { ($( $predicate: expr => $function: expr ),* $(,)?) => { &[ $( ItemParselet { predicate: $predicate, function: $function } ),* ] } }

impl ItemParselet {
  /// The table of parselets tried, in order, for top level item tokens
  /// (`import` / `export` are special-cased in `item` and are not listed here)
  const PARSELETS: &'static [Self] = itm! [
    |token| token.is_keyword(Module)   => itm_module,
    |token| token.is_keyword(Global)   => itm_global,
    |token| token.is_keyword(Function) => itm_function,
  ];

  /// Get the function of the first parselet whose predicate matches `token`, if any
  fn get_function (token: &Token) -> Option<ParseletFunction<Item>> {
    // Idiomatic iterator search in place of the previous manual loop
    Self::PARSELETS.iter()
      .find(|parselet| (parselet.predicate)(token))
      .map(|parselet| parselet.function)
  }
}