1#![forbid(unsafe_code)]
2pub(crate) mod decode;
11mod engine;
12pub(crate) mod light;
13pub(crate) mod remap;
14
15#[cfg(test)]
16#[path = "tests/mod.rs"]
17mod tests;
18
19use std::borrow::Cow;
20use std::sync::Arc;
21use std::{fs, io, ops::Range, path::Path};
22
23use self::decode::{
24 decode_owned_bytes_auto, decode_owned_bytes_with_encoding, decode_source_auto,
25 decode_source_with_encoding,
26};
27use self::engine::Parser;
28pub use self::light::{
29 DecodedLightSlice, LightCommandSurface, LightItem, LightItemSink, LightParse,
30 LightParseOptions, LightProcSurface, LightScanReport, LightScanSummary, LightSourceFile,
31 LightSourceView, LightWord, SharedLightParse, SharedLightScanReport, parse_light_bytes,
32 parse_light_bytes_with_encoding, parse_light_file, parse_light_file_with_encoding,
33 parse_light_file_with_encoding_and_options, parse_light_file_with_options,
34 parse_light_shared_bytes, parse_light_shared_bytes_with_encoding, parse_light_shared_file,
35 parse_light_shared_file_with_encoding, parse_light_shared_source,
36 parse_light_shared_source_with_options, parse_light_source, parse_light_source_with_options,
37 scan_light_bytes_with_encoding_and_options_and_sink, scan_light_bytes_with_encoding_and_sink,
38 scan_light_bytes_with_options_and_sink, scan_light_bytes_with_options_and_sink_and_then,
39 scan_light_bytes_with_sink, scan_light_file_with_encoding_and_options_and_sink,
40 scan_light_file_with_encoding_and_sink, scan_light_file_with_options_and_sink,
41 scan_light_file_with_sink, scan_light_shared_bytes_with_encoding_and_options_and_sink,
42 scan_light_shared_bytes_with_options_and_sink,
43 scan_light_shared_file_with_encoding_and_options_and_sink,
44 scan_light_shared_file_with_options_and_sink, scan_light_shared_source_with_options_and_sink,
45 scan_light_shared_source_with_sink, scan_light_source_with_options_and_sink,
46 scan_light_source_with_sink,
47};
48use self::remap::{
49 RangeMapper, remap_parse_ranges_with_mapper, remap_shared_parse_ranges_with_mapper,
50 remap_source_file_ranges,
51};
52
53use mel_syntax::{LexDiagnostic, SourceMap, SourceView, TextRange, text_range};
54
/// Default value for [`ParseBudgets::max_bytes`] (256 MiB).
const DEFAULT_MAX_BYTES: usize = 256 * 1024 * 1024;
/// Default value for [`ParseBudgets::max_nesting_depth`].
const DEFAULT_MAX_NESTING_DEPTH: usize = 512;
/// Default value for [`ParseBudgets::max_tokens`].
const DEFAULT_MAX_TOKENS: usize = 8_000_000;
/// Default value for [`ParseBudgets::max_statements`].
const DEFAULT_MAX_STATEMENTS: usize = 1_000_000;
/// Default value for [`ParseBudgets::max_literal_bytes`] (8 MiB).
const DEFAULT_MAX_LITERAL_BYTES: usize = 8 * 1024 * 1024;
/// Budgets returned by `ParseBudgets::default()` and used by every
/// `ParseOptions` constructor in this file.
const DEFAULT_PARSE_BUDGETS: ParseBudgets = ParseBudgets {
    max_bytes: DEFAULT_MAX_BYTES,
    max_nesting_depth: DEFAULT_MAX_NESTING_DEPTH,
    max_tokens: DEFAULT_MAX_TOKENS,
    max_statements: DEFAULT_MAX_STATEMENTS,
    max_literal_bytes: DEFAULT_MAX_LITERAL_BYTES,
};
67
/// A diagnostic reported while decoding source bytes into text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodeDiagnostic {
    /// Description of the problem; either a static string or an owned one.
    pub message: Cow<'static, str>,
    /// Range the diagnostic applies to.
    pub range: TextRange,
}
74
/// An error reported by the parser, including budget violations built by
/// `budget_error`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Static description of the problem.
    pub message: &'static str,
    /// Range the error applies to.
    pub range: TextRange,
}
81
/// Text encoding used to interpret source bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SourceEncoding {
    /// UTF-8; also the encoding recorded for inputs that arrive as `&str`.
    Utf8,
    /// Code page 932.
    Cp932,
    /// GBK.
    Gbk,
}
89
/// Selects which syntax variant the parser accepts.
///
/// The two predicates on this type classify each mode along two independent
/// axes: expression syntax, and tolerance for a trailing top-level statement
/// without a semicolon.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ParseMode {
    /// The default mode; neither predicate holds.
    #[default]
    Strict,
    /// Like `Strict`, but `allows_trailing_top_level_stmt_without_semi` holds.
    AllowTrailingStmtWithoutSemi,
    /// Expression syntax; `is_expression_syntax` holds.
    Expression,
    /// Expression syntax for which both predicates hold.
    ExpressionAllowTrailingStmtWithoutSemi,
}

impl ParseMode {
    /// Returns `true` for the two `Expression*` modes.
    #[must_use]
    pub const fn is_expression_syntax(self) -> bool {
        match self {
            Self::Expression | Self::ExpressionAllowTrailingStmtWithoutSemi => true,
            Self::Strict | Self::AllowTrailingStmtWithoutSemi => false,
        }
    }

    /// Returns `true` for the two `*AllowTrailingStmtWithoutSemi` modes.
    #[must_use]
    pub const fn allows_trailing_top_level_stmt_without_semi(self) -> bool {
        match self {
            Self::AllowTrailingStmtWithoutSemi | Self::ExpressionAllowTrailingStmtWithoutSemi => {
                true
            }
            Self::Strict | Self::Expression => false,
        }
    }
}
121
/// Resource limits applied to a parse.
///
/// Only `max_bytes` is enforced in this file; the remaining limits are
/// carried inside `ParseOptions` to the parser engine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseBudgets {
    /// Maximum input length in bytes; longer inputs are rejected before parsing.
    pub max_bytes: usize,
    /// Nesting-depth limit (reported as `max_nesting_depth` in budget errors).
    pub max_nesting_depth: usize,
    /// Token-count limit (reported as `max_tokens` in budget errors).
    pub max_tokens: usize,
    /// Statement-count limit (reported as `max_statements` in budget errors).
    pub max_statements: usize,
    /// Literal-size limit in bytes (reported as `max_literal_bytes` in budget errors).
    pub max_literal_bytes: usize,
}
135
impl Default for ParseBudgets {
    /// Returns `DEFAULT_PARSE_BUDGETS`.
    fn default() -> Self {
        DEFAULT_PARSE_BUDGETS
    }
}
141
142impl ParseBudgets {
143 #[must_use]
144 pub const fn unlimited() -> Self {
145 Self {
146 max_bytes: usize::MAX,
147 max_nesting_depth: usize::MAX,
148 max_tokens: usize::MAX,
149 max_statements: usize::MAX,
150 max_literal_bytes: usize::MAX,
151 }
152 }
153}
154
/// Options controlling a parse: the syntax mode plus resource budgets.
///
/// The derived `Default` combines `ParseMode::default()` (`Strict`) with
/// `ParseBudgets::default()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ParseOptions {
    /// Syntax mode handed to the parser.
    pub mode: ParseMode,
    /// Resource limits for the parse.
    pub budgets: ParseBudgets,
}
165
166impl ParseOptions {
167 #[must_use]
168 pub const fn strict() -> Self {
169 Self {
170 mode: ParseMode::Strict,
171 budgets: DEFAULT_PARSE_BUDGETS,
172 }
173 }
174
175 #[must_use]
176 pub const fn inline() -> Self {
177 Self {
178 mode: ParseMode::AllowTrailingStmtWithoutSemi,
179 budgets: DEFAULT_PARSE_BUDGETS,
180 }
181 }
182
183 #[must_use]
184 pub const fn expression() -> Self {
185 Self {
186 mode: ParseMode::Expression,
187 budgets: DEFAULT_PARSE_BUDGETS,
188 }
189 }
190
191 #[must_use]
192 pub const fn inline_expression() -> Self {
193 Self {
194 mode: ParseMode::ExpressionAllowTrailingStmtWithoutSemi,
195 budgets: DEFAULT_PARSE_BUDGETS,
196 }
197 }
198}
199
/// Result of parsing a source that owns its decoded text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parse {
    /// Parsed syntax tree.
    pub syntax: mel_ast::SourceFile,
    /// Decoded source text that was handed to the parser.
    pub source_text: String,
    /// Offset map paired with `source_text` when building a `SourceView`.
    pub source_map: SourceMap,
    /// Encoding the text was decoded from (`Utf8` for `&str` inputs).
    pub source_encoding: SourceEncoding,
    /// Diagnostics produced while decoding bytes; empty for `&str` inputs.
    pub decode_errors: Vec<DecodeDiagnostic>,
    /// Diagnostics reported by the lexer.
    pub lex_errors: Vec<LexDiagnostic>,
    /// Errors reported by the parser, including budget violations.
    pub errors: Vec<ParseError>,
}
216
/// Result of parsing a source whose text is held in an `Arc<str>`.
///
/// Has the same fields as `Parse` apart from the type of `source_text`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SharedParse {
    /// Parsed syntax tree.
    pub syntax: mel_ast::SourceFile,
    /// Reference-counted source text that was handed to the parser.
    pub source_text: Arc<str>,
    /// Offset map paired with `source_text` when building a `SourceView`.
    pub source_map: SourceMap,
    /// Encoding the text was decoded from (`Utf8` for string inputs).
    pub source_encoding: SourceEncoding,
    /// Diagnostics produced while decoding bytes; empty for string inputs.
    pub decode_errors: Vec<DecodeDiagnostic>,
    /// Diagnostics reported by the lexer.
    pub lex_errors: Vec<LexDiagnostic>,
    /// Errors reported by the parser, including budget violations.
    pub errors: Vec<ParseError>,
}
231
/// Result of parsing a sub-range of an existing `SourceView`.
#[derive(Debug, Clone)]
pub struct ParseSlice<'a> {
    /// Parsed syntax tree for the slice.
    pub syntax: mel_ast::SourceFile,
    /// The source view the slice was taken from.
    pub source: SourceView<'a>,
    /// Diagnostics reported by the lexer.
    pub lex_errors: Vec<LexDiagnostic>,
    /// Errors reported by the parser, including budget violations.
    pub errors: Vec<ParseError>,
}
240
impl ParseSlice<'_> {
    /// Returns the text for `range` by delegating to `SourceView::slice`.
    #[must_use]
    pub fn source_slice(&self, range: TextRange) -> &str {
        self.source.slice(range)
    }

    /// Returns the text for `range` by delegating to `SourceView::display_slice`.
    #[must_use]
    pub fn display_slice(&self, range: TextRange) -> &str {
        self.source.display_slice(range)
    }
}
252
253impl Parse {
254 #[must_use]
255 pub fn source_view(&self) -> SourceView<'_> {
256 SourceView::new(&self.source_text, &self.source_map)
257 }
258
259 #[must_use]
260 pub fn source_range(&self, range: TextRange) -> Range<usize> {
261 self.source_view().display_range(range)
262 }
263
264 #[must_use]
265 pub fn source_slice(&self, range: TextRange) -> &str {
266 self.source_view().slice(range)
267 }
268
269 #[must_use]
270 pub fn display_slice(&self, range: TextRange) -> &str {
271 self.source_view().display_slice(range)
272 }
273
274 #[must_use]
275 pub fn string_literal_contents(&self, range: TextRange) -> Option<&str> {
276 self.source_slice(range)
277 .strip_prefix('"')?
278 .strip_suffix('"')
279 }
280}
281
282impl SharedParse {
283 #[must_use]
284 pub fn source_view(&self) -> SourceView<'_> {
285 SourceView::new(&self.source_text, &self.source_map)
286 }
287
288 #[must_use]
289 pub fn source_range(&self, range: TextRange) -> Range<usize> {
290 self.source_view().display_range(range)
291 }
292
293 #[must_use]
294 pub fn source_slice(&self, range: TextRange) -> &str {
295 self.source_view().slice(range)
296 }
297
298 #[must_use]
299 pub fn display_slice(&self, range: TextRange) -> &str {
300 self.source_view().display_slice(range)
301 }
302
303 #[must_use]
304 pub fn string_literal_contents(&self, range: TextRange) -> Option<&str> {
305 self.source_slice(range)
306 .strip_prefix('"')?
307 .strip_suffix('"')
308 }
309}
310
311impl From<SharedParse> for Parse {
312 fn from(value: SharedParse) -> Self {
313 Self {
314 syntax: value.syntax,
315 source_text: value.source_text.as_ref().to_owned(),
316 source_map: value.source_map,
317 source_encoding: value.source_encoding,
318 decode_errors: value.decode_errors,
319 lex_errors: value.lex_errors,
320 errors: value.errors,
321 }
322 }
323}
324
325#[must_use]
326pub fn parse_source(input: &str) -> Parse {
336 parse_source_with_options(input, ParseOptions::default())
337}
338
339#[must_use]
340pub fn parse_source_with_options(input: &str, options: ParseOptions) -> Parse {
356 if let Some(error) = max_bytes_error_for_text(input.len(), options.budgets) {
357 return parse_budget_failure_for_source(input.to_owned(), error);
358 }
359 parse_owned_source(
360 input.to_owned(),
361 SourceMap::identity(input.len()),
362 SourceEncoding::Utf8,
363 Vec::new(),
364 options,
365 )
366}
367
368#[must_use]
369pub fn parse_shared_source(input: Arc<str>) -> SharedParse {
371 parse_shared_source_with_options(input, ParseOptions::default())
372}
373
374#[must_use]
375pub fn parse_shared_source_with_options(input: Arc<str>, options: ParseOptions) -> SharedParse {
377 if let Some(error) = max_bytes_error_for_text(input.len(), options.budgets) {
378 return shared_parse_budget_failure_for_source(input, error);
379 }
380 let len = input.len();
381 parse_shared_source_text(
382 input,
383 SourceMap::identity(len),
384 SourceEncoding::Utf8,
385 Vec::new(),
386 options,
387 )
388}
389
390#[must_use]
391pub fn parse_source_view_range(source: SourceView<'_>, range: TextRange) -> ParseSlice<'_> {
393 parse_source_view_range_with_options(source, range, ParseOptions::default())
394}
395
396#[must_use]
397pub fn parse_source_view_range_with_options(
399 source: SourceView<'_>,
400 range: TextRange,
401 options: ParseOptions,
402) -> ParseSlice<'_> {
403 let display_range = source.display_range(range);
404 let input = &source.text()[display_range.clone()];
405 if let Some(error) = max_bytes_error_for_text(input.len(), options.budgets) {
406 return ParseSlice {
407 syntax: mel_ast::SourceFile { items: Vec::new() },
408 source,
409 lex_errors: Vec::new(),
410 errors: vec![error],
411 };
412 }
413 let mut parse = parse_borrowed_source(
414 input,
415 SourceMap::identity(input.len()),
416 SourceEncoding::Utf8,
417 Vec::new(),
418 options,
419 );
420 let mapper = SourceViewRangeMapper {
421 source,
422 display_start: display_range.start,
423 };
424 remap_source_file_ranges(&mut parse.syntax, &mapper);
425 for diagnostic in &mut parse.lex_errors {
426 diagnostic.range = mapper.map_range(diagnostic.range);
427 }
428 for error in &mut parse.errors {
429 error.range = mapper.map_range(error.range);
430 }
431
432 ParseSlice {
433 syntax: parse.syntax,
434 source,
435 lex_errors: parse.lex_errors,
436 errors: parse.errors,
437 }
438}
439
440#[must_use]
441pub fn parse_bytes(input: &[u8]) -> Parse {
443 if let Some(error) = max_bytes_error_for_bytes(input.len(), ParseBudgets::default()) {
444 return parse_budget_failure_empty(error);
445 }
446 parse_decoded_source(decode_source_auto(input), ParseOptions::default())
447}
448
449#[must_use]
450pub fn parse_shared_bytes(input: &[u8]) -> SharedParse {
452 if let Some(error) = max_bytes_error_for_bytes(input.len(), ParseBudgets::default()) {
453 return shared_parse_budget_failure_empty(error);
454 }
455 parse_shared_decoded_source(decode_source_auto(input), ParseOptions::default())
456}
457
458#[must_use]
459pub fn parse_bytes_with_encoding(input: &[u8], encoding: SourceEncoding) -> Parse {
461 if let Some(error) = max_bytes_error_for_bytes(input.len(), ParseBudgets::default()) {
462 return parse_budget_failure_empty(error);
463 }
464 parse_decoded_source(
465 decode_source_with_encoding(input, encoding),
466 ParseOptions::default(),
467 )
468}
469
470#[must_use]
471pub fn parse_shared_bytes_with_encoding(input: &[u8], encoding: SourceEncoding) -> SharedParse {
473 if let Some(error) = max_bytes_error_for_bytes(input.len(), ParseBudgets::default()) {
474 return shared_parse_budget_failure_empty(error);
475 }
476 parse_shared_decoded_source(
477 decode_source_with_encoding(input, encoding),
478 ParseOptions::default(),
479 )
480}
481
482pub fn parse_shared_file(path: impl AsRef<Path>) -> io::Result<SharedParse> {
484 if let Some(error) = max_bytes_error_for_file(path.as_ref(), ParseBudgets::default())? {
485 return Ok(shared_parse_budget_failure_empty(error));
486 }
487 let bytes = fs::read(path)?;
488 Ok(parse_shared_bytes(&bytes))
489}
490
491pub fn parse_shared_file_with_encoding(
493 path: impl AsRef<Path>,
494 encoding: SourceEncoding,
495) -> io::Result<SharedParse> {
496 if let Some(error) = max_bytes_error_for_file(path.as_ref(), ParseBudgets::default())? {
497 return Ok(shared_parse_budget_failure_empty(error));
498 }
499 let bytes = fs::read(path)?;
500 Ok(parse_shared_bytes_with_encoding(&bytes, encoding))
501}
502
503pub fn parse_file(path: impl AsRef<Path>) -> io::Result<Parse> {
505 parse_file_with_options(path, ParseOptions::default())
506}
507
508pub fn parse_file_with_options(path: impl AsRef<Path>, options: ParseOptions) -> io::Result<Parse> {
510 if let Some(error) = max_bytes_error_for_file(path.as_ref(), options.budgets)? {
511 return Ok(parse_budget_failure_empty(error));
512 }
513 let bytes = fs::read(path)?;
514 Ok(parse_owned_bytes(bytes, options))
515}
516
517pub fn parse_file_with_encoding(
519 path: impl AsRef<Path>,
520 encoding: SourceEncoding,
521) -> io::Result<Parse> {
522 parse_file_with_encoding_and_options(path, encoding, ParseOptions::default())
523}
524
525pub fn parse_file_with_encoding_and_options(
527 path: impl AsRef<Path>,
528 encoding: SourceEncoding,
529 options: ParseOptions,
530) -> io::Result<Parse> {
531 if let Some(error) = max_bytes_error_for_file(path.as_ref(), options.budgets)? {
532 return Ok(parse_budget_failure_empty(error));
533 }
534 let bytes = fs::read(path)?;
535 Ok(parse_owned_bytes_with_encoding(bytes, encoding, options))
536}
537
538struct SourceViewRangeMapper<'a> {
539 source: SourceView<'a>,
540 display_start: usize,
541}
542
543impl RangeMapper for SourceViewRangeMapper<'_> {
544 fn map_range(&self, range: TextRange) -> TextRange {
545 let start = self.display_start + usize::from(range.start());
546 let end = self.display_start + usize::from(range.end());
547 self.source.source_range_from_display_range(start..end)
548 }
549}
550
551struct SourceMapRangeMapper<'a> {
552 source_map: &'a SourceMap,
553}
554
555impl RangeMapper for SourceMapRangeMapper<'_> {
556 fn map_range(&self, range: TextRange) -> TextRange {
557 self.source_map
558 .source_range_from_display_range(usize::from(range.start())..usize::from(range.end()))
559 }
560}
561
562fn parse_owned_source(
563 source_text: String,
564 source_map: SourceMap,
565 source_encoding: SourceEncoding,
566 decode_errors: Vec<DecodeDiagnostic>,
567 options: ParseOptions,
568) -> Parse {
569 let mut parse = parse_borrowed_source(
570 &source_text,
571 source_map,
572 source_encoding,
573 decode_errors,
574 options,
575 );
576 parse.source_text = source_text;
577 parse
578}
579
580fn parse_shared_source_text(
581 source_text: Arc<str>,
582 source_map: SourceMap,
583 source_encoding: SourceEncoding,
584 decode_errors: Vec<DecodeDiagnostic>,
585 options: ParseOptions,
586) -> SharedParse {
587 let parse = Parser::new(&source_text, options).parse();
588 SharedParse {
589 syntax: parse.syntax,
590 source_text,
591 source_map,
592 source_encoding,
593 decode_errors,
594 lex_errors: parse.lex_errors,
595 errors: parse.errors,
596 }
597}
598
599fn parse_borrowed_source(
600 input: &str,
601 source_map: SourceMap,
602 source_encoding: SourceEncoding,
603 decode_errors: Vec<DecodeDiagnostic>,
604 options: ParseOptions,
605) -> Parse {
606 let mut parse = Parser::new(input, options).parse();
607 parse.source_map = source_map;
608 parse.source_encoding = source_encoding;
609 parse.decode_errors = decode_errors;
610 parse
611}
612
613fn parse_decoded_source(decoded: decode::DecodedSource<'_>, options: ParseOptions) -> Parse {
614 let source_map = decoded.offset_map.source_map();
615 let mut parse = parse_owned_source(
616 decoded.text.into_owned(),
617 source_map.clone(),
618 decoded.encoding,
619 decoded.diagnostics,
620 options,
621 );
622 remap_parse_ranges_with_mapper(
623 &mut parse,
624 &SourceMapRangeMapper {
625 source_map: &source_map,
626 },
627 );
628 parse
629}
630
631fn parse_shared_decoded_source(
632 decoded: decode::DecodedSource<'_>,
633 options: ParseOptions,
634) -> SharedParse {
635 let source_map = decoded.offset_map.source_map();
636 let mut parse = parse_shared_source_text(
637 Arc::from(decoded.text.into_owned()),
638 source_map.clone(),
639 decoded.encoding,
640 decoded.diagnostics,
641 options,
642 );
643 remap_shared_parse_ranges_with_mapper(
644 &mut parse,
645 &SourceMapRangeMapper {
646 source_map: &source_map,
647 },
648 );
649 parse
650}
651
652fn parse_owned_decoded_source(decoded: decode::DecodedOwnedSource, options: ParseOptions) -> Parse {
653 let source_map = decoded.offset_map.source_map();
654 let mut parse = parse_owned_source(
655 decoded.text,
656 source_map.clone(),
657 decoded.encoding,
658 decoded.diagnostics,
659 options,
660 );
661 remap_parse_ranges_with_mapper(
662 &mut parse,
663 &SourceMapRangeMapper {
664 source_map: &source_map,
665 },
666 );
667 parse
668}
669
670fn parse_owned_bytes(input: Vec<u8>, options: ParseOptions) -> Parse {
671 if let Some(error) = max_bytes_error_for_bytes(input.len(), options.budgets) {
672 return parse_budget_failure_empty(error);
673 }
674 parse_owned_decoded_source(decode_owned_bytes_auto(input), options)
675}
676
677fn parse_owned_bytes_with_encoding(
678 input: Vec<u8>,
679 encoding: SourceEncoding,
680 options: ParseOptions,
681) -> Parse {
682 if let Some(error) = max_bytes_error_for_bytes(input.len(), options.budgets) {
683 return parse_budget_failure_empty(error);
684 }
685 parse_owned_decoded_source(decode_owned_bytes_with_encoding(input, encoding), options)
686}
687
/// Returns the static error message for the budget named by `limit`.
///
/// The five known budget names each map to a message that ends with the
/// name; any other value yields the generic message without a suffix.
pub(crate) fn budget_error_message(limit: &'static str) -> &'static str {
    const KNOWN_LIMITS: [(&str, &str); 5] = [
        ("max_bytes", "source exceeds parse budget: max_bytes"),
        ("max_tokens", "source exceeds parse budget: max_tokens"),
        ("max_statements", "source exceeds parse budget: max_statements"),
        ("max_nesting_depth", "source exceeds parse budget: max_nesting_depth"),
        ("max_literal_bytes", "source exceeds parse budget: max_literal_bytes"),
    ];
    KNOWN_LIMITS
        .iter()
        .find(|(name, _)| *name == limit)
        .map_or("source exceeds parse budget", |(_, message)| *message)
}
698
699pub(crate) fn budget_error(limit: &'static str, range: TextRange) -> ParseError {
700 ParseError {
701 message: budget_error_message(limit),
702 range,
703 }
704}
705
706fn max_bytes_error_for_text(len: usize, budgets: ParseBudgets) -> Option<ParseError> {
707 (len > budgets.max_bytes).then(|| budget_error("max_bytes", text_len_range(len)))
708}
709
710fn max_bytes_error_for_bytes(len: usize, budgets: ParseBudgets) -> Option<ParseError> {
711 (len > budgets.max_bytes).then(|| budget_error("max_bytes", text_range(0, 0)))
712}
713
714fn max_bytes_error_for_file(path: &Path, budgets: ParseBudgets) -> io::Result<Option<ParseError>> {
715 match fs::metadata(path) {
716 Ok(metadata) if metadata.len() > budgets.max_bytes as u64 => {
717 Ok(Some(budget_error("max_bytes", text_range(0, 0))))
718 }
719 Ok(_) => Ok(None),
720 Err(error) if error.kind() == io::ErrorKind::NotFound => Err(error),
721 Err(_) => Ok(None),
722 }
723}
724
725fn parse_budget_failure_for_source(source_text: String, error: ParseError) -> Parse {
726 Parse {
727 syntax: mel_ast::SourceFile { items: Vec::new() },
728 source_map: SourceMap::identity(source_text.len()),
729 source_text,
730 source_encoding: SourceEncoding::Utf8,
731 decode_errors: Vec::new(),
732 lex_errors: Vec::new(),
733 errors: vec![error],
734 }
735}
736
737fn shared_parse_budget_failure_for_source(source_text: Arc<str>, error: ParseError) -> SharedParse {
738 SharedParse {
739 syntax: mel_ast::SourceFile { items: Vec::new() },
740 source_map: SourceMap::identity(source_text.len()),
741 source_text,
742 source_encoding: SourceEncoding::Utf8,
743 decode_errors: Vec::new(),
744 lex_errors: Vec::new(),
745 errors: vec![error],
746 }
747}
748
749fn parse_budget_failure_empty(error: ParseError) -> Parse {
750 Parse {
751 syntax: mel_ast::SourceFile { items: Vec::new() },
752 source_text: String::new(),
753 source_map: SourceMap::identity(0),
754 source_encoding: SourceEncoding::Utf8,
755 decode_errors: Vec::new(),
756 lex_errors: Vec::new(),
757 errors: vec![error],
758 }
759}
760
761fn shared_parse_budget_failure_empty(error: ParseError) -> SharedParse {
762 SharedParse {
763 syntax: mel_ast::SourceFile { items: Vec::new() },
764 source_text: Arc::from(""),
765 source_map: SourceMap::identity(0),
766 source_encoding: SourceEncoding::Utf8,
767 decode_errors: Vec::new(),
768 lex_errors: Vec::new(),
769 errors: vec![error],
770 }
771}
772
773pub(crate) fn text_len_range(len: usize) -> TextRange {
774 text_range(0, len.min(u32::MAX as usize) as u32)
775}