//! Light (surface-level) parser for MEL — maya_mel/parser/light.rs

1use std::borrow::Cow;
2use std::sync::Arc;
3use std::{fs, io, ops::Range, path::Path};
4
5use encoding_rs::{Encoding, GBK, SHIFT_JIS};
6use mel_syntax::{SourceMap, SourceView, TextRange, text_range};
7
8use crate::{
9    DecodeDiagnostic, ParseBudgets, ParseError, SourceEncoding, budget_error,
10    decode::{OffsetMap, decode_source_auto, decode_source_with_encoding},
11    text_len_range,
12};
13
// Defaults for `LightParseOptions` (see its `Default` impl below).
// Cap on the number of words recorded per command surface.
const DEFAULT_MAX_PREFIX_WORDS: usize = 64;
// Cap on the number of bytes scanned for a command's word prefix.
const DEFAULT_MAX_PREFIX_BYTES: usize = 4096;
16
/// Tunables for the light (surface-level) scan.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LightParseOptions {
    // Max words captured per command before the rest becomes an opaque
    // tail — NOTE(review): inferred from the field/const names; confirm
    // against the scanner implementation.
    pub max_prefix_words: usize,
    // Max bytes examined for a command's word prefix.
    pub max_prefix_bytes: usize,
    // Hard resource limits (e.g. max input bytes) enforced before scanning.
    pub budgets: ParseBudgets,
}
23
24impl Default for LightParseOptions {
25    fn default() -> Self {
26        Self {
27            max_prefix_words: DEFAULT_MAX_PREFIX_WORDS,
28            max_prefix_bytes: DEFAULT_MAX_PREFIX_BYTES,
29            budgets: ParseBudgets::default(),
30        }
31    }
32}
33
/// Result of a light scan: the flat list of top-level surface items.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct LightSourceFile {
    // Items in source order, as emitted to the sink during scanning.
    pub items: Vec<LightItem>,
}
38
/// One top-level surface item recognized by the light scanner.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LightItem {
    /// A command invocation surface (head + words).
    Command(LightCommandSurface),
    /// A `proc` definition surface.
    Proc(LightProcSurface),
    /// Anything the light scanner does not model; only its span is kept.
    Other { span: TextRange },
}
45
/// Surface-level view of a `proc` definition.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LightProcSurface {
    // Range of the proc's name, when one was found.
    pub name_range: Option<TextRange>,
    // Whether the proc was declared `global`.
    pub is_global: bool,
    // Full span of the definition.
    pub span: TextRange,
}
52
/// Surface-level view of a command invocation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LightCommandSurface {
    // Range of the command head (its name).
    pub head_range: TextRange,
    // Whether the command appears inside a capture (`...`) context —
    // NOTE(review): inferred from the name; confirm with the scanner.
    pub captured: bool,
    // Words following the head, up to the configured prefix limits.
    pub words: Vec<LightWord>,
    // Remainder that was not tokenized into words, if the limits were hit.
    pub opaque_tail: Option<TextRange>,
    // Full span of the command.
    pub span: TextRange,
}
61
/// A single word in a command surface. Variants with a `text` range
/// distinguish the token's content range from its full `range`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LightWord {
    /// A `-flag` argument.
    Flag { text: TextRange, range: TextRange },
    /// A numeric literal.
    NumericLiteral { text: TextRange, range: TextRange },
    /// An unquoted word.
    BareWord { text: TextRange, range: TextRange },
    /// A `"..."` string literal.
    QuotedString { text: TextRange, range: TextRange },
    /// A `$variable` reference.
    Variable { range: TextRange },
    /// A parenthesized expression.
    GroupedExpr { range: TextRange },
    /// A `{...}` list literal.
    BraceList { range: TextRange },
    /// A `<<...>>` vector literal.
    VectorLiteral { range: TextRange },
    /// A backtick-style command capture — NOTE(review): exact syntax not
    /// visible here; confirm with the scanner.
    Capture { range: TextRange },
}
74
75impl LightWord {
76    #[must_use]
77    pub const fn range(&self) -> TextRange {
78        match self {
79            Self::Flag { range, .. }
80            | Self::NumericLiteral { range, .. }
81            | Self::BareWord { range, .. }
82            | Self::QuotedString { range, .. }
83            | Self::Variable { range }
84            | Self::GroupedExpr { range }
85            | Self::BraceList { range }
86            | Self::VectorLiteral { range }
87            | Self::Capture { range } => *range,
88        }
89    }
90}
91
/// Receiver for surface items as the scanner produces them.
pub trait LightItemSink {
    /// Called once per scanned top-level item, together with a view of the
    /// source that the item's ranges refer to.
    fn on_item(&mut self, source: LightSourceView<'_>, item: LightItem);
}
95
/// Any compatible closure can be used directly as a sink.
impl<F> LightItemSink for F
where
    F: for<'a> FnMut(LightSourceView<'a>, LightItem),
{
    fn on_item(&mut self, source: LightSourceView<'_>, item: LightItem) {
        self(source, item);
    }
}
104
/// Borrowed view of the scanned source: either already-decoded text with a
/// source map, or raw bytes plus the encoding they are believed to use.
#[derive(Clone, Copy)]
pub enum LightSourceView<'a> {
    /// Decoded text (offsets are text offsets).
    Text(SourceView<'a>),
    /// Undecoded bytes (offsets are byte offsets).
    Bytes {
        bytes: &'a [u8],
        encoding: SourceEncoding,
    },
}
113
114impl<'a> LightSourceView<'a> {
115    #[must_use]
116    pub fn raw_slice(self, range: TextRange) -> &'a [u8] {
117        let start = usize::from(range.start()).min(self.len());
118        let end = usize::from(range.end()).min(self.len()).max(start);
119        match self {
120            Self::Text(source) => &source.text().as_bytes()[start..end],
121            Self::Bytes { bytes, .. } => &bytes[start..end],
122        }
123    }
124
125    #[must_use]
126    pub fn try_ascii_slice(self, range: TextRange) -> Option<&'a str> {
127        std::str::from_utf8(self.raw_slice(range))
128            .ok()
129            .filter(|text| text.is_ascii())
130    }
131
132    #[must_use]
133    pub fn decode_slice_lossy_for_preview(self, range: TextRange) -> Cow<'a, str> {
134        match self {
135            Self::Text(source) => Cow::Borrowed(source.slice(range)),
136            Self::Bytes { bytes, encoding } => {
137                decode_bytes_lossy(slice_range(bytes, range), encoding)
138            }
139        }
140    }
141
142    #[must_use]
143    pub fn decode_slice(self, range: TextRange) -> DecodedLightSlice<'a> {
144        match self {
145            Self::Text(source) => DecodedLightSlice {
146                text: Cow::Borrowed(source.slice(range)),
147                diagnostics: Vec::new(),
148            },
149            Self::Bytes { bytes, encoding } => {
150                decode_bytes_with_diagnostics(slice_range(bytes, range), encoding, range)
151            }
152        }
153    }
154
155    #[must_use]
156    pub fn len(self) -> usize {
157        match self {
158            Self::Text(source) => source.text().len(),
159            Self::Bytes { bytes, .. } => bytes.len(),
160        }
161    }
162
163    #[must_use]
164    pub fn is_empty(self) -> bool {
165        self.len() == 0
166    }
167}
168
/// A decoded slice of source text plus any decode problems encountered.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedLightSlice<'a> {
    // Borrowed when the bytes were already valid; owned when replacement
    // characters had to be inserted.
    pub text: Cow<'a, str>,
    // Empty when the slice decoded cleanly.
    pub diagnostics: Vec<DecodeDiagnostic>,
}
174
175fn slice_range(bytes: &[u8], range: TextRange) -> &[u8] {
176    let start = usize::from(range.start()).min(bytes.len());
177    let end = usize::from(range.end()).min(bytes.len()).max(start);
178    &bytes[start..end]
179}
180
181fn decode_bytes_lossy(bytes: &[u8], encoding: SourceEncoding) -> Cow<'_, str> {
182    if matches!(encoding, SourceEncoding::Utf8) {
183        return String::from_utf8_lossy(bytes);
184    }
185    encoding_rs_encoding(encoding).decode(bytes).0
186}
187
188fn decode_bytes_with_diagnostics(
189    bytes: &[u8],
190    encoding: SourceEncoding,
191    range: TextRange,
192) -> DecodedLightSlice<'_> {
193    if matches!(encoding, SourceEncoding::Utf8) {
194        return match std::str::from_utf8(bytes) {
195            Ok(text) => DecodedLightSlice {
196                text: Cow::Borrowed(text),
197                diagnostics: Vec::new(),
198            },
199            Err(_) => DecodedLightSlice {
200                text: String::from_utf8_lossy(bytes),
201                diagnostics: vec![DecodeDiagnostic {
202                    message: "source slice is not valid UTF-8; decoded lossily".into(),
203                    range,
204                }],
205            },
206        };
207    }
208
209    let (text, _, had_errors) = encoding_rs_encoding(encoding).decode(bytes);
210    DecodedLightSlice {
211        text,
212        diagnostics: had_errors
213            .then(|| DecodeDiagnostic {
214                message: format!(
215                    "source slice is not valid {}; decoded with replacement",
216                    encoding.label()
217                )
218                .into(),
219                range,
220            })
221            .into_iter()
222            .collect(),
223    }
224}
225
/// Maps our `SourceEncoding` to the matching `encoding_rs` encoding.
/// CP932 is served by `SHIFT_JIS` (encoding_rs's Shift_JIS implements the
/// WHATWG superset covering Windows-31J).
fn encoding_rs_encoding(encoding: SourceEncoding) -> &'static Encoding {
    match encoding {
        SourceEncoding::Utf8 => encoding_rs::UTF_8,
        SourceEncoding::Cp932 => SHIFT_JIS,
        SourceEncoding::Gbk => GBK,
    }
}
233
/// Outcome of a scan that owns its decoded text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LightScanReport {
    // Decoded source (may be empty for byte-native scans).
    pub source_text: String,
    // Maps ranges between decoded text and the original byte stream.
    pub source_map: SourceMap,
    // Encoding the input was decoded from (Utf8 for text inputs).
    pub source_encoding: SourceEncoding,
    // Problems found while decoding the raw bytes.
    pub decode_errors: Vec<DecodeDiagnostic>,
    // Scan/parse errors, including budget violations.
    pub errors: Vec<ParseError>,
}
242
/// `LightScanReport` variant whose text is reference-counted so clones of
/// the report share one allocation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SharedLightScanReport {
    // Shared decoded source text.
    pub source_text: Arc<str>,
    pub source_map: SourceMap,
    pub source_encoding: SourceEncoding,
    pub decode_errors: Vec<DecodeDiagnostic>,
    pub errors: Vec<ParseError>,
}
251
/// Text-free scan outcome used by byte-native scans, where the decoded text
/// is never materialized.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LightScanSummary {
    pub source_encoding: SourceEncoding,
    pub decode_errors: Vec<DecodeDiagnostic>,
    pub errors: Vec<ParseError>,
}
258
259impl LightScanReport {
260    #[must_use]
261    pub fn source_view(&self) -> SourceView<'_> {
262        SourceView::new(&self.source_text, &self.source_map)
263    }
264
265    #[must_use]
266    pub fn source_range(&self, range: TextRange) -> Range<usize> {
267        self.source_view().display_range(range)
268    }
269
270    #[must_use]
271    pub fn source_slice(&self, range: TextRange) -> &str {
272        self.source_view().slice(range)
273    }
274
275    #[must_use]
276    pub fn display_slice(&self, range: TextRange) -> &str {
277        self.source_view().display_slice(range)
278    }
279
280    #[must_use]
281    pub fn string_literal_contents(&self, range: TextRange) -> Option<&str> {
282        self.source_slice(range)
283            .strip_prefix('"')?
284            .strip_suffix('"')
285    }
286}
287
288impl SharedLightScanReport {
289    #[must_use]
290    pub fn source_view(&self) -> SourceView<'_> {
291        SourceView::new(&self.source_text, &self.source_map)
292    }
293
294    #[must_use]
295    pub fn source_range(&self, range: TextRange) -> Range<usize> {
296        self.source_view().display_range(range)
297    }
298
299    #[must_use]
300    pub fn source_slice(&self, range: TextRange) -> &str {
301        self.source_view().slice(range)
302    }
303
304    #[must_use]
305    pub fn display_slice(&self, range: TextRange) -> &str {
306        self.source_view().display_slice(range)
307    }
308
309    #[must_use]
310    pub fn string_literal_contents(&self, range: TextRange) -> Option<&str> {
311        self.source_slice(range)
312            .strip_prefix('"')?
313            .strip_suffix('"')
314    }
315}
316
/// Full light-parse result: surface items plus the scan report fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LightParse {
    // Collected top-level items.
    pub source: LightSourceFile,
    pub source_text: String,
    pub source_map: SourceMap,
    pub source_encoding: SourceEncoding,
    pub decode_errors: Vec<DecodeDiagnostic>,
    pub errors: Vec<ParseError>,
}
326
/// `LightParse` variant with reference-counted source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SharedLightParse {
    pub source: LightSourceFile,
    // Shared so clones of the parse do not copy the text.
    pub source_text: Arc<str>,
    pub source_map: SourceMap,
    pub source_encoding: SourceEncoding,
    pub decode_errors: Vec<DecodeDiagnostic>,
    pub errors: Vec<ParseError>,
}
336
337impl LightParse {
338    #[must_use]
339    pub fn source_view(&self) -> SourceView<'_> {
340        SourceView::new(&self.source_text, &self.source_map)
341    }
342
343    #[must_use]
344    pub fn source_range(&self, range: TextRange) -> Range<usize> {
345        self.source_view().display_range(range)
346    }
347
348    #[must_use]
349    pub fn source_slice(&self, range: TextRange) -> &str {
350        self.source_view().slice(range)
351    }
352
353    #[must_use]
354    pub fn display_slice(&self, range: TextRange) -> &str {
355        self.source_view().display_slice(range)
356    }
357
358    #[must_use]
359    pub fn string_literal_contents(&self, range: TextRange) -> Option<&str> {
360        self.source_slice(range)
361            .strip_prefix('"')?
362            .strip_suffix('"')
363    }
364}
365
366impl SharedLightParse {
367    #[must_use]
368    pub fn source_view(&self) -> SourceView<'_> {
369        SourceView::new(&self.source_text, &self.source_map)
370    }
371
372    #[must_use]
373    pub fn source_range(&self, range: TextRange) -> Range<usize> {
374        self.source_view().display_range(range)
375    }
376
377    #[must_use]
378    pub fn source_slice(&self, range: TextRange) -> &str {
379        self.source_view().slice(range)
380    }
381
382    #[must_use]
383    pub fn display_slice(&self, range: TextRange) -> &str {
384        self.source_view().display_slice(range)
385    }
386
387    #[must_use]
388    pub fn string_literal_contents(&self, range: TextRange) -> Option<&str> {
389        self.source_slice(range)
390            .strip_prefix('"')?
391            .strip_suffix('"')
392    }
393}
394
395impl From<(LightSourceFile, LightScanReport)> for LightParse {
396    fn from((source, report): (LightSourceFile, LightScanReport)) -> Self {
397        Self {
398            source,
399            source_text: report.source_text,
400            source_map: report.source_map,
401            source_encoding: report.source_encoding,
402            decode_errors: report.decode_errors,
403            errors: report.errors,
404        }
405    }
406}
407
408impl From<(LightSourceFile, SharedLightScanReport)> for SharedLightParse {
409    fn from((source, report): (LightSourceFile, SharedLightScanReport)) -> Self {
410        Self {
411            source,
412            source_text: report.source_text,
413            source_map: report.source_map,
414            source_encoding: report.source_encoding,
415            decode_errors: report.decode_errors,
416            errors: report.errors,
417        }
418    }
419}
420
421impl From<SharedLightScanReport> for LightScanReport {
422    fn from(value: SharedLightScanReport) -> Self {
423        Self {
424            source_text: value.source_text.as_ref().to_owned(),
425            source_map: value.source_map,
426            source_encoding: value.source_encoding,
427            decode_errors: value.decode_errors,
428            errors: value.errors,
429        }
430    }
431}
432
433impl From<SharedLightParse> for LightParse {
434    fn from(value: SharedLightParse) -> Self {
435        Self {
436            source: value.source,
437            source_text: value.source_text.as_ref().to_owned(),
438            source_map: value.source_map,
439            source_encoding: value.source_encoding,
440            decode_errors: value.decode_errors,
441            errors: value.errors,
442        }
443    }
444}
445
/// Parses already-decoded UTF-8 text with default options.
#[must_use]
pub fn parse_light_source(input: &str) -> LightParse {
    parse_light_source_with_options(input, LightParseOptions::default())
}
450
451#[must_use]
452pub fn parse_light_source_with_options(input: &str, options: LightParseOptions) -> LightParse {
453    let mut sink = CollectLightItems::default();
454    let report = scan_light_source_with_options_and_sink(input, options, &mut sink);
455    LightParse::from((sink.finish(), report))
456}
457
/// Parses shared UTF-8 text with default options; the `Arc` is kept in the
/// result instead of copying the text.
#[must_use]
pub fn parse_light_shared_source(input: Arc<str>) -> SharedLightParse {
    parse_light_shared_source_with_options(input, LightParseOptions::default())
}
462
463#[must_use]
464pub fn parse_light_shared_source_with_options(
465    input: Arc<str>,
466    options: LightParseOptions,
467) -> SharedLightParse {
468    let mut sink = CollectLightItems::default();
469    let report =
470        scan_light_shared_source_with_options_and_sink(Arc::clone(&input), options, &mut sink);
471    SharedLightParse::from((sink.finish(), report))
472}
473
/// Scans decoded text with default options, streaming items to `sink`.
pub fn scan_light_source_with_sink(input: &str, sink: &mut impl LightItemSink) -> LightScanReport {
    scan_light_source_with_options_and_sink(input, LightParseOptions::default(), sink)
}
477
/// Scans shared text with default options, streaming items to `sink`.
pub fn scan_light_shared_source_with_sink(
    input: Arc<str>,
    sink: &mut impl LightItemSink,
) -> SharedLightScanReport {
    scan_light_shared_source_with_options_and_sink(input, LightParseOptions::default(), sink)
}
484
485pub fn scan_light_shared_source_with_options_and_sink(
486    input: Arc<str>,
487    options: LightParseOptions,
488    sink: &mut impl LightItemSink,
489) -> SharedLightScanReport {
490    let input_len = input.len();
491    if let Some(error) = max_bytes_error_for_text(input_len, options.budgets) {
492        return SharedLightScanReport {
493            source_text: input,
494            source_map: SourceMap::identity(input_len),
495            source_encoding: SourceEncoding::Utf8,
496            decode_errors: Vec::new(),
497            errors: vec![error],
498        };
499    }
500    let source_map = SourceMap::identity(input_len);
501    let source_view = LightSourceView::Text(SourceView::new(&input, &source_map));
502    let mut scanner = LightScanner::new(&input, options);
503    scanner.scan_with_sink(source_view, sink, None);
504    let errors = scanner.errors;
505    SharedLightScanReport {
506        source_text: input,
507        source_map,
508        source_encoding: SourceEncoding::Utf8,
509        decode_errors: Vec::new(),
510        errors,
511    }
512}
513
514pub fn scan_light_source_with_options_and_sink(
515    input: &str,
516    options: LightParseOptions,
517    sink: &mut impl LightItemSink,
518) -> LightScanReport {
519    if let Some(error) = max_bytes_error_for_text(input.len(), options.budgets) {
520        return LightScanReport {
521            source_text: input.to_owned(),
522            source_map: SourceMap::identity(input.len()),
523            source_encoding: SourceEncoding::Utf8,
524            decode_errors: Vec::new(),
525            errors: vec![error],
526        };
527    }
528    let source_map = SourceMap::identity(input.len());
529    let source_view = LightSourceView::Text(SourceView::new(input, &source_map));
530    let mut scanner = LightScanner::new(input, options);
531    scanner.scan_with_sink(source_view, sink, None);
532    LightScanReport {
533        source_text: input.to_owned(),
534        source_map,
535        source_encoding: SourceEncoding::Utf8,
536        decode_errors: Vec::new(),
537        errors: scanner.errors,
538    }
539}
540
541#[must_use]
542pub fn parse_light_bytes(input: &[u8]) -> LightParse {
543    let decoded = decode_source_auto(input);
544    let mut sink = CollectLightItems::default();
545    let report = build_light_scan(decoded, LightParseOptions::default(), &mut sink);
546    LightParse::from((sink.finish(), report))
547}
548
549#[must_use]
550pub fn parse_light_shared_bytes(input: &[u8]) -> SharedLightParse {
551    let decoded = decode_source_auto(input);
552    let mut sink = CollectLightItems::default();
553    let report = build_shared_light_scan(decoded, LightParseOptions::default(), &mut sink);
554    SharedLightParse::from((sink.finish(), report))
555}
556
557#[must_use]
558pub fn parse_light_bytes_with_encoding(input: &[u8], encoding: SourceEncoding) -> LightParse {
559    let decoded = decode_source_with_encoding(input, encoding);
560    let mut sink = CollectLightItems::default();
561    let report = build_light_scan(decoded, LightParseOptions::default(), &mut sink);
562    LightParse::from((sink.finish(), report))
563}
564
565#[must_use]
566pub fn parse_light_shared_bytes_with_encoding(
567    input: &[u8],
568    encoding: SourceEncoding,
569) -> SharedLightParse {
570    let decoded = decode_source_with_encoding(input, encoding);
571    let mut sink = CollectLightItems::default();
572    let report = build_shared_light_scan(decoded, LightParseOptions::default(), &mut sink);
573    SharedLightParse::from((sink.finish(), report))
574}
575
/// Scans raw bytes with default options, streaming items to `sink`.
pub fn scan_light_bytes_with_sink(input: &[u8], sink: &mut impl LightItemSink) -> LightScanReport {
    scan_light_bytes_with_options_and_sink(input, LightParseOptions::default(), sink)
}
579
580pub fn scan_light_shared_bytes_with_options_and_sink(
581    input: &[u8],
582    options: LightParseOptions,
583    sink: &mut impl LightItemSink,
584) -> SharedLightScanReport {
585    if let Some(error) = max_bytes_error_for_bytes(input.len(), options.budgets) {
586        return empty_shared_light_scan_report(error);
587    }
588    let summary =
589        scan_light_bytes_native(input, detect_light_source_encoding(input), options, sink);
590    shared_report_from_summary(summary, input.len())
591}
592
593pub fn scan_light_bytes_with_options_and_sink(
594    input: &[u8],
595    options: LightParseOptions,
596    sink: &mut impl LightItemSink,
597) -> LightScanReport {
598    if let Some(error) = max_bytes_error_for_bytes(input.len(), options.budgets) {
599        return empty_light_scan_report(error);
600    }
601    let summary =
602        scan_light_bytes_native(input, detect_light_source_encoding(input), options, sink);
603    report_from_summary(summary, input.len())
604}
605
606pub fn scan_light_bytes_with_options_and_sink_and_then<S, T>(
607    input: &[u8],
608    options: LightParseOptions,
609    sink: &mut S,
610    then: impl for<'a> FnOnce(&mut S, LightSourceView<'a>, LightScanSummary) -> T,
611) -> T
612where
613    S: LightItemSink,
614{
615    if let Some(error) = max_bytes_error_for_bytes(input.len(), options.budgets) {
616        let source_map = SourceMap::identity(0);
617        return then(
618            sink,
619            LightSourceView::Text(SourceView::new("", &source_map)),
620            LightScanSummary {
621                source_encoding: SourceEncoding::Utf8,
622                decode_errors: Vec::new(),
623                errors: vec![error],
624            },
625        );
626    }
627    let encoding = detect_light_source_encoding(input);
628    let summary = scan_light_bytes_native(input, encoding, options, sink);
629    then(
630        sink,
631        LightSourceView::Bytes {
632            bytes: input,
633            encoding,
634        },
635        summary,
636    )
637}
638
/// Byte-native scan with a forced encoding and default options.
pub fn scan_light_bytes_with_encoding_and_sink(
    input: &[u8],
    encoding: SourceEncoding,
    sink: &mut impl LightItemSink,
) -> LightScanReport {
    scan_light_bytes_with_encoding_and_options_and_sink(
        input,
        encoding,
        LightParseOptions::default(),
        sink,
    )
}
651
652pub fn scan_light_shared_bytes_with_encoding_and_options_and_sink(
653    input: &[u8],
654    encoding: SourceEncoding,
655    options: LightParseOptions,
656    sink: &mut impl LightItemSink,
657) -> SharedLightScanReport {
658    if let Some(error) = max_bytes_error_for_bytes(input.len(), options.budgets) {
659        return empty_shared_light_scan_report(error);
660    }
661    let summary = scan_light_bytes_native(input, encoding, options, sink);
662    shared_report_from_summary(summary, input.len())
663}
664
665pub fn scan_light_bytes_with_encoding_and_options_and_sink(
666    input: &[u8],
667    encoding: SourceEncoding,
668    options: LightParseOptions,
669    sink: &mut impl LightItemSink,
670) -> LightScanReport {
671    if let Some(error) = max_bytes_error_for_bytes(input.len(), options.budgets) {
672        return empty_light_scan_report(error);
673    }
674    let summary = scan_light_bytes_native(input, encoding, options, sink);
675    report_from_summary(summary, input.len())
676}
677
/// Reads and parses a file with default options, auto-detecting the encoding.
pub fn parse_light_file(path: impl AsRef<Path>) -> io::Result<LightParse> {
    parse_light_file_with_options(path, LightParseOptions::default())
}
681
682pub fn parse_light_file_with_options(
683    path: impl AsRef<Path>,
684    options: LightParseOptions,
685) -> io::Result<LightParse> {
686    if let Some(error) = max_bytes_error_for_file(path.as_ref(), options.budgets)? {
687        return Ok(LightParse::from((
688            LightSourceFile::default(),
689            empty_light_scan_report(error),
690        )));
691    }
692    let bytes = fs::read(path)?;
693    let decoded = decode_source_auto(&bytes);
694    let mut sink = CollectLightItems::default();
695    let report = build_light_scan(decoded, options, &mut sink);
696    Ok(LightParse::from((sink.finish(), report)))
697}
698
699pub fn parse_light_shared_file(path: impl AsRef<Path>) -> io::Result<SharedLightParse> {
700    if let Some(error) =
701        max_bytes_error_for_file(path.as_ref(), LightParseOptions::default().budgets)?
702    {
703        return Ok(SharedLightParse::from((
704            LightSourceFile::default(),
705            empty_shared_light_scan_report(error),
706        )));
707    }
708    let bytes = fs::read(path)?;
709    Ok(parse_light_shared_bytes(&bytes))
710}
711
/// Reads and parses a file with a forced encoding and default options.
pub fn parse_light_file_with_encoding(
    path: impl AsRef<Path>,
    encoding: SourceEncoding,
) -> io::Result<LightParse> {
    parse_light_file_with_encoding_and_options(path, encoding, LightParseOptions::default())
}
718
719pub fn parse_light_file_with_encoding_and_options(
720    path: impl AsRef<Path>,
721    encoding: SourceEncoding,
722    options: LightParseOptions,
723) -> io::Result<LightParse> {
724    if let Some(error) = max_bytes_error_for_file(path.as_ref(), options.budgets)? {
725        return Ok(LightParse::from((
726            LightSourceFile::default(),
727            empty_light_scan_report(error),
728        )));
729    }
730    let bytes = fs::read(path)?;
731    let decoded = decode_source_with_encoding(&bytes, encoding);
732    let mut sink = CollectLightItems::default();
733    let report = build_light_scan(decoded, options, &mut sink);
734    Ok(LightParse::from((sink.finish(), report)))
735}
736
737pub fn parse_light_shared_file_with_encoding(
738    path: impl AsRef<Path>,
739    encoding: SourceEncoding,
740) -> io::Result<SharedLightParse> {
741    if let Some(error) =
742        max_bytes_error_for_file(path.as_ref(), LightParseOptions::default().budgets)?
743    {
744        return Ok(SharedLightParse::from((
745            LightSourceFile::default(),
746            empty_shared_light_scan_report(error),
747        )));
748    }
749    let bytes = fs::read(path)?;
750    Ok(parse_light_shared_bytes_with_encoding(&bytes, encoding))
751}
752
/// Scans a file with default options, streaming items to `sink`.
pub fn scan_light_file_with_sink(
    path: impl AsRef<Path>,
    sink: &mut impl LightItemSink,
) -> io::Result<LightScanReport> {
    scan_light_file_with_options_and_sink(path, LightParseOptions::default(), sink)
}
759
760pub fn scan_light_shared_file_with_options_and_sink(
761    path: impl AsRef<Path>,
762    options: LightParseOptions,
763    sink: &mut impl LightItemSink,
764) -> io::Result<SharedLightScanReport> {
765    if let Some(error) = max_bytes_error_for_file(path.as_ref(), options.budgets)? {
766        return Ok(empty_shared_light_scan_report(error));
767    }
768    let bytes = fs::read(path)?;
769    Ok(scan_light_shared_bytes_with_options_and_sink(
770        &bytes, options, sink,
771    ))
772}
773
774pub fn scan_light_file_with_options_and_sink(
775    path: impl AsRef<Path>,
776    options: LightParseOptions,
777    sink: &mut impl LightItemSink,
778) -> io::Result<LightScanReport> {
779    if let Some(error) = max_bytes_error_for_file(path.as_ref(), options.budgets)? {
780        return Ok(empty_light_scan_report(error));
781    }
782    let bytes = fs::read(path)?;
783    Ok(scan_light_bytes_with_options_and_sink(
784        &bytes, options, sink,
785    ))
786}
787
/// Scans a file with a forced encoding and default options.
pub fn scan_light_file_with_encoding_and_sink(
    path: impl AsRef<Path>,
    encoding: SourceEncoding,
    sink: &mut impl LightItemSink,
) -> io::Result<LightScanReport> {
    scan_light_file_with_encoding_and_options_and_sink(
        path,
        encoding,
        LightParseOptions::default(),
        sink,
    )
}
800
801pub fn scan_light_shared_file_with_encoding_and_options_and_sink(
802    path: impl AsRef<Path>,
803    encoding: SourceEncoding,
804    options: LightParseOptions,
805    sink: &mut impl LightItemSink,
806) -> io::Result<SharedLightScanReport> {
807    if let Some(error) = max_bytes_error_for_file(path.as_ref(), options.budgets)? {
808        return Ok(empty_shared_light_scan_report(error));
809    }
810    let bytes = fs::read(path)?;
811    Ok(scan_light_shared_bytes_with_encoding_and_options_and_sink(
812        &bytes, encoding, options, sink,
813    ))
814}
815
816pub fn scan_light_file_with_encoding_and_options_and_sink(
817    path: impl AsRef<Path>,
818    encoding: SourceEncoding,
819    options: LightParseOptions,
820    sink: &mut impl LightItemSink,
821) -> io::Result<LightScanReport> {
822    if let Some(error) = max_bytes_error_for_file(path.as_ref(), options.budgets)? {
823        return Ok(empty_light_scan_report(error));
824    }
825    let bytes = fs::read(path)?;
826    Ok(scan_light_bytes_with_encoding_and_options_and_sink(
827        &bytes, encoding, options, sink,
828    ))
829}
830
831fn build_light_scan(
832    decoded: crate::decode::DecodedSource<'_>,
833    options: LightParseOptions,
834    sink: &mut impl LightItemSink,
835) -> LightScanReport {
836    let source_text = decoded.text.into_owned();
837    let source_map = decoded.offset_map.source_map();
838    let source_view = LightSourceView::Text(SourceView::new(&source_text, &source_map));
839    let mut scanner = LightScanner::new(&source_text, options);
840    scanner.scan_with_sink(source_view, sink, Some(&decoded.offset_map));
841    let errors = scanner
842        .errors
843        .into_iter()
844        .map(|mut error| {
845            error.range = decoded.offset_map.map_range(error.range);
846            error
847        })
848        .collect();
849    LightScanReport {
850        source_text,
851        source_map,
852        source_encoding: decoded.encoding,
853        decode_errors: decoded.diagnostics,
854        errors,
855    }
856}
857
858fn build_shared_light_scan(
859    decoded: crate::decode::DecodedSource<'_>,
860    options: LightParseOptions,
861    sink: &mut impl LightItemSink,
862) -> SharedLightScanReport {
863    let source_text: Arc<str> = Arc::from(decoded.text.into_owned());
864    let source_map = decoded.offset_map.source_map();
865    let source_view = LightSourceView::Text(SourceView::new(&source_text, &source_map));
866    let mut scanner = LightScanner::new(&source_text, options);
867    scanner.scan_with_sink(source_view, sink, Some(&decoded.offset_map));
868    let errors = scanner
869        .errors
870        .into_iter()
871        .map(|mut error| {
872            error.range = decoded.offset_map.map_range(error.range);
873            error
874        })
875        .collect();
876    SharedLightScanReport {
877        source_text,
878        source_map,
879        source_encoding: decoded.encoding,
880        decode_errors: decoded.diagnostics,
881        errors,
882    }
883}
884
885fn scan_light_bytes_native(
886    input: &[u8],
887    encoding: SourceEncoding,
888    options: LightParseOptions,
889    sink: &mut impl LightItemSink,
890) -> LightScanSummary {
891    let mut scanner = ByteLightScanner::new(input, encoding, options);
892    scanner.scan_with_sink(sink);
893    LightScanSummary {
894        source_encoding: encoding,
895        decode_errors: Vec::new(),
896        errors: scanner.errors,
897    }
898}
899
900fn report_from_summary(summary: LightScanSummary, source_len: usize) -> LightScanReport {
901    LightScanReport {
902        source_text: String::new(),
903        source_map: SourceMap::identity(source_len),
904        source_encoding: summary.source_encoding,
905        decode_errors: summary.decode_errors,
906        errors: summary.errors,
907    }
908}
909
910fn shared_report_from_summary(
911    summary: LightScanSummary,
912    source_len: usize,
913) -> SharedLightScanReport {
914    SharedLightScanReport {
915        source_text: Arc::from(""),
916        source_map: SourceMap::identity(source_len),
917        source_encoding: summary.source_encoding,
918        decode_errors: summary.decode_errors,
919        errors: summary.errors,
920    }
921}
922
923fn detect_light_source_encoding(input: &[u8]) -> SourceEncoding {
924    if ascii_header_declares_codeset_932(input) {
925        return SourceEncoding::Cp932;
926    }
927    if std::str::from_utf8(input).is_ok() {
928        SourceEncoding::Utf8
929    } else {
930        SourceEncoding::Cp932
931    }
932}
933
/// Reports whether the first 4 KiB of `input` contain the ASCII marker
/// `Codeset: 932` (compared case-insensitively), which callers treat as a
/// CP932 declaration. A marker that starts beyond the 4 KiB window is not
/// found (matching the original behavior).
fn ascii_header_declares_codeset_932(input: &[u8]) -> bool {
    // Single source of truth for the marker; the original repeated the
    // literal in both the length computation and the comparison.
    const MARKER: &[u8] = b"Codeset: 932";
    const HEADER_SCAN_LIMIT: usize = 4096;
    let limit = input.len().min(HEADER_SCAN_LIMIT);
    input[..limit]
        .windows(MARKER.len())
        .any(|window| window.eq_ignore_ascii_case(MARKER))
}
940
/// Sink that simply accumulates every emitted item.
#[derive(Default)]
struct CollectLightItems {
    items: Vec<LightItem>,
}
945
impl CollectLightItems {
    /// Consumes the collector, yielding the accumulated item list.
    fn finish(self) -> LightSourceFile {
        LightSourceFile { items: self.items }
    }
}
951
impl LightItemSink for CollectLightItems {
    // The source view is ignored; only the item itself is retained.
    fn on_item(&mut self, _: LightSourceView<'_>, item: LightItem) {
        self.items.push(item);
    }
}
957
/// Rewrite every `TextRange` stored inside `item` through `map`,
/// translating scanner offsets into the caller's coordinate space.
fn remap_light_item(item: &mut LightItem, map: &OffsetMap) {
    match item {
        LightItem::Command(command) => {
            command.head_range = map.map_range(command.head_range);
            if let Some(opaque_tail) = &mut command.opaque_tail {
                *opaque_tail = map.map_range(*opaque_tail);
            }
            for word in &mut command.words {
                // Some word kinds carry a separate `text` range alongside
                // `range`; both must be remapped.
                match word {
                    LightWord::Flag { text, range }
                    | LightWord::NumericLiteral { text, range }
                    | LightWord::BareWord { text, range }
                    | LightWord::QuotedString { text, range } => {
                        *text = map.map_range(*text);
                        *range = map.map_range(*range);
                    }
                    LightWord::Variable { range }
                    | LightWord::GroupedExpr { range }
                    | LightWord::BraceList { range }
                    | LightWord::VectorLiteral { range }
                    | LightWord::Capture { range } => {
                        *range = map.map_range(*range);
                    }
                }
            }
            command.span = map.map_range(command.span);
        }
        LightItem::Proc(proc_def) => {
            if let Some(name_range) = &mut proc_def.name_range {
                *name_range = map.map_range(*name_range);
            }
            proc_def.span = map.map_range(proc_def.span);
        }
        LightItem::Other { span } => *span = map.map_range(*span),
    }
}
994
/// Lightweight surface scanner over already-decoded source text.
struct LightScanner<'a> {
    text: &'a str,
    options: LightParseOptions,
    errors: Vec<ParseError>,
    // Reported at most once per scan (see `skip_block_comment`).
    reported_unterminated_block_comment: bool,
    // Presumably the analogous once-only flag for budget errors — confirm
    // against the budget-reporting path.
    reported_budget_error: bool,
    budget: LightBudgetTracker,
}
1003
1004impl<'a> LightScanner<'a> {
1005    fn new(text: &'a str, options: LightParseOptions) -> Self {
1006        Self {
1007            text,
1008            options,
1009            errors: Vec::new(),
1010            reported_unterminated_block_comment: false,
1011            reported_budget_error: false,
1012            budget: LightBudgetTracker::new(options.budgets),
1013        }
1014    }
1015
    /// Drive the scan loop: classify each top-level statement as a proc
    /// definition or a plain statement, optionally remap its spans through
    /// `remap`, and hand it to `sink`. Stops at end of text or as soon as
    /// the scanner is halted.
    fn scan_with_sink(
        &mut self,
        source: LightSourceView<'_>,
        sink: &mut impl LightItemSink,
        remap: Option<&OffsetMap>,
    ) {
        let mut cursor = self.skip_trivia(0);

        while cursor < self.text.len() && !self.is_halted() {
            let (mut item, next_cursor) = if self.is_proc_start(cursor) {
                self.scan_proc_item(cursor)
            } else {
                self.scan_statement_item(cursor)
            };
            // The scan may halt mid-item; drop the partial item then.
            if self.is_halted() {
                break;
            }
            // Stop emitting once the statement can no longer be recorded
            // (e.g. a statement budget is exhausted).
            if !self.record_statement(start_range(&item)) {
                break;
            }
            if let Some(map) = remap {
                remap_light_item(&mut item, map);
            }
            sink.on_item(source, item);
            cursor = self.skip_trivia(next_cursor);
        }
    }
1043
    /// Scan a `proc` (optionally `global proc`) definition beginning at
    /// `start`, capturing the proc name when one can be identified and the
    /// span up to the matching end of the body.
    fn scan_proc_item(&mut self, start: usize) -> (LightItem, usize) {
        let mut cursor = start;
        let mut is_global = false;
        if let Some(after_global) = self.consume_keyword(cursor, "global") {
            is_global = true;
            cursor = self.skip_trivia(after_global);
        }
        let after_proc = self.consume_keyword(cursor, "proc").unwrap_or(cursor);
        cursor = self.skip_trivia(after_proc);

        // The first word after `proc` is the name when `(` follows
        // immediately; otherwise it is taken as a return type and the next
        // word (if any) is the name.
        let first_word = self.scan_simple_word(cursor);
        let mut name_range = None;
        let mut body_scan_start = cursor;
        if let Some((first_start, first_end)) = first_word {
            let after_first = self.skip_trivia(first_end);
            body_scan_start = after_first;
            if self.peek_byte(after_first) == Some(b'(') {
                name_range = Some(text_range(first_start as u32, first_end as u32));
            } else if let Some((name_start, name_end)) = self.scan_simple_word(after_first) {
                name_range = Some(text_range(name_start as u32, name_end as u32));
                body_scan_start = self.skip_trivia(name_end);
            }
        }

        let end = self.scan_until_matching_body_end(start, body_scan_start);
        (
            LightItem::Proc(LightProcSurface {
                name_range,
                is_global,
                span: text_range(start as u32, end as u32),
            }),
            end,
        )
    }
1078
    /// Scan one non-proc statement starting at `start`. A statement whose
    /// head word is immediately followed by `(` (call syntax) or is
    /// classified by `is_non_command_head` becomes an opaque `Other` item;
    /// anything else is surfaced as a command with its word prefix.
    fn scan_statement_item(&mut self, start: usize) -> (LightItem, usize) {
        let Some((head_start, head_end)) = self.scan_simple_word(start) else {
            // No head word at all (e.g. leading punctuation): skim to the
            // statement end and emit an opaque item.
            let end = self.scan_statement_tail(start);
            return (
                LightItem::Other {
                    span: text_range(start as u32, end as u32),
                },
                end,
            );
        };
        let head_range = text_range(head_start as u32, head_end as u32);
        let head_is_non_command = is_non_command_head(&self.text[head_start..head_end]);
        let after_head = self.skip_trivia(head_end);
        if self.peek_byte(after_head) == Some(b'(') || head_is_non_command {
            let end = self.scan_statement_tail(after_head);
            return (
                LightItem::Other {
                    span: text_range(start as u32, end as u32),
                },
                end,
            );
        }

        let (end, words, opaque_tail) =
            self.scan_command_statement_tail(start, head_end, after_head);

        (
            LightItem::Command(LightCommandSurface {
                head_range,
                captured: false,
                words,
                opaque_tail,
                span: text_range(start as u32, end as u32),
            }),
            end,
        )
    }
1116
    /// Collect the words that follow a command head until `;` or end of
    /// text. Once the word-count or byte budget is exceeded — or a word
    /// fails to scan — the remainder of the statement is skimmed and
    /// returned as an opaque tail range instead of structured words.
    fn scan_command_statement_tail(
        &mut self,
        start: usize,
        head_end: usize,
        after_head: usize,
    ) -> (usize, Vec<LightWord>, Option<TextRange>) {
        let mut words = Vec::with_capacity(self.options.max_prefix_words.min(8));
        let mut cursor = after_head;
        loop {
            cursor = self.skip_trivia(cursor);
            if cursor >= self.text.len() {
                return (self.text.len(), words, None);
            }
            // Top-level semicolon terminates the command.
            if self.byte_at(cursor) == Some(b';') {
                let _ = self.record_token(cursor, cursor + 1);
                return (cursor + 1, words, None);
            }

            // Prefix budget: word count plus bytes consumed since the head.
            let consumed_bytes = cursor.saturating_sub(head_end);
            if words.len() >= self.options.max_prefix_words
                || consumed_bytes >= self.options.max_prefix_bytes
            {
                let end = self.scan_statement_tail(cursor);
                let body_end = self.statement_body_end(start, end);
                let opaque_tail =
                    (cursor < body_end).then(|| text_range(cursor as u32, body_end as u32));
                return (end, words, opaque_tail);
            }

            let Some((word, next_cursor)) = self.scan_word(cursor, self.text.len()) else {
                if self.is_halted() {
                    return (self.text.len(), words, None);
                }
                // Unparseable word: skim to the statement end and expose the
                // remainder as an opaque tail.
                let end = self.scan_statement_tail(cursor);
                let body_end = self.statement_body_end(start, end);
                let opaque_tail =
                    (cursor < body_end).then(|| text_range(cursor as u32, body_end as u32));
                return (end, words, opaque_tail);
            };
            words.push(word);
            cursor = next_cursor;
        }
    }
1160
    /// Skim to the end of the current statement: the first `;` outside
    /// every paren/bracket/brace nesting level, string, backquote, and
    /// comment. Returns the offset just past that `;`, or end of text when
    /// the statement is unterminated or the scan halts.
    fn scan_statement_tail(&mut self, start: usize) -> usize {
        let mut cursor = start;
        let mut paren_depth = 0usize;
        let mut bracket_depth = 0usize;
        let mut brace_depth = 0usize;
        let mut in_string = false;
        let mut in_backquote = false;

        while cursor < self.text.len() && !self.is_halted() {
            // Once inside a string or backquote, skip its body wholesale.
            if in_string {
                cursor = self.advance_string_body(cursor);
                in_string = false;
                continue;
            }
            if in_backquote {
                cursor = self.advance_backquote_body(cursor);
                in_backquote = false;
                continue;
            }
            if self.starts_with(cursor, "//") {
                cursor = self.skip_line_comment(cursor);
                continue;
            }
            if self.starts_with(cursor, "/*") {
                cursor = self.skip_block_comment(cursor);
                continue;
            }

            // A `record_token` returning false aborts the scan early.
            match self.byte_at(cursor) {
                Some(b'"') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    in_string = true;
                    cursor += 1;
                }
                Some(b'`') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    in_backquote = true;
                    cursor += 1;
                }
                Some(b'(') => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.text.len();
                    }
                    paren_depth += 1;
                    cursor += 1;
                }
                Some(b')') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    // Unmatched closers are tolerated: depth saturates at 0.
                    if paren_depth > 0 {
                        self.exit_nesting();
                    }
                    paren_depth = paren_depth.saturating_sub(1);
                    cursor += 1;
                }
                Some(b'[') => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.text.len();
                    }
                    bracket_depth += 1;
                    cursor += 1;
                }
                Some(b']') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    if bracket_depth > 0 {
                        self.exit_nesting();
                    }
                    bracket_depth = bracket_depth.saturating_sub(1);
                    cursor += 1;
                }
                Some(b'{') => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.text.len();
                    }
                    brace_depth += 1;
                    cursor += 1;
                }
                Some(b'}') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    if brace_depth > 0 {
                        self.exit_nesting();
                    }
                    brace_depth = brace_depth.saturating_sub(1);
                    cursor += 1;
                }
                // Statement terminator only when outside all nesting.
                Some(b';')
                    if paren_depth == 0
                        && bracket_depth == 0
                        && brace_depth == 0
                        && !in_string
                        && !in_backquote =>
                {
                    let _ = self.record_token(cursor, cursor + 1);
                    return cursor + 1;
                }
                Some(ch) if (ch as char).is_whitespace() => cursor = self.next_offset(cursor),
                // Anything else: consume one word, or a single char when no
                // word can be formed, to guarantee forward progress.
                Some(_) => {
                    let end = self.scan_simple_word_until(cursor, self.text.len());
                    if end <= cursor {
                        if !self.record_token(cursor, self.next_offset(cursor)) {
                            return self.text.len();
                        }
                        cursor = self.next_offset(cursor);
                    } else {
                        if !self.record_token(cursor, end) {
                            return self.text.len();
                        }
                        cursor = end;
                    }
                }
                None => break,
            }
        }

        self.text.len()
    }
1292
1293    fn statement_body_end(&self, start: usize, end: usize) -> usize {
1294        let mut body_end = end;
1295        if body_end > start && self.byte_at(body_end - 1) == Some(b';') {
1296            body_end -= 1;
1297        }
1298        while body_end > start {
1299            let prev = self.prev_offset(body_end);
1300            let segment = &self.text[prev..body_end];
1301            if segment.chars().all(char::is_whitespace) {
1302                body_end = prev;
1303                continue;
1304            }
1305            break;
1306        }
1307        body_end
1308    }
1309
    /// Scan a single command argument starting at `start` (which must lie
    /// below `body_end`). Dispatches on the leading bytes: quoted string,
    /// backquote capture, `{...}` list, `<<...>>` vector, `(...)` group, or
    /// a simple word classified as variable / flag / numeric / bare word.
    /// Returns `None` at the body end or when a token/literal cannot be
    /// recorded.
    fn scan_word(&mut self, start: usize, body_end: usize) -> Option<(LightWord, usize)> {
        if start >= body_end {
            return None;
        }
        if self.byte_at(start) == Some(b'"') {
            let end = self.scan_quoted_string(start);
            let range = text_range(start as u32, end as u32);
            if !self.check_literal(range) {
                return None;
            }
            return Some((LightWord::QuotedString { text: range, range }, end));
        }
        if self.byte_at(start) == Some(b'`') {
            let end = self.scan_backquote(start);
            let range = text_range(start as u32, end as u32);
            return Some((LightWord::Capture { range }, end));
        }
        if self.byte_at(start) == Some(b'{') {
            let end = self.scan_balanced(start, b'{', b'}');
            let range = text_range(start as u32, end as u32);
            if !self.check_literal(range) {
                return None;
            }
            return Some((LightWord::BraceList { range }, end));
        }
        if self.starts_with(start, "<<") {
            let end = self.scan_vector_literal(start);
            let range = text_range(start as u32, end as u32);
            if !self.check_literal(range) {
                return None;
            }
            return Some((LightWord::VectorLiteral { range }, end));
        }
        if self.byte_at(start) == Some(b'(') {
            let end = self.scan_balanced(start, b'(', b')');
            let range = text_range(start as u32, end as u32);
            if !self.check_literal(range) {
                return None;
            }
            return Some((LightWord::GroupedExpr { range }, end));
        }

        // Fallback: a plain word, classified by its leading characters.
        let end = self.scan_simple_word_until(start, body_end);
        if end <= start {
            return None;
        }
        if !self.record_token(start, end) {
            return None;
        }
        let range = text_range(start as u32, end as u32);
        let text = &self.text[start..end];
        let word = if text.starts_with('$') {
            LightWord::Variable { range }
        } else if text.starts_with('-') && text.len() > 1 {
            LightWord::Flag { text: range, range }
        } else if looks_numeric_like(text) {
            LightWord::NumericLiteral { text: range, range }
        } else {
            LightWord::BareWord { text: range, range }
        };
        Some((word, end))
    }
1372
    /// Consume a double-quoted string starting at the opening `"`, honoring
    /// backslash escapes. Returns the offset just past the closing quote;
    /// when the string is unterminated, reports an error (unless halted)
    /// and returns end of text.
    fn scan_quoted_string(&mut self, start: usize) -> usize {
        let mut cursor = start + 1;
        while cursor < self.text.len() {
            match self.byte_at(cursor) {
                Some(b'\\') => {
                    // Skip the escaped character as a whole.
                    cursor = self.next_offset(cursor + 1);
                }
                Some(b'"') => {
                    let end = cursor + 1;
                    let _ = self.record_token(start, end);
                    return end;
                }
                Some(_) => cursor = self.next_offset(cursor),
                None => break,
            }
        }
        if self.is_halted() {
            return self.text.len();
        }
        let _ = self.record_token(start, self.text.len());
        self.errors.push(ParseError {
            message: "unterminated string literal in lightweight surface parse",
            range: text_range(start as u32, self.text.len() as u32),
        });
        self.text.len()
    }
1399
    /// Consume a backquote command capture starting at the opening
    /// backquote, skipping escapes and nested double-quoted strings.
    /// Returns the offset just past the closing backquote; when
    /// unterminated, reports an error (unless halted) and returns end of
    /// text.
    fn scan_backquote(&mut self, start: usize) -> usize {
        let mut cursor = start + 1;
        while cursor < self.text.len() {
            match self.byte_at(cursor) {
                Some(b'\\') => {
                    cursor = self.next_offset(cursor + 1);
                }
                Some(b'`') => {
                    let end = cursor + 1;
                    let _ = self.record_token(start, end);
                    return end;
                }
                // A nested string may contain an unescaped backquote.
                Some(b'"') => cursor = self.scan_quoted_string(cursor),
                Some(_) => cursor = self.next_offset(cursor),
                None => break,
            }
        }
        if self.is_halted() {
            return self.text.len();
        }
        let _ = self.record_token(start, self.text.len());
        self.errors.push(ParseError {
            message: "unterminated backquote capture in lightweight surface parse",
            range: text_range(start as u32, self.text.len() as u32),
        });
        self.text.len()
    }
1427
    /// Consume a balanced `open`...`close` group starting at `start`,
    /// skipping comments, strings, and backquotes. Only the `open`/`close`
    /// pair affects the depth count; other punctuation is recorded as
    /// individual tokens. Returns the offset just past the matching close,
    /// or end of text on halt/unterminated input.
    fn scan_balanced(&mut self, start: usize, open: u8, close: u8) -> usize {
        let mut cursor = start;
        let mut depth = 0usize;
        while cursor < self.text.len() && !self.is_halted() {
            if self.starts_with(cursor, "//") {
                cursor = self.skip_line_comment(cursor);
                continue;
            }
            if self.starts_with(cursor, "/*") {
                cursor = self.skip_block_comment(cursor);
                continue;
            }
            match self.byte_at(cursor) {
                Some(b'"') => cursor = self.scan_quoted_string(cursor),
                Some(b'`') => cursor = self.scan_backquote(cursor),
                Some(ch) if ch == open => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.text.len();
                    }
                    depth += 1;
                    cursor += 1;
                }
                Some(ch) if ch == close => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    if depth > 0 {
                        self.exit_nesting();
                    }
                    depth = depth.saturating_sub(1);
                    cursor += 1;
                    // Depth hitting zero means the opener at `start` has
                    // been matched.
                    if depth == 0 {
                        return cursor;
                    }
                }
                // Other delimiters/commas: single tokens, no depth change.
                Some(b'(' | b')' | b'[' | b']' | b'{' | b'}' | b',') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    cursor += 1;
                }
                Some(ch) if (ch as char).is_whitespace() => cursor = self.next_offset(cursor),
                Some(_) => {
                    let end = self.scan_simple_word_until(cursor, self.text.len());
                    if end <= cursor {
                        if !self.record_token(cursor, self.next_offset(cursor)) {
                            return self.text.len();
                        }
                        cursor = self.next_offset(cursor);
                    } else {
                        if !self.record_token(cursor, end) {
                            return self.text.len();
                        }
                        cursor = end;
                    }
                }
                None => break,
            }
        }
        if self.is_halted() {
            return self.text.len();
        }
        self.errors.push(ParseError {
            message: "unterminated grouped surface in lightweight parse",
            range: text_range(start as u32, self.text.len() as u32),
        });
        self.text.len()
    }
1498
    /// Consume a `<< ... >>` vector literal starting at the `<<`, skipping
    /// quoted strings inside it. Returns the offset just past `>>`; when
    /// unterminated, reports an error (unless halted) and returns end of
    /// text.
    fn scan_vector_literal(&mut self, start: usize) -> usize {
        let mut cursor = start + 2;
        if !self.record_token(start, start + 2) || !self.enter_nesting(start, start + 2) {
            return self.text.len();
        }
        while cursor < self.text.len() && !self.is_halted() {
            if self.starts_with(cursor, ">>") {
                let _ = self.record_token(cursor, cursor + 2);
                self.exit_nesting();
                return cursor + 2;
            }
            if self.byte_at(cursor) == Some(b'"') {
                cursor = self.scan_quoted_string(cursor);
                continue;
            }
            if self
                .byte_at(cursor)
                .is_some_and(|ch| (ch as char).is_whitespace())
            {
                cursor = self.next_offset(cursor);
                continue;
            }
            // Consume a word, or a single char when no word can be formed,
            // to guarantee forward progress.
            let end = self.scan_simple_word_until(cursor, self.text.len());
            if end <= cursor {
                let next = self.next_offset(cursor);
                if !self.record_token(cursor, next) {
                    return self.text.len();
                }
                cursor = next;
            } else {
                if !self.record_token(cursor, end) {
                    return self.text.len();
                }
                cursor = end;
            }
        }
        if self.is_halted() {
            return self.text.len();
        }
        self.errors.push(ParseError {
            message: "unterminated vector literal in lightweight parse",
            range: text_range(start as u32, self.text.len() as u32),
        });
        self.text.len()
    }
1544
    /// From `cursor`, consume until the `{` body that follows has been
    /// closed by its matching `}`. `start` anchors the error span only.
    /// `saw_body` makes a stray `}` before any `{` fall through to the
    /// generic token arm instead of terminating the scan.
    fn scan_until_matching_body_end(&mut self, start: usize, cursor: usize) -> usize {
        let mut cursor = cursor;
        let mut depth = 0usize;
        let mut saw_body = false;
        while cursor < self.text.len() && !self.is_halted() {
            if self.starts_with(cursor, "//") {
                cursor = self.skip_line_comment(cursor);
                continue;
            }
            if self.starts_with(cursor, "/*") {
                cursor = self.skip_block_comment(cursor);
                continue;
            }
            match self.byte_at(cursor) {
                Some(b'"') => cursor = self.scan_quoted_string(cursor),
                Some(b'`') => cursor = self.scan_backquote(cursor),
                Some(b'{') => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.text.len();
                    }
                    saw_body = true;
                    depth += 1;
                    cursor += 1;
                }
                Some(b'}') if saw_body => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    if depth > 0 {
                        self.exit_nesting();
                    }
                    depth = depth.saturating_sub(1);
                    cursor += 1;
                    // Outermost body brace closed: the proc body is done.
                    if depth == 0 {
                        return cursor;
                    }
                }
                // Other punctuation: recorded as single tokens.
                Some(b'(' | b')' | b'[' | b']' | b',' | b';') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.text.len();
                    }
                    cursor += 1;
                }
                Some(ch) if (ch as char).is_whitespace() => cursor = self.next_offset(cursor),
                Some(_) => {
                    let end = self.scan_simple_word_until(cursor, self.text.len());
                    if end <= cursor {
                        if !self.record_token(cursor, self.next_offset(cursor)) {
                            return self.text.len();
                        }
                        cursor = self.next_offset(cursor);
                    } else {
                        if !self.record_token(cursor, end) {
                            return self.text.len();
                        }
                        cursor = end;
                    }
                }
                None => break,
            }
        }
        if self.is_halted() {
            return self.text.len();
        }
        self.errors.push(ParseError {
            message: "unterminated proc body in lightweight surface parse",
            range: text_range(start as u32, self.text.len() as u32),
        });
        self.text.len()
    }
1617
1618    fn scan_simple_word(&mut self, start: usize) -> Option<(usize, usize)> {
1619        let start = self.skip_trivia(start);
1620        let end = self.scan_simple_word_until(start, self.text.len());
1621        if end > start && !self.record_token(start, end) {
1622            return None;
1623        }
1624        (end > start).then_some((start, end))
1625    }
1626
1627    fn scan_simple_word_until(&self, start: usize, body_end: usize) -> usize {
1628        let mut cursor = start;
1629        while cursor < body_end {
1630            if self.starts_with(cursor, "//") || self.starts_with(cursor, "/*") {
1631                break;
1632            }
1633            match self.byte_at(cursor) {
1634                Some(b';' | b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'`' | b'"') | None => break,
1635                Some(ch) if (ch as char).is_whitespace() => break,
1636                Some(_) => cursor = self.next_offset(cursor),
1637            }
1638        }
1639        cursor
1640    }
1641
1642    fn skip_trivia(&mut self, start: usize) -> usize {
1643        let mut cursor = start;
1644        while cursor < self.text.len() {
1645            if self.starts_with(cursor, "//") {
1646                cursor = self.skip_line_comment(cursor);
1647                continue;
1648            }
1649            if self.starts_with(cursor, "/*") {
1650                cursor = self.skip_block_comment(cursor);
1651                continue;
1652            }
1653            let Some(ch) = self.text[cursor..].chars().next() else {
1654                break;
1655            };
1656            if ch.is_whitespace() {
1657                cursor += ch.len_utf8();
1658                continue;
1659            }
1660            break;
1661        }
1662        cursor
1663    }
1664
1665    fn skip_trivia_peek(&self, start: usize) -> usize {
1666        let mut cursor = start;
1667        while cursor < self.text.len() {
1668            if self.starts_with(cursor, "//") {
1669                cursor = self.skip_line_comment(cursor);
1670                continue;
1671            }
1672            if self.starts_with(cursor, "/*") {
1673                let Some(after_comment) = self.skip_block_comment_peek(cursor) else {
1674                    return self.text.len();
1675                };
1676                cursor = after_comment;
1677                continue;
1678            }
1679            let Some(ch) = self.text[cursor..].chars().next() else {
1680                break;
1681            };
1682            if ch.is_whitespace() {
1683                cursor += ch.len_utf8();
1684                continue;
1685            }
1686            break;
1687        }
1688        cursor
1689    }
1690
1691    fn skip_line_comment(&self, start: usize) -> usize {
1692        let mut cursor = start + 2;
1693        while cursor < self.text.len() {
1694            match self.byte_at(cursor) {
1695                Some(b'\n') => return cursor + 1,
1696                Some(_) => cursor = self.next_offset(cursor),
1697                None => break,
1698            }
1699        }
1700        self.text.len()
1701    }
1702
1703    fn skip_block_comment(&mut self, start: usize) -> usize {
1704        let mut cursor = start + 2;
1705        while cursor < self.text.len() {
1706            if self.starts_with(cursor, "*/") {
1707                return cursor + 2;
1708            }
1709            cursor = self.next_offset(cursor);
1710        }
1711        if !self.reported_unterminated_block_comment {
1712            self.errors.push(ParseError {
1713                message: "unterminated block comment",
1714                range: text_range(start as u32, self.text.len() as u32),
1715            });
1716            self.reported_unterminated_block_comment = true;
1717        }
1718        self.text.len()
1719    }
1720
1721    fn skip_block_comment_peek(&self, start: usize) -> Option<usize> {
1722        let mut cursor = start + 2;
1723        while cursor < self.text.len() {
1724            if self.starts_with(cursor, "*/") {
1725                return Some(cursor + 2);
1726            }
1727            cursor = self.next_offset(cursor);
1728        }
1729        None
1730    }
1731
1732    fn advance_string_body(&self, start: usize) -> usize {
1733        let mut cursor = start;
1734        while cursor < self.text.len() {
1735            match self.byte_at(cursor) {
1736                Some(b'\\') => cursor = self.next_offset(cursor + 1),
1737                Some(b'"') => return cursor + 1,
1738                Some(_) => cursor = self.next_offset(cursor),
1739                None => break,
1740            }
1741        }
1742        self.text.len()
1743    }
1744
1745    fn advance_backquote_body(&self, start: usize) -> usize {
1746        let mut cursor = start;
1747        while cursor < self.text.len() {
1748            match self.byte_at(cursor) {
1749                Some(b'\\') => cursor = self.next_offset(cursor + 1),
1750                Some(b'`') => return cursor + 1,
1751                Some(b'"') => cursor = self.advance_string_body(cursor + 1),
1752                Some(_) => cursor = self.next_offset(cursor),
1753                None => break,
1754            }
1755        }
1756        self.text.len()
1757    }
1758
1759    fn is_proc_start(&mut self, start: usize) -> bool {
1760        if self.peek_keyword_end(start, "proc").is_some() {
1761            return true;
1762        }
1763        let Some(after_global) = self.peek_keyword_end(start, "global") else {
1764            return false;
1765        };
1766        let after_global = self.skip_trivia_peek(after_global);
1767        self.peek_keyword_end(after_global, "proc").is_some()
1768    }
1769
1770    fn peek_keyword_end(&self, start: usize, keyword: &str) -> Option<usize> {
1771        let cursor = self.skip_trivia_peek(start);
1772        if !self.text[cursor..].starts_with(keyword) {
1773            return None;
1774        }
1775        let end = cursor + keyword.len();
1776        let next = self.text[end..].chars().next();
1777        if next.is_some_and(is_word_continue) {
1778            return None;
1779        }
1780        Some(end)
1781    }
1782
1783    fn consume_keyword(&mut self, start: usize, keyword: &str) -> Option<usize> {
1784        let cursor = self.skip_trivia(start);
1785        if !self.text[cursor..].starts_with(keyword) {
1786            return None;
1787        }
1788        let end = cursor + keyword.len();
1789        let next = self.text[end..].chars().next();
1790        if next.is_some_and(is_word_continue) {
1791            return None;
1792        }
1793        if !self.record_token(cursor, end) {
1794            return None;
1795        }
1796        Some(end)
1797    }
1798
1799    fn starts_with(&self, start: usize, needle: &str) -> bool {
1800        self.text[start..].starts_with(needle)
1801    }
1802
1803    fn byte_at(&self, offset: usize) -> Option<u8> {
1804        self.text.as_bytes().get(offset).copied()
1805    }
1806
1807    fn peek_byte(&self, offset: usize) -> Option<u8> {
1808        self.byte_at(offset)
1809    }
1810
1811    fn next_offset(&self, offset: usize) -> usize {
1812        self.text[offset..]
1813            .chars()
1814            .next()
1815            .map_or(self.text.len(), |ch| offset + ch.len_utf8())
1816    }
1817
1818    fn prev_offset(&self, offset: usize) -> usize {
1819        let mut index = offset.saturating_sub(1);
1820        while !self.text.is_char_boundary(index) {
1821            index = index.saturating_sub(1);
1822        }
1823        index
1824    }
1825
    /// True once any budget violation has flagged the scan as halted.
    fn is_halted(&self) -> bool {
        self.budget.halted
    }
1829
1830    fn halt(&mut self, error: ParseError) {
1831        if self.reported_budget_error {
1832            return;
1833        }
1834        self.reported_budget_error = true;
1835        self.budget.halted = true;
1836        self.errors.push(error);
1837    }
1838
1839    fn record_token(&mut self, start: usize, end: usize) -> bool {
1840        let range = text_range(start as u32, end as u32);
1841        if !self.budget.record_token() {
1842            self.halt(budget_error("max_tokens", range));
1843            return false;
1844        }
1845        true
1846    }
1847
1848    fn record_statement(&mut self, range: TextRange) -> bool {
1849        if !self.budget.record_statement() {
1850            self.halt(budget_error("max_statements", range));
1851            return false;
1852        }
1853        true
1854    }
1855
1856    fn enter_nesting(&mut self, start: usize, end: usize) -> bool {
1857        let range = text_range(start as u32, end as u32);
1858        if !self.budget.enter_nesting() {
1859            self.halt(budget_error("max_nesting_depth", range));
1860            return false;
1861        }
1862        true
1863    }
1864
    /// Releases one nesting level previously claimed via `enter_nesting`.
    fn exit_nesting(&mut self) {
        self.budget.exit_nesting();
    }
1868
1869    fn check_literal(&mut self, range: TextRange) -> bool {
1870        if !self.budget.check_literal(usize::from(range.len())) {
1871            self.halt(budget_error("max_literal_bytes", range));
1872            return false;
1873        }
1874        true
1875    }
1876}
1877
/// Budget-aware surface scanner that operates directly on undecoded source
/// bytes, mirroring the `&str`-based scanner above.
struct ByteLightScanner<'a> {
    // Raw, undecoded source bytes being scanned.
    bytes: &'a [u8],
    // Encoding the bytes are labelled with; forwarded to the item sink via
    // `LightSourceView::Bytes`.
    encoding: SourceEncoding,
    // Prefix word/byte limits and parse budgets for this scan.
    options: LightParseOptions,
    // Diagnostics accumulated during the scan.
    errors: Vec<ParseError>,
    // Dedup flag: report the unterminated-block-comment error only once.
    reported_unterminated_block_comment: bool,
    // Dedup flag: report only the first budget violation.
    reported_budget_error: bool,
    // Tracks token/statement/nesting/literal budgets and the halted flag.
    budget: LightBudgetTracker,
}
1887
1888impl<'a> ByteLightScanner<'a> {
1889    fn new(bytes: &'a [u8], encoding: SourceEncoding, options: LightParseOptions) -> Self {
1890        Self {
1891            bytes,
1892            encoding,
1893            options,
1894            errors: Vec::new(),
1895            reported_unterminated_block_comment: false,
1896            reported_budget_error: false,
1897            budget: LightBudgetTracker::new(options.budgets),
1898        }
1899    }
1900
    /// Drives the byte-level scan: repeatedly classifies the next top-level
    /// item (proc definition vs. statement) and feeds it to `sink`.
    ///
    /// Stops at end of input or as soon as a budget halt is flagged — either
    /// by the item scan itself (the item is then dropped, since it may be
    /// truncated) or by the per-item statement budget.
    fn scan_with_sink(&mut self, sink: &mut impl LightItemSink) {
        let source = LightSourceView::Bytes {
            bytes: self.bytes,
            encoding: self.encoding,
        };
        let mut cursor = self.skip_trivia(0);
        while cursor < self.bytes.len() && !self.is_halted() {
            let (item, next_cursor) = if self.is_proc_start(cursor) {
                self.scan_proc_item(cursor)
            } else {
                self.scan_statement_item(cursor)
            };
            // Discard an item produced while halting; it may be truncated.
            if self.is_halted() {
                break;
            }
            if !self.record_statement(start_range(&item)) {
                break;
            }
            sink.on_item(source, item);
            cursor = self.skip_trivia(next_cursor);
        }
    }
1923
    /// Scans a `proc` (optionally `global proc`) definition starting at
    /// `start`. Returns the surface item plus the offset just past it.
    ///
    /// The word after `proc` is taken as the name when a `(` parameter list
    /// follows directly, otherwise as the return type with the next word as
    /// the name. The body is skipped via brace matching.
    fn scan_proc_item(&mut self, start: usize) -> (LightItem, usize) {
        let mut cursor = start;
        let mut is_global = false;
        if let Some(after_global) = self.consume_keyword(cursor, b"global") {
            is_global = true;
            cursor = self.skip_trivia(after_global);
        }
        // `proc` is expected here (is_proc_start matched); fall back to the
        // current cursor if consuming it fails (e.g. token budget).
        let after_proc = self.consume_keyword(cursor, b"proc").unwrap_or(cursor);
        cursor = self.skip_trivia(after_proc);
        let first_word = self.scan_simple_word(cursor);
        let mut name_range = None;
        let mut body_scan_start = cursor;
        if let Some((first_start, first_end)) = first_word {
            let after_first = self.skip_trivia(first_end);
            body_scan_start = after_first;
            if self.peek_byte(after_first) == Some(b'(') {
                // `proc name(` — the first word is the name.
                name_range = Some(text_range(first_start as u32, first_end as u32));
            } else if let Some((name_start, name_end)) = self.scan_simple_word(after_first) {
                // `proc type name` — the second word is the name.
                name_range = Some(text_range(name_start as u32, name_end as u32));
                body_scan_start = self.skip_trivia(name_end);
            }
        }
        let end = self.scan_until_matching_body_end(start, body_scan_start);
        (
            LightItem::Proc(LightProcSurface {
                name_range,
                is_global,
                span: text_range(start as u32, end as u32),
            }),
            end,
        )
    }
1956
    /// Scans one non-proc statement starting at `start`.
    ///
    /// A statement whose head word looks like a command invocation becomes
    /// `LightItem::Command` with its argument words; anything else (no head
    /// word, call syntax `head(...)`, or a non-command keyword head) is
    /// consumed opaquely as `LightItem::Other`.
    fn scan_statement_item(&mut self, start: usize) -> (LightItem, usize) {
        let Some((head_start, head_end)) = self.scan_simple_word(start) else {
            // No leading word (e.g. punctuation): swallow the statement whole.
            let end = self.scan_statement_tail(start);
            return (
                LightItem::Other {
                    span: text_range(start as u32, end as u32),
                },
                end,
            );
        };
        let head_range = text_range(head_start as u32, head_end as u32);
        let head_is_non_command = is_non_command_head_bytes(&self.bytes[head_start..head_end]);
        let after_head = self.skip_trivia(head_end);
        // `head(` is procedure-call syntax, not command syntax.
        if self.peek_byte(after_head) == Some(b'(') || head_is_non_command {
            let end = self.scan_statement_tail(after_head);
            return (
                LightItem::Other {
                    span: text_range(start as u32, end as u32),
                },
                end,
            );
        }
        let (end, words, opaque_tail) =
            self.scan_command_statement_tail(start, head_end, after_head);
        (
            LightItem::Command(LightCommandSurface {
                head_range,
                captured: false,
                words,
                opaque_tail,
                span: text_range(start as u32, end as u32),
            }),
            end,
        )
    }
1992
1993    fn scan_command_statement_tail(
1994        &mut self,
1995        start: usize,
1996        head_end: usize,
1997        after_head: usize,
1998    ) -> (usize, Vec<LightWord>, Option<TextRange>) {
1999        let mut words = Vec::with_capacity(self.options.max_prefix_words.min(8));
2000        let mut cursor = after_head;
2001        loop {
2002            cursor = self.skip_trivia(cursor);
2003            if cursor >= self.bytes.len() {
2004                return (self.bytes.len(), words, None);
2005            }
2006            if self.byte_at(cursor) == Some(b';') {
2007                let _ = self.record_token(cursor, cursor + 1);
2008                return (cursor + 1, words, None);
2009            }
2010            if words.len() >= self.options.max_prefix_words
2011                || cursor.saturating_sub(head_end) >= self.options.max_prefix_bytes
2012            {
2013                let end = self.scan_statement_tail(cursor);
2014                let body_end = self.statement_body_end(start, end);
2015                let opaque_tail =
2016                    (cursor < body_end).then(|| text_range(cursor as u32, body_end as u32));
2017                return (end, words, opaque_tail);
2018            }
2019            let Some((word, next_cursor)) = self.scan_word(cursor, self.bytes.len()) else {
2020                if self.is_halted() {
2021                    return (self.bytes.len(), words, None);
2022                }
2023                let end = self.scan_statement_tail(cursor);
2024                let body_end = self.statement_body_end(start, end);
2025                let opaque_tail =
2026                    (cursor < body_end).then(|| text_range(cursor as u32, body_end as u32));
2027                return (end, words, opaque_tail);
2028            };
2029            words.push(word);
2030            cursor = next_cursor;
2031        }
2032    }
2033
    /// Consumes the remainder of a statement, returning the offset just past
    /// its terminating `;` (or end of input). A `;` only terminates when no
    /// `()`/`[]`/`{}` group is open; strings, backquote captures, and
    /// comments are skipped atomically. Every structural byte and word is
    /// charged against the token budget, and any budget failure aborts the
    /// scan to end of input.
    fn scan_statement_tail(&mut self, start: usize) -> usize {
        let mut cursor = start;
        // Depths saturate at zero, so a stray closer can neither underflow
        // nor re-open a group.
        let mut paren_depth = 0usize;
        let mut bracket_depth = 0usize;
        let mut brace_depth = 0usize;
        while cursor < self.bytes.len() && !self.is_halted() {
            if self.starts_with(cursor, b"//") {
                cursor = self.skip_line_comment(cursor);
                continue;
            }
            if self.starts_with(cursor, b"/*") {
                cursor = self.skip_block_comment(cursor);
                continue;
            }
            match self.byte_at(cursor) {
                Some(b'"') => cursor = self.scan_quoted_string(cursor),
                Some(b'`') => cursor = self.scan_backquote(cursor),
                Some(b'(') => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.bytes.len();
                    }
                    paren_depth += 1;
                    cursor += 1;
                }
                Some(b')') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.bytes.len();
                    }
                    // Only release the nesting budget for closers that
                    // matched an opener.
                    if paren_depth > 0 {
                        self.exit_nesting();
                    }
                    paren_depth = paren_depth.saturating_sub(1);
                    cursor += 1;
                }
                Some(b'[') => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.bytes.len();
                    }
                    bracket_depth += 1;
                    cursor += 1;
                }
                Some(b']') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.bytes.len();
                    }
                    if bracket_depth > 0 {
                        self.exit_nesting();
                    }
                    bracket_depth = bracket_depth.saturating_sub(1);
                    cursor += 1;
                }
                Some(b'{') => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.bytes.len();
                    }
                    brace_depth += 1;
                    cursor += 1;
                }
                Some(b'}') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.bytes.len();
                    }
                    if brace_depth > 0 {
                        self.exit_nesting();
                    }
                    brace_depth = brace_depth.saturating_sub(1);
                    cursor += 1;
                }
                // Top-level semicolon terminates the statement.
                Some(b';') if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => {
                    let _ = self.record_token(cursor, cursor + 1);
                    return cursor + 1;
                }
                Some(ch) if is_ascii_ws(ch) => cursor += 1,
                Some(_) => {
                    // Anything else is consumed as a word, or as a single
                    // character when no word extends from this offset.
                    let end = self.scan_simple_word_until(cursor, self.bytes.len());
                    if end <= cursor {
                        let next = self.next_offset(cursor);
                        if !self.record_token(cursor, next) {
                            return self.bytes.len();
                        }
                        cursor = next;
                    } else {
                        if !self.record_token(cursor, end) {
                            return self.bytes.len();
                        }
                        cursor = end;
                    }
                }
                None => break,
            }
        }
        self.bytes.len()
    }
2133
2134    fn statement_body_end(&self, start: usize, end: usize) -> usize {
2135        let mut body_end = end;
2136        if body_end > start && self.byte_at(body_end - 1) == Some(b';') {
2137            body_end -= 1;
2138        }
2139        while body_end > start && self.byte_at(body_end - 1).is_some_and(is_ascii_ws) {
2140            body_end -= 1;
2141        }
2142        body_end
2143    }
2144
    /// Scans one command argument starting at `start` (a non-trivia offset)
    /// and classifies it as a `LightWord`. Returns the word plus the offset
    /// just past it; `None` at/after `body_end`, on a budget failure, or
    /// when nothing word-shaped is present.
    fn scan_word(&mut self, start: usize, body_end: usize) -> Option<(LightWord, usize)> {
        if start >= body_end {
            return None;
        }
        if self.byte_at(start) == Some(b'"') {
            let end = self.scan_quoted_string(start);
            let range = text_range(start as u32, end as u32);
            if !self.check_literal(range) {
                return None;
            }
            return Some((LightWord::QuotedString { text: range, range }, end));
        }
        if self.byte_at(start) == Some(b'`') {
            // NOTE(review): captures are not charged against the literal
            // budget, unlike the other composite words — confirm intended.
            let end = self.scan_backquote(start);
            let range = text_range(start as u32, end as u32);
            return Some((LightWord::Capture { range }, end));
        }
        if self.byte_at(start) == Some(b'{') {
            let end = self.scan_balanced(start, b'{', b'}');
            let range = text_range(start as u32, end as u32);
            if !self.check_literal(range) {
                return None;
            }
            return Some((LightWord::BraceList { range }, end));
        }
        if self.starts_with(start, b"<<") {
            let end = self.scan_vector_literal(start);
            let range = text_range(start as u32, end as u32);
            if !self.check_literal(range) {
                return None;
            }
            return Some((LightWord::VectorLiteral { range }, end));
        }
        if self.byte_at(start) == Some(b'(') {
            let end = self.scan_balanced(start, b'(', b')');
            let range = text_range(start as u32, end as u32);
            if !self.check_literal(range) {
                return None;
            }
            return Some((LightWord::GroupedExpr { range }, end));
        }
        // Plain word: classify by its leading bytes.
        let end = self.scan_simple_word_until(start, body_end);
        if end <= start {
            return None;
        }
        if !self.record_token(start, end) {
            return None;
        }
        let range = text_range(start as u32, end as u32);
        let text = &self.bytes[start..end];
        let word = if text.starts_with(b"$") {
            LightWord::Variable { range }
        } else if text.starts_with(b"-") && text.len() > 1 {
            // A lone `-` is not a flag. The flag check precedes the numeric
            // check, so `-3` classifies as a Flag here.
            LightWord::Flag { text: range, range }
        } else if looks_numeric_like_bytes(text) {
            LightWord::NumericLiteral { text: range, range }
        } else {
            LightWord::BareWord { text: range, range }
        };
        Some((word, end))
    }
2206
    /// Scans a `"` string starting at its opening quote; backslash escapes
    /// the next character. Returns the offset just past the closing quote.
    /// An unterminated string consumes the rest of the input and is
    /// diagnosed — unless the scan already halted on a budget error.
    fn scan_quoted_string(&mut self, start: usize) -> usize {
        let mut cursor = start + 1;
        while cursor < self.bytes.len() {
            match self.byte_at(cursor) {
                Some(b'\\') => cursor = self.next_offset(cursor + 1),
                Some(b'"') => {
                    let end = cursor + 1;
                    // Charge the whole literal as one token; a budget
                    // failure surfaces via the halted flag, not the result.
                    let _ = self.record_token(start, end);
                    return end;
                }
                Some(_) => cursor = self.next_offset(cursor),
                None => break,
            }
        }
        if self.is_halted() {
            return self.bytes.len();
        }
        let _ = self.record_token(start, self.bytes.len());
        self.errors.push(ParseError {
            message: "unterminated string literal in lightweight surface parse",
            range: text_range(start as u32, self.bytes.len() as u32),
        });
        self.bytes.len()
    }
2231
    /// Scans a backquote command capture starting at its opening backquote.
    /// Nested `"` strings are skipped atomically and backslash escapes the
    /// next character. Returns the offset just past the closing backquote.
    /// An unterminated capture consumes the rest of the input and is
    /// diagnosed — unless the scan already halted on a budget error.
    fn scan_backquote(&mut self, start: usize) -> usize {
        let mut cursor = start + 1;
        while cursor < self.bytes.len() {
            match self.byte_at(cursor) {
                Some(b'\\') => cursor = self.next_offset(cursor + 1),
                Some(b'`') => {
                    let end = cursor + 1;
                    // Charge the whole capture as one token.
                    let _ = self.record_token(start, end);
                    return end;
                }
                Some(b'"') => cursor = self.scan_quoted_string(cursor),
                Some(_) => cursor = self.next_offset(cursor),
                None => break,
            }
        }
        if self.is_halted() {
            return self.bytes.len();
        }
        let _ = self.record_token(start, self.bytes.len());
        self.errors.push(ParseError {
            message: "unterminated backquote capture in lightweight surface parse",
            range: text_range(start as u32, self.bytes.len() as u32),
        });
        self.bytes.len()
    }
2257
    /// Scans a balanced `open…close` group starting at the opening
    /// delimiter, tracking nesting of the same pair and skipping strings,
    /// captures, and comments atomically. Returns the offset just past the
    /// matching closer; budget failures and unterminated groups consume to
    /// end of input (the latter with a diagnostic).
    fn scan_balanced(&mut self, start: usize, open: u8, close: u8) -> usize {
        let mut cursor = start;
        let mut depth = 0usize;
        while cursor < self.bytes.len() && !self.is_halted() {
            if self.starts_with(cursor, b"//") {
                cursor = self.skip_line_comment(cursor);
                continue;
            }
            if self.starts_with(cursor, b"/*") {
                cursor = self.skip_block_comment(cursor);
                continue;
            }
            match self.byte_at(cursor) {
                Some(b'"') => cursor = self.scan_quoted_string(cursor),
                Some(b'`') => cursor = self.scan_backquote(cursor),
                Some(ch) if ch == open => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.bytes.len();
                    }
                    depth += 1;
                    cursor += 1;
                }
                Some(ch) if ch == close => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.bytes.len();
                    }
                    if depth > 0 {
                        self.exit_nesting();
                    }
                    depth = depth.saturating_sub(1);
                    cursor += 1;
                    // Matching closer for the delimiter we started on.
                    if depth == 0 {
                        return cursor;
                    }
                }
                // Other punctuation (not this group's pair, which the two
                // guards above already claimed) is charged as a bare token.
                Some(b'(' | b')' | b'[' | b']' | b'{' | b'}' | b',') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.bytes.len();
                    }
                    cursor += 1;
                }
                Some(ch) if is_ascii_ws(ch) => cursor += 1,
                Some(_) => {
                    // Consume a word, or a single character when no word
                    // extends from this offset.
                    let end = self.scan_simple_word_until(cursor, self.bytes.len());
                    if end <= cursor {
                        let next = self.next_offset(cursor);
                        if !self.record_token(cursor, next) {
                            return self.bytes.len();
                        }
                        cursor = next;
                    } else {
                        if !self.record_token(cursor, end) {
                            return self.bytes.len();
                        }
                        cursor = end;
                    }
                }
                None => break,
            }
        }
        if self.is_halted() {
            return self.bytes.len();
        }
        self.errors.push(ParseError {
            message: "unterminated grouped surface in lightweight parse",
            range: text_range(start as u32, self.bytes.len() as u32),
        });
        self.bytes.len()
    }
2329
    /// Scans a `<<…>>` vector literal starting at `<<`. Strings are skipped
    /// atomically; other content is tokenized word-by-word. Returns the
    /// offset just past `>>`; budget failures and unterminated literals
    /// consume to end of input (the latter with a diagnostic).
    fn scan_vector_literal(&mut self, start: usize) -> usize {
        let mut cursor = start + 2;
        // `<<` itself costs one token and one nesting level.
        if !self.record_token(start, start + 2) || !self.enter_nesting(start, start + 2) {
            return self.bytes.len();
        }
        while cursor < self.bytes.len() && !self.is_halted() {
            if self.starts_with(cursor, b">>") {
                let _ = self.record_token(cursor, cursor + 2);
                self.exit_nesting();
                return cursor + 2;
            }
            if self.byte_at(cursor) == Some(b'"') {
                cursor = self.scan_quoted_string(cursor);
                continue;
            }
            if self.byte_at(cursor).is_some_and(is_ascii_ws) {
                cursor += 1;
                continue;
            }
            // Consume a word, or a single character when no word extends
            // from this offset.
            let end = self.scan_simple_word_until(cursor, self.bytes.len());
            if end <= cursor {
                let next = self.next_offset(cursor);
                if !self.record_token(cursor, next) {
                    return self.bytes.len();
                }
                cursor = next;
            } else {
                if !self.record_token(cursor, end) {
                    return self.bytes.len();
                }
                cursor = end;
            }
        }
        if self.is_halted() {
            return self.bytes.len();
        }
        self.errors.push(ParseError {
            message: "unterminated vector literal in lightweight parse",
            range: text_range(start as u32, self.bytes.len() as u32),
        });
        self.bytes.len()
    }
2372
    /// Skips forward from `cursor` to the end of a proc definition: through
    /// any parameter list and up to the `}` matching the first `{` seen (the
    /// body opener). `start` anchors the diagnostic range. Returns the
    /// offset just past the closing brace; budget failures and a missing /
    /// unterminated body consume to end of input (the latter diagnosed).
    fn scan_until_matching_body_end(&mut self, start: usize, cursor: usize) -> usize {
        let mut cursor = cursor;
        let mut depth = 0usize;
        // Set once the body's first `{` has been seen; a `}` before that is
        // handled by the fallback arm as an ordinary one-byte token.
        let mut saw_body = false;
        while cursor < self.bytes.len() && !self.is_halted() {
            if self.starts_with(cursor, b"//") {
                cursor = self.skip_line_comment(cursor);
                continue;
            }
            if self.starts_with(cursor, b"/*") {
                cursor = self.skip_block_comment(cursor);
                continue;
            }
            match self.byte_at(cursor) {
                Some(b'"') => cursor = self.scan_quoted_string(cursor),
                Some(b'`') => cursor = self.scan_backquote(cursor),
                Some(b'{') => {
                    if !self.record_token(cursor, cursor + 1)
                        || !self.enter_nesting(cursor, cursor + 1)
                    {
                        return self.bytes.len();
                    }
                    saw_body = true;
                    depth += 1;
                    cursor += 1;
                }
                Some(b'}') if saw_body => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.bytes.len();
                    }
                    if depth > 0 {
                        self.exit_nesting();
                    }
                    depth = depth.saturating_sub(1);
                    cursor += 1;
                    // Brace matching the body opener: definition complete.
                    if depth == 0 {
                        return cursor;
                    }
                }
                Some(b'(' | b')' | b'[' | b']' | b',' | b';') => {
                    if !self.record_token(cursor, cursor + 1) {
                        return self.bytes.len();
                    }
                    cursor += 1;
                }
                Some(ch) if is_ascii_ws(ch) => cursor += 1,
                Some(_) => {
                    // Consume a word, or a single character when no word
                    // extends from this offset.
                    let end = self.scan_simple_word_until(cursor, self.bytes.len());
                    if end <= cursor {
                        let next = self.next_offset(cursor);
                        if !self.record_token(cursor, next) {
                            return self.bytes.len();
                        }
                        cursor = next;
                    } else {
                        if !self.record_token(cursor, end) {
                            return self.bytes.len();
                        }
                        cursor = end;
                    }
                }
                None => break,
            }
        }
        if self.is_halted() {
            return self.bytes.len();
        }
        self.errors.push(ParseError {
            message: "unterminated proc body in lightweight surface parse",
            range: text_range(start as u32, self.bytes.len() as u32),
        });
        self.bytes.len()
    }
2446
2447    fn scan_simple_word(&mut self, start: usize) -> Option<(usize, usize)> {
2448        let start = self.skip_trivia(start);
2449        let end = self.scan_simple_word_until(start, self.bytes.len());
2450        if end > start && !self.record_token(start, end) {
2451            return None;
2452        }
2453        (end > start).then_some((start, end))
2454    }
2455
2456    fn scan_simple_word_until(&self, start: usize, body_end: usize) -> usize {
2457        let mut cursor = start;
2458        while cursor < body_end {
2459            if self.starts_with(cursor, b"//") || self.starts_with(cursor, b"/*") {
2460                break;
2461            }
2462            match self.byte_at(cursor) {
2463                Some(b';' | b'(' | b')' | b'{' | b'}' | b'[' | b']' | b'`' | b'"') | None => break,
2464                Some(ch) if is_ascii_ws(ch) => break,
2465                Some(_) => cursor = self.next_offset(cursor),
2466            }
2467        }
2468        cursor
2469    }
2470
2471    fn skip_trivia(&mut self, start: usize) -> usize {
2472        let mut cursor = start;
2473        while cursor < self.bytes.len() {
2474            if self.starts_with(cursor, b"//") {
2475                cursor = self.skip_line_comment(cursor);
2476                continue;
2477            }
2478            if self.starts_with(cursor, b"/*") {
2479                cursor = self.skip_block_comment(cursor);
2480                continue;
2481            }
2482            if self.byte_at(cursor).is_some_and(is_ascii_ws) {
2483                cursor += 1;
2484                continue;
2485            }
2486            break;
2487        }
2488        cursor
2489    }
2490
2491    fn skip_trivia_peek(&self, start: usize) -> usize {
2492        let mut cursor = start;
2493        while cursor < self.bytes.len() {
2494            if self.starts_with(cursor, b"//") {
2495                cursor = self.skip_line_comment(cursor);
2496                continue;
2497            }
2498            if self.starts_with(cursor, b"/*") {
2499                let Some(after_comment) = self.skip_block_comment_peek(cursor) else {
2500                    return self.bytes.len();
2501                };
2502                cursor = after_comment;
2503                continue;
2504            }
2505            if self.byte_at(cursor).is_some_and(is_ascii_ws) {
2506                cursor += 1;
2507                continue;
2508            }
2509            break;
2510        }
2511        cursor
2512    }
2513
2514    fn skip_line_comment(&self, start: usize) -> usize {
2515        let mut cursor = start + 2;
2516        while cursor < self.bytes.len() {
2517            match self.byte_at(cursor) {
2518                Some(b'\n') => return cursor + 1,
2519                Some(_) => cursor = self.next_offset(cursor),
2520                None => break,
2521            }
2522        }
2523        self.bytes.len()
2524    }
2525
2526    fn skip_block_comment(&mut self, start: usize) -> usize {
2527        let mut cursor = start + 2;
2528        while cursor < self.bytes.len() {
2529            if self.starts_with(cursor, b"*/") {
2530                return cursor + 2;
2531            }
2532            cursor = self.next_offset(cursor);
2533        }
2534        if !self.reported_unterminated_block_comment {
2535            self.errors.push(ParseError {
2536                message: "unterminated block comment",
2537                range: text_range(start as u32, self.bytes.len() as u32),
2538            });
2539            self.reported_unterminated_block_comment = true;
2540        }
2541        self.bytes.len()
2542    }
2543
2544    fn skip_block_comment_peek(&self, start: usize) -> Option<usize> {
2545        let mut cursor = start + 2;
2546        while cursor < self.bytes.len() {
2547            if self.starts_with(cursor, b"*/") {
2548                return Some(cursor + 2);
2549            }
2550            cursor = self.next_offset(cursor);
2551        }
2552        None
2553    }
2554
2555    fn is_proc_start(&self, start: usize) -> bool {
2556        if self.peek_keyword_end(start, b"proc").is_some() {
2557            return true;
2558        }
2559        let Some(after_global) = self.peek_keyword_end(start, b"global") else {
2560            return false;
2561        };
2562        let after_global = self.skip_trivia_peek(after_global);
2563        self.peek_keyword_end(after_global, b"proc").is_some()
2564    }
2565
2566    fn peek_keyword_end(&self, start: usize, keyword: &[u8]) -> Option<usize> {
2567        let cursor = self.skip_trivia_peek(start);
2568        if !self.bytes.get(cursor..)?.starts_with(keyword) {
2569            return None;
2570        }
2571        let end = cursor + keyword.len();
2572        if self.byte_at(end).is_some_and(is_word_continue_byte) {
2573            return None;
2574        }
2575        Some(end)
2576    }
2577
2578    fn consume_keyword(&mut self, start: usize, keyword: &[u8]) -> Option<usize> {
2579        let cursor = self.skip_trivia(start);
2580        if !self.bytes.get(cursor..)?.starts_with(keyword) {
2581            return None;
2582        }
2583        let end = cursor + keyword.len();
2584        if self.byte_at(end).is_some_and(is_word_continue_byte) {
2585            return None;
2586        }
2587        if !self.record_token(cursor, end) {
2588            return None;
2589        }
2590        Some(end)
2591    }
2592
2593    fn starts_with(&self, start: usize, needle: &[u8]) -> bool {
2594        self.bytes
2595            .get(start..)
2596            .is_some_and(|bytes| bytes.starts_with(needle))
2597    }
2598
2599    fn byte_at(&self, offset: usize) -> Option<u8> {
2600        self.bytes.get(offset).copied()
2601    }
2602
2603    fn peek_byte(&self, offset: usize) -> Option<u8> {
2604        self.byte_at(offset)
2605    }
2606
2607    fn next_offset(&self, offset: usize) -> usize {
2608        if offset >= self.bytes.len() {
2609            return self.bytes.len();
2610        }
2611        let first = self.bytes[offset];
2612        if first < 0x80 {
2613            return offset + 1;
2614        }
2615        let len = match self.encoding {
2616            SourceEncoding::Utf8 => utf8_sequence_len(first, &self.bytes[offset..]),
2617            SourceEncoding::Cp932 => multibyte_len_cp932(first, self.byte_at(offset + 1)),
2618            SourceEncoding::Gbk => multibyte_len_gbk(first, self.byte_at(offset + 1)),
2619        };
2620        offset.saturating_add(len).min(self.bytes.len())
2621    }
2622
    /// True once any parse budget has been exhausted; scanning should stop.
    fn is_halted(&self) -> bool {
        self.budget.halted
    }
2626
2627    fn halt(&mut self, error: ParseError) {
2628        if self.reported_budget_error {
2629            return;
2630        }
2631        self.reported_budget_error = true;
2632        self.budget.halted = true;
2633        self.errors.push(error);
2634    }
2635
2636    fn record_token(&mut self, start: usize, end: usize) -> bool {
2637        let range = text_range(start as u32, end as u32);
2638        if !self.budget.record_token() {
2639            self.halt(budget_error("max_tokens", range));
2640            return false;
2641        }
2642        true
2643    }
2644
2645    fn record_statement(&mut self, range: TextRange) -> bool {
2646        if !self.budget.record_statement() {
2647            self.halt(budget_error("max_statements", range));
2648            return false;
2649        }
2650        true
2651    }
2652
2653    fn enter_nesting(&mut self, start: usize, end: usize) -> bool {
2654        let range = text_range(start as u32, end as u32);
2655        if !self.budget.enter_nesting() {
2656            self.halt(budget_error("max_nesting_depth", range));
2657            return false;
2658        }
2659        true
2660    }
2661
    /// Return one nesting level to the budget after leaving a scope.
    fn exit_nesting(&mut self) {
        self.budget.exit_nesting();
    }
2665
2666    fn check_literal(&mut self, range: TextRange) -> bool {
2667        if !self.budget.check_literal(usize::from(range.len())) {
2668            self.halt(budget_error("max_literal_bytes", range));
2669            return false;
2670        }
2671        true
2672    }
2673}
2674
/// Mutable countdown counters derived from `ParseBudgets`; sets `halted`
/// when any limit is exceeded.
#[derive(Debug, Clone, Copy)]
struct LightBudgetTracker {
    // Cap used to clamp refunds in `exit_nesting`.
    max_nesting_depth: usize,
    // Per-literal ceiling; checked against each literal, not accumulated.
    max_literal_bytes: usize,
    // Remaining allowances, decremented as the scan progresses.
    remaining_tokens: usize,
    remaining_statements: usize,
    remaining_nesting: usize,
    // Latched true once any budget trips.
    halted: bool,
}
2684
2685impl LightBudgetTracker {
2686    fn new(budgets: ParseBudgets) -> Self {
2687        Self {
2688            max_nesting_depth: budgets.max_nesting_depth,
2689            max_literal_bytes: budgets.max_literal_bytes,
2690            remaining_tokens: budgets.max_tokens,
2691            remaining_statements: budgets.max_statements,
2692            remaining_nesting: budgets.max_nesting_depth,
2693            halted: false,
2694        }
2695    }
2696
2697    fn record_token(&mut self) -> bool {
2698        if self.remaining_tokens == 0 {
2699            self.halted = true;
2700            return false;
2701        }
2702        self.remaining_tokens -= 1;
2703        true
2704    }
2705
2706    fn record_statement(&mut self) -> bool {
2707        if self.remaining_statements == 0 {
2708            self.halted = true;
2709            return false;
2710        }
2711        self.remaining_statements -= 1;
2712        true
2713    }
2714
2715    fn enter_nesting(&mut self) -> bool {
2716        if self.remaining_nesting == 0 {
2717            self.halted = true;
2718            return false;
2719        }
2720        self.remaining_nesting -= 1;
2721        true
2722    }
2723
2724    fn exit_nesting(&mut self) {
2725        if self.remaining_nesting < self.max_nesting_depth {
2726            self.remaining_nesting += 1;
2727        }
2728    }
2729
2730    fn check_literal(&mut self, len: usize) -> bool {
2731        if len > self.max_literal_bytes {
2732            self.halted = true;
2733            return false;
2734        }
2735        true
2736    }
2737}
2738
2739fn start_range(item: &LightItem) -> TextRange {
2740    match item {
2741        LightItem::Command(command) => command.span,
2742        LightItem::Proc(proc_def) => proc_def.span,
2743        LightItem::Other { span } => *span,
2744    }
2745}
2746
2747fn max_bytes_error_for_text(len: usize, budgets: ParseBudgets) -> Option<ParseError> {
2748    (len > budgets.max_bytes).then(|| budget_error("max_bytes", text_len_range(len)))
2749}
2750
2751fn max_bytes_error_for_bytes(len: usize, budgets: ParseBudgets) -> Option<ParseError> {
2752    (len > budgets.max_bytes).then(|| budget_error("max_bytes", text_range(0, 0)))
2753}
2754
2755fn max_bytes_error_for_file(path: &Path, budgets: ParseBudgets) -> io::Result<Option<ParseError>> {
2756    match fs::metadata(path) {
2757        Ok(metadata) if metadata.len() > budgets.max_bytes as u64 => {
2758            Ok(Some(budget_error("max_bytes", text_range(0, 0))))
2759        }
2760        Ok(_) => Ok(None),
2761        Err(error) if error.kind() == io::ErrorKind::NotFound => Err(error),
2762        Err(_) => Ok(None),
2763    }
2764}
2765
2766fn empty_light_scan_report(error: ParseError) -> LightScanReport {
2767    LightScanReport {
2768        source_text: String::new(),
2769        source_map: SourceMap::identity(0),
2770        source_encoding: SourceEncoding::Utf8,
2771        decode_errors: Vec::new(),
2772        errors: vec![error],
2773    }
2774}
2775
2776fn empty_shared_light_scan_report(error: ParseError) -> SharedLightScanReport {
2777    SharedLightScanReport {
2778        source_text: Arc::from(""),
2779        source_map: SourceMap::identity(0),
2780        source_encoding: SourceEncoding::Utf8,
2781        decode_errors: Vec::new(),
2782        errors: vec![error],
2783    }
2784}
2785
/// Characters that may continue a MEL identifier: ASCII alphanumerics,
/// `_`, and `$`.
fn is_word_continue(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$')
}
2789
/// Byte-level twin of `is_word_continue` for scanning raw input.
fn is_word_continue_byte(ch: u8) -> bool {
    matches!(ch, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'$')
}
2793
/// ASCII whitespace recognized by the scanner: space, tab, LF, CR, and
/// form feed (0x0C).
fn is_ascii_ws(ch: u8) -> bool {
    ch == b' ' || ch == b'\t' || ch == b'\n' || ch == b'\r' || ch == 0x0C
}
2797
/// Leading words that start statements/declarations rather than commands
/// (keywords and MEL type names).
fn is_non_command_head(head: &str) -> bool {
    const NON_COMMAND_HEADS: [&str; 15] = [
        "global", "proc", "if", "while", "do", "for", "switch", "return",
        "break", "continue", "int", "float", "string", "vector", "matrix",
    ];
    NON_COMMAND_HEADS.contains(&head)
}
2818
/// Byte-level twin of `is_non_command_head`, for checking heads before
/// decoding.
fn is_non_command_head_bytes(head: &[u8]) -> bool {
    const NON_COMMAND_HEADS: [&[u8]; 15] = [
        b"global", b"proc", b"if", b"while", b"do", b"for", b"switch",
        b"return", b"break", b"continue", b"int", b"float", b"string",
        b"vector", b"matrix",
    ];
    NON_COMMAND_HEADS.iter().any(|keyword| *keyword == head)
}
2839
/// Heuristic: does `text` begin like a numeric literal? Accepts an optional
/// sign, then either a digit or a `.` followed by a digit (`12`, `-3.5`,
/// `+.25`). No full validation is done.
fn looks_numeric_like(text: &str) -> bool {
    let body = match text.strip_prefix(['+', '-']) {
        Some(rest) => rest,
        None => text,
    };
    let mut chars = body.chars();
    match chars.next() {
        Some(first) if first.is_ascii_digit() => true,
        Some('.') => chars.next().is_some_and(|ch| ch.is_ascii_digit()),
        _ => false,
    }
}
2852
/// Byte-level twin of `looks_numeric_like`: optional sign, then a digit or
/// `.` followed by a digit.
fn looks_numeric_like_bytes(text: &[u8]) -> bool {
    let body = match text.first() {
        Some(b'+' | b'-') => &text[1..],
        _ => text,
    };
    match body {
        [] => false,
        [first, ..] if first.is_ascii_digit() => true,
        [b'.', second, ..] => second.is_ascii_digit(),
        _ => false,
    }
}
2864
/// Length in bytes of the UTF-8 sequence starting at `first` (which is
/// `bytes[0]`). Falls back to 1 when the lead byte is invalid or any
/// continuation byte is missing or malformed, so scanning always advances.
fn utf8_sequence_len(first: u8, bytes: &[u8]) -> usize {
    // Expected sequence length from the lead byte's high bits.
    let expected = match first {
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        _ => 1,
    };
    // Accept only if every continuation byte is present and 0b10xxxxxx.
    match bytes.get(1..expected) {
        Some(tail) if tail.iter().all(|byte| byte & 0xC0 == 0x80) => expected,
        _ => 1,
    }
}
2885
/// CP932 (Shift-JIS) character width: 2 for a valid double-byte pair
/// (lead 0x81-0x9F or 0xE0-0xFC, trail 0x40-0xFC excluding 0x7F), else 1.
fn multibyte_len_cp932(first: u8, second: Option<u8>) -> usize {
    let lead_ok = matches!(first, 0x81..=0x9F | 0xE0..=0xFC);
    match second {
        Some(trail) if lead_ok && matches!(trail, 0x40..=0x7E | 0x80..=0xFC) => 2,
        _ => 1,
    }
}
2891
/// GBK character width: 2 for a valid double-byte pair (lead 0x81-0xFE,
/// trail 0x40-0xFE excluding 0x7F), else 1.
fn multibyte_len_gbk(first: u8, second: Option<u8>) -> usize {
    let lead_ok = matches!(first, 0x81..=0xFE);
    match second {
        Some(trail) if lead_ok && matches!(trail, 0x40..=0x7E | 0x80..=0xFE) => 2,
        _ => 1,
    }
}
2897
#[cfg(test)]
mod tests {
    use super::parse_light_source;
    use mel_syntax::text_range;

    #[test]
    fn unterminated_block_comment_reports_light_parse_error() {
        // A `/*` with no closing `*/` should still yield the preceding
        // command as an item, plus exactly one diagnostic spanning the
        // comment: byte 24 (start of `/*`, right after the 24-byte first
        // line) through 38 (end of input).
        let parse = parse_light_source("createNode file -n \"f\";\n/* hidden tail");

        assert_eq!(parse.source.items.len(), 1);
        assert_eq!(parse.errors.len(), 1);
        assert_eq!(parse.errors[0].message, "unterminated block comment");
        assert_eq!(parse.errors[0].range, text_range(24, 38));
    }
}