sqruff_lib_core/parser/
lexer.rs

1use std::borrow::Cow;
2use std::fmt::Debug;
3use std::ops::Range;
4use std::str::Chars;
5
6use super::markers::PositionMarker;
7use super::segments::{ErasedSegment, SegmentBuilder, Tables};
8use crate::dialects::Dialect;
9use crate::dialects::syntax::SyntaxKind;
10use crate::errors::SQLLexError;
11use crate::slice_helpers::{is_zero_slice, offset_slice};
12use crate::templaters::TemplatedFile;
13
/// An element matched during lexing.
///
/// Holds the matched text (borrowed from the input where possible, owned
/// when trimming/subdividing had to build a new string).
#[derive(Debug, Clone)]
pub struct Element<'a> {
    // Name of the matcher/pattern that produced this element.
    name: &'static str,
    // The matched text.
    text: Cow<'a, str>,
    // The syntax kind the resulting segment will carry.
    syntax_kind: SyntaxKind,
}
21
22impl<'a> Element<'a> {
23    fn new(name: &'static str, syntax_kind: SyntaxKind, text: impl Into<Cow<'a, str>>) -> Self {
24        Self {
25            name,
26            syntax_kind,
27            text: text.into(),
28        }
29    }
30}
31
/// A LexedElement, bundled with its position in the templated file.
#[derive(Debug)]
pub struct TemplateElement<'a> {
    // The raw matched text.
    raw: Cow<'a, str>,
    // Where this element sits within the templated string.
    template_slice: Range<usize>,
    // Name and syntax kind of the matcher that produced it.
    matcher: Info,
}
39
/// The name and syntax kind of the matcher that produced an element.
#[derive(Debug)]
struct Info {
    name: &'static str,
    syntax_kind: SyntaxKind,
}
45
46impl<'a> TemplateElement<'a> {
47    /// Make a TemplateElement from a LexedElement.
48    pub fn from_element(element: Element<'a>, template_slice: Range<usize>) -> Self {
49        TemplateElement {
50            raw: element.text,
51            template_slice,
52            matcher: Info {
53                name: element.name,
54                syntax_kind: element.syntax_kind,
55            },
56        }
57    }
58
59    pub fn to_segment(
60        &self,
61        pos_marker: PositionMarker,
62        subslice: Option<Range<usize>>,
63    ) -> ErasedSegment {
64        let slice = subslice.map_or_else(|| self.raw.as_ref(), |slice| &self.raw[slice]);
65        SegmentBuilder::token(0, slice, self.matcher.syntax_kind)
66            .with_position(pos_marker)
67            .finish()
68    }
69}
70
/// A class to hold matches from the lexer.
#[derive(Debug)]
pub struct Match<'a> {
    /// The remaining, unconsumed input.
    pub forward_string: &'a str,
    /// The elements lexed from the consumed prefix (empty if no match).
    pub elements: Vec<Element<'a>>,
}
77
/// A search pattern plus optional subdivision and trimming rules for its
/// matches.
#[derive(Debug, Clone)]
pub struct Matcher {
    pattern: Pattern,
    // Optional pattern used to split a match into smaller elements.
    subdivider: Option<Pattern>,
    // Optional pattern used to trim the pieces produced by the subdivider.
    trim_post_subdivide: Option<Pattern>,
}
84
impl Matcher {
    /// Wrap a [`Pattern`] in a matcher with no subdivision behaviour.
    pub const fn new(pattern: Pattern) -> Self {
        Self {
            pattern,
            subdivider: None,
            trim_post_subdivide: None,
        }
    }

    /// Convenience constructor for an exact-string pattern.
    pub const fn string(
        name: &'static str,
        pattern: &'static str,
        syntax_kind: SyntaxKind,
    ) -> Self {
        Self::new(Pattern::string(name, pattern, syntax_kind))
    }

    /// Convenience constructor for a regex pattern.
    #[track_caller]
    pub fn regex(name: &'static str, pattern: &'static str, syntax_kind: SyntaxKind) -> Self {
        Self::new(Pattern::regex(name, pattern, syntax_kind))
    }

    /// Convenience constructor for a native (hand-written) lexer function.
    pub fn native(name: &'static str, f: fn(&mut Cursor) -> bool, syntax_kind: SyntaxKind) -> Self {
        Self::new(Pattern::native(name, f, syntax_kind))
    }

    /// Convenience constructor for a legacy (fancy-regex) pattern with a
    /// cheap `starts_with` pre-filter.
    #[track_caller]
    pub fn legacy(
        name: &'static str,
        starts_with: fn(&str) -> bool,
        pattern: &'static str,
        syntax_kind: SyntaxKind,
    ) -> Self {
        Self::new(Pattern::legacy(name, starts_with, pattern, syntax_kind))
    }

    /// Attach a pattern used to subdivide a match into smaller elements.
    ///
    /// Only supported for legacy/native patterns; string/regex patterns are
    /// compiled into the lexer's combined automaton and never subdivided.
    pub fn subdivider(mut self, subdivider: Pattern) -> Self {
        assert!(matches!(
            self.pattern.kind,
            SearchPatternKind::Legacy(_, _) | SearchPatternKind::Native(_)
        ));
        self.subdivider = Some(subdivider);
        self
    }

    /// Attach a pattern used to trim the pieces produced by the subdivider.
    ///
    /// Same restriction as [`Matcher::subdivider`]: legacy/native only.
    pub fn post_subdivide(mut self, trim_post_subdivide: Pattern) -> Self {
        assert!(matches!(
            self.pattern.kind,
            SearchPatternKind::Legacy(_, _) | SearchPatternKind::Native(_)
        ));
        self.trim_post_subdivide = Some(trim_post_subdivide);
        self
    }

    /// The name of the underlying pattern.
    pub fn name(&self) -> &'static str {
        self.pattern.name
    }

    /// Try to match at the start of `forward_string`.
    ///
    /// On success, the returned [`Match`] holds the (possibly subdivided)
    /// elements and the remaining unconsumed input; on failure it holds the
    /// input unchanged and an empty element list.
    #[track_caller]
    pub fn matches<'a>(&self, forward_string: &'a str) -> Match<'a> {
        match self.pattern.matches(forward_string) {
            Some(matched) => {
                let new_elements = self.subdivide(matched, self.pattern.syntax_kind);

                Match {
                    forward_string: &forward_string[matched.len()..],
                    elements: new_elements,
                }
            }
            None => Match {
                forward_string,
                elements: Vec::new(),
            },
        }
    }

    /// Split `matched` on every occurrence of the subdivider pattern.
    ///
    /// Text between subdivider hits is passed through `trim_match`; the
    /// hits themselves become elements of the subdivider's kind. With no
    /// subdivider configured, the whole match becomes a single element.
    fn subdivide<'a>(&self, matched: &'a str, matched_kind: SyntaxKind) -> Vec<Element<'a>> {
        match &self.subdivider {
            Some(subdivider) => {
                let mut elem_buff = Vec::new();
                let mut str_buff = matched;

                while !str_buff.is_empty() {
                    // No more subdivider hits: trim whatever is left and stop.
                    let Some(div_pos) = subdivider.search(str_buff) else {
                        let mut trimmed_elems = self.trim_match(str_buff);
                        elem_buff.append(&mut trimmed_elems);
                        break;
                    };

                    let mut trimmed_elems = self.trim_match(&str_buff[..div_pos.start]);
                    let div_elem = Element::new(
                        subdivider.name,
                        subdivider.syntax_kind,
                        &str_buff[div_pos.start..div_pos.end],
                    );

                    elem_buff.append(&mut trimmed_elems);
                    elem_buff.push(div_elem);

                    str_buff = &str_buff[div_pos.end..];
                }

                elem_buff
            }
            None => {
                vec![Element::new(self.name(), matched_kind, matched)]
            }
        }
    }

    /// Trim a subdivided chunk using the `trim_post_subdivide` pattern.
    ///
    /// Trim hits at the start of the buffer, or flush against its end, are
    /// emitted as elements of the trim pattern's kind; everything else is
    /// accumulated and emitted as a single element of the main pattern's
    /// kind. Returns an empty vec when no trim pattern is configured.
    fn trim_match<'a>(&self, matched_str: &'a str) -> Vec<Element<'a>> {
        let Some(trim_post_subdivide) = &self.trim_post_subdivide else {
            return Vec::new();
        };

        // Helper to build an element of the trim pattern's kind.
        let mk_element = |text| {
            Element::new(
                trim_post_subdivide.name,
                trim_post_subdivide.syntax_kind,
                text,
            )
        };

        let mut elem_buff = Vec::new();
        // Accumulates non-trim content that spans multiple trim hits.
        let mut content_buff = String::new();
        let mut str_buff = matched_str;

        while !str_buff.is_empty() {
            let Some(trim_pos) = trim_post_subdivide.search(str_buff) else {
                break;
            };

            let start = trim_pos.start;
            let end = trim_pos.end;

            if start == 0 {
                // Trim hit at the very start: emit it and advance.
                elem_buff.push(mk_element(&str_buff[..end]));
                str_buff = str_buff[end..].into();
            } else if end == str_buff.len() {
                // Trim hit flush against the end: emit the accumulated
                // content (plus the prefix) first, then the trim hit.
                let raw = format!("{}{}", content_buff, &str_buff[..start]);

                elem_buff.push(Element::new(
                    trim_post_subdivide.name,
                    trim_post_subdivide.syntax_kind,
                    raw,
                ));
                elem_buff.push(mk_element(&str_buff[start..end]));

                content_buff.clear();
                str_buff = "";
            } else {
                // Trim hit in the middle: accumulate up to and including it
                // and keep scanning the remainder.
                content_buff.push_str(&str_buff[..end]);
                str_buff = &str_buff[end..];
            }
        }

        // Flush any leftover content as an element of the main pattern.
        if !content_buff.is_empty() || !str_buff.is_empty() {
            let raw = format!("{content_buff}{str_buff}");
            elem_buff.push(Element::new(
                self.pattern.name,
                self.pattern.syntax_kind,
                raw,
            ));
        }

        elem_buff
    }
}
253
/// A named search pattern which produces elements of a given syntax kind.
#[derive(Debug, Clone)]
pub struct Pattern {
    name: &'static str,
    syntax_kind: SyntaxKind,
    // The matching strategy (string / regex / native / legacy).
    kind: SearchPatternKind,
}
260
/// The different matching strategies a [`Pattern`] can use.
#[derive(Debug, Clone)]
pub enum SearchPatternKind {
    /// Exact string prefix match.
    String(&'static str),
    /// Regex source; compiled into the lexer's combined automaton.
    Regex(&'static str),
    /// Hand-written lexer function driven by a [`Cursor`].
    Native(fn(&mut Cursor) -> bool),
    /// `starts_with` pre-filter plus an anchored `fancy_regex`.
    Legacy(fn(&str) -> bool, fancy_regex::Regex),
}
268
impl Pattern {
    /// An exact-string pattern.
    pub const fn string(
        name: &'static str,
        template: &'static str,
        syntax_kind: SyntaxKind,
    ) -> Self {
        Self {
            name,
            syntax_kind,
            kind: SearchPatternKind::String(template),
        }
    }

    /// A regex pattern.
    ///
    /// In debug builds the regex source is validated eagerly so a bad
    /// pattern fails at construction (reporting the caller's location)
    /// rather than when the lexer is assembled.
    #[track_caller]
    pub fn regex(name: &'static str, regex: &'static str, syntax_kind: SyntaxKind) -> Self {
        #[cfg(debug_assertions)]
        if regex_automata::dfa::regex::Regex::new(regex).is_err() {
            panic!("Invalid regex pattern: {}", std::panic::Location::caller());
        }

        Self {
            name,
            syntax_kind,
            kind: SearchPatternKind::Regex(regex),
        }
    }

    /// A native pattern backed by a hand-written lexer function.
    pub fn native(name: &'static str, f: fn(&mut Cursor) -> bool, syntax_kind: SyntaxKind) -> Self {
        Self {
            name,
            syntax_kind,
            kind: SearchPatternKind::Native(f),
        }
    }

    /// A legacy `fancy_regex` pattern with a cheap `starts_with`
    /// pre-filter. The regex is anchored with a leading `^` so it can only
    /// match at the start of the input.
    ///
    /// # Panics
    /// Panics if the regex fails to compile.
    #[track_caller]
    pub fn legacy(
        name: &'static str,
        starts_with: fn(&str) -> bool,
        regex: &'static str,
        syntax_kind: SyntaxKind,
    ) -> Self {
        let regex = format!("^{regex}");
        Self {
            name,
            syntax_kind,
            kind: SearchPatternKind::Legacy(starts_with, fancy_regex::Regex::new(&regex).unwrap()),
        }
    }

    /// Try to match this pattern at the start of `forward_string`,
    /// returning the matched prefix.
    ///
    /// NOTE: `Regex` patterns are never matched here — they are compiled
    /// into the lexer's combined automaton (see `Lexer::new`) — hence the
    /// `unreachable!` arm.
    fn matches<'a>(&self, forward_string: &'a str) -> Option<&'a str> {
        match self.kind {
            SearchPatternKind::String(template) => {
                if forward_string.starts_with(template) {
                    return Some(template);
                }
            }
            SearchPatternKind::Legacy(f, ref template) => {
                // Cheap pre-filter before running the regex.
                if !f(forward_string) {
                    return None;
                }

                if let Ok(Some(matched)) = template.find(forward_string)
                    && matched.start() == 0
                {
                    return Some(matched.as_str());
                }
            }
            SearchPatternKind::Native(f) => {
                let mut cursor = Cursor::new(forward_string);
                return f(&mut cursor).then(|| cursor.lexed());
            }
            _ => unreachable!(),
        };

        None
    }

    /// Search for this pattern anywhere in `forward_string`, returning the
    /// byte range of the first match.
    ///
    /// Only string and legacy patterns support searching (the kinds used
    /// as subdividers and trimmers).
    fn search(&self, forward_string: &str) -> Option<Range<usize>> {
        match &self.kind {
            SearchPatternKind::String(template) => forward_string
                .find(template)
                .map(|start| start..start + template.len()),
            SearchPatternKind::Legacy(_, template) => {
                if let Ok(Some(matched)) = template.find(forward_string) {
                    return Some(matched.range());
                }
                None
            }
            _ => unreachable!("{:?}", self.kind),
        }
    }
}
362
/// A cheap lookahead cursor over the characters of a string, used by
/// native lexer functions.
pub struct Cursor<'text> {
    text: &'text str,
    chars: Chars<'text>,
}

impl<'text> Cursor<'text> {
    /// Sentinel returned once the cursor has run out of characters.
    const EOF: char = '\0';

    fn new(text: &'text str) -> Self {
        Self {
            text,
            chars: text.chars(),
        }
    }

    /// Look at the next character without consuming it.
    pub fn peek(&self) -> char {
        self.chars.as_str().chars().next().unwrap_or(Self::EOF)
    }

    /// Look two characters ahead without consuming anything.
    pub fn peek_next(&self) -> char {
        let mut lookahead = self.chars.clone();
        lookahead.next();
        lookahead.next().unwrap_or(Self::EOF)
    }

    /// Consume and return the next character (EOF sentinel at the end).
    pub fn shift(&mut self) -> char {
        match self.chars.next() {
            Some(ch) => ch,
            None => Self::EOF,
        }
    }

    /// Consume characters for as long as the predicate holds.
    pub fn shift_while(&mut self, f: impl Fn(char) -> bool + Copy) {
        loop {
            let ch = self.peek();
            if ch == Self::EOF || !f(ch) {
                break;
            }
            self.shift();
        }
    }

    /// The prefix of the input consumed so far.
    fn lexed(&self) -> &'text str {
        let consumed = self.text.len() - self.chars.as_str().len();
        &self.text[..consumed]
    }
}
401
402pub fn nested_block_comment(cursor: &mut Cursor) -> bool {
403    if cursor.peek() != '/' || cursor.peek_next() != '*' {
404        return false;
405    }
406    cursor.shift();
407    cursor.shift();
408    let mut depth = 1;
409    loop {
410        let ch = cursor.peek();
411        if ch == Cursor::EOF {
412            return false;
413        }
414        if ch == '/' && cursor.peek_next() == '*' {
415            cursor.shift();
416            cursor.shift();
417            depth += 1;
418            continue;
419        }
420        if ch == '*' && cursor.peek_next() == '/' {
421            cursor.shift();
422            cursor.shift();
423            depth -= 1;
424            if depth == 0 {
425                return true;
426            }
427            continue;
428        }
429        cursor.shift();
430    }
431}
432
/// The Lexer class actually does the lexing step.
#[derive(Debug, Clone)]
pub struct Lexer {
    // Maps each pattern index in `regex` to its matcher name and the
    // syntax kind a match should produce.
    syntax_map: Vec<(&'static str, SyntaxKind)>,
    // Combined multi-pattern automaton for all string/regex matchers.
    regex: regex_automata::meta::Regex,
    // Legacy/native matchers, tried individually before the automaton.
    matchers: Vec<Matcher>,
    // Fallback matcher used to consume otherwise unlexable input.
    last_resort_lexer: Matcher,
}
441
/// Build a lexer from a dialect's configured matchers.
impl<'a> From<&'a Dialect> for Lexer {
    fn from(dialect: &'a Dialect) -> Self {
        Lexer::new(dialect.lexer_matchers())
    }
}
447
448impl Lexer {
449    /// Create a new lexer.
450    pub(crate) fn new(lexer_matchers: &[Matcher]) -> Self {
451        let mut patterns = Vec::new();
452        let mut syntax_map = Vec::new();
453        let mut matchers = Vec::new();
454
455        for matcher in lexer_matchers {
456            match matcher.pattern.kind {
457                SearchPatternKind::String(pattern) | SearchPatternKind::Regex(pattern) => {
458                    let pattern = if matches!(matcher.pattern.kind, SearchPatternKind::String(_)) {
459                        fancy_regex::escape(pattern)
460                    } else {
461                        pattern.into()
462                    };
463
464                    patterns.push(pattern);
465                    syntax_map.push((matcher.pattern.name, matcher.pattern.syntax_kind));
466                }
467                SearchPatternKind::Legacy(_, _) | SearchPatternKind::Native(_) => {
468                    matchers.push(matcher.clone());
469                }
470            }
471        }
472
473        Lexer {
474            syntax_map,
475            matchers,
476            regex: regex_automata::meta::Regex::new_many(&patterns).unwrap(),
477            last_resort_lexer: Matcher::legacy(
478                "<unlexable>",
479                |_| true,
480                r"[^\t\n.]*",
481                SyntaxKind::Unlexable,
482            ),
483        }
484    }
485
486    pub fn lex(
487        &self,
488        tables: &Tables,
489        template: impl Into<TemplatedFile>,
490    ) -> (Vec<ErasedSegment>, Vec<SQLLexError>) {
491        let template = template.into();
492        let mut str_buff = template.templated_str.as_deref().unwrap();
493
494        // Lex the string to get a tuple of LexedElement
495        let mut element_buffer: Vec<Element> = Vec::new();
496
497        loop {
498            let mut res = self.lex_match(str_buff);
499            element_buffer.append(&mut res.elements);
500
501            if res.forward_string.is_empty() {
502                break;
503            }
504
505            // If we STILL can't match, then just panic out.
506            let mut resort_res = self.last_resort_lexer.matches(str_buff);
507            if !resort_res.elements.is_empty() {
508                break;
509            }
510
511            str_buff = resort_res.forward_string;
512            element_buffer.append(&mut resort_res.elements);
513        }
514
515        // Map tuple LexedElement to list of TemplateElement.
516        // This adds the template_slice to the object.
517        let templated_buffer = Lexer::map_template_slices(element_buffer, &template);
518        // Turn lexed elements into segments.
519        let mut segments = self.elements_to_segments(templated_buffer, &template);
520
521        for seg in &mut segments {
522            seg.get_mut().set_id(tables.next_id())
523        }
524
525        (segments, Vec::new())
526    }
527
528    /// Generate any lexing errors for any un-lex-ables.
529    ///
530    /// TODO: Taking in an iterator, also can make the typing better than use
531    /// unwrap.
532    #[allow(dead_code)]
533    fn violations_from_segments(segments: Vec<ErasedSegment>) -> Vec<SQLLexError> {
534        segments
535            .into_iter()
536            .filter(|s| s.is_type(SyntaxKind::Unlexable))
537            .map(|s| {
538                SQLLexError::new(
539                    format!(
540                        "Unable to lex characters: {}",
541                        s.raw().chars().take(10).collect::<String>()
542                    ),
543                    s.get_position_marker().unwrap().clone(),
544                )
545            })
546            .collect()
547    }
548
549    /// Iteratively match strings using the selection of sub-matchers.
550    fn lex_match<'b>(&self, mut forward_string: &'b str) -> Match<'b> {
551        let mut elem_buff = Vec::new();
552
553        'main: loop {
554            if forward_string.is_empty() {
555                return Match {
556                    forward_string,
557                    elements: elem_buff,
558                };
559            }
560
561            for matcher in &self.matchers {
562                let mut match_result = matcher.matches(forward_string);
563
564                if !match_result.elements.is_empty() {
565                    elem_buff.append(&mut match_result.elements);
566                    forward_string = match_result.forward_string;
567                    continue 'main;
568                }
569            }
570
571            let input =
572                regex_automata::Input::new(forward_string).anchored(regex_automata::Anchored::Yes);
573
574            if let Some(match_) = self.regex.find(input) {
575                let (name, kind) = self.syntax_map[match_.pattern().as_usize()];
576
577                elem_buff.push(Element::new(
578                    name,
579                    kind,
580                    &forward_string[match_.start()..match_.end()],
581                ));
582                forward_string = &forward_string[match_.end()..];
583
584                continue 'main;
585            }
586
587            return Match {
588                forward_string,
589                elements: elem_buff,
590            };
591        }
592    }
593
594    /// Create a tuple of TemplateElement from a tuple of LexedElement.
595    ///
596    /// This adds slices in the templated file to the original lexed
597    /// elements. We'll need this to work out the position in the source
598    /// file.
599    /// TODO Can this vec be turned into an iterator and return iterator to make
600    /// lazy?
601    fn map_template_slices<'b>(
602        elements: Vec<Element<'b>>,
603        template: &TemplatedFile,
604    ) -> Vec<TemplateElement<'b>> {
605        let mut idx = 0;
606        let mut templated_buff: Vec<TemplateElement> = Vec::with_capacity(elements.len());
607
608        for element in elements {
609            let template_slice = offset_slice(idx, element.text.len());
610            idx += element.text.len();
611
612            let templated_string = template.templated();
613            if templated_string[template_slice.clone()] != element.text {
614                panic!(
615                    "Template and lexed elements do not match. This should never happen {:?} != \
616                     {:?}",
617                    element.text, &templated_string[template_slice]
618                );
619            }
620
621            templated_buff.push(TemplateElement::from_element(element, template_slice));
622        }
623
624        templated_buff
625    }
626
627    /// Convert a tuple of lexed elements into a tuple of segments.
628    fn elements_to_segments(
629        &self,
630        elements: Vec<TemplateElement>,
631        templated_file: &TemplatedFile,
632    ) -> Vec<ErasedSegment> {
633        let mut segments = iter_segments(elements, templated_file);
634
635        // Add an end of file marker
636        let position_maker = match segments.last() {
637            Some(segment) => segment.get_position_marker().unwrap().end_point_marker(),
638            None => PositionMarker::from_point(0, 0, templated_file.clone(), None, None),
639        };
640
641        segments.push(
642            SegmentBuilder::token(0, "", SyntaxKind::EndOfFile)
643                .with_position(position_maker)
644                .finish(),
645        );
646
647        segments
648    }
649}
650
/// Walk the lexed elements and the templated file's slices in lockstep,
/// assigning each element a source slice and emitting it as a segment.
///
/// Literal slices map positions via a fixed source/templated offset;
/// templated slices map to the whole source span of the slice. Elements
/// spanning multiple slices are either split (whitespace only) or emitted
/// once with a stashed start index.
fn iter_segments(
    lexed_elements: Vec<TemplateElement>,
    templated_file: &TemplatedFile,
) -> Vec<ErasedSegment> {
    let mut result: Vec<ErasedSegment> = Vec::with_capacity(lexed_elements.len());
    // An index to track where we've got to in the templated file.
    let mut tfs_idx = 0;
    // We keep a map of previous block locations in case they re-occur.
    // let block_stack = BlockTracker()
    let templated_file_slices = &templated_file.sliced_file;

    // Now work out source slices, and add in template placeholders.
    for element in lexed_elements {
        // NOTE(review): never incremented anywhere in this function —
        // confirm whether split whitespace should advance it.
        let consumed_element_length = 0;
        // Source start carried across slices for elements spanning several.
        let mut stashed_source_idx = None;

        // Iterate remaining slices, keeping `idx` absolute into the full
        // slice list.
        for (idx, tfs) in templated_file_slices
            .iter()
            .skip(tfs_idx)
            .enumerate()
            .map(|(i, tfs)| (i + tfs_idx, tfs))
        {
            // Is it a zero slice?
            if is_zero_slice(&tfs.templated_slice) {
                let _slice = if idx + 1 < templated_file_slices.len() {
                    templated_file_slices[idx + 1].clone().into()
                } else {
                    None
                };

                continue;
            }

            if tfs.slice_type == "literal" {
                // Constant offset between source and templated positions
                // within a literal slice.
                let tfs_offset =
                    (tfs.source_slice.start as isize) - (tfs.templated_slice.start as isize);

                // NOTE: Greater than OR EQUAL, to include the case of it matching
                // length exactly.
                if element.template_slice.end <= tfs.templated_slice.end {
                    let slice_start = stashed_source_idx.unwrap_or_else(|| {
                        let sum = element.template_slice.start as isize
                            + consumed_element_length as isize
                            + tfs_offset;
                        if sum < 0 {
                            panic!("Slice start is negative: {sum}");
                        }
                        sum.try_into()
                            .unwrap_or_else(|_| panic!("Cannot convert {sum} to usize"))
                    });

                    let source_slice_end =
                        (element.template_slice.end as isize + tfs_offset) as usize;
                    result.push(element.to_segment(
                        PositionMarker::new(
                            slice_start..source_slice_end,
                            element.template_slice.clone(),
                            templated_file.clone(),
                            None,
                            None,
                        ),
                        Some(consumed_element_length..element.raw.len()),
                    ));

                    // If it was an exact match, consume the templated element too.
                    if element.template_slice.end == tfs.templated_slice.end {
                        tfs_idx += 1
                    }
                    // In any case, we're done with this element. Move on
                    break;
                } else if element.template_slice.start >= tfs.templated_slice.end {
                    // Element starts at or after this slice ends - skip to next slice.
                    // This can happen when zero-length slices exist (e.g., stripped
                    // whitespace from Jinja comments like {#- ... #}).
                    log::debug!("Element starts at or after slice end, skipping");
                    continue;
                } else {
                    // This means that the current lexed element spans across
                    // multiple templated file slices.

                    log::debug!("Consuming whole spanning literal",);

                    // This almost certainly means there's a templated element
                    // in the middle of a whole lexed element.

                    // What we do here depends on whether we're allowed to split
                    // lexed elements. This is basically only true if it's whitespace.
                    // NOTE: We should probably make this configurable on the
                    // matcher object, but for now we're going to look for the
                    // name of the lexer.
                    if element.matcher.name == "whitespace" {
                        if stashed_source_idx.is_some() {
                            panic!("Found literal whitespace with stashed idx!")
                        }

                        let incremental_length =
                            tfs.templated_slice.end - element.template_slice.start;

                        let source_slice_start = element.template_slice.start as isize
                            + consumed_element_length as isize
                            + tfs_offset;
                        let source_slice_start =
                            source_slice_start.try_into().unwrap_or_else(|_| {
                                panic!("Cannot convert {source_slice_start} to usize")
                            });
                        let source_slice_end =
                            source_slice_start as isize + incremental_length as isize;
                        let source_slice_end = source_slice_end.try_into().unwrap_or_else(|_| {
                            panic!("Cannot convert {source_slice_end} to usize")
                        });

                        result.push(element.to_segment(
                            PositionMarker::new(
                                source_slice_start..source_slice_end,
                                element.template_slice.clone(),
                                templated_file.clone(),
                                None,
                                None,
                            ),
                            offset_slice(consumed_element_length, incremental_length).into(),
                        ));
                        // Continue to the next slice to process remaining whitespace
                        continue;
                    } else {
                        // We can't split it. We're going to end up yielding a segment
                        // which spans multiple slices. Stash the type, and if we haven't
                        // set the start yet, stash it too.
                        log::debug!("Spilling over literal slice.");
                        if stashed_source_idx.is_none() {
                            stashed_source_idx = (element.template_slice.start + idx).into();
                            log::debug!("Stashing a source start. {stashed_source_idx:?}");
                        }
                        // Continue to next slice regardless of whether we stashed
                        continue;
                    }
                }
            } else if matches!(tfs.slice_type.as_str(), "templated" | "block_start") {
                // Found a templated slice. Does it have length in the templated file?
                // If it doesn't, then we'll pick it up next.
                if !is_zero_slice(&tfs.templated_slice) {
                    // If it's a block_start. Append to the block stack.
                    // NOTE: This is rare, but call blocks do occasionally
                    // have length (and so don't get picked up by
                    // _handle_zero_length_slice)
                    if tfs.slice_type == "block_start" {
                        unimplemented!()
                        // block_stack.enter(tfs.source_slice)
                    }

                    // Is our current element totally contained in this slice?
                    if element.template_slice.end <= tfs.templated_slice.end {
                        log::debug!("Contained templated slice.");
                        // Yes it is. Add lexed element with source slices as the whole
                        // span of the source slice for the file slice.
                        // If we've got an existing stashed source start, use that
                        // as the start of the source slice.
                        let slice_start = if let Some(stashed_source_idx) = stashed_source_idx {
                            stashed_source_idx
                        } else {
                            tfs.source_slice.start + consumed_element_length
                        };

                        result.push(element.to_segment(
                            PositionMarker::new(
                                slice_start..tfs.source_slice.end,
                                element.template_slice.clone(),
                                templated_file.clone(),
                                None,
                                None,
                            ),
                            Some(consumed_element_length..element.raw.len()),
                        ));

                        // If it was an exact match, consume the templated element too.
                        if element.template_slice.end == tfs.templated_slice.end {
                            tfs_idx += 1
                        }
                        // Carry on to the next lexed element
                        break;
                    } else {
                        // We've got an element which extends beyond this templated slice.
                        // This means that a _single_ lexed element claims both some
                        // templated elements and some non-templated elements. That could
                        // include all kinds of things (and from here we don't know what
                        // else is yet to come, comments, blocks, literals etc...).

                        // In the `literal` version of this code we would consider
                        // splitting the literal element here, but in the templated
                        // side we don't. That's because the way that templated tokens
                        // are lexed, means that they should arrive "pre-split".

                        // Stash the source idx for later when we do make a segment.
                        if stashed_source_idx.is_none() {
                            stashed_source_idx = Some(tfs.source_slice.start);
                            continue;
                        }
                        // Move on to the next template slice
                        continue;
                    }
                }
            }
            panic!("Unable to process slice: {tfs:?}");
        }
    }
    result
}
857
#[cfg(test)]
mod tests {
    use super::*;

    /// Check that a matcher consumes (or fails to consume) a string.
    ///
    /// Passing `Some(expected)` asserts the matcher yields exactly one
    /// element with that text, leaving the rest of the input as the
    /// forward string. Passing `None` asserts that nothing is matched
    /// and the input is returned untouched.
    fn assert_matches(input: &str, matcher: &Matcher, expected: Option<&str>) {
        let outcome = matcher.matches(input);
        match expected {
            Some(expected) => {
                assert_eq!(outcome.forward_string, &input[expected.len()..]);
                assert_eq!(outcome.elements.len(), 1);
                assert_eq!(outcome.elements[0].text, expected);
            }
            None => {
                assert_eq!(outcome.forward_string, input);
                assert_eq!(outcome.elements.len(), 0);
            }
        }
    }

    /// A matched terminator should be subdivided on semicolons and then
    /// have trailing newlines split off by the post-subdivider.
    #[test]
    fn test_parser_lexer_trim_post_subdivide() {
        let terminator = Matcher::legacy(
            "function_script_terminator",
            |_| true,
            r";\s+(?!\*)\/(?!\*)|\s+(?!\*)\/(?!\*)",
            SyntaxKind::StatementTerminator,
        )
        .subdivider(Pattern::string("semicolon", ";", SyntaxKind::Semicolon))
        .post_subdivide(Pattern::legacy(
            "newline",
            |_| true,
            r"(\n|\r\n)+",
            SyntaxKind::Newline,
        ));
        let matchers: Vec<Matcher> = vec![terminator];

        let outcome = Lexer::new(&matchers).lex_match(";\n/\n");
        assert_eq!(outcome.elements.len(), 3);
        assert_eq!(outcome.elements[0].text, ";");
        assert_eq!(outcome.elements[1].text, "\n");
        assert_eq!(outcome.elements[2].text, "/");
    }

    /// Exercise the regex-based matcher against a spread of patterns.
    #[test]
    fn test_parser_lexer_regex() {
        let cases: &[(&str, &str, &str)] = &[
            ("fsaljk", "f", "f"),
            ("fsaljk", r"f", "f"),
            ("fsaljk", r"[fas]*", "fsa"),
            // Whitespace runs (no newline present)
            ("   \t   fsaljk", r"[^\S\r\n]*", "   \t   "),
            // Whitespace runs stop at the first newline
            ("   \t \n  fsaljk", r"[^\S\r\n]*", "   \t "),
            // Quoted strings, including ones containing whitespace
            (
                "'something boring'   \t \n  fsaljk",
                r"'[^']*'",
                "'something boring'",
            ),
            (
                "' something exciting \t\n '   \t \n  fsaljk",
                r"'[^']*'",
                "' something exciting \t\n '",
            ),
        ];

        for &(input, pattern, expected) in cases {
            let matcher = Matcher::legacy("test", |_| true, pattern, SyntaxKind::Word);
            assert_matches(input, &matcher, Some(expected));
        }
    }

    /// A string matcher matches only at the very start of the input.
    #[test]
    fn test_parser_lexer_string() {
        let dot = Matcher::string("dot", ".", SyntaxKind::Dot);

        assert_matches(".fsaljk", &dot, Some("."));
        assert_matches("fsaljk", &dot, None);
    }

    /// Repeated matching with multiple matchers stops at the first
    /// position where no matcher applies.
    #[test]
    fn test_parser_lexer_lex_match() {
        let matchers: Vec<Matcher> = vec![
            Matcher::string("dot", ".", SyntaxKind::Dot),
            Matcher::regex("test", "#[^#]*#", SyntaxKind::Dash),
        ];

        let outcome = Lexer::new(&matchers).lex_match("..#..#..#");

        assert_eq!(outcome.elements.len(), 5);
        assert_eq!(outcome.elements[2].text, "#..#");
        assert_eq!(outcome.forward_string, "#");
    }
}