1use std::borrow::Cow;
2use std::fmt::Debug;
3use std::ops::Range;
4use std::str::Chars;
5
6use super::markers::PositionMarker;
7use super::segments::{ErasedSegment, SegmentBuilder, Tables};
8use crate::dialects::Dialect;
9use crate::dialects::syntax::SyntaxKind;
10use crate::errors::SQLLexError;
11use crate::slice_helpers::{is_zero_slice, offset_slice};
12use crate::templaters::TemplatedFile;
13
/// A raw lexed element: the matched text plus which matcher produced it.
#[derive(Debug, Clone)]
pub struct Element<'a> {
    // Name of the matcher that produced this element (e.g. "whitespace").
    name: &'static str,
    // The matched text; borrowed from the input where possible, owned when
    // pieces had to be re-assembled (see `Matcher::trim_match`).
    text: Cow<'a, str>,
    // Syntax kind assigned to the segment built from this element.
    syntax_kind: SyntaxKind,
}
21
22impl<'a> Element<'a> {
23 fn new(name: &'static str, syntax_kind: SyntaxKind, text: impl Into<Cow<'a, str>>) -> Self {
24 Self {
25 name,
26 syntax_kind,
27 text: text.into(),
28 }
29 }
30}
31
/// A lexed element that has been mapped onto the templated file.
#[derive(Debug)]
pub struct TemplateElement<'a> {
    // Raw matched text.
    raw: Cow<'a, str>,
    // Half-open range this element occupies in the templated string.
    template_slice: Range<usize>,
    // Identity of the matcher that produced the element.
    matcher: Info,
}
39
/// Minimal matcher identity carried by a [`TemplateElement`]: the matcher's
/// name and the syntax kind it emits.
#[derive(Debug)]
struct Info {
    name: &'static str,
    syntax_kind: SyntaxKind,
}
45
46impl<'a> TemplateElement<'a> {
47 pub fn from_element(element: Element<'a>, template_slice: Range<usize>) -> Self {
49 TemplateElement {
50 raw: element.text,
51 template_slice,
52 matcher: Info {
53 name: element.name,
54 syntax_kind: element.syntax_kind,
55 },
56 }
57 }
58
59 pub fn to_segment(
60 &self,
61 pos_marker: PositionMarker,
62 subslice: Option<Range<usize>>,
63 ) -> ErasedSegment {
64 let slice = subslice.map_or_else(|| self.raw.as_ref(), |slice| &self.raw[slice]);
65 SegmentBuilder::token(0, slice, self.matcher.syntax_kind)
66 .with_position(pos_marker)
67 .finish()
68 }
69}
70
/// Result of applying a matcher: the elements produced plus whatever input
/// was left unconsumed.
#[derive(Debug)]
pub struct Match<'a> {
    /// Input remaining after the matched prefix (the whole input on a miss).
    pub forward_string: &'a str,
    /// Elements produced by the match; empty when nothing matched.
    pub elements: Vec<Element<'a>>,
}
77
/// A lexer rule: a primary pattern plus optional subdivision passes.
#[derive(Debug, Clone)]
pub struct Matcher {
    // The primary pattern, tried at the start of the input.
    pattern: Pattern,
    // Optional pattern used to split a match into smaller elements.
    subdivider: Option<Pattern>,
    // Optional pattern used to trim the pieces produced by subdivision.
    trim_post_subdivide: Option<Pattern>,
}
84
impl Matcher {
    /// Wraps a pattern as a matcher with no subdivision passes.
    pub const fn new(pattern: Pattern) -> Self {
        Self {
            pattern,
            subdivider: None,
            trim_post_subdivide: None,
        }
    }

    /// Convenience constructor for a literal-string matcher.
    pub const fn string(
        name: &'static str,
        pattern: &'static str,
        syntax_kind: SyntaxKind,
    ) -> Self {
        Self::new(Pattern::string(name, pattern, syntax_kind))
    }

    /// Convenience constructor for a regex matcher.
    #[track_caller]
    pub fn regex(name: &'static str, pattern: &'static str, syntax_kind: SyntaxKind) -> Self {
        Self::new(Pattern::regex(name, pattern, syntax_kind))
    }

    /// Convenience constructor for a native (cursor-function) matcher.
    pub fn native(name: &'static str, f: fn(&mut Cursor) -> bool, syntax_kind: SyntaxKind) -> Self {
        Self::new(Pattern::native(name, f, syntax_kind))
    }

    /// Convenience constructor for a legacy (fancy-regex) matcher.
    #[track_caller]
    pub fn legacy(
        name: &'static str,
        starts_with: fn(&str) -> bool,
        pattern: &'static str,
        syntax_kind: SyntaxKind,
    ) -> Self {
        Self::new(Pattern::legacy(name, starts_with, pattern, syntax_kind))
    }

    /// Adds a subdivision pattern.
    ///
    /// Only legacy/native primary patterns support subdivision (string/regex
    /// patterns are folded into the lexer's combined regex), asserted here.
    pub fn subdivider(mut self, subdivider: Pattern) -> Self {
        assert!(matches!(
            self.pattern.kind,
            SearchPatternKind::Legacy(_, _) | SearchPatternKind::Native(_)
        ));
        self.subdivider = Some(subdivider);
        self
    }

    /// Adds a trim pattern applied to each piece produced by subdivision.
    /// Same restriction as [`Matcher::subdivider`].
    pub fn post_subdivide(mut self, trim_post_subdivide: Pattern) -> Self {
        assert!(matches!(
            self.pattern.kind,
            SearchPatternKind::Legacy(_, _) | SearchPatternKind::Native(_)
        ));
        self.trim_post_subdivide = Some(trim_post_subdivide);
        self
    }

    /// This matcher's name (taken from its primary pattern).
    pub fn name(&self) -> &'static str {
        self.pattern.name
    }

    /// Tries to match at the start of `forward_string`.
    ///
    /// On success the matched prefix is (optionally) subdivided into
    /// elements and the returned `forward_string` is advanced past it; on
    /// failure the input is returned unchanged with no elements.
    #[track_caller]
    pub fn matches<'a>(&self, forward_string: &'a str) -> Match<'a> {
        match self.pattern.matches(forward_string) {
            Some(matched) => {
                let new_elements = self.subdivide(matched, self.pattern.syntax_kind);

                Match {
                    forward_string: &forward_string[matched.len()..],
                    elements: new_elements,
                }
            }
            None => Match {
                forward_string,
                elements: Vec::new(),
            },
        }
    }

    /// Splits `matched` on every occurrence of the subdivider pattern,
    /// trimming each piece via `trim_match`. Without a subdivider the whole
    /// match becomes a single element of kind `matched_kind`.
    fn subdivide<'a>(&self, matched: &'a str, matched_kind: SyntaxKind) -> Vec<Element<'a>> {
        match &self.subdivider {
            Some(subdivider) => {
                let mut elem_buff = Vec::new();
                let mut str_buff = matched;

                while !str_buff.is_empty() {
                    // No further divider found: trim whatever is left, stop.
                    let Some(div_pos) = subdivider.search(str_buff) else {
                        let mut trimmed_elems = self.trim_match(str_buff);
                        elem_buff.append(&mut trimmed_elems);
                        break;
                    };

                    // Emit the (trimmed) text before the divider, then the
                    // divider itself, then continue after it.
                    let mut trimmed_elems = self.trim_match(&str_buff[..div_pos.start]);
                    let div_elem = Element::new(
                        subdivider.name,
                        subdivider.syntax_kind,
                        &str_buff[div_pos.start..div_pos.end],
                    );

                    elem_buff.append(&mut trimmed_elems);
                    elem_buff.push(div_elem);

                    str_buff = &str_buff[div_pos.end..];
                }

                elem_buff
            }
            None => {
                vec![Element::new(self.name(), matched_kind, matched)]
            }
        }
    }

    /// Separates leading/trailing occurrences of the `trim_post_subdivide`
    /// pattern (e.g. newlines) out of `matched_str`, emitting them as their
    /// own elements; any remaining content is emitted under this matcher's
    /// own name and syntax kind.
    fn trim_match<'a>(&self, matched_str: &'a str) -> Vec<Element<'a>> {
        // Without a trim pattern, no elements are produced for this piece.
        let Some(trim_post_subdivide) = &self.trim_post_subdivide else {
            return Vec::new();
        };

        // Helper: an element carrying the trim pattern's name/kind.
        let mk_element = |text| {
            Element::new(
                trim_post_subdivide.name,
                trim_post_subdivide.syntax_kind,
                text,
            )
        };

        let mut elem_buff = Vec::new();
        // Content scanned past but not yet emitted as an element.
        let mut content_buff = String::new();
        let mut str_buff = matched_str;

        while !str_buff.is_empty() {
            let Some(trim_pos) = trim_post_subdivide.search(str_buff) else {
                break;
            };

            let start = trim_pos.start;
            let end = trim_pos.end;

            if start == 0 {
                // Trim match at the very front: emit it, continue after it.
                elem_buff.push(mk_element(&str_buff[..end]));
                str_buff = str_buff[end..].into();
            } else if end == str_buff.len() {
                // Trim match at the very end: emit accumulated content plus
                // the text before the match, then the match itself, and stop.
                // NOTE(review): the content element is emitted under the trim
                // pattern's name/kind rather than the matcher's — confirm
                // this is intended (the leftover branch below uses the
                // matcher's own name/kind).
                let raw = format!("{}{}", content_buff, &str_buff[..start]);

                elem_buff.push(Element::new(
                    trim_post_subdivide.name,
                    trim_post_subdivide.syntax_kind,
                    raw,
                ));
                elem_buff.push(mk_element(&str_buff[start..end]));

                content_buff.clear();
                str_buff = "";
            } else {
                // Trim match in the middle: accumulate up to and including it
                // and keep scanning the rest.
                content_buff.push_str(&str_buff[..end]);
                str_buff = &str_buff[end..];
            }
        }

        // Emit any leftover content under this matcher's own name/kind.
        if !content_buff.is_empty() || !str_buff.is_empty() {
            let raw = format!("{content_buff}{str_buff}");
            elem_buff.push(Element::new(
                self.pattern.name,
                self.pattern.syntax_kind,
                raw,
            ));
        }

        elem_buff
    }
}
253
/// A named search pattern: what to look for and which syntax kind to emit.
#[derive(Debug, Clone)]
pub struct Pattern {
    // Matcher name (e.g. "whitespace", "semicolon").
    name: &'static str,
    // Syntax kind assigned to segments produced from this pattern.
    syntax_kind: SyntaxKind,
    // The underlying search strategy.
    kind: SearchPatternKind,
}
260
/// The strategy a [`Pattern`] uses to find a match.
#[derive(Debug, Clone)]
pub enum SearchPatternKind {
    /// Exact literal string prefix.
    String(&'static str),
    /// Regex source; compiled into the lexer's combined multi-pattern regex
    /// rather than matched here.
    Regex(&'static str),
    /// Hand-written matcher function driven by a [`Cursor`].
    Native(fn(&mut Cursor) -> bool),
    /// Anchored `fancy_regex` guarded by a cheap `starts_with` pre-filter.
    Legacy(fn(&str) -> bool, fancy_regex::Regex),
}
268
269impl Pattern {
270 pub const fn string(
271 name: &'static str,
272 template: &'static str,
273 syntax_kind: SyntaxKind,
274 ) -> Self {
275 Self {
276 name,
277 syntax_kind,
278 kind: SearchPatternKind::String(template),
279 }
280 }
281
282 #[track_caller]
283 pub fn regex(name: &'static str, regex: &'static str, syntax_kind: SyntaxKind) -> Self {
284 #[cfg(debug_assertions)]
285 if regex_automata::dfa::regex::Regex::new(regex).is_err() {
286 panic!("Invalid regex pattern: {}", std::panic::Location::caller());
287 }
288
289 Self {
290 name,
291 syntax_kind,
292 kind: SearchPatternKind::Regex(regex),
293 }
294 }
295
296 pub fn native(name: &'static str, f: fn(&mut Cursor) -> bool, syntax_kind: SyntaxKind) -> Self {
297 Self {
298 name,
299 syntax_kind,
300 kind: SearchPatternKind::Native(f),
301 }
302 }
303
304 #[track_caller]
305 pub fn legacy(
306 name: &'static str,
307 starts_with: fn(&str) -> bool,
308 regex: &'static str,
309 syntax_kind: SyntaxKind,
310 ) -> Self {
311 let regex = format!("^{regex}");
312 Self {
313 name,
314 syntax_kind,
315 kind: SearchPatternKind::Legacy(starts_with, fancy_regex::Regex::new(®ex).unwrap()),
316 }
317 }
318
319 fn matches<'a>(&self, forward_string: &'a str) -> Option<&'a str> {
320 match self.kind {
321 SearchPatternKind::String(template) => {
322 if forward_string.starts_with(template) {
323 return Some(template);
324 }
325 }
326 SearchPatternKind::Legacy(f, ref template) => {
327 if !f(forward_string) {
328 return None;
329 }
330
331 if let Ok(Some(matched)) = template.find(forward_string)
332 && matched.start() == 0
333 {
334 return Some(matched.as_str());
335 }
336 }
337 SearchPatternKind::Native(f) => {
338 let mut cursor = Cursor::new(forward_string);
339 return f(&mut cursor).then(|| cursor.lexed());
340 }
341 _ => unreachable!(),
342 };
343
344 None
345 }
346
347 fn search(&self, forward_string: &str) -> Option<Range<usize>> {
348 match &self.kind {
349 SearchPatternKind::String(template) => forward_string
350 .find(template)
351 .map(|start| start..start + template.len()),
352 SearchPatternKind::Legacy(_, template) => {
353 if let Ok(Some(matched)) = template.find(forward_string) {
354 return Some(matched.range());
355 }
356 None
357 }
358 _ => unreachable!("{:?}", self.kind),
359 }
360 }
361}
362
/// A lightweight character cursor over an input string, used to drive
/// native matcher functions.
pub struct Cursor<'text> {
    // The full input text (needed to compute how much has been consumed).
    text: &'text str,
    // Iterator over the not-yet-consumed tail of `text`.
    chars: Chars<'text>,
}
367
368impl<'text> Cursor<'text> {
369 const EOF: char = '\0';
370
371 fn new(text: &'text str) -> Self {
372 Self {
373 text,
374 chars: text.chars(),
375 }
376 }
377
378 pub fn peek(&self) -> char {
379 self.chars.clone().next().unwrap_or(Self::EOF)
380 }
381
382 pub fn peek_next(&self) -> char {
383 self.chars.clone().nth(1).unwrap_or(Self::EOF)
384 }
385
386 pub fn shift(&mut self) -> char {
387 self.chars.next().unwrap_or(Self::EOF)
388 }
389
390 pub fn shift_while(&mut self, f: impl Fn(char) -> bool + Copy) {
391 while self.peek() != Self::EOF && f(self.peek()) {
392 self.shift();
393 }
394 }
395
396 fn lexed(&self) -> &'text str {
397 let len = self.text.len() - self.chars.as_str().len();
398 &self.text[..len]
399 }
400}
401
402pub fn nested_block_comment(cursor: &mut Cursor) -> bool {
403 if cursor.peek() != '/' || cursor.peek_next() != '*' {
404 return false;
405 }
406 cursor.shift();
407 cursor.shift();
408 let mut depth = 1;
409 loop {
410 let ch = cursor.peek();
411 if ch == Cursor::EOF {
412 return false;
413 }
414 if ch == '/' && cursor.peek_next() == '*' {
415 cursor.shift();
416 cursor.shift();
417 depth += 1;
418 continue;
419 }
420 if ch == '*' && cursor.peek_next() == '/' {
421 cursor.shift();
422 cursor.shift();
423 depth -= 1;
424 if depth == 0 {
425 return true;
426 }
427 continue;
428 }
429 cursor.shift();
430 }
431}
432
/// The lexer: string/regex rules compiled into one multi-pattern regex,
/// plus legacy/native matchers that are tried first.
#[derive(Debug, Clone)]
pub struct Lexer {
    // Name and syntax kind for each pattern in `regex`, indexed by pattern id.
    syntax_map: Vec<(&'static str, SyntaxKind)>,
    // All string/regex rules compiled into a single multi-pattern regex.
    regex: regex_automata::meta::Regex,
    // Legacy and native matchers, tried before the combined regex.
    matchers: Vec<Matcher>,
    // Fallback matcher used when no other rule can make progress.
    last_resort_lexer: Matcher,
}
441
impl<'a> From<&'a Dialect> for Lexer {
    /// Builds a lexer from a dialect's registered matchers.
    fn from(dialect: &'a Dialect) -> Self {
        Lexer::new(dialect.lexer_matchers())
    }
}
447
448impl Lexer {
449 pub(crate) fn new(lexer_matchers: &[Matcher]) -> Self {
451 let mut patterns = Vec::new();
452 let mut syntax_map = Vec::new();
453 let mut matchers = Vec::new();
454
455 for matcher in lexer_matchers {
456 match matcher.pattern.kind {
457 SearchPatternKind::String(pattern) | SearchPatternKind::Regex(pattern) => {
458 let pattern = if matches!(matcher.pattern.kind, SearchPatternKind::String(_)) {
459 fancy_regex::escape(pattern)
460 } else {
461 pattern.into()
462 };
463
464 patterns.push(pattern);
465 syntax_map.push((matcher.pattern.name, matcher.pattern.syntax_kind));
466 }
467 SearchPatternKind::Legacy(_, _) | SearchPatternKind::Native(_) => {
468 matchers.push(matcher.clone());
469 }
470 }
471 }
472
473 Lexer {
474 syntax_map,
475 matchers,
476 regex: regex_automata::meta::Regex::new_many(&patterns).unwrap(),
477 last_resort_lexer: Matcher::legacy(
478 "<unlexable>",
479 |_| true,
480 r"[^\t\n.]*",
481 SyntaxKind::Unlexable,
482 ),
483 }
484 }
485
486 pub fn lex(
487 &self,
488 tables: &Tables,
489 template: impl Into<TemplatedFile>,
490 ) -> (Vec<ErasedSegment>, Vec<SQLLexError>) {
491 let template = template.into();
492 let mut str_buff = template.templated_str.as_deref().unwrap();
493
494 let mut element_buffer: Vec<Element> = Vec::new();
496
497 loop {
498 let mut res = self.lex_match(str_buff);
499 element_buffer.append(&mut res.elements);
500
501 if res.forward_string.is_empty() {
502 break;
503 }
504
505 let mut resort_res = self.last_resort_lexer.matches(str_buff);
507 if !resort_res.elements.is_empty() {
508 break;
509 }
510
511 str_buff = resort_res.forward_string;
512 element_buffer.append(&mut resort_res.elements);
513 }
514
515 let templated_buffer = Lexer::map_template_slices(element_buffer, &template);
518 let mut segments = self.elements_to_segments(templated_buffer, &template);
520
521 for seg in &mut segments {
522 seg.get_mut().set_id(tables.next_id())
523 }
524
525 (segments, Vec::new())
526 }
527
528 #[allow(dead_code)]
533 fn violations_from_segments(segments: Vec<ErasedSegment>) -> Vec<SQLLexError> {
534 segments
535 .into_iter()
536 .filter(|s| s.is_type(SyntaxKind::Unlexable))
537 .map(|s| {
538 SQLLexError::new(
539 format!(
540 "Unable to lex characters: {}",
541 s.raw().chars().take(10).collect::<String>()
542 ),
543 s.get_position_marker().unwrap().clone(),
544 )
545 })
546 .collect()
547 }
548
549 fn lex_match<'b>(&self, mut forward_string: &'b str) -> Match<'b> {
551 let mut elem_buff = Vec::new();
552
553 'main: loop {
554 if forward_string.is_empty() {
555 return Match {
556 forward_string,
557 elements: elem_buff,
558 };
559 }
560
561 for matcher in &self.matchers {
562 let mut match_result = matcher.matches(forward_string);
563
564 if !match_result.elements.is_empty() {
565 elem_buff.append(&mut match_result.elements);
566 forward_string = match_result.forward_string;
567 continue 'main;
568 }
569 }
570
571 let input =
572 regex_automata::Input::new(forward_string).anchored(regex_automata::Anchored::Yes);
573
574 if let Some(match_) = self.regex.find(input) {
575 let (name, kind) = self.syntax_map[match_.pattern().as_usize()];
576
577 elem_buff.push(Element::new(
578 name,
579 kind,
580 &forward_string[match_.start()..match_.end()],
581 ));
582 forward_string = &forward_string[match_.end()..];
583
584 continue 'main;
585 }
586
587 return Match {
588 forward_string,
589 elements: elem_buff,
590 };
591 }
592 }
593
594 fn map_template_slices<'b>(
602 elements: Vec<Element<'b>>,
603 template: &TemplatedFile,
604 ) -> Vec<TemplateElement<'b>> {
605 let mut idx = 0;
606 let mut templated_buff: Vec<TemplateElement> = Vec::with_capacity(elements.len());
607
608 for element in elements {
609 let template_slice = offset_slice(idx, element.text.len());
610 idx += element.text.len();
611
612 let templated_string = template.templated();
613 if templated_string[template_slice.clone()] != element.text {
614 panic!(
615 "Template and lexed elements do not match. This should never happen {:?} != \
616 {:?}",
617 element.text, &templated_string[template_slice]
618 );
619 }
620
621 templated_buff.push(TemplateElement::from_element(element, template_slice));
622 }
623
624 templated_buff
625 }
626
627 fn elements_to_segments(
629 &self,
630 elements: Vec<TemplateElement>,
631 templated_file: &TemplatedFile,
632 ) -> Vec<ErasedSegment> {
633 let mut segments = iter_segments(elements, templated_file);
634
635 let position_maker = match segments.last() {
637 Some(segment) => segment.get_position_marker().unwrap().end_point_marker(),
638 None => PositionMarker::from_point(0, 0, templated_file.clone(), None, None),
639 };
640
641 segments.push(
642 SegmentBuilder::token(0, "", SyntaxKind::EndOfFile)
643 .with_position(position_maker)
644 .finish(),
645 );
646
647 segments
648 }
649}
650
/// Converts template-mapped lexed elements into positioned segments by
/// walking the templated file's slice map.
///
/// Each element is matched against the template slices that overlap it so
/// the resulting segment carries both a source position and a templated
/// position.
///
/// # Panics
/// Panics on slice types it cannot process, on negative source offsets, and
/// on literal whitespace that spans into a stashed region.
fn iter_segments(
    lexed_elements: Vec<TemplateElement>,
    templated_file: &TemplatedFile,
) -> Vec<ErasedSegment> {
    let mut result: Vec<ErasedSegment> = Vec::with_capacity(lexed_elements.len());
    // Index of the first template slice that may still overlap the current
    // element; advanced when an element ends exactly at a slice boundary.
    let mut tfs_idx = 0;
    let templated_file_slices = &templated_file.sliced_file;

    for element in lexed_elements {
        // How much of this element has already been yielded.
        // NOTE(review): declared immutable and never incremented, so it is
        // always 0 here — confirm whether partial consumption (e.g. the
        // spanning-whitespace branch) should be updating it.
        let consumed_element_length = 0;
        // Source index remembered when an element spills across slices.
        let mut stashed_source_idx = None;

        // Walk slices starting at `tfs_idx`, keeping absolute indices.
        for (idx, tfs) in templated_file_slices
            .iter()
            .skip(tfs_idx)
            .enumerate()
            .map(|(i, tfs)| (i + tfs_idx, tfs))
        {
            // Zero-width templated slices contribute nothing; skip them.
            if is_zero_slice(&tfs.templated_slice) {
                // NOTE(review): `_slice` is computed and immediately
                // discarded — presumably a leftover lookahead; verify it can
                // be removed.
                let _slice = if idx + 1 < templated_file_slices.len() {
                    templated_file_slices[idx + 1].clone().into()
                } else {
                    None
                };

                continue;
            }

            if tfs.slice_type == "literal" {
                // Constant offset between source and templated coordinates
                // within a literal slice.
                let tfs_offset =
                    (tfs.source_slice.start as isize) - (tfs.templated_slice.start as isize);

                if element.template_slice.end <= tfs.templated_slice.end {
                    // Element ends inside this literal slice: emit it whole.
                    let slice_start = stashed_source_idx.unwrap_or_else(|| {
                        let sum = element.template_slice.start as isize
                            + consumed_element_length as isize
                            + tfs_offset;
                        if sum < 0 {
                            panic!("Slice start is negative: {sum}");
                        }
                        sum.try_into()
                            .unwrap_or_else(|_| panic!("Cannot convert {sum} to usize"))
                    });

                    let source_slice_end =
                        (element.template_slice.end as isize + tfs_offset) as usize;
                    result.push(element.to_segment(
                        PositionMarker::new(
                            slice_start..source_slice_end,
                            element.template_slice.clone(),
                            templated_file.clone(),
                            None,
                            None,
                        ),
                        Some(consumed_element_length..element.raw.len()),
                    ));

                    // Element ends exactly at the slice boundary: the next
                    // element starts in the next slice.
                    if element.template_slice.end == tfs.templated_slice.end {
                        tfs_idx += 1
                    }
                    break;
                } else if element.template_slice.start >= tfs.templated_slice.end {
                    // Slice lies entirely before the element; keep scanning.
                    log::debug!("Element starts at or after slice end, skipping");
                    continue;
                } else {
                    // Element spans past the end of this literal slice.
                    log::debug!("Consuming whole spanning literal",);

                    if element.matcher.name == "whitespace" {
                        // Spanning whitespace is emitted piecewise, one slice
                        // at a time; it must not coexist with a stash.
                        if stashed_source_idx.is_some() {
                            panic!("Found literal whitespace with stashed idx!")
                        }

                        let incremental_length =
                            tfs.templated_slice.end - element.template_slice.start;

                        let source_slice_start = element.template_slice.start as isize
                            + consumed_element_length as isize
                            + tfs_offset;
                        let source_slice_start =
                            source_slice_start.try_into().unwrap_or_else(|_| {
                                panic!("Cannot convert {source_slice_start} to usize")
                            });
                        let source_slice_end =
                            source_slice_start as isize + incremental_length as isize;
                        let source_slice_end = source_slice_end.try_into().unwrap_or_else(|_| {
                            panic!("Cannot convert {source_slice_end} to usize")
                        });

                        result.push(element.to_segment(
                            PositionMarker::new(
                                source_slice_start..source_slice_end,
                                element.template_slice.clone(),
                                templated_file.clone(),
                                None,
                                None,
                            ),
                            offset_slice(consumed_element_length, incremental_length).into(),
                        ));
                        continue;
                    } else {
                        // Non-whitespace spanning element: remember where it
                        // starts in the source and emit once it is contained.
                        log::debug!("Spilling over literal slice.");
                        if stashed_source_idx.is_none() {
                            // NOTE(review): stashes `template start + idx`
                            // where `idx` is the *slice index*, not a source
                            // offset — verify this should not be `tfs_offset`.
                            stashed_source_idx = (element.template_slice.start + idx).into();
                            log::debug!("Stashing a source start. {stashed_source_idx:?}");
                        }
                        continue;
                    }
                }
            } else if matches!(tfs.slice_type.as_str(), "templated" | "block_start") {
                if !is_zero_slice(&tfs.templated_slice) {
                    // Non-empty block_start slices are not handled yet.
                    if tfs.slice_type == "block_start" {
                        unimplemented!()
                    }

                    if element.template_slice.end <= tfs.templated_slice.end {
                        // Element ends inside this templated slice.
                        log::debug!("Contained templated slice.");
                        let slice_start = if let Some(stashed_source_idx) = stashed_source_idx {
                            stashed_source_idx
                        } else {
                            tfs.source_slice.start + consumed_element_length
                        };

                        result.push(element.to_segment(
                            PositionMarker::new(
                                slice_start..tfs.source_slice.end,
                                element.template_slice.clone(),
                                templated_file.clone(),
                                None,
                                None,
                            ),
                            Some(consumed_element_length..element.raw.len()),
                        ));

                        if element.template_slice.end == tfs.templated_slice.end {
                            tfs_idx += 1
                        }
                        break;
                    } else {
                        // Element spills past this templated slice: stash its
                        // source start (once) and keep scanning.
                        if stashed_source_idx.is_none() {
                            stashed_source_idx = Some(tfs.source_slice.start);
                            continue;
                        }
                        continue;
                    }
                }
            }
            // Any slice type not handled above is an internal error.
            panic!("Unable to process slice: {tfs:?}");
        }
    }
    result
}
857
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks a matcher against an input: either it lexes exactly one
    /// element equal to `match_string`, or (for `None`) it matches nothing
    /// and leaves the input untouched.
    fn assert_matches(in_string: &str, matcher: &Matcher, match_string: Option<&str>) {
        let outcome = matcher.matches(in_string);
        match match_string {
            Some(expected) => {
                assert_eq!(outcome.forward_string, &in_string[expected.len()..]);
                assert_eq!(outcome.elements.len(), 1);
                assert_eq!(outcome.elements[0].text, expected);
            }
            None => {
                assert_eq!(outcome.forward_string, in_string);
                assert_eq!(outcome.elements.len(), 0);
            }
        }
    }

    #[test]
    fn test_parser_lexer_trim_post_subdivide() {
        // A terminator matcher that subdivides on ";" and trims newlines.
        let matchers: Vec<Matcher> = vec![
            Matcher::legacy(
                "function_script_terminator",
                |_| true,
                r";\s+(?!\*)\/(?!\*)|\s+(?!\*)\/(?!\*)",
                SyntaxKind::StatementTerminator,
            )
            .subdivider(Pattern::string("semicolon", ";", SyntaxKind::Semicolon))
            .post_subdivide(Pattern::legacy(
                "newline",
                |_| true,
                r"(\n|\r\n)+",
                SyntaxKind::Newline,
            )),
        ];

        let outcome = Lexer::new(&matchers).lex_match(";\n/\n");
        assert_eq!(outcome.elements[0].text, ";");
        assert_eq!(outcome.elements[1].text, "\n");
        assert_eq!(outcome.elements[2].text, "/");
        assert_eq!(outcome.elements.len(), 3);
    }

    #[test]
    fn test_parser_lexer_regex() {
        // (input, pattern, expected match)
        let cases = &[
            ("fsaljk", "f", "f"),
            ("fsaljk", r"f", "f"),
            ("fsaljk", r"[fas]*", "fsa"),
            (" \t fsaljk", r"[^\S\r\n]*", " \t "),
            (" \t \n fsaljk", r"[^\S\r\n]*", " \t "),
            (
                "'something boring' \t \n fsaljk",
                r"'[^']*'",
                "'something boring'",
            ),
            (
                "' something exciting \t\n ' \t \n fsaljk",
                r"'[^']*'",
                "' something exciting \t\n '",
            ),
        ];

        for (input, pattern, expected) in cases {
            let matcher = Matcher::legacy("test", |_| true, pattern, SyntaxKind::Word);

            assert_matches(input, &matcher, Some(expected));
        }
    }

    #[test]
    fn test_parser_lexer_string() {
        let dot_matcher = Matcher::string("dot", ".", SyntaxKind::Dot);

        assert_matches(".fsaljk", &dot_matcher, Some("."));
        assert_matches("fsaljk", &dot_matcher, None);
    }

    #[test]
    fn test_parser_lexer_lex_match() {
        let matchers: Vec<Matcher> = vec![
            Matcher::string("dot", ".", SyntaxKind::Dot),
            Matcher::regex("test", "#[^#]*#", SyntaxKind::Dash),
        ];

        let outcome = Lexer::new(&matchers).lex_match("..#..#..#");

        assert_eq!(outcome.forward_string, "#");
        assert_eq!(outcome.elements.len(), 5);
        assert_eq!(outcome.elements[2].text, "#..#");
    }
}