oak_core/lexer/scan_string.rs
1use crate::{SyntaxKind, Token};
2use std::range::Range;
3
/// Settings that describe how an ordinary (single-line) string literal is delimited.
#[derive(Clone, Debug)]
pub struct StringConfig {
    /// Characters that may delimit a string; a string is closed by the same
    /// character that opened it.
    pub quotes: &'static [char],
    /// Escape character for string contents, or `None` if there is none.
    /// The [`Default`] implementation uses a backslash.
    pub escape: Option<char>,
}
12
/// Configuration for multiline string scanning.
///
/// Opening and closing delimiters are paired by position: the delimiter at
/// index `i` of `open_delimiters` is closed by the delimiter at index `i` of
/// `close_delimiters`.
#[derive(Debug, Clone)]
pub struct StringMultilineConfig {
    /// Opening delimiters for multiline strings (e.g., `["\"\"\"", "'''"]`).
    pub open_delimiters: &'static [&'static str],
    /// Closing delimiters for multiline strings (e.g., `["\"\"\"", "'''"]`),
    /// in the same order as `open_delimiters`.
    pub close_delimiters: &'static [&'static str],
    /// Escape character honoured inside multiline strings, or `None` to
    /// disable escape sequences entirely.
    pub escape: Option<char>,
}
22
23impl Default for StringConfig {
24 fn default() -> Self {
25 Self { quotes: &['"'], escape: Some('\\') }
26 }
27}
28
29impl Default for StringMultilineConfig {
30 fn default() -> Self {
31 Self { open_delimiters: &["\"\"\"", "'''"], close_delimiters: &["\"\"\"", "'''"], escape: Some('\\') }
32 }
33}
34
35impl StringConfig {
36 /// Scan for a string at the given position
37 ///
38 /// # Arguments
39 ///
40 /// * `view` - The text view to scan
41 /// * `start` - The starting byte position
42 /// * `kind` - The token kind to assign to the string
43 ///
44 /// # Returns
45 ///
46 /// A token if a string is found, `None` otherwise
47 pub fn scan<K: SyntaxKind>(&self, view: &str, start: usize, kind: K) -> Option<Token<K>> {
48 for quote in self.quotes {
49 if view.starts_with(*quote) {
50 let end_index = view[start + 1..].find(*quote);
51 if let Some(end_index) = end_index {
52 return Some(Token { kind, span: Range { start, end: start + 1 + end_index } });
53 }
54 }
55 }
56 None
57 }
58}
59impl StringMultilineConfig {
60 /// Scan for a multiline string at the given position
61 ///
62 /// # Arguments
63 ///
64 /// * `view` - The text view to scan
65 /// * `start` - The starting byte position
66 /// * `kind` - The token kind to assign to the string
67 ///
68 /// # Returns
69 ///
70 /// A token if a multiline string is found, `None` otherwise
71 pub fn scan<K: SyntaxKind>(&self, view: &str, start: usize, kind: K) -> Option<Token<K>> {
72 let remaining = &view[start..];
73
74 // Try each opening delimiter
75 for (i, open_delim) in self.open_delimiters.iter().enumerate() {
76 if remaining.starts_with(open_delim) {
77 let close_delim = self.close_delimiters[i];
78 let mut pos = open_delim.len();
79
80 // Scan until we find the closing delimiter
81 while pos < remaining.len() {
82 // Check for closing delimiter
83 if remaining[pos..].starts_with(close_delim) {
84 pos += close_delim.len();
85 return Some(Token { kind, span: Range { start, end: start + pos } });
86 }
87
88 // Handle escape sequences if enabled
89 if let Some(escape_char) = self.escape {
90 if remaining[pos..].starts_with(escape_char) && pos + 1 < remaining.len() {
91 // Skip the escape character and the next character
92 pos += 2;
93 continue;
94 }
95 }
96
97 // Move to next character
98 let ch = remaining[pos..].chars().next().unwrap_or('\0');
99 pos += ch.len_utf8();
100 }
101
102 // If we reach here, we didn't find a closing delimiter
103 // Return the token anyway, but it will be incomplete
104 return Some(Token { kind, span: Range { start, end: start + remaining.len() } });
105 }
106 }
107
108 None
109 }
110}