oak_core/lexer/scan_string.rs

use crate::{SyntaxKind, Token};
use std::ops::Range;

/// Configuration for string scanning
#[derive(Debug, Clone)]
pub struct StringConfig {
    /// Quote characters that can start/end strings
    pub quotes: &'static [char],
    /// Custom escape character (default is backslash)
    pub escape: Option<char>,
}

/// Configuration for multiline string scanning
#[derive(Debug, Clone)]
pub struct StringMultilineConfig {
    /// Opening delimiters for multiline strings (e.g., ["\"\"\"", "'''"]),
    /// paired by index with `close_delimiters`
    pub open_delimiters: &'static [&'static str],
    /// Closing delimiters for multiline strings (e.g., ["\"\"\"", "'''"])
    pub close_delimiters: &'static [&'static str],
    /// Escape character for multiline strings, if any (default is backslash)
    pub escape: Option<char>,
}

impl Default for StringConfig {
    fn default() -> Self {
        Self { quotes: &['"'], escape: Some('\\') }
    }
}

impl Default for StringMultilineConfig {
    fn default() -> Self {
        Self {
            open_delimiters: &["\"\"\"", "'''"],
            close_delimiters: &["\"\"\"", "'''"],
            escape: Some('\\'),
        }
    }
}
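
// A configuration sketch (illustrative only, not compiled here): both structs
// have public fields, so language frontends can build them directly instead of
// using the defaults, e.g. single- or double-quoted strings without escapes and
// a triple-quoted multiline form:
//
//     let plain = StringConfig { quotes: &['\'', '"'], escape: None };
//     let triple = StringMultilineConfig {
//         open_delimiters: &["\"\"\""],
//         close_delimiters: &["\"\"\""],
//         escape: Some('\\'),
//     };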

impl StringConfig {
    /// Scan for a string at the given position
    ///
    /// # Arguments
    ///
    /// * `view` - The text view to scan
    /// * `start` - The starting byte position
    /// * `kind` - The token kind to assign to the string
    ///
    /// # Returns
    ///
    /// A token if a string is found, `None` otherwise
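    ///
    /// # Examples
    ///
    /// A usage sketch (marked `ignore` because `MyKind` is a hypothetical
    /// `SyntaxKind` implementor supplied by the caller, not part of this module):
    ///
    /// ```ignore
    /// let config = StringConfig::default();
    /// let token = config.scan("\"hello\" world", 0, MyKind::String).unwrap();
    /// // The span covers both quotes: bytes 0..7 of the input.
    /// assert_eq!((token.span.start, token.span.end), (0, 7));
    /// ```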
    pub fn scan<K: SyntaxKind>(&self, view: &str, start: usize, kind: K) -> Option<Token<K>> {
        let remaining = &view[start..];
        for quote in self.quotes {
            if remaining.starts_with(*quote) {
                let mut pos = quote.len_utf8();
                while pos < remaining.len() {
                    // A closing quote ends the string; the span includes both quotes
                    if remaining[pos..].starts_with(*quote) {
                        return Some(Token { kind, span: Range { start, end: start + pos + quote.len_utf8() } });
                    }
                    // Step over the escape character so the escaped character
                    // (e.g. an escaped quote) is consumed below without closing the string
                    if let Some(escape_char) = self.escape {
                        if remaining[pos..].starts_with(escape_char) && pos + escape_char.len_utf8() < remaining.len() {
                            pos += escape_char.len_utf8();
                        }
                    }
                    let ch = remaining[pos..].chars().next().unwrap_or('\0');
                    pos += ch.len_utf8();
                }
            }
        }
        None
    }
}

impl StringMultilineConfig {
    /// Scan for a multiline string at the given position
    ///
    /// # Arguments
    ///
    /// * `view` - The text view to scan
    /// * `start` - The starting byte position
    /// * `kind` - The token kind to assign to the string
    ///
    /// # Returns
    ///
    /// A token if a multiline string is found, `None` otherwise
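    ///
    /// # Examples
    ///
    /// A usage sketch (marked `ignore` because `MyKind` is a hypothetical
    /// `SyntaxKind` implementor supplied by the caller, not part of this module):
    ///
    /// ```ignore
    /// let config = StringMultilineConfig::default();
    /// let token = config.scan("\"\"\"multi\nline\"\"\"", 0, MyKind::String).unwrap();
    /// // The span covers both triple-quote delimiters: bytes 0..16 of the input.
    /// assert_eq!((token.span.start, token.span.end), (0, 16));
    /// ```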
    pub fn scan<K: SyntaxKind>(&self, view: &str, start: usize, kind: K) -> Option<Token<K>> {
        let remaining = &view[start..];

        // Try each opening delimiter; open and close delimiters are paired by index
        for (i, open_delim) in self.open_delimiters.iter().enumerate() {
            if remaining.starts_with(open_delim) {
                let close_delim = self.close_delimiters[i];
                let mut pos = open_delim.len();

                // Scan until we find the closing delimiter
                while pos < remaining.len() {
                    // The closing delimiter ends the string; the span includes it
                    if remaining[pos..].starts_with(close_delim) {
                        pos += close_delim.len();
                        return Some(Token { kind, span: Range { start, end: start + pos } });
                    }

                    // Step over the escape character so the escaped character is
                    // consumed below and cannot start the closing delimiter
                    if let Some(escape_char) = self.escape {
                        if remaining[pos..].starts_with(escape_char)
                            && pos + escape_char.len_utf8() < remaining.len()
                        {
                            pos += escape_char.len_utf8();
                        }
                    }

                    // Advance by one character, respecting its UTF-8 width
                    let ch = remaining[pos..].chars().next().unwrap_or('\0');
                    pos += ch.len_utf8();
                }

                // No closing delimiter before the end of input: return an
                // unterminated token spanning the rest of the text
                return Some(Token { kind, span: Range { start, end: start + remaining.len() } });
            }
        }

        None
    }
}
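
// Behavioral sketch (illustrative only, not compiled here): an unterminated
// multiline string still yields a token that runs to the end of the input, so a
// caller can report an unterminated-string error at the opening delimiter.
// `MyKind` is a hypothetical `SyntaxKind` implementor.
//
//     let config = StringMultilineConfig::default();
//     let token = config.scan("\"\"\"unterminated", 0, MyKind::String).unwrap();
//     assert_eq!((token.span.start, token.span.end), (0, 15));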