// sentinel_modsec/parser/lexer.rs
use std::iter::Peekable;
4use std::str::Chars;
5
/// A lexical token together with the position at which it starts.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token's category and, where applicable, its text.
    pub kind: TokenKind,
    /// 1-based line number of the token's first character.
    pub line: usize,
    /// 1-based column (counted in `char`s) of the token's first character.
    pub column: usize,
}

/// The categories of token produced by [`Lexer::next_token`].
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    /// A bare word at the start of a logical line that begins with `sec` or
    /// equals `include` (both compared case-insensitively).
    Directive(String),
    /// Any other bare (unquoted) word.
    Word(String),
    /// The contents of a single- or double-quoted string, escapes processed.
    QuotedString(String),
    /// A `#` comment; the token swallows everything through the end of line.
    Comment,
    /// A line feed that was not part of a line continuation or a comment.
    Newline,
    /// Not produced by [`Lexer::next_token`], which reports end of input by
    /// returning `None`.
    Eof,
}

/// Character-level lexer that splits configuration text into [`Token`]s.
pub struct Lexer<'a> {
    /// Remaining input.
    input: Peekable<Chars<'a>>,
    /// 1-based line of the next unread character.
    line: usize,
    /// 1-based column of the next unread character.
    column: usize,
    /// `true` while only whitespace has been read on the current logical line.
    at_line_start: bool,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at line 1, column 1 of `input`.
    pub fn new(input: &'a str) -> Self {
        Self {
            input: input.chars().peekable(),
            line: 1,
            column: 1,
            at_line_start: true,
        }
    }

    /// Returns the next unread character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.input.peek().copied()
    }

    /// Consumes one character and updates the line/column bookkeeping.
    fn advance(&mut self) -> Option<char> {
        let ch = self.input.next()?;
        if ch == '\n' {
            self.line += 1;
            self.column = 1;
            self.at_line_start = true;
        } else {
            self.column += 1;
            if !ch.is_whitespace() {
                self.at_line_start = false;
            }
        }
        Some(ch)
    }

    /// Consumes a line continuation (a backslash immediately followed by LF
    /// or CRLF) if one starts at the current position.
    ///
    /// Returns `true` when a continuation was consumed. A continuation joins
    /// two physical lines into one logical line, so `at_line_start` is left
    /// exactly as it was: a continued argument must not be mistaken for the
    /// first word of a new line.
    fn consume_continuation(&mut self) -> bool {
        let mut ahead = self.input.clone();
        if ahead.next() != Some('\\') {
            return false;
        }
        let len = match (ahead.next(), ahead.next()) {
            (Some('\n'), _) => 2,
            (Some('\r'), Some('\n')) => 3,
            _ => return false,
        };

        let at_line_start = self.at_line_start;
        for _ in 0..len {
            self.advance();
        }
        self.at_line_start = at_line_start;
        true
    }

    /// Skips spaces, tabs and line continuations; stops at anything else,
    /// including a plain newline.
    pub fn skip_whitespace(&mut self) {
        loop {
            match self.peek() {
                Some(' ') | Some('\t') => {
                    self.advance();
                }
                Some('\\') => {
                    if !self.consume_continuation() {
                        break;
                    }
                }
                _ => break,
            }
        }
    }

    /// Skips every whitespace character, newlines included.
    // Not called anywhere in this file; kept so the private API is unchanged.
    #[allow(dead_code)]
    fn skip_all_whitespace(&mut self) {
        while let Some(c) = self.peek() {
            if !c.is_whitespace() {
                break;
            }
            self.advance();
        }
    }

    /// Returns `true` when `word` names a directive: it starts with `sec` or
    /// is exactly `include`, ignoring ASCII case.
    fn is_directive_name(word: &str) -> bool {
        word.eq_ignore_ascii_case("include")
            || word
                .get(..3)
                .map_or(false, |prefix| prefix.eq_ignore_ascii_case("sec"))
    }

    /// Produces the next token, or `None` once the input is exhausted.
    ///
    /// Carriage returns and other stray whitespace are skipped, so CRLF input
    /// yields one [`TokenKind::Newline`] per line ending. A backslash that
    /// does not start a line continuation is ordinary word text.
    pub fn next_token(&mut self) -> Option<Token> {
        loop {
            self.skip_whitespace();

            let line = self.line;
            let column = self.column;
            let c = self.peek()?;

            let kind = match c {
                '\n' => {
                    self.advance();
                    TokenKind::Newline
                }
                '#' => {
                    // The comment token owns the rest of the line, including
                    // its terminating newline.
                    while let Some(ch) = self.advance() {
                        if ch == '\n' {
                            break;
                        }
                    }
                    TokenKind::Comment
                }
                '"' | '\'' => {
                    self.advance();
                    TokenKind::QuotedString(self.read_quoted_string(c))
                }
                c if c.is_whitespace() => {
                    // `\r` (CRLF input), form feed, etc.: skip it rather than
                    // letting it produce an empty word and end lexing early.
                    self.advance();
                    continue;
                }
                _ => {
                    // Capture before reading: consuming the word clears the flag.
                    let was_at_line_start = self.at_line_start;
                    let word = self.read_word();
                    if word.is_empty() {
                        // Defensive: every character that reaches this arm is
                        // accepted by `read_word`, so this should not happen.
                        return None;
                    }
                    if was_at_line_start && Self::is_directive_name(&word) {
                        TokenKind::Directive(word)
                    } else {
                        TokenKind::Word(word)
                    }
                }
            };

            return Some(Token { kind, line, column });
        }
    }

    /// Skips the spaces and tabs that indent a continued line inside a string.
    fn skip_indent(&mut self) {
        while let Some(' ') | Some('\t') = self.peek() {
            self.advance();
        }
    }

    /// Reads the body of a quoted string whose opening `quote` has already
    /// been consumed, up to and including the matching closing quote (or end
    /// of input if the string is unterminated).
    ///
    /// Escapes: `\n`, `\t`, `\r`, `\\`, `\"` and `\'` are decoded; a backslash
    /// before a line break is a continuation that also drops the next line's
    /// leading spaces/tabs; any other escape keeps its backslash verbatim.
    fn read_quoted_string(&mut self, quote: char) -> String {
        let mut s = String::new();
        let mut escaped = false;

        while let Some(c) = self.advance() {
            if escaped {
                escaped = false;
                match c {
                    '\n' => self.skip_indent(),
                    '\r' => {
                        if self.peek() == Some('\n') {
                            self.advance();
                        }
                        self.skip_indent();
                    }
                    'n' => s.push('\n'),
                    't' => s.push('\t'),
                    'r' => s.push('\r'),
                    '\\' => s.push('\\'),
                    '"' => s.push('"'),
                    '\'' => s.push('\''),
                    other => {
                        s.push('\\');
                        s.push(other);
                    }
                }
            } else if c == '\\' {
                escaped = true;
            } else if c == quote {
                break;
            } else {
                s.push(c);
            }
        }

        s
    }

    /// Reads a bare word, stopping before whitespace, a quote or `#`.
    ///
    /// A line continuation (LF or CRLF) inside the word is removed so the
    /// pieces on either side are joined; any other backslash is kept as-is.
    fn read_word(&mut self) -> String {
        let mut s = String::new();

        while let Some(c) = self.peek() {
            if c == '\\' {
                if !self.consume_continuation() {
                    s.push('\\');
                    self.advance();
                }
                continue;
            }
            if c.is_whitespace() || c == '"' || c == '\'' || c == '#' {
                break;
            }
            s.push(c);
            self.advance();
        }

        s
    }
}
265
#[cfg(test)]
mod tests {
    use super::*;

    /// Pulls the next token out of `lexer` and returns just its kind,
    /// panicking if the input is already exhausted.
    fn next_kind(lexer: &mut Lexer<'_>) -> TokenKind {
        lexer.next_token().unwrap().kind
    }

    /// Shorthand for the expected directive kind.
    fn directive(text: &str) -> TokenKind {
        TokenKind::Directive(text.to_string())
    }

    /// Shorthand for the expected bare-word kind.
    fn word(text: &str) -> TokenKind {
        TokenKind::Word(text.to_string())
    }

    /// Shorthand for the expected quoted-string kind.
    fn quoted(text: &str) -> TokenKind {
        TokenKind::QuotedString(text.to_string())
    }

    // A leading `Sec...` word is classified as a directive.
    #[test]
    fn test_lex_directive() {
        let mut lexer = Lexer::new("SecRule");
        assert_eq!(next_kind(&mut lexer), directive("SecRule"));
    }

    // The surrounding quotes are stripped from a quoted string.
    #[test]
    fn test_lex_quoted_string() {
        let mut lexer = Lexer::new(r#""hello world""#);
        assert_eq!(next_kind(&mut lexer), quoted("hello world"));
    }

    // `\"` inside a double-quoted string decodes to a literal quote.
    #[test]
    fn test_lex_escaped_quote() {
        let mut lexer = Lexer::new(r#""hello \"world\"""#);
        assert_eq!(next_kind(&mut lexer), quoted(r#"hello "world""#));
    }

    // A comment consumes its line; the following line starts fresh.
    #[test]
    fn test_lex_comment() {
        let mut lexer = Lexer::new("# this is a comment\nSecRule");
        assert_eq!(next_kind(&mut lexer), TokenKind::Comment);
        assert_eq!(next_kind(&mut lexer), directive("SecRule"));
    }

    // A backslash-newline inside a word joins the two halves.
    #[test]
    fn test_lex_line_continuation() {
        let mut lexer = Lexer::new("Sec\\\nRule");
        assert_eq!(next_kind(&mut lexer), directive("SecRule"));
    }

    // A backslash-newline between words acts as plain whitespace.
    #[test]
    fn test_lex_line_continuation_between_tokens() {
        let mut lexer = Lexer::new("SecRule \\\n REQUEST_URI");
        assert_eq!(next_kind(&mut lexer), directive("SecRule"));
        assert_eq!(next_kind(&mut lexer), word("REQUEST_URI"));
    }

    // A complete rule lexes into directive, variable, operator and actions.
    #[test]
    fn test_lex_full_rule() {
        let mut lexer = Lexer::new(r#"SecRule REQUEST_URI "@contains /admin" "id:1,deny""#);

        let expected = [
            directive("SecRule"),
            word("REQUEST_URI"),
            quoted("@contains /admin"),
            quoted("id:1,deny"),
        ];
        for want in expected.iter() {
            assert_eq!(&next_kind(&mut lexer), want);
        }
    }
}