sentinel_modsec/parser/
variable.rs

1//! Variable parsing for SecRule.
2//!
3//! Optimized with perfect hash function for O(1) variable name lookup.
4
5use crate::error::{Error, Result};
6use phf::phf_map;
7
8/// A variable specification in a SecRule.
9#[derive(Debug, Clone)]
10pub struct VariableSpec {
11    /// The variable name.
12    pub name: VariableName,
13    /// Optional selection (e.g., ARGS:foo or ARGS:/^user/).
14    pub selection: Option<Selection>,
15    /// Count mode (& prefix).
16    pub count_mode: bool,
17    /// Exclusions (e.g., !ARGS:foo).
18    pub exclusions: Vec<String>,
19}
20
/// How a target narrows a collection variable down to particular members.
#[derive(Debug, Clone)]
pub enum Selection {
    /// Literal selector: the text after the colon, e.g. `foo` in `ARGS:foo`.
    Key(String),
    /// Regex selector, e.g. `ARGS:/^user/`; holds the pattern text found
    /// between the two slashes (`^user`).
    Regex(String),
}
29
/// Identifier for each ModSecurity variable this crate knows about.
///
/// The variant order is part of the type's observable shape (it fixes the
/// discriminant values), so new variants should be appended rather than
/// inserted in the middle.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VariableName {
    // ---- Request variables ----
    Args,
    ArgsGet,
    ArgsPost,
    ArgsNames,
    ArgsGetNames,
    ArgsPostNames,
    ArgsCombinedSize,
    RequestUri,
    RequestUriRaw,
    RequestFilename,
    RequestBasename,
    RequestLine,
    RequestMethod,
    RequestProtocol,
    RequestHeaders,
    RequestHeadersNames,
    RequestCookies,
    RequestCookiesNames,
    RequestBody,
    RequestBodyLength,
    QueryString,

    // ---- Response variables ----
    ResponseStatus,
    ResponseProtocol,
    ResponseHeaders,
    ResponseHeadersNames,
    ResponseBody,
    ResponseContentType,
    ResponseContentLength,

    // ---- Server / client info ----
    RemoteAddr,
    RemotePort,
    RemoteHost,
    RemoteUser,
    ServerAddr,
    ServerPort,
    ServerName,

    // ---- Collections ----
    Tx,
    Session,
    Env,
    Ip,
    Global,
    Resource,
    User,
    Geo,

    // ---- Matched data ----
    MatchedVar,
    MatchedVars,
    MatchedVarName,
    MatchedVarsNames,

    // ---- Time variables ----
    Time,
    TimeEpoch,
    TimeDay,
    TimeHour,
    TimeMin,
    TimeSec,
    TimeWday,
    TimeMon,
    TimeYear,

    // ---- Files ----
    Files,
    FilesSizes,
    FilesTmpnames,
    FilesCombinedSize,
    FilesNames,

    // ---- Special ----
    UniqueId,
    InboundAnomalyScore,
    OutboundAnomalyScore,
    Duration,
    MultipartBoundaryQuoted,
    MultipartBoundaryWhitespace,
    MultipartDataAfter,
    MultipartDataBefore,
    MultipartFileLimitExceeded,
    MultipartHeaderFolding,
    MultipartInvalidHeaderFolding,
    MultipartInvalidPart,
    MultipartInvalidQuoting,
    MultipartLfLine,
    MultipartMissingSemicolon,
    MultipartStrictError,
    MultipartUnmatchedBoundary,

    // ---- XML ----
    Xml,

    // ---- Web server ----
    WebserverErrorLog,
    HighestSeverity,
    StatusLine,
    FullRequest,
    FullRequestLength,

    // ---- Auth ----
    AuthType,

    // ---- Request body processing ----
    ReqBodyProcessor,
    ReqBodyError,
    ReqBodyErrorMsg,
    ReqBodyProcessorError,
    ReqBodyProcessorErrorMsg,

    // ---- Multipart strict ----
    /// `VARIABLE_MAP` resolves the name `MULTIPART_STRICT_ERROR` to this variant.
    MultipartStrictCheck,
}
82
83/// Perfect hash map for O(1) variable name lookup.
84static VARIABLE_MAP: phf::Map<&'static str, VariableName> = phf_map! {
85    "ARGS" => VariableName::Args,
86    "ARGS_GET" => VariableName::ArgsGet,
87    "ARGS_POST" => VariableName::ArgsPost,
88    "ARGS_NAMES" => VariableName::ArgsNames,
89    "ARGS_GET_NAMES" => VariableName::ArgsGetNames,
90    "ARGS_POST_NAMES" => VariableName::ArgsPostNames,
91    "ARGS_COMBINED_SIZE" => VariableName::ArgsCombinedSize,
92    "REQUEST_URI" => VariableName::RequestUri,
93    "REQUEST_URI_RAW" => VariableName::RequestUriRaw,
94    "REQUEST_FILENAME" => VariableName::RequestFilename,
95    "REQUEST_BASENAME" => VariableName::RequestBasename,
96    "REQUEST_LINE" => VariableName::RequestLine,
97    "REQUEST_METHOD" => VariableName::RequestMethod,
98    "REQUEST_PROTOCOL" => VariableName::RequestProtocol,
99    "REQUEST_HEADERS" => VariableName::RequestHeaders,
100    "REQUEST_HEADERS_NAMES" => VariableName::RequestHeadersNames,
101    "REQUEST_COOKIES" => VariableName::RequestCookies,
102    "REQUEST_COOKIES_NAMES" => VariableName::RequestCookiesNames,
103    "REQUEST_BODY" => VariableName::RequestBody,
104    "REQUEST_BODY_LENGTH" => VariableName::RequestBodyLength,
105    "QUERY_STRING" => VariableName::QueryString,
106    "RESPONSE_STATUS" => VariableName::ResponseStatus,
107    "RESPONSE_PROTOCOL" => VariableName::ResponseProtocol,
108    "RESPONSE_HEADERS" => VariableName::ResponseHeaders,
109    "RESPONSE_HEADERS_NAMES" => VariableName::ResponseHeadersNames,
110    "RESPONSE_BODY" => VariableName::ResponseBody,
111    "RESPONSE_CONTENT_TYPE" => VariableName::ResponseContentType,
112    "RESPONSE_CONTENT_LENGTH" => VariableName::ResponseContentLength,
113    "REMOTE_ADDR" => VariableName::RemoteAddr,
114    "REMOTE_PORT" => VariableName::RemotePort,
115    "REMOTE_HOST" => VariableName::RemoteHost,
116    "REMOTE_USER" => VariableName::RemoteUser,
117    "SERVER_ADDR" => VariableName::ServerAddr,
118    "SERVER_PORT" => VariableName::ServerPort,
119    "SERVER_NAME" => VariableName::ServerName,
120    "TX" => VariableName::Tx,
121    "SESSION" => VariableName::Session,
122    "ENV" => VariableName::Env,
123    "IP" => VariableName::Ip,
124    "GLOBAL" => VariableName::Global,
125    "RESOURCE" => VariableName::Resource,
126    "USER" => VariableName::User,
127    "GEO" => VariableName::Geo,
128    "MATCHED_VAR" => VariableName::MatchedVar,
129    "MATCHED_VARS" => VariableName::MatchedVars,
130    "MATCHED_VAR_NAME" => VariableName::MatchedVarName,
131    "MATCHED_VARS_NAMES" => VariableName::MatchedVarsNames,
132    "TIME" => VariableName::Time,
133    "TIME_EPOCH" => VariableName::TimeEpoch,
134    "TIME_DAY" => VariableName::TimeDay,
135    "TIME_HOUR" => VariableName::TimeHour,
136    "TIME_MIN" => VariableName::TimeMin,
137    "TIME_SEC" => VariableName::TimeSec,
138    "TIME_WDAY" => VariableName::TimeWday,
139    "TIME_MON" => VariableName::TimeMon,
140    "TIME_YEAR" => VariableName::TimeYear,
141    "FILES" => VariableName::Files,
142    "FILES_SIZES" => VariableName::FilesSizes,
143    "FILES_TMPNAMES" => VariableName::FilesTmpnames,
144    "FILES_COMBINED_SIZE" => VariableName::FilesCombinedSize,
145    "FILES_NAMES" => VariableName::FilesNames,
146    "UNIQUE_ID" => VariableName::UniqueId,
147    "DURATION" => VariableName::Duration,
148    "HIGHEST_SEVERITY" => VariableName::HighestSeverity,
149    "STATUS_LINE" => VariableName::StatusLine,
150    "FULL_REQUEST" => VariableName::FullRequest,
151    "FULL_REQUEST_LENGTH" => VariableName::FullRequestLength,
152    "AUTH_TYPE" => VariableName::AuthType,
153    "XML" => VariableName::Xml,
154    "REQBODY_PROCESSOR" => VariableName::ReqBodyProcessor,
155    "REQBODY_ERROR" => VariableName::ReqBodyError,
156    "REQBODY_ERROR_MSG" => VariableName::ReqBodyErrorMsg,
157    "REQBODY_PROCESSOR_ERROR" => VariableName::ReqBodyProcessorError,
158    "REQBODY_PROCESSOR_ERROR_MSG" => VariableName::ReqBodyProcessorErrorMsg,
159    "MULTIPART_STRICT_ERROR" => VariableName::MultipartStrictCheck,
160};
161
162impl VariableName {
163    /// Parse a variable name from a string (O(1) lookup).
164    #[inline]
165    pub fn from_str(s: &str) -> Option<Self> {
166        // Fast path: check if already uppercase ASCII
167        if s.bytes().all(|b| b.is_ascii_uppercase() || b == b'_') {
168            return VARIABLE_MAP.get(s).copied();
169        }
170        // Slow path: need to uppercase
171        let mut buf = [0u8; 64];
172        let len = s.len().min(64);
173        for (i, b) in s.bytes().take(len).enumerate() {
174            buf[i] = b.to_ascii_uppercase();
175        }
176        let upper = std::str::from_utf8(&buf[..len]).ok()?;
177        VARIABLE_MAP.get(upper).copied()
178    }
179
180    /// Check if this variable is a collection.
181    #[inline]
182    pub fn is_collection(&self) -> bool {
183        matches!(
184            self,
185            Self::Args | Self::ArgsGet | Self::ArgsPost | Self::ArgsNames
186                | Self::RequestHeaders | Self::RequestHeadersNames
187                | Self::RequestCookies | Self::RequestCookiesNames
188                | Self::ResponseHeaders | Self::ResponseHeadersNames
189                | Self::Tx | Self::Session | Self::Env | Self::Ip
190                | Self::Global | Self::Resource | Self::User | Self::Geo
191                | Self::MatchedVars | Self::MatchedVarsNames
192                | Self::Files | Self::FilesSizes | Self::FilesTmpnames | Self::FilesNames
193        )
194    }
195}
196
197/// Parse a variable specification string.
198#[inline]
199pub fn parse_variables(input: &str) -> Result<Vec<VariableSpec>> {
200    let mut variables = Vec::with_capacity(4);
201    let mut exclusions: Vec<String> = Vec::new();
202
203    // Split by | for OR conditions
204    for part in input.split('|') {
205        let part = part.trim();
206        if part.is_empty() {
207            continue;
208        }
209
210        // Handle exclusions (!VAR)
211        if part.starts_with('!') {
212            exclusions.push(part[1..].to_string());
213            continue;
214        }
215
216        let spec = parse_single_variable(part)?;
217        variables.push(spec);
218    }
219
220    // Apply exclusions to all variables
221    if !exclusions.is_empty() {
222        for var in &mut variables {
223            var.exclusions = exclusions.clone();
224        }
225    }
226
227    Ok(variables)
228}
229
230/// Parse a single variable specification.
231#[inline]
232fn parse_single_variable(input: &str) -> Result<VariableSpec> {
233    let input = input.trim();
234    let bytes = input.as_bytes();
235
236    // Check for count mode (& prefix)
237    let (count_mode, input) = if bytes.first() == Some(&b'&') {
238        (true, &input[1..])
239    } else {
240        (false, input)
241    };
242
243    // Find colon for selection (use memchr-style search)
244    let colon_pos = input.bytes().position(|b| b == b':');
245
246    let (name_str, selection) = match colon_pos {
247        Some(pos) => {
248            let name = &input[..pos];
249            let sel_str = &input[pos + 1..];
250
251            let selection = if sel_str.starts_with('/') && sel_str.ends_with('/') && sel_str.len() > 2 {
252                Some(Selection::Regex(sel_str[1..sel_str.len() - 1].to_string()))
253            } else {
254                Some(Selection::Key(sel_str.to_string()))
255            };
256
257            (name, selection)
258        }
259        None => (input, None),
260    };
261
262    let name = VariableName::from_str(name_str).ok_or_else(|| Error::UnknownVariable {
263        name: name_str.to_string(),
264    })?;
265
266    Ok(VariableSpec {
267        name,
268        selection,
269        count_mode,
270        exclusions: Vec::new(),
271    })
272}
273
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input`, check that it yields exactly one target, and return it.
    fn sole_target(input: &str) -> VariableSpec {
        let mut parsed = parse_variables(input).unwrap();
        assert_eq!(parsed.len(), 1);
        parsed.remove(0)
    }

    #[test]
    fn test_parse_simple_variable() {
        let spec = sole_target("REQUEST_URI");
        assert_eq!(spec.name, VariableName::RequestUri);
        assert!(spec.selection.is_none());
        assert!(!spec.count_mode);
    }

    #[test]
    fn test_parse_variable_with_selection() {
        let spec = sole_target("ARGS:username");
        assert_eq!(spec.name, VariableName::Args);
        let key = match &spec.selection {
            Some(Selection::Key(k)) => Some(k.as_str()),
            _ => None,
        };
        assert_eq!(key, Some("username"));
    }

    #[test]
    fn test_parse_variable_with_regex() {
        let spec = sole_target("ARGS:/^user/");
        assert_eq!(spec.name, VariableName::Args);
        let pattern = match &spec.selection {
            Some(Selection::Regex(r)) => Some(r.as_str()),
            _ => None,
        };
        assert_eq!(pattern, Some("^user"));
    }

    #[test]
    fn test_parse_count_mode() {
        let spec = sole_target("&ARGS");
        assert!(spec.count_mode);
    }

    #[test]
    fn test_parse_multiple_variables() {
        let parsed = parse_variables("REQUEST_URI|ARGS|REQUEST_HEADERS").unwrap();
        assert_eq!(parsed.len(), 3);
    }

    #[test]
    fn test_variable_lookup_case_insensitive() {
        // The same name in upper, lower and mixed case must resolve identically.
        for spelling in ["REQUEST_URI", "request_uri", "Request_Uri"] {
            assert_eq!(
                VariableName::from_str(spelling),
                Some(VariableName::RequestUri)
            );
        }
    }
}