// hyperi_rustlib/expression/profile.rs

/// Function names that the DFE expression profile lists as permitted.
///
/// This list is informational within the code shown here: the checker only
/// rejects names that appear on the blocked list and does not consult this
/// constant. The type-conversion names (`int`, `uint`, `double`, `string`,
/// `bool`) and `has` are also present in `SKIP_NAMES`.
pub const ALLOWED_FUNCTIONS: &[&str] = &[
    "contains",
    "startsWith",
    "endsWith",
    "size",
    "has",
    "int",
    "uint",
    "double",
    "string",
    "bool",
];
33
/// Regex functions; blocked unless `ProfileConfig::allow_regex` is `true`.
pub const RESTRICTED_REGEX: &[&str] = &["matches"];
/// Per-element iteration macros; blocked unless `ProfileConfig::allow_iteration` is `true`.
pub const RESTRICTED_ITERATION: &[&str] = &["map", "filter", "exists", "all", "exists_one"];
/// Time constructors; blocked unless `ProfileConfig::allow_time` is `true`.
pub const RESTRICTED_TIME: &[&str] = &["timestamp", "duration"];
40
/// Every function name that is blocked when no restriction has been lifted.
///
/// The entries are the contents of `RESTRICTED_REGEX`, `RESTRICTED_ITERATION`
/// and `RESTRICTED_TIME`, in that order. Keep this list in sync with those
/// three constants when adding or removing a restricted name.
pub const DISALLOWED_FUNCTIONS: &[&str] = &[
    "matches",
    "map",
    "filter",
    "exists",
    "all",
    "exists_one",
    "timestamp",
    "duration",
];
52
/// Opt-in switches that lift individual restrictions of the expression profile.
///
/// The derived `Default` sets every flag to `false`, so the default
/// configuration blocks all restricted function groups.
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
pub struct ProfileConfig {
    /// When `true`, the names in `RESTRICTED_REGEX` are not blocked.
    pub allow_regex: bool,
    /// When `true`, the names in `RESTRICTED_ITERATION` are not blocked.
    pub allow_iteration: bool,
    /// When `true`, the names in `RESTRICTED_TIME` are not blocked.
    pub allow_time: bool,
}
69
70impl ProfileConfig {
71 #[must_use]
73 pub fn blocked_functions(&self) -> Vec<&'static str> {
74 let mut blocked = Vec::new();
75 if !self.allow_regex {
76 blocked.extend_from_slice(RESTRICTED_REGEX);
77 }
78 if !self.allow_iteration {
79 blocked.extend_from_slice(RESTRICTED_ITERATION);
80 }
81 if !self.allow_time {
82 blocked.extend_from_slice(RESTRICTED_TIME);
83 }
84 blocked
85 }
86}
87
/// Names the scanner passes over without a blocked-list lookup when they are
/// directly followed by `(`: literal/keyword-like names and the type
/// conversion functions.
const SKIP_NAMES: &[&str] = &[
    "true", "false", "null", "in", "has", "int", "uint", "double", "string", "bool",
];
93
94#[must_use]
100pub fn check_profile(expr: &str) -> Vec<String> {
101 check_profile_with_config(expr, &ProfileConfig::default())
102}
103
104#[must_use]
111pub fn check_profile_with_config(expr: &str, config: &ProfileConfig) -> Vec<String> {
112 let blocked = config.blocked_functions();
113 if blocked.is_empty() {
114 return Vec::new();
115 }
116
117 let mut errors = Vec::new();
118 let bytes = expr.as_bytes();
119 let len = bytes.len();
120 let mut i = 0;
121
122 while i < len {
123 if bytes[i] == b'"' || bytes[i] == b'\'' {
125 i = skip_string_literal(bytes, i);
126 continue;
127 }
128
129 if !is_ident_start(bytes[i]) {
131 i += 1;
132 continue;
133 }
134
135 let start = i;
137 while i < len && is_ident_char(bytes[i]) {
138 i += 1;
139 }
140 let name = &expr[start..i];
141
142 let mut peek = i;
144 while peek < len && bytes[peek] == b' ' {
145 peek += 1;
146 }
147
148 if peek < len && bytes[peek] == b'(' {
150 if SKIP_NAMES.contains(&name) {
151 continue;
152 }
153
154 if blocked.contains(&name) {
155 let reason = restriction_reason(name);
156 errors.push(format!(
157 "Function '{name}()' is not allowed in the DFE expression profile. {reason}"
158 ));
159 }
160 }
161 }
162
163 errors
164}
165
/// Returns the index just past the string literal that opens at `start`.
///
/// `bytes[start]` is taken as the quote character. A backslash skips itself
/// and the byte after it, so an escaped quote does not end the literal. If no
/// closing quote is found, the literal is treated as running to the end of the
/// input and `bytes.len()` is returned.
///
/// # Panics
///
/// Panics if `start` is not a valid index into `bytes`.
fn skip_string_literal(bytes: &[u8], start: usize) -> usize {
    let quote = bytes[start];
    let mut pos = start + 1;
    while let Some(&current) = bytes.get(pos) {
        match current {
            // Escape sequence: step over the backslash and the escaped byte.
            b'\\' => pos += 2,
            closing if closing == quote => return pos + 1,
            _ => pos += 1,
        }
    }
    // Unterminated literal (or a trailing backslash pushed `pos` past the end).
    bytes.len()
}
187
/// Maps a restricted function name to the explanation appended to its error
/// message, including the config flag that lifts the restriction.
///
/// Names outside the known restricted groups get a generic explanation.
fn restriction_reason(name: &str) -> &'static str {
    if name == "matches" {
        return "Regex has unbounded cost per record. Use contains()/startsWith()/endsWith() instead, or set allow_regex: true in expression config.";
    }
    if matches!(name, "map" | "filter" | "exists" | "all" | "exists_one") {
        return "Per-element iteration has O(n) cost proportional to collection size. Set allow_iteration: true in expression config to permit.";
    }
    if matches!(name, "timestamp" | "duration") {
        return "Time functions excluded -- ClickHouse handles time natively. Set allow_time: true in expression config to permit.";
    }
    "Restricted by DFE expression profile."
}
202
/// Reports whether `b` can begin an identifier: an ASCII letter or `_`.
fn is_ident_start(b: u8) -> bool {
    matches!(b, b'_' | b'a'..=b'z' | b'A'..=b'Z')
}
206
/// Reports whether `b` can continue an identifier: an ASCII letter, an ASCII
/// digit, or `_`.
fn is_ident_char(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
210
// Unit tests for the profile checker. They cover allowed calls, calls blocked
// by default, string-literal handling, per-group config overrides, and
// scanner edge cases.
#[cfg(test)]
mod tests {
    use super::*;

    // --- Functions that are never restricted ---

    #[test]
    fn allowed_function_passes() {
        assert!(check_profile(r#"msg.contains("error")"#).is_empty());
    }

    #[test]
    fn starts_with_passes() {
        assert!(check_profile(r#"path.startsWith("/api/")"#).is_empty());
    }

    #[test]
    fn ends_with_passes() {
        assert!(check_profile(r#"file.endsWith(".log")"#).is_empty());
    }

    // --- Functions blocked under the default configuration ---

    #[test]
    fn matches_blocked_by_default() {
        let errors = check_profile(r#"name.matches("^web-[0-9]+$")"#);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("matches()"));
        // The message must point at the flag that lifts the restriction.
        assert!(errors[0].contains("allow_regex"));
    }

    #[test]
    fn disallowed_map_rejected() {
        let errors = check_profile("[1,2,3].map(x, x * 2)");
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("map()"));
    }

    #[test]
    fn disallowed_filter_rejected() {
        let errors = check_profile("[1,2,3].filter(x, x > 1)");
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("filter()"));
    }

    #[test]
    fn disallowed_timestamp_rejected() {
        let errors = check_profile(r#"timestamp("2024-01-01T00:00:00Z")"#);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("timestamp()"));
    }

    #[test]
    fn disallowed_duration_rejected() {
        let errors = check_profile(r#"duration("1h")"#);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("duration()"));
    }

    // --- Names on the skip list and expressions without calls ---

    #[test]
    fn keywords_skipped() {
        assert!(check_profile("has(user.name)").is_empty());
        assert!(check_profile("int(x) > 10").is_empty());
        assert!(check_profile("bool(y)").is_empty());
    }

    #[test]
    fn plain_comparison_passes() {
        assert!(check_profile(r#"severity == "critical""#).is_empty());
    }

    #[test]
    fn compound_expression_passes() {
        assert!(check_profile(r#"severity == "critical" && amount > 10000"#).is_empty());
    }

    // --- Restricted names inside string literals are data, not calls ---

    #[test]
    fn function_name_inside_string_not_flagged() {
        assert!(check_profile(r#"msg.contains("filter")"#).is_empty());
    }

    #[test]
    fn function_name_inside_string_with_parens_not_flagged() {
        assert!(check_profile(r#"msg.contains("map(x)")"#).is_empty());
    }

    #[test]
    fn matches_inside_string_not_flagged() {
        assert!(check_profile(r#"msg.contains("matches")"#).is_empty());
    }

    #[test]
    fn timestamp_inside_string_not_flagged() {
        assert!(check_profile(r#"label == "timestamp""#).is_empty());
    }

    #[test]
    fn escaped_quote_inside_string_handled() {
        // The escaped quote must not end the literal early and expose `(`.
        assert!(check_profile(r#"msg.contains("filter\"(")"#).is_empty());
    }

    #[test]
    fn single_quoted_string_handled() {
        assert!(check_profile("msg.contains('filter')").is_empty());
    }

    #[test]
    fn real_call_after_string_still_caught() {
        // Scanning must resume correctly after a string literal closes.
        let errors = check_profile(r#""ok" + items.map(x, x)"#);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("map()"));
    }

    // --- Config flags lift restrictions one group at a time ---

    #[test]
    fn matches_allowed_with_regex_config() {
        let config = ProfileConfig {
            allow_regex: true,
            ..Default::default()
        };
        assert!(check_profile_with_config(r#"name.matches("^web-[0-9]+$")"#, &config).is_empty());
    }

    #[test]
    fn map_still_blocked_with_regex_config() {
        // Allowing regex must not unlock the iteration group.
        let config = ProfileConfig {
            allow_regex: true,
            ..Default::default()
        };
        let errors = check_profile_with_config("[1,2].map(x, x)", &config);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("map()"));
    }

    #[test]
    fn iteration_allowed_with_config() {
        let config = ProfileConfig {
            allow_iteration: true,
            ..Default::default()
        };
        assert!(check_profile_with_config("[1,2].map(x, x * 2)", &config).is_empty());
        assert!(check_profile_with_config("[1,2].filter(x, x > 1)", &config).is_empty());
        assert!(check_profile_with_config("[1,2].exists(x, x > 1)", &config).is_empty());
    }

    #[test]
    fn time_allowed_with_config() {
        let config = ProfileConfig {
            allow_time: true,
            ..Default::default()
        };
        assert!(
            check_profile_with_config(r#"timestamp("2024-01-01T00:00:00Z")"#, &config).is_empty()
        );
        assert!(check_profile_with_config(r#"duration("1h")"#, &config).is_empty());
    }

    #[test]
    fn all_restrictions_lifted() {
        let config = ProfileConfig {
            allow_regex: true,
            allow_iteration: true,
            allow_time: true,
        };
        assert!(config.blocked_functions().is_empty());
        assert!(
            check_profile_with_config(r#"name.matches("x") && [1].map(x, x)"#, &config).is_empty()
        );
    }

    // --- Scanner edge cases ---

    #[test]
    fn identifier_not_followed_by_paren_is_fine() {
        // A field or variable that shares a restricted name is not a call.
        assert!(check_profile("filter > 10").is_empty());
    }

    #[test]
    fn identifier_with_space_before_paren() {
        let errors = check_profile("[1,2].map (x, x)");
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("map()"));
    }

    #[test]
    fn empty_expression() {
        assert!(check_profile("").is_empty());
    }

    #[test]
    fn whitespace_only() {
        assert!(check_profile(" ").is_empty());
    }

    #[test]
    fn multiple_violations_reported() {
        // Each offending call produces its own message.
        let errors = check_profile("[1].map(x, x).filter(y, y > 0)");
        assert_eq!(errors.len(), 2);
    }
}