Skip to main content

bytecode_filter/
loader.rs

1//! Filter file loading utilities.
2//!
3//! Provides functions to load and compile filters from files.
4//!
5//! ## Filter File Format
6//!
7//! Filter files support comments, configuration directives, and the filter expression.
8//!
9//! ```text
10//! # Comments start with #
11//!
12//! # Delimiter directive (optional, defaults to ";;;")
13//! @delimiter = ";;;"
14//!
15//! # Field mappings
16//! @field MESSAGE_TYPE = 1
17//! @field MESSAGE_SUB_TYPE = 2
18//! @field REQUEST_HEADERS = 11
19//!
20//! # The filter expression (everything else)
21//! MESSAGE_TYPE == "2" AND MESSAGE_SUB_TYPE == "11"
22//! ```
23
24use std::fs;
25use std::path::Path;
26
27use crate::compiler::{compile, CompileError};
28use crate::parser::ParserConfig;
29use crate::vm::CompiledFilter;
30
31/// Error type for filter loading.
32#[derive(Debug)]
33pub enum LoadError {
34    /// IO error reading the file.
35    Io(std::io::Error),
36    /// Compilation error.
37    Compile(CompileError),
38    /// Invalid directive in filter file.
39    InvalidDirective(String),
40    /// Invalid field index.
41    InvalidFieldIndex(String),
42}
43
44impl std::fmt::Display for LoadError {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        match self {
47            LoadError::Io(e) => write!(f, "IO error: {}", e),
48            LoadError::Compile(e) => write!(f, "Compile error: {}", e),
49            LoadError::InvalidDirective(s) => write!(f, "Invalid directive: {}", s),
50            LoadError::InvalidFieldIndex(s) => write!(f, "Invalid field index: {}", s),
51        }
52    }
53}
54
55impl std::error::Error for LoadError {
56    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
57        match self {
58            LoadError::Io(e) => Some(e),
59            LoadError::Compile(e) => Some(e),
60            _ => None,
61        }
62    }
63}
64
65impl From<std::io::Error> for LoadError {
66    fn from(e: std::io::Error) -> Self {
67        LoadError::Io(e)
68    }
69}
70
71impl From<CompileError> for LoadError {
72    fn from(e: CompileError) -> Self {
73        LoadError::Compile(e)
74    }
75}
76
77/// Load and compile a filter from a file.
78///
79/// The file can contain:
80/// - Comments (lines starting with `#`)
81/// - Delimiter directive: `@delimiter = ";;;"`
82/// - Field mappings: `@field FIELD_NAME = index`
83/// - The filter expression
84///
85/// If the file contains `@delimiter` or `@field` directives, they override
86/// the provided config.
87///
88/// # Arguments
89/// * `path` - Path to the filter file
90/// * `config` - Base parser configuration (can be overridden by file directives)
91///
92/// # Returns
93/// A compiled filter ready for evaluation.
94///
95/// # Example
96/// ```no_run
97/// use bytecode_filter::{load_filter_file, ParserConfig};
98///
99/// let config = ParserConfig::default();
100/// let filter = load_filter_file("filters/my.filter", &config).unwrap();
101/// ```
102///
103/// # Errors
104/// Returns `LoadError` if the file cannot be read or the filter fails to compile.
105pub fn load_filter_file(
106    path: impl AsRef<Path>,
107    config: &ParserConfig,
108) -> Result<CompiledFilter, LoadError> {
109    let content = fs::read_to_string(path)?;
110    load_filter_string(&content, config)
111}
112
113/// Load and compile a filter from a string.
114///
115/// Supports the same format as `load_filter_file`.
116///
117/// # Arguments
118/// * `content` - The filter source string
119/// * `config` - Base parser configuration (can be overridden by directives)
120///
121/// # Returns
122/// A compiled filter ready for evaluation.
123///
124/// # Errors
125/// Returns `LoadError` if parsing or compilation fails.
126pub fn load_filter_string(
127    content: &str,
128    config: &ParserConfig,
129) -> Result<CompiledFilter, LoadError> {
130    let mut local_config = config.clone();
131    let mut expression_lines = Vec::new();
132
133    for line in content.lines() {
134        let trimmed = line.trim();
135
136        // Skip empty lines and comments
137        if trimmed.is_empty() || trimmed.starts_with('#') {
138            continue;
139        }
140
141        // Parse directives
142        if trimmed.starts_with('@') {
143            parse_directive(trimmed, &mut local_config)?;
144        } else {
145            // Regular expression line — strip inline comments before joining.
146            // Without this, joining lines with " " collapses newlines and the
147            // lexer's # end-of-line comment would eat the rest of the expression.
148            let without_comment = strip_inline_comment(trimmed);
149            if !without_comment.is_empty() {
150                expression_lines.push(without_comment);
151            }
152        }
153    }
154
155    let expression = expression_lines.join(" ");
156    Ok(compile(&expression, &local_config)?)
157}
158
/// Strip an inline `#` comment from an expression line, respecting quoted strings.
///
/// A `#` starts a comment only when it appears outside a single- or
/// double-quoted string. A backslash escapes the character that follows it,
/// so an escaped quote does not open or close a string and an escaped `#`
/// does not start a comment.
///
/// Returns the expression portion with surrounding whitespace removed on
/// both sides, or `""` if nothing but whitespace and/or a comment remains.
fn strip_inline_comment(line: &str) -> &str {
    let mut open_quote: Option<char> = None;
    let mut escaped = false;
    // Byte offset at which the comment begins; defaults to "no comment".
    let mut code_end = line.len();

    for (idx, ch) in line.char_indices() {
        if escaped {
            // This character was escaped by the preceding backslash.
            escaped = false;
            continue;
        }
        match (open_quote, ch) {
            (_, '\\') => escaped = true,
            (Some(quote), c) if c == quote => open_quote = None,
            (Some(_), _) => {}
            (None, '"') | (None, '\'') => open_quote = Some(ch),
            (None, '#') => {
                code_end = idx;
                break;
            }
            (None, _) => {}
        }
    }

    // Trim on every path so the result always honours the documented
    // contract, including when the line contains no comment at all.
    line[..code_end].trim()
}
183
184/// Parse a directive line and update the config.
185fn parse_directive(line: &str, config: &mut ParserConfig) -> Result<(), LoadError> {
186    let line = line.trim_start_matches('@').trim();
187
188    if line.starts_with("delimiter") {
189        // @delimiter = ";;;"
190        let parts: Vec<&str> = line.splitn(2, '=').collect();
191        if parts.len() != 2 {
192            return Err(LoadError::InvalidDirective(format!(
193                "Invalid delimiter directive: {}",
194                line
195            )));
196        }
197        let value = parts[1].trim();
198        // Remove quotes and handle escape sequences
199        let delimiter = value
200            .trim_matches('"')
201            .trim_matches('\'')
202            .replace("\\t", "\t")
203            .replace("\\n", "\n")
204            .replace("\\r", "\r");
205        config.delimiter = delimiter.into_bytes();
206    } else if line.starts_with("field") {
207        // @field FIELD_NAME = index
208        let rest = line.trim_start_matches("field").trim();
209        let parts: Vec<&str> = rest.splitn(2, '=').collect();
210        if parts.len() != 2 {
211            return Err(LoadError::InvalidDirective(format!(
212                "Invalid field directive: {}",
213                line
214            )));
215        }
216        let field_name = parts[0].trim().to_string();
217        let index_str = parts[1].trim();
218        let index: u8 = index_str.parse().map_err(|_| {
219            LoadError::InvalidFieldIndex(format!(
220                "Invalid field index '{}' for field '{}'",
221                index_str, field_name
222            ))
223        })?;
224        config.fields.insert(field_name, index);
225    } else {
226        return Err(LoadError::InvalidDirective(format!(
227            "Unknown directive: @{}",
228            line
229        )));
230    }
231
232    Ok(())
233}
234
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;

    /// Base configuration used by most tests: LEVEL, CODE and BODY mapped to
    /// field indices 0, 1 and 2.
    fn test_config() -> ParserConfig {
        let mut base = ParserConfig::default();
        base.add_field("LEVEL", 0);
        base.add_field("CODE", 1);
        base.add_field("BODY", 2);
        base
    }

    /// Evaluate `filter` against every payload and compare with the expected verdict.
    fn assert_verdicts(filter: &CompiledFilter, cases: &[(&'static str, bool)]) {
        for &(payload, expected) in cases {
            assert_eq!(
                filter.evaluate(Bytes::from(payload)),
                expected,
                "unexpected verdict for payload {:?}",
                payload
            );
        }
    }

    #[test]
    fn test_load_filter_string_with_comments() {
        let source = r#"
            # This is a comment
            LEVEL == "error"
            # Another comment
            AND CODE == "500"
        "#;

        let filter = load_filter_string(source, &test_config()).unwrap();

        assert_verdicts(
            &filter,
            &[("error;;;500;;;body", true), ("info;;;500;;;body", false)],
        );
    }

    #[test]
    fn test_load_filter_string_empty_lines() {
        let source = r#"
            LEVEL == "error"

            OR

            LEVEL == "warn"
        "#;

        let filter = load_filter_string(source, &test_config()).unwrap();

        assert_verdicts(
            &filter,
            &[
                ("error;;;500;;;body", true),
                ("warn;;;500;;;body", true),
                ("info;;;500;;;body", false),
            ],
        );
    }

    #[test]
    fn test_load_filter_with_directives() {
        let source = r#"
            # Test filter with embedded config
            @delimiter = ";;;"
            @field STATUS = 0
            @field CODE = 1

            STATUS == "ok" AND CODE == "200"
        "#;

        // Everything the filter needs comes from the directives above.
        let filter = load_filter_string(source, &ParserConfig::default()).unwrap();

        assert_verdicts(
            &filter,
            &[("ok;;;200;;;body", true), ("err;;;200;;;body", false)],
        );
    }

    #[test]
    fn test_load_filter_with_pipe_delimiter() {
        let source = r#"
            @delimiter = "|"
            @field TYPE = 0
            @field VALUE = 1

            TYPE == "A" AND VALUE == "100"
        "#;

        let filter = load_filter_string(source, &ParserConfig::default()).unwrap();

        assert_verdicts(
            &filter,
            &[("A|100", true), ("B|100", false), ("A|200", false)],
        );
    }

    #[test]
    fn test_load_filter_override_config() {
        let source = r#"
            @field EXTRA = 5

            EXTRA == "test"
        "#;

        // The directive adds a field on top of the ones in the base config.
        let filter = load_filter_string(source, &test_config()).unwrap();

        assert_verdicts(&filter, &[("0;;;1;;;2;;;3;;;4;;;test", true)]);
    }

    #[test]
    fn test_invalid_directive() {
        let source = r#"
            @unknown_directive = "value"
            LEVEL == "error"
        "#;

        let outcome = load_filter_string(source, &test_config());
        assert!(matches!(outcome, Err(LoadError::InvalidDirective(_))));
    }

    #[test]
    fn test_invalid_field_index() {
        let source = r#"
            @field BAD_FIELD = not_a_number
            LEVEL == "error"
        "#;

        let outcome = load_filter_string(source, &test_config());
        assert!(matches!(outcome, Err(LoadError::InvalidFieldIndex(_))));
    }

    #[test]
    fn test_inline_comments_not_swallowed_after_join() {
        // The loader joins expression lines with " ", so an inline # comment
        // on one line must not swallow the AND clause on the next line.
        let source = r#"
            LEVEL == "error" # check level
            AND CODE == "500" # check code
        "#;

        let filter = load_filter_string(source, &test_config()).unwrap();

        // Both conditions must be enforced; the second case would pass if the
        // AND clause had been eaten by the first line's comment.
        assert_verdicts(
            &filter,
            &[
                ("error;;;500;;;body", true),
                ("error;;;200;;;body", false),
                ("info;;;500;;;body", false),
            ],
        );
    }

    #[test]
    fn test_inline_comment_respects_quoted_hash() {
        // A # that sits inside quotes is data, not the start of a comment.
        let source = r#"
            @field TAG = 0
            TAG == "a#b"
        "#;

        let mut base = ParserConfig::default();
        base.add_field("TAG", 0);
        let filter = load_filter_string(source, &base).unwrap();

        assert_verdicts(&filter, &[("a#b", true), ("a", false)]);
    }
}