loglens_core/
query.rs

1// File: src/engine.rs
2
3use crate::time as time_parser;
4use serde_json::Value;
5use std::fmt;
6use regex::Regex;
7use std::sync::OnceLock;
8
/// Every operator token understood by the query language.
///
/// Order is significant: an operator is always listed before any shorter
/// operator that is a substring of it (`!contains+` before `contains`,
/// `isnot` before `is`, `>=` before `>`), so a first-match scan over this
/// list never stops on a fragment of a longer operator.
const OPERATORS: &[&str] = &[
    // Negated / compound forms.
    "!contains+",
    "!contains-",
    "!between", // range exclusion
    "!~=",
    "!contains",
    "!exists",
    "isnot",
    ">=",
    "<=",
    "==",
    "!=",
    "contains+",
    "contains-",
    "between", // range inclusion
    "contains",
    "exists",
    // Shortest operators come last.
    "is",
    "~=",
    ">",
    "<",
];

/// Field names that are treated as the timestamp of a log entry.
const TIMESTAMP_KEYS: &[&str] = &["timestamp", "ts", "@timestamp"];
21
22fn get_value_by_field<'a>(val: &'a Value, field_key: &str) -> Option<&'a Value> {
23    if field_key.starts_with('/') {
24        val.pointer(field_key)
25    } else {
26        val.get(field_key)
27    }
28}
29
30/// Extracts all numbers (integers, floats, negatives) from a text string.
31/// Optimized to compile the Regex only once.
32fn extract_numbers(text: &str) -> Vec<f64> {
33    static NUMBER_REGEX: OnceLock<Regex> = OnceLock::new();
34    
35    let re = NUMBER_REGEX.get_or_init(|| {
36        Regex::new(r"-?\d+(\.\d+)?").expect("Invalid number regex")
37    });
38
39    re.find_iter(text)
40        .filter_map(|mat| mat.as_str().parse::<f64>().ok())
41        .collect()
42}
43
/// Errors produced while parsing or evaluating a query.
///
/// The type is `Clone` and comparable so callers (and tests) can store,
/// duplicate and assert on errors directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QueryError {
    /// The query, or one of its conditions, could not be interpreted. The
    /// payload is the offending text or a description of the problem.
    InvalidFormat(String),
}

impl fmt::Display for QueryError {
    /// Renders the error as `Invalid query format: '<payload>'`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            QueryError::InvalidFormat(q) => write!(f, "Invalid query format: '{}'", q),
        }
    }
}

impl std::error::Error for QueryError {}
58
59fn evaluate_and_clause(value: &Value, raw_line: &str, clause: &str) -> Result<bool, QueryError> {
60    let conditions = clause.split("&&").map(|s| s.trim());
61    for condition in conditions {
62        if condition.is_empty() {
63            continue;
64        }
65        let result = evaluate_single_condition(value, raw_line, condition)?;
66        if !result {
67            return Ok(false);
68        }
69    }
70    Ok(true)
71}
72
73pub fn evaluate(value: &Value, raw_line: &str, query: &str) -> Result<bool, QueryError> {
74    if query.trim().is_empty() {
75        return Ok(true);
76    }
77    
78    let is_structured_query = OPERATORS.iter().any(|op| query.contains(op));
79
80    if !is_structured_query {
81        let mut effective_query = query;
82        let negate = query.starts_with('!');
83        if negate {
84            effective_query = &query[1..];
85        }
86        let matches = raw_line
87            .to_lowercase()
88            .contains(&effective_query.to_lowercase());
89        return Ok(if negate { !matches } else { matches });
90    }
91
92    let normalized_query = query
93        .replace(" OR ", "||")
94        .replace(" or ", "||")
95        .replace(" AND ", "&&")
96        .replace(" and ", "&&");
97
98    let or_clauses = normalized_query.split("||").map(|s| s.trim());
99
100    for or_clause in or_clauses {
101        if or_clause.is_empty() {
102            continue;
103        }
104        if evaluate_and_clause(value, raw_line, or_clause)? {
105            return Ok(true);
106        }
107    }
108    
109    Ok(false)
110}
111
112// --- Helper for BETWEEN operator logic ---
113fn evaluate_between(
114    log_value: &Value, 
115    range_str: &str, 
116    is_timestamp: bool
117) -> Result<bool, QueryError> {
118    let parts: Vec<&str> = range_str.split("..").collect();
119    
120    if parts.len() != 2 {
121        return Err(QueryError::InvalidFormat(format!(
122            "BETWEEN operator requires a range 'start..end'. Got: '{}'", 
123            range_str
124        )));
125    }
126
127    let start_str = parts[0].trim().trim_matches(|c| c == '"' || c == '\'');
128    let end_str = parts[1].trim().trim_matches(|c| c == '"' || c == '\'');
129
130    if is_timestamp {
131        let log_time = match time_parser::extract_and_parse_timestamp(log_value) {
132            Some(t) => t,
133            None => return Ok(false), 
134        };
135        
136        let t1 = time_parser::parse_time_string(start_str)
137            .map_err(|_| QueryError::InvalidFormat(format!("Invalid start time: {}", start_str)))?;
138        
139        let t2 = time_parser::parse_time_string(end_str)
140            .map_err(|_| QueryError::InvalidFormat(format!("Invalid end time: {}", end_str)))?;
141
142        // AUTO-SWAP LOGIC: Ensure we always compare Low..High
143        let (start, end) = if t1 < t2 { (t1, t2) } else { (t2, t1) };
144
145        Ok(log_time >= start && log_time <= end)
146    } else {
147        // Numeric comparison
148        if let Some(log_num) = log_value.as_f64() {
149            let n1 = start_str.parse::<f64>()
150                .map_err(|_| QueryError::InvalidFormat(format!("Invalid start number: {}", start_str)))?;
151            let n2 = end_str.parse::<f64>()
152                .map_err(|_| QueryError::InvalidFormat(format!("Invalid end number: {}", end_str)))?;
153
154            let (start, end) = if n1 < n2 { (n1, n2) } else { (n2, n1) };
155
156            Ok(log_num >= start && log_num <= end)
157        } else {
158            // String fallback (Lexicographical)
159             if let Some(log_s) = log_value.as_str() {
160                 Ok(log_s >= start_str && log_s <= end_str)
161             } else {
162                 Ok(false)
163             }
164        }
165    }
166}
167
168fn compare_time_values(
169    log_entry: &Value,
170    query_time_str_raw: &str,
171) -> Option<std::cmp::Ordering> {
172    let log_time = time_parser::extract_and_parse_timestamp(log_entry)?;
173    let query_time_str_clean = query_time_str_raw
174        .trim()
175        .trim_matches(|c| c == '"' || c == '\'');
176    let query_time = time_parser::parse_time_string(query_time_str_clean).ok()?;
177    log_time.partial_cmp(&query_time)
178}
179
180fn evaluate_single_condition(
181    value: &Value,
182    raw_line: &str,
183    condition: &str,
184) -> Result<bool, QueryError> {
185    let operator = OPERATORS.iter().find(|&&op| condition.contains(op));
186
187    if let Some(op) = operator {
188        if *op == "exists" || *op == "!exists" {
189            let field_part = condition.split(op).next().unwrap_or("").trim();
190            
191            // Handle basic num() stripping for exists check, though redundant logically
192            let field = if field_part.starts_with("num(") && field_part.ends_with(')') {
193                field_part[4..field_part.len()-1].trim()
194            } else {
195                field_part
196            };
197
198            let field_exists = get_value_by_field(value, field).is_some();
199
200            return if *op == "exists" {
201                Ok(field_exists)
202            } else {
203                Ok(!field_exists)
204            };
205        }
206
207        let (field_raw, op_str, query_value_str) = {
208            let parts: Vec<&str> = condition.splitn(2, op).map(|s| s.trim()).collect();
209            if parts.len() < 2 {
210                return Err(QueryError::InvalidFormat(condition.to_string()));
211            }
212            (parts[0], *op, parts[1])
213        };
214
215        // --- 1. Parse "num()" modifier ---
216        let (field, force_numeric) = if field_raw.starts_with("num(") && field_raw.ends_with(')') {
217            (field_raw[4..field_raw.len()-1].trim(), true)
218        } else {
219            (field_raw, false)
220        };
221
222        // --- 2. Handle BETWEEN for timestamps explicitly ---
223        if TIMESTAMP_KEYS.contains(&field) {
224             if op_str == "between" {
225                 return evaluate_between(value, query_value_str, true);
226             }
227             if op_str == "!between" {
228                 return evaluate_between(value, query_value_str, true).map(|b| !b);
229             }
230        }
231
232        // --- 3. Standard Timestamp operators ---
233        if TIMESTAMP_KEYS.contains(&field) {
234            return match compare_time_values(value, query_value_str) {
235                Some(ord) => match op_str {
236                    ">" => Ok(ord == std::cmp::Ordering::Greater),
237                    "<" => Ok(ord == std::cmp::Ordering::Less),
238                    ">=" => Ok(ord != std::cmp::Ordering::Less),
239                    "<=" => Ok(ord != std::cmp::Ordering::Greater),
240                    _ => Err(QueryError::InvalidFormat(
241                        "Timestamp fields only support >, <, >=, <=, between operators.".to_string(),
242                    )),
243                },
244                None => Ok(false),
245            };
246        }
247
248        // --- 4. "text" field logic (Searching raw line) ---
249        if field == "text" {
250            let search_value_clean = query_value_str
251                .trim()
252                .trim_matches(|c| c == '"' || c == '\'');
253
254            return match op_str {
255                "contains" | "!contains" => {
256                    let lower_raw_line = raw_line.to_lowercase();
257                    let search_terms: Vec<String> = query_value_str
258                        .split(',')
259                        .map(|s| {
260                            s.trim()
261                                .trim_matches(|c| c == '"' || c == '\'')
262                                .to_lowercase()
263                        })
264                        .filter(|s| !s.is_empty())
265                        .collect();
266
267                    if search_terms.is_empty() {
268                        return Ok(true);
269                    }
270
271                    if op_str == "contains" {
272                        Ok(search_terms
273                            .iter()
274                            .all(|term| lower_raw_line.contains(term)))
275                    } else {
276                        Ok(search_terms
277                            .iter()
278                            .all(|term| !lower_raw_line.contains(term)))
279                    }
280                }
281                // Support for 'text between 100..200'
282                "between" | "!between" => {
283                    let parts: Vec<&str> = query_value_str.split("..").collect();
284                    if parts.len() != 2 {
285                        return Err(QueryError::InvalidFormat(format!(
286                            "Operator '{}' requires a range 'start..end'. Got: '{}'",
287                            op_str, query_value_str
288                        )));
289                    }
290
291                    let s1 = parts[0].trim().trim_matches(|c| c == '"' || c == '\'');
292                    let s2 = parts[1].trim().trim_matches(|c| c == '"' || c == '\'');
293
294                    let n1 = s1.parse::<f64>().map_err(|_| {
295                        QueryError::InvalidFormat(format!("Invalid start number: {}", s1))
296                    })?;
297                    let n2 = s2.parse::<f64>().map_err(|_| {
298                        QueryError::InvalidFormat(format!("Invalid end number: {}", s2))
299                    })?;
300
301                    // Auto-swap for safety
302                    let (start, end) = if n1 < n2 { (n1, n2) } else { (n2, n1) };
303
304                    // Extract all numbers from the raw line
305                    let numbers_in_line = extract_numbers(raw_line);
306
307                    // Check if ANY number in the line is within the range
308                    let any_match = numbers_in_line.iter().any(|&n| n >= start && n <= end);
309
310                    if op_str == "between" {
311                        Ok(any_match)
312                    } else {
313                        Ok(!any_match) 
314                    }
315                }
316                "contains+" | "!contains+" | "contains-" | "!contains-" => {
317                    let query_num = match search_value_clean.parse::<f64>() {
318                        Ok(n) => n,
319                        Err(_) => {
320                            return Err(QueryError::InvalidFormat(format!(
321                                "Operator '{}' requires a numeric value, but got '{}'",
322                                op_str, query_value_str
323                            )));
324                        }
325                    };
326
327                    let numbers_in_line = extract_numbers(raw_line);
328
329                    match op_str {
330                        "contains+" => Ok(numbers_in_line.iter().any(|&n| n >= query_num)),
331                        "!contains+" => Ok(numbers_in_line.iter().all(|&n| n < query_num)),
332                        "contains-" => Ok(numbers_in_line.iter().any(|&n| n <= query_num)),
333                        "!contains-" => Ok(numbers_in_line.iter().all(|&n| n > query_num)),
334                        _ => unreachable!(),
335                    }
336                }
337                _ => Err(QueryError::InvalidFormat(
338                    "The 'text' field only supports 'contains' and 'between' variations.".to_string(),
339                )),
340            };
341        }
342
343        // --- 5. Standard Field Logic ---
344        if let Some(original_value) = get_value_by_field(value, field) {
345            
346            // Handle "num(field)" conversion logic
347            let temp_numeric_value; 
348            let log_value = if force_numeric {
349                if let Some(_) = original_value.as_f64() {
350                    original_value // Already a number
351                } else if let Some(s) = original_value.as_str() {
352                    // Try parsing string as float
353                    match s.parse::<f64>() {
354                        Ok(n) if n.is_finite() => {
355                            temp_numeric_value = Some(Value::from(n));
356                            temp_numeric_value.as_ref().unwrap()
357                        },
358                        _ => return Ok(false) // Cannot force to number -> No match
359                    }
360                } else {
361                    // Booleans, Arrays, Objects cannot be forced to simple numbers for comparison
362                    return Ok(false)
363                }
364            } else {
365                original_value
366            };
367
368            // Field EXISTS and value prepared
369            return match op_str {
370                "between" => evaluate_between(log_value, query_value_str, false),
371                "!between" => evaluate_between(log_value, query_value_str, false).map(|b| !b),
372
373                "~=" => Ok(compare_values(log_value, query_value_str, true) == Some(std::cmp::Ordering::Equal)),
374                "!~=" => Ok(compare_values(log_value, query_value_str, true) != Some(std::cmp::Ordering::Equal)),
375                
376                "contains" => {
377                    let query_clean = query_value_str.trim().trim_matches(|c| c == '"' || c == '\'');
378                    match log_value {
379                        Value::String(s) => Ok(s.contains(query_clean)),
380                        _ => Ok(false),
381                    }
382                },
383                "!contains" => {
384                    let query_clean = query_value_str.trim().trim_matches(|c| c == '"' || c == '\'');
385                    match log_value {
386                        Value::String(s) => Ok(!s.contains(query_clean)),
387                        _ => Ok(true),
388                    }
389                },
390
391                "==" | "is" => Ok(compare_values(log_value, query_value_str, false) == Some(std::cmp::Ordering::Equal)),
392                "!=" | "isnot" => Ok(compare_values(log_value, query_value_str, false) != Some(std::cmp::Ordering::Equal)),
393                ">" => Ok(compare_values(log_value, query_value_str, false) == Some(std::cmp::Ordering::Greater)),
394                "<" => Ok(compare_values(log_value, query_value_str, false) == Some(std::cmp::Ordering::Less)),
395                ">=" => Ok(compare_values(log_value, query_value_str, false).map_or(false, |ord| ord != std::cmp::Ordering::Less)),
396                "<=" => Ok(compare_values(log_value, query_value_str, false).map_or(false, |ord| ord != std::cmp::Ordering::Greater)),
397                _ => Ok(false),
398            };
399        } else {
400            // Field DOES NOT EXIST
401            return match op_str {
402                "!=" | "isnot" => Ok(true),
403                _ => Ok(false),
404            };
405        }
406    } else {
407        Err(QueryError::InvalidFormat(condition.to_string()))
408    }
409}
410
411fn compare_values(
412    log_value: &Value,
413    query_value_str_raw: &str,
414    case_insensitive: bool,
415) -> Option<std::cmp::Ordering> {
416    let query_value_clean = query_value_str_raw
417        .trim()
418        .trim_matches(|c| c == '"' || c == '\'');
419
420    if let Some(log_num) = log_value.as_f64() {
421        if let Ok(query_num) = query_value_clean.parse::<f64>() {
422            return log_num.partial_cmp(&query_num);
423        }
424    }
425
426    let log_str_equivalent = match log_value {
427        Value::String(s) => s.clone(),
428        Value::Number(n) => n.to_string(),
429        Value::Bool(b) => b.to_string(),
430        _ => return None,
431    };
432
433    if case_insensitive {
434        Some(
435            log_str_equivalent
436                .to_lowercase()
437                .as_str()
438                .cmp(&query_value_clean.to_lowercase()),
439        )
440    } else {
441        Some(log_str_equivalent.as_str().cmp(query_value_clean))
442    }
443}