Skip to main content

engram/search/
metadata.rs

1//! Advanced metadata query syntax (RML-879)
2//!
3//! Supports MongoDB-style operators for flexible metadata filtering.
4
5use serde::{Deserialize, Serialize};
6use serde_json::Value;
7
8use crate::error::{EngramError, Result};
9
10/// Metadata query with operators
11#[derive(Debug, Clone, Serialize, Deserialize)]
12#[serde(untagged)]
13pub enum MetadataQuery {
14    /// Simple equality: {"key": "value"}
15    Eq(Value),
16    /// Operators: {"key": {"$gt": 5}}
17    Operators(QueryOperators),
18}
19
20/// Query operators
21#[derive(Debug, Clone, Serialize, Deserialize, Default)]
22pub struct QueryOperators {
23    #[serde(rename = "$eq")]
24    pub eq: Option<Value>,
25    #[serde(rename = "$ne")]
26    pub ne: Option<Value>,
27    #[serde(rename = "$gt")]
28    pub gt: Option<Value>,
29    #[serde(rename = "$gte")]
30    pub gte: Option<Value>,
31    #[serde(rename = "$lt")]
32    pub lt: Option<Value>,
33    #[serde(rename = "$lte")]
34    pub lte: Option<Value>,
35    #[serde(rename = "$in")]
36    pub r#in: Option<Vec<Value>>,
37    #[serde(rename = "$nin")]
38    pub nin: Option<Vec<Value>>,
39    #[serde(rename = "$contains")]
40    pub contains: Option<Value>,
41    #[serde(rename = "$exists")]
42    pub exists: Option<bool>,
43    #[serde(rename = "$regex")]
44    pub regex: Option<String>,
45}
46
47/// Parse a metadata filter into SQL WHERE clauses
48pub fn parse_metadata_filter(
49    filter: &serde_json::Map<String, Value>,
50) -> Result<(String, Vec<Box<dyn rusqlite::ToSql + Send>>)> {
51    let mut conditions: Vec<String> = Vec::new();
52    let mut params: Vec<Box<dyn rusqlite::ToSql + Send>> = Vec::new();
53
54    for (key, value) in filter {
55        let json_path = if key.contains('.') {
56            // Nested path: metadata.config.timeout -> $.config.timeout
57            format!("$.{}", key.replace("metadata.", ""))
58        } else {
59            format!("$.{}", key)
60        };
61
62        match value {
63            // Simple equality
64            Value::String(s) => {
65                conditions.push(format!("json_extract(metadata, '{}') = ?", json_path));
66                params.push(Box::new(s.clone()));
67            }
68            Value::Number(n) => {
69                conditions.push(format!("json_extract(metadata, '{}') = ?", json_path));
70                if let Some(i) = n.as_i64() {
71                    params.push(Box::new(i));
72                } else if let Some(f) = n.as_f64() {
73                    params.push(Box::new(f));
74                }
75            }
76            Value::Bool(b) => {
77                conditions.push(format!("json_extract(metadata, '{}') = ?", json_path));
78                params.push(Box::new(*b));
79            }
80            // Operators
81            Value::Object(ops) => {
82                let (op_conditions, op_params) = parse_operators(&json_path, ops)?;
83                conditions.extend(op_conditions);
84                params.extend(op_params);
85            }
86            _ => {
87                return Err(EngramError::InvalidInput(format!(
88                    "Unsupported filter value type for key: {}",
89                    key
90                )));
91            }
92        }
93    }
94
95    let sql = if conditions.is_empty() {
96        "1=1".to_string()
97    } else {
98        conditions.join(" AND ")
99    };
100
101    Ok((sql, params))
102}
103
104/// Parse operator object into SQL conditions
105fn parse_operators(
106    json_path: &str,
107    ops: &serde_json::Map<String, Value>,
108) -> Result<(Vec<String>, Vec<Box<dyn rusqlite::ToSql + Send>>)> {
109    let mut conditions: Vec<String> = Vec::new();
110    let mut params: Vec<Box<dyn rusqlite::ToSql + Send>> = Vec::new();
111
112    for (op, value) in ops {
113        match op.as_str() {
114            "$eq" => {
115                conditions.push(format!("json_extract(metadata, '{}') = ?", json_path));
116                params.push(value_to_param(value)?);
117            }
118            "$ne" => {
119                conditions.push(format!(
120                    "(json_extract(metadata, '{}') IS NULL OR json_extract(metadata, '{}') != ?)",
121                    json_path, json_path
122                ));
123                params.push(value_to_param(value)?);
124            }
125            "$gt" => {
126                conditions.push(format!("json_extract(metadata, '{}') > ?", json_path));
127                params.push(value_to_param(value)?);
128            }
129            "$gte" => {
130                conditions.push(format!("json_extract(metadata, '{}') >= ?", json_path));
131                params.push(value_to_param(value)?);
132            }
133            "$lt" => {
134                conditions.push(format!("json_extract(metadata, '{}') < ?", json_path));
135                params.push(value_to_param(value)?);
136            }
137            "$lte" => {
138                conditions.push(format!("json_extract(metadata, '{}') <= ?", json_path));
139                params.push(value_to_param(value)?);
140            }
141            "$in" => {
142                if let Value::Array(arr) = value {
143                    if arr.is_empty() {
144                        conditions.push("0=1".to_string());
145                    } else {
146                        let placeholders: Vec<&str> = arr.iter().map(|_| "?").collect();
147                        conditions.push(format!(
148                            "json_extract(metadata, '{}') IN ({})",
149                            json_path,
150                            placeholders.join(", ")
151                        ));
152                        for v in arr {
153                            params.push(value_to_param(v)?);
154                        }
155                    }
156                } else {
157                    return Err(EngramError::InvalidInput(
158                        "$in operator requires an array".to_string(),
159                    ));
160                }
161            }
162            "$nin" => {
163                if let Value::Array(arr) = value {
164                    if arr.is_empty() {
165                        conditions.push("1=1".to_string());
166                    } else {
167                        let placeholders: Vec<&str> = arr.iter().map(|_| "?").collect();
168                        conditions.push(format!(
169                            "(json_extract(metadata, '{}') IS NULL OR json_extract(metadata, '{}') NOT IN ({}))",
170                            json_path,
171                            json_path,
172                            placeholders.join(", ")
173                        ));
174                        for v in arr {
175                            params.push(value_to_param(v)?);
176                        }
177                    }
178                } else {
179                    return Err(EngramError::InvalidInput(
180                        "$nin operator requires an array".to_string(),
181                    ));
182                }
183            }
184            "$contains" => {
185                // For array contains or string contains
186                if let Value::String(s) = value {
187                    conditions.push(format!("json_extract(metadata, '{}') LIKE ?", json_path));
188                    params.push(Box::new(format!("%{}%", s)));
189                } else {
190                    return Err(EngramError::InvalidInput(
191                        "$contains operator requires a string".to_string(),
192                    ));
193                }
194            }
195            "$exists" => {
196                if let Value::Bool(exists) = value {
197                    if *exists {
198                        conditions.push(format!(
199                            "json_extract(metadata, '{}') IS NOT NULL",
200                            json_path
201                        ));
202                    } else {
203                        conditions.push(format!("json_extract(metadata, '{}') IS NULL", json_path));
204                    }
205                }
206            }
207            "$regex" => {
208                if let Value::String(pattern) = value {
209                    // SQLite doesn't have native regex, use GLOB or LIKE
210                    // Convert basic regex to GLOB pattern
211                    let glob_pattern = regex_to_glob(pattern);
212                    conditions.push(format!("json_extract(metadata, '{}') GLOB ?", json_path));
213                    params.push(Box::new(glob_pattern));
214                } else {
215                    return Err(EngramError::InvalidInput(
216                        "$regex operator requires a string".to_string(),
217                    ));
218                }
219            }
220            _ => {
221                return Err(EngramError::InvalidInput(format!(
222                    "Unknown operator: {}",
223                    op
224                )));
225            }
226        }
227    }
228
229    Ok((conditions, params))
230}
231
232/// Convert a JSON value to a SQL parameter
233fn value_to_param(value: &Value) -> Result<Box<dyn rusqlite::ToSql + Send>> {
234    match value {
235        Value::String(s) => Ok(Box::new(s.clone())),
236        Value::Number(n) => {
237            if let Some(i) = n.as_i64() {
238                Ok(Box::new(i))
239            } else if let Some(f) = n.as_f64() {
240                Ok(Box::new(f))
241            } else {
242                Err(EngramError::InvalidInput("Invalid number".to_string()))
243            }
244        }
245        Value::Bool(b) => Ok(Box::new(*b)),
246        _ => Err(EngramError::InvalidInput(format!(
247            "Unsupported value type: {:?}",
248            value
249        ))),
250    }
251}
252
/// Convert basic regex to SQLite GLOB pattern
///
/// This is an approximation that covers the common subset of regex syntax:
///
/// * `.*` becomes `*`, `.+` becomes `?*`, and a lone `.` becomes `?`.
/// * A backslash-escaped punctuation character is emitted literally
///   (`\.` becomes `.`); characters that are GLOB wildcards are wrapped in
///   brackets (`\*` becomes `[*]`) because GLOB has no escape character.
/// * Bracket classes such as `[a-z]` or `[^0-9]` are copied unchanged.
/// * A leading `^` and a trailing unescaped `$` are anchors. GLOB always
///   matches the whole value, so a side *without* an anchor gets a `*` to keep
///   the regex's "match anywhere" meaning.
/// * Any other `^` or `$` is dropped.
///
/// Everything else (quantifiers, groups, alternation, `\d`-style classes) is
/// passed through untouched and will not behave like the regex.
fn regex_to_glob(regex: &str) -> String {
    let (body, anchored_start) = match regex.strip_prefix('^') {
        Some(rest) => (rest, true),
        None => (regex, false),
    };
    // A trailing `$` is an anchor only if it is not escaped, i.e. preceded by
    // an even number of backslashes.
    let (body, anchored_end) = match body.strip_suffix('$') {
        Some(rest) if rest.chars().rev().take_while(|&c| c == '\\').count() % 2 == 0 => {
            (rest, true)
        }
        _ => (body, false),
    };

    let mut glob = String::with_capacity(body.len() + 2);
    let mut chars = body.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\\' => match chars.peek().copied() {
                Some(lit) if lit.is_ascii_punctuation() => {
                    chars.next();
                    if matches!(lit, '*' | '?' | '[') {
                        glob.push('[');
                        glob.push(lit);
                        glob.push(']');
                    } else {
                        glob.push(lit);
                    }
                }
                // Class shorthands such as `\d` have no GLOB equivalent; keep
                // the backslash and let the next character pass through.
                _ => glob.push('\\'),
            },
            '.' => match chars.peek().copied() {
                Some('*') => {
                    chars.next();
                    glob.push('*');
                }
                Some('+') => {
                    chars.next();
                    glob.push_str("?*");
                }
                _ => glob.push('?'),
            },
            '[' => {
                // Copy the class verbatim so `.` and `^` inside it keep their
                // meaning.
                glob.push('[');
                for inner in chars.by_ref() {
                    glob.push(inner);
                    if inner == ']' {
                        break;
                    }
                }
            }
            '^' | '$' => {}
            other => glob.push(other),
        }
    }

    if !anchored_start && !glob.starts_with('*') {
        glob.insert(0, '*');
    }
    if !anchored_end && !glob.ends_with('*') {
        glob.push('*');
    }
    glob
}
262
263/// Build a complete metadata filter query
264pub fn build_metadata_query(
265    base_query: &str,
266    filter: &serde_json::Map<String, Value>,
267) -> Result<(String, Vec<Box<dyn rusqlite::ToSql + Send>>)> {
268    let (filter_sql, params) = parse_metadata_filter(filter)?;
269
270    let sql = if filter_sql == "1=1" {
271        base_query.to_string()
272    } else {
273        format!("{} AND {}", base_query, filter_sql)
274    };
275
276    Ok((sql, params))
277}
278
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Deserialize a `json!` object literal into the map the parser takes.
    fn filter_of(raw: Value) -> serde_json::Map<String, Value> {
        serde_json::from_value(raw).unwrap()
    }

    /// A plain string value becomes one equality condition with one parameter.
    #[test]
    fn test_simple_equality() {
        let filter = filter_of(json!({"status": "active"}));
        let (sql, params) = parse_metadata_filter(&filter).unwrap();
        assert!(sql.contains("json_extract"));
        assert_eq!(params.len(), 1);
    }

    /// Two comparison operators on one key yield two conditions.
    #[test]
    fn test_comparison_operators() {
        let filter = filter_of(json!({"count": {"$gt": 5, "$lte": 100}}));
        let (sql, params) = parse_metadata_filter(&filter).unwrap();
        assert!(sql.contains(">"));
        assert!(sql.contains("<="));
        assert_eq!(params.len(), 2);
    }

    /// `$in` binds one parameter per list element.
    #[test]
    fn test_in_operator() {
        let filter = filter_of(json!({"priority": {"$in": ["high", "critical"]}}));
        let (sql, params) = parse_metadata_filter(&filter).unwrap();
        assert!(sql.contains("IN"));
        assert_eq!(params.len(), 2);
    }

    /// An empty `$in` list can never match.
    #[test]
    fn test_empty_in_operator() {
        let filter = filter_of(json!({"priority": {"$in": []}}));
        let (sql, params) = parse_metadata_filter(&filter).unwrap();
        assert!(sql.contains("0=1"));
        assert_eq!(params.len(), 0);
    }

    /// An empty `$nin` list excludes nothing.
    #[test]
    fn test_empty_nin_operator() {
        let filter = filter_of(json!({"priority": {"$nin": []}}));
        let (sql, params) = parse_metadata_filter(&filter).unwrap();
        assert!(sql.contains("1=1"));
        assert_eq!(params.len(), 0);
    }

    /// `$exists: true` checks that the extracted value is not NULL.
    #[test]
    fn test_exists_operator() {
        let filter = filter_of(json!({"email": {"$exists": true}}));
        let (sql, _) = parse_metadata_filter(&filter).unwrap();
        assert!(sql.contains("IS NOT NULL"));
    }

    /// Dotted keys are used as nested JSON paths.
    #[test]
    fn test_nested_path() {
        let filter = filter_of(json!({"config.timeout": 30}));
        let (sql, _) = parse_metadata_filter(&filter).unwrap();
        assert!(sql.contains("$.config.timeout"));
    }
}
345}