Skip to main content

faucet_core/
util.rs

1//! Shared utilities used across faucet source and sink crates.
2
3use std::collections::HashMap;
4
5use crate::FaucetError;
6use jsonpath_rust::JsonPath;
7use serde_json::Value;
8
9// ── SQL Utilities ───────────────────────────────────────────────────────────
10
/// Render `name` as a double-quoted SQL identifier.
///
/// The identifier is surrounded by `"` and every `"` already present in
/// `name` is emitted twice, which is how ANSI SQL spells a literal quote
/// inside a delimited identifier. This keeps a hostile name from terminating
/// the identifier early.
///
/// ```
/// use faucet_core::util::quote_ident;
/// assert_eq!(quote_ident("my_table"), "\"my_table\"");
/// assert_eq!(quote_ident("has\"quote"), "\"has\"\"quote\"");
/// ```
pub fn quote_ident(name: &str) -> String {
    // Two extra bytes for the surrounding quotes; embedded quotes may grow it.
    let mut quoted = String::with_capacity(name.len() + 2);
    quoted.push('"');
    for ch in name.chars() {
        if ch == '"' {
            // Emit the extra quote first; the character itself follows below.
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
24
25// ── JSONPath Extraction ─────────────────────────────────────────────────────
26
27/// Extract records from a JSON value using an optional JSONPath expression.
28///
29/// - If `path` is `Some`, queries the body with the JSONPath and returns
30///   all matched values.
31/// - If `path` is `None`, returns the body as-is: arrays are unpacked into
32///   individual records, objects/scalars are returned as a single-element vec.
33pub fn extract_records(body: &Value, path: Option<&str>) -> Result<Vec<Value>, FaucetError> {
34    match path {
35        Some(p) => {
36            let results = body
37                .query(p)
38                .map_err(|e| FaucetError::JsonPath(format!("invalid JSONPath '{p}': {e}")))?;
39            Ok(results.into_iter().cloned().collect())
40        }
41        None => match body {
42            Value::Array(arr) => Ok(arr.clone()),
43            other => Ok(vec![other.clone()]),
44        },
45    }
46}
47
48// ── HTTP Response Handling ──────────────────────────────────────────────────
49
50/// Check an HTTP response status and return a [`FaucetError::HttpStatus`] on
51/// non-success responses.
52///
53/// Reads the response body for error context, truncating to `max_body_len`
54/// bytes (default: 2048) to avoid large error messages.
55pub async fn check_http_response(
56    resp: reqwest::Response,
57    max_body_len: usize,
58) -> Result<reqwest::Response, FaucetError> {
59    if resp.status().is_success() {
60        return Ok(resp);
61    }
62
63    let status = resp.status().as_u16();
64    let url = resp.url().to_string();
65    let body_text = resp.text().await.unwrap_or_default();
66
67    let body = if body_text.len() > max_body_len {
68        let end = body_text.floor_char_boundary(max_body_len);
69        format!("{}...(truncated)", &body_text[..end])
70    } else {
71        body_text
72    };
73
74    Err(FaucetError::HttpStatus { status, url, body })
75}
76
/// Default cap, in bytes, on how much of an error response body is kept
/// (2 KiB).
pub const DEFAULT_ERROR_BODY_MAX_LEN: usize = 2 * 1024;
79
80// ── Context Utilities ──────────────────────────────────────────────────────
81
82/// Substitute `{key}` placeholders in a template string with values from context.
83///
84/// Value conversion rules:
85/// - `String` -> raw string (no quotes)
86/// - `Number` -> number as string
87/// - `Bool` -> `"true"` / `"false"`
88/// - `Null` -> `"null"`
89/// - `Array` / `Object` -> JSON-serialized string
90///
91/// Unmatched placeholders are left as-is.
92///
93/// **Warning:** Do NOT use this for SQL queries (SQL injection risk) or for
94/// substitution into serialized JSON (corruption risk with special characters).
95/// Use [`substitute_context_bind_params`] for SQL and [`substitute_context_json`]
96/// for serialized JSON.
97pub fn substitute_context(template: &str, context: &HashMap<String, Value>) -> String {
98    substitute_single_pass(template, context, |value| match value {
99        Value::String(s) => s.clone(),
100        Value::Number(n) => n.to_string(),
101        Value::Bool(b) => b.to_string(),
102        Value::Null => "null".to_string(),
103        other => other.to_string(),
104    })
105}
106
107/// Single left-to-right scan that replaces each recognised `{key}` placeholder
108/// with `render(value)`. Unmatched placeholders are left verbatim; replacement
109/// text is never re-scanned. Shared by [`substitute_context`] and
110/// [`substitute_context_json`] so neither is O(template × context) (#78/#36).
111fn substitute_single_pass(
112    template: &str,
113    context: &HashMap<String, Value>,
114    render: impl Fn(&Value) -> String,
115) -> String {
116    if context.is_empty() {
117        return template.to_string();
118    }
119    let mut result = String::with_capacity(template.len());
120    let mut last_copied = 0;
121    let mut search_from = 0;
122
123    while search_from < template.len() {
124        let Some(open_offset) = template[search_from..].find('{') else {
125            break;
126        };
127        let open = search_from + open_offset;
128        let Some(close_offset) = template[open + 1..].find('}') else {
129            break;
130        };
131        let close = open + 1 + close_offset;
132        let key = &template[open + 1..close];
133
134        if let Some(value) = context.get(key) {
135            result.push_str(&template[last_copied..open]);
136            result.push_str(&render(value));
137            last_copied = close + 1;
138            search_from = close + 1;
139        } else {
140            search_from = open + 1;
141        }
142    }
143
144    result.push_str(&template[last_copied..]);
145    result
146}
147
148/// Replace `{key}` placeholders with SQL bind-parameter markers, returning
149/// the rewritten query and an ordered list of values to bind.
150///
151/// Scans the template left-to-right; each recognised placeholder is replaced
152/// with the marker produced by `marker_fn(index)`, and the corresponding
153/// value is appended to the returned vector.  The same key appearing multiple
154/// times produces one bind value per occurrence.
155///
156/// `start_index` is the 1-based index for the first parameter.
157///
158/// # Marker functions
159///
160/// - PostgreSQL: `|i| format!("${i}")`
161/// - MySQL / SQLite: `|_| "?".to_string()`
162///
163/// Placeholders whose key is not present in `context` are left unchanged.
164pub fn substitute_context_bind_params(
165    template: &str,
166    context: &HashMap<String, Value>,
167    start_index: usize,
168    marker_fn: impl Fn(usize) -> String,
169) -> (String, Vec<Value>) {
170    if context.is_empty() {
171        return (template.to_string(), Vec::new());
172    }
173
174    let mut result = String::with_capacity(template.len());
175    let mut values = Vec::new();
176    let mut param_idx = start_index;
177    let mut last_copied = 0;
178    let mut search_from = 0;
179
180    while search_from < template.len() {
181        let Some(open_offset) = template[search_from..].find('{') else {
182            break;
183        };
184        let open = search_from + open_offset;
185
186        let Some(close_offset) = template[open + 1..].find('}') else {
187            break;
188        };
189        let close = open + 1 + close_offset;
190        let key = &template[open + 1..close];
191
192        if let Some(value) = context.get(key) {
193            result.push_str(&template[last_copied..open]);
194            result.push_str(&marker_fn(param_idx));
195            values.push(value.clone());
196            param_idx += 1;
197            last_copied = close + 1;
198            search_from = close + 1;
199        } else {
200            search_from = open + 1;
201        }
202    }
203
204    result.push_str(&template[last_copied..]);
205    (result, values)
206}
207
208/// Substitute `{key}` placeholders within a serialized JSON string, escaping
209/// string values so that the result remains valid JSON.
210///
211/// Use this instead of [`substitute_context`] when the template is a
212/// `serde_json`-serialized value that will be deserialized back after
213/// substitution.  String values are JSON-escaped (double-quotes, backslashes,
214/// and control characters).  Numbers, bools, and null are substituted as-is.
215pub fn substitute_context_json(template: &str, context: &HashMap<String, Value>) -> String {
216    substitute_single_pass(template, context, |value| match value {
217        Value::String(s) => json_escape_string(s),
218        Value::Number(n) => n.to_string(),
219        Value::Bool(b) => b.to_string(),
220        Value::Null => "null".to_string(),
221        other => other.to_string(),
222    })
223}
224
/// Escape `s` so it can be placed between the quotes of a JSON string.
///
/// `"` and `\` get a backslash prefix; newline, carriage return and tab use
/// their short escapes (`\n`, `\r`, `\t`); every other control character is
/// written as a four-digit lowercase `\uXXXX` escape. All remaining
/// characters are copied unchanged. The surrounding quotes are not added.
fn json_escape_string(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        // Characters that have a dedicated two-character escape.
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };

        if let Some(seq) = short_escape {
            out.push_str(seq);
        } else if ch.is_control() {
            out.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            out.push(ch);
        }
    }
    out
}
245
246/// Extract context values from a record using JSONPath expressions.
247///
248/// Each entry in `mapping` is `context_key -> json_path`. The function queries
249/// the record with each JSONPath and stores the first matched value under the
250/// corresponding context key.
251///
252/// Returns an error if any JSONPath matches nothing.
253pub fn extract_context(
254    record: &Value,
255    mapping: &HashMap<String, String>,
256) -> Result<HashMap<String, Value>, FaucetError> {
257    let mut context = HashMap::with_capacity(mapping.len());
258    for (context_key, json_path) in mapping {
259        let results = record
260            .query(json_path.as_str())
261            .map_err(|e| FaucetError::JsonPath(format!("invalid JSONPath '{json_path}': {e}")))?;
262        let value = results.first().ok_or_else(|| {
263            FaucetError::JsonPath(format!(
264                "JSONPath '{json_path}' matched nothing in record for context key '{context_key}'"
265            ))
266        })?;
267        context.insert(context_key.clone(), (*value).clone());
268    }
269    Ok(context)
270}
271
/// Unit tests for the utilities in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // ── quote_ident ─────────────────────────────────────────────────────

    #[test]
    fn quote_ident_simple() {
        assert_eq!(quote_ident("my_table"), "\"my_table\"");
    }

    #[test]
    fn quote_ident_with_embedded_quotes() {
        assert_eq!(quote_ident("has\"quote"), "\"has\"\"quote\"");
    }

    #[test]
    fn quote_ident_empty() {
        assert_eq!(quote_ident(""), "\"\"");
    }

    #[test]
    fn quote_ident_special_chars() {
        assert_eq!(quote_ident("table; DROP"), "\"table; DROP\"");
    }

    // ── extract_records ─────────────────────────────────────────────────

    #[test]
    fn extract_with_path() {
        let body = json!({"data": [{"id": 1}, {"id": 2}]});
        let records = extract_records(&body, Some("$.data[*]")).unwrap();
        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["id"], 1);
    }

    #[test]
    fn extract_without_path_array() {
        let body = json!([{"id": 1}, {"id": 2}]);
        let records = extract_records(&body, None).unwrap();
        assert_eq!(records.len(), 2);
    }

    #[test]
    fn extract_without_path_object() {
        let body = json!({"id": 1});
        let records = extract_records(&body, None).unwrap();
        assert_eq!(records.len(), 1);
    }

    #[test]
    fn extract_empty_result() {
        let body = json!({"data": []});
        let records = extract_records(&body, Some("$.data[*]")).unwrap();
        assert!(records.is_empty());
    }

    #[test]
    fn extract_invalid_path_returns_error() {
        let body = json!({"data": 1});
        // An unterminated bracket selector is not a parseable JSONPath, so
        // the failure must surface as `FaucetError::JsonPath`.
        let result = extract_records(&body, Some("$.data["));
        assert!(matches!(result, Err(FaucetError::JsonPath(_))));
    }

    #[test]
    fn extract_wildcard_over_scalar_does_not_panic() {
        let body = json!({"data": 1});
        // Whether a wildcard over a scalar is an empty match or an error is
        // up to the JSONPath library; only a non-panicking return is required.
        let _ = extract_records(&body, Some("$.data[*]"));
    }

    // ── substitute_context ──────────────────────────────────────────────

    #[test]
    fn substitute_context_string_values() {
        let mut ctx = HashMap::new();
        ctx.insert("org".to_string(), json!("acme"));
        ctx.insert("repo".to_string(), json!("widgets"));
        let result = substitute_context("/orgs/{org}/repos/{repo}", &ctx);
        assert_eq!(result, "/orgs/acme/repos/widgets");
    }

    #[test]
    fn substitute_context_number_value() {
        let mut ctx = HashMap::new();
        ctx.insert("id".to_string(), json!(42));
        let result = substitute_context("/items/{id}", &ctx);
        assert_eq!(result, "/items/42");
    }

    #[test]
    fn substitute_context_bool_value() {
        let mut ctx = HashMap::new();
        ctx.insert("active".to_string(), json!(true));
        let result = substitute_context("/filter?active={active}", &ctx);
        assert_eq!(result, "/filter?active=true");
    }

    #[test]
    fn substitute_context_null_value() {
        let mut ctx = HashMap::new();
        ctx.insert("val".to_string(), json!(null));
        let result = substitute_context("/x/{val}", &ctx);
        assert_eq!(result, "/x/null");
    }

    #[test]
    fn substitute_context_array_value() {
        let mut ctx = HashMap::new();
        ctx.insert("ids".to_string(), json!([1, 2, 3]));
        let result = substitute_context("/x/{ids}", &ctx);
        assert_eq!(result, "/x/[1,2,3]");
    }

    #[test]
    fn substitute_context_unmatched_placeholder_left_as_is() {
        // Empty context: exercises the early-return path.
        let ctx = HashMap::new();
        let result = substitute_context("/orgs/{org}/repos", &ctx);
        assert_eq!(result, "/orgs/{org}/repos");
    }

    #[test]
    fn substitute_context_unknown_key_with_nonempty_context() {
        // Non-empty context: exercises the scan loop, which must skip the
        // unknown `{org}` and still replace the later `{repo}`.
        let mut ctx = HashMap::new();
        ctx.insert("repo".to_string(), json!("widgets"));
        let result = substitute_context("/orgs/{org}/repos/{repo}", &ctx);
        assert_eq!(result, "/orgs/{org}/repos/widgets");
    }

    #[test]
    fn substitute_context_unterminated_brace_left_as_is() {
        // A `{` with no closing `}` ends the scan; the tail is copied verbatim.
        let mut ctx = HashMap::new();
        ctx.insert("id".to_string(), json!(1));
        let result = substitute_context("/a/{id}/b/{oops", &ctx);
        assert_eq!(result, "/a/1/b/{oops");
    }

    #[test]
    fn substitute_context_empty_template() {
        let ctx = HashMap::new();
        let result = substitute_context("", &ctx);
        assert_eq!(result, "");
    }

    #[test]
    fn substitute_context_replaces_all_occurrences() {
        let mut ctx = HashMap::new();
        ctx.insert("id".to_string(), Value::String("42".to_string()));
        let result = substitute_context("/a/{id}/b/{id}", &ctx);
        assert_eq!(result, "/a/42/b/42");
    }

    #[test]
    fn substitute_context_does_not_rescan_replacement() {
        // Single-pass: a replacement value that itself looks like a placeholder
        // is emitted verbatim, never re-substituted (#78/#36).
        let mut ctx = HashMap::new();
        ctx.insert("a".to_string(), Value::String("{b}".to_string()));
        ctx.insert("b".to_string(), Value::String("SECRET".to_string()));
        let result = substitute_context("{a}", &ctx);
        assert_eq!(result, "{b}");
    }

    // ── extract_context ─────────────────────────────────────────────────

    #[test]
    fn extract_context_simple_paths() {
        let record = json!({"id": 1, "name": "alice"});
        let mut mapping = HashMap::new();
        mapping.insert("user_id".to_string(), "$.id".to_string());
        mapping.insert("user_name".to_string(), "$.name".to_string());
        let ctx = extract_context(&record, &mapping).unwrap();
        assert_eq!(ctx["user_id"], json!(1));
        assert_eq!(ctx["user_name"], json!("alice"));
    }

    #[test]
    fn extract_context_nested_path() {
        let record = json!({"data": {"info": {"id": 99}}});
        let mut mapping = HashMap::new();
        mapping.insert("deep_id".to_string(), "$.data.info.id".to_string());
        let ctx = extract_context(&record, &mapping).unwrap();
        assert_eq!(ctx["deep_id"], json!(99));
    }

    #[test]
    fn extract_context_missing_path_returns_error() {
        let record = json!({"id": 1});
        let mut mapping = HashMap::new();
        mapping.insert("missing".to_string(), "$.nonexistent".to_string());
        let result = extract_context(&record, &mapping);
        assert!(result.is_err());
    }

    #[test]
    fn extract_context_empty_mapping() {
        let record = json!({"id": 1});
        let mapping = HashMap::new();
        let ctx = extract_context(&record, &mapping).unwrap();
        assert!(ctx.is_empty());
    }

    // ── substitute_context_bind_params ──────────────────────────────────

    #[test]
    fn bind_params_postgres_style() {
        let mut ctx = HashMap::new();
        ctx.insert("org".to_string(), json!("acme"));
        ctx.insert("id".to_string(), json!(42));
        let (query, values) = substitute_context_bind_params(
            "SELECT * FROM t WHERE org = {org} AND id = {id}",
            &ctx,
            1,
            |i| format!("${i}"),
        );
        assert_eq!(query, "SELECT * FROM t WHERE org = $1 AND id = $2");
        assert_eq!(values.len(), 2);
        assert_eq!(values[0], json!("acme"));
        assert_eq!(values[1], json!(42));
    }

    #[test]
    fn bind_params_question_mark_style() {
        let mut ctx = HashMap::new();
        ctx.insert("name".to_string(), json!("test"));
        let (query, values) =
            substitute_context_bind_params("SELECT * FROM t WHERE name = {name}", &ctx, 1, |_| {
                "?".to_string()
            });
        assert_eq!(query, "SELECT * FROM t WHERE name = ?");
        assert_eq!(values, vec![json!("test")]);
    }

    #[test]
    fn bind_params_duplicate_key_produces_multiple_binds() {
        let mut ctx = HashMap::new();
        ctx.insert("id".to_string(), json!(5));
        let (query, values) = substitute_context_bind_params(
            "SELECT * FROM t WHERE a = {id} OR b = {id}",
            &ctx,
            3,
            |i| format!("${i}"),
        );
        assert_eq!(query, "SELECT * FROM t WHERE a = $3 OR b = $4");
        assert_eq!(values, vec![json!(5), json!(5)]);
    }

    #[test]
    fn bind_params_unknown_key_left_as_is() {
        let ctx = HashMap::new();
        let (query, values) =
            substitute_context_bind_params("SELECT * FROM t WHERE x = {unknown}", &ctx, 1, |i| {
                format!("${i}")
            });
        assert_eq!(query, "SELECT * FROM t WHERE x = {unknown}");
        assert!(values.is_empty());
    }

    #[test]
    fn bind_params_mixed_known_and_unknown() {
        let mut ctx = HashMap::new();
        ctx.insert("id".to_string(), json!(1));
        let (query, values) = substitute_context_bind_params(
            "SELECT * FROM t WHERE id = {id} AND x = {unknown}",
            &ctx,
            1,
            |i| format!("${i}"),
        );
        assert_eq!(query, "SELECT * FROM t WHERE id = $1 AND x = {unknown}");
        assert_eq!(values, vec![json!(1)]);
    }

    #[test]
    fn bind_params_empty_context() {
        let ctx = HashMap::new();
        let (query, values) =
            substitute_context_bind_params("SELECT 1", &ctx, 1, |i| format!("${i}"));
        assert_eq!(query, "SELECT 1");
        assert!(values.is_empty());
    }

    #[test]
    fn bind_params_start_index_offset() {
        let mut ctx = HashMap::new();
        ctx.insert("name".to_string(), json!("x"));
        let (query, values) =
            substitute_context_bind_params("SELECT * FROM t WHERE name = {name}", &ctx, 5, |i| {
                format!("${i}")
            });
        assert_eq!(query, "SELECT * FROM t WHERE name = $5");
        assert_eq!(values, vec![json!("x")]);
    }

    // ── substitute_context_json ─────────────────────────────────────────

    #[test]
    fn json_sub_escapes_double_quotes() {
        let mut ctx = HashMap::new();
        ctx.insert("name".to_string(), json!(r#"O'Brien "Bob""#));
        let template = r#"{"name":"{name}"}"#;
        let result = substitute_context_json(template, &ctx);
        let parsed: Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["name"], r#"O'Brien "Bob""#);
    }

    #[test]
    fn json_sub_escapes_backslashes() {
        let mut ctx = HashMap::new();
        ctx.insert("path".to_string(), json!("C:\\Users\\test"));
        let template = r#"{"path":"{path}"}"#;
        let result = substitute_context_json(template, &ctx);
        let parsed: Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["path"], "C:\\Users\\test");
    }

    #[test]
    fn json_sub_escapes_control_chars() {
        let mut ctx = HashMap::new();
        ctx.insert("text".to_string(), json!("line1\nline2\ttab"));
        let template = r#"{"text":"{text}"}"#;
        let result = substitute_context_json(template, &ctx);
        let parsed: Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["text"], "line1\nline2\ttab");
    }

    #[test]
    fn json_sub_number_value() {
        let mut ctx = HashMap::new();
        ctx.insert("id".to_string(), json!(42));
        let template = r#"{"user_id":"{id}"}"#;
        let result = substitute_context_json(template, &ctx);
        let parsed: Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["user_id"], "42");
    }

    #[test]
    fn json_sub_preserves_valid_json_without_special_chars() {
        let mut ctx = HashMap::new();
        ctx.insert("name".to_string(), json!("alice"));
        let template = r#"{"filter":{"name":"{name}"}}"#;
        let result = substitute_context_json(template, &ctx);
        let parsed: Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["filter"]["name"], "alice");
    }

    // ── json_escape_string ──────────────────────────────────────────────

    #[test]
    fn json_escape_plain_string() {
        assert_eq!(json_escape_string("hello"), "hello");
    }

    #[test]
    fn json_escape_quotes_and_backslashes() {
        assert_eq!(json_escape_string(r#"a"b\c"#), r#"a\"b\\c"#);
    }

    #[test]
    fn json_escape_newlines_and_tabs() {
        assert_eq!(json_escape_string("a\nb\tc"), "a\\nb\\tc");
    }

    #[test]
    fn json_escape_other_control_chars() {
        // Control characters without a short escape use the `\uXXXX` form.
        assert_eq!(json_escape_string("\u{1}\u{7f}"), "\\u0001\\u007f");
    }
}