Skip to main content

nodedb_sql/
dsl_bind.rs

1//! Lexer-aware parameter binding for DSL statements.
2//!
3//! # The architectural problem
4//!
5//! NodeDB's DSL statements — `UPSERT INTO`, `SEARCH`, `GRAPH`, `MATCH`,
6//! `OPTIONAL MATCH`, `CRDT MERGE`, `CREATE VECTOR INDEX`,
7//! `CREATE FULLTEXT INDEX`, `CREATE SEARCH INDEX`, `CREATE SPARSE INDEX`
8//! — are dispatched from raw SQL text because they aren't part of
9//! sqlparser's grammar. The planned-SQL path binds parameters on the
10//! parsed AST, so nothing touches the DSL text: `$N` survives into the
11//! dispatcher and reaches the engine as a literal string.
12//!
13//! The observed symptom was `cannot parse '$2' as INT` from the binary-
14//! tuple encoder. That's a symptom of a class: any DSL reached through
15//! the pgwire extended-query path silently skips parameter binding.
16//!
17//! # The fix
18//!
19//! A single chokepoint — `bind_dsl` — that every DSL-bound prepared
20//! statement must go through before execution. It tokenizes the DSL
21//! SQL with sqlparser's own tokenizer, rewrites `Token::Placeholder`
22//! occurrences to concrete literal tokens, and re-serializes. This
23//! respects string boundaries, quoted identifiers, and comments for
24//! free — sqlparser's lexer already classified them.
25//!
26//! The `BoundDslSql` newtype exists so the DSL execute path takes
27//! `BoundDslSql`, not `&str`. The compiler refuses to execute a DSL
28//! statement whose parameters haven't been bound. That is the
29//! architectural enforcement — the mechanism that made this class of
30//! bug possible (a `&str` flowing straight to the dispatcher) is
31//! structurally gone.
32
33use sqlparser::dialect::PostgreSqlDialect;
34use sqlparser::tokenizer::{Token, Tokenizer};
35
36use crate::error::{Result, SqlError};
37use crate::params::ParamValue;
38
39/// SQL text whose prepared-statement `$N` placeholders have been
40/// substituted with concrete literals.
41///
42/// The DSL dispatcher's prepared-statement entry points take
43/// `BoundDslSql`, not `&str`. This makes it impossible for a caller
44/// to forget parameter binding on a DSL path — the types enforce it.
45#[derive(Debug, Clone)]
46pub struct BoundDslSql(String);
47
48impl BoundDslSql {
49    /// Construct from SQL known not to need binding (simple-query path).
50    ///
51    /// The simple-query protocol does not carry prepared parameters, so
52    /// passing the raw text is correct at that layer. This constructor
53    /// is the only way to bypass `bind_dsl` and is named loudly so that
54    /// any future use is obvious in review.
55    pub fn from_simple_query(sql: String) -> Self {
56        Self(sql)
57    }
58
59    pub fn as_str(&self) -> &str {
60        &self.0
61    }
62
63    pub fn into_string(self) -> String {
64        self.0
65    }
66}
67
68/// Substitute `$N` placeholders in DSL SQL text with concrete literals.
69///
70/// Uses sqlparser's own tokenizer so that string literals, quoted
71/// identifiers, comments, and dollar-quoted strings are never
72/// accidentally rewritten — the tokenizer has already classified them.
73pub fn bind_dsl(sql: &str, params: &[ParamValue]) -> Result<BoundDslSql> {
74    if params.is_empty() {
75        return Ok(BoundDslSql(sql.to_owned()));
76    }
77    let dialect = PostgreSqlDialect {};
78    let tokens = Tokenizer::new(&dialect, sql)
79        .tokenize()
80        .map_err(|e| SqlError::Parse {
81            detail: format!("tokenize DSL for parameter binding: {e}"),
82        })?;
83
84    let mut out = String::with_capacity(sql.len());
85    for tok in &tokens {
86        if let Token::Placeholder(p) = tok
87            && p.starts_with('$')
88        {
89            // Every `$N` must resolve to a provided parameter. A
90            // silent pass-through here would let an out-of-range
91            // placeholder reach the engine as a raw `$N` literal —
92            // exactly the bug class this module exists to close.
93            let replacement =
94                placeholder_literal_token(p, params).ok_or_else(|| SqlError::Parse {
95                    detail: format!(
96                        "DSL parameter bind: placeholder {p} has no corresponding \
97                         parameter ({len} provided)",
98                        len = params.len()
99                    ),
100                })?;
101            out.push_str(&replacement.to_string());
102            continue;
103        }
104        out.push_str(&tok.to_string());
105    }
106    Ok(BoundDslSql(out))
107}
108
109fn placeholder_literal_token(placeholder: &str, params: &[ParamValue]) -> Option<Token> {
110    let idx_str = placeholder.strip_prefix('$')?;
111    let idx: usize = idx_str.parse().ok()?;
112    let param = params.get(idx.checked_sub(1)?)?;
113    Some(match param {
114        ParamValue::Null => Token::make_keyword("NULL"),
115        ParamValue::Bool(true) => Token::make_keyword("TRUE"),
116        ParamValue::Bool(false) => Token::make_keyword("FALSE"),
117        ParamValue::Int64(n) => Token::Number(n.to_string(), false),
118        ParamValue::Float64(f) => Token::Number(f.to_string(), false),
119        ParamValue::Text(s) => Token::SingleQuotedString(s.clone()),
120    })
121}
122
123#[cfg(test)]
124mod tests {
125    use super::*;
126
127    #[test]
128    fn upsert_int_and_text_params() {
129        let bound = bind_dsl(
130            "UPSERT INTO t (id, n) VALUES ($1, $2)",
131            &[ParamValue::Text("alice".into()), ParamValue::Int64(42)],
132        )
133        .unwrap();
134        assert!(
135            bound.as_str().contains("'alice'"),
136            "text param not substituted: {}",
137            bound.as_str()
138        );
139        assert!(
140            bound.as_str().contains("42"),
141            "int param not substituted: {}",
142            bound.as_str()
143        );
144        assert!(
145            !bound.as_str().contains('$'),
146            "placeholder survived: {}",
147            bound.as_str()
148        );
149    }
150
151    #[test]
152    fn search_top_k_param() {
153        let bound = bind_dsl(
154            "SEARCH v USING VECTOR(ARRAY[1.0, 0.0, 0.0], $1)",
155            &[ParamValue::Int64(5)],
156        )
157        .unwrap();
158        assert!(bound.as_str().contains(", 5)"), "got: {}", bound.as_str());
159    }
160
161    /// String literals that happen to contain `$1` must not be touched
162    /// — the tokenizer has already classified the `$1` inside quotes
163    /// as part of `Token::SingleQuotedString`, not as a placeholder.
164    #[test]
165    fn placeholder_inside_string_literal_untouched() {
166        let bound = bind_dsl(
167            "UPSERT INTO t (id, note) VALUES ($1, 'your $1 change')",
168            &[ParamValue::Text("abc".into())],
169        )
170        .unwrap();
171        assert!(
172            bound.as_str().contains("'your $1 change'"),
173            "string literal was rewritten: {}",
174            bound.as_str()
175        );
176        assert!(
177            bound.as_str().contains("'abc'"),
178            "real placeholder not bound: {}",
179            bound.as_str()
180        );
181    }
182
183    #[test]
184    fn null_param() {
185        let bound = bind_dsl(
186            "UPSERT INTO t (id, n) VALUES ($1, $2)",
187            &[ParamValue::Text("x".into()), ParamValue::Null],
188        )
189        .unwrap();
190        let s = bound.as_str();
191        assert!(s.to_uppercase().contains("NULL"), "got: {s}");
192    }
193
194    #[test]
195    fn out_of_range_placeholder_errors() {
196        let err = bind_dsl(
197            "UPSERT INTO t (id, n) VALUES ($1, $2)",
198            &[ParamValue::Text("only-one".into())],
199        )
200        .unwrap_err();
201        let msg = format!("{err:?}");
202        assert!(
203            msg.contains("$2") && msg.to_lowercase().contains("placeholder"),
204            "error must name the unresolved placeholder: {msg}"
205        );
206    }
207
208    #[test]
209    fn zero_placeholder_errors() {
210        // `$0` is not a valid pgwire parameter reference — must error,
211        // not silently pass through to the engine.
212        let err = bind_dsl(
213            "UPSERT INTO t (id) VALUES ($0)",
214            &[ParamValue::Text("x".into())],
215        )
216        .unwrap_err();
217        assert!(format!("{err:?}").contains("$0"));
218    }
219
220    #[test]
221    fn empty_params_is_noop() {
222        let sql = "UPSERT INTO t (id) VALUES ('a')";
223        let bound = bind_dsl(sql, &[]).unwrap();
224        assert_eq!(bound.as_str(), sql);
225    }
226}