Skip to main content

pmcp_server_toolkit/sql/
translate.rs

1//! Dialect-aware placeholder translation (CONN-03).
2//!
3//! Translates canonical `:name` named placeholders in a SQL string into the
4//! positional form each backend dialect expects, while preserving the binding
5//! order so the per-backend `execute()` impl can build a positional argument
6//! list from the caller's `&[(String, serde_json::Value)]`.
7//!
8//! The translation walks the SQL char-by-char with a small state machine
9//! ([`SqlWalker`]) that tracks string literals, line/block comments, and the
10//! placeholder substate so a `:name` inside `'...'`, `"..."`, `-- ...`, or
11//! `/* ... */` is NEVER rewritten. Each helper stays under PMAT cog 25 via the
12//! split-helper form (PATTERNS Pattern G) — no cognitive-complexity allow
13//! attribute is needed anywhere in this module.
14//!
15//! **Placeholder-recognition rule (REVIEWS H7):** in `Normal` state a `:` only
16//! begins a placeholder if the NEXT char is `[A-Za-z_]`. A `::` is a Postgres
17//! cast (consumed verbatim, the following type identifier is swallowed by the
18//! transitional `CastTypeName` state), a `:=` is a MySQL session-var assignment,
19//! and `:1bad` is malformed — all three emit the bare `:` verbatim and stay in
20//! `Normal`. This eliminates the `::text` mis-translation regression class.
21//!
22//! Public surface lives at `pmcp_server_toolkit::sql::translate_placeholders`
23//! (D-05): a free helper, NOT a trait method — every connector calls it the
24//! same way, so putting it on the trait would invite per-backend drift.
25
26// Why: dialect display names are proper nouns that clippy::doc_markdown
27// otherwise flags as needing back-ticks.
28#![allow(clippy::doc_markdown)]
29
30use super::Dialect;
31use std::fmt::Write as _;
32use std::iter::Peekable;
33use std::str::Chars;
34
35/// Result of translating canonical `:name` placeholders into a dialect's
36/// positional form, plus the binding order needed to bind values positionally.
37///
38/// Per-backend `execute()` impls destructure this and iterate `ordered_params`
39/// to bind driver-native positional parameters from the caller's
40/// `&[(String, serde_json::Value)]` named pairs.
41///
42/// # Example
43///
44/// ```
45/// use pmcp_server_toolkit::sql::{translate_placeholders, Dialect, TranslatedSql};
46///
47/// let translated: TranslatedSql =
48///     translate_placeholders("SELECT :id FROM t", Dialect::Postgres);
49/// assert_eq!(translated.sql, "SELECT $1 FROM t");
50/// assert_eq!(translated.ordered_params, vec!["id".to_string()]);
51/// ```
52#[derive(Debug, Clone, PartialEq, Eq)]
53pub struct TranslatedSql {
54    /// The SQL string with placeholders rewritten into the target dialect's
55    /// positional form (`$1`/`$2` for Postgres, `?` for MySQL/Athena, `:name`
56    /// kept for SQLite).
57    pub sql: String,
58    /// Placeholder names in positional binding order. The Nth entry names the
59    /// value that the Nth positional parameter should bind.
60    pub ordered_params: Vec<String>,
61}
62
63/// Translate canonical `:name` placeholders in `sql` into `dialect`'s
64/// positional form, returning the rewritten SQL plus the binding order.
65///
66/// Placeholders inside string literals (`'...'`, `"..."`), line comments
67/// (`-- ...`), and block comments (`/* ... */`, nested) are left verbatim. A
68/// `::text` Postgres cast, a `:=` MySQL session-var, and a malformed `:1bad`
69/// are all emitted verbatim per REVIEWS H7.
70///
71/// # Example
72///
73/// ```
74/// use pmcp_server_toolkit::sql::{translate_placeholders, Dialect};
75///
76/// // Repeated names get a fresh positional index per appearance.
77/// let t = translate_placeholders("WHERE a = :a AND b = :b AND c = :a", Dialect::Postgres);
78/// assert_eq!(t.sql, "WHERE a = $1 AND b = $2 AND c = $3");
79/// assert_eq!(t.ordered_params, vec!["a", "b", "a"]);
80///
81/// // SQLite keeps the SQL byte-identical but still records bind order.
82/// let s = translate_placeholders("SELECT :id FROM t", Dialect::Sqlite);
83/// assert_eq!(s.sql, "SELECT :id FROM t");
84/// assert_eq!(s.ordered_params, vec!["id"]);
85/// ```
86#[must_use]
87pub fn translate_placeholders(sql: &str, dialect: Dialect) -> TranslatedSql {
88    let mut walker = SqlWalker::new(sql, dialect);
89    walker.run();
90    walker.into_translated()
91}
92
93/// State of the [`SqlWalker`] char-by-char scan.
94#[derive(Debug, Clone, Copy, PartialEq, Eq)]
95enum State {
96    /// Default scanning state — placeholders are recognized here.
97    Normal,
98    /// Inside a `'...'` or `"..."` literal (the quote char is carried).
99    StringLiteral(char),
100    /// Inside a `-- ...` line comment (ends at `\n`).
101    LineComment,
102    /// Inside a `/* ... */` block comment; the `usize` tracks nesting depth.
103    BlockComment(usize),
104    /// Reading a placeholder identifier into `pending_name`.
105    Placeholder,
106    /// Transitional state after `::` — swallows the following type identifier
107    /// (e.g. `text` in `::text`) so no placeholder lookup happens mid-cast.
108    CastTypeName,
109}
110
111/// Char-by-char SQL scanner that rewrites `:name` placeholders into a dialect's
112/// positional form while skipping literals and comments.
113struct SqlWalker<'a> {
114    chars: Peekable<Chars<'a>>,
115    state: State,
116    out: String,
117    order: Vec<String>,
118    pg_index: usize,
119    dialect: Dialect,
120    pending_name: String,
121}
122
123impl<'a> SqlWalker<'a> {
124    /// Initialize the walker over `sql` for `dialect`.
125    fn new(sql: &'a str, dialect: Dialect) -> Self {
126        Self {
127            chars: sql.chars().peekable(),
128            state: State::Normal,
129            out: String::with_capacity(sql.len()),
130            order: Vec::new(),
131            pg_index: 0,
132            dialect,
133            pending_name: String::new(),
134        }
135    }
136
137    /// Drive the scan to completion, dispatching each char by current state.
138    fn run(&mut self) {
139        while let Some(c) = self.chars.next() {
140            match self.state {
141                State::Normal => self.handle_normal(c),
142                State::StringLiteral(q) => self.handle_string(c, q),
143                State::LineComment => self.handle_line_comment(c),
144                State::BlockComment(depth) => self.handle_block_comment(c, depth),
145                State::Placeholder => self.handle_placeholder(c),
146                State::CastTypeName => self.handle_cast_type_name(c),
147            }
148        }
149        // EOF inside a placeholder: emit whatever was accumulated.
150        if self.state == State::Placeholder {
151            self.emit_placeholder_from_pending();
152        }
153    }
154
155    /// Handle a char in `Normal` state. Delegates the `:`-precedence logic to
156    /// [`Self::dispatch_colon`] (REVIEWS H7).
157    fn handle_normal(&mut self, c: char) {
158        match c {
159            '\'' | '"' => {
160                self.out.push(c);
161                self.state = State::StringLiteral(c);
162            },
163            '-' if self.chars.peek() == Some(&'-') => {
164                self.out.push(c);
165                self.out.push('-');
166                self.chars.next();
167                self.state = State::LineComment;
168            },
169            '/' if self.chars.peek() == Some(&'*') => {
170                self.out.push(c);
171                self.out.push('*');
172                self.chars.next();
173                self.state = State::BlockComment(1);
174            },
175            ':' => self.dispatch_colon(),
176            _ => self.out.push(c),
177        }
178    }
179
180    /// REVIEWS H7 colon-precedence: decide whether a `:` begins a placeholder,
181    /// a `::` cast, or is a verbatim character. Assumes the `:` was just
182    /// consumed by `run()` and `self.state == Normal`.
183    fn dispatch_colon(&mut self) {
184        match self.chars.peek().copied() {
185            Some(':') => {
186                // `::` cast prefix — emit both colons verbatim and swallow the
187                // following type identifier so no placeholder lookup occurs.
188                self.out.push(':');
189                self.out.push(':');
190                self.chars.next();
191                self.state = State::CastTypeName;
192            },
193            Some(n) if is_ident_start(n) => {
194                // Valid placeholder start — consume the `:` (do not emit) and
195                // begin reading the identifier.
196                self.pending_name.clear();
197                self.state = State::Placeholder;
198            },
199            _ => self.out.push(':'),
200        }
201    }
202
203    /// Read a placeholder identifier. On a non-identifier char, flush the
204    /// placeholder and re-dispatch that char from `Normal`.
205    fn handle_placeholder(&mut self, c: char) {
206        if is_ident_continue(c) {
207            self.pending_name.push(c);
208        } else {
209            self.emit_placeholder_from_pending();
210            self.handle_normal(c);
211        }
212    }
213
214    /// Swallow the type identifier following a `::` cast, then return to Normal.
215    fn handle_cast_type_name(&mut self, c: char) {
216        self.out.push(c);
217        if !is_ident_continue(c) {
218            self.state = State::Normal;
219        }
220    }
221
222    /// Emit the dialect's positional form for `pending_name`, record the bind
223    /// order, and return to `Normal`. Entering `Placeholder` already required a
224    /// valid identifier-start, so `pending_name` is never empty here.
225    fn emit_placeholder_from_pending(&mut self) {
226        match self.dialect {
227            Dialect::Postgres => {
228                self.pg_index += 1;
229                // Writing a formatted integer into a String never fails.
230                let _ = write!(self.out, "${}", self.pg_index);
231            },
232            Dialect::MySql | Dialect::Athena => self.out.push('?'),
233            Dialect::Sqlite => {
234                let _ = write!(self.out, ":{}", self.pending_name);
235            },
236        }
237        self.order.push(std::mem::take(&mut self.pending_name));
238        self.state = State::Normal;
239    }
240
241    /// Handle a char inside a `'...'` / `"..."` literal. A doubled quote
242    /// (`''` / `""`) is an escape and stays inside the literal.
243    fn handle_string(&mut self, c: char, q: char) {
244        self.out.push(c);
245        if c == q {
246            if self.chars.peek() == Some(&q) {
247                self.out.push(q);
248                self.chars.next();
249            } else {
250                self.state = State::Normal;
251            }
252        }
253    }
254
255    /// Handle a char inside a `-- ...` line comment; ends at newline.
256    fn handle_line_comment(&mut self, c: char) {
257        self.out.push(c);
258        if c == '\n' {
259            self.state = State::Normal;
260        }
261    }
262
263    /// Handle a char inside a `/* ... */` block comment, tracking nesting.
264    fn handle_block_comment(&mut self, c: char, depth: usize) {
265        self.out.push(c);
266        if c == '*' && self.chars.peek() == Some(&'/') {
267            self.out.push('/');
268            self.chars.next();
269            self.state = if depth <= 1 {
270                State::Normal
271            } else {
272                State::BlockComment(depth - 1)
273            };
274        } else if c == '/' && self.chars.peek() == Some(&'*') {
275            self.out.push('*');
276            self.chars.next();
277            self.state = State::BlockComment(depth + 1);
278        }
279    }
280
281    /// Consume the walker into its [`TranslatedSql`] result.
282    fn into_translated(self) -> TranslatedSql {
283        TranslatedSql {
284            sql: self.out,
285            ordered_params: self.order,
286        }
287    }
288}
289
290/// `true` if `c` can start a `:name` placeholder identifier (`[A-Za-z_]`).
291fn is_ident_start(c: char) -> bool {
292    c.is_ascii_alphabetic() || c == '_'
293}
294
295/// `true` if `c` can continue a placeholder identifier (`[A-Za-z0-9_]`).
296fn is_ident_continue(c: char) -> bool {
297    c.is_ascii_alphanumeric() || c == '_'
298}
299
300#[cfg(test)]
301mod proptests {
302    use super::*;
303    use proptest::prelude::*;
304
305    proptest! {
306        /// Invariant 1: idempotence for `:name`-free SQL — `translate.sql == input`
307        /// for every dialect, with no recorded binds.
308        #[test]
309        fn idempotence_no_placeholders(s in "[A-Za-z0-9 _\\.,;\\(\\)=]*") {
310            for d in [Dialect::Postgres, Dialect::MySql, Dialect::Athena, Dialect::Sqlite] {
311                let t = translate_placeholders(&s, d);
312                prop_assert_eq!(&t.sql, &s);
313                prop_assert!(t.ordered_params.is_empty());
314            }
315        }
316
317        /// Invariant 2: bind-order preservation — `ordered_params` lists
318        /// placeholder names left-to-right in their textual order.
319        #[test]
320        fn bind_order_preserved(names in proptest::collection::vec("[a-z]{1,5}", 1..=5)) {
321            let sql = names.iter().map(|n| format!(":{n}")).collect::<Vec<_>>().join(", ");
322            let t = translate_placeholders(&sql, Dialect::Postgres);
323            prop_assert_eq!(t.ordered_params, names);
324        }
325
326        /// Invariant 3: Postgres positional indexing — `$1..=$n` are present and
327        /// contiguous, and their count equals `ordered_params.len()`.
328        #[test]
329        fn postgres_positional_indexing(names in proptest::collection::vec("[a-z]{1,5}", 1..=5)) {
330            let sql = names.iter().map(|n| format!(":{n}")).collect::<Vec<_>>().join(", ");
331            let t = translate_placeholders(&sql, Dialect::Postgres);
332            prop_assert_eq!(t.ordered_params.len(), names.len());
333            for i in 1..=names.len() {
334                let token = format!("${i}");
335                prop_assert!(t.sql.contains(&token));
336            }
337            // No gap above n.
338            let above = format!("${}", names.len() + 1);
339            prop_assert!(!t.sql.contains(&above));
340        }
341
342        /// Invariant 4: SQLite identity — `Dialect::Sqlite` keeps SQL
343        /// byte-identical; only `ordered_params` differs.
344        #[test]
345        fn sqlite_identity(s in any::<String>()) {
346            let t = translate_placeholders(&s, Dialect::Sqlite);
347            prop_assert_eq!(t.sql, s);
348        }
349
350        /// Invariant 5: no panic on arbitrary `&str` input across all dialects.
351        #[test]
352        fn no_panic_on_arbitrary_input(s in any::<String>()) {
353            for d in [Dialect::Postgres, Dialect::MySql, Dialect::Athena, Dialect::Sqlite] {
354                let _ = translate_placeholders(&s, d);
355            }
356        }
357    }
358}
359
360#[cfg(test)]
361mod unit_tests {
362    use super::*;
363
364    fn t(sql: &str, d: Dialect) -> TranslatedSql {
365        translate_placeholders(sql, d)
366    }
367
368    #[test]
369    fn empty_input_is_identity() {
370        let r = t("", Dialect::Postgres);
371        assert_eq!(r.sql, "");
372        assert!(r.ordered_params.is_empty());
373    }
374
375    #[test]
376    fn no_placeholder_is_identity_mysql() {
377        let r = t("SELECT 1", Dialect::MySql);
378        assert_eq!(r.sql, "SELECT 1");
379        assert!(r.ordered_params.is_empty());
380    }
381
382    #[test]
383    fn single_placeholder_postgres() {
384        let r = t("SELECT :id FROM t", Dialect::Postgres);
385        assert_eq!(r.sql, "SELECT $1 FROM t");
386        assert_eq!(r.ordered_params, vec!["id"]);
387    }
388
389    #[test]
390    fn single_placeholder_mysql() {
391        let r = t("SELECT :id FROM t", Dialect::MySql);
392        assert_eq!(r.sql, "SELECT ? FROM t");
393        assert_eq!(r.ordered_params, vec!["id"]);
394    }
395
396    #[test]
397    fn single_placeholder_athena() {
398        let r = t("SELECT :id FROM t", Dialect::Athena);
399        assert_eq!(r.sql, "SELECT ? FROM t");
400        assert_eq!(r.ordered_params, vec!["id"]);
401    }
402
403    #[test]
404    fn single_placeholder_sqlite_is_identity_with_bind_order() {
405        let r = t("SELECT :id FROM t", Dialect::Sqlite);
406        assert_eq!(r.sql, "SELECT :id FROM t");
407        assert_eq!(r.ordered_params, vec!["id"]);
408    }
409
410    #[test]
411    fn repeated_name_gets_fresh_index_postgres() {
412        let r = t("WHERE a = :a AND b = :b AND c = :a", Dialect::Postgres);
413        assert_eq!(r.sql, "WHERE a = $1 AND b = $2 AND c = $3");
414        assert_eq!(r.ordered_params, vec!["a", "b", "a"]);
415    }
416
417    #[test]
418    fn three_distinct_names_all_dialects_match_must_haves() {
419        let sql = "SELECT :id FROM t WHERE x = :x AND y = :id";
420        let pg = t(sql, Dialect::Postgres);
421        assert_eq!(pg.sql, "SELECT $1 FROM t WHERE x = $2 AND y = $3");
422        assert_eq!(pg.ordered_params, vec!["id", "x", "id"]);
423
424        let my = t(sql, Dialect::MySql);
425        assert_eq!(my.sql, "SELECT ? FROM t WHERE x = ? AND y = ?");
426        assert_eq!(my.ordered_params, vec!["id", "x", "id"]);
427
428        let at = t(sql, Dialect::Athena);
429        assert_eq!(at.sql, "SELECT ? FROM t WHERE x = ? AND y = ?");
430        assert_eq!(at.ordered_params, vec!["id", "x", "id"]);
431
432        let lite = t(sql, Dialect::Sqlite);
433        assert_eq!(lite.sql, sql);
434        assert_eq!(lite.ordered_params, vec!["id", "x", "id"]);
435    }
436
437    #[test]
438    fn placeholder_inside_string_literal_not_translated() {
439        let r = t("SELECT 'WHERE name = :foo' AS x", Dialect::Postgres);
440        assert_eq!(r.sql, "SELECT 'WHERE name = :foo' AS x");
441        assert!(r.ordered_params.is_empty());
442    }
443
444    #[test]
445    fn doubled_single_quote_escape_stays_in_literal() {
446        let r = t("SELECT 'it''s :foo' AS x", Dialect::Postgres);
447        assert_eq!(r.sql, "SELECT 'it''s :foo' AS x");
448        assert!(r.ordered_params.is_empty());
449    }
450
451    #[test]
452    fn double_quoted_identifier_skips_placeholder() {
453        let r = t("SELECT \"col:name\" FROM t", Dialect::Postgres);
454        assert_eq!(r.sql, "SELECT \"col:name\" FROM t");
455        assert!(r.ordered_params.is_empty());
456    }
457
458    #[test]
459    fn placeholder_in_line_comment_not_translated() {
460        let r = t("SELECT 1 -- bind :id here", Dialect::Postgres);
461        assert_eq!(r.sql, "SELECT 1 -- bind :id here");
462        assert!(r.ordered_params.is_empty());
463    }
464
465    #[test]
466    fn line_comment_ends_at_newline() {
467        let r = t("SELECT 1 -- :a\nWHERE x = :b", Dialect::Postgres);
468        assert_eq!(r.sql, "SELECT 1 -- :a\nWHERE x = $1");
469        assert_eq!(r.ordered_params, vec!["b"]);
470    }
471
472    #[test]
473    fn placeholder_in_block_comment_not_translated() {
474        let r = t("SELECT /* :foo */ 1", Dialect::Postgres);
475        assert_eq!(r.sql, "SELECT /* :foo */ 1");
476        assert!(r.ordered_params.is_empty());
477    }
478
479    #[test]
480    fn nested_block_comment_tracked_via_depth() {
481        let r = t("SELECT /* /* :foo */ :bar */ :baz", Dialect::Postgres);
482        assert_eq!(r.sql, "SELECT /* /* :foo */ :bar */ $1");
483        assert_eq!(r.ordered_params, vec!["baz"]);
484    }
485
486    // ---- REVIEWS H7 mandatory named tests ----
487
488    #[test]
489    fn postgres_double_colon_cast_preserves_text_identifier() {
490        let r = t("SELECT :id::text FROM t", Dialect::Postgres);
491        assert_eq!(
492            r,
493            TranslatedSql {
494                sql: "SELECT $1::text FROM t".into(),
495                ordered_params: vec!["id".into()],
496            }
497        );
498    }
499
500    #[test]
501    fn postgres_double_colon_int_cast_no_placeholder() {
502        let r = t("SELECT 1::int", Dialect::Postgres);
503        assert_eq!(
504            r,
505            TranslatedSql {
506                sql: "SELECT 1::int".into(),
507                ordered_params: vec![],
508            }
509        );
510    }
511
512    #[test]
513    fn mysql_session_variable_assignment_not_a_placeholder() {
514        let r = t("SET @x := 5", Dialect::MySql);
515        assert_eq!(
516            r,
517            TranslatedSql {
518                sql: "SET @x := 5".into(),
519                ordered_params: vec![],
520            }
521        );
522    }
523
524    #[test]
525    fn colon_followed_by_digit_emits_verbatim() {
526        let r = t("SELECT :1bad FROM t", Dialect::Postgres);
527        assert_eq!(
528            r,
529            TranslatedSql {
530                sql: "SELECT :1bad FROM t".into(),
531                ordered_params: vec![],
532            }
533        );
534    }
535
536    #[test]
537    fn string_literal_cast_both_colons_verbatim() {
538        let r = t("SELECT 'foo'::text", Dialect::Postgres);
539        assert_eq!(r.sql, "SELECT 'foo'::text");
540        assert!(r.ordered_params.is_empty());
541    }
542
543    #[test]
544    fn placeholder_then_cast_then_placeholder() {
545        let r = t("SELECT :a::text, :b FROM t", Dialect::Postgres);
546        assert_eq!(r.sql, "SELECT $1::text, $2 FROM t");
547        assert_eq!(r.ordered_params, vec!["a", "b"]);
548    }
549
550    #[test]
551    fn lone_colon_at_eof_emits_verbatim() {
552        let r = t("SELECT 1:", Dialect::Postgres);
553        assert_eq!(r.sql, "SELECT 1:");
554        assert!(r.ordered_params.is_empty());
555    }
556
557    #[test]
558    fn underscore_leading_placeholder_name() {
559        let r = t("WHERE x = :_id", Dialect::Postgres);
560        assert_eq!(r.sql, "WHERE x = $1");
561        assert_eq!(r.ordered_params, vec!["_id"]);
562    }
563
564    #[test]
565    fn unterminated_literal_does_not_panic() {
566        let r = t("SELECT 'unterminated :foo", Dialect::Postgres);
567        // Remainder is treated as literal content; :foo is NOT translated.
568        assert_eq!(r.sql, "SELECT 'unterminated :foo");
569        assert!(r.ordered_params.is_empty());
570    }
571
572    #[test]
573    fn unterminated_block_comment_does_not_panic() {
574        let r = t("SELECT /* :foo", Dialect::Postgres);
575        assert_eq!(r.sql, "SELECT /* :foo");
576        assert!(r.ordered_params.is_empty());
577    }
578
579    #[test]
580    fn placeholder_at_eof_is_emitted() {
581        let r = t("WHERE id = :id", Dialect::Postgres);
582        assert_eq!(r.sql, "WHERE id = $1");
583        assert_eq!(r.ordered_params, vec!["id"]);
584    }
585}