1use std::collections::HashMap;
2use std::sync::Arc;
3
4use fancy_regex::Regex;
5use sqruff_lib_core::errors::SQLFluffUserError;
6use sqruff_lib_core::templaters::{RawFileSlice, TemplatedFile, TemplatedFileSlice};
7
8use crate::Formatter;
9use crate::core::config::FluffConfig;
10use crate::templaters::Templater;
11
12#[derive(Default)]
13pub struct PlaceholderTemplater;
14
15pub fn get_known_styles() -> HashMap<&'static str, Regex> {
16 let mut m = HashMap::new();
17
18 m.insert(
20 "colon",
21 Regex::new(r"(?<![:\w\\]):(?P<param_name>\w+)(?!:)").unwrap(),
22 );
23
24 m.insert(
27 "colon_nospaces",
28 Regex::new(r"(?<!:):(?P<param_name>\w+)").unwrap(),
29 );
30
31 m.insert(
33 "numeric_colon",
34 Regex::new(r"(?<![:\w\\]):(?P<param_name>\d+)").unwrap(),
35 );
36
37 m.insert(
39 "at",
40 Regex::new(r"(?<![:\w\\])@(?P<param_name>\w+)").unwrap(),
41 );
42
43 m.insert(
45 "pyformat",
46 Regex::new(r"(?<![:\w\\])%\((?P<param_name>[\w_]+)\)s").unwrap(),
47 );
48
49 m.insert(
51 "dollar",
52 Regex::new(r"(?<![:\w\\])\${?(?P<param_name>[\w_]+)}?").unwrap(),
53 );
54
55 m.insert(
57 "flyway_var",
58 Regex::new(r#"\${(?P<param_name>\w+[:\w_]+)}"#).unwrap(),
59 );
60
61 m.insert("question_mark", Regex::new(r"(?<![:\w\\])\?").unwrap());
63
64 m.insert(
66 "numeric_dollar",
67 Regex::new(r"(?<![:\w\\])\${?(?P<param_name>[\d]+)}?").unwrap(),
68 );
69
70 m.insert("percent", Regex::new(r"(?<![:\w\\])%s").unwrap());
72
73 m.insert(
75 "ampersand",
76 Regex::new(r"(?<!&)&{?(?P<param_name>[\w]+)}?").unwrap(),
77 );
78
79 m.insert(
81 "apache_camel",
82 Regex::new(r":#\$\{(?P<param_name>.+)}").unwrap(),
83 );
84
85 m
86}
87
88const NO_PARAM_OR_STYLE: &str =
89 "No param_regex nor param_style was provided to the placeholder templater.";
90
91impl PlaceholderTemplater {
92 fn derive_style(&self, config: &FluffConfig) -> Result<Regex, SQLFluffUserError> {
93 let config = config
94 .get("placeholder", "templater")
95 .as_map()
96 .ok_or(SQLFluffUserError::new(NO_PARAM_OR_STYLE.to_string()))?;
97 match (config.get("param_regex"), config.get("param_style")) {
98 (Some(_), Some(_)) => Err(SQLFluffUserError::new(
99 "Both param_regex and param_style were provided to the placeholder templater."
100 .to_string(),
101 )),
102 (None, None) => Err(SQLFluffUserError::new(NO_PARAM_OR_STYLE.to_string())),
103 (Some(param_regex), None) => {
104 let param_regex = param_regex.as_string().ok_or(SQLFluffUserError::new(
105 "Invalid param_regex for templater 'placeholder'".to_string(),
106 ))?;
107 let regex = Regex::new(param_regex).map_err(|e| {
108 SQLFluffUserError::new(format!("Invalid regex for param_regex: {e}"))
109 })?;
110 Ok(regex)
111 }
112 (None, Some(param_style)) => {
113 let param_style = param_style.as_string().ok_or(SQLFluffUserError::new(
114 "Invalid param_style for templater 'placeholder'".to_string(),
115 ))?;
116 let known_styles = get_known_styles();
117 let regex = known_styles.get(param_style).ok_or_else(|| {
118 SQLFluffUserError::new(format!(
119 "Unknown param_style '{param_style}' for templater 'placeholder'"
120 ))
121 })?;
122 Ok(regex.clone())
123 }
124 }
125 }
126}
127
128impl Templater for PlaceholderTemplater {
129 fn name(&self) -> &'static str {
130 "placeholder"
131 }
132
133 fn can_process_in_parallel(&self) -> bool {
134 true
135 }
136
137 fn description(&self) -> &'static str {
138 r#"Libraries such as SQLAlchemy or Psycopg use different parameter placeholder styles to mark where a parameter has to be inserted in the query.
139
140For example a query in SQLAlchemy can look like this:
141
142```sql
143SELECT * FROM table WHERE id = :myid
144```
145
146At runtime :myid will be replace by a value provided by the application and escaped as needed, but this is not standard SQL and cannot be parsed as is.
147
148In order to parse these queries is then necessary to replace these placeholders with sample values, and this is done with the placeholder templater.
149
150Placeholder templating can be enabled in the config using:
151
152```ini
153[sqruff]
154templater = placeholder
155```
156
157A few common styles are supported:
158
159```sql
160 -- colon
161 WHERE bla = :my_name
162
163 -- colon_nospaces
164 -- (use with caution as more prone to false positives)
165 WHERE bla = table:my_name
166
167 -- colon_optional_quotes
168 SELECT :"column" FROM :table WHERE bla = :'my_name'
169
170 -- numeric_colon
171 WHERE bla = :2
172
173 -- pyformat
174 WHERE bla = %(my_name)s
175
176 -- dollar
177 WHERE bla = $my_name or WHERE bla = ${my_name}
178
179 -- question_mark
180 WHERE bla = ?
181
182 -- numeric_dollar
183 WHERE bla = $3 or WHERE bla = ${3}
184
185 -- percent
186 WHERE bla = %s
187
188 -- ampersand
189 WHERE bla = &s or WHERE bla = &{s} or USE DATABASE MARK_{ENV}
190
191 -- apache_camel
192 WHERE bla = :#${qwe}
193
194 -- at
195 WHERE bla = @my_name
196```
197
198The can be configured by setting `param_style` in the config file. For example:
199
200```ini
201[sqruff:templater:placeholder]
202param_style = colon
203my_name = 'john'
204```
205
206then you can set sample values for each parameter, like my_name above. Notice that the value needs to be escaped as it will be replaced as a string during parsing. When the sample values aren’t provided, the templater will use parameter names themselves by default.
207
208When parameters are positional, like question_mark, then their name is simply the order in which they appear, starting with 1.
209
210```ini
211[sqruff:templater:placeholder]
212param_style = question_mark
2131 = 'john'
214```
215
216In case you nbeed a parameter style different from the ones provided, you can set `param_regex` in the config file. For example:
217
218```ini
219[sqruff:templater:placeholder]
220param_regex = __(?P<param_name>[\w_]+)__
221my_name = 'john'
222```
223
224N.B. quotes around param_regex in the config are interpreted literally by the templater. e.g. param_regex=’__(?P<param_name>[w_]+)__’ matches ‘__some_param__’ not __some_param__
225
226the named parameter param_name will be used as the key to replace, if missing, the parameter is assumed to be positional and numbers are used instead.
227
228Also consider making a pull request to the project to have your style added, it may be useful to other people and simplify your configuration."#
229 }
230
231 fn process(
232 &self,
233 in_str: &str,
234 f_name: &str,
235 config: &FluffConfig,
236 _: &Option<Arc<dyn Formatter>>,
237 ) -> Result<TemplatedFile, SQLFluffUserError> {
238 let mut template_slices = vec![];
239 let mut raw_slices = vec![];
240 let mut last_pos_raw = 0usize;
241 let mut last_pos_templated = 0;
242 let mut out_str = "".to_string();
243
244 let mut param_counter = 1;
246 let regex = self.derive_style(config)?;
247
248 let template_config = config.get("placeholder", "templater").as_map();
249
250 for cap in regex.captures_iter(in_str) {
251 let cap = cap.unwrap();
252 let span = cap.get(0).unwrap().range();
253
254 let param_name = if let Some(name) = cap.name("param_name") {
255 name.as_str().to_string()
256 } else {
257 let name = param_counter.to_string();
258 param_counter += 1;
259 name
260 };
261
262 let last_literal_length = span.start - last_pos_raw;
263 let replacement = template_config
264 .and_then(|config| config.get(¶m_name))
265 .map_or(Ok(param_name.clone()), |v| {
266 match (v.as_string(), v.as_int(), v.as_bool()) {
267 (Some(s), None, None) => Ok(s.to_string()),
268 (None, Some(i), None) => Ok(i.to_string()),
269 (None, None, Some(b)) => Ok(if b {
270 "true".to_string()
271 } else {
272 "false".to_string()
273 }),
274 _ => Err(SQLFluffUserError::new(format!(
275 "Invalid value for parameter replacement: {param_name}"
276 ))),
277 }
278 })?;
279
280 template_slices.push(TemplatedFileSlice {
282 slice_type: "literal".to_string(),
283 source_slice: last_pos_raw..span.start,
284 templated_slice: last_pos_templated..last_pos_templated + last_literal_length,
285 });
286
287 raw_slices.push(RawFileSlice::new(
288 in_str[last_pos_raw..span.start].to_string(),
289 "literal".to_string(),
290 last_pos_raw,
291 None,
292 None,
293 ));
294
295 out_str.push_str(&in_str[last_pos_raw..span.start]);
296
297 let start_template_pos = last_pos_templated + last_literal_length;
299 template_slices.push(TemplatedFileSlice {
300 slice_type: "templated".to_string(),
301 source_slice: span.clone(),
302 templated_slice: start_template_pos..start_template_pos + replacement.len(),
303 });
304
305 let raw_file_slice = RawFileSlice::new(
306 in_str[span.clone()].to_string(),
307 "templated".to_string(),
308 span.start,
309 None,
310 None,
311 );
312 raw_slices.push(raw_file_slice);
313
314 out_str.push_str(&replacement);
315
316 last_pos_raw = span.end;
318 last_pos_templated = start_template_pos + replacement.len();
319 }
320
321 if in_str.len() > last_pos_raw {
323 template_slices.push(TemplatedFileSlice {
324 slice_type: "literal".to_string(),
325 source_slice: last_pos_raw..in_str.len(),
326 templated_slice: last_pos_templated
327 ..last_pos_templated + (in_str.len() - last_pos_raw),
328 });
329
330 let raw_file_slice = RawFileSlice::new(
331 in_str[last_pos_raw..].to_string(),
332 "literal".to_string(),
333 last_pos_raw,
334 None,
335 None,
336 );
337 raw_slices.push(raw_file_slice);
338
339 out_str.push_str(&in_str[last_pos_raw..]);
340 }
341
342 let templated_file = TemplatedFile::new(
343 in_str.to_string(),
344 f_name.to_string(),
345 Some(out_str),
346 Some(template_slices),
347 Some(raw_slices),
348 )
349 .unwrap();
350
351 Ok(templated_file)
352 }
353}
354
355#[cfg(test)]
356mod tests {
357
358 use super::*;
359 use crate::core::linter::core::Linter;
360
361 #[test]
362 fn test_templater_no_replacement() {
364 let templater = PlaceholderTemplater {};
365 let in_str = "SELECT * FROM {{blah}} WHERE %(gnepr)s OR e~':'";
366 let config = FluffConfig::from_source(
367 "
368[sqruff:templater:placeholder]
369param_style = colon",
370 None,
371 );
372 let out_str = templater
373 .process(in_str, "test.sql", &config, &None)
374 .unwrap();
375 let out = out_str.templated();
376 assert_eq!(in_str, out)
377 }
378
379 #[test]
380 fn test_all_the_known_styles() {
381 let cases: [(&str, &str, &str, Vec<(&str, &str)>); 19] = [
383 (
384 "SELECT * FROM f, o, o WHERE a < 10\n\n",
385 "colon",
386 "SELECT * FROM f, o, o WHERE a < 10\n\n",
387 vec![],
388 ),
389 (
390 r#"
391SELECT user_mail, city_id
392FROM users_data
393WHERE userid = :user_id AND date > :start_date
394"#,
395 "colon",
396 r#"
397SELECT user_mail, city_id
398FROM users_data
399WHERE userid = 42 AND date > '2020-01-01'
400"#,
401 vec![
402 ("user_id", "42"),
403 ("start_date", "'2020-01-01'"),
404 ("city_ids", "(1, 2, 3)"),
405 ],
406 ),
407 (
408 r#"
409SELECT user_mail, city_id
410FROM users_data
411WHERE userid = :user_id AND date > :start_date"#,
412 "colon",
413 r#"
414SELECT user_mail, city_id
415FROM users_data
416WHERE userid = 42 AND date > '2020-01-01'"#,
417 vec![
418 ("user_id", "42"),
419 ("start_date", "'2020-01-01'"),
420 ("city_ids", "(1, 2, 3)"),
421 ],
422 ),
423 (
424 r#"
425SELECT user_mail, city_id
426FROM users_data
427WHERE (city_id) IN :city_ids
428AND date > '2020-10-01'
429 "#,
430 "colon",
431 r#"
432SELECT user_mail, city_id
433FROM users_data
434WHERE (city_id) IN (1, 2, 3)
435AND date > '2020-10-01'
436 "#,
437 vec![
438 ("user_id", "42"),
439 ("start_date", "'2020-01-01'"),
440 ("city_ids", "(1, 2, 3)"),
441 ],
442 ),
443 (
444 r#"
445SELECT user_mail, city_id
446FROM users_data
447WHERE userid = @user_id AND date > @start_date
448"#,
449 "at",
450 r#"
451SELECT user_mail, city_id
452FROM users_data
453WHERE userid = 42 AND date > '2020-01-01'
454"#,
455 vec![
456 ("user_id", "42"),
457 ("start_date", "'2020-01-01'"),
458 ("city_ids", "(1, 2, 3)"),
459 ],
460 ),
461 (
462 r#"
463SELECT user_mail, city_id
464FROM users_data
465WHERE userid = @user_id AND date > @start_date"#,
466 "at",
467 r#"
468SELECT user_mail, city_id
469FROM users_data
470WHERE userid = 42 AND date > '2020-01-01'"#,
471 vec![
472 ("user_id", "42"),
473 ("start_date", "'2020-01-01'"),
474 ("city_ids", "(1, 2, 3)"),
475 ],
476 ),
477 (
478 r#"
479SELECT user_mail, city_id
480FROM users_data
481WHERE (city_id) IN @city_ids
482AND date > '2020-10-01'
483 "#,
484 "at",
485 r#"
486SELECT user_mail, city_id
487FROM users_data
488WHERE (city_id) IN (1, 2, 3)
489AND date > '2020-10-01'
490 "#,
491 vec![
492 ("user_id", "42"),
493 ("start_date", "'2020-01-01'"),
494 ("city_ids", "(1, 2, 3)"),
495 ],
496 ),
497 (
498 r#"
499SELECT user_mail, city_id
500FROM users_data:table_suffix
501"#,
502 "colon_nospaces",
503 r#"
504SELECT user_mail, city_id
505FROM users_data42
506"#,
507 vec![("table_suffix", "42")],
508 ),
509 (
510 r#"
514SELECT user_mail, city_id, joined::date
515FROM users_date:table_suffix
516"#,
517 "colon_nospaces",
518 r#"
519SELECT user_mail, city_id, joined::date
520FROM users_date42
521"#,
522 vec![("table_suffix", "42")],
523 ),
524 (
525 r#"
526SELECT user_mail, city_id
527FROM users_data
528WHERE (city_id) IN ?
529AND date > ?
530 "#,
531 "question_mark",
532 r#"
533SELECT user_mail, city_id
534FROM users_data
535WHERE (city_id) IN (1, 2, 3, 45)
536AND date > '2020-10-01'
537 "#,
538 vec![("1", "(1, 2, 3, 45)"), ("2", "'2020-10-01'")],
539 ),
540 (
541 r#"
542SELECT user_mail, city_id
543FROM users_data
544WHERE (city_id) IN :1
545AND date > :45
546 "#,
547 "numeric_colon",
548 r#"
549SELECT user_mail, city_id
550FROM users_data
551WHERE (city_id) IN (1, 2, 3, 45)
552AND date > '2020-10-01'
553 "#,
554 vec![("1", "(1, 2, 3, 45)"), ("45", "'2020-10-01'")],
555 ),
556 (
557 r#"
558SELECT user_mail, city_id
559FROM users_data
560WHERE (city_id) IN %(city_id)s
561AND date > %(date)s
562AND someflag = %(someflag)s
563LIMIT %(limit)s
564 "#,
565 "pyformat",
566 r#"
567SELECT user_mail, city_id
568FROM users_data
569WHERE (city_id) IN (1, 2, 3, 45)
570AND date > '2020-10-01'
571AND someflag = false
572LIMIT 15
573 "#,
574 vec![
575 ("city_id", "(1, 2, 3, 45)"),
576 ("date", "'2020-10-01'"),
577 ("limit", "15"),
578 ("someflag", "false"),
579 ],
580 ),
581 (
582 r#"
583SELECT user_mail, city_id
584FROM users_data
585WHERE (city_id) IN $city_id
586AND date > $date
587OR date = ${date}
588 "#,
589 "dollar",
590 r#"
591SELECT user_mail, city_id
592FROM users_data
593WHERE (city_id) IN (1, 2, 3, 45)
594AND date > '2020-10-01'
595OR date = '2020-10-01'
596 "#,
597 vec![("city_id", "(1, 2, 3, 45)"), ("date", "'2020-10-01'")],
598 ),
599 (
600 r#"
601SELECT user_mail, city_id
602FROM users_data
603WHERE (city_id) IN $12
604AND date > $90
605 "#,
606 "numeric_dollar",
607 r#"
608SELECT user_mail, city_id
609FROM users_data
610WHERE (city_id) IN (1, 2, 3, 45)
611AND date > '2020-10-01'
612 "#,
613 vec![("12", "(1, 2, 3, 45)"), ("90", "'2020-10-01'")],
614 ),
615 (
616 r#"
617SELECT user_mail, city_id
618FROM users_data
619WHERE (city_id) IN %s
620AND date > %s
621 "#,
622 "percent",
623 r#"
624SELECT user_mail, city_id
625FROM users_data
626WHERE (city_id) IN (1, 2, 3, 45)
627AND date > '2020-10-01'
628 "#,
629 vec![("1", "(1, 2, 3, 45)"), ("2", "'2020-10-01'")],
630 ),
631 (
632 r#"
633USE DATABASE &{env}_MARKETING;
634USE SCHEMA &&EMEA;
635SELECT user_mail, city_id
636FROM users_data
637WHERE userid = &user_id AND date > &{start_date}
638 "#,
639 "ampersand",
640 r#"
641USE DATABASE PRD_MARKETING;
642USE SCHEMA &&EMEA;
643SELECT user_mail, city_id
644FROM users_data
645WHERE userid = 42 AND date > '2021-10-01'
646 "#,
647 vec![
648 ("env", "PRD"),
649 ("user_id", "42"),
650 ("start_date", "'2021-10-01'"),
651 ],
652 ),
653 (
654 "USE ${flywaydatabase}.test_schema;",
655 "flyway_var",
656 "USE test_db.test_schema;",
657 vec![("flywaydatabase", "test_db")],
658 ),
659 (
660 "SELECT metadata$filename, $1 FROM @stg_data_export_${env_name};",
661 "flyway_var",
662 "SELECT metadata$filename, $1 FROM @stg_data_export_staging;",
663 vec![("env_name", "staging")],
664 ),
665 (
666 "SELECT metadata$filename, $1 FROM @stg_data_export_${env_name};",
667 "flyway_var",
668 "SELECT metadata$filename, $1 FROM @stg_data_export_env_name;",
669 vec![],
670 ),
671 ];
672
673 for (in_str, param_style, expected_out, values) in cases {
674 let config = FluffConfig::from_source(
675 format!(
676 r#"
677[sqruff:templater:placeholder]
678param_style = {}
679{}
680"#,
681 param_style,
682 values
683 .iter()
684 .map(|(k, v)| format!("{} = {}", k, v))
685 .collect::<Vec<String>>()
686 .join("\n")
687 )
688 .as_str(),
689 None,
690 );
691 let templater = PlaceholderTemplater {};
692 let out_str = templater
693 .process(in_str, "test.sql", &config, &None)
694 .unwrap();
695 let out = out_str.templated();
696 assert_eq!(expected_out, out)
697 }
698 }
699
700 #[test]
701 fn test_templater_setup_none() {
704 let config = FluffConfig::from_source("", None);
705 let templater = PlaceholderTemplater {};
706 let in_str = "SELECT 2+2";
707 let out_str = templater.process(in_str, "test.sql", &config, &None);
708
709 assert!(out_str.is_err());
710 assert_eq!(
711 out_str.err().unwrap().value,
712 "No param_regex nor param_style was provided to the placeholder templater."
713 );
714 }
715
716 #[test]
717 fn test_templater_setup_both_provided() {
720 let config = FluffConfig::from_source(
721 r#"
722[sqruff:templater:placeholder]
723param_regex = __(?P<param_name>[\w_]+)__
724param_style = colon
725 "#,
726 None,
727 );
728 let templater = PlaceholderTemplater {};
729 let in_str = "SELECT 2+2";
730 let out_str = templater.process(in_str, "test.sql", &config, &None);
731
732 assert!(out_str.is_err());
733 assert_eq!(
734 out_str.err().unwrap().value,
735 "Both param_regex and param_style were provided to the placeholder templater."
736 );
737 }
738
739 #[test]
740 fn test_templater_custom_regex() {
742 let config = FluffConfig::from_source(
743 r#"
744[sqruff:templater:placeholder]
745param_regex = __(?P<param_name>[\w_]+)__
746my_name = john
747"#,
748 None,
749 );
750 let templater = PlaceholderTemplater {};
751 let in_str = "SELECT bla FROM blob WHERE id = __my_name__";
752 let out_str = templater.process(in_str, "test", &config, &None).unwrap();
753 let out = out_str.templated();
754 assert_eq!("SELECT bla FROM blob WHERE id = john", out)
755 }
756
757 #[test]
758 fn test_templater_styles_not_existing() {
760 let config = FluffConfig::from_source(
761 r#"
762[sqruff:templater:placeholder]
763param_style = unknown
764 "#,
765 None,
766 );
767 let templater = PlaceholderTemplater {};
768 let in_str = "SELECT * FROM {{blah}} WHERE %(gnepr)s OR e~':'";
769 let out_str = templater.process(in_str, "test.sql", &config, &None);
770
771 assert!(out_str.is_err());
772 assert_eq!(
773 out_str.err().unwrap().value,
774 "Unknown param_style 'unknown' for templater 'placeholder'"
775 );
776 }
777
778 #[test]
779 fn test_templater_placeholder() {
781 let config = FluffConfig::from_source(
782 r#"
783[sqruff]
784dialect = ansi
785templater = placeholder
786rules = all
787
788[sqruff:templater:placeholder]
789param_style = percent
790"#,
791 None,
792 );
793 let sql = "SELECT a,b FROM users WHERE a = %s";
794
795 let mut linter = Linter::new(config, None, None, false);
796 let result = linter.lint_string_wrapped(sql, true).fix_string();
797
798 assert_eq!(result, "SELECT\n a,\n b\nFROM users WHERE a = %s\n");
799 }
800}