1use regex::Regex;
2use std::sync::LazyLock;
3
4#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
6pub struct RefCall {
7 pub package: Option<String>,
9 pub name: String,
11}
12
13#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
15pub struct SourceCall {
16 pub source_name: String,
18 pub table_name: String,
20}
21
22static JINJA_COMMENT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{#[\s\S]*?#\}").unwrap());
23
24static REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
27 Regex::new(
28 r#"(?x)
29 \{\{-?\s*
30 ref\s*\(\s*
31 (?:
32 # Two-argument form: ref('pkg', 'name') or ref("pkg", "name")
33 (?:['"]([^'"]+)['"]\s*,\s*['"]([^'"]+)['"])
34 |
35 # Single-argument form: ref('name') or ref("name")
36 ['"]([^'"]+)['"]
37 )
38 \s*\)\s*
39 -?\}\}
40 "#,
41 )
42 .unwrap()
43});
44
45static SOURCE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
47 Regex::new(
48 r#"(?x)
49 \{\{-?\s*
50 source\s*\(\s*
51 ['"]([^'"]+)['"]\s*,\s*['"]([^'"]+)['"]
52 \s*\)\s*
53 -?\}\}
54 "#,
55 )
56 .unwrap()
57});
58
59fn strip_jinja_comments(sql: &str) -> String {
61 JINJA_COMMENT.replace_all(sql, "").to_string()
62}
63
64pub fn extract_all(sql: &str, macro_prefix: &str) -> super::jinja::JinjaExtraction {
70 extract_all_with_vars(sql, macro_prefix, &std::collections::HashMap::new())
71}
72
73pub fn extract_all_with_vars(
75 sql: &str,
76 macro_prefix: &str,
77 vars: &std::collections::HashMap<String, serde_json::Value>,
78) -> super::jinja::JinjaExtraction {
79 if let Some(ext) = super::jinja::extract_via_jinja_with_vars(sql, macro_prefix, vars) {
80 return ext;
81 }
82 super::jinja::JinjaExtraction {
83 refs: extract_refs_regex(sql),
84 sources: extract_sources_regex(sql),
85 config: extract_config_regex(sql),
86 }
87}
88
89pub fn extract_refs_and_sources(sql: &str, macro_prefix: &str) -> (Vec<RefCall>, Vec<SourceCall>) {
95 extract_refs_and_sources_with_vars(sql, macro_prefix, &std::collections::HashMap::new())
96}
97
98pub fn extract_refs_and_sources_with_vars(
100 sql: &str,
101 macro_prefix: &str,
102 vars: &std::collections::HashMap<String, serde_json::Value>,
103) -> (Vec<RefCall>, Vec<SourceCall>) {
104 if let Some(ext) = super::jinja::extract_via_jinja_with_vars(sql, macro_prefix, vars) {
105 return (ext.refs, ext.sources);
106 }
107 (extract_refs_regex(sql), extract_sources_regex(sql))
108}
109
110pub fn extract_refs(sql: &str) -> Vec<RefCall> {
112 extract_refs_and_sources(sql, "").0
113}
114
115pub fn extract_sources(sql: &str) -> Vec<SourceCall> {
117 extract_refs_and_sources(sql, "").1
118}
119
120fn extract_refs_regex(sql: &str) -> Vec<RefCall> {
122 let cleaned = strip_jinja_comments(sql);
123 let mut refs = Vec::new();
124
125 for cap in REF_PATTERN.captures_iter(&cleaned) {
126 if let (Some(pkg), Some(name)) = (cap.get(1), cap.get(2)) {
127 refs.push(RefCall {
128 package: Some(pkg.as_str().to_string()),
129 name: name.as_str().to_string(),
130 });
131 } else if let Some(name) = cap.get(3) {
132 refs.push(RefCall {
133 package: None,
134 name: name.as_str().to_string(),
135 });
136 }
137 }
138
139 refs
140}
141
142fn extract_sources_regex(sql: &str) -> Vec<SourceCall> {
144 let cleaned = strip_jinja_comments(sql);
145 let mut sources = Vec::new();
146
147 for cap in SOURCE_PATTERN.captures_iter(&cleaned) {
148 sources.push(SourceCall {
149 source_name: cap[1].to_string(),
150 table_name: cap[2].to_string(),
151 });
152 }
153
154 sources
155}
156
157#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
159pub struct SqlConfig {
160 pub materialized: Option<String>,
161 pub tags: Vec<String>,
162}
163
164static CONFIG_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
166 Regex::new(
167 r#"(?x)
168 \{\{-?\s*
169 config\s*\(
170 ([\s\S]*?)
171 \)\s*
172 -?\}\}
173 "#,
174 )
175 .unwrap()
176});
177
178static MATERIALIZED_PATTERN: LazyLock<Regex> =
180 LazyLock::new(|| Regex::new(r#"materialized\s*=\s*['"]([^'"]+)['"]"#).unwrap());
181
182static TAGS_PATTERN: LazyLock<Regex> =
184 LazyLock::new(|| Regex::new(r#"tags\s*=\s*\[([^\]]*)\]"#).unwrap());
185
186static TAG_VALUE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"['"]([^'"]+)['"]"#).unwrap());
188
189pub fn extract_config(sql: &str, macro_prefix: &str) -> SqlConfig {
192 if let Some(ext) = super::jinja::extract_via_jinja(sql, macro_prefix) {
193 return ext.config;
194 }
195 extract_config_regex(sql)
196}
197
198fn extract_config_regex(sql: &str) -> SqlConfig {
200 let cleaned = strip_jinja_comments(sql);
201 let mut config = SqlConfig::default();
202
203 if let Some(cap) = CONFIG_PATTERN.captures(&cleaned) {
204 let inner = &cap[1];
205
206 if let Some(mat) = MATERIALIZED_PATTERN.captures(inner) {
207 config.materialized = Some(mat[1].to_string());
208 }
209
210 if let Some(tags_cap) = TAGS_PATTERN.captures(inner) {
211 let tags_inner = &tags_cap[1];
212 config.tags = TAG_VALUE
213 .captures_iter(tags_inner)
214 .map(|c| c[1].to_string())
215 .collect();
216 }
217 }
218
219 config
220}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs ref extraction, requires exactly one result, and returns it.
    fn sole_ref(sql: &str) -> RefCall {
        let mut found = extract_refs(sql);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    /// Runs source extraction, requires exactly one result, and returns it.
    fn sole_source(sql: &str) -> SourceCall {
        let mut found = extract_sources(sql);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    /// Config extraction with an empty macro prefix.
    fn config_of(sql: &str) -> SqlConfig {
        extract_config(sql, "")
    }

    // ---- ref / source extraction ----

    #[test]
    fn test_single_ref() {
        let found = sole_ref("SELECT * FROM {{ ref('stg_orders') }}");
        assert_eq!(found.name, "stg_orders");
        assert!(found.package.is_none());
    }

    #[test]
    fn test_double_quoted_ref() {
        let found = sole_ref(r#"SELECT * FROM {{ ref("stg_orders") }}"#);
        assert_eq!(found.name, "stg_orders");
    }

    #[test]
    fn test_two_arg_ref() {
        let found = sole_ref("SELECT * FROM {{ ref('other_project', 'stg_orders') }}");
        assert_eq!(found.package.as_deref(), Some("other_project"));
        assert_eq!(found.name, "stg_orders");
    }

    #[test]
    fn test_whitespace_control() {
        let found = sole_ref("SELECT * FROM {{- ref('stg_orders') -}}");
        assert_eq!(found.name, "stg_orders");
    }

    #[test]
    fn test_multiple_refs() {
        let sql = r#"
            SELECT
                o.*,
                c.name
            FROM {{ ref('stg_orders') }} o
            JOIN {{ ref('stg_customers') }} c ON o.customer_id = c.id
        "#;
        // Comparing the whole list checks both the count and the order.
        let names: Vec<String> = extract_refs(sql).into_iter().map(|r| r.name).collect();
        assert_eq!(names, ["stg_orders", "stg_customers"]);
    }

    #[test]
    fn test_source() {
        let found = sole_source("SELECT * FROM {{ source('raw', 'orders') }}");
        assert_eq!(found.source_name, "raw");
        assert_eq!(found.table_name, "orders");
    }

    #[test]
    fn test_source_whitespace_control() {
        let found = sole_source("SELECT * FROM {{- source('raw', 'orders') -}}");
        assert_eq!(found.source_name, "raw");
    }

    #[test]
    fn test_strip_jinja_comments() {
        let sql = r#"
            {# This is a comment with {{ ref('should_be_ignored') }} #}
            SELECT * FROM {{ ref('actual_model') }}
        "#;
        let found = sole_ref(sql);
        assert_eq!(found.name, "actual_model");
    }

    #[test]
    fn test_mixed_refs_and_sources() {
        let sql = r#"
            SELECT *
            FROM {{ source('raw', 'orders') }}
            JOIN {{ ref('stg_customers') }} ON 1=1
        "#;
        assert_eq!(extract_refs(sql).len(), 1);
        assert_eq!(extract_sources(sql).len(), 1);
    }

    #[test]
    fn test_no_refs() {
        assert!(extract_refs("SELECT 1 as id").is_empty());
    }

    #[test]
    fn test_extra_spaces() {
        let found = sole_ref("SELECT * FROM {{ ref( 'stg_orders' ) }}");
        assert_eq!(found.name, "stg_orders");
    }

    // ---- config extraction ----

    #[test]
    fn test_config_materialized() {
        let cfg = config_of("{{ config(materialized='incremental') }}\nSELECT 1");
        assert_eq!(cfg.materialized.as_deref(), Some("incremental"));
        assert!(cfg.tags.is_empty());
    }

    #[test]
    fn test_config_materialized_double_quotes() {
        let cfg = config_of(r#"{{ config(materialized="table") }}"#);
        assert_eq!(cfg.materialized.as_deref(), Some("table"));
    }

    #[test]
    fn test_config_tags() {
        let cfg = config_of("{{ config(tags=['nightly', 'finance']) }}\nSELECT 1");
        assert_eq!(cfg.tags, vec!["nightly", "finance"]);
    }

    #[test]
    fn test_config_both() {
        let cfg = config_of("{{ config(materialized='view', tags=['daily']) }}\nSELECT 1");
        assert_eq!(cfg.materialized.as_deref(), Some("view"));
        assert_eq!(cfg.tags, vec!["daily"]);
    }

    #[test]
    fn test_config_whitespace_control() {
        let cfg = config_of("{{- config(materialized='ephemeral') -}}\nSELECT 1");
        assert_eq!(cfg.materialized.as_deref(), Some("ephemeral"));
    }

    #[test]
    fn test_config_multiline() {
        let sql = r#"{{
            config(
                materialized='incremental',
                tags=['nightly', 'warehouse']
            )
        }}
        SELECT 1"#;
        let cfg = config_of(sql);
        assert_eq!(cfg.materialized.as_deref(), Some("incremental"));
        assert_eq!(cfg.tags, vec!["nightly", "warehouse"]);
    }

    #[test]
    fn test_no_config() {
        let cfg = config_of("SELECT * FROM {{ ref('orders') }}");
        assert!(cfg.materialized.is_none());
        assert!(cfg.tags.is_empty());
    }

    #[test]
    fn test_config_in_comment_ignored() {
        let sql = r#"
            {# {{ config(materialized='table') }} #}
            SELECT 1
        "#;
        assert!(config_of(sql).materialized.is_none());
    }
}