use std::collections::HashMap;
use anyhow::Result;
use minijinja::{
machinery::{Span, Token, WhitespaceConfig},
syntax::SyntaxConfig,
};
use regex::Regex;
// Prefix of the placeholder identifiers that `pre_process` writes in place of
// Jinja interpolations (followed by a numeric id) and that `post_process`
// searches for when restoring them.
const ANCHOR_PREFIX: &str = "_jinja_";
/// A piece of the tokenized template, as grouped by `pre_process`.
#[derive(Debug)]
pub enum JinjaBlock<'a> {
/// Text of a single `Token::TemplateData` token.
Data(&'a str),
/// A run of consecutive tokens other than `Token::TemplateData`, each paired
/// with the span reported by the tokenizer.
Interpolation(Vec<(Token<'a>, Span)>),
}
/// State produced by `pre_process` and consumed by `post_process`.
///
/// Both fields hold slices borrowed from the source passed to `pre_process`.
#[derive(Default)]
pub struct JinjaContext<'a> {
// Maps each generated anchor id to the source text it replaced.
anchor_map: HashMap<String, &'a str>,
// Source text of interpolations whose second token is the identifier
// `config` or `set`; `post_process` emits these first, one per line.
header: Vec<&'a str>,
}
/// Replaces the Jinja interpolations in `source` with plain anchor identifiers.
///
/// The source is tokenized with minijinja. Template data is copied through
/// unchanged, while every run of other tokens is treated as one interpolation:
///
/// - if its second token is the identifier `config` or `set`, the original
///   text is stored in the context's header and nothing is written to the
///   output;
/// - otherwise the output receives an anchor (`ANCHOR_PREFIX` followed by a
///   running number) and the context maps that anchor to the original text.
///
/// Returns the anchored source together with the context needed by
/// [`post_process`] to restore the interpolations.
///
/// # Errors
///
/// Returns the first error reported by the tokenizer.
pub fn pre_process(source: &str) -> Result<(String, JinjaContext<'_>)> {
    // Pass 1: group the token stream into data blocks and interpolation blocks.
    let mut blocks = Vec::new();
    let mut pending = Vec::new();
    let token_stream =
        minijinja::machinery::tokenize(source, false, SyntaxConfig, WhitespaceConfig::default());
    for item in token_stream {
        let (token, span) = item?;
        match token {
            Token::TemplateData(data) => {
                // Template data terminates whatever interpolation was being collected.
                if !pending.is_empty() {
                    blocks.push(JinjaBlock::Interpolation(std::mem::take(&mut pending)));
                }
                blocks.push(JinjaBlock::Data(data));
            }
            other => pending.push((other, span)),
        }
    }
    if !pending.is_empty() {
        blocks.push(JinjaBlock::Interpolation(pending));
    }

    // Pass 2: emit data verbatim and swap interpolations for anchors.
    let mut context = JinjaContext::default();
    let mut anchored_source = String::new();
    let mut anchor_count = 0;
    for block in blocks {
        let pairs = match block {
            JinjaBlock::Data(text) => {
                anchored_source.push_str(text);
                continue;
            }
            JinjaBlock::Interpolation(pairs) => pairs,
        };

        let (tokens, spans): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
        let snippet = find_span(source, spans);

        // Index 1 is the token right after the opening delimiter token.
        let is_header = matches!(tokens.get(1), Some(Token::Ident("config" | "set")));
        if is_header {
            context.header.push(snippet);
            continue;
        }

        let anchor = format!("{ANCHOR_PREFIX}{anchor_count}");
        anchor_count += 1;
        anchored_source.push_str(&anchor);
        context.anchor_map.insert(anchor, snippet);
    }

    Ok((anchored_source, context))
}
/// Returns the slice of `source` covered by a run of token spans.
///
/// The slice starts at the character located at `start_line`/`start_col` of
/// the first span and ends just after the character located at
/// `end_line`/`end_col` of the last span. All other span fields are ignored.
///
/// Positions are matched with lines counted from 1 and the first character of
/// each line counted as column 1; newline characters have no column. Columns
/// are counted in characters, while the returned slice is cut at the
/// corresponding byte offsets so that multi-byte UTF-8 text is handled
/// correctly.
///
/// If the start position is never reached, the slice begins at the start of
/// `source`; if the end position is never reached, it runs to the end.
///
/// NOTE(review): the start column is matched as 1-based here — confirm this
/// agrees with the column convention of the spans produced by the tokenizer.
///
/// # Panics
///
/// Panics if `spans` is empty, or if the located start lies after the located
/// end.
fn find_span(source: &str, spans: Vec<Span>) -> &str {
    let start = spans
        .first()
        .expect("find_span requires at least one span");
    let end = spans
        .last()
        .expect("find_span requires at least one span");

    let mut start_index = 0;
    let mut end_index = source.len();
    let mut line = 1;
    let mut col = 0;
    // `char_indices` yields byte offsets, which is what string slicing needs;
    // a character counter would drift as soon as a multi-byte character occurs.
    for (offset, ch) in source.char_indices() {
        if ch == '\n' {
            line += 1;
            col = 0;
            continue;
        }
        col += 1;

        if line == start.start_line && col == start.start_col {
            start_index = offset;
        }
        if line == end.end_line && col == end.end_col {
            // Exclusive end: step past the whole (possibly multi-byte) character.
            end_index = offset + ch.len_utf8();
        }
    }
    &source[start_index..end_index]
}
/// Restores the Jinja interpolations that [`pre_process`] replaced by anchors.
///
/// The output starts with every header statement stored in `context`, each
/// followed by a newline. After that comes `source`, in which every match of
/// `ANCHOR_PREFIX` followed by digits is replaced by the text recorded for
/// that anchor. Matches that have no entry in the anchor map are left as they
/// are.
pub fn post_process(source: &str, context: JinjaContext) -> String {
    let mut output = String::new();

    for stmt in &context.header {
        output.push_str(stmt);
        output.push('\n');
    }

    let anchor_re = Regex::new(&format!(r"{ANCHOR_PREFIX}\d+")).unwrap();
    // Byte offset in `source` up to which text has already been copied.
    let mut cursor = 0;
    for found in anchor_re.find_iter(source) {
        let anchor = found.as_str();
        // Copy the text between the previous anchor and this one verbatim.
        output.push_str(&source[cursor..found.start()]);
        let replacement = context.anchor_map.get(anchor).copied().unwrap_or(anchor);
        output.push_str(replacement);
        cursor = found.end();
    }
    output.push_str(&source[cursor..]);

    output
}
// Unit tests for `find_span`, `pre_process` and `post_process`.
#[cfg(test)]
mod test {
use super::{post_process, pre_process, Span};
// `find_span` reads only the start line/column of the first span and the end
// line/column of the last span; the remaining fields are filled with
// arbitrary values that must not influence the result.
#[test]
fn test_find_span() {
let text = r#"line 1 col 13
line 2 col 21
some text line 3 col 31 more text
"#;
assert_eq!(
super::find_span(
text,
vec![
Span {
start_line: 2,
start_col: 9,
start_offset: 0,
end_line: 1234,
end_col: 5678,
end_offset: 0
},
Span {
start_line: 9999,
start_col: 65535,
start_offset: 0,
end_line: 3,
end_col: 31,
end_offset: 0
}
]
),
r#"line 2 col 21
some text line 3 col 31"#
);
}
// An interpolation is replaced by the `_jinja_0` anchor and its original
// text is recorded under that anchor in the context.
#[test]
fn test_pre_process() {
let src = r###"from in_process = {{ source('salesforce', 'in_process') }}"###;
let (pre_proc_text, ctx) = pre_process(src).unwrap();
insta::assert_yaml_snapshot!(pre_proc_text, @"from in_process = _jinja_0");
insta::assert_yaml_snapshot!(ctx.anchor_map["_jinja_0"], @r#"" {{ source('salesforce', 'in_process') }}""#);
}
// Round trip: `post_process` applied to the output of `pre_process` puts
// the interpolation back in place of its anchor.
#[test]
fn test_post_process() {
let src = r###"from in_process = {{ source('salesforce', 'in_process') }}"###;
let (pre_proc_text, ctx) = pre_process(src).unwrap();
let post_proc_text = post_process(&pre_proc_text, ctx);
insta::assert_yaml_snapshot!(post_proc_text, @r#""from in_process = {{ source('salesforce', 'in_process') }}""#);
}
// A `config(...)` interpolation is collected into the header and emitted
// first by `post_process`. Note that `src` is a raw string, so its `\n` is a
// literal backslash followed by `n`, not a line break.
#[test]
fn test_config_interpolation() {
let src = r###"{{ config(materialized = "table") }}\nfrom in_process = {{ source('salesforce', 'in_process') }}"###;
let (pre_proc_text, ctx) = pre_process(src).unwrap();
insta::assert_yaml_snapshot!(ctx.header, @r#"- "{{ config(materialized = \"table\") }}""#);
let post_proc_text = post_process(&pre_proc_text, ctx);
insta::assert_yaml_snapshot!(post_proc_text, @r#""{{ config(materialized = \"table\") }}\n\\nfrom in_process = {{ source('salesforce', 'in_process') }}""#);
}
}