use regex::Regex;
use std::sync::LazyLock;
use crate::wrapping::LineWrapper;
use crate::wrapping::atomic_patterns::{
SINGLE_HTML_COMMENT, SINGLE_JINJA_COMMENT, SINGLE_JINJA_TAG, SINGLE_JINJA_VAR,
};
use crate::wrapping::block_heuristics::{
line_is_block_content, line_is_list_item, line_is_table_row, normalize_table_separator,
};
/// Regex matching any one of the single-construct patterns: a Jinja tag, a
/// Jinja comment, a Jinja variable, or an HTML comment.
///
/// The four sub-patterns are joined with `|` and compiled under the `(?s)`
/// flag (dot matches newline).
///
/// # Panics
///
/// Panics on first use if the combined pattern is not a valid regex.
pub(crate) static TEMPLATE_TAG_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    let alternation = [
        SINGLE_JINJA_TAG.pattern,
        SINGLE_JINJA_COMMENT.pattern,
        SINGLE_JINJA_VAR.pattern,
        SINGLE_HTML_COMMENT.pattern,
    ]
    .join("|");
    let source = format!("(?s){alternation}");
    Regex::new(&source).expect("valid TEMPLATE_TAG_PATTERN regex")
});
/// Regex matching a closing delimiter immediately followed by an opening
/// delimiter of the same construct kind (Jinja tag, Jinja comment, Jinja
/// variable, HTML comment).
///
/// Every alternative contributes two capture groups: the closing delimiter,
/// then the opening delimiter.
/// NOTE(review): the group numbering (1, 2), (3, 4), ... assumes the
/// delimiter sub-patterns contain no capture groups of their own — confirm.
///
/// # Panics
///
/// Panics on first use if the combined pattern is not a valid regex.
static ADJACENT_TAGS_RE: LazyLock<Regex> = LazyLock::new(|| {
    // (closing delimiter regex, opening delimiter regex) per construct kind.
    let delimiter_pairs = [
        (&SINGLE_JINJA_TAG.close_re, &SINGLE_JINJA_TAG.open_re),
        (&SINGLE_JINJA_COMMENT.close_re, &SINGLE_JINJA_COMMENT.open_re),
        (&SINGLE_JINJA_VAR.close_re, &SINGLE_JINJA_VAR.open_re),
        (&SINGLE_HTML_COMMENT.close_re, &SINGLE_HTML_COMMENT.open_re),
    ];
    let pattern = delimiter_pairs
        .iter()
        .map(|(close, open)| format!("({close})({open})"))
        .collect::<Vec<_>>()
        .join("|");
    Regex::new(&pattern).expect("valid ADJACENT_TAGS_RE regex")
});
/// Regex matching a closing delimiter, exactly one space, and then an opening
/// delimiter of the same construct kind (Jinja tag, Jinja comment, Jinja
/// variable, HTML comment).
///
/// Every alternative contributes two capture groups: the closing delimiter,
/// then the opening delimiter; the separating space is not captured.
///
/// # Panics
///
/// Panics on first use if the combined pattern is not a valid regex.
static DENORMALIZE_TAGS_RE: LazyLock<Regex> = LazyLock::new(|| {
    // (closing delimiter regex, opening delimiter regex) per construct kind.
    let delimiter_pairs = [
        (&SINGLE_JINJA_TAG.close_re, &SINGLE_JINJA_TAG.open_re),
        (&SINGLE_JINJA_COMMENT.close_re, &SINGLE_JINJA_COMMENT.open_re),
        (&SINGLE_JINJA_VAR.close_re, &SINGLE_JINJA_VAR.open_re),
        (&SINGLE_HTML_COMMENT.close_re, &SINGLE_HTML_COMMENT.open_re),
    ];
    let pattern = delimiter_pairs
        .iter()
        .map(|(close, open)| format!("({close}) ({open})"))
        .collect::<Vec<_>>()
        .join("|");
    Regex::new(&pattern).expect("valid DENORMALIZE_TAGS_RE regex")
});
/// Regex matching a closing delimiter, optional whitespace, and then an
/// opening delimiter that is followed (after optional whitespace) by `/`.
///
/// The part from the opening delimiter through the `/` is captured in a named
/// group that identifies the construct kind: `closing_tag`,
/// `closing_comment`, `closing_var` or `closing_html`.
///
/// # Panics
///
/// Panics on first use if the combined pattern is not a valid regex.
static MULTILINE_CLOSING_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    // (capture group name, closing delimiter regex, opening delimiter regex).
    let alternatives = [
        ("closing_tag", &SINGLE_JINJA_TAG.close_re, &SINGLE_JINJA_TAG.open_re),
        ("closing_comment", &SINGLE_JINJA_COMMENT.close_re, &SINGLE_JINJA_COMMENT.open_re),
        ("closing_var", &SINGLE_JINJA_VAR.close_re, &SINGLE_JINJA_VAR.open_re),
        ("closing_html", &SINGLE_HTML_COMMENT.close_re, &SINGLE_HTML_COMMENT.open_re),
    ];
    let pattern = alternatives
        .iter()
        .map(|(group, close, open)| format!(r"{close}\s*(?P<{group}>{open}\s*/)"))
        .collect::<Vec<_>>()
        .join("|");
    Regex::new(&pattern).expect("valid MULTILINE_CLOSING_PATTERN regex")
});
/// Inserts a single space between a closing delimiter and an opening
/// delimiter that directly follows it.
///
/// Every match of `ADJACENT_TAGS_RE` is rewritten as `"<close> <open>"`; text
/// that does not match is copied through unchanged.
///
/// # Returns
///
/// A new `String` containing the rewritten text.
pub fn normalize_adjacent_tags(text: &str) -> String {
    ADJACENT_TAGS_RE
        .replace_all(text, |caps: &regex::Captures<'_>| {
            // Capture groups come in (close, open) pairs: (1, 2), (3, 4), ...
            // Only the pair belonging to the alternative that matched is set.
            (1..caps.len())
                .step_by(2)
                .find_map(|i| Some((caps.get(i)?, caps.get(i + 1)?)))
                .map_or_else(
                    // No complete pair: keep the matched text as it is.
                    || caps[0].to_string(),
                    |(close, open)| format!("{} {}", close.as_str(), open.as_str()),
                )
        })
        .into_owned()
}
/// Removes the single space between a closing delimiter and the opening
/// delimiter that follows it.
///
/// Every match of `DENORMALIZE_TAGS_RE` is rewritten as `"<close><open>"`;
/// text that does not match is copied through unchanged.
///
/// # Returns
///
/// A new `String` containing the rewritten text.
pub fn denormalize_adjacent_tags(text: &str) -> String {
    DENORMALIZE_TAGS_RE
        .replace_all(text, |caps: &regex::Captures<'_>| {
            // Capture groups come in (close, open) pairs: (1, 2), (3, 4), ...
            // Only the pair belonging to the alternative that matched is set.
            (1..caps.len())
                .step_by(2)
                .find_map(|i| Some((caps.get(i)?, caps.get(i + 1)?)))
                .map_or_else(
                    // No complete pair: keep the matched text as it is.
                    || caps[0].to_string(),
                    |(close, open)| format!("{}{}", close.as_str(), open.as_str()),
                )
        })
        .into_owned()
}
/// Returns `true` when `line` has no leading whitespace, begins with one of
/// the known opening delimiters and, ignoring trailing whitespace, ends with
/// one of the known closing delimiters.
///
/// The opening and closing delimiters are checked independently, so they do
/// not have to belong to the same construct kind. Empty and whitespace-only
/// lines yield `false`.
fn is_tag_only_line(line: &str) -> bool {
    // An empty line never starts with whitespace, so this guard alone covers
    // the "non-empty and indented" case.
    if line.starts_with(char::is_whitespace) {
        return false;
    }
    let content = line.trim();
    if content.is_empty() {
        return false;
    }

    let open_delims = [
        SINGLE_JINJA_TAG.open_delim,
        SINGLE_JINJA_COMMENT.open_delim,
        SINGLE_JINJA_VAR.open_delim,
        SINGLE_HTML_COMMENT.open_delim,
    ];
    let close_delims = [
        SINGLE_JINJA_TAG.close_delim,
        SINGLE_JINJA_COMMENT.close_delim,
        SINGLE_JINJA_VAR.close_delim,
        SINGLE_HTML_COMMENT.close_delim,
    ];

    open_delims.iter().any(|delim| content.starts_with(*delim))
        && close_delims.iter().any(|delim| content.ends_with(*delim))
}
/// Inserts blank lines between tag-only lines and adjacent block content.
///
/// The text is split on `'\n'`. For each line after the first, when the
/// previous line is not blank, an empty line is emitted before the current
/// line if:
///
/// * the previous line is a tag-only line and the current line is block
///   content, and/or
/// * the previous line is block content and the current line is a tag-only
///   line.
///
/// The two conditions are evaluated independently. Text without any tag-only
/// line is returned unchanged.
pub fn preprocess_tag_block_spacing(text: &str) -> String {
    let lines: Vec<&str> = text.split('\n').collect();
    if !lines.iter().any(|candidate| is_tag_only_line(candidate)) {
        return text.to_string();
    }

    let mut spaced: Vec<&str> = Vec::new();
    let mut previous: Option<&str> = None;
    for &current in &lines {
        // Only a non-blank predecessor can trigger a separator.
        if let Some(prev) = previous.filter(|p| !p.trim().is_empty()) {
            if is_tag_only_line(prev) && line_is_block_content(current) {
                spaced.push("");
            }
            if line_is_block_content(prev) && is_tag_only_line(current) {
                spaced.push("");
            }
        }
        spaced.push(current);
        previous = Some(current);
    }
    spaced.join("\n")
}
/// Returns `true` when `line`, ignoring trailing whitespace, ends with one of
/// the known closing delimiters (Jinja tag, Jinja comment, Jinja variable or
/// HTML comment). Empty and whitespace-only lines yield `false`.
pub(crate) fn line_ends_with_tag(line: &str) -> bool {
    let content = line.trim_end();
    let close_delims = [
        SINGLE_JINJA_TAG.close_delim,
        SINGLE_JINJA_COMMENT.close_delim,
        SINGLE_JINJA_VAR.close_delim,
        SINGLE_HTML_COMMENT.close_delim,
    ];
    !content.is_empty() && close_delims.iter().any(|delim| content.ends_with(*delim))
}
/// Returns `true` when `line`, ignoring leading whitespace, starts with one
/// of the known opening delimiters (Jinja tag, Jinja comment, Jinja variable
/// or HTML comment). Empty and whitespace-only lines yield `false`.
pub(crate) fn line_starts_with_tag(line: &str) -> bool {
    let content = line.trim_start();
    let open_delims = [
        SINGLE_JINJA_TAG.open_delim,
        SINGLE_JINJA_COMMENT.open_delim,
        SINGLE_JINJA_VAR.open_delim,
        SINGLE_HTML_COMMENT.open_delim,
    ];
    !content.is_empty() && open_delims.iter().any(|delim| content.starts_with(*delim))
}
/// Returns `true` when `line` is non-empty, has no leading whitespace, and
/// starts with an opening delimiter according to `line_starts_with_tag`.
fn is_unindented_tag_line(line: &str) -> bool {
    let is_indented = line.starts_with(char::is_whitespace);
    !line.is_empty() && !is_indented && line_starts_with_tag(line)
}
/// Returns `true` when `line`, ignoring leading whitespace, starts with one
/// of the closing-tag prefixes `{% /`, `{# /`, `{{ /` or `<!-- /`.
///
/// Exactly one space is required between the opening delimiter and the `/`.
fn is_closing_tag(line: &str) -> bool {
    const CLOSING_PREFIXES: [&str; 4] = ["{% /", "{# /", "{{ /", "<!-- /"];
    let candidate = line.trim_start();
    CLOSING_PREFIXES.iter().any(|prefix| candidate.starts_with(*prefix))
}
/// Normalizes the placement of closing-tag lines.
///
/// The text is split on `'\n'`. Every line recognised by `is_closing_tag` has
/// its leading whitespace removed, and an empty line is inserted before it
/// when the previously emitted line is non-blank block content. All other
/// lines are copied through unchanged.
pub fn fix_closing_tag_spacing(text: &str) -> String {
    let mut output: Vec<String> = Vec::new();
    for line in text.split('\n') {
        if !is_closing_tag(line) {
            output.push(line.to_string());
            continue;
        }

        // The check looks at the last line already emitted, which may itself
        // be a de-indented closing tag or an inserted blank line.
        let needs_separator = output
            .last()
            .is_some_and(|prev| !prev.trim().is_empty() && line_is_block_content(prev));
        if needs_separator {
            output.push(String::new());
        }
        output.push(line.trim_start().to_string());
    }
    output.join("\n")
}
pub fn fix_multiline_opening_tag_with_closing(text: &str) -> String {
if !text.contains('\n') {
return text.to_string();
}
let lines: Vec<&str> = text.split('\n').collect();
let mut result_lines: Vec<String> = Vec::new();
for (i, line) in lines.iter().enumerate() {
if i == 0 {
result_lines.push((*line).to_string());
continue;
}
let stripped = line.trim_start();
let is_tag_start = stripped.starts_with(SINGLE_JINJA_TAG.open_delim)
|| stripped.starts_with(SINGLE_JINJA_COMMENT.open_delim)
|| stripped.starts_with(SINGLE_JINJA_VAR.open_delim)
|| stripped.starts_with(SINGLE_HTML_COMMENT.open_delim);
if !is_tag_start {
if let Some(_m) = MULTILINE_CLOSING_PATTERN.find(line) {
let caps = MULTILINE_CLOSING_PATTERN
.captures(line)
.expect("captures must succeed after find");
let mut found = false;
for group_name in &["closing_tag", "closing_comment", "closing_var", "closing_html"]
{
if caps.name(group_name).is_some() {
let split_pos = caps
.name(group_name)
.expect("named group must exist after is_some check")
.start();
let before = line[..split_pos].trim_end();
let closing = line[split_pos..].trim_start();
result_lines.push(before.to_string());
result_lines.push(closing.to_string());
found = true;
break;
}
}
if found {
continue;
}
}
}
result_lines.push((*line).to_string());
}
result_lines.join("\n")
}
#[allow(clippy::type_complexity)]
pub(crate) fn add_tag_newline_handling(
base_wrapper: Box<dyn Fn(&str, &str, &str) -> String + Send + Sync>,
) -> LineWrapper {
Box::new(move |text: &str, initial_indent: &str, subsequent_indent: &str| -> String {
if !text.contains('\n') {
let result = base_wrapper(text, initial_indent, subsequent_indent);
return fix_multiline_opening_tag_with_closing(&result);
}
let lines: Vec<&str> = text.split('\n').collect();
if lines.len() <= 1 {
let result = base_wrapper(text, initial_indent, subsequent_indent);
return fix_multiline_opening_tag_with_closing(&result);
}
let has_tags =
lines.iter().any(|line| line_ends_with_tag(line) || line_starts_with_tag(line));
let mut segments: Vec<String> = Vec::new();
let mut current_segment_lines: Vec<&str> = Vec::new();
for (i, line) in lines.iter().enumerate() {
let is_first_line = i == 0;
let prev_ends_with_tag = !is_first_line && line_ends_with_tag(lines[i - 1]);
let curr_starts_with_tag = is_unindented_tag_line(line);
let curr_is_table = line_is_table_row(line);
let prev_is_table = !is_first_line && line_is_table_row(lines[i - 1]);
let curr_is_block = curr_is_table || (has_tags && line_is_list_item(line));
let prev_is_block =
prev_is_table || (has_tags && !is_first_line && line_is_list_item(lines[i - 1]));
if (prev_ends_with_tag || curr_starts_with_tag || curr_is_block || prev_is_block)
&& !current_segment_lines.is_empty()
{
segments.push(current_segment_lines.join("\n"));
current_segment_lines.clear();
}
current_segment_lines.push(line);
}
if !current_segment_lines.is_empty() {
segments.push(current_segment_lines.join("\n"));
}
if segments.len() == 1 {
let result = base_wrapper(text, initial_indent, subsequent_indent);
return fix_multiline_opening_tag_with_closing(&result);
}
let mut wrapped_segments: Vec<String> = Vec::new();
for (i, segment) in segments.iter().enumerate() {
let is_first = i == 0;
let cur_initial_indent = if is_first { initial_indent } else { subsequent_indent };
let segment_lines: Vec<&str> = segment.split('\n').collect();
let all_table_rows =
segment_lines.iter().filter(|l| !l.trim().is_empty()).all(|l| line_is_table_row(l));
let wrapped = if all_table_rows {
segment_lines
.iter()
.enumerate()
.map(|(j, line)| {
let indent = if j == 0 { cur_initial_indent } else { subsequent_indent };
if line.trim().is_empty() {
(*line).to_string()
} else {
format!("{indent}{}", normalize_table_separator(line))
}
})
.collect::<Vec<_>>()
.join("\n")
} else {
base_wrapper(segment, cur_initial_indent, subsequent_indent)
};
wrapped_segments.push(wrapped);
}
let mut result_parts: Vec<String> = Vec::new();
for (i, wrapped) in wrapped_segments.iter().enumerate() {
if i == 0 {
result_parts.push(wrapped.clone());
continue;
}
let prev_segment = &segments[i - 1];
let curr_segment = &segments[i];
let prev_is_block = prev_segment.split('\n').any(line_is_block_content);
let curr_is_block = curr_segment.split('\n').any(line_is_block_content);
let prev_last_line = prev_segment.split('\n').next_back().unwrap_or("");
let curr_first_line = curr_segment.split('\n').next().unwrap_or("");
let prev_is_tag = line_ends_with_tag(prev_last_line);
let curr_is_tag = is_unindented_tag_line(curr_first_line);
if (prev_is_tag && curr_is_block) || (prev_is_block && curr_is_tag) {
result_parts.push(String::new());
result_parts.push(wrapped.clone());
} else {
result_parts.push(wrapped.clone());
}
}
let result = result_parts.join("\n");
let result = fix_closing_tag_spacing(&result);
fix_multiline_opening_tag_with_closing(&result)
})
}