use crate::config::FormatOptions;
/// Lower-case names of the HTML elements that `expand` places on a line of
/// their own. Entries are kept in alphabetical order, grouped by first letter.
pub const BLOCK_HTML_TAGS: &[&str] = &[
    // a
    "address", "article", "aside", "audio",
    // b
    "blockquote", "body", "button",
    // c
    "canvas", "caption", "col", "colgroup",
    // d
    "datalist", "dd", "details", "dialog", "div", "dl", "dt",
    // f
    "fieldset", "figcaption", "figure", "footer", "form",
    // h
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
    // i
    "iframe",
    // l
    "legend", "li", "link",
    // m
    "main", "map", "menu", "meta",
    // n
    "nav", "noscript",
    // o
    "ol", "optgroup", "option", "output",
    // p
    "p", "picture", "pre", "progress",
    // s
    "script", "section", "select", "source", "style", "summary",
    // t
    "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead", "title", "tr",
    "track",
    // u
    "ul",
    // v
    "video",
];
/// Builds the list of template-tag keywords that should be placed on their
/// own line.
///
/// The list starts from a fixed set of built-in keywords and is then adjusted
/// from `opts`:
/// - every entry of `custom_blocks` adds both `name` and `end<name>`;
/// - every entry of `custom_blocks_unindent_line` is added unless it is
///   already present;
/// - every entry of `ignore_blocks` makes sure `end<name>` is present and
///   removes `name` itself from the list.
fn template_break_keywords(opts: &FormatOptions) -> Vec<String> {
    const BUILTIN_KEYWORDS: [&str; 19] = [
        "if", "else", "else if", "endif", "for", "endfor", "macro", "endmacro", "block",
        "endblock", "filter", "endfilter", "with", "endwith", "raw", "endraw", "include",
        "extends", "import",
    ];

    let mut keywords: Vec<String> = BUILTIN_KEYWORDS.iter().map(|kw| kw.to_string()).collect();

    for name in &opts.custom_blocks {
        keywords.push(name.clone());
        keywords.push(format!("end{}", name));
    }

    for name in &opts.custom_blocks_unindent_line {
        if keywords.iter().any(|kw| kw == name) {
            continue;
        }
        keywords.push(name.clone());
    }

    for ignored in &opts.ignore_blocks {
        let closing = format!("end{}", ignored);
        if keywords.iter().all(|kw| *kw != closing) {
            keywords.push(closing);
        }
        // The opening keyword of an ignored block is dropped from the list;
        // only its closing keyword stays.
        keywords.retain(|kw| kw != ignored);
    }

    keywords
}
/// Reports whether byte offset `pos` of `html` lies inside a span whose
/// content must not be reformatted.
///
/// The input is scanned from the start and the following spans are
/// recognised:
/// - template comments `{# ... #}`;
/// - HTML comments `<!-- ... -->`;
/// - `{% raw %}` up to (not including) `{% endraw %}` / `{%endraw%}`;
/// - `<pre>`, `<script>` and `<style>` elements through their closing tag.
///
/// A span without a terminator extends to the end of the input. The first
/// byte of a span is not considered "inside" it, so `pos` pointing at the
/// opening `<` or `{` yields `false`.
fn inside_raw_span(html: &str, pos: usize) -> bool {
    // Element name paired with the exact closing tag that ends its raw span.
    const RAW_ELEMENTS: [(&str, &str); 3] = [
        ("pre", "</pre>"),
        ("script", "</script>"),
        ("style", "</style>"),
    ];

    let bytes = html.as_bytes();
    let len = bytes.len();
    // Absolute byte offset of the first `needle` at or after `from`.
    let find_from = |from: usize, needle: &str| html[from..].find(needle).map(|off| from + off);

    let mut i = 0usize;
    // Spans starting after `pos` cannot contain it, so stop once past it.
    while i < len && i <= pos {
        let rest = &bytes[i..];

        if rest.starts_with(b"{#") {
            let end = find_from(i + 2, "#}").unwrap_or(len);
            if pos > i && pos < end + 2 {
                return true;
            }
            i = end + 2;
            continue;
        }

        if rest.starts_with(b"<!--") {
            let end = find_from(i + 4, "-->").unwrap_or(len);
            if pos > i && pos < end + 3 {
                return true;
            }
            i = end + 3;
            continue;
        }

        if rest.starts_with(b"{%") {
            if let Some(tag_end) = find_from(i + 2, "%}") {
                let inner = html[i + 2..tag_end].trim();
                if inner == "raw" || inner.starts_with("raw ") || inner.starts_with("raw\t") {
                    let body_start = tag_end + 2;
                    let raw_end = find_from(body_start, "{% endraw %}")
                        .or_else(|| find_from(body_start, "{%endraw%}"))
                        .unwrap_or(len);
                    if pos > i && pos < raw_end {
                        return true;
                    }
                    i = raw_end;
                    continue;
                }
                // Any other template tag is skipped as a whole.
                i = tag_end + 2;
                continue;
            }
        }

        // Default: advance by one byte. If a raw element starts here, jump
        // exactly to the byte after its closing tag instead, so that a span
        // beginning immediately after `</pre>` etc. is still seen.
        let mut next = i + 1;
        if rest[0] == b'<' {
            let after_lt = &rest[1..];
            for &(name, close_tag) in RAW_ELEMENTS.iter() {
                if !after_lt.starts_with(name.as_bytes()) {
                    continue;
                }
                // Require the tag name to end here: `<preview>` or
                // `<styled-box>` are not `<pre>` / `<style>` elements.
                let name_continues = after_lt
                    .get(name.len())
                    .map_or(false, |b| b.is_ascii_alphanumeric() || *b == b'-');
                if name_continues {
                    continue;
                }
                let span_end = find_from(i, close_tag).unwrap_or(len) + close_tag.len();
                if pos > i && pos < span_end {
                    return true;
                }
                next = span_end;
                break;
            }
        }
        i = next;
    }
    false
}
/// Returns the absolute byte offset of the first occurrence of `needle` in
/// `s` at or after byte offset `from`, or `None` when there is none.
///
/// # Panics
/// Panics if `from` is past the end of `s` or not on a character boundary.
fn find_close(s: &str, from: usize, needle: &str) -> Option<usize> {
    let tail = &s[from..];
    let offset = tail.find(needle)?;
    Some(from + offset)
}
/// Expands `html` so that block-level HTML tags and block template tags each
/// sit on their own line.
///
/// Runs three passes in order: HTML tag breaking (using `BLOCK_HTML_TAGS`),
/// template tag breaking (using the keywords derived from `opts`), and
/// finally collapsing of runs of newlines.
pub fn expand(html: &str, opts: &FormatOptions) -> String {
    let block_tags: Vec<String> = BLOCK_HTML_TAGS
        .iter()
        .map(|tag| String::from(*tag))
        .collect();
    let keywords = template_break_keywords(opts);

    let html_broken = break_html_tags(html, &block_tags);
    let template_broken = break_template_tags(&html_broken, &keywords);
    collapse_blank_lines(&template_broken)
}
/// Puts every block-level HTML tag (opening or closing) whose name is in
/// `tags` on a line of its own and returns the resulting text.
///
/// Tags that start inside a raw span (see `inside_raw_span`) are copied
/// through untouched. For a tag that is broken out:
/// - if text precedes it on the current output line, trailing spaces/tabs are
///   removed and a newline is inserted before the tag;
/// - a newline is inserted after the tag unless the input already continues
///   with one (or ends there).
///
/// The input is walked by byte offset so no per-tag copies of the remaining
/// input are made.
fn break_html_tags(html: &str, tags: &[String]) -> String {
    let bytes = html.as_bytes();
    let len = bytes.len();
    let mut out = String::with_capacity(len + 256);
    // Invariant: `i` is always on a character boundary of `html`.
    let mut i = 0usize;

    while i < len {
        if bytes[i] == b'<' {
            if let Some((matched, _char_len)) = match_html_block_tag(&html[i..], tags) {
                if inside_raw_span(html, i) {
                    // Only the `<` is consumed; the rest of the tag is copied
                    // character by character by the following iterations.
                    out.push('<');
                    i += 1;
                    continue;
                }

                // NOTE(review): when `out` holds only whitespace and contains
                // no newline yet, this evaluates to `false`, so a newline is
                // emitted before a tag on an indented first line — confirm
                // that is intended.
                let already_own_line = out
                    .rfind('\n')
                    .map(|nl| out[nl + 1..].chars().all(char::is_whitespace))
                    .unwrap_or(out.is_empty());
                if !already_own_line {
                    while out.ends_with([' ', '\t']) {
                        out.pop();
                    }
                    out.push('\n');
                }

                out.push_str(&matched);
                // `matched` is a prefix of `html[i..]`, so its byte length
                // lands `i` on the character right after the tag.
                i += matched.len();
                if i < len && bytes[i] != b'\n' {
                    out.push('\n');
                }
                continue;
            }
        }

        match html[i..].chars().next() {
            Some(ch) => {
                out.push(ch);
                i += ch.len_utf8();
            }
            // Unreachable while `i < len` sits on a boundary; stop defensively.
            None => break,
        }
    }
    out
}
/// Tries to recognise a block-level HTML tag at the very start of `s`.
///
/// `s` must begin with `<`, optionally followed by `/`, followed by a tag
/// name made of alphanumeric characters and `-`. The lower-cased name has to
/// be listed in `tags`. The end of the tag is located with
/// `super::find_html_tag_close` (presumably the byte index of the closing
/// `>` — confirm against that helper).
///
/// Returns the full tag text together with its length in characters, or
/// `None` when no listed tag starts here or its end cannot be found.
fn match_html_block_tag(s: &str, tags: &[String]) -> Option<(String, usize)> {
    let after_lt = s.strip_prefix('<')?;
    // A closing tag is matched exactly like an opening one.
    let name_part = after_lt.strip_prefix('/').unwrap_or(after_lt);

    let name_len = name_part
        .char_indices()
        .find(|&(_, c)| !(c.is_alphanumeric() || c == '-'))
        .map_or(name_part.len(), |(idx, _)| idx);
    if name_len == 0 {
        return None;
    }

    let name = name_part[..name_len].to_lowercase();
    if !tags.iter().any(|tag| *tag == name) {
        return None;
    }

    let close_byte = super::find_html_tag_close(s)?;
    let matched = &s[..close_byte + 1];
    Some((matched.to_owned(), matched.chars().count()))
}
/// Puts every `{% ... %}` template tag whose keyword is listed in `kws` on a
/// line of its own and returns the resulting text.
///
/// Template tags are left in place when they
/// - appear inside an HTML opening tag (between `<name` and `>`), where
///   `{% ... %}` and `{{ ... }}` are copied through verbatim, or
/// - start inside a raw span (see `inside_raw_span`), or
/// - carry a keyword that is not in `kws`.
///
/// For a tag that is broken out, trailing spaces/tabs on the current output
/// line are removed, a newline is inserted before the tag unless the line
/// held only whitespace, and a newline is inserted after the tag unless the
/// input already continues with one (or ends there).
fn break_template_tags(html: &str, kws: &[String]) -> String {
    /// Copies the character starting at byte `at` of `src` into `out` and
    /// returns the byte offset just past it.
    fn copy_char(src: &str, at: usize, out: &mut String) -> usize {
        let ch = src[at..].chars().next().unwrap();
        out.push(ch);
        at + ch.len_utf8()
    }

    let bytes = html.as_bytes();
    let len = bytes.len();
    let mut out = String::with_capacity(html.len() + 256);
    let mut i = 0usize;
    // True while scanning between `<name` and the matching `>`.
    let mut in_html_open_tag = false;
    // Quote byte of the attribute value currently being scanned, if any.
    let mut html_attr_quote: Option<u8> = None;

    while i < len {
        let b = bytes[i];
        let next = bytes.get(i + 1).copied();

        if in_html_open_tag {
            if let Some(quote) = html_attr_quote {
                // Inside a quoted attribute value: only the matching quote
                // byte has any effect.
                if b == quote {
                    html_attr_quote = None;
                }
                i = copy_char(html, i, &mut out);
                continue;
            }

            if b == b'"' || b == b'\'' {
                html_attr_quote = Some(b);
            } else if b == b'>' {
                in_html_open_tag = false;
            } else if b == b'{' && next == Some(b'%') {
                if let Some(tag_end) = find_template_tag_end(html, i + 2) {
                    out.push_str(&html[i..tag_end]);
                    i = tag_end;
                    continue;
                }
            } else if b == b'{' && next == Some(b'{') {
                if let Some(end) = find_close(html, i + 2, "}}") {
                    out.push_str(&html[i..end + 2]);
                    i = end + 2;
                    continue;
                }
            }
            i = copy_char(html, i, &mut out);
            continue;
        }

        // `<` followed by a letter opens an HTML tag, unless it is raw text.
        if b == b'<'
            && next.map_or(false, |n| n.is_ascii_alphabetic())
            && !inside_raw_span(html, i)
        {
            in_html_open_tag = true;
            html_attr_quote = None;
        }

        if b == b'{' && next == Some(b'%') {
            if inside_raw_span(html, i) {
                i = copy_char(html, i, &mut out);
                continue;
            }
            if let Some(tag_end) = find_template_tag_end(html, i + 2) {
                let inner =
                    html[i + 2..tag_end - 2].trim_matches(|c| c == '-' || c == '+' || c == '~');
                let keyword = extract_keyword(inner);
                // A keyword matches when it equals an entry, or starts with
                // that entry followed by a space or tab.
                let should_break = kws.iter().any(|k| {
                    keyword.strip_prefix(k.as_str()).map_or(false, |tail| {
                        tail.is_empty() || tail.starts_with(' ') || tail.starts_with('\t')
                    })
                });
                if should_break {
                    // Decide before trimming, so that pure indentation still
                    // counts as "already on its own line".
                    let already_own_line = out
                        .rfind('\n')
                        .map(|nl| out[nl + 1..].chars().all(char::is_whitespace))
                        .unwrap_or(out.is_empty());
                    let kept = out.trim_end_matches([' ', '\t']).len();
                    out.truncate(kept);
                    if !already_own_line && !out.ends_with('\n') {
                        out.push('\n');
                    }

                    out.push_str(&html[i..tag_end]);
                    i = tag_end;
                    if i < len && bytes[i] != b'\n' {
                        out.push('\n');
                    }
                    continue;
                }
            }
        }

        i = copy_char(html, i, &mut out);
    }
    out
}
/// Finds the end of a `{% ... %}` tag, scanning `html` from byte offset
/// `from` (expected to point just past the opening `{%`).
///
/// A `%}` that appears inside a single- or double-quoted string is ignored.
/// Returns the byte offset immediately after the closing `%}`, or `None` if
/// the tag is never closed.
fn find_template_tag_end(html: &str, from: usize) -> Option<usize> {
    let bytes = html.as_bytes();
    let mut open_quote: Option<u8> = None;

    for idx in from..bytes.len() {
        let b = bytes[idx];

        if let Some(quote) = open_quote {
            // Inside a string literal: wait for the matching quote byte.
            if b == quote {
                open_quote = None;
            }
            continue;
        }

        if b == b'"' || b == b'\'' {
            open_quote = Some(b);
        } else if b == b'%' && bytes.get(idx + 1) == Some(&b'}') {
            return Some(idx + 2);
        }
    }
    None
}
/// Extracts the leading keyword from the inside of a template tag.
///
/// Leading `-`, `+`, `~` and whitespace are skipped first. If the remaining
/// text starts with `else if`, that two-word keyword is returned; otherwise
/// the first whitespace-delimited word is returned (empty when there is
/// none).
fn extract_keyword(inner: &str) -> String {
    let is_leading_noise = |c: char| matches!(c, '-' | '+' | '~') || c.is_whitespace();
    let body = inner.trim_start_matches(is_leading_noise);

    let keyword = if body.starts_with("else if") {
        "else if"
    } else {
        body.split_whitespace().next().unwrap_or("")
    };
    keyword.to_owned()
}
/// Limits every run of consecutive `\n` characters in `html` to at most two,
/// i.e. at most one empty line in a row. Any other character ends the run.
fn collapse_blank_lines(html: &str) -> String {
    let mut newline_run = 0u32;
    html.chars()
        .filter(|&ch| {
            if ch == '\n' {
                newline_run += 1;
                newline_run <= 2
            } else {
                newline_run = 0;
                true
            }
        })
        .collect()
}