#![allow(
clippy::module_name_repetitions,
clippy::too_many_lines,
clippy::too_many_arguments,
clippy::map_unwrap_or,
clippy::option_if_let_else,
clippy::elidable_lifetime_names,
clippy::items_after_statements,
clippy::needless_pass_by_value,
clippy::single_match_else,
clippy::manual_let_else,
clippy::match_same_arms,
clippy::missing_const_for_fn,
clippy::single_char_pattern,
clippy::naive_bytecount,
clippy::expect_used,
clippy::redundant_pub_crate,
clippy::used_underscore_binding,
clippy::redundant_field_names,
clippy::struct_field_names,
clippy::redundant_else,
clippy::similar_names
)]
use super::{Grammar, TokenRole, is_word_like};
/// Formatting knobs consulted while tokens are queued on `Output` and while
/// `layout` turns the queued tokens into bytes.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FormatPolicy {
/// Number of spaces `layout` writes per indentation level at the start of a line.
pub indent_width: usize,
/// Text `layout` writes between two adjacent literals when it decides they need separating.
pub separator: String,
/// Text `layout` writes for every line break it emits.
pub newline: String,
/// Token texts after which `Output::token_with_role` queues a line break.
/// Bracket-role tokens that are not indent triggers for the current rule are exempt.
pub line_break_after: Vec<String>,
/// Token texts regarded as indentation openers.
/// NOTE(review): nothing in the visible part of this file reads this field — confirm where it is consumed.
pub indent_open: Vec<String>,
/// Token texts that, when emitted with the `BracketClose` role, are preceded by an
/// indentation decrease.
pub indent_close: Vec<String>,
}
impl Default for FormatPolicy {
    /// Builds the stock policy: two-space indentation, a single space between
    /// tokens, `\n` line endings, line breaks after `;`, `{` and `}`, and
    /// curly braces as the indentation open/close pair.
    fn default() -> Self {
        let open_brace = String::from("{");
        let close_brace = String::from("}");
        Self {
            indent_width: 2,
            separator: String::from(" "),
            newline: String::from("\n"),
            line_break_after: vec![String::from(";"), open_brace.clone(), close_brace.clone()],
            indent_open: vec![open_brace],
            indent_close: vec![close_brace],
        }
    }
}
/// One unit of the intermediate stream that `layout` converts into output bytes.
#[derive(Clone)]
pub(crate) enum Token {
/// Literal text together with the role `layout` uses to decide spacing around it.
Lit(String, TokenRole),
/// Raises the indentation level by one.
IndentOpen,
/// Lowers the indentation level by one (never below zero) and ends the current line
/// if anything has been written on it.
IndentClose,
/// Ends the current line if anything has been written on it; otherwise does nothing.
LineBreak,
/// Requests a separator before the next literal, unless suppressed or whitespace is
/// already present at the boundary.
ForceSpace,
/// Suppresses the separator before the next literal.
NoSpace,
/// Carries the member list of a negated character class. If the first character of the
/// next literal is admitted by that class, `layout` forces a separator before it.
/// Line breaks, indent closes and verbatim text discard a pending guard.
AbsorberGuard(String),
/// Text copied to the output as-is (indented when it starts a line); no separator logic
/// is applied to it.
Verbatim(String),
}
/// Accumulates layout tokens for one rendering pass and turns them into bytes via `finish`.
pub(crate) struct Output<'a> {
/// Tokens queued so far, in emission order.
pub(crate) tokens: Vec<Token>,
/// Formatting policy consulted when queuing tokens and during layout.
pub(crate) policy: &'a FormatPolicy,
/// Grammar whose `token_roles`, `indent_triggers` and trailing-break/comment settings are read.
pub(crate) grammar: &'a Grammar,
/// Name of the rule currently being emitted; used as the key for role and indent-trigger
/// lookups. Starts as `None` in `new`.
pub(crate) current_rule: Option<String>,
/// Optional cassette asked (via `operator_is_tight`) whether a token should be treated as a
/// `Connector` in the current rule.
pub(crate) cassette: Option<&'a dyn crate::languages::cassettes::GrammarCassette>,
}
/// Position marker produced by `Output::snapshot`; lets the caller roll the token buffer
/// back with `Output::restore` or inspect what was emitted since.
#[derive(Clone)]
pub(crate) struct OutputSnapshot {
/// Length of `Output::tokens` at the moment the snapshot was taken.
pub(crate) tokens_len: usize,
}
impl<'a> Output<'a> {
    /// Creates an empty token buffer bound to `policy`, `grammar` and an optional `cassette`.
    /// No rule is current until the caller sets `current_rule`.
    pub(crate) fn new(
        policy: &'a FormatPolicy,
        grammar: &'a Grammar,
        cassette: Option<&'a dyn crate::languages::cassettes::GrammarCassette>,
    ) -> Self {
        Self {
            tokens: Vec::new(),
            policy,
            grammar,
            current_rule: None,
            cassette,
        }
    }

    /// Queues `value`, letting `token_with_role` work out its role.
    pub(crate) fn token(&mut self, value: &str) {
        self.token_with_role(value, None);
    }

    /// Queues a non-empty `value` as an `Immediate` literal, skipping every newline,
    /// indentation and line-break rule that `token_with_role` applies.
    pub(crate) fn tight_token(&mut self, value: &str) {
        if value.is_empty() {
            return;
        }
        self.tokens
            .push(Token::Lit(value.to_owned(), TokenRole::Immediate));
    }

    /// Queues `value` with `explicit_role`, or with a looked-up role when none is given.
    ///
    /// * Empty input is ignored.
    /// * A bare `\n`, `\r\n` or `\r` becomes a single `LineBreak`.
    /// * Text followed by trailing line terminators is queued without them and followed by a
    ///   `LineBreak`; on this path the role defaults to `Terminal`.
    /// * Otherwise the role comes from `explicit_role` or `lookup_role`, may be overridden to
    ///   `Connector` by the cassette, and indentation / line-break tokens are queued around
    ///   the literal according to the policy and the grammar's indent triggers.
    pub(crate) fn token_with_role(&mut self, value: &str, explicit_role: Option<TokenRole>) {
        if value.is_empty() {
            return;
        }
        // A bare line terminator is purely a layout request.
        if matches!(value, "\n" | "\r\n" | "\r") {
            self.tokens.push(Token::LineBreak);
            return;
        }

        let trimmed = value.trim_end_matches(['\n', '\r']);
        if trimmed.len() < value.len() && !trimmed.is_empty() {
            // NOTE(review): this path neither consults `lookup_role` nor the cassette; it
            // falls back to `Terminal` — confirm that is intended.
            let role = explicit_role.unwrap_or(TokenRole::Terminal);
            if role == TokenRole::BracketClose
                && self.policy.indent_close.iter().any(|t| t == trimmed)
            {
                self.tokens.push(Token::IndentClose);
            }
            self.tokens.push(Token::Lit(trimmed.to_owned(), role));
            if role == TokenRole::BracketOpen {
                let opens_indent = self.current_rule.as_ref().is_some_and(|rule| {
                    self.grammar
                        .indent_triggers
                        .contains(&(rule.clone(), trimmed.to_owned()))
                });
                if opens_indent {
                    self.tokens.push(Token::IndentOpen);
                }
            }
            self.tokens.push(Token::LineBreak);
            return;
        }

        let mut role = explicit_role.unwrap_or_else(|| self.lookup_role(value));
        if let (Some(rule), Some(cassette)) = (self.current_rule.as_ref(), self.cassette) {
            if cassette.operator_is_tight(rule, value) {
                role = TokenRole::Connector;
            }
        }

        // The (rule, text) trigger lookup allocates two strings, so do it once, and only for
        // bracket roles — the only roles whose handling below depends on the answer.
        let is_bracket = matches!(role, TokenRole::BracketOpen | TokenRole::BracketClose);
        let is_indent_trigger = is_bracket
            && self.current_rule.as_ref().is_some_and(|rule| {
                self.grammar
                    .indent_triggers
                    .contains(&(rule.clone(), value.to_owned()))
            });

        if role == TokenRole::BracketClose && self.policy.indent_close.iter().any(|t| t == value) {
            self.tokens.push(Token::IndentClose);
        }
        self.tokens.push(Token::Lit(value.to_owned(), role));
        if role == TokenRole::BracketOpen && is_indent_trigger {
            self.tokens.push(Token::IndentOpen);
            self.tokens.push(Token::LineBreak);
        }

        // Inside a rule, a bracket that is not an indent trigger must not pick up the
        // policy's generic "line break after" behaviour.
        let is_non_indent_bracket =
            self.current_rule.is_some() && is_bracket && !is_indent_trigger;
        if !is_non_indent_bracket && self.policy.line_break_after.iter().any(|t| t == value) {
            self.tokens.push(Token::LineBreak);
        }
    }

    /// Returns the role for `value`: the grammar's explicit role for the current rule if one
    /// exists, otherwise `Keyword` for word-like text and `Operator` for everything else.
    pub(crate) fn lookup_role(&self, value: &str) -> TokenRole {
        if let Some(role) = self.explicit_role(value) {
            return role;
        }
        if is_word_like(value) {
            TokenRole::Keyword
        } else {
            TokenRole::Operator
        }
    }

    /// Looks `value` up in the grammar's role table for the current rule.
    /// Returns `None` when no rule is current or the table has no entry.
    pub(crate) fn explicit_role(&self, value: &str) -> Option<TokenRole> {
        self.current_rule
            .as_ref()
            .and_then(|rule| self.grammar.token_roles.get(rule))
            .and_then(|role_map| role_map.get(value).copied())
    }

    /// Queues a non-empty `value` with the given `role`, always opening an indented block
    /// after a `BracketOpen` (no grammar trigger check). A `BracketClose` whose text is listed
    /// in the policy's `indent_close` is preceded by an `IndentClose`.
    pub(crate) fn token_with_indent_open(&mut self, value: &str, role: TokenRole) {
        if value.is_empty() {
            return;
        }
        if role == TokenRole::BracketClose && self.policy.indent_close.iter().any(|t| t == value) {
            self.tokens.push(Token::IndentClose);
        }
        self.tokens.push(Token::Lit(value.to_owned(), role));
        if role == TokenRole::BracketOpen {
            self.tokens.push(Token::IndentOpen);
            self.tokens.push(Token::LineBreak);
        }
    }

    /// Queues a line break.
    pub(crate) fn newline(&mut self) {
        self.tokens.push(Token::LineBreak);
    }

    /// Queues non-empty `bytes` to be copied to the output unchanged.
    pub(crate) fn verbatim(&mut self, bytes: &str) {
        if bytes.is_empty() {
            return;
        }
        self.tokens.push(Token::Verbatim(bytes.to_owned()));
    }

    /// Queues an indentation increase followed by a line break.
    pub(crate) fn indent_open(&mut self) {
        self.tokens.push(Token::IndentOpen);
        self.tokens.push(Token::LineBreak);
    }

    /// Queues an indentation decrease.
    pub(crate) fn indent_close(&mut self) {
        self.tokens.push(Token::IndentClose);
    }

    /// Records the current length of the token buffer.
    pub(crate) fn snapshot(&self) -> OutputSnapshot {
        OutputSnapshot {
            tokens_len: self.tokens.len(),
        }
    }

    /// Drops every token queued after `snap` was taken. A snapshot that is not shorter than
    /// the current buffer leaves it unchanged.
    pub(crate) fn restore(&mut self, snap: OutputSnapshot) {
        self.tokens.truncate(snap.tokens_len);
    }

    /// Reports whether any `Lit` or `Verbatim` token has been queued since `snap`.
    ///
    /// A snapshot that points past the end of the buffer (for example one taken before a
    /// `restore` to an earlier position) yields `false` instead of panicking.
    pub(crate) fn lit_emitted_since(&self, snap: OutputSnapshot) -> bool {
        self.tokens.get(snap.tokens_len..).is_some_and(|tail| {
            tail.iter()
                .any(|t| matches!(t, Token::Lit(_, _) | Token::Verbatim(_)))
        })
    }

    /// Queues a request to omit the separator before the next literal.
    pub(crate) fn no_space(&mut self) {
        self.tokens.push(Token::NoSpace);
    }

    /// Queues a request to insert a separator before the next literal.
    pub(crate) fn force_space(&mut self) {
        self.tokens.push(Token::ForceSpace);
    }

    /// Consumes the buffer and renders it with `layout`, using the grammar's line-comment
    /// prefixes and trailing-break settings.
    pub(crate) fn finish(self) -> Vec<u8> {
        layout(
            &self.tokens,
            self.policy,
            &self.grammar.line_comment_prefixes,
            &self.grammar.trailing_break_markers,
            self.grammar.trailing_break_on_whitespace,
            self.grammar.top_level_text_admits_newline,
        )
    }
}
/// Renders a token stream into output bytes.
///
/// Walks `tokens` once, tracking the indentation level, whether the cursor is at the start
/// of a line, and the role/text of the previous literal. Literals are separated by
/// `policy.separator` when `needs_space_by_role` (or a pending `ForceSpace` / admitted
/// `AbsorberGuard`) asks for it and no `NoSpace` or existing boundary whitespace forbids it.
/// A non-`Immediate` literal that starts with one of `line_comment_prefixes` is followed by
/// a line break.
///
/// After the last token a final `policy.newline` is appended unless the output is already
/// at a line start, the last content was verbatim, `top_level_text_admits_newline` is set,
/// or the output ends with a trailing break marker (see `ends_with_trailing_break_marker`).
///
/// Setting the `DBG_LAYOUT` environment variable makes the function trace every token to
/// stderr.
pub(crate) fn layout(
    tokens: &[Token],
    policy: &FormatPolicy,
    line_comment_prefixes: &[String],
    trailing_break_markers: &[String],
    trailing_break_on_whitespace: bool,
    top_level_text_admits_newline: bool,
) -> Vec<u8> {
    let mut bytes = Vec::new();
    let mut indent: usize = 0;
    let mut at_line_start = true;
    let mut last_role: Option<TokenRole> = None;
    let mut last_text: String = String::new();
    let mut suppress_next_separator = false;
    let mut force_next_separator = false;
    let mut pending_absorber: Option<String> = None;
    let mut last_content_was_verbatim = false;
    let newline = policy.newline.as_bytes();
    let separator = policy.separator.as_bytes();
    // The environment lookup is loop-invariant; read it once instead of once per token.
    let debug_layout = std::env::var("DBG_LAYOUT").is_ok();
    for (tok_idx, tok) in tokens.iter().enumerate() {
        if debug_layout {
            match tok {
                Token::Lit(v, r) => eprintln!(
                    " TOK: Lit({v:?}, {r:?}) at_line_start={at_line_start} last_role={last_role:?}"
                ),
                Token::IndentOpen => eprintln!(" TOK: IndentOpen"),
                Token::IndentClose => eprintln!(" TOK: IndentClose"),
                Token::LineBreak => eprintln!(" TOK: LineBreak"),
                Token::NoSpace => eprintln!(" TOK: NoSpace"),
                Token::ForceSpace => eprintln!(" TOK: ForceSpace"),
                Token::AbsorberGuard(s) => eprintln!(" TOK: AbsorberGuard({s:?})"),
                Token::Verbatim(s) => eprintln!(" TOK: Verbatim({s:?})"),
            }
        }
        match tok {
            Token::IndentOpen => indent += 1,
            Token::IndentClose => {
                // Dedent, and make sure the closing text starts on its own line.
                indent = indent.saturating_sub(1);
                pending_absorber = None;
                if !at_line_start {
                    bytes.extend_from_slice(newline);
                    at_line_start = true;
                }
            }
            Token::LineBreak => {
                // Consecutive breaks collapse: nothing is written when already at a line start.
                pending_absorber = None;
                if !at_line_start {
                    bytes.extend_from_slice(newline);
                    at_line_start = true;
                }
            }
            Token::NoSpace => {
                suppress_next_separator = true;
            }
            Token::ForceSpace => {
                force_next_separator = true;
            }
            Token::AbsorberGuard(negated) => {
                pending_absorber = Some(negated.clone());
            }
            Token::Verbatim(bytes_str) => {
                // Verbatim text resets all pending spacing state and is copied unchanged.
                pending_absorber = None;
                suppress_next_separator = false;
                force_next_separator = false;
                if at_line_start && !bytes_str.is_empty() {
                    bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
                }
                bytes.extend_from_slice(bytes_str.as_bytes());
                at_line_start = bytes_str.ends_with(['\n', '\r']);
                last_role = None;
                last_text.clear();
                last_content_was_verbatim = true;
            }
            Token::Lit(value, role) => {
                // A pending guard forces a separator when this literal's first character
                // is admitted by the guard's negated class.
                if let Some(negated) = pending_absorber.take() {
                    if value
                        .chars()
                        .next()
                        .is_some_and(|c| negated_class_admits(&negated, c))
                    {
                        force_next_separator = true;
                    }
                }
                // An opening bracket immediately followed by IndentOpen starts a block.
                let is_block_open = *role == TokenRole::BracketOpen
                    && tokens
                        .get(tok_idx + 1)
                        .is_some_and(|t| matches!(t, Token::IndentOpen));
                if at_line_start {
                    bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
                } else if let Some(prev_role) = last_role {
                    let boundary_has_whitespace =
                        last_text.ends_with([' ', '\t']) || value.starts_with([' ', '\t']);
                    let want_space = !suppress_next_separator
                        && !boundary_has_whitespace
                        && (force_next_separator
                            || needs_space_by_role(prev_role, &last_text, *role, value)
                            || (is_block_open
                                && matches!(
                                    prev_role,
                                    TokenRole::Terminal | TokenRole::BracketClose
                                )));
                    if want_space {
                        bytes.extend_from_slice(separator);
                    }
                }
                suppress_next_separator = false;
                force_next_separator = false;
                bytes.extend_from_slice(value.as_bytes());
                at_line_start = false;
                last_content_was_verbatim = false;
                last_role = Some(*role);
                last_text.clear();
                last_text.push_str(value);
                // A literal starting with a line-comment prefix ends its line.
                if *role != TokenRole::Immediate
                    && line_comment_prefixes
                        .iter()
                        .any(|p| value.starts_with(p.as_str()))
                {
                    bytes.extend_from_slice(newline);
                    at_line_start = true;
                    last_role = None;
                }
            }
        }
    }
    if !at_line_start
        && !last_content_was_verbatim
        && !top_level_text_admits_newline
        && !ends_with_trailing_break_marker(
            &bytes,
            trailing_break_markers,
            trailing_break_on_whitespace,
        )
    {
        bytes.extend_from_slice(newline);
    }
    bytes
}
/// Reports whether `bytes` already ends in something that counts as a trailing break:
/// a space or tab when `on_whitespace` is set, or any of the `markers`.
fn ends_with_trailing_break_marker(bytes: &[u8], markers: &[String], on_whitespace: bool) -> bool {
    let ends_in_blank = matches!(bytes.last(), Some(b' ' | b'\t'));
    if on_whitespace && ends_in_blank {
        return true;
    }
    // With no markers the loop body never runs and the answer is `false`.
    for marker in markers {
        if bytes.ends_with(marker.as_bytes()) {
            return true;
        }
    }
    false
}
/// Returns `true` when `c` is not excluded by any member listed in `negated`.
///
/// Members are single characters. A backslash introduces an escape: `\s` excludes any
/// whitespace, `\t`, `\n` and `\r` exclude the corresponding control character, and any
/// other escaped character excludes itself. A lone trailing backslash excludes nothing.
fn negated_class_admits(negated: &str, c: char) -> bool {
    let mut escape_pending = false;
    for member in negated.chars() {
        let excludes = if escape_pending {
            escape_pending = false;
            match member {
                's' => c.is_whitespace(),
                't' => c == '\t',
                'n' => c == '\n',
                'r' => c == '\r',
                literal => c == literal,
            }
        } else if member == '\\' {
            // The backslash itself never excludes anything; it qualifies the next member.
            escape_pending = true;
            false
        } else {
            member == c
        };
        if excludes {
            return false;
        }
    }
    true
}
/// Returns the role to use for spacing decisions: a bracket role whose text is word-like
/// is treated as a `Keyword`; every other role is returned unchanged.
pub(crate) fn effective_spacing_role(role: TokenRole, text: &str) -> TokenRole {
    let is_bracket = matches!(role, TokenRole::BracketOpen | TokenRole::BracketClose);
    if is_bracket && is_word_like(text) {
        TokenRole::Keyword
    } else {
        role
    }
}
/// Decides whether a separator belongs between two adjacent literals, based on their roles.
///
/// Both roles are first normalised with `effective_spacing_role`, so word-like brackets
/// behave as keywords. The arms below are order-sensitive: the first matching group wins.
pub(crate) fn needs_space_by_role(
    last: TokenRole,
    last_text: &str,
    next: TokenRole,
    next_text: &str,
) -> bool {
    let prev = effective_spacing_role(last, last_text);
    let upcoming = effective_spacing_role(next, next_text);
    match (prev, upcoming) {
        // Immediates glue to both neighbours; nothing follows an opening bracket or
        // precedes a closing bracket or separator with a space.
        (TokenRole::Immediate, _)
        | (_, TokenRole::Immediate)
        | (TokenRole::BracketOpen, _)
        | (_, TokenRole::BracketClose)
        | (_, TokenRole::Separator) => false,
        // A separator is always followed by a space (unless one of the cases above applied).
        (TokenRole::Separator, _) => true,
        // Connectors bind tightly on both sides; an opening bracket hugs a preceding
        // terminal or closing bracket.
        (TokenRole::Connector, _)
        | (_, TokenRole::Connector)
        | (TokenRole::Terminal, TokenRole::BracketOpen)
        | (TokenRole::BracketClose, TokenRole::BracketOpen) => false,
        // Every remaining combination is spaced.
        (TokenRole::Keyword, _)
        | (_, TokenRole::Keyword)
        | (TokenRole::Terminal, TokenRole::Terminal)
        | (TokenRole::Terminal, TokenRole::Operator)
        | (TokenRole::Operator, TokenRole::Terminal)
        | (TokenRole::Operator, TokenRole::Operator)
        | (TokenRole::BracketClose, _)
        | (TokenRole::Operator, TokenRole::BracketOpen) => true,
    }
}