use crate::lex::token::Token;
use std::ops::Range as ByteRange;
/// Error produced while transforming a token stream.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransformationError {
    /// Generic transformation failure carrying a human-readable message.
    Error(String),
}

impl std::fmt::Display for TransformationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TransformationError::Error(msg) => write!(f, "Transformation error: {msg}"),
        }
    }
}

// Makes the type usable with `?` and `Box<dyn std::error::Error>`;
// `Debug` + `Display` above satisfy the trait's supertraits.
impl std::error::Error for TransformationError {}
/// Stateless pass that rewrites raw `Indentation` tokens into semantic
/// `Indent`/`Dedent` tokens (see the `map` method on this type).
///
/// `Default` is derived rather than hand-written; `new()` is kept as the
/// conventional constructor and simply delegates.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SemanticIndentationMapper;

impl SemanticIndentationMapper {
    /// Creates a new mapper. Equivalent to `SemanticIndentationMapper::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
/// Returns the index of the first token of the line containing `pos`:
/// the index just after the closest `BlankLine` strictly before `pos`,
/// or `0` when no earlier `BlankLine` exists.
fn find_line_start(tokens: &[Token], pos: usize) -> usize {
    tokens[..pos]
        .iter()
        .rposition(|token| matches!(token, Token::BlankLine(_)))
        .map_or(0, |blank_idx| blank_idx + 1)
}
/// Reports whether the line starting at `line_start` is blank: nothing
/// but `Indentation` tokens before its `BlankLine` (or before the end
/// of the token stream).
fn is_line_blank(tokens: &[Token], line_start: usize) -> bool {
    tokens
        .iter()
        .skip(line_start)
        .find(|token| !matches!(token, Token::Indentation))
        .map_or(true, |token| matches!(token, Token::BlankLine(_)))
}
/// Counts the run of consecutive `Indentation` tokens beginning at
/// `start` — i.e. the indentation level of the line that starts there.
fn count_line_indent_steps(tokens: &[Token], start: usize) -> usize {
    tokens
        .iter()
        .skip(start)
        .take_while(|token| matches!(token, Token::Indentation))
        .count()
}
impl SemanticIndentationMapper {
    /// Rewrites a flat token stream so that changes in the number of
    /// leading `Indentation` tokens per line become explicit
    /// `Token::Indent` / `Token::Dedent` tokens.
    ///
    /// Behavior (mirrored by the unit tests in this file):
    /// * A line's level is its count of leading `Indentation` tokens.
    /// * Rising levels emit one `Indent` per step; each `Indent` captures
    ///   the source `Indentation` token (with its byte range) that caused
    ///   it, when one exists. Falling levels emit one `Dedent` per step.
    /// * Blank lines (only `Indentation` tokens before a `BlankLine`)
    ///   never change the level; their `Indentation` tokens are dropped
    ///   and only the `BlankLine` is forwarded.
    /// * Levels still open at end of input are closed with `Dedent`s.
    ///
    /// `Indent`/`Dedent` carry the placeholder span `0..0`; real source
    /// positions live in the tokens captured inside `Indent`.
    ///
    /// This implementation never constructs `Err`; the `Result` return
    /// keeps the signature consistent with other transformation passes.
    pub fn map(
        &mut self,
        tokens: Vec<(Token, ByteRange<usize>)>,
    ) -> Result<Vec<(Token, ByteRange<usize>)>, TransformationError> {
        // Kind-only copy so the line-scanning helpers can take `&[Token]`.
        let token_kinds: Vec<Token> = tokens.iter().map(|(t, _)| t.clone()).collect();
        let mut result = Vec::new();
        let mut current_level = 0;
        let mut i = 0;
        while i < tokens.len() {
            let line_start = find_line_start(&token_kinds, i);
            let line_indent_level = count_line_indent_steps(&token_kinds, line_start);
            let is_blank_line = is_line_blank(&token_kinds, line_start);
            if is_blank_line {
                // Blank line: drop its Indentation tokens, forward only the
                // terminating BlankLine, and leave `current_level` untouched.
                let mut j = line_start;
                while j < token_kinds.len() && !matches!(token_kinds[j], Token::BlankLine(_)) {
                    j += 1;
                }
                if j < token_kinds.len() && matches!(token_kinds[j], Token::BlankLine(_)) {
                    result.push((token_kinds[j].clone(), tokens[j].1.clone()));
                    j += 1;
                }
                i = j;
                continue;
            }
            let target_level = line_indent_level;
            match target_level.cmp(&current_level) {
                std::cmp::Ordering::Greater => {
                    // Deeper: one Indent per new level. The Indentation
                    // tokens sit contiguously at the start of the line, so
                    // the token for level `current_level + level_idx` is at
                    // that offset from `line_start`.
                    let indent_start_idx = line_start;
                    for level_idx in 0..(target_level - current_level) {
                        let indent_token_idx = indent_start_idx + current_level + level_idx;
                        let source_tokens = if indent_token_idx < token_kinds.len()
                            && matches!(token_kinds[indent_token_idx], Token::Indentation)
                        {
                            vec![tokens[indent_token_idx].clone()]
                        } else {
                            vec![]
                        };
                        result.push((Token::Indent(source_tokens), 0..0));
                    }
                }
                std::cmp::Ordering::Less => {
                    // Shallower: one Dedent per closed level. Dedents carry
                    // no source tokens.
                    for _ in 0..(current_level - target_level) {
                        result.push((Token::Dedent(vec![]), 0..0));
                    }
                }
                std::cmp::Ordering::Equal => {
                    // Same level: nothing to emit.
                }
            }
            current_level = target_level;
            // Skip the leading Indentation tokens (already accounted for by
            // the Indent/Dedent bookkeeping above)...
            let mut j = line_start;
            for _ in 0..line_indent_level {
                if j < token_kinds.len() && matches!(token_kinds[j], Token::Indentation) {
                    j += 1;
                }
            }
            // ...then forward the line's content tokens unchanged...
            while j < token_kinds.len() && !matches!(token_kinds[j], Token::BlankLine(_)) {
                result.push((token_kinds[j].clone(), tokens[j].1.clone()));
                j += 1;
            }
            // ...and the terminating BlankLine, if present.
            if j < token_kinds.len() && matches!(token_kinds[j], Token::BlankLine(_)) {
                result.push((token_kinds[j].clone(), tokens[j].1.clone()));
                j += 1;
            }
            i = j;
        }
        // Close every level still open when the input ends.
        for _ in 0..current_level {
            result.push((Token::Dedent(vec![]), 0..0));
        }
        Ok(result)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::testing::factories::{mk_token, Tokens};
    use crate::lex::token::Token;

    /// Attaches synthetic, strictly increasing one-byte spans to `tokens`
    /// so the mapper's located-token input shape is satisfied.
    fn with_loc(tokens: Vec<Token>) -> Tokens {
        tokens
            .into_iter()
            .enumerate()
            .map(|(idx, token)| mk_token(token, idx, idx + 1))
            .collect()
    }

    /// Drops spans and normalizes payload-carrying variants (Indent/Dedent
    /// source tokens, BlankLine text) so tests compare token kinds only.
    fn strip_loc(pairs: Tokens) -> Vec<Token> {
        pairs
            .into_iter()
            .map(|(t, _)| {
                match t {
                    Token::Indent(_) => Token::Indent(vec![]),
                    Token::Dedent(_) => Token::Dedent(vec![]),
                    Token::BlankLine(_) => Token::BlankLine(Some("\n".to_string())),
                    other => other,
                }
            })
            .collect()
    }

    // One indent step produces one Indent, closed by a trailing Dedent.
    #[test]
    fn test_simple_indentation() {
        let input = vec![
            Token::Text("a".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Dash,
            Token::BlankLine(Some("\n".to_string())),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(
            stripped,
            vec![
                Token::Text("a".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Indent(vec![]),
                Token::Dash,
                Token::BlankLine(Some("\n".to_string())),
                Token::Dedent(vec![]),
            ]
        );
    }

    // Jump of two levels emits two Indents; dropping one level emits one Dedent.
    #[test]
    fn test_multiple_indent_levels() {
        let input = vec![
            Token::Text("a".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Indentation,
            Token::Dash,
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Text("b".to_string()),
            Token::BlankLine(Some("\n".to_string())),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(
            stripped,
            vec![
                Token::Text("a".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Indent(vec![]),
                Token::Indent(vec![]),
                Token::Dash,
                Token::BlankLine(Some("\n".to_string())),
                Token::Dedent(vec![]),
                Token::Text("b".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Dedent(vec![]),
            ]
        );
    }

    // A stream with no Indentation tokens passes through unchanged.
    #[test]
    fn test_no_indentation() {
        let input = vec![
            Token::Text("a".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Text("b".to_string()),
            Token::BlankLine(Some("\n".to_string())),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input.clone())).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(stripped, input);
    }

    // Empty input yields empty output (no spurious Dedents).
    #[test]
    fn test_empty_input() {
        let input = vec![];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(stripped, vec![]);
    }

    // A single unindented line without a trailing BlankLine is forwarded as-is.
    #[test]
    fn test_single_line() {
        let input = vec![Token::Text("a".to_string())];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(stripped, vec![Token::Text("a".to_string())]);
    }

    // A fully blank line between content lines is forwarded and does not
    // change the indentation level by itself.
    #[test]
    fn test_blank_lines() {
        let input = vec![
            Token::Text("a".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Dash,
            Token::BlankLine(Some("\n".to_string())),
            Token::BlankLine(Some("\n".to_string())),
            Token::Dash,
            Token::BlankLine(Some("\n".to_string())),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(
            stripped,
            vec![
                Token::Text("a".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Indent(vec![]),
                Token::Dash,
                Token::BlankLine(Some("\n".to_string())),
                Token::BlankLine(Some("\n".to_string())),
                Token::Dedent(vec![]),
                Token::Dash,
                Token::BlankLine(Some("\n".to_string())),
            ]
        );
    }

    // A blank line that still carries Indentation tokens has them dropped;
    // only its BlankLine survives, and the level is untouched.
    #[test]
    fn test_blank_lines_with_indentation() {
        let input = vec![
            Token::Text("a".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Dash,
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::BlankLine(Some("\n".to_string())),
            Token::Dash,
            Token::BlankLine(Some("\n".to_string())),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(
            stripped,
            vec![
                Token::Text("a".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Indent(vec![]),
                Token::Dash,
                Token::BlankLine(Some("\n".to_string())),
                Token::BlankLine(Some("\n".to_string())),
                Token::Dedent(vec![]),
                Token::Dash,
                Token::BlankLine(Some("\n".to_string())),
            ]
        );
    }

    // EOF while two levels deep: both levels are closed with Dedents.
    #[test]
    fn test_file_ending_while_indented() {
        let input = vec![
            Token::Text("a".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Dash,
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Indentation,
            Token::Text("b".to_string()),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(
            stripped,
            vec![
                Token::Text("a".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Indent(vec![]),
                Token::Dash,
                Token::BlankLine(Some("\n".to_string())),
                Token::Indent(vec![]),
                Token::Text("b".to_string()),
                Token::Dedent(vec![]),
                Token::Dedent(vec![]),
            ]
        );
    }

    // A three-level drop back to zero emits three consecutive Dedents.
    #[test]
    fn test_sharp_drop_in_indentation() {
        let input = vec![
            Token::Text("a".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Indentation,
            Token::Indentation,
            Token::Dash,
            Token::BlankLine(Some("\n".to_string())),
            Token::Text("b".to_string()),
            Token::BlankLine(Some("\n".to_string())),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(
            stripped,
            vec![
                Token::Text("a".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Indent(vec![]),
                Token::Indent(vec![]),
                Token::Indent(vec![]),
                Token::Dash,
                Token::BlankLine(Some("\n".to_string())),
                Token::Dedent(vec![]),
                Token::Dedent(vec![]),
                Token::Dedent(vec![]),
                Token::Text("b".to_string()),
                Token::BlankLine(Some("\n".to_string())),
            ]
        );
    }

    // Counting stops at the first non-Indentation token.
    #[test]
    fn test_count_line_indent_steps() {
        let tokens = vec![
            Token::Indentation,
            Token::Indentation,
            Token::Dash,
            Token::Text("a".to_string()),
        ];
        assert_eq!(count_line_indent_steps(&tokens, 0), 2);
        assert_eq!(count_line_indent_steps(&tokens, 2), 0);
    }

    // Line start is 0 at stream start, or the index after the nearest
    // preceding BlankLine.
    #[test]
    fn test_find_line_start() {
        let tokens = vec![
            Token::Text("a".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Dash,
        ];
        assert_eq!(find_line_start(&tokens, 0), 0);
        assert_eq!(find_line_start(&tokens, 2), 2);
        assert_eq!(find_line_start(&tokens, 3), 2);
    }

    // The Indent token must capture the originating Indentation token with
    // its real byte range, while the Indent itself carries the 0..0 span.
    #[test]
    fn test_source_tokens_captured_in_indent() {
        let input: Tokens = vec![
            mk_token(Token::Text("a".to_string()), 0, 1),
            mk_token(Token::BlankLine(Some("\n".to_string())), 1, 2),
            mk_token(Token::Indentation, 2, 6),
            mk_token(Token::Text("b".to_string()), 6, 7),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(input).unwrap();
        let indent_pos = tokens
            .iter()
            .position(|(t, _)| matches!(t, Token::Indent(_)))
            .expect("Should have Indent token");
        if let Token::Indent(source_tokens) = &tokens[indent_pos].0 {
            assert_eq!(
                source_tokens.len(),
                1,
                "Indent should capture 1 source Indentation token"
            );
            assert_eq!(source_tokens[0].0, Token::Indentation);
            assert_eq!(
                source_tokens[0].1,
                2..6,
                "Source token should have correct range"
            );
        } else {
            panic!("Expected Indent token");
        }
        assert_eq!(tokens[indent_pos].1, 0..0, "Indent uses placeholder span");
    }

    // With two indent steps, each emitted Indent captures its own source
    // Indentation token, in order.
    #[test]
    fn test_source_tokens_captured_in_multiple_indents() {
        let input: Tokens = vec![
            mk_token(Token::Text("a".to_string()), 0, 1),
            mk_token(Token::BlankLine(Some("\n".to_string())), 1, 2),
            mk_token(Token::Indentation, 2, 6),
            mk_token(Token::Indentation, 6, 10),
            mk_token(Token::Text("b".to_string()), 10, 11),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(input).unwrap();
        let indent_positions: Vec<_> = tokens
            .iter()
            .enumerate()
            .filter_map(|(i, (t, _))| {
                if matches!(t, Token::Indent(_)) {
                    Some(i)
                } else {
                    None
                }
            })
            .collect();
        assert_eq!(indent_positions.len(), 2, "Should have 2 Indent tokens");
        if let Token::Indent(source_tokens) = &tokens[indent_positions[0]].0 {
            assert_eq!(source_tokens.len(), 1);
            assert_eq!(source_tokens[0].1, 2..6, "First Indent source range");
        }
        if let Token::Indent(source_tokens) = &tokens[indent_positions[1]].0 {
            assert_eq!(source_tokens.len(), 1);
            assert_eq!(source_tokens[0].1, 6..10, "Second Indent source range");
        }
    }

    // An indented-but-blank line between two level-2 lines must not produce
    // a Dedent/Indent pair; the block stays open across it.
    #[test]
    fn test_blank_line_with_spaces_does_not_dedent() {
        let input = vec![
            Token::Indentation,
            Token::Indentation,
            Token::Text("Foo".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Indentation,
            Token::Text("Foo2".to_string()),
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::BlankLine(Some("\n".to_string())),
            Token::Indentation,
            Token::Indentation,
            Token::Text("Bar".to_string()),
            Token::BlankLine(Some("\n".to_string())),
        ];
        let mut mapper = SemanticIndentationMapper::new();
        let tokens = mapper.map(with_loc(input)).unwrap();
        let stripped = strip_loc(tokens);
        assert_eq!(
            stripped,
            vec![
                Token::Indent(vec![]),
                Token::Indent(vec![]),
                Token::Text("Foo".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Text("Foo2".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::BlankLine(Some("\n".to_string())),
                Token::Text("Bar".to_string()),
                Token::BlankLine(Some("\n".to_string())),
                Token::Dedent(vec![]),
                Token::Dedent(vec![]),
            ],
            "Blank lines with only spaces should NOT produce dedent/indent tokens"
        );
    }
}