1use text_size::{TextRange, TextSize};
33
34use crate::SyntaxKind;
35use crate::lexer::RawToken;
36
37const TAB_SIZE: u32 = 4;
39
40#[derive(Debug, Clone)]
45struct LambdaCtx {
46 saved_indent_stack: Vec<u32>,
47 base: u32,
48 open_bracket_depth: u32,
52}
53
54#[derive(Debug, Clone, PartialEq, Eq)]
57pub struct IndentDiagnostic {
58 pub range: TextRange,
60 pub message: String,
62}
63
64#[derive(Debug, Clone, Copy, PartialEq, Eq)]
66enum IndentChar {
67 Tab,
68 Space,
69}
70
71#[must_use]
77pub fn run(tokens: &[RawToken], src: &str) -> (Vec<RawToken>, Vec<IndentDiagnostic>) {
78 let mut p = PrePass {
79 src,
80 out: Vec::with_capacity(tokens.len() + 16),
81 diags: Vec::new(),
82 indent_stack: vec![0],
83 bracket_depth: 0,
84 indent_char: None,
85 lambda_stack: Vec::new(),
86 };
87 p.run_lines(tokens);
88 (p.out, p.diags)
89}
90
91struct PrePass<'s> {
92 src: &'s str,
93 out: Vec<RawToken>,
94 diags: Vec<IndentDiagnostic>,
95 indent_stack: Vec<u32>,
96 bracket_depth: u32,
97 indent_char: Option<IndentChar>,
98 lambda_stack: Vec<LambdaCtx>,
101}
102
103impl PrePass<'_> {
104 fn run_lines(&mut self, tokens: &[RawToken]) {
105 let mut start = 0usize;
109 let mut i = 0usize;
110 while i < tokens.len() {
111 if tokens[i].kind == SyntaxKind::NewlinePhys {
112 self.line(&tokens[start..=i]);
113 start = i + 1;
114 }
115 i += 1;
116 }
117 if start < tokens.len() {
118 self.line(&tokens[start..]); }
120 self.finish(src_end(self.src));
121 }
122
123 fn line(&mut self, line: &[RawToken]) {
130 let Some(first) = line.iter().find(|t| !t.kind.is_trivia()) else {
135 self.copy_verbatim(line);
136 return;
137 };
138 let col = self.column(line);
139 let at = first.range.start();
140
141 self.close_lambdas(col, at);
143
144 let in_lambda = !self.lambda_stack.is_empty();
145 let suppressed = !in_lambda && self.bracket_depth > 0;
146
147 if !suppressed {
149 self.diagnose_indent(line);
150 self.emit_indent_dedent(col, at);
151 }
152
153 let mut has_terminator = false;
156 let mut last_meaningful: Option<SyntaxKind> = None;
157 for tok in line {
158 if tok.kind == SyntaxKind::NewlinePhys {
159 has_terminator = true;
160 let opens_lambda =
161 self.bracket_depth > 0 && last_meaningful == Some(SyntaxKind::Colon);
162 if self.bracket_depth == 0 || in_lambda || opens_lambda {
163 self.push_marker(SyntaxKind::Newline, tok.range.start());
164 }
165 self.out.push(*tok);
166 } else {
167 if matches!(
171 tok.kind,
172 SyntaxKind::RParen | SyntaxKind::RBrace | SyntaxKind::RBrack
173 ) && !self.lambda_stack.is_empty()
174 {
175 let new_depth = self.bracket_depth.saturating_sub(1);
176 self.close_lambdas_on_bracket(new_depth, tok.range.start());
177 }
178 self.out.push(*tok);
179 self.track_bracket(tok.kind);
180 if !tok.kind.is_trivia() {
181 last_meaningful = Some(tok.kind);
182 }
183 }
184 }
185 if !has_terminator && (self.bracket_depth == 0 || in_lambda) {
187 self.push_marker(SyntaxKind::Newline, src_end(self.src));
188 }
189
190 if self.bracket_depth > 0 && last_meaningful == Some(SyntaxKind::Colon) {
193 let saved = std::mem::replace(&mut self.indent_stack, vec![col]);
194 self.lambda_stack.push(LambdaCtx {
195 saved_indent_stack: saved,
196 base: col,
197 open_bracket_depth: self.bracket_depth,
198 });
199 }
200 }
201
202 fn close_lambdas(&mut self, col: u32, at: TextSize) {
206 while self.lambda_stack.last().is_some_and(|ctx| col <= ctx.base) {
207 let base = self.lambda_stack.last().expect("checked").base;
208 while *self.indent_stack.last().expect("lambda base present") > base {
209 self.indent_stack.pop();
210 self.push_marker(SyntaxKind::Dedent, at);
211 }
212 let ctx = self.lambda_stack.pop().expect("checked");
213 self.indent_stack = ctx.saved_indent_stack;
214 }
215 }
216
217 fn close_lambdas_on_bracket(&mut self, new_depth: u32, at: TextSize) {
224 while self
225 .lambda_stack
226 .last()
227 .is_some_and(|ctx| ctx.open_bracket_depth > new_depth)
228 {
229 let base = self.lambda_stack.last().expect("checked").base;
230 while *self.indent_stack.last().expect("lambda base present") > base {
231 self.indent_stack.pop();
232 self.push_marker(SyntaxKind::Dedent, at);
233 }
234 let ctx = self.lambda_stack.pop().expect("checked");
235 self.indent_stack = ctx.saved_indent_stack;
236 }
237 }
238
239 fn copy_verbatim(&mut self, line: &[RawToken]) {
242 for tok in line {
243 self.out.push(*tok);
244 if tok.kind != SyntaxKind::NewlinePhys {
245 self.track_bracket(tok.kind);
246 }
247 }
248 }
249
250 fn emit_indent_dedent(&mut self, col: u32, at: TextSize) {
252 let top = *self.indent_stack.last().expect("indent stack has a base 0");
253 if col > top {
254 self.indent_stack.push(col);
255 self.push_marker(SyntaxKind::Indent, at);
256 } else if col < top {
257 while *self.indent_stack.last().expect("base 0 guards the loop") > col {
258 self.indent_stack.pop();
259 self.push_marker(SyntaxKind::Dedent, at);
260 }
261 if *self.indent_stack.last().expect("non-empty") != col {
262 self.diags.push(IndentDiagnostic {
263 range: TextRange::empty(at),
264 message: "Unindent does not match any outer indentation level.".to_owned(),
265 });
266 self.indent_stack.push(col); }
268 }
269 }
270
271 fn column(&self, line: &[RawToken]) -> u32 {
275 let Some(ws) = line.first().filter(|t| t.kind == SyntaxKind::Whitespace) else {
276 return 0;
277 };
278 self.src[ws.range]
279 .bytes()
280 .fold(0u32, |col, b| col + if b == b'\t' { TAB_SIZE } else { 1 })
281 }
282
283 fn diagnose_indent(&mut self, line: &[RawToken]) {
286 let Some(ws) = line.first().filter(|t| t.kind == SyntaxKind::Whitespace) else {
287 return;
288 };
289 let text = &self.src[ws.range];
290 let mut saw_tab = false;
291 let mut saw_space = false;
292 for b in text.bytes() {
293 saw_tab |= b == b'\t';
294 saw_space |= b == b' ';
295 }
296 if saw_tab && saw_space {
297 self.diags.push(IndentDiagnostic {
298 range: ws.range,
299 message: "Mixed use of tabs and spaces for indentation.".to_owned(),
300 });
301 } else if let Some(first) = text.bytes().next() {
302 let this = if first == b'\t' {
303 IndentChar::Tab
304 } else {
305 IndentChar::Space
306 };
307 match self.indent_char {
308 None => self.indent_char = Some(this),
309 Some(file) if file != this => {
310 let (used, before) = match this {
311 IndentChar::Tab => ("tab", "space"),
312 IndentChar::Space => ("space", "tab"),
313 };
314 self.diags.push(IndentDiagnostic {
315 range: ws.range,
316 message: format!(
317 "Used {used} character for indentation instead of {before} as used before in the file."
318 ),
319 });
320 }
321 Some(_) => {}
322 }
323 }
324 }
325
326 fn finish(&mut self, at: TextSize) {
329 self.close_lambdas(0, at); while *self.indent_stack.last().expect("base 0") > 0 {
331 self.indent_stack.pop();
332 self.push_marker(SyntaxKind::Dedent, at);
333 }
334 }
335
336 fn track_bracket(&mut self, kind: SyntaxKind) {
337 match kind {
338 SyntaxKind::LParen | SyntaxKind::LBrack | SyntaxKind::LBrace => {
339 self.bracket_depth += 1;
340 }
341 SyntaxKind::RParen | SyntaxKind::RBrack | SyntaxKind::RBrace => {
342 self.bracket_depth = self.bracket_depth.saturating_sub(1);
343 }
344 _ => {}
345 }
346 }
347
348 fn push_marker(&mut self, kind: SyntaxKind, at: TextSize) {
349 self.out.push(RawToken {
350 kind,
351 range: TextRange::empty(at),
352 });
353 }
354}
355
356fn src_end(src: &str) -> TextSize {
358 TextSize::of(src)
359}
360
361#[cfg(test)]
362mod tests {
363 use super::*;
364 use crate::tokenize;
365
366 fn prepass(src: &str) -> Vec<RawToken> {
367 run(&tokenize(src), src).0
368 }
369
370 fn structure(src: &str) -> Vec<SyntaxKind> {
373 prepass(src)
374 .into_iter()
375 .filter(|t| !t.kind.is_trivia())
376 .map(|t| t.kind)
377 .collect()
378 }
379
380 fn diagnostics(src: &str) -> Vec<IndentDiagnostic> {
381 run(&tokenize(src), src).1
382 }
383
384 fn assert_lossless(src: &str) {
387 let rebuilt: String = prepass(src).iter().map(|t| &src[t.range]).collect();
388 assert_eq!(rebuilt, src, "prepass not lossless for {src:?}");
389 }
390
391 fn count(src: &str, kind: SyntaxKind) -> usize {
392 structure(src).into_iter().filter(|&k| k == kind).count()
393 }
394
395 #[test]
396 fn nested_func_if_drives_indent_dedent() {
397 use SyntaxKind as S;
398 let src = "func f():\n\tif x:\n\t\treturn\n";
399 assert_lossless(src);
400 assert_eq!(
401 structure(src),
402 vec![
403 S::FuncKw,
404 S::Ident,
405 S::LParen,
406 S::RParen,
407 S::Colon,
408 S::Newline,
409 S::Indent,
410 S::IfKw,
411 S::Ident,
412 S::Colon,
413 S::Newline,
414 S::Indent,
415 S::ReturnKw,
416 S::Newline,
417 S::Dedent,
418 S::Dedent,
419 ]
420 );
421 }
422
423 #[test]
424 fn line_continuation_does_not_indent() {
425 let src = "a = 1 + \\\n 2\n";
427 assert_lossless(src);
428 assert_eq!(count(src, SyntaxKind::Indent), 0);
429 assert_eq!(count(src, SyntaxKind::Newline), 1); }
431
432 #[test]
433 fn multiline_brackets_suppress_indentation() {
434 let src = "var a = [\n\t1,\n\t2,\n]\n";
436 assert_lossless(src);
437 assert_eq!(count(src, SyntaxKind::Indent), 0);
438 assert_eq!(count(src, SyntaxKind::Dedent), 0);
439 assert_eq!(count(src, SyntaxKind::Newline), 1); }
441
442 #[test]
443 fn top_level_lambda_body_indents() {
444 use SyntaxKind as S;
446 let src = "var f = func():\n\tprint()\nx = 1\n";
447 assert_lossless(src);
448 assert_eq!(count(src, S::Indent), 1);
449 assert_eq!(count(src, S::Dedent), 1);
450 }
451
452 #[test]
453 fn blank_and_comment_only_lines_keep_state() {
454 let src = "func f():\n\tx = 1\n\n# top-level comment\n\ty = 2\n";
456 assert_lossless(src);
457 assert_eq!(count(src, SyntaxKind::Indent), 1);
460 assert_eq!(count(src, SyntaxKind::Dedent), 1);
461 }
462
463 #[test]
464 fn inline_block_has_no_indent() {
465 let src = "func f(): return 1\n";
467 assert_lossless(src);
468 assert_eq!(count(src, SyntaxKind::Indent), 0);
469 assert_eq!(count(src, SyntaxKind::Newline), 1);
470 }
471
472 #[test]
473 fn dedent_to_eof_without_trailing_newline() {
474 use SyntaxKind as S;
476 let src = "func f():\n\tpass";
477 assert_lossless(src);
478 let s = structure(src);
479 assert_eq!(s.last(), Some(&S::Dedent));
480 assert_eq!(count(src, S::Indent), 1);
481 assert_eq!(count(src, S::Dedent), 1);
482 assert!(s.contains(&S::Newline));
484 }
485
486 #[test]
487 fn empty_and_comment_only_files() {
488 assert_lossless("");
490 assert_eq!(structure(""), Vec::<SyntaxKind>::new());
491 assert_lossless("# just a comment\n");
492 assert_eq!(count("# just a comment\n", SyntaxKind::Indent), 0);
493 }
494
495 #[test]
496 fn mixed_tabs_and_spaces_diagnoses_but_recovers() {
497 let src = "func f():\n \tpass\n";
499 assert_lossless(src);
500 let diags = diagnostics(src);
501 assert!(
502 diags
503 .iter()
504 .any(|d| d.message.contains("Mixed use of tabs and spaces")),
505 "expected a mixed-indent diagnostic, got {diags:?}"
506 );
507 }
508
509 #[test]
510 fn inconsistent_indent_char_across_lines_is_flagged() {
511 let src = "func f():\n\ta = 1\nfunc g():\n b = 2\n";
514 let diags = diagnostics(src);
515 assert!(
516 diags.iter().any(|d| d.message.contains("instead of")),
517 "expected an inconsistent-indent diagnostic, got {diags:?}"
518 );
519 }
520
521 #[test]
522 fn match_block_nests() {
523 use SyntaxKind as S;
524 let src = "match x:\n\t1:\n\t\tpass\n";
525 assert_lossless(src);
526 assert_eq!(count(src, S::Indent), 2);
527 assert_eq!(count(src, S::Dedent), 2);
528 assert_eq!(structure(src)[0], S::MatchKw);
529 }
530
531 #[test]
532 fn multiline_lambda_inside_brackets_indents() {
533 use SyntaxKind as S;
535 let src = "arr.sort_custom(func(a, b):\n\treturn a < b\n)\n";
536 assert_lossless(src);
537 assert_eq!(count(src, S::Indent), 1, "lambda body should Indent once");
538 assert_eq!(count(src, S::Dedent), 1, "lambda body should Dedent once");
539 let s = structure(src);
541 let colon = s.iter().position(|&k| k == S::Colon).unwrap();
543 assert_eq!(s[colon + 1], S::Newline);
544 assert_eq!(s[colon + 2], S::Indent);
545 let rparen = s.iter().rposition(|&k| k == S::RParen).unwrap();
547 assert_eq!(s[rparen - 1], S::Dedent);
548 }
549
550 #[test]
551 fn lambda_inside_multiline_array() {
552 use SyntaxKind as S;
554 let src = "var a = [\n\tfunc():\n\t\tprint()\n]\n";
555 assert_lossless(src);
556 assert_eq!(count(src, S::Indent), 1);
557 assert_eq!(count(src, S::Dedent), 1);
558 }
559
560 #[test]
561 fn nested_lambdas_inside_brackets() {
562 use SyntaxKind as S;
563 let src = "outer(func():\n\tinner(func():\n\t\tbody\n\t)\n)\n";
564 assert_lossless(src);
565 assert_eq!(count(src, S::Indent), 2, "two nested lambda bodies");
566 assert_eq!(count(src, S::Dedent), 2);
567 }
568
569 #[test]
570 fn single_line_lambda_inside_brackets_has_no_indent() {
571 use SyntaxKind as S;
573 let src = "arr.map(func(x): x * 2)\n";
574 assert_lossless(src);
575 assert_eq!(count(src, S::Indent), 0);
576 assert_eq!(count(src, S::Dedent), 0);
577 assert_eq!(count(src, S::Newline), 1);
578 }
579}