1use text_size::{TextRange, TextSize};
33
34use crate::SyntaxKind;
35use crate::lexer::RawToken;
36
/// Number of columns a single tab byte contributes when a line's leading
/// whitespace is measured (a space, or any other byte, contributes one).
const TAB_SIZE: u32 = 4;
39
/// State remembered for one indentation-sensitive body that was opened while
/// inside brackets (a line ending in `:` at a non-zero bracket depth).
#[derive(Debug, Clone)]
struct LambdaCtx {
    /// The indent stack that was active before this context was opened; it is
    /// put back in place when the context is closed.
    saved_indent_stack: Vec<u32>,
    /// Indentation column of the line that opened the context. A later line
    /// whose column is less than or equal to this value closes the context.
    base: u32,
    /// Bracket depth measured at the end of the opening line.
    open_bracket_depth: u32,
}
53
/// A problem found while analysing indentation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndentDiagnostic {
    /// Source range the diagnostic points at: either the offending leading
    /// whitespace or an empty range at the first significant token of the line.
    pub range: TextRange,
    /// Human-readable description of the problem.
    pub message: String,
}
63
/// The character a line's indentation starts with; used to detect a file that
/// switches between tab and space indentation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IndentChar {
    /// The indentation starts with a tab byte.
    Tab,
    /// The indentation starts with any other byte (normally a space).
    Space,
}
70
71#[must_use]
77pub fn run(tokens: &[RawToken], src: &str) -> (Vec<RawToken>, Vec<IndentDiagnostic>) {
78 let mut p = PrePass {
79 src,
80 out: Vec::with_capacity(tokens.len() + 16),
81 diags: Vec::new(),
82 indent_stack: vec![0],
83 bracket_depth: 0,
84 indent_char: None,
85 lambda_stack: Vec::new(),
86 };
87 p.run_lines(tokens);
88 (p.out, p.diags)
89}
90
/// Mutable state of one pre-pass run over a single source text.
struct PrePass<'s> {
    /// The source text the tokens were lexed from; token ranges index into it.
    src: &'s str,
    /// Output token stream: the input tokens plus the inserted markers.
    out: Vec<RawToken>,
    /// Diagnostics collected so far.
    diags: Vec<IndentDiagnostic>,
    /// Columns of the currently open indentation levels, innermost last.
    indent_stack: Vec<u32>,
    /// Number of currently unclosed `(`, `[` and `{` brackets.
    bracket_depth: u32,
    /// Indentation character of the first indented line that was checked, if
    /// any line has been checked yet.
    indent_char: Option<IndentChar>,
    /// Open bracketed-body contexts, innermost last.
    lambda_stack: Vec<LambdaCtx>,
}
102
103impl PrePass<'_> {
104 fn run_lines(&mut self, tokens: &[RawToken]) {
105 let mut start = 0usize;
109 let mut i = 0usize;
110 while i < tokens.len() {
111 if tokens[i].kind == SyntaxKind::NewlinePhys {
112 self.line(&tokens[start..=i]);
113 start = i + 1;
114 }
115 i += 1;
116 }
117 if start < tokens.len() {
118 self.line(&tokens[start..]); }
120 self.finish(src_end(self.src));
121 }
122
123 fn line(&mut self, line: &[RawToken]) {
130 let Some(first) = line.iter().find(|t| !t.kind.is_trivia()) else {
135 self.copy_verbatim(line);
136 return;
137 };
138 let col = self.column(line);
139 let at = first.range.start();
140
141 if matches!(
148 first.kind,
149 SyntaxKind::RParen | SyntaxKind::RBrace | SyntaxKind::RBrack
150 ) && self
151 .lambda_stack
152 .last()
153 .is_some_and(|ctx| ctx.open_bracket_depth >= self.bracket_depth)
154 {
155 self.close_lambdas_on_bracket(self.bracket_depth.saturating_sub(1), at);
156 }
157
158 self.close_lambdas(col, at);
160
161 let in_lambda = !self.lambda_stack.is_empty();
162 let suppressed = !in_lambda && self.bracket_depth > 0;
163
164 if !suppressed {
166 self.diagnose_indent(line);
167 self.emit_indent_dedent(col, at);
168 }
169
170 let mut has_terminator = false;
173 let mut last_meaningful: Option<SyntaxKind> = None;
174 for tok in line {
175 if tok.kind == SyntaxKind::NewlinePhys {
176 has_terminator = true;
177 let opens_lambda =
178 self.bracket_depth > 0 && last_meaningful == Some(SyntaxKind::Colon);
179 if self.bracket_depth == 0 || in_lambda || opens_lambda {
180 self.push_marker(SyntaxKind::Newline, tok.range.start());
181 }
182 self.out.push(*tok);
183 } else {
184 if matches!(
188 tok.kind,
189 SyntaxKind::RParen | SyntaxKind::RBrace | SyntaxKind::RBrack
190 ) && !self.lambda_stack.is_empty()
191 {
192 let new_depth = self.bracket_depth.saturating_sub(1);
193 self.close_lambdas_on_bracket(new_depth, tok.range.start());
194 }
195 else if tok.kind == SyntaxKind::Comma
200 && self
201 .lambda_stack
202 .last()
203 .is_some_and(|ctx| ctx.open_bracket_depth == self.bracket_depth)
204 {
205 self.close_lambdas_on_bracket(
206 self.bracket_depth.saturating_sub(1),
207 tok.range.start(),
208 );
209 }
210 self.out.push(*tok);
211 self.track_bracket(tok.kind);
212 if !tok.kind.is_trivia() {
213 last_meaningful = Some(tok.kind);
214 }
215 }
216 }
217 if !has_terminator && (self.bracket_depth == 0 || in_lambda) {
219 self.push_marker(SyntaxKind::Newline, src_end(self.src));
220 }
221
222 if self.bracket_depth > 0 && last_meaningful == Some(SyntaxKind::Colon) {
225 let saved = std::mem::replace(&mut self.indent_stack, vec![col]);
226 self.lambda_stack.push(LambdaCtx {
227 saved_indent_stack: saved,
228 base: col,
229 open_bracket_depth: self.bracket_depth,
230 });
231 }
232 }
233
234 fn close_lambdas(&mut self, col: u32, at: TextSize) {
238 while self.lambda_stack.last().is_some_and(|ctx| col <= ctx.base) {
239 let base = self.lambda_stack.last().expect("checked").base;
240 while *self.indent_stack.last().expect("lambda base present") > base {
241 self.indent_stack.pop();
242 self.push_marker(SyntaxKind::Dedent, at);
243 }
244 let ctx = self.lambda_stack.pop().expect("checked");
245 self.indent_stack = ctx.saved_indent_stack;
246 }
247 }
248
249 fn close_lambdas_on_bracket(&mut self, new_depth: u32, at: TextSize) {
256 while self
257 .lambda_stack
258 .last()
259 .is_some_and(|ctx| ctx.open_bracket_depth > new_depth)
260 {
261 let base = self.lambda_stack.last().expect("checked").base;
262 while *self.indent_stack.last().expect("lambda base present") > base {
263 self.indent_stack.pop();
264 self.push_marker(SyntaxKind::Dedent, at);
265 }
266 let ctx = self.lambda_stack.pop().expect("checked");
267 self.indent_stack = ctx.saved_indent_stack;
268 }
269 }
270
271 fn copy_verbatim(&mut self, line: &[RawToken]) {
274 for tok in line {
275 self.out.push(*tok);
276 if tok.kind != SyntaxKind::NewlinePhys {
277 self.track_bracket(tok.kind);
278 }
279 }
280 }
281
282 fn emit_indent_dedent(&mut self, col: u32, at: TextSize) {
284 let top = *self.indent_stack.last().expect("indent stack has a base 0");
285 if col > top {
286 self.indent_stack.push(col);
287 self.push_marker(SyntaxKind::Indent, at);
288 } else if col < top {
289 while *self.indent_stack.last().expect("base 0 guards the loop") > col {
290 self.indent_stack.pop();
291 self.push_marker(SyntaxKind::Dedent, at);
292 }
293 if *self.indent_stack.last().expect("non-empty") != col {
294 self.diags.push(IndentDiagnostic {
295 range: TextRange::empty(at),
296 message: "Unindent does not match any outer indentation level.".to_owned(),
297 });
298 self.indent_stack.push(col); }
300 }
301 }
302
303 fn column(&self, line: &[RawToken]) -> u32 {
307 let Some(ws) = line.first().filter(|t| t.kind == SyntaxKind::Whitespace) else {
308 return 0;
309 };
310 self.src[ws.range]
311 .bytes()
312 .fold(0u32, |col, b| col + if b == b'\t' { TAB_SIZE } else { 1 })
313 }
314
315 fn diagnose_indent(&mut self, line: &[RawToken]) {
318 let Some(ws) = line.first().filter(|t| t.kind == SyntaxKind::Whitespace) else {
319 return;
320 };
321 let text = &self.src[ws.range];
322 let mut saw_tab = false;
323 let mut saw_space = false;
324 for b in text.bytes() {
325 saw_tab |= b == b'\t';
326 saw_space |= b == b' ';
327 }
328 if saw_tab && saw_space {
329 self.diags.push(IndentDiagnostic {
330 range: ws.range,
331 message: "Mixed use of tabs and spaces for indentation.".to_owned(),
332 });
333 } else if let Some(first) = text.bytes().next() {
334 let this = if first == b'\t' {
335 IndentChar::Tab
336 } else {
337 IndentChar::Space
338 };
339 match self.indent_char {
340 None => self.indent_char = Some(this),
341 Some(file) if file != this => {
342 let (used, before) = match this {
343 IndentChar::Tab => ("tab", "space"),
344 IndentChar::Space => ("space", "tab"),
345 };
346 self.diags.push(IndentDiagnostic {
347 range: ws.range,
348 message: format!(
349 "Used {used} character for indentation instead of {before} as used before in the file."
350 ),
351 });
352 }
353 Some(_) => {}
354 }
355 }
356 }
357
358 fn finish(&mut self, at: TextSize) {
361 self.close_lambdas(0, at); while *self.indent_stack.last().expect("base 0") > 0 {
363 self.indent_stack.pop();
364 self.push_marker(SyntaxKind::Dedent, at);
365 }
366 }
367
368 fn track_bracket(&mut self, kind: SyntaxKind) {
369 match kind {
370 SyntaxKind::LParen | SyntaxKind::LBrack | SyntaxKind::LBrace => {
371 self.bracket_depth += 1;
372 }
373 SyntaxKind::RParen | SyntaxKind::RBrack | SyntaxKind::RBrace => {
374 self.bracket_depth = self.bracket_depth.saturating_sub(1);
375 }
376 _ => {}
377 }
378 }
379
380 fn push_marker(&mut self, kind: SyntaxKind, at: TextSize) {
381 self.out.push(RawToken {
382 kind,
383 range: TextRange::empty(at),
384 });
385 }
386}
387
/// Returns the offset just past the last byte of `src`, i.e. the position
/// used for markers that are emitted at the end of the source.
fn src_end(src: &str) -> TextSize {
    TextSize::of(src)
}
392
#[cfg(test)]
mod tests {
    use super::*;
    use crate::SyntaxKind as S;
    use crate::tokenize;

    /// Tokenizes `src` and returns the pre-pass output tokens.
    fn prepass(src: &str) -> Vec<RawToken> {
        let (tokens, _diags) = run(&tokenize(src), src);
        tokens
    }

    /// Kinds of the pre-pass output tokens with all trivia removed.
    fn structure(src: &str) -> Vec<SyntaxKind> {
        prepass(src)
            .iter()
            .map(|tok| tok.kind)
            .filter(|kind| !kind.is_trivia())
            .collect()
    }

    /// Tokenizes `src` and returns the pre-pass diagnostics.
    fn diagnostics(src: &str) -> Vec<IndentDiagnostic> {
        let (_tokens, diags) = run(&tokenize(src), src);
        diags
    }

    /// Asserts that concatenating the text of every output token gives back
    /// `src` exactly (markers are zero-width, so they contribute nothing).
    fn assert_lossless(src: &str) {
        let mut rebuilt = String::with_capacity(src.len());
        for tok in prepass(src) {
            rebuilt.push_str(&src[tok.range]);
        }
        assert_eq!(rebuilt, src, "prepass not lossless for {src:?}");
    }

    /// Number of non-trivia output tokens of the given kind.
    fn count(src: &str, kind: SyntaxKind) -> usize {
        structure(src).iter().filter(|&&k| k == kind).count()
    }

    /// Two nested blocks give two indents, closed by two dedents at the end.
    #[test]
    fn nested_func_if_drives_indent_dedent() {
        let src = "func f():\n\tif x:\n\t\treturn\n";
        assert_lossless(src);
        let expected = vec![
            S::FuncKw,
            S::Ident,
            S::LParen,
            S::RParen,
            S::Colon,
            S::Newline,
            S::Indent,
            S::IfKw,
            S::Ident,
            S::Colon,
            S::Newline,
            S::Indent,
            S::ReturnKw,
            S::Newline,
            S::Dedent,
            S::Dedent,
        ];
        assert_eq!(structure(src), expected);
    }

    /// A backslash-continued statement is one logical line with no indent.
    #[test]
    fn line_continuation_does_not_indent() {
        let src = "a = 1 + \\\n 2\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 0);
        assert_eq!(count(src, S::Newline), 1);
    }

    /// Inside a multi-line array literal, indentation and newlines are ignored.
    #[test]
    fn multiline_brackets_suppress_indentation() {
        let src = "var a = [\n\t1,\n\t2,\n]\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 0);
        assert_eq!(count(src, S::Dedent), 0);
        assert_eq!(count(src, S::Newline), 1);
    }

    /// A lambda assigned at top level (no open bracket) indents like a block.
    #[test]
    fn top_level_lambda_body_indents() {
        let src = "var f = func():\n\tprint()\nx = 1\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 1);
        assert_eq!(count(src, S::Dedent), 1);
    }

    /// Blank lines and a comment at column 0 do not close the open block.
    #[test]
    fn blank_and_comment_only_lines_keep_state() {
        let src = "func f():\n\tx = 1\n\n# top-level comment\n\ty = 2\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 1);
        assert_eq!(count(src, S::Dedent), 1);
    }

    /// A block written on the header's own line needs no indent.
    #[test]
    fn inline_block_has_no_indent() {
        let src = "func f(): return 1\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 0);
        assert_eq!(count(src, S::Newline), 1);
    }

    /// Without a trailing newline the last line still gets a `Newline`, and
    /// the open block is closed by a final `Dedent`.
    #[test]
    fn dedent_to_eof_without_trailing_newline() {
        let src = "func f():\n\tpass";
        assert_lossless(src);
        let kinds = structure(src);
        assert_eq!(kinds.last().copied(), Some(S::Dedent));
        assert_eq!(count(src, S::Indent), 1);
        assert_eq!(count(src, S::Dedent), 1);
        assert!(kinds.contains(&S::Newline));
    }

    /// Empty input and comment-only input produce no structural markers.
    #[test]
    fn empty_and_comment_only_files() {
        assert_lossless("");
        assert!(structure("").is_empty());

        let comment_only = "# just a comment\n";
        assert_lossless(comment_only);
        assert_eq!(count(comment_only, S::Indent), 0);
    }

    /// Space-then-tab indentation is reported, and the output stays lossless.
    #[test]
    fn mixed_tabs_and_spaces_diagnoses_but_recovers() {
        let src = "func f():\n \tpass\n";
        assert_lossless(src);
        let diags = diagnostics(src);
        let found = diags
            .iter()
            .any(|d| d.message.contains("Mixed use of tabs and spaces"));
        assert!(found, "expected a mixed-indent diagnostic, got {diags:?}");
    }

    /// Tab indentation followed later by space indentation is reported.
    #[test]
    fn inconsistent_indent_char_across_lines_is_flagged() {
        let src = "func f():\n\ta = 1\nfunc g():\n b = 2\n";
        let diags = diagnostics(src);
        let found = diags.iter().any(|d| d.message.contains("instead of"));
        assert!(
            found,
            "expected an inconsistent-indent diagnostic, got {diags:?}"
        );
    }

    /// A `match` with one branch body nests two levels deep.
    #[test]
    fn match_block_nests() {
        let src = "match x:\n\t1:\n\t\tpass\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 2);
        assert_eq!(count(src, S::Dedent), 2);
        assert_eq!(structure(src)[0], S::MatchKw);
    }

    /// A lambda passed as a call argument gets an indented body that is
    /// closed right before the call's closing parenthesis.
    #[test]
    fn multiline_lambda_inside_brackets_indents() {
        let src = "arr.sort_custom(func(a, b):\n\treturn a < b\n)\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 1, "lambda body should Indent once");
        assert_eq!(count(src, S::Dedent), 1, "lambda body should Dedent once");

        let kinds = structure(src);
        let colon_at = kinds.iter().position(|&k| k == S::Colon).unwrap();
        assert_eq!(kinds[colon_at + 1], S::Newline);
        assert_eq!(kinds[colon_at + 2], S::Indent);

        let last_rparen = kinds.iter().rposition(|&k| k == S::RParen).unwrap();
        assert_eq!(kinds[last_rparen - 1], S::Dedent);
    }

    /// A lambda that is an element of a multi-line array indents its body.
    #[test]
    fn lambda_inside_multiline_array() {
        let src = "var a = [\n\tfunc():\n\t\tprint()\n]\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 1);
        assert_eq!(count(src, S::Dedent), 1);
    }

    /// A lambda inside another lambda's body, both inside call brackets.
    #[test]
    fn nested_lambdas_inside_brackets() {
        let src = "outer(func():\n\tinner(func():\n\t\tbody\n\t)\n)\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 2, "two nested lambda bodies");
        assert_eq!(count(src, S::Dedent), 2);
    }

    /// A lambda written entirely on one line needs no indentation markers.
    #[test]
    fn single_line_lambda_inside_brackets_has_no_indent() {
        let src = "arr.map(func(x): x * 2)\n";
        assert_lossless(src);
        assert_eq!(count(src, S::Indent), 0);
        assert_eq!(count(src, S::Dedent), 0);
        assert_eq!(count(src, S::Newline), 1);
    }
}