yara_x_fmt/lib.rs
1/*! A code formatter for YARA rules
2
This crate implements a code formatter for YARA in the spirit of other tools like
4`rustfmt` and `gofmt`.
5
6# Usage
7
8```no_run
9# use std::fs::File;
10use yara_x_fmt::Formatter;
11
12let input = File::open("original.yar").unwrap();
13let output = File::create("formatted.yar").unwrap();
14
15Formatter::new().format(input, output).unwrap();
16```
17*/
18use std::io;
19use std::io::Cursor;
20
21use thiserror::Error;
22
23use tokens::Token::*;
24use tokens::TokenStream;
25use yara_x_parser::cst::{CSTStream, Event, SyntaxKind};
26use yara_x_parser::{Parser, Span};
27
28use crate::align::Align;
29use crate::format_hex_patterns::FormatHexPatterns;
30use crate::indentation::AddIndentation;
31use crate::tokens::categories::*;
32use crate::tokens::*;
33use crate::trailing_spaces::RemoveTrailingSpaces;
34
35mod align;
36mod bubble;
37mod comments;
38mod format_hex_patterns;
39mod indentation;
40mod processor;
41mod tokens;
42mod trailing_spaces;
43
44#[cfg(test)]
45mod tests;
46
/// Errors returned by [`Formatter::format`].
///
/// The two I/O variants wrap the underlying [`io::Error`], whose message is
/// included in the variant's `Display` output.
#[derive(Error, Debug)]
// NOTE(review): presumably needed because the variants differ noticeably in
// size — confirm that this allowance is still required.
#[allow(clippy::large_enum_variant)]
pub enum Error {
    /// Error while reading from input.
    #[error("read error: {0}")]
    ReadError(io::Error),

    /// Error while writing to output.
    #[error("write error: {0}")]
    WriteError(io::Error),

    /// The input file contained invalid UTF-8.
    ///
    /// The [`Span`] is the one attached to the first invalid sequence that
    /// was found while formatting.
    #[error("invalid UTF-8 at {0}")]
    InvalidUTF8(Span),
}
63
/// Specifies how to indent the formatted code.
///
/// The type is `Copy`, and it also implements `Debug`, `PartialEq` and `Eq`
/// so that a chosen indentation style can be logged and compared.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Indentation {
    /// Use a given number of spaces per indentation level.
    Spaces(usize),
    /// Use one tab per indentation level.
    Tabs,
}
72
73/// Formats YARA source code automatically.
74pub struct Formatter {
75 align_metadata: bool,
76 align_patterns: bool,
77 indent_section_headers: bool,
78 indent_section_contents: bool,
79 newline_before_curly_brace: bool,
80 empty_line_before_section_header: bool,
81 empty_line_after_section_header: bool,
82 tab_size: usize,
83 indentation: Indentation,
84}
85
86impl Default for Formatter {
87 fn default() -> Self {
88 Self::new()
89 }
90}
91
92// Formatter public API.
93impl Formatter {
94 /// Creates a new formatter.
95 pub fn new() -> Self {
96 Formatter {
97 align_metadata: true,
98 align_patterns: true,
99 indent_section_headers: true,
100 indent_section_contents: true,
101 newline_before_curly_brace: false,
102 empty_line_before_section_header: true,
103 empty_line_after_section_header: false,
104 tab_size: 4,
105 indentation: Indentation::Spaces(2),
106 }
107 }
108
109 /// Specify if the metadata block must be aligned.
110 ///
111 /// If true, the metadata block will be converted from this...
112 ///
    /// ```text
    /// rule test {
    ///   meta:
    ///     short = "foo"
    ///     very_long = "bar"
    ///     even_longer = "baz"
    ///   condition:
    ///     ...
    /// }
    /// ```
    ///
    /// ... to this ...
    ///
    /// ```text
    /// rule test {
    ///   meta:
    ///     short       = "foo"
    ///     very_long   = "bar"
    ///     even_longer = "baz"
    ///   condition:
    ///     ...
    /// }
    /// ```
136 ///
137 /// The default value is `true`.
138 pub fn align_metadata(mut self, yes: bool) -> Self {
139 self.align_metadata = yes;
140 self
141 }
142
143 /// Specify if the patterns definitions must be aligned.
144 ///
145 /// If true, the strings block will be converted from this...
146 ///
    /// ```text
    /// rule test {
    ///   strings:
    ///     $short = "foo"
    ///     $very_long = "bar"
    ///     $even_longer = "baz"
    ///   condition:
    ///     ...
    /// }
    /// ```
    ///
    /// ... to this ...
    ///
    /// ```text
    /// rule test {
    ///   strings:
    ///     $short       = "foo"
    ///     $very_long   = "bar"
    ///     $even_longer = "baz"
    ///   condition:
    ///     ...
    /// }
    /// ```
170 ///
171 /// The default value is `true`.
172 pub fn align_patterns(mut self, yes: bool) -> Self {
173 self.align_patterns = yes;
174 self
175 }
176
    /// Specify if the section headers must be indented.
    ///
    /// If true, the section headers look like this...
    ///
    /// ```text
    /// rule test {
    ///   strings:
    ///     $short = "foo"
    ///     $very_long = "bar"
    ///     $even_longer = "baz"
    ///   condition:
    ///     ...
    /// }
    /// ```
    ///
    /// And if false, the section headers look like this...
    ///
    /// ```text
    /// rule test {
    /// strings:
    ///   $short = "foo"
    ///   $very_long = "bar"
    ///   $even_longer = "baz"
    /// condition:
    ///   ...
    /// }
    /// ```
204 ///
205 /// The default value is `true`.
206 pub fn indent_section_headers(mut self, yes: bool) -> Self {
207 self.indent_section_headers = yes;
208 self
209 }
210
    /// Specify if the section contents must be indented.
    ///
    /// If true, the section contents look like this...
    ///
    /// ```text
    /// rule test {
    ///   strings:
    ///     $short = "foo"
    ///     $very_long = "bar"
    ///     $even_longer = "baz"
    ///   condition:
    ///     ...
    /// }
    /// ```
    ///
    /// And if false, the section contents look like this...
    ///
    /// ```text
    /// rule test {
    ///   strings:
    ///   $short = "foo"
    ///   $very_long = "bar"
    ///   $even_longer = "baz"
    ///   condition:
    ///   ...
    /// }
    /// ```
238 ///
239 /// The default value is `true`.
240 pub fn indent_section_contents(mut self, yes: bool) -> Self {
241 self.indent_section_contents = yes;
242 self
243 }
244
245 /// Number of spaces to indent, if indenting at all. Set to 0 to use tabs.
246 ///
247 /// The default is `2`.
248 #[deprecated(
249 since = "1.7.0",
250 note = "use `.indentation(Indentation::Spaces(n))` or `.indentation(Indentation::Tabs)` instead"
251 )]
252 pub fn indent_spaces(mut self, n: u8) -> Self {
253 if n == 0 {
254 self.indentation = Indentation::Tabs
255 } else {
256 self.indentation = Indentation::Spaces(n as usize)
257 }
258 self
259 }
260
261 /// Specifies how to indent the formatted source code.
262 ///
263 /// ```
264 /// # use yara_x_fmt::{Formatter, Indentation};
265 /// let indent_with_two_spaces = Formatter::new().indentation(Indentation::Spaces(2));
266 /// let indent_with_tabs = Formatter::new().indentation(Indentation::Tabs);
267 /// ```
268 pub fn indentation(mut self, indentation: Indentation) -> Self {
269 self.indentation = indentation;
270 self
271 }
272
273 /// Specifies the tab size (in spaces) expected in the unformatted source
274 /// code.
275 ///
276 /// If the input contains tab characters, the formatter uses this value to
277 /// determine how many spaces each tab represents. Setting this incorrectly
278 /// can lead to misaligned formatting when the code mixes tabs and spaces.
279 ///
280 /// Defaults to `4`.
281 pub fn input_tab_size(mut self, tab_size: usize) -> Self {
282 self.tab_size = tab_size;
283 self
284 }
285
286 /// Specify if newline should be added before the opening curly brace in a
287 /// rule declaration. If false the rule will look like this:
288 ///
289 /// ```text
290 /// rule test {
291 /// condition:
292 /// true
293 /// }
294 /// ```
295 ///
296 /// And if true, the rule will look like this:
297 ///
298 /// ```text
299 /// rule test
300 /// {
301 /// condition:
302 /// true
303 /// }
304 /// ```
305 ///
306 /// The default value is `false`.
307 pub fn newline_before_curly_brace(mut self, yes: bool) -> Self {
308 self.newline_before_curly_brace = yes;
309 self
310 }
311
312 /// Specify if an empty line should be added before the section header in a
313 /// rule. If false the rule will look like this:
314 ///
315 /// ```text
316 /// rule test {
317 /// meta:
318 /// foo = "bar"
319 /// condition:
320 /// true
321 /// }
322 /// ```
323 ///
324 /// And if true, the rule will look like this:
325 ///
326 /// ```text
327 /// rule test {
328 ///
329 /// meta:
330 /// foo = "bar"
331 ///
332 /// condition:
333 /// true
334 /// }
335 /// ```
336 ///
    /// The default value is `true`.
338 pub fn empty_line_before_section_header(mut self, yes: bool) -> Self {
339 self.empty_line_before_section_header = yes;
340 self
341 }
342
343 /// Specify if an empty line should be added after the section header in a
344 /// rule. If false the rule will look like this:
345 ///
346 /// ```text
347 /// rule test {
348 /// condition:
349 /// true
350 /// }
351 /// ```
352 ///
353 /// And if true, the rule will look like this:
354 ///
355 /// ```text
356 /// rule test {
357 /// condition:
358 ///
359 /// true
360 /// }
361 /// ```
362 ///
363 /// The default value is `false`.
364 pub fn empty_line_after_section_header(mut self, yes: bool) -> Self {
365 self.empty_line_after_section_header = yes;
366 self
367 }
368
    /// Reads YARA source code from `input` and writes it into `output` after
    /// formatting.
371 ///
372 /// Returns `true` if the output differs from the input.
373 ///
374 /// This function will fail if it can't read from the input, write to the
375 /// output, or when the input contains invalid UTF-8 characters.
376 pub fn format<R, W>(
377 &self,
378 mut input: R,
379 mut output: W,
380 ) -> Result<bool, Error>
381 where
382 R: io::Read,
383 W: io::Write,
384 {
385 let mut invalid_utf8 = Option::None;
386 let mut in_buf = Vec::with_capacity(256);
387
388 input.read_to_end(&mut in_buf).map_err(Error::ReadError)?;
389
390 let cst_stream = CSTStream::from(Parser::new(in_buf.as_slice()));
391
392 // Inspect the CST stream looking for events indicating the presence of
393 // invalid UTF-8 sequences.
394 let events = cst_stream.into_iter().inspect(|evt| {
395 if let Event::Token { kind: SyntaxKind::INVALID_UTF8, span } = evt
396 {
397 invalid_utf8.get_or_insert(span.clone());
398 }
399 });
400
401 let tokens = Tokens::new(in_buf.as_slice(), events);
402 let mut out_buf = Cursor::new(Vec::new());
403
404 self.format_impl(tokens)
405 .write_to(&mut out_buf)
406 .map_err(Error::WriteError)?;
407
408 if let Some(span) = invalid_utf8 {
409 return Err(Error::InvalidUTF8(span));
410 }
411
412 let modified = in_buf.ne(out_buf.get_ref());
413
414 output.write_all(out_buf.get_ref()).map_err(Error::WriteError)?;
415
416 Ok(modified)
417 }
418}
419
420// Private API for formatter.
421impl Formatter {
    /// Builds the formatting pipeline around `input` and returns the
    /// resulting token stream.
    ///
    /// The pipeline is a chain of token-stream adapters (processors, bubble
    /// stages, aligners, ...), each one consuming the tokens produced by the
    /// previous one. Stages rely on the work done by earlier stages (for
    /// instance, on whitespace having been removed or on at most two
    /// consecutive newlines remaining), so their order is significant.
    ///
    /// Which optional stages are included depends on the options stored in
    /// `self`.
    fn format_impl<'a, I>(&self, input: I) -> impl TokenStream<'a> + 'a
    where
        I: TokenStream<'a> + 'a,
    {
        // The first step is inserting newlines between top-level statements
        // (rules, imports and includes) if they are in the same line.
        let tokens = processor::Processor::new(input)
            //
            // Insert newline in front of import and include statements, making
            // sure that they start at a new line. The newline is not inserted if
            // the statement is at the start of the file.
            //
            // Example:
            //
            // import "foo" import "bar"
            //
            // Inserts newline before import "bar".
            //
            .add_rule(
                |ctx| {
                    let next_token = ctx.token(1);
                    let prev_token = ctx.token(-1);

                    matches!(
                        next_token,
                        Begin(SyntaxKind::IMPORT_STMT)
                            | Begin(SyntaxKind::INCLUDE_STMT)
                    ) && prev_token.neq(&Begin(SyntaxKind::SOURCE_FILE))
                        && prev_token.is_not(*NEWLINE)
                },
                processor::actions::newline,
            )
            //
            // Insert newline in front of rule declarations, making sure that
            // rule declarations start at a new line. The newline is not
            // inserted if the rule is at the start of the file.
            //
            // Example:
            //
            // rule foo { ... } rule bar { ... }
            //
            // Inserts newline before "rule bar".
            //
            .add_rule(
                |ctx| {
                    let next_token = ctx.token(1);
                    let prev_token = ctx.token(-1);

                    next_token.eq(&Begin(SyntaxKind::RULE_DECL))
                        && prev_token.neq(&Begin(SyntaxKind::SOURCE_FILE))
                        && prev_token.is_not(*NEWLINE)
                },
                processor::actions::newline,
            );

        // Process comments before removing whitespaces. Whitespaces must
        // be the original ones while the comments are being processed in
        // order to maintain comment indentation unaltered.
        let tokens =
            comments::CommentProcessor::new(tokens).tab_size(self.tab_size);

        // Remove all whitespaces from the original source.
        let tokens = processor::Processor::new(tokens).add_rule(
            |ctx| ctx.token(1).is(*WHITESPACE),
            processor::actions::drop,
        );

        // Displace tail comments and newlines up with respect to tokens that
        // indicate the start and end of a grammar rule. This effectively moves
        // such comments and newlines to the innermost grammar rule preceding
        // them. For example, suppose that we have the following rule:
        //
        //   import "test" // Comment
        //
        //   rule test {
        //     strings:
        //       $a = "foo"
        //     condition:
        //       true
        //   }
        //
        // The sequence of tokens produced for that rule looks like:
        //
        //   Begin(SOURCE_FILE)
        //   Begin(IMPORT_STMT)
        //   Keyword("import")
        //   Literal("test")
        //   End(IMPORT_STMT)
        //   Begin(RULE_DECL)
        //   TailComment("// Comment")
        //   Newline
        //   Keyword("rule")
        //   .... more
        //
        // Notice how TailComment("// Comment") and the Newline that follows
        // are placed just before the "rule" keyword, and inside the rule_decl
        // grammar rule. This is not the most natural place for this tail comment,
        // its natural place is just after the "test" literal, like this:
        //
        //   Begin(SOURCE_FILE)
        //   Begin(IMPORT_STMT)
        //   Keyword("import")
        //   Literal("test")
        //   TailComment("// Comment")
        //   Newline
        //   End(IMPORT_STMT)
        //   Begin(RULE_DECL)
        //   Keyword("rule")
        //   .... more
        //
        // That's exactly what the Bubble pipeline does. See the documentation for
        // the `bubble` module for more details.
        let tokens = bubble::Bubble::new(
            tokens,
            |token| {
                matches!(token, Token::TailComment(_)) || token.is(*NEWLINE)
            },
            |token| matches!(token, Token::Begin(_) | Token::End(_)),
        );

        // Displace newlines down with respect to tokens that indicate end of a
        // grammar rule. This effectively moves them to the outermost grammar
        // rule. See the documentation for the `bubble` module for more details.
        let tokens = bubble::Bubble::new(
            tokens,
            |token| matches!(token, Token::End(_)),
            |token| token.is(*NEWLINE),
        );

        // Remove newlines in multiple cases.
        let tokens = processor::Processor::new(tokens)
            // Remove all newlines at the beginning of the file. When the
            // processor is at the beginning of the file token(-1) is None.
            // Notice that this works because all these newlines have been
            // moved up and placed before Token::Begin(source_file) by the
            // first bubble pipeline.
            .add_rule(
                |ctx| ctx.token(-1).is(*NONE) && ctx.token(1).is(*NEWLINE),
                processor::actions::drop,
            )
            // Remove excess of consecutive newlines, only two consecutive
            // newlines are allowed.
            .add_rule(
                |ctx| {
                    ctx.token(1).is(*NEWLINE)
                        && ctx.token(2).is(*NEWLINE)
                        && ctx.token(3).is(*NEWLINE)
                },
                processor::actions::drop,
            )
            // Remove excess of newlines at the end of the file.
            .add_rule(
                |ctx| {
                    ctx.token(-1).is(*NEWLINE)
                        && ctx.token(1).is(*NEWLINE)
                        && ctx.token(2).is(*NONE)
                },
                processor::actions::drop,
            )
            // Remove newlines between rule tags and between rule modifiers,
            // unless the previous token is a comment.
            .add_rule(
                |ctx| {
                    (ctx.in_rule(SyntaxKind::RULE_MODS, false)
                        || ctx.in_rule(SyntaxKind::RULE_TAGS, false))
                        && ctx.token(-1).is_not(*COMMENT)
                        && ctx.token(1).is(*NEWLINE)
                },
                processor::actions::drop,
            )
            // Remove newlines after rule modifiers.
            .add_rule(
                |ctx| {
                    ctx.token(-1).eq(&End(SyntaxKind::RULE_MODS))
                        && ctx.token(1).is(*NEWLINE)
                },
                processor::actions::drop,
            )
            // Remove newlines after rule tags.
            .add_rule(
                |ctx| {
                    ctx.token(-1).eq(&End(SyntaxKind::RULE_TAGS))
                        && ctx.token(1).is(*NEWLINE)
                },
                processor::actions::drop,
            );

        let tokens = if self.newline_before_curly_brace {
            // Ensure we have a newline before the opening "{" in a rule
            // declaration. Be careful to only insert it if one does not already
            // exist.
            Box::new(processor::Processor::new(tokens).add_rule(
                |ctx| {
                    ctx.in_rule(SyntaxKind::RULE_DECL, false)
                        && ctx.token(1).eq(&LBRACE)
                        && ctx.token(-1).is_not(*NEWLINE)
                },
                processor::actions::newline,
            ))
        } else {
            // Remove newlines before the opening "{" in a rule declaration.
            // It takes into account that we can have one or two newlines
            // before the "{" character. In a previous step we have removed
            // consecutive newlines leaving two at most.
            Box::new(
                processor::Processor::new(tokens)
                    .add_rule(
                        |ctx| {
                            ctx.in_rule(SyntaxKind::RULE_DECL, false)
                                && ctx.token(1).is(*NEWLINE)
                                && (
                                    // Newline followed by "{" or ...
                                    ctx.token(2).eq(&LBRACE) ||
                                    // ... two newlines followed by "{"
                                    ctx.token(2).is(*NEWLINE) && ctx.token(3).eq(&LBRACE)
                                )
                        },
                        processor::actions::drop,
                    ))
        };

        let tokens = processor::Processor::new(tokens)
            //
            // Insert additional newline in front of a rule declaration that
            // already starts at a newline, but only if not preceded by a
            // comment. In other words, this adds empty lines in between rule
            // declarations, but doesn't do it if the rule is preceded by a
            // comment.
            //
            // Example:
            //
            // rule foo {
            //    ...
            // }
            // rule bar {
            //    ...
            // }
            //
            // Inserts newline before "rule bar".
            //
            .add_rule(
                |ctx| {
                    ctx.token(1).eq(&Begin(SyntaxKind::RULE_DECL))
                        && ctx.token(-1).is(*NEWLINE)
                        && ctx.token(-2).is_not(*NEWLINE | *COMMENT)
                },
                processor::actions::newline,
            );

        let tokens = if self.empty_line_before_section_header {
            // Make sure that every section header ("meta", "strings",
            // "condition") is preceded by two newlines.
            Box::new(
                processor::Processor::new(tokens)
                    .set_passthrough(*CONTROL)
                    // The header is not preceded by any newline: apply the
                    // `emptyline` action.
                    // NOTE(review): `emptyline` presumably inserts the line
                    // break plus an empty line — confirm in `processor`.
                    .add_rule(
                        |ctx| {
                            matches!(
                                ctx.token(1),
                                Keyword(b"meta")
                                    | Keyword(b"strings")
                                    | Keyword(b"condition")
                            ) && ctx.token(-1).is_not(*NEWLINE)
                        },
                        processor::actions::emptyline,
                    )
                    // The header is preceded by exactly one newline: add
                    // another one.
                    .add_rule(
                        |ctx| {
                            matches!(
                                ctx.token(1),
                                Keyword(b"meta")
                                    | Keyword(b"strings")
                                    | Keyword(b"condition")
                            ) && ctx.token(-1).is(*NEWLINE)
                                && ctx.token(-2).is_not(*NEWLINE)
                        },
                        processor::actions::newline,
                    ),
            )
        } else {
            // Make sure that every section header is preceded by exactly one
            // newline.
            Box::new(
                processor::Processor::new(tokens)
                    .set_passthrough(*CONTROL)
                    // The header is not preceded by a newline: insert one.
                    .add_rule(
                        |ctx| {
                            matches!(
                                ctx.token(1),
                                Keyword(b"meta")
                                    | Keyword(b"strings")
                                    | Keyword(b"condition")
                            ) && ctx.token(-1).is_not(*NEWLINE)
                        },
                        processor::actions::newline,
                    )
                    // The header is preceded by two newlines: drop one.
                    .add_rule(
                        |ctx| {
                            ctx.token(1).is(*NEWLINE)
                                && matches!(
                                    ctx.token(2),
                                    Keyword(b"meta")
                                        | Keyword(b"strings")
                                        | Keyword(b"condition")
                                )
                                && ctx.token(-1).is(*NEWLINE)
                        },
                        processor::actions::drop,
                    ),
            )
        };

        // Always remove empty line before the first section header, i.e. the
        // one that comes right after the opening "{" of the rule.
        let tokens = processor::Processor::new(tokens)
            .set_passthrough(*CONTROL)
            .add_rule(
                |ctx| {
                    ctx.token(1).is(*NEWLINE)
                        && matches!(
                            ctx.token(2),
                            Keyword(b"meta")
                                | Keyword(b"strings")
                                | Keyword(b"condition")
                        )
                        && ctx.token(-1).is(*NEWLINE)
                        && ctx.token(-2).eq(&LBRACE)
                },
                processor::actions::drop,
            );

        let tokens = if self.empty_line_after_section_header {
            // Make sure that the colon that ends a section header is followed
            // by two newlines.
            Box::new(
                processor::Processor::new(tokens)
                    // Nothing but the section contents follow the colon.
                    .add_rule(
                        |ctx| {
                            ctx.token(-1).eq(&COLON)
                                && matches!(
                                    ctx.token(-2),
                                    Keyword(b"meta")
                                        | Keyword(b"strings")
                                        | Keyword(b"condition")
                                )
                                && ctx.token(1).is_not(*NEWLINE)
                        },
                        processor::actions::emptyline,
                    )
                    // Exactly one newline follows the colon: add another one.
                    .add_rule(
                        |ctx| {
                            ctx.token(-1).eq(&COLON)
                                && matches!(
                                    ctx.token(-2),
                                    Keyword(b"meta")
                                        | Keyword(b"strings")
                                        | Keyword(b"condition")
                                )
                                && ctx.token(1).is(*NEWLINE)
                                && ctx.token(2).is_not(*NEWLINE)
                        },
                        processor::actions::newline,
                    ),
            )
        } else {
            // Make sure that the colon that ends a section header is followed
            // by exactly one newline.
            Box::new(
                processor::Processor::new(tokens)
                    // No newline follows the colon: insert one.
                    .add_rule(
                        |ctx| {
                            ctx.token(-1).eq(&COLON)
                                && matches!(
                                    ctx.token(-2),
                                    Keyword(b"meta")
                                        | Keyword(b"strings")
                                        | Keyword(b"condition")
                                )
                                && ctx.token(1).is_not(*NEWLINE)
                        },
                        processor::actions::newline,
                    )
                    // Two newlines follow the colon: drop one.
                    .add_rule(
                        |ctx| {
                            ctx.token(-1).eq(&COLON)
                                && matches!(
                                    ctx.token(-2),
                                    Keyword(b"meta")
                                        | Keyword(b"strings")
                                        | Keyword(b"condition")
                                )
                                && ctx.token(1).is(*NEWLINE)
                                && ctx.token(2).is(*NEWLINE)
                        },
                        processor::actions::drop,
                    ),
            )
        };

        // Add newline at multiple places.
        let tokens = processor::Processor::new(tokens)
            .set_passthrough(*CONTROL)
            // Add a newline in front of meta definitions in the "meta" section.
            .add_rule(
                |ctx| {
                    ctx.in_rule(SyntaxKind::META_DEF, false)
                        && ctx.token(1).is(*IDENTIFIER)
                        && ctx.token(-1).is_not(*NEWLINE)
                },
                processor::actions::newline,
            )
            // Add newline in front of pattern identifiers in the "strings"
            // section.
            .add_rule(
                |ctx| {
                    ctx.in_rule(SyntaxKind::PATTERN_DEF, false)
                        && ctx.token(1).is(*IDENTIFIER)
                        && ctx.token(-1).is_not(*NEWLINE)
                },
                processor::actions::newline,
            )
            // Add newline before each identifier in a `with` statement.
            .add_rule(
                |ctx| {
                    ctx.in_rule(SyntaxKind::WITH_DECL, false)
                        && ctx.token(1).is(*IDENTIFIER)
                        && ctx.token(-1).is_not(*NEWLINE)
                },
                processor::actions::newline,
            )
            // Add newline before the closing brace at the end of rule.
            .add_rule(
                |ctx| {
                    ctx.in_rule(SyntaxKind::RULE_DECL, false)
                        && ctx.token(1).eq(&RBRACE)
                        && ctx.token(-1).is_not(*NEWLINE)
                },
                processor::actions::newline,
            )
            // Make sure that the file ends with a newline.
            .add_rule(
                |ctx| ctx.token(1).is(*NONE) && ctx.token(-1).is_not(*NEWLINE),
                processor::actions::newline,
            );

        let tokens = FormatHexPatterns::new(tokens);

        // The two indentation stages below are optional. The stream is boxed
        // so that both branches of each `if` have the same type.
        let tokens: Box<dyn Iterator<Item = Token<'a>>> =
            if self.indent_section_headers {
                Box::new(Self::indent_body(tokens))
            } else {
                Box::new(tokens)
            };

        let tokens: Box<dyn Iterator<Item = Token<'a>>> =
            if self.indent_section_contents {
                Box::new(Self::indent_sections(tokens))
            } else {
                Box::new(tokens)
            };

        let tokens = Self::indent_hex_patterns(tokens);
        let tokens = Self::indent_parenthesized_exprs(tokens);
        let tokens = Self::indent_with_expr(tokens);

        // indent_body and indent_sections will insert Indentation tokens, but
        // won't take into account that those tokens must appear before the
        // newline they are expected to affect. This fixes the issue by moving
        // indentation tokens in front of newline tokens if they appear in
        // reverse order.
        let tokens = bubble::Bubble::new(
            tokens,
            |token| matches!(token, Indentation(_)),
            |token| token.is(*NEWLINE),
        );

        // Make sure that comments (except inline comments) are followed by
        // newline. In most cases this is already the case, but some of the rules
        // that remove newlines may remove those appearing after the comment.
        let tokens = processor::Processor::new(tokens)
            .set_passthrough(*CONTROL)
            .add_rule(
                |ctx| {
                    matches!(
                        ctx.token(-1),
                        HeadComment(_) | TailComment(_) | BlockComment(_)
                    ) && ctx.token(1).is_not(*NEWLINE)
                },
                processor::actions::newline,
            );

        let tokens = Self::add_spacing(tokens);
        let tokens = Self::align_comments_in_hex_patterns(tokens);

        // Alignment of the "meta" and "strings" sections is optional too.
        let tokens: Box<dyn Iterator<Item = Token<'a>>> =
            if self.align_metadata {
                Box::new(Self::align_meta_section(tokens))
            } else {
                Box::new(tokens)
            };

        let tokens: Box<dyn Iterator<Item = Token<'a>>> =
            if self.align_patterns {
                Box::new(Self::align_patterns_section(tokens))
            } else {
                Box::new(tokens)
            };

        // Final stages: apply the configured indentation style and strip
        // trailing spaces.
        let tokens = AddIndentation::new(tokens, self.indentation);

        RemoveTrailingSpaces::new(tokens)
    }
924
925 /// Indents the sections (meta, strings, condition) of a rule one level up.
926 /// For example, for this input:
927 ///
928 /// ```text
929 /// rule foo {
930 /// strings:
931 /// $a = "foo"
932 /// condition:
933 /// true
934 /// }
935 /// ```
936 ///
937 /// ... the result is ...
938 ///
939 /// ```text
940 /// rule foo {
941 /// strings:
942 /// $a = "foo"
943 /// condition:
944 /// true
945 /// }
946 /// ```
947 fn indent_sections<'a, I>(input: I) -> impl TokenStream<'a> + 'a
948 where
949 I: TokenStream<'a> + 'a,
950 {
951 processor::Processor::new(input)
952 // Ignore all comments
953 .set_passthrough(*COMMENT)
954 // Increase indentation after "condition:"
955 .add_rule(
956 |ctx| {
957 ctx.in_rule(SyntaxKind::CONDITION_BLK, false)
958 && ctx.token(-1).eq(&COLON)
959 },
960 processor::actions::insert(Indentation(1)),
961 )
962 // Decrease indentation after the condition.
963 .add_rule(
964 |ctx| {
965 ctx.token(1).eq(&End(SyntaxKind::CONDITION_BLK))
966 && ctx.token(-1).neq(&Indentation(-1))
967 },
968 processor::actions::insert(Indentation(-1)),
969 )
970 // Increase indentation after "meta:"
971 .add_rule(
972 |ctx| {
973 ctx.in_rule(SyntaxKind::META_BLK, false)
974 && ctx.token(-1).eq(&COLON)
975 },
976 processor::actions::insert(Indentation(1)),
977 )
978 // Decrease indentation after meta definitions
979 .add_rule(
980 |ctx| {
981 ctx.token(1).eq(&End(SyntaxKind::META_BLK))
982 && ctx.token(-1).neq(&Indentation(-1))
983 },
984 processor::actions::insert(Indentation(-1)),
985 )
986 // Increase indentation after "strings:"
987 .add_rule(
988 |ctx| {
989 ctx.in_rule(SyntaxKind::PATTERNS_BLK, false)
990 && ctx.token(-1).eq(&COLON)
991 },
992 processor::actions::insert(Indentation(1)),
993 )
994 // Decrease indentation after pattern definitions.
995 .add_rule(
996 |ctx| {
997 ctx.token(1).eq(&End(SyntaxKind::PATTERNS_BLK))
998 && ctx.token(-1).neq(&Indentation(-1))
999 },
1000 processor::actions::insert(Indentation(-1)),
1001 )
1002 }
1003
1004 /// Indents the body of a rule. For this input...
1005 ///
1006 /// ```text
1007 /// rule foo {
1008 /// strings:
1009 /// $a = "foo"
1010 /// condition:
1011 /// true
1012 /// }
1013 /// ```
1014 /// ... the result is ...
1015 ///
1016 /// ```text
1017 /// rule foo {
1018 /// strings:
1019 /// $a = "foo"
1020 /// condition:
1021 /// true
1022 /// }
1023 /// ```
1024 ///
1025 fn indent_body<'a, I>(input: I) -> impl TokenStream<'a> + 'a
1026 where
1027 I: TokenStream<'a> + 'a,
1028 {
1029 processor::Processor::new(input)
1030 // Ignore all comments.
1031 .set_passthrough(*COMMENT)
1032 // Increase indentation after the opening brace in a rule
1033 // declaration.
1034 .add_rule(
1035 |ctx| {
1036 ctx.in_rule(SyntaxKind::RULE_DECL, false)
1037 && ctx.token(-1).eq(&LBRACE)
1038 },
1039 processor::actions::insert(Indentation(1)),
1040 )
1041 .add_rule(
1042 |ctx| {
1043 ctx.in_rule(SyntaxKind::RULE_DECL, false)
1044 && ctx.token(1).eq(&RBRACE)
1045 && ctx.token(-1).neq(&Indentation(-1))
1046 },
1047 processor::actions::insert(Indentation(-1)),
1048 )
1049 }
1050
1051 /// Indent parenthesized expressions in rule conditions. For this input...
1052 ///
1053 /// ```text
1054 /// rule foo {
1055 /// strings:
1056 /// $a = "foo"
1057 /// $b = "bar"
1058 /// condition:
1059 /// (
1060 /// $a and $b
1061 /// )
1062 /// }
1063 /// ```
1064 ///
1065 /// ... the result is ...
1066 ///
1067 /// ```text
1068 /// rule foo {
1069 /// strings:
1070 /// $a = "foo"
1071 /// $b = "bar"
1072 /// condition:
1073 /// (
1074 /// $a and $b
1075 /// )
1076 /// }
1077 /// ```
1078 fn indent_parenthesized_exprs<'a, I>(input: I) -> impl TokenStream<'a> + 'a
1079 where
1080 I: TokenStream<'a> + 'a,
1081 {
1082 processor::Processor::new(input)
1083 .set_passthrough(*COMMENT)
1084 .add_rule(
1085 |ctx| {
1086 ctx.in_rule(SyntaxKind::BOOLEAN_EXPR, true)
1087 && ctx.token(-1).eq(&LPAREN)
1088 },
1089 processor::actions::insert(Indentation(1)),
1090 )
1091 .add_rule(
1092 |ctx| {
1093 ctx.in_rule(SyntaxKind::BOOLEAN_EXPR, true)
1094 && ctx.token(1).eq(&RPAREN)
1095 && ctx.token(-1).neq(&Indentation(-1))
1096 },
1097 processor::actions::insert(Indentation(-1)),
1098 )
1099 }
1100
1101 /// Indent `with` expressions. For this input...
1102 ///
1103 /// ```text
1104 /// rule foo {
1105 /// condition:
1106 /// with
1107 /// foo = "foo"
1108 /// bar = "bar": (...)
1109 /// }
1110 /// ```
1111 ///
1112 /// ... the result is ...
1113 ///
1114 /// ```text
1115 /// rule foo {
1116 /// condition:
1117 /// with
1118 /// foo = "foo"
1119 /// bar = "bar": (...)
1120 /// }
1121 /// ```
1122 fn indent_with_expr<'a, I>(input: I) -> impl TokenStream<'a> + 'a
1123 where
1124 I: TokenStream<'a> + 'a,
1125 {
1126 processor::Processor::new(input)
1127 // Ignore all comments.
1128 .set_passthrough(*COMMENT)
1129 // Increase indentation after the `with` keyword.
1130 .add_rule(
1131 |ctx| {
1132 ctx.in_rule(SyntaxKind::WITH_EXPR, false)
1133 && ctx.token(-1).eq(&Keyword(b"with"))
1134 },
1135 processor::actions::insert(Indentation(1)),
1136 )
1137 // Decrease indentation after the `with` expression.
1138 .add_rule(
1139 |ctx| {
1140 ctx.token(1).eq(&End(SyntaxKind::WITH_EXPR))
1141 && ctx.token(-1).neq(&Indentation(-1))
1142 },
1143 processor::actions::insert(Indentation(-1)),
1144 )
1145 }
1146
1147 /// Indent hex patterns. For this input...
1148 ///
1149 /// ```text
1150 /// $a = {
1151 /// 00 ..
1152 /// 01 ..
1153 /// 02
1154 /// }
1155 /// }
1156 /// ```
1157 ///
1158 /// ... the result is ...
1159 ///
1160 /// ```text
1161 /// $a = {
1162 /// 00 ..
1163 /// 01 ..
1164 /// 02
1165 /// }
1166 /// }
1167 /// ```
1168 fn indent_hex_patterns<'a, I>(input: I) -> impl TokenStream<'a> + 'a
1169 where
1170 I: TokenStream<'a> + 'a,
1171 {
1172 processor::Processor::new(input)
1173 .set_passthrough(*COMMENT)
1174 .add_rule(
1175 |ctx| {
1176 ctx.in_rule(SyntaxKind::HEX_PATTERN, true)
1177 && ctx.token(-1).eq(&LBRACE)
1178 },
1179 processor::actions::insert(Indentation(1)),
1180 )
1181 .add_rule(
1182 |ctx| {
1183 ctx.in_rule(SyntaxKind::HEX_PATTERN, true)
1184 && ctx.token(1).eq(&RBRACE)
1185 && ctx.token(-1).neq(&Indentation(-1))
1186 },
1187 processor::actions::insert(Indentation(-1)),
1188 )
1189 }
1190
1191 /// Aligns the equals signs in pattern definitions. For example, for this
1192 /// input:
1193 ///
1194 /// ```text
1195 /// rule foo {
1196 /// strings:
1197 /// $short = "foo"
1198 /// $very_long = "bar"
1199 /// $even_longer = "baz"
1200 /// condition:
1201 /// true
1202 /// }
1203 /// ```
1204 ///
1205 /// ... the result is ...
1206 ///
1207 /// ```text
1208 /// rule foo {
1209 /// strings:
1210 /// $short = "foo"
1211 /// $very_long = "bar"
1212 /// $even_longer = "baz"
1213 /// condition:
1214 /// true
1215 /// }
1216 /// ```
1217 ///
1218 /// Pattern groups separated by empty lines are handled independently, for
1219 /// example:
1220 ///
1221 /// ```text
1222 /// rule foo {
1223 /// strings:
1224 /// $short = "foo"
1225 /// $very_long = "bar"
1226 ///
1227 /// $even_longer = "baz"
1228 /// $longest_of_all = "qux"
1229 /// condition:
1230 /// true
1231 /// }
1232 /// ```
1233 ///
1234 /// The patterns in the first block are aligned together, but they are not
1235 /// influenced by the longer lines in the second block.
1236 ///
1237 /// The input must contain at least one newline character after each
1238 /// pattern definition.
1239 fn align_patterns_section<'a, I>(input: I) -> impl TokenStream<'a> + 'a
1240 where
1241 I: TokenStream<'a> + 'a,
1242 {
1243 Self::align(input, SyntaxKind::PATTERNS_BLK, SyntaxKind::PATTERN_DEF)
1244 }
1245
1246 /// Aligns the equals signs in metadata definitions.
1247 ///
1248 /// This is similar to [`Formatter::align_patterns_section`] but for metadata.
1249 fn align_meta_section<'a, I>(input: I) -> impl TokenStream<'a> + 'a
1250 where
1251 I: TokenStream<'a> + 'a,
1252 {
1253 Self::align(input, SyntaxKind::META_BLK, SyntaxKind::META_DEF)
1254 }
1255
1256 fn align<'a, I>(
1257 input: I,
1258 block_kind: SyntaxKind,
1259 item_kind: SyntaxKind,
1260 ) -> impl TokenStream<'a> + 'a
1261 where
1262 I: TokenStream<'a> + 'a,
1263 {
1264 // First insert the alignment markers at the appropriate places...
1265 let input_with_markers = processor::Processor::new(input)
1266 // Insert `AlignmentBlockBegin` after the start of each block.
1267 .add_rule(
1268 move |ctx| ctx.token(-1).eq(&Begin(block_kind)),
1269 processor::actions::insert(AlignmentBlockBegin),
1270 )
1271 // Insert `AlignmentBlockEnd` just before the end each block.
1272 .add_rule(
1273 move |ctx| {
1274 ctx.token(1).eq(&End(block_kind))
1275 && ctx.token(-1).neq(&AlignmentBlockEnd)
1276 },
1277 processor::actions::insert(AlignmentBlockEnd),
1278 )
1279 .add_rule(
1280 move |ctx| {
1281 ctx.in_rule(block_kind, false)
1282 && ctx.token(-2).eq(&Newline)
1283 && ctx.token(-1).eq(&Newline)
1284 },
1285 |ctx| {
1286 ctx.push_output_token(Some(AlignmentBlockEnd));
1287 ctx.push_output_token(Some(AlignmentBlockBegin));
1288 },
1289 )
1290 // Insert `AlignmentMarker` before each equal sign.
1291 .add_rule(
1292 move |ctx| {
1293 ctx.in_rule(item_kind, false)
1294 && ctx.token(1).eq(&EQUAL)
1295 && ctx.token(-1).neq(&AlignmentMarker)
1296 },
1297 processor::actions::insert(AlignmentMarker),
1298 );
1299
1300 // ... then pass the token stream with the markers to Aligner, which
1301 // returns a token stream that replaces the markers with the
1302 // appropriate number of spaces.
1303 Align::new(input_with_markers)
1304 }
1305
1306 /// Aligns tail comments inside hex patterns. For this input...
1307 ///
1308 /// ```text
1309 /// rule foo {
1310 /// strings:
1311 /// $hex = {
1312 /// 00 01 // Lorem
1313 /// 00 01 02 // ipsum
1314 /// }
1315 /// condition:
1316 /// true
1317 /// }
1318 /// ```
1319 ///
1320 /// ... the result is ...
1321 ///
1322 /// ```text
1323 /// rule foo {
1324 /// strings:
1325 /// $hex = {
1326 /// 00 01 // Lorem
1327 /// 00 01 02 // ipsum
1328 /// }
1329 /// condition:
1330 /// true
1331 /// }
1332 /// ```
1333 ///
1334 fn align_comments_in_hex_patterns<'a, I>(
1335 input: I,
1336 ) -> impl TokenStream<'a> + 'a
1337 where
1338 I: TokenStream<'a> + 'a,
1339 {
1340 // First insert the alignment markers at the appropriate places...
1341 let input_with_markers = processor::Processor::new(input)
1342 .add_rule(
1343 |ctx| ctx.token(-1).eq(&Begin(SyntaxKind::HEX_PATTERN)),
1344 processor::actions::insert(AlignmentBlockBegin),
1345 )
1346 .add_rule(
1347 |ctx| {
1348 ctx.token(1).eq(&End(SyntaxKind::HEX_PATTERN))
1349 && ctx.token(-1).neq(&AlignmentBlockEnd)
1350 },
1351 processor::actions::insert(AlignmentBlockEnd),
1352 )
1353 .add_rule(
1354 |ctx| {
1355 ctx.in_rule(SyntaxKind::HEX_PATTERN, true)
1356 && matches!(ctx.token(1), Token::TailComment(_))
1357 && ctx.token(-1).neq(&AlignmentMarker)
1358 },
1359 processor::actions::insert(AlignmentMarker),
1360 );
1361
1362 // ... then pass the token stream with the markers to Aligner, which
1363 // returns a token stream that replaces the markers with the
1364 // appropriate number of spaces.
1365 Align::new(input_with_markers)
1366 }
1367
1368 fn add_spacing<'a, I>(input: I) -> impl TokenStream<'a> + 'a
1369 where
1370 I: TokenStream<'a> + 'a,
1371 {
1372 processor::Processor::new(input)
1373 // Ignore all control tokens.
1374 .set_passthrough(*CONTROL)
1375 // Insert spaces in-between all tokens, except in the following
1376 // cases:
1377 // - No space after "(" and "["
1378 // - No space before ")" and "]"
1379 // - No space before ":"
1380 // - No space before ","
1381 // - No space before or after ".." (e.g: (0..10))
1382 // - No space before or after "." (e.g: foo.bar)
1383 // - No space in-between identifiers and "(" or "[" (e.g: array[0],
1384 // func("foo")).
1385 // - No space before or after "-" in pattern modifiers and hex jumps
1386 // (e.g: xor(0-255), [0-10]).
1387 .add_rule(
1388 |ctx| {
1389 let prev_token = ctx.token(-1);
1390 let next_token = ctx.token(1);
1391
1392 // Insert space if previous token is anything except ( or [,
1393 // and next token is anything except ) or ].
1394 let add_space = prev_token.is(*TEXT ^ *LGROUPING)
1395 && next_token.is(*TEXT ^ *RGROUPING);
1396
1397 let drop_space =
1398 // Don't insert space if next token is ":"
1399 next_token.eq(&COLON)
1400 || next_token.eq(&COMMA)
1401 // Don't insert space after "-"
1402 || prev_token.eq(&HYPHEN)
1403 // Don't insert spaces around "."
1404 || prev_token.eq(&DOT)
1405 || next_token.eq(&DOT)
1406 // don't insert space in-between some identifier and "("
1407 // or "[".
1408 || prev_token.is(*IDENTIFIER)
1409 && next_token.is(*LGROUPING)
1410 // don't insert space in-between some identifier and "*"
1411 // like in $a*
1412 || prev_token.is(*IDENTIFIER)
1413 && next_token.eq(&ASTERISK)
1414 // don't insert spaces before "(" in pattern modifiers.
1415 || ctx.in_rule(SyntaxKind::PATTERN_MOD, false)
1416 && next_token.is(*LGROUPING)
1417 // don't insert spaces before or after "-" in pattern
1418 // modifiers and hex jumps.
1419 || (ctx.in_rule(SyntaxKind::PATTERN_MOD, false) ||
1420 ctx.in_rule(SyntaxKind::HEX_JUMP, false))
1421 && (next_token.eq(&HYPHEN) || prev_token.eq(&HYPHEN));
1422
1423 add_space && !drop_space
1424 },
1425 processor::actions::space,
1426 )
1427 // Insert space before after inline comment.
1428 .add_rule(
1429 |ctx| {
1430 matches!(
1431 ctx.token(-1),
1432 InlineComment(_)
1433 )
1434 },
1435 processor::actions::space
1436 )
1437 // Insert two spaces before trailing comments in a line.
1438 .add_rule(
1439 |ctx| {
1440 ctx.token(-1).is(*TEXT) &&
1441 ctx.token(1).is(*COMMENT)
1442 },
1443 |ctx| {
1444 ctx.push_output_token(Some(Whitespace));
1445 ctx.push_output_token(Some(Whitespace));
1446 }
1447 )
1448 }
1449}