solana_sbpf/
asm_parser.rs

1#![allow(clippy::arithmetic_side_effects)]
2// Copyright 2017 Rich Lane <lanerl@gmail.com>
3//
4// Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or
5// the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8// Rust-doc comments were left in the module, but it is no longer publicly exposed from the root
9// file of the crate. Do not expect to find those comments in the documentation of the crate.
10
11//! This module parses eBPF assembly language source code.
12
13use combine::{
14    attempt, between,
15    char::{alpha_num, char, digit, hex_digit, spaces, string},
16    combine_parse_partial, combine_parser_impl,
17    easy::{Error, Errors, Info},
18    eof, many, many1, one_of, optional, parse_mode, parser, sep_by, skip_many,
19    stream::state::{SourcePosition, State},
20    Parser, Stream,
21};
22
/// A single operand attached to an instruction or directive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Operand {
    /// A register, identified by its number (`r3` is `Register(3)`).
    Register(i64),
    /// An integer literal: an immediate value or a jump offset.
    Integer(i64),
    /// A memory reference: base register number first, then the offset.
    Memory(i64, i64),
    /// A jump target referenced by label name.
    Label(String),
}
35
36/// Parsed statement.
37#[derive(Debug, PartialEq, Eq)]
38pub enum Statement {
39    /// Parsed label (name).
40    Label { name: String },
41    /// Parsed directive (name, operands).
42    Directive {
43        name: String,
44        operands: Vec<Operand>,
45    },
46    /// Parsed instruction (name, operands).
47    Instruction {
48        name: String,
49        operands: Vec<Operand>,
50    },
51}
52
53parser! {
54    fn ident[I]()(I) -> String where [I: Stream<Item=char>] {
55        many1(alpha_num().or(char('_')))
56    }
57}
58
59parser! {
60    fn mnemonic[I]()(I) -> String where [I: Stream<Item=char>] {
61        many1(alpha_num())
62    }
63}
64
65parser! {
66    fn integer[I]()(I) -> i64 where [I: Stream<Item=char>] {
67        let sign = optional(one_of("-+".chars()).skip(skip_many(char(' ')))).map(|x| match x {
68            Some('-') => -1,
69            _ => 1,
70        });
71        let hex = string("0x")
72            .with(many1(hex_digit()))
73            .map(|x: String| u64::from_str_radix(&x, 16).unwrap_or(0) as i64);
74        let dec = many1(digit()).map(|x: String| x.parse::<i64>().unwrap_or(0));
75        (sign, attempt(hex).or(dec))
76            .map(|(s, x)| s * x)
77    }
78}
79
80parser! {
81    fn register[I]()(I) -> i64 where [I: Stream<Item=char>] {
82        char('r')
83            .with(many1(digit()))
84            .map(|x: String| x.parse::<i64>().unwrap_or(0))
85    }
86}
87
88parser! {
89    fn operand[I]()(I) -> Operand where [I: Stream<Item=char>] {
90        let register_operand = register().map(Operand::Register);
91        let immediate = integer().map(Operand::Integer);
92        let memory = between(
93            char('['),
94            char(']'),
95            (register().skip(skip_many(char(' '))), optional(integer())),
96        )
97        .map(|t| Operand::Memory(t.0, t.1.unwrap_or(0)));
98        let label = ident().map(Operand::Label);
99        register_operand
100            .or(immediate)
101            .or(memory)
102            .or(label)
103    }
104}
105
106parser! {
107    fn label[I]()(I) -> Statement where [I: Stream<Item=char>] {
108        (ident(), char(':'))
109            .map(|t| Statement::Label { name: t.0 })
110    }
111}
112
113parser! {
114    fn directive[I]()(I) -> Statement where [I: Stream<Item=char>] {
115        let operands = sep_by(operand(), char(',').skip(skip_many(char(' '))));
116        (char('.').with(many1(alpha_num())).skip(skip_many(char(' '))), operands)
117            .map(|t| Statement::Directive { name: t.0, operands: t.1 })
118    }
119}
120
121parser! {
122    fn instruction[I]()(I) -> Statement where [I: Stream<Item=char>] {
123        let operands = sep_by(operand(), char(',').skip(skip_many(char(' '))));
124        (mnemonic().skip(skip_many(char(' '))), operands)
125            .map(|t| Statement::Instruction { name: t.0, operands: t.1 })
126    }
127}
128
129fn format_info(info: &Info<char, &str>) -> String {
130    match *info {
131        Info::Token(x) => format!("{x:?}"),
132        Info::Range(x) => format!("{x:?}"),
133        Info::Owned(ref x) => x.clone(),
134        Info::Borrowed(x) => x.to_string(),
135    }
136}
137
138fn format_error(error: &Error<char, &str>) -> String {
139    match *error {
140        Error::Unexpected(ref x) => format!("unexpected {}", format_info(x)),
141        Error::Expected(ref x) => format!("expected {}", format_info(x)),
142        Error::Message(ref x) => format_info(x),
143        Error::Other(ref x) => format!("{x:?}"),
144    }
145}
146
147fn format_parse_error(parse_error: &Errors<char, &str, SourcePosition>) -> String {
148    format!(
149        "Parse error at line {} column {}: {}",
150        parse_error.position.line,
151        parse_error.position.column,
152        parse_error
153            .errors
154            .iter()
155            .map(format_error)
156            .collect::<Vec<String>>()
157            .join(", ")
158    )
159}
160
161/// Parse a string into a list of instructions.
162///
163/// The instructions are not validated and may have invalid names and operand types.
164pub fn parse(input: &str) -> Result<Vec<Statement>, String> {
165    match spaces()
166        .with(many(
167            attempt(label())
168                .or(directive())
169                .or(instruction())
170                .skip(spaces()),
171        ))
172        .skip(eof())
173        .easy_parse(State::with_positioner(input, SourcePosition::default()))
174    {
175        Ok((insts, _)) => Ok(insts),
176        Err(err) => Err(format_parse_error(&err)),
177    }
178}
179
#[cfg(test)]
mod tests {
    use super::{
        ident, instruction, integer, mnemonic, operand, parse, register,
        Operand::{self, Integer, Memory, Register},
        Statement,
    };
    use combine::Parser;

    /// Shorthand for the expected `Statement::Instruction` value.
    fn insn(name: &str, operands: Vec<Operand>) -> Statement {
        Statement::Instruction {
            name: name.to_string(),
            operands,
        }
    }

    // Unit tests for the different kinds of parsers.
    // Each case lists the input, the expected value and the unparsed remainder.

    #[test]
    fn test_ident() {
        let cases = [
            ("entrypoint", "entrypoint", ""),
            ("lbb_1", "lbb_1", ""),
            ("exit:", "exit", ":"),
        ];
        for (input, name, rest) in cases {
            assert_eq!(
                ident().parse(input),
                Ok((name.to_string(), rest)),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_mnemonic() {
        let cases = [
            ("nop", "nop", ""),
            ("add32", "add32", ""),
            ("add32*", "add32", "*"),
        ];
        for (input, name, rest) in cases {
            assert_eq!(
                mnemonic().parse(input),
                Ok((name.to_string(), rest)),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_integer() {
        let cases: [(&str, i64); 7] = [
            ("0", 0),
            ("42", 42),
            ("+42", 42),
            ("-42", -42),
            ("0x0", 0),
            ("0x123456789abcdef0", 0x123456789abcdef0),
            ("-0x1f", -31),
        ];
        for (input, value) in cases {
            assert_eq!(
                integer().parse(input),
                Ok((value, "")),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_register() {
        let cases: [(&str, i64); 2] = [("r0", 0), ("r15", 15)];
        for (input, number) in cases {
            assert_eq!(
                register().parse(input),
                Ok((number, "")),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_operand() {
        let cases: [(&str, Operand); 10] = [
            ("r0", Register(0)),
            ("r15", Register(15)),
            ("0", Integer(0)),
            ("42", Integer(42)),
            ("[r1]", Memory(1, 0)),
            ("[r3+5]", Memory(3, 5)),
            ("[r3+0x1f]", Memory(3, 31)),
            ("[r3-0x1f]", Memory(3, -31)),
            ("[r5 + 3]", Memory(5, 3)),
            ("[r11 - 0x30]", Memory(11, -48)),
        ];
        for (input, expected) in cases {
            assert_eq!(
                operand().parse(input),
                Ok((expected, "")),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_instruction() {
        let cases: [(&str, Statement); 7] = [
            ("exit", insn("exit", vec![])),
            ("call 2", insn("call", vec![Integer(2)])),
            ("addi r1, 2", insn("addi", vec![Register(1), Integer(2)])),
            (
                "ldxb r2, [r1+12]",
                insn("ldxb", vec![Register(2), Memory(1, 12)]),
            ),
            ("lsh r3, 0x8", insn("lsh", vec![Register(3), Integer(8)])),
            (
                "jne r3, 0x8, +37",
                insn("jne", vec![Register(3), Integer(8), Integer(37)]),
            ),
            // Whitespace between operands is optional.
            (
                "jne r3,0x8,+37",
                insn("jne", vec![Register(3), Integer(8), Integer(37)]),
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(
                instruction().parse(input),
                Ok((expected, "")),
                "input: {:?}",
                input
            );
        }
    }

    // Other unit tests: try to parse various sets of instructions.

    #[test]
    fn test_empty() {
        assert_eq!(parse(""), Ok(vec![]));
    }

    #[test]
    fn test_exit() {
        // No operands.
        assert_eq!(parse("exit"), Ok(vec![insn("exit", vec![])]));
    }

    #[test]
    fn test_lsh() {
        // Register and immediate operands.
        assert_eq!(
            parse("lsh r3, 0x20"),
            Ok(vec![insn("lsh", vec![Register(3), Integer(0x20)])])
        );
    }

    #[test]
    fn test_ja() {
        // Jump offset operand.
        assert_eq!(parse("ja +1"), Ok(vec![insn("ja", vec![Integer(1)])]));
    }

    #[test]
    fn test_ldxh() {
        // Register and memory operands.
        assert_eq!(
            parse("ldxh r4, [r1+12]"),
            Ok(vec![insn("ldxh", vec![Register(4), Memory(1, 12)])])
        );
    }

    #[test]
    fn test_tcp_sack() {
        // Sample program from ubpf.
        // The instructions could be indented, since the parser supports leading
        // white space, but another test already covers that.
        let src = "\
ldxb r2, [r1+12]
ldxb r3, [r1+13]
lsh r3, 0x8
or r3, r2
mov r0, 0x0
jne r3, 0x8, +37
ldxb r2, [r1+23]
jne r2, 0x6, +35
ldxb r2, [r1+14]
add r1, 0xe
and r2, 0xf
lsh r2, 0x2
add r1, r2
mov r0, 0x0
ldxh r4, [r1+12]
add r1, 0x14
rsh r4, 0x2
and r4, 0x3c
mov r2, r4
add r2, 0xffffffec
mov r5, 0x15
mov r3, 0x0
jgt r5, r4, +20
mov r5, r3
lsh r5, 0x20
arsh r5, 0x20
mov r4, r1
add r4, r5
ldxb r5, [r4]
jeq r5, 0x1, +4
jeq r5, 0x0, +12
mov r6, r3
jeq r5, 0x5, +9
ja +2
add r3, 0x1
mov r6, r3
ldxb r3, [r4+1]
add r3, r6
lsh r3, 0x20
arsh r3, 0x20
jsgt r2, r3, -18
ja +1
mov r0, 0x1
exit
";

        // One entry per source line, in the same order.
        let expected = vec![
            insn("ldxb", vec![Register(2), Memory(1, 12)]),
            insn("ldxb", vec![Register(3), Memory(1, 13)]),
            insn("lsh", vec![Register(3), Integer(8)]),
            insn("or", vec![Register(3), Register(2)]),
            insn("mov", vec![Register(0), Integer(0)]),
            insn("jne", vec![Register(3), Integer(8), Integer(37)]),
            insn("ldxb", vec![Register(2), Memory(1, 23)]),
            insn("jne", vec![Register(2), Integer(6), Integer(35)]),
            insn("ldxb", vec![Register(2), Memory(1, 14)]),
            insn("add", vec![Register(1), Integer(14)]),
            insn("and", vec![Register(2), Integer(15)]),
            insn("lsh", vec![Register(2), Integer(2)]),
            insn("add", vec![Register(1), Register(2)]),
            insn("mov", vec![Register(0), Integer(0)]),
            insn("ldxh", vec![Register(4), Memory(1, 12)]),
            insn("add", vec![Register(1), Integer(20)]),
            insn("rsh", vec![Register(4), Integer(2)]),
            insn("and", vec![Register(4), Integer(60)]),
            insn("mov", vec![Register(2), Register(4)]),
            insn("add", vec![Register(2), Integer(4294967276)]),
            insn("mov", vec![Register(5), Integer(21)]),
            insn("mov", vec![Register(3), Integer(0)]),
            insn("jgt", vec![Register(5), Register(4), Integer(20)]),
            insn("mov", vec![Register(5), Register(3)]),
            insn("lsh", vec![Register(5), Integer(32)]),
            insn("arsh", vec![Register(5), Integer(32)]),
            insn("mov", vec![Register(4), Register(1)]),
            insn("add", vec![Register(4), Register(5)]),
            insn("ldxb", vec![Register(5), Memory(4, 0)]),
            insn("jeq", vec![Register(5), Integer(1), Integer(4)]),
            insn("jeq", vec![Register(5), Integer(0), Integer(12)]),
            insn("mov", vec![Register(6), Register(3)]),
            insn("jeq", vec![Register(5), Integer(5), Integer(9)]),
            insn("ja", vec![Integer(2)]),
            insn("add", vec![Register(3), Integer(1)]),
            insn("mov", vec![Register(6), Register(3)]),
            insn("ldxb", vec![Register(3), Memory(4, 1)]),
            insn("add", vec![Register(3), Register(6)]),
            insn("lsh", vec![Register(3), Integer(32)]),
            insn("arsh", vec![Register(3), Integer(32)]),
            insn("jsgt", vec![Register(2), Register(3), Integer(-18)]),
            insn("ja", vec![Integer(1)]),
            insn("mov", vec![Register(0), Integer(1)]),
            insn("exit", vec![]),
        ];

        assert_eq!(parse(src), Ok(expected));
    }

    #[test]
    fn test_error_eof() {
        // Unexpected end of input in a register name.
        let expected =
            "Parse error at line 1 column 6: unexpected end of input, expected digit".to_string();
        assert_eq!(parse("lsh r"), Err(expected));
    }

    #[test]
    fn test_error_unexpected_character() {
        // Unexpected character at end of input.
        let expected = "Parse error at line 2 column 1: unexpected '^', expected letter or digit, expected '_', expected '.', expected whitespaces, expected end of input".to_string();
        assert_eq!(parse("exit\n^"), Err(expected));
    }

    #[test]
    fn test_initial_whitespace() {
        // A leading newline and indentation are skipped before the first statement.
        let src = "
                          exit";
        assert_eq!(parse(src), Ok(vec![insn("exit", vec![])]));
    }
}
690        );
691    }
692}