Skip to main content

cargo_show_asm/asm/
statements.rs

1use nom::branch::alt;
2use nom::bytes::complete::{escaped_transform, tag, take_while_m_n, take_while1};
3use nom::character::complete::{self, newline, none_of, not_line_ending, one_of, space0, space1};
4use nom::combinator::{map, opt, recognize, value, verify};
5use nom::multi::count;
6use nom::sequence::{delimited, pair, preceded, terminated};
7use nom::{AsChar, IResult, Parser as _};
8use owo_colors::OwoColorize;
9use std::borrow::Cow;
10use std::collections::HashSet;
11use std::path::Path;
12use std::sync::LazyLock;
13
14use crate::demangle::LabelKind;
15use crate::opts::NameDisplay;
16use crate::{color, demangle};
17
/// One parsed line of assembly text, as produced by [`parse_statement`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Statement<'a> {
    /// A label definition, e.g. `name:`.
    Label(Label<'a>),
    /// An assembler directive.
    Directive(Directive<'a>),
    /// A tab-indented instruction (or `#`-prefixed annotation) line.
    Instruction(Instruction<'a>),
    /// An empty line.
    Nothing,
    /// A `name = value` line, stored as `(name, value)`.
    Assignment(&'a str, &'a str),
    /// Any other non-empty line that no more specific parser accepted, kept verbatim.
    Dunno(&'a str),
}
27
28#[derive(Clone, Debug, Eq, PartialEq)]
29pub struct Instruction<'a> {
30    pub op: &'a str,
31    pub args: Option<&'a str>,
32}
33
34impl<'a> Instruction<'a> {
35    pub fn parse(input: &'a str) -> IResult<&'a str, Self> {
36        preceded(tag("\t"), alt((Self::parse_regular, Self::parse_sharp))).parse(input)
37    }
38
39    fn parse_sharp(input: &'a str) -> IResult<&'a str, Self> {
40        let sharps = take_while_m_n(1, 2, |c| c == '#');
41        let sharp_tag = pair(sharps, not_line_ending);
42        map(recognize(sharp_tag), |op| Instruction { op, args: None }).parse(input)
43    }
44
45    fn parse_regular(input: &'a str) -> IResult<&'a str, Self> {
46        // NOTE: ARM allows `.` inside instruction names e.g. `b.ne` for branch not equal
47        //       Wasm also uses `.` in instr names, and uses `_` for `end_function`
48        //       "powerpc-nintendo-none-eabi.json" uses `+` and `-` in `blt-`/`blt+`...
49        let op = take_while1(|c| AsChar::is_alphanum(c) || matches!(c, '.' | '_' | '-' | '+'));
50        let args = opt(preceded(space1, not_line_ending));
51        map(pair(op, args), |(op, args)| Instruction { op, args }).parse(input)
52    }
53}
54
// Names (without the leading `.`) of every data directive the original regex matched.
//
// All of those can insert something as well... Not sure if it's a full list or not:
//   .long, .short .octa, .quad, .word,
//   .single .double .float
//   .ascii, .asciz, .string, .string8 .string16 .string32 .string64
//   .2byte .4byte .8byte
//   .dc
//   .inst .insn
static DATA_DIRS: LazyLock<HashSet<&str>> = LazyLock::new(|| {
    HashSet::from([
        "string64", "string32", "string16", "string8", "dcb.x", "dcb.w", "dcb.s", "dcb.l", "dcb.d",
        "dcb.b", "ds.x", "ds.w", "ds.s", "ds.p", "ds.l", "ds.d", "ds.b", "dc.x", "dc.w", "dc.s",
        "dc.l", "dc.d", "dc.a", "dc.b", "8byte", "4byte", "2byte", "byte", "xword", "value",
        "zero", "word", "skip", "single", "string", "space", "short", "quad", "octa", "long",
        "int", "hword", "half", "float", "fill", "dword", "double", "dc", "ds", "dcb", "asciz",
        "ascii",
    ])
});
76
77fn parse_data_dec(input: &str) -> IResult<&str, Directive<'_>> {
78    let trimmed = input.trim_start();
79    let Some(rest) = trimmed.strip_prefix('.') else {
80        use nom::error::{Error, ErrorKind};
81        return Err(nom::Err::Error(Error::new(input, ErrorKind::Eof)));
82    };
83
84    let word_end = rest.find([' ', '\t']).unwrap_or(rest.len());
85    let directive = &rest[..word_end];
86
87    if !DATA_DIRS.contains(&directive) {
88        use nom::error::{Error, ErrorKind};
89        return Err(nom::Err::Error(Error::new(input, ErrorKind::Eof)));
90    }
91
92    let after_directive = &rest[word_end..];
93    let after_ws = after_directive.trim_start();
94    let data_end = after_ws.find('\n').unwrap_or(after_ws.len());
95    let data = &after_ws[..data_end];
96
97    let leading_ws = input.len() - trimmed.len();
98    let consumed = leading_ws + 1 + rest.len() - after_ws.len() + data.len();
99
100    Ok((&input[consumed..], Directive::Data(directive, data)))
101}
102
103impl Statement<'_> {
104    /// Should we skip it for --simplify output?
105    pub fn boring(&self) -> bool {
106        if let Statement::Directive(Directive::SetValue(_, _)) = self {
107            return false;
108        }
109        if let Statement::Directive(Directive::SectionStart(name)) = self {
110            if name.starts_with(".data") || name.starts_with(".rodata") {
111                return false;
112            }
113        }
114        matches!(self, Statement::Directive(_) | Statement::Dunno(_))
115    }
116}
117
118impl std::fmt::Display for Instruction<'_> {
119    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
120        let display = NameDisplay::from(&*f);
121        if self.op.starts_with("#DEBUG_VALUE:") {
122            write!(f, "{}", color!(self.op, OwoColorize::blue))?;
123        } else {
124            write!(f, "{}", color!(self.op, OwoColorize::bright_blue))?;
125        }
126        if let Some(args) = self.args {
127            let args = demangle::contents(args, display);
128            let w_label = demangle::color_local_labels(&args);
129            write!(f, " {w_label}")?;
130        }
131        Ok(())
132    }
133}
134
135impl std::fmt::Display for Statement<'_> {
136    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137        match self {
138            Statement::Label(l) => l.fmt(f),
139            Statement::Directive(d) => {
140                if f.alternate() {
141                    write!(f, "{d:#}")
142                } else {
143                    write!(f, "{d}")
144                }
145            }
146            Statement::Instruction(i) => {
147                if f.sign_minus() {
148                    write!(f, "\t{i:-#}")
149                } else if f.alternate() {
150                    write!(f, "\t{i:#}")
151                } else {
152                    write!(f, "\t{i}")
153                }
154            }
155            Statement::Nothing => Ok(()),
156            Statement::Dunno(l) => write!(f, "{l}"),
157            Statement::Assignment(key, val) => {
158                let display = NameDisplay::from(&*f);
159                let key = demangle::contents(key, display);
160                let val = demangle::contents(val, display);
161                write!(
162                    f,
163                    "{} = {}",
164                    color!(key, OwoColorize::bright_cyan),
165                    color!(val, OwoColorize::bright_cyan)
166                )
167            }
168        }
169    }
170}
171
172impl std::fmt::Display for Directive<'_> {
173    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
174        let display = NameDisplay::from(&*f);
175        match self {
176            Directive::File(ff) => ff.fmt(f),
177            Directive::Loc(l) => l.fmt(f),
178            Directive::Generic(g) => g.fmt(f),
179            Directive::SetValue(key, val) => {
180                let key = demangle::contents(key, display);
181                let val = demangle::contents(val, display);
182                write!(
183                    f,
184                    ".{} {}, {}",
185                    color!("set", OwoColorize::bright_magenta),
186                    color!(key, OwoColorize::bright_cyan),
187                    color!(val, OwoColorize::bright_cyan)
188                )
189            }
190            Directive::SectionStart(s) => {
191                let dem = demangle::contents(s, display);
192                write!(f, "{} {dem}", color!(".section", OwoColorize::bright_red))
193            }
194            Directive::SubsectionsViaSym => write!(
195                f,
196                ".{}",
197                color!("subsections_via_symbols", OwoColorize::bright_red)
198            ),
199            Directive::SymIsFun(s) => {
200                let dem = demangle::contents(s, display);
201                write!(
202                    f,
203                    ".{}\t{dem},@function",
204                    color!("type", OwoColorize::bright_magenta)
205                )
206            }
207            Directive::Data(ty, data) => {
208                let data = demangle::contents(data, display);
209                let w_label = demangle::color_local_labels(&data);
210                write!(
211                    f,
212                    "\t.{}\t{}",
213                    color!(ty, OwoColorize::bright_magenta),
214                    color!(w_label, OwoColorize::bright_cyan)
215                )
216            }
217            Directive::Global(data) => {
218                let data = demangle::contents(data, display);
219                let w_label = demangle::color_local_labels(&data);
220                write!(
221                    f,
222                    "\t.{}\t{}",
223                    color!("globl", OwoColorize::bright_magenta),
224                    color!(w_label, OwoColorize::bright_cyan)
225                )
226            }
227        }
228    }
229}
230
231impl std::fmt::Display for FilePath {
232    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233        std::fmt::Display::fmt(&self.as_full_path().display(), f)
234    }
235}
236
237impl std::fmt::Display for File<'_> {
238    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
239        write!(f, "\t.file\t{} {}", self.index, self.path)?;
240        if let Some(md5) = self.md5 {
241            write!(f, " {md5}")?;
242        }
243        Ok(())
244    }
245}
246
247impl std::fmt::Display for GenericDirective<'_> {
248    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
249        let display = NameDisplay::from(&*f);
250        write!(
251            f,
252            "\t.{}",
253            color!(
254                demangle::contents(self.0, display),
255                OwoColorize::bright_magenta
256            )
257        )
258    }
259}
260
261impl std::fmt::Display for Loc<'_> {
262    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
263        match self.extra {
264            Some(x) => write!(
265                f,
266                "\t.loc\t{file} {line} {col} {x}",
267                file = self.file,
268                line = self.line,
269                col = self.column,
270            ),
271            None => write!(
272                f,
273                "\t.loc\t{file} {line} {col}",
274                file = self.file,
275                line = self.line,
276                col = self.column
277            ),
278        }
279    }
280}
281
282impl From<&std::fmt::Formatter<'_>> for NameDisplay {
283    fn from(f: &std::fmt::Formatter) -> Self {
284        if f.sign_minus() {
285            NameDisplay::Mangled
286        } else if f.alternate() {
287            NameDisplay::Full
288        } else {
289            NameDisplay::Short
290        }
291    }
292}
293
294impl std::fmt::Display for Label<'_> {
295    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
296        let display = NameDisplay::from(&*f);
297        write!(
298            f,
299            "{}:",
300            color!(
301                demangle::contents(self.id, display),
302                OwoColorize::bright_yellow
303            )
304        )
305    }
306}
307
308#[derive(Copy, Clone, Debug, Eq, PartialEq)]
309pub struct Label<'a> {
310    pub id: &'a str,
311    pub kind: LabelKind,
312}
313
314impl<'a> Label<'a> {
315    pub fn parse(input: &'a str) -> IResult<&'a str, Self> {
316        // TODO: label can't start with a digit
317        let no_comment = tag(":");
318        let comment = terminated(
319            tag(":"),
320            (
321                take_while1(|c| c == ' ' || c == '\t'),
322                tag("#"),
323                take_while1(|c| c != '\n'),
324            ),
325        );
326        map(
327            terminated(take_while1(good_for_label), alt((comment, no_comment))),
328            |id: &str| Label {
329                id,
330                kind: demangle::label_kind(id),
331            },
332        )
333        .parse(input)
334    }
335}
336
/// A source location taken from a `.loc` / `.cv_loc` directive.
#[derive(Copy, Clone, Debug, Eq, Default)]
pub struct Loc<'a> {
    /// File number.
    pub file: u64,
    /// Line number.
    pub line: u64,
    /// Column number.
    pub column: u64,
    /// Remaining text of the directive after the column, if any.
    pub extra: Option<&'a str>,
}

impl PartialEq for Loc<'_> {
    /// Two locations are equal when file and line agree; `column` and `extra` are ignored.
    fn eq(&self, other: &Self) -> bool {
        (self.file, self.line) == (other.file, other.line)
    }
}
350
351impl<'a> Loc<'a> {
352    pub fn parse(input: &'a str) -> IResult<&'a str, Self> {
353        // DWARF2 (Unix):      .loc               fileno lineno [column] [options]
354        // CodeView (Windows): .cv_loc functionid fileno lineno [column] [prologue_end] [is_stmt value]
355        map(
356            (
357                alt((
358                    tag("\t.loc\t"),
359                    terminated(tag("\t.cv_loc\t"), (complete::u64, space1)),
360                )),
361                complete::u64,
362                space1,
363                complete::u64,
364                space1,
365                complete::u64,
366                opt(preceded(tag(" "), take_while1(|c| c != '\n'))),
367            ),
368            |(_, file, _, line, _, column, extra)| Loc {
369                file,
370                line,
371                column,
372                extra,
373            },
374        )
375        .parse(input)
376    }
377}
378
/// Path of a source file as recorded in a file directive.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FilePath {
    /// The directive carried a single path.
    FullPath(String),
    /// The directive carried a directory and a file name separately.
    PathAndFileName { path: String, name: String },
}

impl FilePath {
    /// Returns the complete path, joining directory and file name when they are separate.
    pub fn as_full_path(&self) -> Cow<'_, Path> {
        match self {
            FilePath::PathAndFileName { path, name } => {
                let joined = Path::new(path).join(name);
                Cow::Owned(joined)
            }
            FilePath::FullPath(path) => Cow::Borrowed(Path::new(path)),
        }
    }

    /// Optionally expand `~/` to `home_dir`
    ///
    /// Rewritten debug paths may use `~/` for privacy, but that's not a real path,
    /// because `~` is usually expanded by the shell.
    ///
    /// Only a leading `~` component is replaced; without a `home_dir` the path is
    /// returned as is.
    pub fn as_full_path_with_home_dir(&self, home_dir: Option<&Path>) -> Cow<'_, Path> {
        let full = self.as_full_path();

        let expanded = home_dir.and_then(|home| {
            let tail = full.strip_prefix("~").ok()?;
            Some(home.join(tail))
        });

        match expanded {
            Some(in_home) => Cow::Owned(in_home),
            None => full,
        }
    }
}
409
/// A parsed `.file` / `.cv_file` directive.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct File<'a> {
    /// File number given in the directive.
    pub index: u64,
    /// Path of the file, either whole or split into directory and name.
    pub path: FilePath,
    /// Hex checksum text when the directive supplied one that is treated as MD5.
    pub md5: Option<&'a str>,
}
416
417fn parse_quoted_string(input: &str) -> IResult<&str, String> {
418    // Inverse of MCAsmStreamer::PrintQuotedString() in MCAsmStreamer.cpp in llvm.
419    delimited(
420        tag("\""),
421        escaped_transform(
422            none_of("\\\""),
423            '\\',
424            alt((
425                value('\\', tag("\\")),
426                value('\"', tag("\"")),
427                value('\x08', tag("b")),
428                value('\x0c', tag("f")),
429                value('\n', tag("n")),
430                value('\r', tag("r")),
431                value('\t', tag("t")),
432                // 3 digits in base 8
433                map(count(one_of("01234567"), 3), |digits| {
434                    let mut v = 0u8;
435                    for c in digits {
436                        v = (v << 3) | c.to_digit(8).unwrap() as u8;
437                    }
438                    char::from(v)
439                }),
440            )),
441        ),
442        tag("\""),
443    )
444    .parse(input)
445}
446
// Workaround for a problem in llvm code that produces debug symbols on Windows.
//
// As of the time of writing, CodeViewDebug::getFullFilepath() in CodeViewDebug.cpp
// replaces all occurrences of "\\" with "\". This breaks paths that start with "\\?\"
// (a prefix instructing Windows to skip filename parsing) - they turn into "\?\",
// which is invalid.
//
// Here we turn "\?\" back into "\\?\"; any other path is returned unchanged.
// Hopefully this will get fixed in llvm, and we'll remove this.
fn fixup_windows_file_path(p: String) -> String {
    const MANGLED_PREFIX: &str = r"\?\";

    if p.starts_with(MANGLED_PREFIX) {
        format!("\\{p}")
    } else {
        p
    }
}
460
461impl<'a> File<'a> {
462    pub fn parse(input: &'a str) -> IResult<&'a str, Self> {
463        // DWARF2/DWARF5 (Unix): .file    fileno [dirname] "filename" [md5]
464        // CodeView (Windows):   .cv_file fileno           "filename" ["checksum"] [checksumkind]
465        alt((
466            map(
467                (
468                    tag("\t.file\t"),
469                    complete::u64,
470                    space1,
471                    parse_quoted_string,
472                    opt(preceded(space1, parse_quoted_string)),
473                    opt(preceded(space1, complete::hex_digit1)),
474                ),
475                |(_, fileno, _, filepath, filename, md5)| File {
476                    index: fileno,
477                    path: match filename {
478                        Some(filename) => FilePath::PathAndFileName {
479                            path: filepath,
480                            name: filename,
481                        },
482                        None => FilePath::FullPath(filepath),
483                    },
484                    md5,
485                },
486            ),
487            map(
488                (
489                    tag("\t.cv_file\t"),
490                    complete::u64,
491                    space1,
492                    parse_quoted_string,
493                    opt(preceded(
494                        space1,
495                        delimited(tag("\""), complete::hex_digit1, tag("\"")),
496                    )),
497                    opt(preceded(space1, complete::u64)),
498                ),
499                |(_, fileno, _, filename, checksum, checksumkind)| File {
500                    index: fileno,
501                    path: FilePath::FullPath(fixup_windows_file_path(filename)),
502                    // FileChecksumKind enum: { None, MD5, SHA1, SHA256 }
503                    // (from llvm's CodeView.h)
504                    md5: if checksumkind == Some(1) {
505                        checksum
506                    } else {
507                        None
508                    },
509                },
510            ),
511        ))
512        .parse(input)
513    }
514}
515
// Checks `Label::parse` on plain labels and on labels followed by a `#` comment:
// the whole input must be consumed and the expected `LabelKind` reported.
#[test]
fn test_parse_label() {
    assert_eq!(
        Label::parse("GCC_except_table0:"),
        Ok((
            "",
            Label {
                id: "GCC_except_table0",
                kind: LabelKind::Unknown,
            }
        ))
    );
    assert_eq!(
        Label::parse(
            "__ZN4core3ptr50drop_in_place$LT$rand..rngs..thread..ThreadRng$GT$17hba90ed09529257ccE:"
        ),
        Ok((
            "",
            Label {
                id: "__ZN4core3ptr50drop_in_place$LT$rand..rngs..thread..ThreadRng$GT$17hba90ed09529257ccE",
                kind: LabelKind::Global,
            }
        ))
    );
    assert_eq!(
        Label::parse(".Lexception0:"),
        Ok((
            "",
            Label {
                id: ".Lexception0",
                kind: LabelKind::Local
            }
        ))
    );
    assert_eq!(
        Label::parse("LBB0_1:"),
        Ok((
            "",
            Label {
                id: "LBB0_1",
                kind: LabelKind::Local
            }
        ))
    );
    assert_eq!(
        Label::parse("Ltmp12:"),
        Ok((
            "",
            Label {
                id: "Ltmp12",
                kind: LabelKind::Temp
            }
        ))
    );
    // Labels with a trailing comment: the comment is consumed but not stored.
    assert_eq!(
        Label::parse(".Ltmp0:\t# comment"),
        Ok((
            "",
            Label {
                id: ".Ltmp0",
                kind: LabelKind::Local,
            }
        ))
    );
    assert_eq!(
        Label::parse(
            "__ZN4core3ptr50drop_in_place$LT$rand..rngs..thread..ThreadRng$GT$17hba90ed09529257ccE: # @\"rand\""
        ),
        Ok((
            "",
            Label {
                id: "__ZN4core3ptr50drop_in_place$LT$rand..rngs..thread..ThreadRng$GT$17hba90ed09529257ccE",
                kind: LabelKind::Global,
            }
        ))
    );
    assert_eq!(
        Label::parse(
            "_ZN44_$LT$$RF$T$u20$as$u20$core..fmt..Display$GT$3fmt17h6557947cc19e5571E: # @\"_ZN44_$LT$$RF$T$u20$as$u20$core..fmt..Display$GT$3fmt17h6557947cc19e5571E\""
        ),
        Ok((
            "",
            Label {
                id: "_ZN44_$LT$$RF$T$u20$as$u20$core..fmt..Display$GT$3fmt17h6557947cc19e5571E",
                kind: LabelKind::Global,
            }
        ))
    );
    assert_eq!(
        Label::parse(
            "_ZN6sample4main17hb59e25bba3071c26E:    # @_ZN6sample4main17hb59e25bba3071c26E"
        ),
        Ok((
            "",
            Label {
                id: "_ZN6sample4main17hb59e25bba3071c26E",
                kind: LabelKind::Global,
            }
        ))
    );
}
617
// Checks `Loc::parse` for `.loc` and `.cv_loc` lines, with and without trailing text.
// NOTE: `Loc`'s `PartialEq` compares only `file` and `line`, so `column` and `extra`
// mismatches would not be caught by these `assert_eq!` calls.
#[test]
fn test_parse_loc() {
    assert_eq!(
        Loc::parse("\t.loc\t31 26 29"),
        Ok((
            "",
            Loc {
                file: 31,
                line: 26,
                column: 29,
                extra: None
            }
        ))
    );
    assert_eq!(
        Loc::parse("\t.loc\t31 26 29 is_stmt 0"),
        Ok((
            "",
            Loc {
                file: 31,
                line: 26,
                column: 29,
                extra: Some("is_stmt 0")
            }
        ))
    );
    assert_eq!(
        Loc::parse("\t.loc\t31 26 29 prologue_end"),
        Ok((
            "",
            Loc {
                file: 31,
                line: 26,
                column: 29,
                extra: Some("prologue_end")
            }
        ))
    );
    // For `.cv_loc` the first number (function id) is skipped.
    assert_eq!(
        Loc::parse("\t.cv_loc\t9 6 1 0"),
        Ok((
            "",
            Loc {
                file: 6,
                line: 1,
                column: 0,
                extra: None,
            }
        ))
    );
    assert_eq!(
        Loc::parse("\t.cv_loc\t9 6 1 0 rest of the line is ignored"),
        Ok((
            "",
            Loc {
                file: 6,
                line: 1,
                column: 0,
                extra: Some("rest of the line is ignored"),
            }
        ))
    );
}
681
// Checks that `as_full_path_with_home_dir` replaces only a leading `~` component.
#[test]
fn test_home_dir() {
    assert_eq!(
        FilePath::FullPath("~/subdir/in/home".into())
            .as_full_path_with_home_dir(Some("/home/dir".as_ref())),
        Path::new("/home/dir/subdir/in/home")
    );

    assert_eq!(
        FilePath::PathAndFileName {
            path: "~/subdir/in/home".into(),
            name: "filename".into(),
        }
        .as_full_path_with_home_dir(Some("/home/dir".as_ref())),
        Path::new("/home/dir/subdir/in/home/filename"),
    );

    // Later `~` components are kept literally.
    assert_eq!(
        FilePath::PathAndFileName {
            path: "~/~/tilde/~".into(),
            name: "~".into(),
        }
        .as_full_path_with_home_dir(Some("home/dir/".as_ref())),
        Path::new("home/dir/~/tilde/~/~"),
    );
}
708
// Checks `File::parse` for `.file` (one or two quoted paths, optional md5, escapes)
// and `.cv_file` (Windows path fixup, checksum kept only for checksum kind 1).
#[test]
fn test_parse_file() {
    // Single quoted path.
    let (rest, file) = File::parse("\t.file\t9 \"/home/ubuntu/buf-test/src/main.rs\"").unwrap();
    assert!(rest.is_empty());
    assert_eq!(
        file,
        File {
            index: 9,
            path: FilePath::FullPath("/home/ubuntu/buf-test/src/main.rs".to_owned()),
            md5: None
        }
    );
    assert_eq!(
        file.path.as_full_path(),
        Path::new("/home/ubuntu/buf-test/src/main.rs")
    );

    // Directory and file name given separately.
    let (rest, file) = File::parse("\t.file\t9 \"/home/ubuntu/buf-test\" \"src/main.rs\"").unwrap();
    assert!(rest.is_empty());
    assert_eq!(
        file,
        File {
            index: 9,
            path: FilePath::PathAndFileName {
                path: "/home/ubuntu/buf-test".to_owned(),
                name: "src/main.rs".to_owned()
            },
            md5: None,
        }
    );
    assert_eq!(
        file.path.as_full_path(),
        Path::new("/home/ubuntu/buf-test/src/main.rs")
    );

    // Directory, file name and md5.
    let (rest, file) = File::parse(
        "\t.file\t9 \"/home/ubuntu/buf-test\" \"src/main.rs\" 74ab618651b843a815bf806bd6c50c19",
    )
    .unwrap();
    assert!(rest.is_empty());
    assert_eq!(
        file,
        File {
            index: 9,
            path: FilePath::PathAndFileName {
                path: "/home/ubuntu/buf-test".to_owned(),
                name: "src/main.rs".to_owned()
            },
            md5: Some("74ab618651b843a815bf806bd6c50c19"),
        }
    );
    assert_eq!(
        file.path.as_full_path(),
        Path::new("/home/ubuntu/buf-test/src/main.rs")
    );

    // Every supported escape sequence inside the quoted path.
    let (rest, file) = File::parse(
        "\t.file\t9 \"/home/\\000path\\twith\\nlots\\\"of\\runprintable\\147characters\\blike\\\\this\\f\" \"src/main.rs\" 74ab618651b843a815bf806bd6c50c19",
    )
    .unwrap();
    assert!(rest.is_empty());
    assert_eq!(
        file,
        File {
            index: 9,
            path: FilePath::PathAndFileName {
                path: "/home/\x00path\twith\nlots\"of\runprintable\x67characters\x08like\\this\x0c"
                    .to_owned(),
                name: "src/main.rs".to_owned()
            },
            md5: Some("74ab618651b843a815bf806bd6c50c19"),
        }
    );
    assert_eq!(
        file.path.as_full_path(),
        Path::new(
            "/home/\x00path\twith\nlots\"of\runprintable\x67characters\x08like\\this\x0c/src/main.rs"
        )
    );

    // `.cv_file` with checksum kind 2: the `\?\` prefix is repaired, the checksum is dropped.
    let (rest, file) = File::parse(
        "\t.cv_file\t6 \"\\\\?\\\\C:\\\\Foo\\\\Bar\\\\src\\\\main.rs\" \"778FECDE2D48F9B948BA07E6E0B4AB983123B71B\" 2",
    )
    .unwrap();
    assert!(rest.is_empty());
    assert_eq!(
        file,
        File {
            index: 6,
            path: FilePath::FullPath("\\\\?\\C:\\Foo\\Bar\\src\\main.rs".to_owned()),
            md5: None,
        }
    );

    // `.cv_file` with checksum kind 1: the checksum is kept as md5.
    let (rest, file) = File::parse(
        "\t.cv_file\t6 \"C:\\\\Foo\\\\Bar\\\\src\\\\main.rs\" \"778FECDE2D48F9B948BA07E6E0B4AB98\" 1",
    )
    .unwrap();
    assert!(rest.is_empty());
    assert_eq!(
        file,
        File {
            index: 6,
            path: FilePath::FullPath("C:\\Foo\\Bar\\src\\main.rs".to_owned()),
            md5: Some("778FECDE2D48F9B948BA07E6E0B4AB98"),
        }
    );
}
817
// Checks that `.type name,@function` and `.set name, value` lines are recognised
// as `SymIsFun` and `SetValue` directives.
#[test]
fn parse_function_alias() {
    assert_eq!(
        parse_statement("\t.type\ttwo,@function\n").unwrap().1,
        Statement::Directive(Directive::SymIsFun("two"))
    );

    assert_eq!(
        parse_statement(".set\ttwo,\tone_plus_one\n").unwrap().1,
        Statement::Directive(Directive::SetValue("two", "one_plus_one"))
    );
}
830
// Checks that data directives are parsed into `Directive::Data(name, operands)`
// regardless of whether spaces or tabs are used for indentation and separation.
#[test]
fn parse_data_decl() {
    assert_eq!(
        parse_statement("  .asciz  \"sample_merged\"\n").unwrap().1,
        Statement::Directive(Directive::Data("asciz", "\"sample_merged\""))
    );
    assert_eq!(
        parse_statement("          .byte   0\n").unwrap().1,
        Statement::Directive(Directive::Data("byte", "0"))
    );
    assert_eq!(
        parse_statement("\t.long   .Linfo_st\n").unwrap().1,
        Statement::Directive(Directive::Data("long", ".Linfo_st"))
    );
}
846
/// An assembler directive recognised by [`parse_statement`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Directive<'a> {
    /// `.file` / `.cv_file`.
    File(File<'a>),
    /// `.loc` / `.cv_loc`.
    Loc(Loc<'a>),
    /// `.globl` / `.global` with the symbol name.
    Global(&'a str),
    /// Any other tab-indented `.directive`, stored without the leading dot.
    Generic(GenericDirective<'a>),
    /// `.type <name>,@function` with the symbol name.
    SymIsFun(&'a str),
    /// `.set <name>, <value>` as `(name, value)`.
    SetValue(&'a str, &'a str),
    /// `.subsections_via_symbols`.
    SubsectionsViaSym,
    /// `.section` with the trimmed remainder of the line.
    SectionStart(&'a str),
    /// A data directive as `(directive name without the dot, operand text)`.
    Data(&'a str, &'a str),
}
859
/// Text of a directive without a dedicated variant: everything after the leading
/// tab and dot, up to the end of the line.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct GenericDirective<'a>(pub &'a str);
862
863pub fn parse_statement(input: &str) -> IResult<&str, Statement<'_>> {
864    let label = map(Label::parse, Statement::Label);
865
866    let file = map(File::parse, Directive::File);
867
868    let loc = map(Loc::parse, Directive::Loc);
869
870    let section = map(
871        preceded(tag("\t.section"), take_while1(|c| c != '\n')),
872        |s: &str| Directive::SectionStart(s.trim()),
873    );
874    let generic = map(preceded(tag("\t."), take_while1(|c| c != '\n')), |s| {
875        Directive::Generic(GenericDirective(s))
876    });
877    let set = map(
878        (
879            tag(".set"),
880            space1,
881            take_while1(good_for_label),
882            tag(","),
883            space0,
884            take_while1(|c| c != '\n'),
885        ),
886        |(_, _, name, _, _, val)| Directive::SetValue(name, val),
887    );
888    let ssvs = map(tag(".subsections_via_symbols"), |_| {
889        Directive::SubsectionsViaSym
890    });
891
892    let assignment = map(
893        (
894            take_while1(good_for_label),
895            space0,
896            tag("="),
897            space0,
898            take_while1(|c| c != '\n'),
899        ),
900        |(src, _, _, _, dst)| Statement::Assignment(src, dst),
901    );
902
903    let dunno = map(take_while1(|c| c != '\n'), Statement::Dunno);
904    // let dunno = |input: &str| todo!("{:?}", &input[..100]);
905
906    let instr = map(Instruction::parse, Statement::Instruction);
907    let nothing = map(verify(not_line_ending, str::is_empty), |_| {
908        Statement::Nothing
909    });
910
911    let typ = map(
912        (
913            tag("\t.type"),
914            space1,
915            take_while1(good_for_label),
916            tag(",@function"),
917        ),
918        |(_, _, id, _)| Directive::SymIsFun(id),
919    );
920
921    let global = map(
922        (
923            space0,
924            alt((tag(".globl"), tag(".global"))),
925            space1,
926            take_while1(|c| good_for_label(c) || c == '@'),
927        ),
928        |(_, _, _, name)| Directive::Global(name),
929    );
930    let dir = map(
931        alt((
932            file,
933            global,
934            loc,
935            set,
936            ssvs,
937            section,
938            typ,
939            parse_data_dec,
940            generic,
941        )),
942        Statement::Directive,
943    );
944
945    // use terminated on the subparsers so that if the subparser doesn't consume the whole line, it's discarded
946    // we assume that each label/instruction/directive will only take one line
947    terminated(
948        alt((label, dir, instr, nothing, assignment, dunno)),
949        newline,
950    )
951    .parse(input)
952}
953
/// Returns `true` for characters accepted in label and symbol names:
/// ASCII letters and digits, `.`, `$` and `_`.
fn good_for_label(c: char) -> bool {
    matches!(c, '.' | '$' | '_') || c.is_ascii_alphanumeric()
}
957impl Statement<'_> {
958    /// Is this a label that starts with ".Lfunc_end"?
959    pub(crate) fn is_end_of_fn(&self) -> bool {
960        let check_id = |id: &str| id.strip_prefix('.').unwrap_or(id).starts_with("Lfunc_end");
961        matches!(self, Statement::Label(Label { id, .. }) if check_id(id))
962    }
963
964    /// Is this a .section directive?
965    pub(crate) fn is_section_start(&self) -> bool {
966        matches!(self, Statement::Directive(Directive::SectionStart(_)))
967    }
968
969    /// Is this a .global directive?
970    pub(crate) fn is_global(&self) -> bool {
971        matches!(self, Statement::Directive(Directive::Global(_)))
972    }
973}