mapfile_parser/
parser.rs

1/* SPDX-FileCopyrightText: © 2025 Decompollaborate */
2/* SPDX-License-Identifier: MIT */
3
4use std::{
5    collections::{HashMap, HashSet},
6    ffi::OsStr,
7    path::{Path, PathBuf},
8};
9
10use regex::*;
11
12use crate::{mapfile::MapFile, section, segment, symbol, utils};
13
14lazy_static! {
15    static ref BANNED_SYMBOL_NAMES: HashSet<&'static str> = {
16        let mut symbol_names = HashSet::new();
17        symbol_names.insert("gcc2_compiled.");
18        symbol_names
19    };
20}
21
22// TODO: Change all the deprecated functions to private and undeprecate them in a future version.
23
24impl MapFile {
25    /// Creates a new `MapFile` with the contents from the file pointed by the
26    /// `map_path` argument.
27    ///
28    /// The format of the map will be guessed based on its contents.
29    ///
30    /// Currently supported map formats:
31    /// - GNU ld
32    /// - clang ld.lld
33    /// - Metrowerks ld
34    #[must_use]
35    pub fn new_from_map_file(map_path: &Path) -> Self {
36        let mut m = Self::new_impl();
37        #[allow(deprecated)]
38        m.read_map_file(map_path);
39        m
40    }
41
42    /// Creates a new `MapFile` by parsing the contents of the map.
43    ///
44    /// The format of the map will be guessed based on its contents.
45    ///
46    /// Currently supported map formats:
47    /// - GNU ld
48    /// - clang ld.lld
49    /// - Metrowerks ld
50    #[must_use]
51    pub fn new_from_map_str(map_contents: &str) -> Self {
52        let mut m = Self::new_impl();
53        #[allow(deprecated)]
54        m.parse_map_contents(map_contents);
55        m
56    }
57
58    /// Parses the contents of a GNU ld map.
59    ///
60    /// The `map_contents` argument must contain the contents of a GNU ld mapfile.
61    #[must_use]
62    pub fn new_from_gnu_map_str(map_contents: &str) -> Self {
63        let mut m = Self::new_impl();
64        #[allow(deprecated)]
65        m.parse_map_contents_gnu(map_contents);
66        m
67    }
68
69    /// Parses the contents of a clang ld.lld map.
70    ///
71    /// The `map_contents` argument must contain the contents of a clang ld.lld mapfile.
72    #[must_use]
73    pub fn new_from_lld_map_str(map_contents: &str) -> Self {
74        let mut m = Self::new_impl();
75        #[allow(deprecated)]
76        m.parse_map_contents_lld(map_contents);
77        m
78    }
79
80    /// Parses the contents of a Metrowerks ld (mwld) map.
81    ///
82    /// The `map_contents` argument must contain the contents of a Metrowerks ld mapfile.
83    #[must_use]
84    pub fn new_from_mw_map_str(map_contents: &str) -> Self {
85        let mut m = Self::new_impl();
86        m.parse_map_contents_mw(map_contents);
87        m
88    }
89
90    pub(crate) fn new_impl() -> Self {
91        Self {
92            segments_list: Vec::new(),
93
94            #[cfg(feature = "python_bindings")]
95            debugging: false,
96        }
97    }
98
99    #[deprecated(
100        since = "2.8.0",
101        note = "Use either `new_from_map_file` or `new_from_map_str` instead."
102    )]
103    #[allow(clippy::new_without_default)]
104    pub fn new() -> Self {
105        Self::new_impl()
106    }
107
108    /**
109    Opens the mapfile pointed by the `map_path` argument and parses it.
110
111    The format of the map will be guessed based on its contents.
112
113    Currently supported map formats:
114    - GNU ld
115    - clang ld.lld
116    - Metrowerks ld
117     */
118    #[deprecated(since = "2.8.0", note = "Prefer `MapFile::new_from_map_file` instead")]
119    pub fn read_map_file(&mut self, map_path: &Path) {
120        let map_contents = utils::read_file_contents(map_path);
121
122        #[allow(deprecated)]
123        self.parse_map_contents(&map_contents);
124    }
125
126    /**
127    Parses the contents of the map.
128
129    The `map_contents` argument must contain the contents of a mapfile.
130
131    The format of the map will be guessed based on its contents.
132
133    Currently supported mapfile formats:
134    - GNU ld
135    - clang ld.lld
136    - Metrowerks ld
137    */
138    #[deprecated(since = "2.8.0", note = "Prefer `MapFile::new_from_map_str` instead")]
139    pub fn parse_map_contents(&mut self, map_contents: &str) {
140        let regex_lld_header =
141            Regex::new(r"\s+VMA\s+LMA\s+Size\s+Align\s+Out\s+In\s+Symbol").unwrap();
142
143        if regex_lld_header.is_match(map_contents) {
144            #[allow(deprecated)]
145            self.parse_map_contents_lld(map_contents);
146        } else if map_contents.starts_with("Link map of ")
147            || map_contents.contains(" section layout")
148        {
149            self.parse_map_contents_mw(map_contents);
150        } else {
151            // GNU is the fallback
152            #[allow(deprecated)]
153            self.parse_map_contents_gnu(map_contents);
154        }
155    }
156}
157
158impl MapFile {
159    /**
160    Parses the contents of a GNU ld map.
161
162    The `map_contents` argument must contain the contents of a GNU ld mapfile.
163     */
164    #[deprecated(
165        since = "2.8.0",
166        note = "Prefer `MapFile::new_from_gnu_map_str` instead"
167    )]
168    pub fn parse_map_contents_gnu(&mut self, map_contents: &str) {
169        // TODO: maybe move somewhere else?
170        let regex_section_alone_entry = Regex::new(r"^\s+(?P<section>[^*][^\s]+)\s*$").unwrap();
171        let regex_section_data_entry = Regex::new(r"^\s+(?P<section>([^*][^\s]+)?)\s+(?P<vram>0x[^\s]+)\s+(?P<size>0x[^\s]+)\s+(?P<name>[^\s]+)$").unwrap();
172        let regex_function_entry =
173            Regex::new(r"^\s+(?P<vram>0x[^\s]+)\s+(?P<name>[^\s]+)$").unwrap();
174        // regex_function_entry = re.compile(r"^\s+(?P<vram>0x[^\s]+)\s+(?P<name>[^\s]+)((\s*=\s*(?P<expression>.+))?)$")
175        let regex_label = Regex::new(r"(?P<name>\.?L[0-9A-F]{8})$").unwrap();
176        let regex_fill =
177            Regex::new(r"^\s+(?P<fill>\*[^\s\*]+\*)\s+(?P<vram>0x[^\s]+)\s+(?P<size>0x[^\s]+)\s+(?P<fillValue>[0-9a-zA-Z]*)$")
178                .unwrap();
179        let regex_segment_entry = Regex::new(r"(?P<name>([^\s]+)?)\s+(?P<vram>0x[^\s]+)\s+(?P<size>0x[^\s]+)\s+(?P<loadaddress>(load address)?)\s+(?P<vrom>0x[^\s]+)$").unwrap();
180        let regex_romless_segment_entry =
181            Regex::new(r"(?P<name>([^\s]+)?)\s+(?P<vram>0x[^\s]+)\s+(?P<size>0x[^\s]+)$").unwrap();
182
183        let map_data = MapFile::preprocess_map_data_gnu(map_contents);
184
185        let mut temp_segment_list = vec![segment::Segment::new_placeholder()];
186
187        let mut in_section = false;
188
189        let mut prev_line = "";
190        for line in map_data.lines() {
191            if in_section {
192                if !line.starts_with("        ") {
193                    in_section = false;
194                } else if !regex_label.is_match(line) {
195                    // Filter out jump table's labels
196
197                    // Find symbols
198                    if let Some(entry_match) = regex_function_entry.captures(line) {
199                        // println!("{entry_match:?}");
200                        let sym_name = &entry_match["name"];
201
202                        if !BANNED_SYMBOL_NAMES.contains(&sym_name) {
203                            let sym_vram = utils::parse_hex(&entry_match["vram"]);
204
205                            let current_segment = temp_segment_list.last_mut().unwrap();
206                            let current_section = current_segment.sections_list.last_mut().unwrap();
207
208                            current_section
209                                .symbols
210                                .push(symbol::Symbol::new_default(sym_name.into(), sym_vram));
211                        }
212                    }
213                }
214            }
215
216            if !in_section {
217                if let Some(section_entry_match) = regex_section_data_entry.captures(line) {
218                    let filepath = PathBuf::from(&section_entry_match["name"]);
219                    let vram = utils::parse_hex(&section_entry_match["vram"]);
220                    let size = utils::parse_hex(&section_entry_match["size"]);
221                    let section_type = &section_entry_match["section"];
222
223                    if size > 0 {
224                        // TODO: de-duplicate the following code:
225
226                        if !section_type.is_empty() {
227                            in_section = true;
228                            let current_segment = temp_segment_list.last_mut().unwrap();
229
230                            current_segment
231                                .sections_list
232                                .push(section::Section::new_default(
233                                    filepath,
234                                    vram,
235                                    size,
236                                    section_type.into(),
237                                ));
238                        } else if let Some(section_alone_match) =
239                            regex_section_alone_entry.captures(prev_line)
240                        {
241                            // Some sections may be too large, making the entry be splitted between two lines, making the section name be in one line and the rest of the info in the next one
242
243                            let section_type = &section_alone_match["section"];
244
245                            in_section = true;
246                            let current_segment = temp_segment_list.last_mut().unwrap();
247
248                            current_segment
249                                .sections_list
250                                .push(section::Section::new_default(
251                                    filepath,
252                                    vram,
253                                    size,
254                                    section_type.into(),
255                                ));
256                        }
257                    }
258                } else if let Some(segment_entry_match) = regex_segment_entry.captures(line) {
259                    let mut name = &segment_entry_match["name"];
260                    let vram = utils::parse_hex(&segment_entry_match["vram"]);
261                    let size = utils::parse_hex(&segment_entry_match["size"]);
262                    let vrom = Some(utils::parse_hex(&segment_entry_match["vrom"]));
263
264                    if name.is_empty() {
265                        // If the segment name is too long then this line gets break in two lines
266                        name = prev_line;
267                    }
268
269                    temp_segment_list.push(segment::Segment::new_default(
270                        name.into(),
271                        vram,
272                        size,
273                        vrom,
274                    ));
275                } else if let Some(segment_entry_match) = regex_romless_segment_entry.captures(line)
276                {
277                    // Some segments do not have a rom address
278                    let mut name = &segment_entry_match["name"];
279                    let vram = utils::parse_hex(&segment_entry_match["vram"]);
280                    let size = utils::parse_hex(&segment_entry_match["size"]);
281                    let vrom = None;
282
283                    if name.is_empty() {
284                        // If the segment name is too long then this line gets break in two lines
285                        name = prev_line;
286                    }
287
288                    temp_segment_list.push(segment::Segment::new_default(
289                        name.into(),
290                        vram,
291                        size,
292                        vrom,
293                    ));
294                } else if let Some(fill_match) = regex_fill.captures(line) {
295                    // Make a dummy file to handle *fill*
296                    let mut filepath = PathBuf::new();
297                    let mut vram = 0;
298                    let size = utils::parse_hex(&fill_match["size"]);
299                    let mut section_type = "".to_owned();
300
301                    let current_segment = temp_segment_list.last_mut().unwrap();
302
303                    if !current_segment.sections_list.is_empty() {
304                        let prev_section = current_segment.sections_list.last().unwrap();
305                        let mut name = prev_section
306                            .filepath
307                            .file_name()
308                            .unwrap_or_else(|| OsStr::new(""))
309                            .to_owned();
310
311                        name.push("__fill__");
312                        filepath = prev_section.filepath.with_file_name(name);
313                        vram = prev_section.vram + prev_section.size;
314                        section_type.clone_from(&prev_section.section_type);
315                    }
316
317                    current_segment
318                        .sections_list
319                        .push(section::Section::new_fill(
320                            filepath,
321                            vram,
322                            size,
323                            section_type,
324                        ));
325                }
326            }
327
328            prev_line = line;
329        }
330
331        self.segments_list = Self::post_process_segments_gnu(temp_segment_list);
332    }
333
334    fn post_process_segments_gnu(
335        temp_segment_list: Vec<segment::Segment>,
336    ) -> Vec<segment::Segment> {
337        let mut segments_list = Vec::with_capacity(temp_segment_list.len());
338
339        // We need to keep a calculated rom in case the segment doesn't specify it explicitly
340        let mut current_calculated_section_rom = 0;
341
342        for (i, segment) in temp_segment_list.into_iter().enumerate() {
343            if i == 0 && segment.is_placeholder() {
344                // skip the dummy segment if it has no size, sections or symbols
345                continue;
346            }
347            if segment.size == 0 && segment.sections_list.is_empty() {
348                // Drop empty segments
349                continue;
350            }
351
352            let mut new_segment = segment.clone_no_sectionlist();
353
354            let mut vrom_offset = if let Some(vrom) = segment.vrom {
355                current_calculated_section_rom = vrom;
356                vrom
357            } else {
358                new_segment.vrom = Some(current_calculated_section_rom);
359                current_calculated_section_rom
360            };
361            for mut section in segment.sections_list.into_iter() {
362                if section.is_placeholder() {
363                    // drop placeholders
364                    continue;
365                }
366
367                // The size of the section
368                let mut acummulated_size = 0;
369                let is_noload_section = section.is_noload_section();
370
371                if let Some(vrom) = section.vrom {
372                    vrom_offset = vrom;
373                }
374
375                if !is_noload_section {
376                    section.vrom = Some(vrom_offset);
377                }
378
379                // Look up for static symbols.
380                // gnu ld does not provide symbol sizes, so the only static
381                // symbol we may infer are the ones that are at the beginning
382                // of the section.
383                if let Some(first_sym) = section.symbols.first() {
384                    if first_sym.vram != section.vram {
385                        // There's at least one static symbol at the beginning of the section.
386                        let sym_name = generate_static_sym_name_for_address(
387                            section.vram,
388                            &section.filepath,
389                            &section.section_type,
390                        );
391                        let sym = symbol::Symbol::new_static(
392                            sym_name,
393                            section.vram,
394                            first_sym.vram - section.vram,
395                            section.vrom,
396                        );
397                        section.symbols.insert(0, sym);
398                    }
399                } else if section.size > 0 && !section.is_fill {
400                    // There's at least one static symbol.
401                    let sym_name = generate_static_sym_name_for_address(
402                        section.vram,
403                        &section.filepath,
404                        &section.section_type,
405                    );
406                    let sym = symbol::Symbol::new_static(
407                        sym_name,
408                        section.vram,
409                        section.size,
410                        section.vrom,
411                    );
412                    section.symbols.insert(0, sym);
413                }
414
415                let symbols_count = section.symbols.len();
416                if symbols_count > 0 {
417                    let mut sym_vrom = vrom_offset;
418
419                    // Calculate size of each symbol
420                    for index in 0..symbols_count - 1 {
421                        let next_sym_vram = section.symbols[index + 1].vram;
422                        let sym = &mut section.symbols[index];
423                        let sym_size = next_sym_vram - sym.vram;
424                        acummulated_size += sym_size;
425
426                        sym.size = sym_size;
427
428                        if !is_noload_section {
429                            // Only set vrom of non bss variables
430                            sym.vrom = Some(sym_vrom);
431                            sym_vrom += sym_size;
432                        }
433                    }
434
435                    // Calculate size of last symbol of the section
436                    let sym = &mut section.symbols[symbols_count - 1];
437                    let sym_size = section.size - acummulated_size;
438                    sym.size = sym_size;
439                    if !is_noload_section {
440                        sym.vrom = Some(sym_vrom);
441                        //sym_vrom += sym_size;
442                    }
443
444                    Self::fixup_non_matching_symbols_for_section(&mut section);
445                }
446
447                if !is_noload_section {
448                    vrom_offset += section.size;
449                    current_calculated_section_rom += section.size;
450                }
451
452                new_segment.sections_list.push(section);
453            }
454
455            segments_list.push(new_segment);
456        }
457
458        segments_list.shrink_to_fit();
459        segments_list
460    }
461
462    /**
463    Parses the contents of a clang ld.lld map.
464
465    The `map_contents` argument must contain the contents of a clang ld.lld mapfile.
466     */
467    #[deprecated(
468        since = "2.8.0",
469        note = "Prefer `MapFile::new_from_lld_map_str` instead"
470    )]
471    pub fn parse_map_contents_lld(&mut self, map_contents: &str) {
472        let map_data = map_contents;
473
474        // Every line starts with this information, so instead of duplicating it we put them on one single regex
475        let regex_row_entry = Regex::new(r"^\s*(?P<vram>[0-9a-fA-F]+)\s+(?P<vrom>[0-9a-fA-F]+)\s+(?P<size>[0-9a-fA-F]+)\s+(?P<align>[0-9a-fA-F]+) ").unwrap();
476
477        let regex_segment_entry = Regex::new(r"^(?P<name>[^\s]+)$").unwrap();
478        let regex_fill = Regex::new(r"^\s+(?P<expr>\.\s*\+=\s*.+)$").unwrap();
479        let regex_section_data_entry =
480            Regex::new(r"^\s+(?P<name>[^\s]+):\((?P<section>[^\s()]+)\)$$").unwrap();
481        let regex_label = Regex::new(r"^\s+(?P<name>\.?L[0-9A-F]{8})$").unwrap();
482        let regex_symbol_entry = Regex::new(r"^\s+(?P<name>[^\s]+)$").unwrap();
483
484        let mut temp_segment_list = vec![segment::Segment::new_placeholder()];
485
486        for line in map_data.lines() {
487            if let Some(row_entry_match) = regex_row_entry.captures(line) {
488                let vram = utils::parse_hex(&row_entry_match["vram"]);
489                let vrom = Some(utils::parse_hex(&row_entry_match["vrom"]));
490                let size = utils::parse_hex(&row_entry_match["size"]);
491                let align = utils::parse_hex(&row_entry_match["align"]);
492
493                let subline = &line[row_entry_match.get(0).unwrap().len()..];
494
495                if let Some(segment_entry_match) = regex_segment_entry.captures(subline) {
496                    let name = &segment_entry_match["name"];
497
498                    let mut new_segment =
499                        segment::Segment::new_default(name.into(), vram, size, vrom);
500                    new_segment.align = Some(align);
501
502                    temp_segment_list.push(new_segment);
503                } else if regex_fill.is_match(subline) {
504                    // Make a dummy section to handle pads (. += XX)
505
506                    let mut filepath = PathBuf::new();
507                    let mut section_type = "".to_owned();
508
509                    let current_segment = temp_segment_list.last_mut().unwrap();
510
511                    if !current_segment.sections_list.is_empty() {
512                        let prev_section = current_segment.sections_list.last().unwrap();
513                        let mut name = prev_section.filepath.file_name().unwrap().to_owned();
514
515                        name.push("__fill__");
516                        filepath = prev_section.filepath.with_file_name(name);
517                        section_type.clone_from(&prev_section.section_type);
518                    }
519
520                    let mut new_section =
521                        section::Section::new_fill(filepath, vram, size, section_type);
522                    if !utils::is_noload_section(&new_section.section_type) {
523                        new_section.vrom = vrom;
524                    }
525                    current_segment.sections_list.push(new_section);
526                } else if let Some(section_entry_match) = regex_section_data_entry.captures(subline)
527                {
528                    let filepath = PathBuf::from(&section_entry_match["name"]);
529                    let section_type = &section_entry_match["section"];
530
531                    if size > 0 {
532                        let current_segment = temp_segment_list.last_mut().unwrap();
533
534                        let mut new_section = section::Section::new_default(
535                            filepath,
536                            vram,
537                            size,
538                            section_type.into(),
539                        );
540                        if !utils::is_noload_section(&new_section.section_type) {
541                            new_section.vrom = vrom;
542                        }
543                        new_section.align = Some(align);
544
545                        current_segment.sections_list.push(new_section);
546                    }
547                } else if regex_label.is_match(subline) {
548                    // pass
549                } else if let Some(symbol_entry_match) = regex_symbol_entry.captures(subline) {
550                    let name = &symbol_entry_match["name"];
551
552                    if !BANNED_SYMBOL_NAMES.contains(&name) {
553                        let current_segment = temp_segment_list.last_mut().unwrap();
554                        let current_section = current_segment.sections_list.last_mut().unwrap();
555
556                        let mut new_symbol = symbol::Symbol::new_default(name.into(), vram);
557                        if size > 0 {
558                            new_symbol.size = size;
559                        }
560                        if !current_section.is_noload_section() {
561                            new_symbol.vrom = vrom
562                        }
563                        new_symbol.align = Some(align);
564
565                        current_section.symbols.push(new_symbol);
566                    }
567                }
568            }
569        }
570
571        self.segments_list = Self::post_process_segments_lld(temp_segment_list);
572    }
573
574    fn post_process_segments_lld(
575        temp_segment_list: Vec<segment::Segment>,
576    ) -> Vec<segment::Segment> {
577        let mut segments_list = Vec::with_capacity(temp_segment_list.len());
578
579        for (i, segment) in temp_segment_list.into_iter().enumerate() {
580            if i == 0 && segment.is_placeholder() {
581                // skip the dummy segment if it has no size, sections or symbols
582                continue;
583            }
584
585            let mut new_segment = segment.clone_no_sectionlist();
586
587            for mut section in segment.sections_list.into_iter() {
588                if section.is_placeholder() {
589                    // drop placeholders
590                    continue;
591                }
592
593                let mut acummulated_size = 0;
594                let symbols_count = section.symbols.len();
595                let mut has_statics = false;
596
597                if let Some(first_sym) = section.symbols.first() {
598                    if first_sym.vram != section.vram {
599                        has_statics = true;
600                    }
601                }
602
603                if symbols_count > 0 {
604                    // Calculate the size of symbols that the map section did not report.
605                    // usually asm symbols and not C ones
606
607                    for index in 0..symbols_count - 1 {
608                        let next_sym_vram = section.symbols[index + 1].vram;
609                        let sym = &mut section.symbols[index];
610
611                        let sym_size = next_sym_vram - sym.vram;
612                        acummulated_size += sym_size;
613
614                        if sym.size == 0 {
615                            sym.size = sym_size;
616                        } else if sym.size != sym_size {
617                            has_statics = true;
618                        }
619                    }
620
621                    // Calculate size of last symbol of the section
622                    let sym = &mut section.symbols[symbols_count - 1];
623                    let sym_size = section.size - acummulated_size;
624                    if sym.size == 0 {
625                        sym.size = sym_size;
626                    } else if sym.size != sym_size {
627                        has_statics = true;
628                    }
629
630                    Self::fixup_non_matching_symbols_for_section(&mut section);
631                } else if section.size > 0 {
632                    has_statics = true;
633                }
634
635                if has_statics {
636                    section = fill_static_symbols(section);
637                }
638
639                new_segment.sections_list.push(section);
640            }
641
642            segments_list.push(new_segment);
643        }
644
645        segments_list.shrink_to_fit();
646        segments_list
647    }
648
649    fn parse_map_contents_mw(&mut self, map_contents: &str) {
650        let map_data = preprocess_map_data_mw(map_contents);
651
652        let memory_map = parse_memory_map_mw(map_data);
653        let regex_entries = MwRegexEntries::new(map_data);
654
655        let mut temp_segment_list = vec![segment::Segment::new_placeholder()];
656
657        // Use a bunch of characters that shouldn't be valid in any os as a marker that we haven't found a file yet.
658        let invalid_file_name = "invalid file <>:\"/\\|?*";
659        let mut current_filename = invalid_file_name.to_string();
660
661        for line in map_data.lines() {
662            // Check for regex_entries.common_row first since it is more likely to match
663            if let (Some(row_entry_match), false) = (
664                regex_entries.common_row.captures(line),
665                temp_segment_list.is_empty(),
666            ) {
667                let starting = utils::parse_hex(&row_entry_match["starting"]);
668                let size = utils::parse_hex(&row_entry_match["size"]);
669                let vram = utils::parse_hex(&row_entry_match["vram"]);
670                let align = utils::parse_hex(&row_entry_match["align"]);
671
672                let rom = row_entry_match.name("rom").map_or_else(
673                    || {
674                        temp_segment_list
675                            .last()
676                            .unwrap()
677                            .vrom
678                            .map(|segment_rom| segment_rom + starting)
679                    },
680                    |x| Some(utils::parse_hex(x.as_str())),
681                );
682
683                let subline = &row_entry_match["subline"];
684
685                if regex_entries.label.is_match(subline) {
686                    // pass
687                } else if let Some(symbol_entry_match) = regex_entries.symbol.captures(subline) {
688                    let filename = &symbol_entry_match["filename"];
689
690                    if filename == current_filename {
691                        // We are still in the same file
692                        let symbol = &symbol_entry_match["name"];
693
694                        if !BANNED_SYMBOL_NAMES.contains(&symbol) {
695                            let current_segment = temp_segment_list.last_mut().unwrap();
696                            let current_section = current_segment.sections_list.last_mut().unwrap();
697
698                            let mut new_symbol =
699                                symbol::Symbol::new_default(symbol.to_string(), vram);
700                            if size > 0 {
701                                new_symbol.size = size;
702                            }
703                            if !current_section.is_noload_section() {
704                                new_symbol.vrom = rom
705                            }
706                            if align > 0 {
707                                new_symbol.align = Some(align);
708                            }
709
710                            current_section.symbols.push(new_symbol);
711                        }
712                    } else {
713                        // New file!
714                        if size > 0 {
715                            let section_type = &symbol_entry_match["name"];
716                            let filepath = PathBuf::from(filename);
717
718                            let current_segment = temp_segment_list.last_mut().unwrap();
719
720                            let mut new_section = section::Section::new_default(
721                                filepath,
722                                vram,
723                                size,
724                                section_type.into(),
725                            );
726                            if !utils::is_noload_section(&new_section.section_type) {
727                                new_section.vrom = rom
728                            }
729
730                            current_segment.sections_list.push(new_section);
731
732                            // I'm not sure how to treat these cases.
733                            // I guess we can treat them as files without symbols for now...
734                            current_filename = if filename == "Linker Generated Symbol File" {
735                                invalid_file_name.to_string()
736                            } else {
737                                filename.to_string()
738                            };
739                        }
740                    }
741                } else if regex_entries.fill.is_match(subline) {
742                    // Make a dummy section to handle pads
743                    let current_segment = temp_segment_list.last_mut().unwrap();
744
745                    let mut filepath = PathBuf::new();
746                    let mut section_type = "".to_owned();
747
748                    if let Some(prev_section) = current_segment.sections_list.last() {
749                        let mut name = prev_section.filepath.file_name().unwrap().to_owned();
750
751                        name.push("__fill__");
752                        filepath = prev_section.filepath.with_file_name(name);
753                        section_type.clone_from(&prev_section.section_type);
754                    }
755
756                    let mut new_section =
757                        section::Section::new_fill(filepath, vram, size, section_type);
758                    new_section.align = Some(align);
759                    if !utils::is_noload_section(&new_section.section_type) {
760                        new_section.vrom = rom;
761                    }
762                    current_segment.sections_list.push(new_section);
763
764                    // Don't count this as a valid file.
765                    current_filename = invalid_file_name.to_string();
766                } else {
767                    // println!("'{}'", subline);
768                }
769            } else if let Some(segment_entry_match) = regex_entries.segment.captures(line) {
770                let name = &segment_entry_match["name"];
771
772                let new_segment = if let Some(segment_entry) = memory_map.get(name) {
773                    let vram = segment_entry.starting_address;
774                    let size = segment_entry.size;
775                    let vrom = Some(segment_entry.file_offset);
776                    segment::Segment::new_default(name.to_string(), vram, size, vrom)
777                } else {
778                    let mut temp = segment::Segment::new_placeholder();
779                    temp.name = name.to_string();
780                    temp
781                };
782
783                temp_segment_list.push(new_segment);
784
785                // Reset the tracked filename state.
786                // This avoid carrying the filename from one segment to the other
787                current_filename = invalid_file_name.to_string();
788            }
789        }
790
791        self.segments_list = Self::post_process_segments_mw(temp_segment_list);
792    }
793
794    fn post_process_segments_mw(temp_segment_list: Vec<segment::Segment>) -> Vec<segment::Segment> {
795        let mut segments_list = Vec::with_capacity(temp_segment_list.len());
796
797        for (i, segment) in temp_segment_list.into_iter().enumerate() {
798            if i == 0 && (segment.sections_list.is_empty() || segment.is_placeholder()) {
799                // skip the dummy segment if it has no size, sections or symbols
800                continue;
801            }
802
803            let mut new_segment = segment.clone_no_sectionlist();
804
805            for mut section in segment.sections_list.into_iter() {
806                if section.is_placeholder() {
807                    // drop placeholders
808                    continue;
809                }
810
811                let symbols_count = section.symbols.len();
812                if symbols_count > 0 {
813                    Self::fixup_non_matching_symbols_for_section(&mut section);
814                }
815
816                let section = fill_static_symbols(section);
817                new_segment.sections_list.push(section);
818            }
819
820            segments_list.push(new_segment);
821        }
822
823        segments_list.shrink_to_fit();
824        segments_list
825    }
826}
827
/// Trims everything that precedes the first section layout of the map.
///
/// The returned slice starts at the beginning of the line containing the
/// first `" section layout"` marker, so the name of that layout is kept.
/// The input is returned untouched when the marker is missing or when no
/// newline precedes it.
fn preprocess_map_data_mw(map_data: &str) -> &str {
    map_data
        .find(" section layout")
        // Walk back to the newline that ends the previous line.
        .and_then(|marker| map_data[..=marker].rfind("\n"))
        .map_or(map_data, |line_break| &map_data[line_break + 1..])
}
840
/// One row of the memory map table, as collected by `parse_memory_map_mw`.
///
/// Every field is parsed from the hexadecimal text of the matching column.
struct MwMemoryMapEntry {
    /// Value of the row's `address` column. Used as the vram of the segment
    /// that has the same name as this entry.
    starting_address: u64,
    /// Value of the row's `size` column. Used as the size of that segment.
    size: u64,
    /// Value of the row's `offset` column. Used as the vrom of that segment.
    file_offset: u64,
}
846
847fn parse_memory_map_mw(map_data: &str) -> HashMap<String, MwMemoryMapEntry> {
848    let map_data = {
849        if let Some(start_index) = map_data.find("Memory map:") {
850            if let Some(end_index) = map_data[start_index..].find("Linker generated symbols:") {
851                &map_data[start_index..start_index + end_index]
852            } else {
853                &map_data[start_index..]
854            }
855        } else {
856            map_data
857        }
858    };
859
860    let mut memory_map = HashMap::new();
861    let entry = Regex::new(r"^\s*(?P<name>[^ ]+)\s+(?P<address>[0-9a-fA-F]+)\s+(?P<size>[0-9a-fA-F]+)\s+(?P<offset>[0-9a-fA-F]+)$").unwrap();
862
863    for line in map_data.lines() {
864        if let Some(entry_match) = entry.captures(line) {
865            let name = &entry_match["name"];
866            let starting_address = utils::parse_hex(&entry_match["address"]);
867            let size = utils::parse_hex(&entry_match["size"]);
868            let file_offset = utils::parse_hex(&entry_match["offset"]);
869
870            memory_map.insert(
871                name.to_string(),
872                MwMemoryMapEntry {
873                    starting_address,
874                    size,
875                    file_offset,
876                },
877            );
878        }
879    }
880
881    memory_map
882}
883
/// Compiled regular expressions used while parsing the section layouts of a
/// mwld map. They are built by `MwRegexEntries::new`.
struct MwRegexEntries {
    /// Leading hexadecimal columns of a row (`starting`, `size`, `vram`, `rom`
    /// when the map has that column, and `align`), with the remainder of the
    /// row captured as `subline`.
    common_row: Regex,
    /// A `<name> section layout` header line.
    segment: Regex,
    /// A `lbl_XXXXXXXX` label (eight uppercase hex digits) followed by a filename.
    label: Regex,
    /// A name without spaces followed by a filename.
    symbol: Regex,
    /// A line containing only `*fill*`, ignoring surrounding whitespace.
    fill: Regex,
}
891
892impl MwRegexEntries {
893    fn new(map_data: &str) -> Self {
894        // Almost every line starts with this information, so instead of duplicating it we put them on one single regex
895        let common_row = if map_data.contains("address  Size   address  offset") {
896            // mwld 2.7+
897            Regex::new(r"^\s*(?P<starting>[0-9a-fA-F]+)\s+(?P<size>[0-9a-fA-F]+)\s+(?P<vram>[0-9a-fA-F]+)\s+(?P<rom>[0-9a-fA-F]+)\s+(?P<align>[0-9a-fA-F]+)\s+(?P<subline>.+)").unwrap()
898        } else {
899            // mwld 1.3.2-
900            Regex::new(r"^\s*(?P<starting>[0-9a-fA-F]+)\s+(?P<size>[0-9a-fA-F]+)\s+(?P<vram>[0-9a-fA-F]+)\s+(?P<align>[0-9a-fA-F]+)\s+(?P<subline>.+)").unwrap()
901        };
902
903        let segment = Regex::new(r"^(?P<name>.+) section layout$").unwrap();
904        let label = Regex::new(r"^(?P<label>lbl_[0-9A-F]{8})\s+(?P<filename>.+?)\s*$").unwrap();
905        let symbol = Regex::new(r"^\s*(?P<name>[^ ]+)\s+(?P<filename>.+?)\s*$").unwrap();
906        let fill = Regex::new(r"^\s*\*fill\*\s*$").unwrap();
907
908        Self {
909            common_row,
910            segment,
911            label,
912            symbol,
913            fill,
914        }
915    }
916}
917
918impl MapFile {
919    fn preprocess_map_data_gnu(map_data: &str) -> &str {
920        // Skip the stuff we don't care about
921        // Looking for this string will only work on English machines (or C locales)
922        // but it doesn't matter much, because if this string is not found then the
923        // parsing should still work, but just a bit slower because of the extra crap
924        if let Some(aux_var) = map_data.find("\nLinker script and memory map") {
925            if let Some(start_index) = map_data[aux_var + 1..].find('\n') {
926                return &map_data[aux_var + 1 + start_index + 1..];
927            }
928        }
929
930        map_data
931    }
932
933    fn fixup_non_matching_symbols_for_section(section: &mut section::Section) {
934        // Fixup `.NON_MATCHING` symbols.
935        // These kind of symbols have the same address as their
936        // real counterpart, but their order is not guaranteed,
937        // meaning we may have set the symbol size's to the
938        // non_matching placeholder instead of the actual symbol.
939        let mut nonmatchings_syms_original = Vec::new();
940        let mut nonmatchings_syms_suffix = Vec::new();
941        for (index, sym) in section.symbols.iter().enumerate() {
942            if sym.name.ends_with(".NON_MATCHING") {
943                let real_name = sym.name.replace(".NON_MATCHING", "");
944
945                if let Some((real_sym, real_index)) =
946                    section.find_symbol_and_index_by_name(&real_name)
947                {
948                    // One of the sizes should be zero, while the
949                    // other non-zero, so we take the largest.
950                    nonmatchings_syms_original.push((real_index, sym.size.max(real_sym.size)));
951                    nonmatchings_syms_suffix.push(index);
952                }
953            }
954        }
955        for (index, new_size) in nonmatchings_syms_original {
956            if let Some(sym) = section.symbols.get_mut(index) {
957                sym.size = new_size;
958                sym.nonmatching_sym_exists = true;
959            }
960        }
961        for index in nonmatchings_syms_suffix {
962            if let Some(sym) = section.symbols.get_mut(index) {
963                sym.size = 0;
964            }
965        }
966    }
967}
968
969fn fill_static_symbols(mut section: section::Section) -> section::Section {
970    let mut new_symbols = Vec::with_capacity(section.symbols.len() * 2);
971
972    let mut current_vram = section.vram;
973    for sym in section.symbols {
974        if sym.vram > current_vram && sym.size > 0 {
975            let static_size = sym.vram - current_vram;
976            if static_size >= 0x4 {
977                // sizes smaller than 0x4 are usually just padding between symbols
978                let vrom = section.vrom.map(|x| x + static_size);
979                let sym_name = generate_static_sym_name_for_address(
980                    current_vram,
981                    &section.filepath,
982                    &section.section_type,
983                );
984                let sym = symbol::Symbol::new_static(sym_name, current_vram, static_size, vrom);
985                new_symbols.push(sym);
986            }
987        }
988        if sym.size > 0 {
989            current_vram = sym.vram + sym.size;
990        }
991        new_symbols.push(sym);
992    }
993    if current_vram < section.vram + section.size {
994        let static_size = section.vram + section.size - current_vram;
995        if static_size >= 0x10 {
996            // sizes smaller than 0x10 bytes are usually just section padding
997            let vrom = section.vrom.map(|x| x + static_size);
998            let sym_name = generate_static_sym_name_for_address(
999                current_vram,
1000                &section.filepath,
1001                &section.section_type,
1002            );
1003            let sym = symbol::Symbol::new_static(sym_name, current_vram, static_size, vrom);
1004            new_symbols.push(sym);
1005        }
1006    }
1007
1008    new_symbols.shrink_to_fit();
1009    section.symbols = new_symbols;
1010
1011    section
1012}
1013
/// Builds the name given to a generated static symbol.
///
/// The result has the shape `$_static_symbol_<VRAM>_<filepath>_<section_type>`,
/// where `<VRAM>` is `static_vram` in uppercase hexadecimal, zero-padded to at
/// least 8 digits.
fn generate_static_sym_name_for_address(
    static_vram: u64,
    filepath: &Path,
    section_type: &str,
) -> String {
    let shown_path = filepath.display();

    // The `$_static_symbol_` prefix must stay as is, since other tools may
    // rely on this naming scheme.
    // The remainder of the name _may_ be open to change, but it should not be
    // altered without a good reason.
    format!(
        "$_static_symbol_{:08X}_{}_{}",
        static_vram, shown_path, section_type,
    )
}