symbolic_il2cpp/line_mapping/
from_object.rs

1use std::collections::BTreeMap;
2use std::io::Write;
3use std::iter::Enumerate;
4use std::str::Lines;
5
6use symbolic_common::{ByteView, DebugId};
7use symbolic_debuginfo::{DebugSession, ObjectLike};
8
9/// A line mapping extracted from an object.
10///
11/// This is only intended as an intermediate structure for serialization,
12/// not for lookups.
13pub struct ObjectLineMapping {
14    mapping: BTreeMap<String, BTreeMap<String, BTreeMap<u32, u32>>>,
15    debug_id: DebugId,
16}
17
18impl ObjectLineMapping {
19    /// Create a line mapping from the given `object`.
20    ///
21    /// The mapping is constructed by iterating over all the source files referenced by `object` and
22    /// parsing Il2cpp `source_info` records from each.
23    pub fn from_object<'data, 'object, O, E>(object: &'object O) -> Result<Self, E>
24    where
25        O: ObjectLike<'data, 'object, Error = E>,
26    {
27        let session = object.debug_session()?;
28        let debug_id = object.debug_id();
29
30        let mut mapping = BTreeMap::new();
31
32        for cpp_file in session.files() {
33            let cpp_file_path = cpp_file?.abs_path_str();
34            if mapping.contains_key(&cpp_file_path) {
35                continue;
36            }
37
38            if let Ok(cpp_source) = ByteView::open(&cpp_file_path) {
39                let cpp_mapping = Self::parse_source_file(&cpp_source);
40                if !cpp_mapping.is_empty() {
41                    mapping.insert(cpp_file_path, cpp_mapping);
42                }
43            }
44        }
45
46        Ok(Self { mapping, debug_id })
47    }
48
49    /// Create a line mapping from the source file.
50    ///
51    /// The mapping is constructed by parsing Il2cpp `source_info` records in the given source file.
52    pub(crate) fn parse_source_file(cpp_source: &[u8]) -> BTreeMap<String, BTreeMap<u32, u32>> {
53        let mut cpp_mapping = BTreeMap::new();
54
55        for SourceInfo {
56            cpp_line,
57            cs_file,
58            cs_line,
59        } in SourceInfos::new(cpp_source)
60        {
61            let cs_mapping = cpp_mapping
62                .entry(cs_file.to_string())
63                .or_insert_with(BTreeMap::new);
64            cs_mapping.insert(cpp_line, cs_line);
65        }
66
67        cpp_mapping
68    }
69
70    /// Serializes the line mapping to the given writer as JSON.
71    ///
72    /// The mapping is serialized in the form of nested objects:
73    /// C++ file => C# file => C++ line => C# line
74    ///
75    /// Returns `false` if the resulting JSON did not contain any mappings.
76    pub fn to_writer<W: Write>(mut self, writer: &mut W) -> std::io::Result<bool> {
77        let is_empty = self.mapping.is_empty();
78
79        // This is a big hack: We need the files for different architectures to be different.
80        // To achieve this, we put the debug-id of the file (which is different between architectures)
81        // into the same structure as the normal map, like so:
82        // `"__debug-id__": {"00000000-0000-0000-0000-000000000000": {}}`
83        // When parsing via `LineMapping::parse`, this *looks like* a valid entry, but we will
84        // most likely never have a C++ file named `__debug-id__` ;-)
85        let value = BTreeMap::from([(self.debug_id.to_string(), Default::default())]);
86        self.mapping.insert("__debug-id__".to_owned(), value);
87
88        serde_json::to_writer(writer, &self.mapping)?;
89        Ok(!is_empty)
90    }
91}
92
/// An Il2cpp `source_info` record.
///
/// The record only holds two numbers and a borrowed string, so it is cheap to copy.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct SourceInfo<'data> {
    /// The C++ source line the `source_info` was parsed from.
    pub(crate) cpp_line: u32,
    /// The corresponding C# source file.
    cs_file: &'data str,
    /// The corresponding C# source line.
    pub(crate) cs_line: u32,
}
103
/// Iterator that scans C++ source text for Il2cpp `source_info` markers.
///
/// Each item it yields is a `SourceInfo`.
pub(crate) struct SourceInfos<'data> {
    /// The most recently seen marker, as `(C# file, C# line)`, that has not been yielded yet.
    current: Option<(&'data str, u32)>,
    /// The remaining lines of the source, paired with their zero-based index.
    lines: Enumerate<Lines<'data>>,
}
111
112impl<'data> SourceInfos<'data> {
113    /// Parses the `source` leniently, yielding an empty Iterator for non-utf8 data.
114    pub(crate) fn new(source: &'data [u8]) -> Self {
115        let lines = std::str::from_utf8(source)
116            .ok()
117            .unwrap_or_default()
118            .lines()
119            .enumerate();
120        Self {
121            lines,
122            current: None,
123        }
124    }
125}
126
127impl<'data> Iterator for SourceInfos<'data> {
128    type Item = SourceInfo<'data>;
129
130    fn next(&mut self) -> Option<Self::Item> {
131        for (cpp_line_nr, cpp_src_line) in &mut self.lines {
132            match parse_line(cpp_src_line) {
133                // A new source info record. Emit the previously found one, if there is one.
134                Some((cs_file, cs_line)) => {
135                    if let Some((cs_file, cs_line)) = self.current.replace((cs_file, cs_line)) {
136                        return Some(SourceInfo {
137                            cpp_line: cpp_line_nr as u32,
138                            cs_file,
139                            cs_line,
140                        });
141                    }
142                }
143
144                // A comment. Just continue.
145                None if cpp_src_line.trim_start().starts_with("//") => continue,
146                // A source line. Emit the previously found source info record, if there is one.
147                None => {
148                    if let Some((cs_file, cs_line)) = self.current.take() {
149                        return Some(SourceInfo {
150                            cpp_line: (cpp_line_nr + 1) as u32,
151                            cs_file,
152                            cs_line,
153                        });
154                    }
155                }
156            }
157        }
158        None
159    }
160}
161
/// Parses a single Il2cpp `source_info` marker into its `(file, line)` parts.
///
/// Surrounding whitespace is ignored. For example, `//<source_info:main.cs:17>`
/// is parsed as `("main.cs", 17)`.
///
/// Returns `None` if the marker prefix or the closing `>` is missing, if there is no `:`
/// separating file and line, or if the line number does not parse as a `u32`.
fn parse_line(line: &str) -> Option<(&str, u32)> {
    // Split at the *last* colon so that colons inside the file path stay part of `file`.
    let (file, number) = line
        .trim()
        .strip_prefix("//<source_info:")?
        .strip_suffix('>')?
        .rsplit_once(':')?;

    number.parse::<u32>().ok().map(|number| (file, number))
}
174
#[cfg(test)]
mod tests {
    use super::{SourceInfo, SourceInfos};

    /// Runs the parser over `cpp_source` and gathers every record it yields.
    fn collect_infos(cpp_source: &'static [u8]) -> Vec<SourceInfo<'static>> {
        SourceInfos::new(cpp_source).collect()
    }

    /// Builds the expected record for a marker that points into `main.cs`.
    fn main_cs(cpp_line: u32, cs_line: u32) -> SourceInfo<'static> {
        SourceInfo {
            cpp_line,
            cs_file: "main.cs",
            cs_line,
        }
    }

    #[test]
    fn one_mapping() {
        let cpp_source = b"
            Lorem ipsum dolor sit amet
            //<source_info:main.cs:17>
            // some
            // more
            // comments
            actual source code";

        // The comments are skipped; the record lands on the source line (line 7).
        assert_eq!(collect_infos(cpp_source), vec![main_cs(7, 17)]);
    }

    #[test]
    fn several_mappings() {
        let cpp_source = b"
            Lorem ipsum dolor sit amet
            //<source_info:main.cs:17>
            // some
            // comments
            actual source code 1
            actual source code 2

            //<source_info:main.cs:29>
            actual source code 3

            //<source_info:main.cs:46>
            // more
            // comments
            actual source code 4";

        // Each record attaches to the first source line following its marker.
        let expected = vec![main_cs(6, 17), main_cs(10, 29), main_cs(15, 46)];

        assert_eq!(collect_infos(cpp_source), expected);
    }

    #[test]
    fn missing_source_line() {
        let cpp_source = b"
            Lorem ipsum dolor sit amet
            //<source_info:main.cs:17>
            // some
            // comments
            //<source_info:main.cs:29>
            actual source code";

        // The first source info has no source line to attach to, so it should use the line
        // immediately before the second source_info.
        let expected = vec![main_cs(5, 17), main_cs(7, 29)];

        assert_eq!(collect_infos(cpp_source), expected);
    }

    #[test]
    fn broken() {
        let cpp_source = b"
            Lorem ipsum dolor sit amet
            //<source_info:main.cs:17>
            // some
            // more
            // comments";

        // Since there is no non-comment line for the source info to attach to,
        // no source infos should be returned.
        assert!(collect_infos(cpp_source).is_empty());
    }
}
287}