1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
//! This crate allows obtaining symbol information from binaries and compilation artifacts.
//! It maps raw code addresses to symbol strings, and, if available, file name + line number
//! information.
//! The API was designed for the Firefox profiler.
//!
//! The main entry point of this crate is the async `query_api` function, which accepts a
//! JSON string with the query input. The JSON API matches the API of the [Mozilla
//! symbolication server ("Tecken")](https://tecken.readthedocs.io/en/latest/symbolication.html).
//! An alternative JSON-free API is available too, but it is not very ergonomic.
//!
//! # Design constraints
//!
//! This crate operates under the following design constraints:
//!
//!  - Must be usable from JavaScript / WebAssembly: The Firefox profiler runs this code in a
//!    WebAssembly environment, invoked from a privileged piece of JavaScript code inside Firefox itself.
//!    This setup allows us to download the profiler-get-symbols wasm bundle on demand, rather than shipping
//!    it with Firefox, which would increase the Firefox download size for a piece of functionality
//!    that the vast majority of Firefox users don't need.
//!  - Performance: We want to be able to obtain symbol data from a fresh build of a locally compiled
//!    Firefox instance as quickly as possible, without an expensive preprocessing step. The time between
//!    "finished compilation" and "returned symbol data" should be minimized. This means that symbol
//!    data needs to be obtained directly from the compilation artifacts rather than from, say, a
//!    dSYM bundle or a Breakpad .sym file.
//!  - Must scale to large inputs: This applies to both the size of the API request and the size of the
//!    object files that need to be parsed: The Firefox profiler will supply anywhere between tens of
//!    thousands and hundreds of thousands of different code addresses in a single symbolication request.
//!    Firefox build artifacts such as libxul.so can be multiple gigabytes in size, and contain around
//!    300000 function symbols. We want to serve such requests within a few seconds or less.
//!  - "Best effort" basis: If only limited symbol information is available, for example from system
//!    libraries, we want to return whatever limited information we have.
//!
//! The WebAssembly requirement means that this crate cannot contain any direct file access.
//! Instead, all file access is mediated through a `FileAndPathHelper` trait which has to be implemented
//! by the caller. Furthermore, the API request does not carry any absolute file paths, so the resolution
//! to absolute file paths needs to be done by the caller as well.
//!
//! # Supported formats and data
//!
//! This crate supports obtaining symbol data from PE binaries (Windows), PDB files (Windows),
//! mach-o binaries (including fat binaries) (macOS & iOS), and ELF binaries (Linux, Android, etc.).
//! For mach-o files it also supports finding debug information in external objects, by following
//! OSO stabs entries.
//! It supports gathering both basic symbol information (function name strings) as well as information
//! based on debug data, i.e. inline callstacks where each frame has a function name, a file name,
//! and a line number.
//! For debug data we support both DWARF debug data (inside mach-o and ELF binaries) and PDB debug data.
//!
//! # Example
//!
//! ```
//! use profiler_get_symbols::{
//!     FileContents, FileAndPathHelper, FileAndPathHelperResult, OptionallySendFuture,
//!     CandidatePathInfo, FileLocation
//! };
//!
//! async fn run_query() -> String {
//!     let this_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
//!     let helper = ExampleHelper {
//!         artifact_directory: this_dir.join("..").join("fixtures").join("win64-ci")
//!     };
//!     profiler_get_symbols::query_api(
//!         "/symbolicate/v5",
//!         r#"{
//!             "memoryMap": [
//!               [
//!                 "firefox.pdb",
//!                 "AA152DEB2D9B76084C4C44205044422E1"
//!               ]
//!             ],
//!             "stacks": [
//!               [
//!                 [0, 204776],
//!                 [0, 129423],
//!                 [0, 244290],
//!                 [0, 244219]
//!               ]
//!             ]
//!           }"#,
//!         &helper,
//!     ).await
//! }
//!
//! struct ExampleHelper {
//!     artifact_directory: std::path::PathBuf,
//! }
//!
//! impl<'h> FileAndPathHelper<'h> for ExampleHelper {
//!     type F = Vec<u8>;
//!     type OpenFileFuture =
//!         std::pin::Pin<Box<dyn std::future::Future<Output = FileAndPathHelperResult<Self::F>> + 'h>>;
//!
//!     fn get_candidate_paths_for_binary_or_pdb(
//!         &self,
//!         debug_name: &str,
//!         _breakpad_id: &str,
//!     ) -> FileAndPathHelperResult<Vec<CandidatePathInfo>> {
//!         Ok(vec![CandidatePathInfo::SingleFile(FileLocation::Path(self.artifact_directory.join(debug_name)))])
//!     }
//!
//!     fn open_file(
//!         &'h self,
//!         location: &FileLocation,
//!     ) -> std::pin::Pin<Box<dyn std::future::Future<Output = FileAndPathHelperResult<Self::F>> + 'h>> {
//!         async fn read_file_impl(path: std::path::PathBuf) -> FileAndPathHelperResult<Vec<u8>> {
//!             Ok(std::fs::read(&path)?)
//!         }
//!
//!         let path = match location {
//!             FileLocation::Path(path) => path.clone(),
//!             FileLocation::Custom(_) => panic!("Unexpected FileLocation::Custom"),
//!         };
//!         Box::pin(read_file_impl(path.to_path_buf()))
//!     }
//! }
//! ```

pub use object;
pub use pdb_addr2line::pdb;
use shared::SymbolicationResultKind;

use std::path::{Path, PathBuf};

use object::{macho::FatHeader, read::FileKind, Endianness};
use pdb::PDB;
use serde_json::json;

mod cache;
mod chunked_read_buffer_manager;
mod compact_symbol_table;
mod dwarf;
mod dyld_cache;
mod elf;
mod error;
mod macho;
mod shared;
mod symbolicate;
mod windows;

pub use crate::shared::{SymbolicationQuery, SymbolicationResult};
use dyld_cache::DyldCache;

pub use crate::cache::{FileByteSource, FileContentsWithChunkedCaching};
pub use crate::compact_symbol_table::CompactSymbolTable;
pub use crate::error::{GetSymbolsError, Result};
use crate::shared::FileContentsWrapper;
pub use crate::shared::{
    CandidatePathInfo, FileAndPathHelper, FileAndPathHelperError, FileAndPathHelperResult,
    FileContents, FileLocation, OptionallySendFuture,
};

/// Looks up the binary identified by `debug_name` / `breakpad_id` and returns its
/// symbols as a `CompactSymbolTable`.
///
/// This is a convenience wrapper around `get_symbolication_result` which asks for
/// `SymbolicationResultKind::AllSymbols`. All file access goes through `helper`,
/// which the caller has to implement.
pub async fn get_compact_symbol_table<'h>(
    debug_name: &str,
    breakpad_id: &str,
    helper: &'h impl FileAndPathHelper<'h>,
) -> Result<CompactSymbolTable> {
    let all_symbols_query = SymbolicationQuery {
        debug_name,
        breakpad_id,
        result_kind: SymbolicationResultKind::AllSymbols,
    };
    get_symbolication_result(all_symbols_query, helper).await
}

/// The generic workhorse behind both `get_compact_symbol_table` and `query_api`:
/// obtains symbol data for the binary described by `query`.
///
/// How much detail is gathered is controlled by `query.result_kind`; see
/// `SymbolicationResultKind` for the available levels.
///
/// The helper is asked for a list of candidate locations, and the candidates are
/// tried in the order in which the helper returned them. The first candidate that
/// yields a result wins.
///
/// # Errors
///
///  - `HelperErrorDuringGetCandidatePathsForBinaryOrPdb` if the helper fails to
///    produce the candidate list.
///  - The error of the *last* candidate that was tried, if every candidate failed.
///  - `NoCandidatePathForBinary` if the helper returned no candidates at all.
pub async fn get_symbolication_result<'h, R>(
    query: SymbolicationQuery<'_>,
    helper: &'h impl FileAndPathHelper<'h>,
) -> Result<R>
where
    R: SymbolicationResult,
{
    let candidates =
        match helper.get_candidate_paths_for_binary_or_pdb(query.debug_name, query.breakpad_id) {
            Ok(candidates) => candidates,
            Err(helper_err) => {
                return Err(
                    GetSymbolsError::HelperErrorDuringGetCandidatePathsForBinaryOrPdb(
                        query.debug_name.to_string(),
                        query.breakpad_id.to_string(),
                        helper_err,
                    ),
                );
            }
        };

    // Only the most recent failure is remembered; earlier ones are overwritten.
    let mut most_recent_failure = None;
    for candidate in candidates {
        let attempt = match candidate {
            CandidatePathInfo::SingleFile(file_location) => {
                try_get_symbolication_result_from_path(query.clone(), &file_location, helper).await
            }
            CandidatePathInfo::InDyldCache {
                dyld_cache_path,
                dylib_path,
            } => {
                try_get_symbolication_result_from_dyld_shared_cache(
                    query.clone(),
                    &dyld_cache_path,
                    &dylib_path,
                    helper,
                )
                .await
            }
        };

        match attempt {
            Ok(symbols) => return Ok(symbols),
            Err(failure) => most_recent_failure = Some(failure),
        }
    }

    match most_recent_failure {
        Some(failure) => Err(failure),
        // The loop body never ran, i.e. the helper gave us an empty candidate list.
        None => Err(GetSymbolsError::NoCandidatePathForBinary(
            query.debug_name.to_string(),
            query.breakpad_id.to_string(),
        )),
    }
}

/// This is the main API of this crate.
/// It implements the "Tecken" JSON API, which is also used by the Mozilla symbol server.
/// It's intended to be used as a drop-in "local symbol server" which gathers its data
/// directly from file artifacts produced during compilation (rather than consulting
/// e.g. a database).
/// The caller needs to implement the `FileAndPathHelper` trait to provide file system access.
/// The return value is a JSON string.
///
/// The following "URLs" are supported:
///  - `/symbolicate/v5`: This API is documented at <https://tecken.readthedocs.io/en/latest/symbolication.html>.
///    The returned data has two extra fields: inlines (per address) and module_errors (per job).
///  - `/symbolicate/v5-legacy`: Like v5, but lacking any data that comes from debug information,
///    i.e. files, lines and inlines. This is faster.
pub async fn query_api<'h>(
    request_url: &str,
    request_json_data: &str,
    helper: &'h impl FileAndPathHelper<'h>,
) -> String {
    if request_url == "/symbolicate/v5-legacy" {
        symbolicate::v5::query_api_json(request_json_data, helper, false).await
    } else if request_url == "/symbolicate/v5" {
        symbolicate::v5::query_api_json(request_json_data, helper, true).await
    } else {
        json!({ "error": format!("Unrecognized URL {}", request_url) }).to_string()
    }
}

/// Opens the file at `file_location` through `helper` and tries to answer `query`
/// from its contents.
///
/// The file format is detected with `object::FileKind::parse`. ELF, Mach-O (thin and
/// fat) and PE files are forwarded to the `elf`, `macho` and `windows` modules
/// respectively. For fat Mach-O files, the member to use is picked by
/// `macho::get_arch_range` based on `query.breakpad_id`. If `FileKind::parse` does not
/// recognize the file, the contents are tried as a PDB file instead.
///
/// # Errors
///
///  - `HelperErrorDuringOpenFile` if the helper cannot open the file.
///  - `ObjectParseError` if the fat Mach-O arch list cannot be parsed.
///  - `InvalidInputError` if the file kind is recognized but not handled here, or if
///    the file is neither recognized by `object` nor parseable as a PDB.
///  - Whatever error the format-specific implementation returns.
async fn try_get_symbolication_result_from_path<'h, R, H>(
    query: SymbolicationQuery<'_>,
    file_location: &FileLocation,
    helper: &'h H,
) -> Result<R>
where
    R: SymbolicationResult,
    H: FileAndPathHelper<'h>,
{
    let raw_contents = helper
        .open_file(file_location)
        .await
        .map_err(|open_err| {
            GetSymbolsError::HelperErrorDuringOpenFile(file_location.to_string_lossy(), open_err)
        })?;
    let file_contents = FileContentsWrapper::new(raw_contents);

    let file_kind = match FileKind::parse(&file_contents) {
        Ok(kind) => kind,
        Err(_) => {
            // `object` does not know this format; see whether it is a PDB file.
            return match PDB::open(&file_contents) {
                Ok(pdb) => windows::get_symbolication_result(pdb, query),
                Err(_) => Err(GetSymbolsError::InvalidInputError(
                    "The file does not have a known format; PDB::open was not able to parse it and object::FileKind::parse was not able to detect the format.",
                )),
            };
        }
    };

    match file_kind {
        FileKind::Elf32 | FileKind::Elf64 => {
            elf::get_symbolication_result(file_kind, file_contents, query)
        }
        FileKind::MachO32 | FileKind::MachO64 => {
            macho::get_symbolication_result(file_contents, None, 0, query, helper).await
        }
        FileKind::MachOFat32 => {
            let fat_arches = FatHeader::parse_arch32(&file_contents)
                .map_err(|parse_err| GetSymbolsError::ObjectParseError(file_kind, parse_err))?;
            let member_range =
                macho::get_arch_range(&file_contents, fat_arches, query.breakpad_id)?;
            macho::get_symbolication_result(file_contents, Some(member_range), 0, query, helper)
                .await
        }
        FileKind::MachOFat64 => {
            let fat_arches = FatHeader::parse_arch64(&file_contents)
                .map_err(|parse_err| GetSymbolsError::ObjectParseError(file_kind, parse_err))?;
            let member_range =
                macho::get_arch_range(&file_contents, fat_arches, query.breakpad_id)?;
            macho::get_symbolication_result(file_contents, Some(member_range), 0, query, helper)
                .await
        }
        FileKind::Pe32 | FileKind::Pe64 => {
            windows::get_symbolication_result_via_binary(
                file_kind,
                file_contents,
                query,
                file_location,
                helper,
            )
            .await
        }
        _ => Err(GetSymbolsError::InvalidInputError(
            "Input was Archive, Coff or Wasm format, which are unsupported for now",
        )),
    }
}

/// Tries to answer `query` for the image `dylib_path` stored inside the dyld shared
/// cache at `dyld_cache_path`.
///
/// The cache is read chunk by chunk: chunk 0 is the file at `dyld_cache_path` itself,
/// and chunk `n` (for `n >= 1`) is the file whose path is `dyld_cache_path` with
/// `.n` appended. For every chunk, the image whose path equals `dylib_path` is looked
/// up, and the first chunk for which `image.file_offset()` succeeds is handed to
/// `macho::get_symbolication_result`, with that file offset as the header offset.
///
/// # Errors
///
///  - `HelperErrorDuringOpenFile` if the first chunk cannot be opened.
///  - `DyldCacheOutOfChunks` if a later chunk cannot be opened; the helper's own
///    error is discarded in that case.
///  - `DyldCacheParseError` if a chunk cannot be parsed as a dyld cache.
///  - `NoMatchingDyldCacheImagePath` if a chunk lists no image with path `dylib_path`.
///  - Whatever error `macho::get_symbolication_result` returns.
async fn try_get_symbolication_result_from_dyld_shared_cache<'h, R, H>(
    query: SymbolicationQuery<'_>,
    dyld_cache_path: &Path,
    dylib_path: &str,
    helper: &'h H,
) -> Result<R>
where
    R: SymbolicationResult,
    H: FileAndPathHelper<'h>,
{
    let mut chunk_index = 0;

    let (header_offset, file_contents) = loop {
        let chunk_path = match chunk_index {
            0 => dyld_cache_path.to_path_buf(),
            _ => {
                // Append ".<index>" to the complete path of the first chunk.
                let mut suffixed = dyld_cache_path.as_os_str().to_os_string();
                suffixed.push(format!(".{}", chunk_index));
                PathBuf::from(suffixed)
            }
        };
        let chunk_location = FileLocation::Path(chunk_path);

        let opened = helper.open_file(&chunk_location).await;
        let raw_contents = match opened {
            Ok(contents) => contents,
            Err(open_err) => {
                let cache_path_string = dyld_cache_path.to_string_lossy().to_string();
                return Err(if chunk_index == 0 {
                    GetSymbolsError::HelperErrorDuringOpenFile(cache_path_string, open_err)
                } else {
                    // A chunk other than the first one could not be opened.
                    GetSymbolsError::DyldCacheOutOfChunks(cache_path_string, chunk_index)
                });
            }
        };

        let file_contents = FileContentsWrapper::new(raw_contents);
        let cache = DyldCache::<Endianness, _>::parse(&file_contents)
            .map_err(GetSymbolsError::DyldCacheParseError)?;
        let image = cache
            .images()
            .find(|candidate| candidate.path() == Ok(dylib_path))
            .ok_or_else(|| GetSymbolsError::NoMatchingDyldCacheImagePath(dylib_path.to_string()))?;

        match image.file_offset() {
            // This chunk yields a file offset for the image, so it is the one we want.
            Ok(file_offset) => break (file_offset, file_contents),
            // No file offset for the image in this chunk; move on to the next chunk.
            Err(_) => chunk_index += 1,
        }
    };

    macho::get_symbolication_result(file_contents, None, header_offset, query, helper).await
}