Skip to main content

symbolic_debuginfo/sourcebundle/
mod.rs

1//! Support for Source Bundles, a proprietary archive containing source code.
2//!
3//! This module defines the [`SourceBundle`] type. Since not all object file containers specify a
4//! standardized way to inline sources into debug information, this can be used to associate source
5//! contents to debug files.
6//!
7//! Source bundles are ZIP archives with a well-defined internal structure. Most importantly, they
//! contain source files in a nested directory structure. Additionally, there is metadata
//! associated with every source file, which allows storing additional properties, such as the
//! original file system path, a web URL, and custom headers.
11//!
12//! The internal structure is as follows:
13//!
14//! ```txt
15//! manifest.json
16//! files/
17//!   file1.txt
18//!   subfolder/
19//!     file2.txt
20//! ```
21//!
22//! `SourceBundle` implements the [`ObjectLike`] trait. When created from another object, it carries
23//! over its meta data, such as the [`debug_id`] or [`code_id`]. However, source bundles never store
24//! symbols or debug information. To obtain sources or iterate files stored in this source bundle,
25//! use [`SourceBundle::debug_session`].
26//!
27//! Source bundles can be created manually or by converting any `ObjectLike` using
28//! [`SourceBundleWriter`].
29//!
30//! [`ObjectLike`]: ../trait.ObjectLike.html
31//! [`SourceBundle`]: struct.SourceBundle.html
32//! [`debug_id`]: struct.SourceBundle.html#method.debug_id
33//! [`code_id`]: struct.SourceBundle.html#method.code_id
34//! [`SourceBundle::debug_session`]: struct.SourceBundle.html#method.debug_session
35//! [`SourceBundleWriter`]: struct.SourceBundleWriter.html
36//!
37//! ## Artifact Bundles
38//!
39//! Source bundles share the format with a related concept, called an "artifact bundle".  Artifact
40//! bundles are essentially source bundles but they typically contain sources referred to by
41//! JavaScript source maps and source maps themselves.  For instance in an artifact
42//! bundle a file entry has a `url` and might carry `headers` or individual debug IDs
43//! per source file.
44
45mod utf8_reader;
46
47use std::borrow::Cow;
48use std::collections::{BTreeMap, BTreeSet, HashMap};
49use std::error::Error;
50use std::fmt::{Display, Formatter};
51use std::fs::{File, OpenOptions};
52use std::io::{BufReader, BufWriter, ErrorKind, Read, Seek, Write};
53use std::path::Path;
54use std::sync::{Arc, LazyLock};
55use std::{fmt, io};
56
57use parking_lot::Mutex;
58use regex::Regex;
59use serde::{Deserialize, Deserializer, Serialize};
60use thiserror::Error;
61use zip::{write::SimpleFileOptions, ZipWriter};
62
63use symbolic_common::{Arch, AsSelf, CodeId, DebugId, SourceLinkMappings};
64
65use self::utf8_reader::Utf8Reader;
66use crate::base::*;
67use crate::js::{
68    discover_debug_id, discover_sourcemap_embedded_debug_id, discover_sourcemaps_location,
69};
70
71/// Magic bytes of a source bundle. They are prepended to the ZIP file.
72static BUNDLE_MAGIC: [u8; 4] = *b"SYSB";
73
74/// Version of the bundle and manifest format.
75static BUNDLE_VERSION: u32 = 2;
76
77/// Relative path to the manifest file in the bundle file.
78static MANIFEST_PATH: &str = "manifest.json";
79
80/// Path at which files will be written into the bundle.
81static FILES_PATH: &str = "files";
82
83static SANE_PATH_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r":?[/\\]+").unwrap());
84
/// The kind of failure described by a [`SourceBundleError`].
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SourceBundleErrorKind {
    /// The ZIP container of the source bundle is damaged.
    BadZip,

    /// The manifest could not be read or written.
    BadManifest,

    /// The `Object` contains invalid data and cannot be converted.
    BadDebugFile,

    /// Generic error when writing a source bundle, most likely IO.
    WriteFailed,

    /// The file is not valid UTF-8 or could not be read for another reason.
    ReadFailed,
}

impl fmt::Display for SourceBundleErrorKind {
    /// Writes the fixed, human-readable message that belongs to this error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::BadZip => "malformed zip archive",
            Self::BadManifest => "failed to read/write source bundle manifest",
            Self::BadDebugFile => "malformed debug info file",
            Self::WriteFailed => "failed to write source bundle",
            Self::ReadFailed => "file could not be read as UTF-8",
        };
        f.write_str(message)
    }
}
116
117/// An error returned when handling [`SourceBundle`](struct.SourceBundle.html).
118#[derive(Debug, Error)]
119#[error("{kind}")]
120pub struct SourceBundleError {
121    kind: SourceBundleErrorKind,
122    #[source]
123    source: Option<Box<dyn Error + Send + Sync + 'static>>,
124}
125
126impl SourceBundleError {
127    /// Creates a new SourceBundle error from a known kind of error as well as an arbitrary error
128    /// payload.
129    ///
130    /// This function is used to generically create source bundle errors which do not originate from
131    /// `symbolic` itself. The `source` argument is an arbitrary payload which will be contained in
132    /// this [`SourceBundleError`].
133    pub fn new<E>(kind: SourceBundleErrorKind, source: E) -> Self
134    where
135        E: Into<Box<dyn Error + Send + Sync>>,
136    {
137        let source = Some(source.into());
138        Self { kind, source }
139    }
140
141    /// Returns the corresponding [`SourceBundleErrorKind`] for this error.
142    pub fn kind(&self) -> SourceBundleErrorKind {
143        self.kind
144    }
145}
146
147impl From<SourceBundleErrorKind> for SourceBundleError {
148    fn from(kind: SourceBundleErrorKind) -> Self {
149        Self { kind, source: None }
150    }
151}
152
/// Trims matching suffixes of a string in-place.
///
/// Every trailing character for which `pat` returns `true` is removed; trimming stops at the
/// first character, counted from the end, that does not match.
fn trim_end_matches<F>(string: &mut String, pat: F)
where
    F: FnMut(char) -> bool,
{
    // The argument (the length of the trimmed view) is computed before `truncate` mutates.
    string.truncate(string.trim_end_matches(pat).len());
}
161
162/// The type of a [`SourceFileInfo`](struct.SourceFileInfo.html).
163#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
164#[serde(rename_all = "snake_case")]
165pub enum SourceFileType {
166    /// Regular source file.
167    Source,
168
169    /// Minified source code.
170    MinifiedSource,
171
172    /// JavaScript sourcemap.
173    SourceMap,
174
175    /// Indexed JavaScript RAM bundle.
176    IndexedRamBundle,
177}
178
179/// Meta data information of a file in a [`SourceBundle`](struct.SourceBundle.html).
180#[derive(Clone, Debug, Default, Serialize, Deserialize)]
181pub struct SourceFileInfo {
182    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
183    ty: Option<SourceFileType>,
184
185    #[serde(default, skip_serializing_if = "String::is_empty")]
186    path: String,
187
188    #[serde(default, skip_serializing_if = "String::is_empty")]
189    url: String,
190
191    #[serde(
192        default,
193        skip_serializing_if = "BTreeMap::is_empty",
194        deserialize_with = "deserialize_headers"
195    )]
196    headers: BTreeMap<String, String>,
197}
198
199/// Helper to ensure that header keys are normalized to lowercase
200fn deserialize_headers<'de, D>(deserializer: D) -> Result<BTreeMap<String, String>, D::Error>
201where
202    D: Deserializer<'de>,
203{
204    let rv: BTreeMap<String, String> = Deserialize::deserialize(deserializer)?;
205    if rv.is_empty()
206        || rv
207            .keys()
208            .all(|x| !x.chars().any(|c| c.is_ascii_uppercase()))
209    {
210        Ok(rv)
211    } else {
212        Ok(rv
213            .into_iter()
214            .map(|(k, v)| (k.to_ascii_lowercase(), v))
215            .collect())
216    }
217}
218
219impl SourceFileInfo {
220    /// Creates default file information.
221    pub fn new() -> Self {
222        Self::default()
223    }
224
225    /// Returns the type of the source file.
226    pub fn ty(&self) -> Option<SourceFileType> {
227        self.ty
228    }
229
230    /// Sets the type of the source file.
231    pub fn set_ty(&mut self, ty: SourceFileType) {
232        self.ty = Some(ty);
233    }
234
235    /// Returns the absolute file system path of this file.
236    pub fn path(&self) -> Option<&str> {
237        match self.path.as_str() {
238            "" => None,
239            path => Some(path),
240        }
241    }
242
243    /// Sets the absolute file system path of this file.
244    pub fn set_path(&mut self, path: String) {
245        self.path = path;
246    }
247
248    /// Returns the web URL that of this file.
249    pub fn url(&self) -> Option<&str> {
250        match self.url.as_str() {
251            "" => None,
252            url => Some(url),
253        }
254    }
255
256    /// Sets the web URL of this file.
257    pub fn set_url(&mut self, url: String) {
258        self.url = url;
259    }
260
261    /// Iterates over all attributes represented as headers.
262    pub fn headers(&self) -> impl Iterator<Item = (&str, &str)> {
263        self.headers.iter().map(|(k, v)| (k.as_str(), v.as_str()))
264    }
265
266    /// Retrieves the specified header, if it exists.
267    pub fn header(&self, header: &str) -> Option<&str> {
268        if !header.chars().any(|x| x.is_ascii_uppercase()) {
269            self.headers.get(header).map(String::as_str)
270        } else {
271            self.headers.iter().find_map(|(k, v)| {
272                if k.eq_ignore_ascii_case(header) {
273                    Some(v.as_str())
274                } else {
275                    None
276                }
277            })
278        }
279    }
280
281    /// Adds a custom attribute following header conventions.
282    ///
283    /// Header keys are converted to lowercase before writing as this is
284    /// the canonical format for headers. However, the file format does
285    /// support headers to be case insensitive and they will be lower cased
286    /// upon reading.
287    ///
288    /// Headers on files are primarily be used to add auxiliary information
289    /// to files.  The following headers are known and processed:
290    ///
291    /// - `debug-id`: see [`debug_id`](Self::debug_id)
292    /// - `sourcemap` (and `x-sourcemap`): see [`source_mapping_url`](Self::source_mapping_url)
293    pub fn add_header(&mut self, header: String, value: String) {
294        let mut header = header;
295        if header.chars().any(|x| x.is_ascii_uppercase()) {
296            header = header.to_ascii_lowercase();
297        }
298        self.headers.insert(header, value);
299    }
300
301    /// The debug ID of this minified source or sourcemap if it has any.
302    ///
303    /// Files have a debug ID if they have a header with the key `debug-id`.
304    /// At present debug IDs in source bundles are only ever given to minified
305    /// source files.
306    pub fn debug_id(&self) -> Option<DebugId> {
307        self.header("debug-id").and_then(|x| x.parse().ok())
308    }
309
310    /// The source mapping URL of the given minified source.
311    ///
312    /// Files have a source mapping URL if they have a header with the
313    /// key `sourcemap` (or the `x-sourcemap` legacy header) as part the
314    /// source map specification.
315    pub fn source_mapping_url(&self) -> Option<&str> {
316        self.header("sourcemap")
317            .or_else(|| self.header("x-sourcemap"))
318    }
319
320    /// Returns `true` if this instance does not carry any information.
321    pub fn is_empty(&self) -> bool {
322        self.path.is_empty() && self.ty.is_none() && self.headers.is_empty()
323    }
324}
325
326/// A descriptor that provides information about a source file.
327///
328/// This descriptor is returned from [`source_by_path`](DebugSession::source_by_path)
329/// and friends.
330///
331/// This descriptor holds information that can be used to retrieve information
332/// about the source file.  A descriptor has to have at least one of the following
333/// to be valid:
334///
335/// - [`contents`](Self::contents)
336/// - [`url`](Self::url)
337/// - [`debug_id`](Self::debug_id)
338///
339/// Debug sessions are not permitted to return invalid source file descriptors.
340pub struct SourceFileDescriptor<'a> {
341    contents: Option<Cow<'a, str>>,
342    remote_url: Option<Cow<'a, str>>,
343    file_info: Option<&'a SourceFileInfo>,
344}
345
346impl<'a> SourceFileDescriptor<'a> {
347    /// Creates an embedded source file descriptor.
348    pub(crate) fn new_embedded(
349        content: Cow<'a, str>,
350        file_info: Option<&'a SourceFileInfo>,
351    ) -> SourceFileDescriptor<'a> {
352        SourceFileDescriptor {
353            contents: Some(content),
354            remote_url: None,
355            file_info,
356        }
357    }
358
359    /// Creates an remote source file descriptor.
360    pub(crate) fn new_remote(remote_url: Cow<'a, str>) -> SourceFileDescriptor<'a> {
361        SourceFileDescriptor {
362            contents: None,
363            remote_url: Some(remote_url),
364            file_info: None,
365        }
366    }
367
368    /// The type of the file the descriptor points to.
369    pub fn ty(&self) -> SourceFileType {
370        self.file_info
371            .and_then(|x| x.ty())
372            .unwrap_or(SourceFileType::Source)
373    }
374
375    /// The contents of the source file as string, if it's available.
376    ///
377    /// Portable PDBs for instance will often have source information, but rely on
378    /// remote file fetching via Sourcelink to get to the contents.  In that case
379    /// a file descriptor is created, but the contents are missing and instead the
380    /// [`url`](Self::url) can be used.
381    pub fn contents(&self) -> Option<&str> {
382        self.contents.as_deref()
383    }
384
385    /// The contents of the source file as string, if it's available.
386    ///
387    /// This unwraps the [`SourceFileDescriptor`] directly and might avoid a copy of `contents`
388    /// later on.
389    pub fn into_contents(self) -> Option<Cow<'a, str>> {
390        self.contents
391    }
392
393    /// If available returns the URL of this source.
394    ///
395    /// For certain files this is the canoncial URL of where the file is placed.  This
396    /// for instance is the case for minified JavaScript files or source maps which might
397    /// have a canonical URL.  In case of portable PDBs this is also where you would fetch
398    /// the source code from if source links are used.
399    pub fn url(&self) -> Option<&str> {
400        if let Some(ref url) = self.remote_url {
401            Some(url)
402        } else {
403            self.file_info.and_then(|x| x.url())
404        }
405    }
406
407    /// If available returns the file path of this source.
408    ///
409    /// For source bundles that are a companion file to a debug file, this is the canonical
410    /// path of the source file.
411    pub fn path(&self) -> Option<&str> {
412        self.file_info.and_then(|x| x.path())
413    }
414
415    /// The debug ID of the file if available.
416    ///
417    /// For source maps or minified source files symbolic supports embedded debug IDs.  If they
418    /// are in use, the debug ID is returned from here.  The debug ID is discovered from the
419    /// file's `debug-id` header or the embedded `debugId` reference in the file body.
420    pub fn debug_id(&self) -> Option<DebugId> {
421        self.file_info.and_then(|x| x.debug_id()).or_else(|| {
422            if matches!(
423                self.ty(),
424                SourceFileType::Source | SourceFileType::MinifiedSource
425            ) {
426                self.contents().and_then(discover_debug_id)
427            } else if matches!(self.ty(), SourceFileType::SourceMap) {
428                self.contents()
429                    .and_then(discover_sourcemap_embedded_debug_id)
430            } else {
431                None
432            }
433        })
434    }
435
436    /// The source mapping URL reference of the file.
437    ///
438    /// This is used to refer to a source map from a minified file.  Only minified source files
439    /// will have a relationship to a source map.  The source mapping is discovered either from
440    /// a `sourcemap` header in the source manifest, or the `sourceMappingURL` reference in the body.
441    pub fn source_mapping_url(&self) -> Option<&str> {
442        self.file_info
443            .and_then(|x| x.source_mapping_url())
444            .or_else(|| {
445                if matches!(
446                    self.ty(),
447                    SourceFileType::Source | SourceFileType::MinifiedSource
448                ) {
449                    self.contents().and_then(discover_sourcemaps_location)
450                } else {
451                    None
452                }
453            })
454    }
455}
456
457/// Version number of a [`SourceBundle`](struct.SourceBundle.html).
458#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
459pub struct SourceBundleVersion(pub u32);
460
461impl SourceBundleVersion {
462    /// Creates a new source bundle version.
463    pub fn new(version: u32) -> Self {
464        Self(version)
465    }
466
467    /// Determines whether this version can be handled.
468    ///
469    /// This will return `false`, if the version is newer than what is supported by this library
470    /// version.
471    pub fn is_valid(self) -> bool {
472        self.0 <= BUNDLE_VERSION
473    }
474
475    /// Returns whether the given bundle is at the latest supported versino.
476    pub fn is_latest(self) -> bool {
477        self.0 == BUNDLE_VERSION
478    }
479}
480
481impl Default for SourceBundleVersion {
482    fn default() -> Self {
483        Self(BUNDLE_VERSION)
484    }
485}
486
487/// Binary header of the source bundle archive.
488///
489/// This header precedes the ZIP archive. It is used to detect these files on the file system.
490#[repr(C, packed)]
491#[derive(Clone, Copy, Debug)]
492struct SourceBundleHeader {
493    /// Magic bytes header.
494    pub magic: [u8; 4],
495
496    /// Version of the bundle.
497    pub version: u32,
498}
499
500impl SourceBundleHeader {
501    fn as_bytes(&self) -> &[u8] {
502        let ptr = self as *const Self as *const u8;
503        unsafe { std::slice::from_raw_parts(ptr, std::mem::size_of::<Self>()) }
504    }
505}
506
507impl Default for SourceBundleHeader {
508    fn default() -> Self {
509        SourceBundleHeader {
510            magic: BUNDLE_MAGIC,
511            version: BUNDLE_VERSION,
512        }
513    }
514}
515
516/// Manifest of a [`SourceBundle`] containing information on its contents.
517///
518/// [`SourceBundle`]: struct.SourceBundle.html
519#[derive(Clone, Debug, Default, Serialize, Deserialize)]
520struct SourceBundleManifest {
521    /// Descriptors for all files in this bundle.
522    #[serde(default)]
523    pub files: BTreeMap<String, SourceFileInfo>,
524
525    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
526    pub source_links: BTreeMap<String, String>,
527
528    /// Arbitrary attributes to include in the bundle.
529    #[serde(flatten)]
530    pub attributes: BTreeMap<String, String>,
531}
532
533struct SourceBundleIndex<'data> {
534    manifest: SourceBundleManifest,
535    indexed_files: HashMap<FileKey<'data>, Arc<String>>,
536}
537
538impl<'data> SourceBundleIndex<'data> {
539    pub fn parse(
540        archive: &mut zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>,
541    ) -> Result<Self, SourceBundleError> {
542        let manifest_file = archive
543            .by_name("manifest.json")
544            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
545        let manifest: SourceBundleManifest = serde_json::from_reader(manifest_file)
546            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadManifest, e))?;
547
548        let files = &manifest.files;
549        let mut indexed_files = HashMap::with_capacity(files.len());
550
551        for (zip_path, file_info) in files {
552            let zip_path = Arc::new(zip_path.clone());
553            if !file_info.path.is_empty() {
554                indexed_files.insert(
555                    FileKey::Path(normalize_path(&file_info.path).into()),
556                    zip_path.clone(),
557                );
558            }
559            if !file_info.url.is_empty() {
560                indexed_files.insert(FileKey::Url(file_info.url.clone().into()), zip_path.clone());
561            }
562            if let (Some(debug_id), Some(ty)) = (file_info.debug_id(), file_info.ty()) {
563                indexed_files.insert(FileKey::DebugId(debug_id, ty), zip_path.clone());
564            }
565        }
566
567        Ok(Self {
568            manifest,
569            indexed_files,
570        })
571    }
572}
573
574/// A bundle of source code files.
575///
576/// To create a source bundle, see [`SourceBundleWriter`]. For more information, see the [module
577/// level documentation].
578///
579/// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
580/// [module level documentation]: index.html
581pub struct SourceBundle<'data> {
582    data: &'data [u8],
583    archive: zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>,
584    index: Arc<SourceBundleIndex<'data>>,
585}
586
587impl fmt::Debug for SourceBundle<'_> {
588    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
589        f.debug_struct("SourceBundle")
590            .field("code_id", &self.code_id())
591            .field("debug_id", &self.debug_id())
592            .field("arch", &self.arch())
593            .field("kind", &self.kind())
594            .field("load_address", &format_args!("{:#x}", self.load_address()))
595            .field("has_symbols", &self.has_symbols())
596            .field("has_debug_info", &self.has_debug_info())
597            .field("has_unwind_info", &self.has_unwind_info())
598            .field("has_sources", &self.has_sources())
599            .field("is_malformed", &self.is_malformed())
600            .finish()
601    }
602}
603
604impl<'data> SourceBundle<'data> {
605    /// Tests whether the buffer could contain a `SourceBundle`.
606    pub fn test(bytes: &[u8]) -> bool {
607        bytes.starts_with(&BUNDLE_MAGIC)
608    }
609
610    /// Tries to parse a `SourceBundle` from the given slice.
611    pub fn parse(data: &'data [u8]) -> Result<SourceBundle<'data>, SourceBundleError> {
612        let mut archive = zip::read::ZipArchive::new(std::io::Cursor::new(data))
613            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
614
615        let index = Arc::new(SourceBundleIndex::parse(&mut archive)?);
616
617        Ok(SourceBundle {
618            archive,
619            data,
620            index,
621        })
622    }
623
624    /// Returns the version of this source bundle format.
625    pub fn version(&self) -> SourceBundleVersion {
626        SourceBundleVersion(BUNDLE_VERSION)
627    }
628
629    /// The container file format, which is always `FileFormat::SourceBundle`.
630    pub fn file_format(&self) -> FileFormat {
631        FileFormat::SourceBundle
632    }
633
634    /// The code identifier of this object.
635    ///
636    /// This is only set if the source bundle was created from an [`ObjectLike`]. It can also be set
637    /// in the [`SourceBundleWriter`] by setting the `"code_id"` attribute.
638    ///
639    /// [`ObjectLike`]: ../trait.ObjectLike.html
640    /// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
641    pub fn code_id(&self) -> Option<CodeId> {
642        self.index
643            .manifest
644            .attributes
645            .get("code_id")
646            .and_then(|x| x.parse().ok())
647    }
648
649    /// The code identifier of this object.
650    ///
651    /// This is only set if the source bundle was created from an [`ObjectLike`]. It can also be set
652    /// in the [`SourceBundleWriter`] by setting the `"debug_id"` attribute.
653    ///
654    /// [`ObjectLike`]: ../trait.ObjectLike.html
655    /// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
656    pub fn debug_id(&self) -> DebugId {
657        self.index
658            .manifest
659            .attributes
660            .get("debug_id")
661            .and_then(|x| x.parse().ok())
662            .unwrap_or_default()
663    }
664
665    /// The debug file name of this object.
666    ///
667    /// This is only set if the source bundle was created from an [`ObjectLike`]. It can also be set
668    /// in the [`SourceBundleWriter`] by setting the `"object_name"` attribute.
669    ///
670    /// [`ObjectLike`]: ../trait.ObjectLike.html
671    /// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
672    pub fn name(&self) -> Option<&str> {
673        self.index
674            .manifest
675            .attributes
676            .get("object_name")
677            .map(|x| x.as_str())
678    }
679
680    /// The CPU architecture of this object.
681    ///
682    /// This is only set if the source bundle was created from an [`ObjectLike`]. It can also be set
683    /// in the [`SourceBundleWriter`] by setting the `"arch"` attribute.
684    ///
685    /// [`ObjectLike`]: ../trait.ObjectLike.html
686    /// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
687    pub fn arch(&self) -> Arch {
688        self.index
689            .manifest
690            .attributes
691            .get("arch")
692            .and_then(|s| s.parse().ok())
693            .unwrap_or_default()
694    }
695
696    /// The kind of this object.
697    ///
698    /// Because source bundles do not contain real objects this is always `ObjectKind::None`.
699    fn kind(&self) -> ObjectKind {
700        ObjectKind::Sources
701    }
702
703    /// The address at which the image prefers to be loaded into memory.
704    ///
705    /// Because source bundles do not contain this information is always `0`.
706    pub fn load_address(&self) -> u64 {
707        0
708    }
709
710    /// Determines whether this object exposes a public symbol table.
711    ///
712    /// Source bundles never have symbols.
713    pub fn has_symbols(&self) -> bool {
714        false
715    }
716
717    /// Returns an iterator over symbols in the public symbol table.
718    pub fn symbols(&self) -> SourceBundleSymbolIterator<'data> {
719        std::iter::empty()
720    }
721
722    /// Returns an ordered map of symbols in the symbol table.
723    pub fn symbol_map(&self) -> SymbolMap<'data> {
724        self.symbols().collect()
725    }
726
727    /// Determines whether this object contains debug information.
728    ///
729    /// Source bundles never have debug info.
730    pub fn has_debug_info(&self) -> bool {
731        false
732    }
733
734    /// Constructs a debugging session.
735    ///
736    /// A debugging session loads certain information from the object file and creates caches for
737    /// efficient access to various records in the debug information. Since this can be quite a
738    /// costly process, try to reuse the debugging session as long as possible.
739    pub fn debug_session(&self) -> Result<SourceBundleDebugSession<'data>, SourceBundleError> {
740        // NOTE: The `SourceBundleDebugSession` still needs interior mutability, so it still needs
741        // to carry its own Mutex. However that is still preferable to sharing the Mutex of the
742        // `SourceBundle`, which might be shared by multiple threads.
743        // The only thing here that really needs to be `mut` is the `Cursor` / `Seek` position.
744        let archive = Mutex::new(self.archive.clone());
745        let source_links = SourceLinkMappings::new(
746            self.index
747                .manifest
748                .source_links
749                .iter()
750                .map(|(k, v)| (&k[..], &v[..])),
751        );
752        Ok(SourceBundleDebugSession {
753            index: Arc::clone(&self.index),
754            archive,
755            source_links,
756        })
757    }
758
759    /// Determines whether this object contains stack unwinding information.
760    pub fn has_unwind_info(&self) -> bool {
761        false
762    }
763
764    /// Determines whether this object contains embedded source.
765    pub fn has_sources(&self) -> bool {
766        true
767    }
768
769    /// Determines whether this object is malformed and was only partially parsed
770    pub fn is_malformed(&self) -> bool {
771        false
772    }
773
774    /// Returns the raw data of the source bundle.
775    pub fn data(&self) -> &'data [u8] {
776        self.data
777    }
778
779    /// Returns true if this source bundle contains no source code.
780    pub fn is_empty(&self) -> bool {
781        self.index.manifest.files.is_empty()
782    }
783}
784
785impl<'slf, 'data: 'slf> AsSelf<'slf> for SourceBundle<'data> {
786    type Ref = SourceBundle<'slf>;
787
788    fn as_self(&'slf self) -> &'slf Self::Ref {
789        unsafe { std::mem::transmute(self) }
790    }
791}
792
793impl<'data> Parse<'data> for SourceBundle<'data> {
794    type Error = SourceBundleError;
795
796    fn parse(data: &'data [u8]) -> Result<Self, Self::Error> {
797        SourceBundle::parse(data)
798    }
799
800    fn test(data: &'data [u8]) -> bool {
801        SourceBundle::test(data)
802    }
803}
804
805impl<'data: 'object, 'object> ObjectLike<'data, 'object> for SourceBundle<'data> {
806    type Error = SourceBundleError;
807    type Session = SourceBundleDebugSession<'data>;
808    type SymbolIterator = SourceBundleSymbolIterator<'data>;
809
810    fn file_format(&self) -> FileFormat {
811        self.file_format()
812    }
813
814    fn code_id(&self) -> Option<CodeId> {
815        self.code_id()
816    }
817
818    fn debug_id(&self) -> DebugId {
819        self.debug_id()
820    }
821
822    fn arch(&self) -> Arch {
823        self.arch()
824    }
825
826    fn kind(&self) -> ObjectKind {
827        self.kind()
828    }
829
830    fn load_address(&self) -> u64 {
831        self.load_address()
832    }
833
834    fn has_symbols(&self) -> bool {
835        self.has_symbols()
836    }
837
838    fn symbol_map(&self) -> SymbolMap<'data> {
839        self.symbol_map()
840    }
841
842    fn symbols(&self) -> Self::SymbolIterator {
843        self.symbols()
844    }
845
846    fn has_debug_info(&self) -> bool {
847        self.has_debug_info()
848    }
849
850    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
851        self.debug_session()
852    }
853
854    fn has_unwind_info(&self) -> bool {
855        self.has_unwind_info()
856    }
857
858    fn has_sources(&self) -> bool {
859        self.has_sources()
860    }
861
862    fn is_malformed(&self) -> bool {
863        self.is_malformed()
864    }
865}
866
867/// An iterator yielding symbols from a source bundle.
868pub type SourceBundleSymbolIterator<'data> = std::iter::Empty<Symbol<'data>>;
869
870#[derive(Debug, Hash, PartialEq, Eq)]
871enum FileKey<'a> {
872    Path(Cow<'a, str>),
873    Url(Cow<'a, str>),
874    DebugId(DebugId, SourceFileType),
875}
876
877/// Debug session for SourceBundle objects.
878pub struct SourceBundleDebugSession<'data> {
879    archive: Mutex<zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>>,
880    index: Arc<SourceBundleIndex<'data>>,
881    source_links: SourceLinkMappings,
882}
883
884impl SourceBundleDebugSession<'_> {
885    /// Returns an iterator over all source files in this debug file.
886    pub fn files(&self) -> SourceBundleFileIterator<'_> {
887        SourceBundleFileIterator {
888            files: self.index.manifest.files.values(),
889        }
890    }
891
892    /// Returns an iterator over all functions in this debug file.
893    pub fn functions(&self) -> SourceBundleFunctionIterator<'_> {
894        std::iter::empty()
895    }
896
897    /// Get source by the path of a file in the bundle.
898    fn source_by_zip_path(&self, zip_path: &str) -> Result<String, SourceBundleError> {
899        let mut archive = self.archive.lock();
900        let mut file = archive
901            .by_name(zip_path)
902            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
903        let mut source_content = String::new();
904
905        file.read_to_string(&mut source_content)
906            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
907        Ok(source_content)
908    }
909
910    /// Looks up a source file descriptor.
911    ///
912    /// The file is looked up in both the embedded files and
913    /// in the included source link mappings, in that order.
914    fn get_source_file_descriptor(
915        &self,
916        key: FileKey,
917    ) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
918        if let Some(zip_path) = self.index.indexed_files.get(&key) {
919            let zip_path = zip_path.as_str();
920            let content = Cow::Owned(self.source_by_zip_path(zip_path)?);
921            let info = self.index.manifest.files.get(zip_path);
922            let descriptor = SourceFileDescriptor::new_embedded(content, info);
923            return Ok(Some(descriptor));
924        }
925
926        let FileKey::Path(path) = key else {
927            return Ok(None);
928        };
929
930        Ok(self
931            .source_links
932            .resolve(&path)
933            .map(|s| SourceFileDescriptor::new_remote(s.into())))
934    }
935
936    /// See [DebugSession::source_by_path] for more information.
937    pub fn source_by_path(
938        &self,
939        path: &str,
940    ) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
941        self.get_source_file_descriptor(FileKey::Path(normalize_path(path).into()))
942    }
943
944    /// Like [`source_by_path`](Self::source_by_path) but looks up by URL.
945    pub fn source_by_url(
946        &self,
947        url: &str,
948    ) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
949        self.get_source_file_descriptor(FileKey::Url(url.into()))
950    }
951
952    /// Looks up some source by debug ID and file type.
953    ///
954    /// Lookups by [`DebugId`] require knowledge of the file that is supposed to be
955    /// looked up as multiple files (one per type) can share the same debug ID.
956    /// Special care needs to be taken about [`SourceFileType::IndexedRamBundle`]
957    /// and [`SourceFileType::SourceMap`] which are different file types despite
958    /// the name of it.
959    ///
960    /// # Note on Abstractions
961    ///
962    /// This method is currently not exposed via a standardized debug session
963    /// as it's primarily used for the JavaScript processing system which uses
964    /// different abstractions.
965    pub fn source_by_debug_id(
966        &self,
967        debug_id: DebugId,
968        ty: SourceFileType,
969    ) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
970        self.get_source_file_descriptor(FileKey::DebugId(debug_id, ty))
971    }
972}
973
974impl<'session> DebugSession<'session> for SourceBundleDebugSession<'_> {
975    type Error = SourceBundleError;
976    type FunctionIterator = SourceBundleFunctionIterator<'session>;
977    type FileIterator = SourceBundleFileIterator<'session>;
978
979    fn functions(&'session self) -> Self::FunctionIterator {
980        self.functions()
981    }
982
983    fn files(&'session self) -> Self::FileIterator {
984        self.files()
985    }
986
987    fn source_by_path(&self, path: &str) -> Result<Option<SourceFileDescriptor<'_>>, Self::Error> {
988        self.source_by_path(path)
989    }
990}
991
/// Allows borrowing a session with its `'data` lifetime shortened to `'slf`.
impl<'slf, 'data: 'slf> AsSelf<'slf> for SourceBundleDebugSession<'data> {
    type Ref = SourceBundleDebugSession<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        // Source and target are the same struct and differ only in the lifetime parameter,
        // which is shortened from `'data` to `'slf` (guaranteed by the `'data: 'slf` bound).
        // NOTE(review): presumably the transmute is needed because the compiler does not treat
        // this type as covariant in `'data` - confirm before relying on this reasoning.
        unsafe { std::mem::transmute(self) }
    }
}
999
1000/// An iterator over source files in a SourceBundle object.
1001pub struct SourceBundleFileIterator<'s> {
1002    files: std::collections::btree_map::Values<'s, String, SourceFileInfo>,
1003}
1004
1005impl<'s> Iterator for SourceBundleFileIterator<'s> {
1006    type Item = Result<FileEntry<'s>, SourceBundleError>;
1007
1008    fn next(&mut self) -> Option<Self::Item> {
1009        let source_file = self.files.next()?;
1010        Some(Ok(FileEntry::new(
1011            Cow::default(),
1012            FileInfo::from_path(source_file.path.as_bytes()),
1013        )))
1014    }
1015}
1016
/// An iterator over functions in a SourceBundle object.
///
/// This is an alias for [`std::iter::Empty`], so it never yields an item.
pub type SourceBundleFunctionIterator<'s> =
    std::iter::Empty<Result<Function<'s>, SourceBundleError>>;
1020
1021impl SourceBundleManifest {
1022    /// Creates a new, empty manifest.
1023    pub fn new() -> Self {
1024        Self::default()
1025    }
1026}
1027
1028/// Generates a normalized path for a file in the bundle.
1029///
1030/// This removes all special characters. The path in the bundle will mostly resemble the original
1031/// path, except for unsupported components.
1032fn sanitize_bundle_path(path: &str) -> String {
1033    let mut sanitized = SANE_PATH_RE.replace_all(path, "/").into_owned();
1034    if sanitized.starts_with('/') {
1035        sanitized.remove(0);
1036    }
1037    sanitized
1038}
1039
/// Normalizes a path to the Linux convention by turning every backslash into a forward slash.
fn normalize_path(path: &str) -> String {
    // Splitting on `\` and re-joining with `/` swaps each separator one-for-one.
    path.split('\\').collect::<Vec<_>>().join("/")
}
1044
/// Contains information about a file skipped in the SourceBundleWriter
#[derive(Debug)]
pub struct SkippedFileInfo<'a> {
    /// Path of the file that was skipped.
    path: &'a str,
    /// Human-readable explanation of why it was skipped.
    reason: &'a str,
}

impl<'a> SkippedFileInfo<'a> {
    /// Bundles a path with the reason it was skipped.
    fn new(path: &'a str, reason: &'a str) -> Self {
        SkippedFileInfo { path, reason }
    }

    /// Returns the path of the skipped file.
    pub fn path(&self) -> &str {
        let Self { path, .. } = self;
        path
    }

    /// Get the human-readable reason why the file was skipped
    pub fn reason(&self) -> &str {
        let Self { reason, .. } = self;
        reason
    }
}

/// Renders the info as `Skipped file <path> due to: <reason>`.
impl Display for SkippedFileInfo<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self { path, reason } = self;
        write!(f, "Skipped file {} due to: {}", path, reason)
    }
}
1073
/// Writer to create [`SourceBundles`].
///
/// Writers can either [create a new file] or be started on an [existing writer]. Then, use
/// [`add_file`] to add files and finally call [`finish`] to flush the archive to
/// the underlying writer.
///
/// Note that dropping the writer without calling [`finish`] will result in an incomplete bundle.
///
/// ```no_run
/// # use std::fs::File;
/// # use symbolic_debuginfo::sourcebundle::{SourceBundleWriter, SourceFileInfo};
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut bundle = SourceBundleWriter::create("bundle.zip")?;
///
/// // Add file called "foo.txt"
/// let file = File::open("my_file.txt")?;
/// bundle.add_file("foo.txt", file, SourceFileInfo::default())?;
///
/// // Flush the bundle to disk
/// bundle.finish()?;
/// # Ok(()) }
/// ```
///
/// [`SourceBundles`]: struct.SourceBundle.html
/// [create a new file]: struct.SourceBundleWriter.html#method.create
/// [existing writer]: struct.SourceBundleWriter.html#method.start
/// [`add_file`]: struct.SourceBundleWriter.html#method.add_file
/// [`finish`]: struct.SourceBundleWriter.html#method.finish
pub struct SourceBundleWriter<W>
where
    W: Seek + Write,
{
    /// Manifest collected while files are added; serialized into the archive on `finish`.
    manifest: SourceBundleManifest,
    /// ZIP writer wrapping the underlying output.
    writer: ZipWriter<W>,
    /// Whether `write_object_with_filter` scans sources for il2cpp source references.
    collect_il2cpp: bool,
    /// Invoked for each file that is skipped because adding it failed with `ReadFailed`.
    skipped_file_callback: Box<dyn FnMut(SkippedFileInfo)>,
}
1111
1112fn default_file_options() -> SimpleFileOptions {
1113    // TODO: should we maybe acknowledge that its the year 2023 and switch to zstd eventually?
1114    // Though it obviously needs to be supported across the whole platform,
1115    // which does not seem to be the case for Python?
1116
1117    // Depending on `zip` crate feature flags, it might default to the current time.
1118    // Using an explicit `DateTime::default` gives us a deterministic `1980-01-01T00:00:00`.
1119    SimpleFileOptions::default().last_modified_time(zip::DateTime::default())
1120}
1121
1122impl<W> SourceBundleWriter<W>
1123where
1124    W: Seek + Write,
1125{
1126    /// Creates a bundle writer on the given file.
1127    pub fn start(mut writer: W) -> Result<Self, SourceBundleError> {
1128        let header = SourceBundleHeader::default();
1129        writer
1130            .write_all(header.as_bytes())
1131            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1132
1133        Ok(SourceBundleWriter {
1134            manifest: SourceBundleManifest::new(),
1135            writer: ZipWriter::new(writer),
1136            collect_il2cpp: false,
1137            skipped_file_callback: Box::new(|_| ()),
1138        })
1139    }
1140
1141    /// Returns whether the bundle contains any files.
1142    pub fn is_empty(&self) -> bool {
1143        self.manifest.files.is_empty()
1144    }
1145
1146    /// This controls if source files should be scanned for Il2cpp-specific source annotations,
1147    /// and the referenced C# files should be bundled up as well.
1148    pub fn collect_il2cpp_sources(&mut self, collect_il2cpp: bool) {
1149        self.collect_il2cpp = collect_il2cpp;
1150    }
1151
1152    /// Sets a meta data attribute of the bundle.
1153    ///
1154    /// Attributes are flushed to the bundle when it is [finished]. Thus, they can be retrieved or
1155    /// changed at any time before flushing the writer.
1156    ///
1157    /// If the attribute was set before, the prior value is returned.
1158    ///
1159    /// [finished]: struct.SourceBundleWriter.html#method.remove_attribute
1160    pub fn set_attribute<K, V>(&mut self, key: K, value: V) -> Option<String>
1161    where
1162        K: Into<String>,
1163        V: Into<String>,
1164    {
1165        self.manifest.attributes.insert(key.into(), value.into())
1166    }
1167
1168    /// Removes a meta data attribute of the bundle.
1169    ///
1170    /// If the attribute was set, the last value is returned.
1171    pub fn remove_attribute<K>(&mut self, key: K) -> Option<String>
1172    where
1173        K: AsRef<str>,
1174    {
1175        self.manifest.attributes.remove(key.as_ref())
1176    }
1177
1178    /// Returns the value of a meta data attribute.
1179    pub fn attribute<K>(&mut self, key: K) -> Option<&str>
1180    where
1181        K: AsRef<str>,
1182    {
1183        self.manifest
1184            .attributes
1185            .get(key.as_ref())
1186            .map(String::as_str)
1187    }
1188
1189    /// Determines whether a file at the given path has been added already.
1190    pub fn has_file<S>(&self, path: S) -> bool
1191    where
1192        S: AsRef<str>,
1193    {
1194        let full_path = &self.file_path(path.as_ref());
1195        self.manifest.files.contains_key(full_path)
1196    }
1197
1198    /// Adds a file and its info to the bundle.
1199    ///
1200    /// Only files containing valid UTF-8 are accepted.
1201    ///
1202    /// Multiple files can be added at the same path. For the first duplicate, a counter will be
1203    /// appended to the file name. Any subsequent duplicate increases that counter. For example:
1204    ///
1205    /// ```no_run
1206    /// # use std::fs::File;
1207    /// # use symbolic_debuginfo::sourcebundle::{SourceBundleWriter, SourceFileInfo};
1208    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1209    /// let mut bundle = SourceBundleWriter::create("bundle.zip")?;
1210    ///
1211    /// // Add file at "foo.txt"
1212    /// bundle.add_file("foo.txt", File::open("my_duplicate.txt")?, SourceFileInfo::default())?;
1213    /// assert!(bundle.has_file("foo.txt"));
1214    ///
1215    /// // Add duplicate at "foo.txt.1"
1216    /// bundle.add_file("foo.txt", File::open("my_duplicate.txt")?, SourceFileInfo::default())?;
1217    /// assert!(bundle.has_file("foo.txt.1"));
1218    /// # Ok(()) }
1219    /// ```
1220    ///
1221    /// Returns `Ok(true)` if the file was successfully added, or `Ok(false)` if the file aready
1222    /// existed. Otherwise, an error is returned if writing the file fails.
1223    pub fn add_file<S, R>(
1224        &mut self,
1225        path: S,
1226        file: R,
1227        info: SourceFileInfo,
1228    ) -> Result<(), SourceBundleError>
1229    where
1230        S: AsRef<str>,
1231        R: Read,
1232    {
1233        let mut file_reader = Utf8Reader::new(file);
1234
1235        let full_path = self.file_path(path.as_ref());
1236        let unique_path = self.unique_path(full_path);
1237
1238        self.writer
1239            .start_file(unique_path.clone(), default_file_options())
1240            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1241
1242        match io::copy(&mut file_reader, &mut self.writer) {
1243            Err(e) => {
1244                self.writer
1245                    .abort_file()
1246                    .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1247
1248                // ErrorKind::InvalidData is returned by Utf8Reader when the file is not valid UTF-8.
1249                let error_kind = match e.kind() {
1250                    ErrorKind::InvalidData => SourceBundleErrorKind::ReadFailed,
1251                    _ => SourceBundleErrorKind::WriteFailed,
1252                };
1253
1254                Err(SourceBundleError::new(error_kind, e))
1255            }
1256            Ok(_) => {
1257                self.manifest.files.insert(unique_path, info);
1258                Ok(())
1259            }
1260        }
1261    }
1262
1263    /// Calls add_file, and handles any ReadFailed errors by calling the skipped_file_callback.
1264    fn add_file_skip_read_failed<S, R>(
1265        &mut self,
1266        path: S,
1267        file: R,
1268        info: SourceFileInfo,
1269    ) -> Result<(), SourceBundleError>
1270    where
1271        S: AsRef<str>,
1272        R: Read,
1273    {
1274        let result = self.add_file(&path, file, info);
1275
1276        if let Err(e) = &result {
1277            if e.kind == SourceBundleErrorKind::ReadFailed {
1278                let reason = e.to_string();
1279                let skipped_info = SkippedFileInfo::new(path.as_ref(), &reason);
1280                (self.skipped_file_callback)(skipped_info);
1281
1282                return Ok(());
1283            }
1284        }
1285
1286        result
1287    }
1288
1289    /// Set a callback, which is called for every file that is skipped from being included in the
1290    /// source bundle. The callback receives information about the file being skipped.
1291    pub fn with_skipped_file_callback(
1292        mut self,
1293        callback: impl FnMut(SkippedFileInfo) + 'static,
1294    ) -> Self {
1295        self.skipped_file_callback = Box::new(callback);
1296        self
1297    }
1298
1299    /// Writes a single object into the bundle.
1300    ///
1301    /// Returns `Ok(true)` if any source files were added to the bundle, or `Ok(false)` if no
1302    /// sources could be resolved. Otherwise, an error is returned if writing the bundle fails.
1303    ///
1304    /// This finishes the source bundle and flushes the underlying writer.
1305    pub fn write_object<'data, 'object, O, E>(
1306        self,
1307        object: &'object O,
1308        object_name: &str,
1309    ) -> Result<bool, SourceBundleError>
1310    where
1311        O: ObjectLike<'data, 'object, Error = E>,
1312        E: std::error::Error + Send + Sync + 'static,
1313    {
1314        self.write_object_with_filter(object, object_name, |_, _| true)
1315    }
1316
1317    /// Writes a single object into the bundle.
1318    ///
1319    /// Returns `Ok(true)` if any source files were added to the bundle, or `Ok(false)` if no
1320    /// sources could be resolved. Otherwise, an error is returned if writing the bundle fails.
1321    ///
1322    /// This finishes the source bundle and flushes the underlying writer.
1323    ///
1324    /// Before a file is written a callback is invoked which can return `false` to skip a file.
1325    pub fn write_object_with_filter<'data, 'object, O, E, F>(
1326        mut self,
1327        object: &'object O,
1328        object_name: &str,
1329        mut filter: F,
1330    ) -> Result<bool, SourceBundleError>
1331    where
1332        O: ObjectLike<'data, 'object, Error = E>,
1333        E: std::error::Error + Send + Sync + 'static,
1334        F: FnMut(&FileEntry, &Option<SourceFileDescriptor<'_>>) -> bool,
1335    {
1336        let mut files_handled = BTreeSet::new();
1337        let mut referenced_files = BTreeSet::new();
1338
1339        let session = object
1340            .debug_session()
1341            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadDebugFile, e))?;
1342
1343        self.set_attribute("arch", object.arch().to_string());
1344        self.set_attribute("debug_id", object.debug_id().to_string());
1345        self.set_attribute("object_name", object_name);
1346        if let Some(code_id) = object.code_id() {
1347            self.set_attribute("code_id", code_id.to_string());
1348        }
1349
1350        for file_result in session.files() {
1351            let file = file_result
1352                .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadDebugFile, e))?;
1353            let filename = file.abs_path_str();
1354
1355            if files_handled.contains(&filename) {
1356                continue;
1357            }
1358
1359            let source = if filename.starts_with('<') && filename.ends_with('>') {
1360                None
1361            } else {
1362                let source_from_object = session
1363                    .source_by_path(&filename)
1364                    .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadDebugFile, e))?;
1365                if filter(&file, &source_from_object) {
1366                    // Note: we could also use source code directly from the object, but that's not
1367                    // what happened here previously - only collected locally present files.
1368                    std::fs::read(&filename).ok()
1369                } else {
1370                    None
1371                }
1372            };
1373
1374            if let Some(source) = source {
1375                let bundle_path = sanitize_bundle_path(&filename);
1376                let mut info = SourceFileInfo::new();
1377                info.set_ty(SourceFileType::Source);
1378                info.set_path(filename.clone());
1379
1380                if self.collect_il2cpp {
1381                    collect_il2cpp_sources(&source, &mut referenced_files);
1382                }
1383
1384                self.add_file_skip_read_failed(bundle_path, source.as_slice(), info)?;
1385            }
1386
1387            files_handled.insert(filename);
1388        }
1389
1390        for filename in referenced_files {
1391            if files_handled.contains(&filename) {
1392                continue;
1393            }
1394
1395            if let Some(source) = File::open(&filename).ok().map(BufReader::new) {
1396                let bundle_path = sanitize_bundle_path(&filename);
1397                let mut info = SourceFileInfo::new();
1398                info.set_ty(SourceFileType::Source);
1399                info.set_path(filename.clone());
1400
1401                self.add_file_skip_read_failed(bundle_path, source, info)?
1402            }
1403        }
1404
1405        let is_empty = self.is_empty();
1406        self.finish()?;
1407
1408        Ok(!is_empty)
1409    }
1410
1411    /// Writes the manifest to the bundle and flushes the underlying file handle.
1412    pub fn finish(mut self) -> Result<(), SourceBundleError> {
1413        self.write_manifest()?;
1414        self.writer
1415            .finish()
1416            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1417        Ok(())
1418    }
1419
1420    /// Returns the full path for a file within the source bundle.
1421    fn file_path(&self, path: &str) -> String {
1422        format!("{FILES_PATH}/{path}")
1423    }
1424
1425    /// Returns a unique path for a file.
1426    ///
1427    /// Returns the path if the file does not exist already. Otherwise, a counter is appended to the
1428    /// file path (e.g. `.1`, `.2`, etc).
1429    fn unique_path(&self, mut path: String) -> String {
1430        let mut duplicates = 0;
1431
1432        while self.manifest.files.contains_key(&path) {
1433            duplicates += 1;
1434            match duplicates {
1435                1 => path.push_str(".1"),
1436                _ => {
1437                    use std::fmt::Write;
1438                    trim_end_matches(&mut path, char::is_numeric);
1439                    write!(path, ".{duplicates}").unwrap();
1440                }
1441            }
1442        }
1443
1444        path
1445    }
1446
1447    /// Flushes the manifest file to the bundle.
1448    fn write_manifest(&mut self) -> Result<(), SourceBundleError> {
1449        self.writer
1450            .start_file(MANIFEST_PATH, default_file_options())
1451            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1452
1453        serde_json::to_writer(&mut self.writer, &self.manifest)
1454            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadManifest, e))?;
1455
1456        Ok(())
1457    }
1458}
1459
/// Scans `source` for `il2cpp` reference comments and records the files they point to.
///
/// A reference is a line that, after trimming whitespace, starts with `//<source_info:`. The
/// text between that prefix and the last `:` of the line is taken as the file name and added to
/// `referenced_files`. Sources that are not valid UTF-8 are ignored entirely.
fn collect_il2cpp_sources(source: &[u8], referenced_files: &mut BTreeSet<String>) {
    let Ok(text) = std::str::from_utf8(source) else {
        return;
    };

    let referenced = text
        .lines()
        .filter_map(|line| line.trim().strip_prefix("//<source_info:"))
        .filter_map(|source_ref| source_ref.rsplit_once(':'))
        .map(|(file, _line)| file);

    for file in referenced {
        // Check first so that already known files do not cost an allocation.
        if !referenced_files.contains(file) {
            referenced_files.insert(file.to_string());
        }
    }
}
1478
1479impl SourceBundleWriter<BufWriter<File>> {
1480    /// Create a bundle writer that writes its output to the given path.
1481    ///
1482    /// If the file does not exist at the given path, it is created. If the file does exist, it is
1483    /// overwritten.
1484    pub fn create<P>(path: P) -> Result<SourceBundleWriter<BufWriter<File>>, SourceBundleError>
1485    where
1486        P: AsRef<Path>,
1487    {
1488        let file = OpenOptions::new()
1489            .read(true)
1490            .write(true)
1491            .create(true)
1492            .truncate(true)
1493            .open(path)
1494            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1495
1496        Self::start(BufWriter::new(file))
1497    }
1498}
1499
1500#[cfg(test)]
1501mod tests {
1502    use super::*;
1503
1504    use std::io::Cursor;
1505
1506    use similar_asserts::assert_eq;
1507    use tempfile::NamedTempFile;
1508
1509    #[test]
1510    fn test_has_file() -> Result<(), SourceBundleError> {
1511        let writer = Cursor::new(Vec::new());
1512        let mut bundle = SourceBundleWriter::start(writer)?;
1513
1514        bundle.add_file("bar.txt", &b"filecontents"[..], SourceFileInfo::default())?;
1515        assert!(bundle.has_file("bar.txt"));
1516
1517        bundle.finish()?;
1518        Ok(())
1519    }
1520
1521    #[test]
1522    fn test_non_utf8() -> Result<(), SourceBundleError> {
1523        let writer = Cursor::new(Vec::new());
1524        let mut bundle = SourceBundleWriter::start(writer)?;
1525
1526        assert!(bundle
1527            .add_file(
1528                "bar.txt",
1529                &[0, 159, 146, 150][..],
1530                SourceFileInfo::default()
1531            )
1532            .is_err());
1533
1534        Ok(())
1535    }
1536
1537    #[test]
1538    fn test_duplicate_files() -> Result<(), SourceBundleError> {
1539        let writer = Cursor::new(Vec::new());
1540        let mut bundle = SourceBundleWriter::start(writer)?;
1541
1542        bundle.add_file("bar.txt", &b"filecontents"[..], SourceFileInfo::default())?;
1543        bundle.add_file("bar.txt", &b"othercontents"[..], SourceFileInfo::default())?;
1544        assert!(bundle.has_file("bar.txt"));
1545        assert!(bundle.has_file("bar.txt.1"));
1546
1547        bundle.finish()?;
1548        Ok(())
1549    }
1550
1551    #[test]
1552    fn debugsession_is_sendsync() {
1553        fn is_sendsync<T: Send + Sync>() {}
1554        is_sendsync::<SourceBundleDebugSession>();
1555    }
1556
1557    #[test]
1558    fn test_normalize_paths() -> Result<(), SourceBundleError> {
1559        let mut writer = Cursor::new(Vec::new());
1560        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1561
1562        for filename in &[
1563            "C:\\users\\martin\\mydebugfile.cs",
1564            "/usr/martin/mydebugfile.h",
1565        ] {
1566            let mut info = SourceFileInfo::new();
1567            info.set_ty(SourceFileType::Source);
1568            info.set_path(filename.to_string());
1569            bundle.add_file_skip_read_failed(
1570                sanitize_bundle_path(filename),
1571                &b"somerandomdata"[..],
1572                info,
1573            )?;
1574        }
1575
1576        bundle.finish()?;
1577        let bundle_bytes = writer.into_inner();
1578        let bundle = SourceBundle::parse(&bundle_bytes)?;
1579
1580        let session = bundle.debug_session().unwrap();
1581
1582        assert!(session
1583            .source_by_path("C:\\users\\martin\\mydebugfile.cs")?
1584            .is_some());
1585        assert!(session
1586            .source_by_path("C:/users/martin/mydebugfile.cs")?
1587            .is_some());
1588        assert!(session
1589            .source_by_path("C:\\users\\martin/mydebugfile.cs")?
1590            .is_some());
1591        assert!(session
1592            .source_by_path("/usr/martin/mydebugfile.h")?
1593            .is_some());
1594        assert!(session
1595            .source_by_path("\\usr\\martin\\mydebugfile.h")?
1596            .is_some());
1597
1598        Ok(())
1599    }
1600
1601    #[test]
1602    fn test_source_descriptor() -> Result<(), SourceBundleError> {
1603        let mut writer = Cursor::new(Vec::new());
1604        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1605
1606        let mut info = SourceFileInfo::default();
1607        info.set_url("https://example.com/bar.js.min".into());
1608        info.set_path("/files/bar.js.min".into());
1609        info.set_ty(SourceFileType::MinifiedSource);
1610        info.add_header(
1611            "debug-id".into(),
1612            "5e618b9f-54a9-4389-b196-519819dd7c47".into(),
1613        );
1614        info.add_header("sourcemap".into(), "bar.js.map".into());
1615        bundle.add_file("bar.js", &b"filecontents"[..], info)?;
1616        assert!(bundle.has_file("bar.js"));
1617
1618        bundle.finish()?;
1619        let bundle_bytes = writer.into_inner();
1620        let bundle = SourceBundle::parse(&bundle_bytes)?;
1621
1622        let sess = bundle.debug_session().unwrap();
1623        let f = sess
1624            .source_by_debug_id(
1625                "5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(),
1626                SourceFileType::MinifiedSource,
1627            )
1628            .unwrap()
1629            .expect("should exist");
1630        assert_eq!(f.contents(), Some("filecontents"));
1631        assert_eq!(f.ty(), SourceFileType::MinifiedSource);
1632        assert_eq!(f.url(), Some("https://example.com/bar.js.min"));
1633        assert_eq!(f.path(), Some("/files/bar.js.min"));
1634        assert_eq!(f.source_mapping_url(), Some("bar.js.map"));
1635
1636        assert!(sess
1637            .source_by_debug_id(
1638                "5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(),
1639                SourceFileType::Source
1640            )
1641            .unwrap()
1642            .is_none());
1643
1644        Ok(())
1645    }
1646
1647    #[test]
1648    fn test_source_mapping_url() -> Result<(), SourceBundleError> {
1649        let mut writer = Cursor::new(Vec::new());
1650        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1651
1652        let mut info = SourceFileInfo::default();
1653        info.set_url("https://example.com/bar.min.js".into());
1654        info.set_ty(SourceFileType::MinifiedSource);
1655        bundle.add_file(
1656            "bar.js",
1657            &b"filecontents\n//# sourceMappingURL=bar.js.map"[..],
1658            info,
1659        )?;
1660
1661        bundle.finish()?;
1662        let bundle_bytes = writer.into_inner();
1663        let bundle = SourceBundle::parse(&bundle_bytes)?;
1664
1665        let sess = bundle.debug_session().unwrap();
1666        let f = sess
1667            .source_by_url("https://example.com/bar.min.js")
1668            .unwrap()
1669            .expect("should exist");
1670        assert_eq!(f.ty(), SourceFileType::MinifiedSource);
1671        assert_eq!(f.url(), Some("https://example.com/bar.min.js"));
1672        assert_eq!(f.source_mapping_url(), Some("bar.js.map"));
1673
1674        Ok(())
1675    }
1676
1677    #[test]
1678    fn test_source_embedded_debug_id() -> Result<(), SourceBundleError> {
1679        let mut writer = Cursor::new(Vec::new());
1680        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1681
1682        let mut info = SourceFileInfo::default();
1683        info.set_url("https://example.com/bar.min.js".into());
1684        info.set_ty(SourceFileType::MinifiedSource);
1685        bundle.add_file(
1686            "bar.js",
1687            &b"filecontents\n//# debugId=5b65abfb23384f0bb3b964c8f734d43f"[..],
1688            info,
1689        )?;
1690
1691        bundle.finish()?;
1692        let bundle_bytes = writer.into_inner();
1693        let bundle = SourceBundle::parse(&bundle_bytes)?;
1694
1695        let sess = bundle.debug_session().unwrap();
1696        let f = sess
1697            .source_by_url("https://example.com/bar.min.js")
1698            .unwrap()
1699            .expect("should exist");
1700        assert_eq!(f.ty(), SourceFileType::MinifiedSource);
1701        assert_eq!(
1702            f.debug_id(),
1703            Some("5b65abfb-2338-4f0b-b3b9-64c8f734d43f".parse().unwrap())
1704        );
1705
1706        Ok(())
1707    }
1708
1709    #[test]
1710    fn test_sourcemap_embedded_debug_id() -> Result<(), SourceBundleError> {
1711        let mut writer = Cursor::new(Vec::new());
1712        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1713
1714        let mut info = SourceFileInfo::default();
1715        info.set_url("https://example.com/bar.js.map".into());
1716        info.set_ty(SourceFileType::SourceMap);
1717        bundle.add_file(
1718            "bar.js.map",
1719            &br#"{"debug_id": "5b65abfb-2338-4f0b-b3b9-64c8f734d43f"}"#[..],
1720            info,
1721        )?;
1722
1723        bundle.finish()?;
1724        let bundle_bytes = writer.into_inner();
1725        let bundle = SourceBundle::parse(&bundle_bytes)?;
1726
1727        let sess = bundle.debug_session().unwrap();
1728        let f = sess
1729            .source_by_url("https://example.com/bar.js.map")
1730            .unwrap()
1731            .expect("should exist");
1732        assert_eq!(f.ty(), SourceFileType::SourceMap);
1733        assert_eq!(
1734            f.debug_id(),
1735            Some("5b65abfb-2338-4f0b-b3b9-64c8f734d43f".parse().unwrap())
1736        );
1737
1738        Ok(())
1739    }
1740
1741    #[test]
1742    fn test_il2cpp_reference() -> Result<(), Box<dyn std::error::Error>> {
1743        let mut cpp_file = NamedTempFile::new()?;
1744        let mut cs_file = NamedTempFile::new()?;
1745
1746        let cpp_contents = format!("foo\n//<source_info:{}:111>\nbar", cs_file.path().display());
1747
1748        // well, a source bundle itself is an `ObjectLike` :-)
1749        let object_buf = {
1750            let mut writer = Cursor::new(Vec::new());
1751            let mut bundle = SourceBundleWriter::start(&mut writer)?;
1752
1753            let path = cpp_file.path().to_string_lossy();
1754            let mut info = SourceFileInfo::new();
1755            info.set_ty(SourceFileType::Source);
1756            info.set_path(path.to_string());
1757            bundle.add_file(path, cpp_contents.as_bytes(), info)?;
1758
1759            bundle.finish()?;
1760            writer.into_inner()
1761        };
1762        let object = SourceBundle::parse(&object_buf)?;
1763
1764        // write file contents to temp files
1765        cpp_file.write_all(cpp_contents.as_bytes())?;
1766        cs_file.write_all(b"some C# source")?;
1767
1768        // write the actual source bundle based on the `object`
1769        let mut output_buf = Cursor::new(Vec::new());
1770        let mut writer = SourceBundleWriter::start(&mut output_buf)?;
1771        writer.collect_il2cpp_sources(true);
1772
1773        let written = writer.write_object(&object, "whatever")?;
1774        assert!(written);
1775        let output_buf = output_buf.into_inner();
1776
1777        // and collect all the included files
1778        let source_bundle = SourceBundle::parse(&output_buf)?;
1779        let session = source_bundle.debug_session()?;
1780        let actual_files: BTreeMap<_, _> = session
1781            .files()
1782            .flatten()
1783            .flat_map(|f| {
1784                let path = f.abs_path_str();
1785                session
1786                    .source_by_path(&path)
1787                    .ok()
1788                    .flatten()
1789                    .map(|source| (path, source.contents().unwrap().to_string()))
1790            })
1791            .collect();
1792
1793        let mut expected_files = BTreeMap::new();
1794        expected_files.insert(cpp_file.path().to_string_lossy().into_owned(), cpp_contents);
1795        expected_files.insert(
1796            cs_file.path().to_string_lossy().into_owned(),
1797            String::from("some C# source"),
1798        );
1799
1800        assert_eq!(actual_files, expected_files);
1801
1802        Ok(())
1803    }
1804
1805    #[test]
1806    fn test_bundle_paths() {
1807        assert_eq!(sanitize_bundle_path("foo"), "foo");
1808        assert_eq!(sanitize_bundle_path("foo/bar"), "foo/bar");
1809        assert_eq!(sanitize_bundle_path("/foo/bar"), "foo/bar");
1810        assert_eq!(sanitize_bundle_path("C:/foo/bar"), "C/foo/bar");
1811        assert_eq!(sanitize_bundle_path("\\foo\\bar"), "foo/bar");
1812        assert_eq!(sanitize_bundle_path("\\\\UNC\\foo\\bar"), "UNC/foo/bar");
1813    }
1814
1815    #[test]
1816    fn test_source_links() -> Result<(), SourceBundleError> {
1817        let mut writer = Cursor::new(Vec::new());
1818        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1819
1820        let mut info = SourceFileInfo::default();
1821        info.set_url("https://example.com/bar/index.min.js".into());
1822        info.set_path("/files/bar/index.min.js".into());
1823        info.set_ty(SourceFileType::MinifiedSource);
1824        bundle.add_file("bar/index.js", &b"filecontents"[..], info)?;
1825        assert!(bundle.has_file("bar/index.js"));
1826
1827        bundle
1828            .manifest
1829            .source_links
1830            .insert("/files/bar/*".to_string(), "https://nope.com/*".into());
1831        bundle
1832            .manifest
1833            .source_links
1834            .insert("/files/foo/*".to_string(), "https://example.com/*".into());
1835
1836        bundle.finish()?;
1837        let bundle_bytes = writer.into_inner();
1838        let bundle = SourceBundle::parse(&bundle_bytes)?;
1839
1840        let sess = bundle.debug_session().unwrap();
1841
1842        // This should be resolved by source link
1843        let foo = sess
1844            .source_by_path("/files/foo/index.min.js")
1845            .unwrap()
1846            .expect("should exist");
1847        assert_eq!(foo.contents(), None);
1848        assert_eq!(foo.ty(), SourceFileType::Source);
1849        assert_eq!(foo.url(), Some("https://example.com/index.min.js"));
1850        assert_eq!(foo.path(), None);
1851
1852        // This should be resolved by embedded file, even though the link also exists
1853        let bar = sess
1854            .source_by_path("/files/bar/index.min.js")
1855            .unwrap()
1856            .expect("should exist");
1857        assert_eq!(bar.contents(), Some("filecontents"));
1858        assert_eq!(bar.ty(), SourceFileType::MinifiedSource);
1859        assert_eq!(bar.url(), Some("https://example.com/bar/index.min.js"));
1860        assert_eq!(bar.path(), Some("/files/bar/index.min.js"));
1861
1862        Ok(())
1863    }
1864}