symbolic_debuginfo/sourcebundle/
mod.rs

1//! Support for Source Bundles, a proprietary archive containing source code.
2//!
3//! This module defines the [`SourceBundle`] type. Since not all object file containers specify a
4//! standardized way to inline sources into debug information, this can be used to associate source
5//! contents to debug files.
6//!
7//! Source bundles are ZIP archives with a well-defined internal structure. Most importantly, they
//! contain source files in a nested directory structure. Additionally, there is metadata
//! associated with every source file, which allows storing additional properties, such as the
//! original file system path, a web URL, and custom headers.
11//!
12//! The internal structure is as follows:
13//!
14//! ```txt
15//! manifest.json
16//! files/
17//!   file1.txt
18//!   subfolder/
19//!     file2.txt
20//! ```
21//!
22//! `SourceBundle` implements the [`ObjectLike`] trait. When created from another object, it carries
23//! over its meta data, such as the [`debug_id`] or [`code_id`]. However, source bundles never store
24//! symbols or debug information. To obtain sources or iterate files stored in this source bundle,
25//! use [`SourceBundle::debug_session`].
26//!
27//! Source bundles can be created manually or by converting any `ObjectLike` using
28//! [`SourceBundleWriter`].
29//!
30//! [`ObjectLike`]: ../trait.ObjectLike.html
31//! [`SourceBundle`]: struct.SourceBundle.html
32//! [`debug_id`]: struct.SourceBundle.html#method.debug_id
33//! [`code_id`]: struct.SourceBundle.html#method.code_id
34//! [`SourceBundle::debug_session`]: struct.SourceBundle.html#method.debug_session
35//! [`SourceBundleWriter`]: struct.SourceBundleWriter.html
36//!
37//! ## Artifact Bundles
38//!
39//! Source bundles share the format with a related concept, called an "artifact bundle".  Artifact
40//! bundles are essentially source bundles but they typically contain sources referred to by
41//! JavaScript source maps and source maps themselves.  For instance in an artifact
42//! bundle a file entry has a `url` and might carry `headers` or individual debug IDs
43//! per source file.
44
45mod utf8_reader;
46
47use std::borrow::Cow;
48use std::collections::{BTreeMap, BTreeSet, HashMap};
49use std::error::Error;
50use std::fmt::{Display, Formatter};
51use std::fs::{File, OpenOptions};
52use std::io::{BufReader, BufWriter, ErrorKind, Read, Seek, Write};
53use std::path::Path;
54use std::sync::Arc;
55use std::{fmt, io};
56
57use parking_lot::Mutex;
58use regex::Regex;
59use serde::{Deserialize, Deserializer, Serialize};
60use thiserror::Error;
61use zip::{write::SimpleFileOptions, ZipWriter};
62
63use symbolic_common::{Arch, AsSelf, CodeId, DebugId, SourceLinkMappings};
64
65use self::utf8_reader::Utf8Reader;
66use crate::base::*;
67use crate::js::{
68    discover_debug_id, discover_sourcemap_embedded_debug_id, discover_sourcemaps_location,
69};
70
/// Magic bytes of a source bundle. They are prepended to the ZIP file.
static BUNDLE_MAGIC: [u8; 4] = *b"SYSB";

/// Version of the bundle and manifest format.
static BUNDLE_VERSION: u32 = 2;

/// Relative path to the manifest file in the bundle file.
static MANIFEST_PATH: &str = "manifest.json";

/// Path at which files will be written into the bundle.
static FILES_PATH: &str = "files";

lazy_static::lazy_static! {
    // Matches an optional colon followed by a run of one or more forward slashes or
    // backslashes, e.g. `/`, `\\` or the `:\` that follows a drive letter.
    // The pattern is a constant, so the `unwrap` only guards against a typo in the literal.
    static ref SANE_PATH_RE: Regex = Regex::new(r":?[/\\]+").unwrap();
}
86
/// The kind of a [`SourceBundleError`].
///
/// The enum is marked `#[non_exhaustive]`, so code outside this crate has to include a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SourceBundleErrorKind {
    /// The source bundle container is damaged.
    BadZip,

    /// An error when reading/writing the manifest.
    BadManifest,

    /// The `Object` contains invalid data and cannot be converted.
    BadDebugFile,

    /// Generic error when writing a source bundle, most likely IO.
    WriteFailed,

    /// The file is not valid UTF-8 or could not be read for another reason.
    ReadFailed,
}
106
107impl fmt::Display for SourceBundleErrorKind {
108    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
109        match self {
110            Self::BadZip => write!(f, "malformed zip archive"),
111            Self::BadManifest => write!(f, "failed to read/write source bundle manifest"),
112            Self::BadDebugFile => write!(f, "malformed debug info file"),
113            Self::WriteFailed => write!(f, "failed to write source bundle"),
114            Self::ReadFailed => write!(f, "file could not be read as UTF-8"),
115        }
116    }
117}
118
/// An error returned when handling [`SourceBundle`](struct.SourceBundle.html).
///
/// The `Display` output is the description of the contained kind (`#[error("{kind}")]`).
#[derive(Debug, Error)]
#[error("{kind}")]
pub struct SourceBundleError {
    /// Classification of the failure.
    kind: SourceBundleErrorKind,
    /// Optional underlying error, exposed as the error source via `#[source]`.
    #[source]
    source: Option<Box<dyn Error + Send + Sync + 'static>>,
}
127
128impl SourceBundleError {
129    /// Creates a new SourceBundle error from a known kind of error as well as an arbitrary error
130    /// payload.
131    ///
132    /// This function is used to generically create source bundle errors which do not originate from
133    /// `symbolic` itself. The `source` argument is an arbitrary payload which will be contained in
134    /// this [`SourceBundleError`].
135    pub fn new<E>(kind: SourceBundleErrorKind, source: E) -> Self
136    where
137        E: Into<Box<dyn Error + Send + Sync>>,
138    {
139        let source = Some(source.into());
140        Self { kind, source }
141    }
142
143    /// Returns the corresponding [`SourceBundleErrorKind`] for this error.
144    pub fn kind(&self) -> SourceBundleErrorKind {
145        self.kind
146    }
147}
148
149impl From<SourceBundleErrorKind> for SourceBundleError {
150    fn from(kind: SourceBundleErrorKind) -> Self {
151        Self { kind, source: None }
152    }
153}
154
/// Trims matching suffixes of a string in-place.
///
/// Characters are removed from the end of `string` for as long as `pat` returns `true` for
/// the last character. The first trailing character that does not match stops the trimming.
fn trim_end_matches<F>(string: &mut String, mut pat: F)
where
    F: FnMut(char) -> bool,
{
    while let Some(last) = string.chars().next_back() {
        if !pat(last) {
            break;
        }
        string.pop();
    }
}
163
/// The type of a [`SourceFileInfo`](struct.SourceFileInfo.html).
///
/// Variants are (de)serialized in `snake_case`, e.g. `minified_source`.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[serde(rename_all = "snake_case")]
pub enum SourceFileType {
    /// Regular source file.
    Source,

    /// Minified source code.
    MinifiedSource,

    /// JavaScript sourcemap.
    SourceMap,

    /// Indexed JavaScript RAM bundle.
    IndexedRamBundle,
}
180
/// Meta data information of a file in a [`SourceBundle`](struct.SourceBundle.html).
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct SourceFileInfo {
    /// Type of the file; serialized under the key `type` and omitted when unset.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    ty: Option<SourceFileType>,

    /// File system path; an empty string means "not set" and is not serialized.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    path: String,

    /// Web URL; an empty string means "not set" and is not serialized.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    url: String,

    /// Header-style attributes. Keys are lowercased when deserialized (see
    /// `deserialize_headers`); the map is not serialized when empty.
    #[serde(
        default,
        skip_serializing_if = "BTreeMap::is_empty",
        deserialize_with = "deserialize_headers"
    )]
    headers: BTreeMap<String, String>,
}
200
201/// Helper to ensure that header keys are normalized to lowercase
202fn deserialize_headers<'de, D>(deserializer: D) -> Result<BTreeMap<String, String>, D::Error>
203where
204    D: Deserializer<'de>,
205{
206    let rv: BTreeMap<String, String> = Deserialize::deserialize(deserializer)?;
207    if rv.is_empty()
208        || rv
209            .keys()
210            .all(|x| !x.chars().any(|c| c.is_ascii_uppercase()))
211    {
212        Ok(rv)
213    } else {
214        Ok(rv
215            .into_iter()
216            .map(|(k, v)| (k.to_ascii_lowercase(), v))
217            .collect())
218    }
219}
220
221impl SourceFileInfo {
222    /// Creates default file information.
223    pub fn new() -> Self {
224        Self::default()
225    }
226
227    /// Returns the type of the source file.
228    pub fn ty(&self) -> Option<SourceFileType> {
229        self.ty
230    }
231
232    /// Sets the type of the source file.
233    pub fn set_ty(&mut self, ty: SourceFileType) {
234        self.ty = Some(ty);
235    }
236
237    /// Returns the absolute file system path of this file.
238    pub fn path(&self) -> Option<&str> {
239        match self.path.as_str() {
240            "" => None,
241            path => Some(path),
242        }
243    }
244
245    /// Sets the absolute file system path of this file.
246    pub fn set_path(&mut self, path: String) {
247        self.path = path;
248    }
249
250    /// Returns the web URL that of this file.
251    pub fn url(&self) -> Option<&str> {
252        match self.url.as_str() {
253            "" => None,
254            url => Some(url),
255        }
256    }
257
258    /// Sets the web URL of this file.
259    pub fn set_url(&mut self, url: String) {
260        self.url = url;
261    }
262
263    /// Iterates over all attributes represented as headers.
264    pub fn headers(&self) -> impl Iterator<Item = (&str, &str)> {
265        self.headers.iter().map(|(k, v)| (k.as_str(), v.as_str()))
266    }
267
268    /// Retrieves the specified header, if it exists.
269    pub fn header(&self, header: &str) -> Option<&str> {
270        if !header.chars().any(|x| x.is_ascii_uppercase()) {
271            self.headers.get(header).map(String::as_str)
272        } else {
273            self.headers.iter().find_map(|(k, v)| {
274                if k.eq_ignore_ascii_case(header) {
275                    Some(v.as_str())
276                } else {
277                    None
278                }
279            })
280        }
281    }
282
283    /// Adds a custom attribute following header conventions.
284    ///
285    /// Header keys are converted to lowercase before writing as this is
286    /// the canonical format for headers. However, the file format does
287    /// support headers to be case insensitive and they will be lower cased
288    /// upon reading.
289    ///
290    /// Headers on files are primarily be used to add auxiliary information
291    /// to files.  The following headers are known and processed:
292    ///
293    /// - `debug-id`: see [`debug_id`](Self::debug_id)
294    /// - `sourcemap` (and `x-sourcemap`): see [`source_mapping_url`](Self::source_mapping_url)
295    pub fn add_header(&mut self, header: String, value: String) {
296        let mut header = header;
297        if header.chars().any(|x| x.is_ascii_uppercase()) {
298            header = header.to_ascii_lowercase();
299        }
300        self.headers.insert(header, value);
301    }
302
303    /// The debug ID of this minified source or sourcemap if it has any.
304    ///
305    /// Files have a debug ID if they have a header with the key `debug-id`.
306    /// At present debug IDs in source bundles are only ever given to minified
307    /// source files.
308    pub fn debug_id(&self) -> Option<DebugId> {
309        self.header("debug-id").and_then(|x| x.parse().ok())
310    }
311
312    /// The source mapping URL of the given minified source.
313    ///
314    /// Files have a source mapping URL if they have a header with the
315    /// key `sourcemap` (or the `x-sourcemap` legacy header) as part the
316    /// source map specification.
317    pub fn source_mapping_url(&self) -> Option<&str> {
318        self.header("sourcemap")
319            .or_else(|| self.header("x-sourcemap"))
320    }
321
322    /// Returns `true` if this instance does not carry any information.
323    pub fn is_empty(&self) -> bool {
324        self.path.is_empty() && self.ty.is_none() && self.headers.is_empty()
325    }
326}
327
/// A descriptor that provides information about a source file.
///
/// This descriptor is returned from [`source_by_path`](DebugSession::source_by_path)
/// and friends.
///
/// This descriptor holds information that can be used to retrieve information
/// about the source file.  A descriptor has to have at least one of the following
/// to be valid:
///
/// - [`contents`](Self::contents)
/// - [`url`](Self::url)
/// - [`debug_id`](Self::debug_id)
///
/// Debug sessions are not permitted to return invalid source file descriptors.
pub struct SourceFileDescriptor<'a> {
    /// Embedded source text, if the contents are available directly.
    contents: Option<Cow<'a, str>>,
    /// URL of a remotely stored source; takes precedence over the URL in `file_info`.
    remote_url: Option<Cow<'a, str>>,
    /// Manifest meta data of the file, if any.
    file_info: Option<&'a SourceFileInfo>,
}
347
348impl<'a> SourceFileDescriptor<'a> {
349    /// Creates an embedded source file descriptor.
350    pub(crate) fn new_embedded(
351        content: Cow<'a, str>,
352        file_info: Option<&'a SourceFileInfo>,
353    ) -> SourceFileDescriptor<'a> {
354        SourceFileDescriptor {
355            contents: Some(content),
356            remote_url: None,
357            file_info,
358        }
359    }
360
361    /// Creates an remote source file descriptor.
362    pub(crate) fn new_remote(remote_url: Cow<'a, str>) -> SourceFileDescriptor<'a> {
363        SourceFileDescriptor {
364            contents: None,
365            remote_url: Some(remote_url),
366            file_info: None,
367        }
368    }
369
370    /// The type of the file the descriptor points to.
371    pub fn ty(&self) -> SourceFileType {
372        self.file_info
373            .and_then(|x| x.ty())
374            .unwrap_or(SourceFileType::Source)
375    }
376
377    /// The contents of the source file as string, if it's available.
378    ///
379    /// Portable PDBs for instance will often have source information, but rely on
380    /// remote file fetching via Sourcelink to get to the contents.  In that case
381    /// a file descriptor is created, but the contents are missing and instead the
382    /// [`url`](Self::url) can be used.
383    pub fn contents(&self) -> Option<&str> {
384        self.contents.as_deref()
385    }
386
387    /// The contents of the source file as string, if it's available.
388    ///
389    /// This unwraps the [`SourceFileDescriptor`] directly and might avoid a copy of `contents`
390    /// later on.
391    pub fn into_contents(self) -> Option<Cow<'a, str>> {
392        self.contents
393    }
394
395    /// If available returns the URL of this source.
396    ///
397    /// For certain files this is the canoncial URL of where the file is placed.  This
398    /// for instance is the case for minified JavaScript files or source maps which might
399    /// have a canonical URL.  In case of portable PDBs this is also where you would fetch
400    /// the source code from if source links are used.
401    pub fn url(&self) -> Option<&str> {
402        if let Some(ref url) = self.remote_url {
403            Some(url)
404        } else {
405            self.file_info.and_then(|x| x.url())
406        }
407    }
408
409    /// If available returns the file path of this source.
410    ///
411    /// For source bundles that are a companion file to a debug file, this is the canonical
412    /// path of the source file.
413    pub fn path(&self) -> Option<&str> {
414        self.file_info.and_then(|x| x.path())
415    }
416
417    /// The debug ID of the file if available.
418    ///
419    /// For source maps or minified source files symbolic supports embedded debug IDs.  If they
420    /// are in use, the debug ID is returned from here.  The debug ID is discovered from the
421    /// file's `debug-id` header or the embedded `debugId` reference in the file body.
422    pub fn debug_id(&self) -> Option<DebugId> {
423        self.file_info.and_then(|x| x.debug_id()).or_else(|| {
424            if matches!(
425                self.ty(),
426                SourceFileType::Source | SourceFileType::MinifiedSource
427            ) {
428                self.contents().and_then(discover_debug_id)
429            } else if matches!(self.ty(), SourceFileType::SourceMap) {
430                self.contents()
431                    .and_then(discover_sourcemap_embedded_debug_id)
432            } else {
433                None
434            }
435        })
436    }
437
438    /// The source mapping URL reference of the file.
439    ///
440    /// This is used to refer to a source map from a minified file.  Only minified source files
441    /// will have a relationship to a source map.  The source mapping is discovered either from
442    /// a `sourcemap` header in the source manifest, or the `sourceMappingURL` reference in the body.
443    pub fn source_mapping_url(&self) -> Option<&str> {
444        self.file_info
445            .and_then(|x| x.source_mapping_url())
446            .or_else(|| {
447                if matches!(
448                    self.ty(),
449                    SourceFileType::Source | SourceFileType::MinifiedSource
450                ) {
451                    self.contents().and_then(discover_sourcemaps_location)
452                } else {
453                    None
454                }
455            })
456    }
457}
458
/// Version number of a [`SourceBundle`](struct.SourceBundle.html).
///
/// This is a thin wrapper around the raw `u32` version, which is publicly accessible.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
pub struct SourceBundleVersion(pub u32);
462
463impl SourceBundleVersion {
464    /// Creates a new source bundle version.
465    pub fn new(version: u32) -> Self {
466        Self(version)
467    }
468
469    /// Determines whether this version can be handled.
470    ///
471    /// This will return `false`, if the version is newer than what is supported by this library
472    /// version.
473    pub fn is_valid(self) -> bool {
474        self.0 <= BUNDLE_VERSION
475    }
476
477    /// Returns whether the given bundle is at the latest supported versino.
478    pub fn is_latest(self) -> bool {
479        self.0 == BUNDLE_VERSION
480    }
481}
482
483impl Default for SourceBundleVersion {
484    fn default() -> Self {
485        Self(BUNDLE_VERSION)
486    }
487}
488
/// Binary header of the source bundle archive.
///
/// This header precedes the ZIP archive. It is used to detect these files on the file system.
///
/// The layout is `#[repr(C, packed)]`: four magic bytes directly followed by the `u32` version.
#[repr(C, packed)]
#[derive(Clone, Copy, Debug)]
struct SourceBundleHeader {
    /// Magic bytes header.
    pub magic: [u8; 4],

    /// Version of the bundle.
    pub version: u32,
}
501
502impl SourceBundleHeader {
503    fn as_bytes(&self) -> &[u8] {
504        let ptr = self as *const Self as *const u8;
505        unsafe { std::slice::from_raw_parts(ptr, std::mem::size_of::<Self>()) }
506    }
507}
508
509impl Default for SourceBundleHeader {
510    fn default() -> Self {
511        SourceBundleHeader {
512            magic: BUNDLE_MAGIC,
513            version: BUNDLE_VERSION,
514        }
515    }
516}
517
/// Manifest of a [`SourceBundle`] containing information on its contents.
///
/// [`SourceBundle`]: struct.SourceBundle.html
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
struct SourceBundleManifest {
    /// Descriptors for all files in this bundle.
    #[serde(default)]
    pub files: BTreeMap<String, SourceFileInfo>,

    /// Key/value pairs that are handed to `SourceLinkMappings::new` when a debug session is
    /// created. Not serialized when empty.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub source_links: BTreeMap<String, String>,

    /// Arbitrary attributes to include in the bundle.
    ///
    /// Because of `#[serde(flatten)]`, these are stored inline next to the other manifest keys
    /// rather than in a nested object.
    #[serde(flatten)]
    pub attributes: BTreeMap<String, String>,
}
534
/// The parsed manifest of a bundle together with a lookup table for its files.
struct SourceBundleIndex<'data> {
    /// The deserialized manifest.
    manifest: SourceBundleManifest,
    /// Maps a file key (path, URL, or debug ID plus file type) to the key of the file in
    /// `manifest.files`, which is also the name of its entry in the archive.
    indexed_files: HashMap<FileKey<'data>, Arc<String>>,
}
539
540impl<'data> SourceBundleIndex<'data> {
541    pub fn parse(
542        archive: &mut zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>,
543    ) -> Result<Self, SourceBundleError> {
544        let manifest_file = archive
545            .by_name("manifest.json")
546            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
547        let manifest: SourceBundleManifest = serde_json::from_reader(manifest_file)
548            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadManifest, e))?;
549
550        let files = &manifest.files;
551        let mut indexed_files = HashMap::with_capacity(files.len());
552
553        for (zip_path, file_info) in files {
554            let zip_path = Arc::new(zip_path.clone());
555            if !file_info.path.is_empty() {
556                indexed_files.insert(
557                    FileKey::Path(normalize_path(&file_info.path).into()),
558                    zip_path.clone(),
559                );
560            }
561            if !file_info.url.is_empty() {
562                indexed_files.insert(FileKey::Url(file_info.url.clone().into()), zip_path.clone());
563            }
564            if let (Some(debug_id), Some(ty)) = (file_info.debug_id(), file_info.ty()) {
565                indexed_files.insert(FileKey::DebugId(debug_id, ty), zip_path.clone());
566            }
567        }
568
569        Ok(Self {
570            manifest,
571            indexed_files,
572        })
573    }
574}
575
/// A bundle of source code files.
///
/// To create a source bundle, see [`SourceBundleWriter`]. For more information, see the [module
/// level documentation].
///
/// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
/// [module level documentation]: index.html
pub struct SourceBundle<'data> {
    /// The raw bytes this bundle was parsed from.
    data: &'data [u8],
    /// ZIP archive reader over `data`.
    archive: zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>,
    /// Parsed manifest and file lookup table, shared with debug sessions.
    index: Arc<SourceBundleIndex<'data>>,
}
588
589impl fmt::Debug for SourceBundle<'_> {
590    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
591        f.debug_struct("SourceBundle")
592            .field("code_id", &self.code_id())
593            .field("debug_id", &self.debug_id())
594            .field("arch", &self.arch())
595            .field("kind", &self.kind())
596            .field("load_address", &format_args!("{:#x}", self.load_address()))
597            .field("has_symbols", &self.has_symbols())
598            .field("has_debug_info", &self.has_debug_info())
599            .field("has_unwind_info", &self.has_unwind_info())
600            .field("has_sources", &self.has_sources())
601            .field("is_malformed", &self.is_malformed())
602            .finish()
603    }
604}
605
606impl<'data> SourceBundle<'data> {
607    /// Tests whether the buffer could contain a `SourceBundle`.
608    pub fn test(bytes: &[u8]) -> bool {
609        bytes.starts_with(&BUNDLE_MAGIC)
610    }
611
612    /// Tries to parse a `SourceBundle` from the given slice.
613    pub fn parse(data: &'data [u8]) -> Result<SourceBundle<'data>, SourceBundleError> {
614        let mut archive = zip::read::ZipArchive::new(std::io::Cursor::new(data))
615            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
616
617        let index = Arc::new(SourceBundleIndex::parse(&mut archive)?);
618
619        Ok(SourceBundle {
620            archive,
621            data,
622            index,
623        })
624    }
625
626    /// Returns the version of this source bundle format.
627    pub fn version(&self) -> SourceBundleVersion {
628        SourceBundleVersion(BUNDLE_VERSION)
629    }
630
631    /// The container file format, which is always `FileFormat::SourceBundle`.
632    pub fn file_format(&self) -> FileFormat {
633        FileFormat::SourceBundle
634    }
635
636    /// The code identifier of this object.
637    ///
638    /// This is only set if the source bundle was created from an [`ObjectLike`]. It can also be set
639    /// in the [`SourceBundleWriter`] by setting the `"code_id"` attribute.
640    ///
641    /// [`ObjectLike`]: ../trait.ObjectLike.html
642    /// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
643    pub fn code_id(&self) -> Option<CodeId> {
644        self.index
645            .manifest
646            .attributes
647            .get("code_id")
648            .and_then(|x| x.parse().ok())
649    }
650
651    /// The code identifier of this object.
652    ///
653    /// This is only set if the source bundle was created from an [`ObjectLike`]. It can also be set
654    /// in the [`SourceBundleWriter`] by setting the `"debug_id"` attribute.
655    ///
656    /// [`ObjectLike`]: ../trait.ObjectLike.html
657    /// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
658    pub fn debug_id(&self) -> DebugId {
659        self.index
660            .manifest
661            .attributes
662            .get("debug_id")
663            .and_then(|x| x.parse().ok())
664            .unwrap_or_default()
665    }
666
667    /// The debug file name of this object.
668    ///
669    /// This is only set if the source bundle was created from an [`ObjectLike`]. It can also be set
670    /// in the [`SourceBundleWriter`] by setting the `"object_name"` attribute.
671    ///
672    /// [`ObjectLike`]: ../trait.ObjectLike.html
673    /// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
674    pub fn name(&self) -> Option<&str> {
675        self.index
676            .manifest
677            .attributes
678            .get("object_name")
679            .map(|x| x.as_str())
680    }
681
682    /// The CPU architecture of this object.
683    ///
684    /// This is only set if the source bundle was created from an [`ObjectLike`]. It can also be set
685    /// in the [`SourceBundleWriter`] by setting the `"arch"` attribute.
686    ///
687    /// [`ObjectLike`]: ../trait.ObjectLike.html
688    /// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
689    pub fn arch(&self) -> Arch {
690        self.index
691            .manifest
692            .attributes
693            .get("arch")
694            .and_then(|s| s.parse().ok())
695            .unwrap_or_default()
696    }
697
698    /// The kind of this object.
699    ///
700    /// Because source bundles do not contain real objects this is always `ObjectKind::None`.
701    fn kind(&self) -> ObjectKind {
702        ObjectKind::Sources
703    }
704
705    /// The address at which the image prefers to be loaded into memory.
706    ///
707    /// Because source bundles do not contain this information is always `0`.
708    pub fn load_address(&self) -> u64 {
709        0
710    }
711
712    /// Determines whether this object exposes a public symbol table.
713    ///
714    /// Source bundles never have symbols.
715    pub fn has_symbols(&self) -> bool {
716        false
717    }
718
719    /// Returns an iterator over symbols in the public symbol table.
720    pub fn symbols(&self) -> SourceBundleSymbolIterator<'data> {
721        std::iter::empty()
722    }
723
724    /// Returns an ordered map of symbols in the symbol table.
725    pub fn symbol_map(&self) -> SymbolMap<'data> {
726        self.symbols().collect()
727    }
728
729    /// Determines whether this object contains debug information.
730    ///
731    /// Source bundles never have debug info.
732    pub fn has_debug_info(&self) -> bool {
733        false
734    }
735
736    /// Constructs a debugging session.
737    ///
738    /// A debugging session loads certain information from the object file and creates caches for
739    /// efficient access to various records in the debug information. Since this can be quite a
740    /// costly process, try to reuse the debugging session as long as possible.
741    pub fn debug_session(&self) -> Result<SourceBundleDebugSession<'data>, SourceBundleError> {
742        // NOTE: The `SourceBundleDebugSession` still needs interior mutability, so it still needs
743        // to carry its own Mutex. However that is still preferable to sharing the Mutex of the
744        // `SourceBundle`, which might be shared by multiple threads.
745        // The only thing here that really needs to be `mut` is the `Cursor` / `Seek` position.
746        let archive = Mutex::new(self.archive.clone());
747        let source_links = SourceLinkMappings::new(
748            self.index
749                .manifest
750                .source_links
751                .iter()
752                .map(|(k, v)| (&k[..], &v[..])),
753        );
754        Ok(SourceBundleDebugSession {
755            index: Arc::clone(&self.index),
756            archive,
757            source_links,
758        })
759    }
760
761    /// Determines whether this object contains stack unwinding information.
762    pub fn has_unwind_info(&self) -> bool {
763        false
764    }
765
766    /// Determines whether this object contains embedded source.
767    pub fn has_sources(&self) -> bool {
768        true
769    }
770
771    /// Determines whether this object is malformed and was only partially parsed
772    pub fn is_malformed(&self) -> bool {
773        false
774    }
775
776    /// Returns the raw data of the source bundle.
777    pub fn data(&self) -> &'data [u8] {
778        self.data
779    }
780
781    /// Returns true if this source bundle contains no source code.
782    pub fn is_empty(&self) -> bool {
783        self.index.manifest.files.is_empty()
784    }
785}
786
impl<'slf, 'data: 'slf> AsSelf<'slf> for SourceBundle<'data> {
    type Ref = SourceBundle<'slf>;

    /// Reinterprets `&SourceBundle<'data>` as `&SourceBundle<'slf>`.
    fn as_self(&'slf self) -> &'slf Self::Ref {
        // SAFETY: NOTE(review): the transmute only changes the lifetime parameter from
        // `'data` to the shorter `'slf` (`'data: 'slf`). This presumably relies on every
        // field of `SourceBundle` being covariant in `'data` — confirm before changing
        // the fields of the struct.
        unsafe { std::mem::transmute(self) }
    }
}
794
// Both methods delegate to the inherent associated functions of the same name on
// `SourceBundle`. Inherent items take precedence over trait items in path resolution, so
// these calls do not recurse into the trait implementation.
impl<'data> Parse<'data> for SourceBundle<'data> {
    type Error = SourceBundleError;

    /// Parses a source bundle from `data`; see the inherent `SourceBundle::parse`.
    fn parse(data: &'data [u8]) -> Result<Self, Self::Error> {
        SourceBundle::parse(data)
    }

    /// Checks for the bundle magic bytes; see the inherent `SourceBundle::test`.
    fn test(data: &'data [u8]) -> bool {
        SourceBundle::test(data)
    }
}
806
// Every method forwards to the inherent method of the same name on `SourceBundle`.
// Method-call syntax prefers inherent methods over trait methods, so none of these calls
// recurse into this trait implementation.
impl<'data: 'object, 'object> ObjectLike<'data, 'object> for SourceBundle<'data> {
    type Error = SourceBundleError;
    type Session = SourceBundleDebugSession<'data>;
    type SymbolIterator = SourceBundleSymbolIterator<'data>;

    fn file_format(&self) -> FileFormat {
        self.file_format()
    }

    fn code_id(&self) -> Option<CodeId> {
        self.code_id()
    }

    fn debug_id(&self) -> DebugId {
        self.debug_id()
    }

    fn arch(&self) -> Arch {
        self.arch()
    }

    fn kind(&self) -> ObjectKind {
        // The inherent `kind` is private, but it is accessible from within this module.
        self.kind()
    }

    fn load_address(&self) -> u64 {
        self.load_address()
    }

    fn has_symbols(&self) -> bool {
        self.has_symbols()
    }

    fn symbol_map(&self) -> SymbolMap<'data> {
        self.symbol_map()
    }

    fn symbols(&self) -> Self::SymbolIterator {
        self.symbols()
    }

    fn has_debug_info(&self) -> bool {
        self.has_debug_info()
    }

    fn debug_session(&self) -> Result<Self::Session, Self::Error> {
        self.debug_session()
    }

    fn has_unwind_info(&self) -> bool {
        self.has_unwind_info()
    }

    fn has_sources(&self) -> bool {
        self.has_sources()
    }

    fn is_malformed(&self) -> bool {
        self.is_malformed()
    }
}
868
/// An iterator yielding symbols from a source bundle.
///
/// This is an alias for an empty iterator, so it never yields a symbol.
pub type SourceBundleSymbolIterator<'data> = std::iter::Empty<Symbol<'data>>;
871
/// Key under which a file of a source bundle can be looked up in the index.
#[derive(Debug, Hash, PartialEq, Eq)]
enum FileKey<'a> {
    /// Lookup by file system path; paths are passed through `normalize_path` before being
    /// wrapped in this variant.
    Path(Cow<'a, str>),
    /// Lookup by URL.
    Url(Cow<'a, str>),
    /// Lookup by debug ID in combination with the type of the file.
    DebugId(DebugId, SourceFileType),
}
878
/// Debug session for SourceBundle objects.
pub struct SourceBundleDebugSession<'data> {
    /// The session's own clone of the ZIP archive reader; locked while an entry is read.
    archive: Mutex<zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>>,
    /// Manifest and file lookup table shared with the originating `SourceBundle`.
    index: Arc<SourceBundleIndex<'data>>,
    /// Source link mappings built from the manifest's `source_links`.
    source_links: SourceLinkMappings,
}
885
886impl SourceBundleDebugSession<'_> {
887    /// Returns an iterator over all source files in this debug file.
888    pub fn files(&self) -> SourceBundleFileIterator<'_> {
889        SourceBundleFileIterator {
890            files: self.index.manifest.files.values(),
891        }
892    }
893
    /// Returns an iterator over all functions in this debug file.
    ///
    /// The returned iterator is always empty.
    pub fn functions(&self) -> SourceBundleFunctionIterator<'_> {
        std::iter::empty()
    }
898
899    /// Get source by the path of a file in the bundle.
900    fn source_by_zip_path(&self, zip_path: &str) -> Result<String, SourceBundleError> {
901        let mut archive = self.archive.lock();
902        let mut file = archive
903            .by_name(zip_path)
904            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
905        let mut source_content = String::new();
906
907        file.read_to_string(&mut source_content)
908            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
909        Ok(source_content)
910    }
911
912    /// Looks up a source file descriptor.
913    ///
914    /// The file is looked up in both the embedded files and
915    /// in the included source link mappings, in that order.
916    fn get_source_file_descriptor(
917        &self,
918        key: FileKey,
919    ) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
920        if let Some(zip_path) = self.index.indexed_files.get(&key) {
921            let zip_path = zip_path.as_str();
922            let content = Cow::Owned(self.source_by_zip_path(zip_path)?);
923            let info = self.index.manifest.files.get(zip_path);
924            let descriptor = SourceFileDescriptor::new_embedded(content, info);
925            return Ok(Some(descriptor));
926        }
927
928        let FileKey::Path(path) = key else {
929            return Ok(None);
930        };
931
932        Ok(self
933            .source_links
934            .resolve(&path)
935            .map(|s| SourceFileDescriptor::new_remote(s.into())))
936    }
937
938    /// See [DebugSession::source_by_path] for more information.
939    pub fn source_by_path(
940        &self,
941        path: &str,
942    ) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
943        self.get_source_file_descriptor(FileKey::Path(normalize_path(path).into()))
944    }
945
946    /// Like [`source_by_path`](Self::source_by_path) but looks up by URL.
947    pub fn source_by_url(
948        &self,
949        url: &str,
950    ) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
951        self.get_source_file_descriptor(FileKey::Url(url.into()))
952    }
953
954    /// Looks up some source by debug ID and file type.
955    ///
956    /// Lookups by [`DebugId`] require knowledge of the file that is supposed to be
957    /// looked up as multiple files (one per type) can share the same debug ID.
958    /// Special care needs to be taken about [`SourceFileType::IndexedRamBundle`]
959    /// and [`SourceFileType::SourceMap`] which are different file types despite
960    /// the name of it.
961    ///
962    /// # Note on Abstractions
963    ///
964    /// This method is currently not exposed via a standardized debug session
965    /// as it's primarily used for the JavaScript processing system which uses
966    /// different abstractions.
967    pub fn source_by_debug_id(
968        &self,
969        debug_id: DebugId,
970        ty: SourceFileType,
971    ) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
972        self.get_source_file_descriptor(FileKey::DebugId(debug_id, ty))
973    }
974}
975
// NOTE(review): every method in this impl forwards to the method of the same name on `self`.
// These calls presumably resolve to the inherent `SourceBundleDebugSession` methods (inherent
// methods take precedence over trait methods) -- confirm, since they would otherwise recurse.
impl<'session> DebugSession<'session> for SourceBundleDebugSession<'_> {
    type Error = SourceBundleError;
    type FunctionIterator = SourceBundleFunctionIterator<'session>;
    type FileIterator = SourceBundleFileIterator<'session>;

    /// Returns an iterator over the functions of this session.
    fn functions(&'session self) -> Self::FunctionIterator {
        self.functions()
    }

    /// Returns an iterator over the files of this session.
    fn files(&'session self) -> Self::FileIterator {
        self.files()
    }

    /// Looks up a source file descriptor by the path of the file.
    fn source_by_path(&self, path: &str) -> Result<Option<SourceFileDescriptor<'_>>, Self::Error> {
        self.source_by_path(path)
    }
}
993
/// Allows viewing a session with the longer `'data` lifetime as one with the shorter `'slf`.
impl<'slf, 'data: 'slf> AsSelf<'slf> for SourceBundleDebugSession<'data> {
    type Ref = SourceBundleDebugSession<'slf>;

    fn as_self(&'slf self) -> &'slf Self::Ref {
        // NOTE(review): the transmute only changes the lifetime parameter of the session type
        // from `'data` to the shorter `'slf`. This is presumably sound because the type is
        // covariant in `'data` -- confirm before touching the fields of the session.
        unsafe { std::mem::transmute(self) }
    }
}
1001
/// An iterator over source files in a SourceBundle object.
pub struct SourceBundleFileIterator<'s> {
    /// Iterator over the values of a `String`-keyed map of [`SourceFileInfo`]; the debug session
    /// creates it from the `files` map of the manifest.
    files: std::collections::btree_map::Values<'s, String, SourceFileInfo>,
}
1006
1007impl<'s> Iterator for SourceBundleFileIterator<'s> {
1008    type Item = Result<FileEntry<'s>, SourceBundleError>;
1009
1010    fn next(&mut self) -> Option<Self::Item> {
1011        let source_file = self.files.next()?;
1012        Some(Ok(FileEntry::new(
1013            Cow::default(),
1014            FileInfo::from_path(source_file.path.as_bytes()),
1015        )))
1016    }
1017}
1018
/// An iterator over functions in a SourceBundle object.
///
/// This is an alias for [`std::iter::Empty`], so it never yields an item.
pub type SourceBundleFunctionIterator<'s> =
    std::iter::Empty<Result<Function<'s>, SourceBundleError>>;
1022
1023impl SourceBundleManifest {
1024    /// Creates a new, empty manifest.
1025    pub fn new() -> Self {
1026        Self::default()
1027    }
1028}
1029
1030/// Generates a normalized path for a file in the bundle.
1031///
1032/// This removes all special characters. The path in the bundle will mostly resemble the original
1033/// path, except for unsupported components.
1034fn sanitize_bundle_path(path: &str) -> String {
1035    let mut sanitized = SANE_PATH_RE.replace_all(path, "/").into_owned();
1036    if sanitized.starts_with('/') {
1037        sanitized.remove(0);
1038    }
1039    sanitized
1040}
1041
/// Normalizes a path to use forward slashes as separators, following the Linux convention.
///
/// Every backslash is turned into a forward slash; all other characters are kept as they are.
fn normalize_path(path: &str) -> String {
    path.chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1046
/// Contains information about a file skipped in the SourceBundleWriter
#[derive(Debug)]
pub struct SkippedFileInfo<'a> {
    /// Path of the file that was skipped.
    path: &'a str,
    /// Human-readable reason why the file was skipped.
    reason: &'a str,
}
1053
1054impl<'a> SkippedFileInfo<'a> {
1055    fn new(path: &'a str, reason: &'a str) -> Self {
1056        Self { path, reason }
1057    }
1058
1059    /// Returns the path of the skipped file.
1060    pub fn path(&self) -> &str {
1061        self.path
1062    }
1063
1064    /// Get the human-readable reason why the file was skipped
1065    pub fn reason(&self) -> &str {
1066        self.reason
1067    }
1068}
1069
1070impl Display for SkippedFileInfo<'_> {
1071    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1072        write!(f, "Skipped file {} due to: {}", self.path, self.reason)
1073    }
1074}
1075
/// Writer to create [`SourceBundles`].
///
/// Writers can either [create a new file] or be started on an [existing writer]. Then, use
/// [`add_file`] to add files and finally call [`finish`] to flush the archive to
/// the underlying writer.
///
/// Note that dropping the writer without calling [`finish`] does not write the manifest to the
/// bundle, since the manifest is only written by [`finish`].
///
/// ```no_run
/// # use std::fs::File;
/// # use symbolic_debuginfo::sourcebundle::{SourceBundleWriter, SourceFileInfo};
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut bundle = SourceBundleWriter::create("bundle.zip")?;
///
/// // Add file called "foo.txt"
/// let file = File::open("my_file.txt")?;
/// bundle.add_file("foo.txt", file, SourceFileInfo::default())?;
///
/// // Flush the bundle to disk
/// bundle.finish()?;
/// # Ok(()) }
/// ```
///
/// [`SourceBundles`]: struct.SourceBundle.html
/// [create a new file]: struct.SourceBundleWriter.html#method.create
/// [existing writer]: struct.SourceBundleWriter.html#method.start
/// [`add_file`]: struct.SourceBundleWriter.html#method.add_file
/// [`finish`]: struct.SourceBundleWriter.html#method.finish
pub struct SourceBundleWriter<W>
where
    W: Seek + Write,
{
    /// Manifest collecting the attributes and the info of every added file. It is serialized into
    /// the archive when the writer is finished.
    manifest: SourceBundleManifest,
    /// ZIP writer that all files and the manifest are written to.
    writer: ZipWriter<W>,
    /// Whether sources are scanned for il2cpp source annotations when writing an object.
    collect_il2cpp: bool,
    /// Callback invoked for every file that is skipped because it failed with `ReadFailed`.
    skipped_file_callback: Box<dyn FnMut(SkippedFileInfo)>,
}
1113
1114fn default_file_options() -> SimpleFileOptions {
1115    // TODO: should we maybe acknowledge that its the year 2023 and switch to zstd eventually?
1116    // Though it obviously needs to be supported across the whole platform,
1117    // which does not seem to be the case for Python?
1118
1119    // Depending on `zip` crate feature flags, it might default to the current time.
1120    // Using an explicit `DateTime::default` gives us a deterministic `1980-01-01T00:00:00`.
1121    SimpleFileOptions::default().last_modified_time(zip::DateTime::default())
1122}
1123
1124impl<W> SourceBundleWriter<W>
1125where
1126    W: Seek + Write,
1127{
1128    /// Creates a bundle writer on the given file.
1129    pub fn start(mut writer: W) -> Result<Self, SourceBundleError> {
1130        let header = SourceBundleHeader::default();
1131        writer
1132            .write_all(header.as_bytes())
1133            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1134
1135        Ok(SourceBundleWriter {
1136            manifest: SourceBundleManifest::new(),
1137            writer: ZipWriter::new(writer),
1138            collect_il2cpp: false,
1139            skipped_file_callback: Box::new(|_| ()),
1140        })
1141    }
1142
1143    /// Returns whether the bundle contains any files.
1144    pub fn is_empty(&self) -> bool {
1145        self.manifest.files.is_empty()
1146    }
1147
    /// This controls if source files should be scanned for Il2cpp-specific source annotations,
    /// and the referenced C# files should be bundled up as well.
    ///
    /// The flag is read when an object is written into the bundle. It is `false` for a freshly
    /// started writer.
    pub fn collect_il2cpp_sources(&mut self, collect_il2cpp: bool) {
        self.collect_il2cpp = collect_il2cpp;
    }
1153
    /// Sets a meta data attribute of the bundle.
    ///
    /// Attributes are flushed to the bundle when it is [finished]. Thus, they can be retrieved or
    /// changed at any time before flushing the writer.
    ///
    /// If the attribute was set before, the prior value is returned.
    ///
    /// [finished]: struct.SourceBundleWriter.html#method.finish
    pub fn set_attribute<K, V>(&mut self, key: K, value: V) -> Option<String>
    where
        K: Into<String>,
        V: Into<String>,
    {
        self.manifest.attributes.insert(key.into(), value.into())
    }
1169
1170    /// Removes a meta data attribute of the bundle.
1171    ///
1172    /// If the attribute was set, the last value is returned.
1173    pub fn remove_attribute<K>(&mut self, key: K) -> Option<String>
1174    where
1175        K: AsRef<str>,
1176    {
1177        self.manifest.attributes.remove(key.as_ref())
1178    }
1179
1180    /// Returns the value of a meta data attribute.
1181    pub fn attribute<K>(&mut self, key: K) -> Option<&str>
1182    where
1183        K: AsRef<str>,
1184    {
1185        self.manifest
1186            .attributes
1187            .get(key.as_ref())
1188            .map(String::as_str)
1189    }
1190
1191    /// Determines whether a file at the given path has been added already.
1192    pub fn has_file<S>(&self, path: S) -> bool
1193    where
1194        S: AsRef<str>,
1195    {
1196        let full_path = &self.file_path(path.as_ref());
1197        self.manifest.files.contains_key(full_path)
1198    }
1199
    /// Adds a file and its info to the bundle.
    ///
    /// Only files containing valid UTF-8 are accepted.
    ///
    /// Multiple files can be added at the same path. For the first duplicate, a counter will be
    /// appended to the file name. Any subsequent duplicate increases that counter. For example:
    ///
    /// ```no_run
    /// # use std::fs::File;
    /// # use symbolic_debuginfo::sourcebundle::{SourceBundleWriter, SourceFileInfo};
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let mut bundle = SourceBundleWriter::create("bundle.zip")?;
    ///
    /// // Add file at "foo.txt"
    /// bundle.add_file("foo.txt", File::open("my_duplicate.txt")?, SourceFileInfo::default())?;
    /// assert!(bundle.has_file("foo.txt"));
    ///
    /// // Add duplicate at "foo.txt.1"
    /// bundle.add_file("foo.txt", File::open("my_duplicate.txt")?, SourceFileInfo::default())?;
    /// assert!(bundle.has_file("foo.txt.1"));
    /// # Ok(()) }
    /// ```
    ///
    /// Returns `Ok(())` if the file was successfully added. Otherwise, an error is returned: its
    /// kind is `ReadFailed` if the file is not valid UTF-8, and `WriteFailed` for any other
    /// failure while writing the file. A file that failed is not recorded in the manifest.
    pub fn add_file<S, R>(
        &mut self,
        path: S,
        file: R,
        info: SourceFileInfo,
    ) -> Result<(), SourceBundleError>
    where
        S: AsRef<str>,
        R: Read,
    {
        let mut file_reader = Utf8Reader::new(file);

        let full_path = self.file_path(path.as_ref());
        let unique_path = self.unique_path(full_path);

        self.writer
            .start_file(unique_path.clone(), default_file_options())
            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;

        match io::copy(&mut file_reader, &mut self.writer) {
            Err(e) => {
                // Abort the entry that was started above. If aborting fails as well, that error
                // is returned instead of the copy error.
                self.writer
                    .abort_file()
                    .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;

                // ErrorKind::InvalidData is returned by Utf8Reader when the file is not valid UTF-8.
                let error_kind = match e.kind() {
                    ErrorKind::InvalidData => SourceBundleErrorKind::ReadFailed,
                    _ => SourceBundleErrorKind::WriteFailed,
                };

                Err(SourceBundleError::new(error_kind, e))
            }
            Ok(_) => {
                // Only files that were written completely are recorded in the manifest.
                self.manifest.files.insert(unique_path, info);
                Ok(())
            }
        }
    }
1264
1265    /// Calls add_file, and handles any ReadFailed errors by calling the skipped_file_callback.
1266    fn add_file_skip_read_failed<S, R>(
1267        &mut self,
1268        path: S,
1269        file: R,
1270        info: SourceFileInfo,
1271    ) -> Result<(), SourceBundleError>
1272    where
1273        S: AsRef<str>,
1274        R: Read,
1275    {
1276        let result = self.add_file(&path, file, info);
1277
1278        if let Err(e) = &result {
1279            if e.kind == SourceBundleErrorKind::ReadFailed {
1280                let reason = e.to_string();
1281                let skipped_info = SkippedFileInfo::new(path.as_ref(), &reason);
1282                (self.skipped_file_callback)(skipped_info);
1283
1284                return Ok(());
1285            }
1286        }
1287
1288        result
1289    }
1290
1291    /// Set a callback, which is called for every file that is skipped from being included in the
1292    /// source bundle. The callback receives information about the file being skipped.
1293    pub fn with_skipped_file_callback(
1294        mut self,
1295        callback: impl FnMut(SkippedFileInfo) + 'static,
1296    ) -> Self {
1297        self.skipped_file_callback = Box::new(callback);
1298        self
1299    }
1300
    /// Writes a single object into the bundle.
    ///
    /// Returns `Ok(true)` if any source files were added to the bundle, or `Ok(false)` if no
    /// sources could be resolved. Otherwise, an error is returned if writing the bundle fails.
    ///
    /// This finishes the source bundle and flushes the underlying writer.
    ///
    /// This is `write_object_with_filter` with a filter that accepts every file.
    pub fn write_object<'data, 'object, O, E>(
        self,
        object: &'object O,
        object_name: &str,
    ) -> Result<bool, SourceBundleError>
    where
        O: ObjectLike<'data, 'object, Error = E>,
        E: std::error::Error + Send + Sync + 'static,
    {
        self.write_object_with_filter(object, object_name, |_, _| true)
    }
1318
    /// Writes a single object into the bundle.
    ///
    /// Returns `Ok(true)` if any source files were added to the bundle, or `Ok(false)` if no
    /// sources could be resolved. Otherwise, an error is returned if writing the bundle fails.
    ///
    /// This finishes the source bundle and flushes the underlying writer.
    ///
    /// Before a file is written a callback is invoked which can return `false` to skip a file.
    /// The callback receives the file entry and the result of looking the file up in the debug
    /// session of the object. Files whose name is enclosed in `<` and `>` are skipped without
    /// invoking the callback.
    ///
    /// The contents written to the bundle are always read from the local file system. Files that
    /// cannot be read locally are left out without an error.
    pub fn write_object_with_filter<'data, 'object, O, E, F>(
        mut self,
        object: &'object O,
        object_name: &str,
        mut filter: F,
    ) -> Result<bool, SourceBundleError>
    where
        O: ObjectLike<'data, 'object, Error = E>,
        E: std::error::Error + Send + Sync + 'static,
        F: FnMut(&FileEntry, &Option<SourceFileDescriptor<'_>>) -> bool,
    {
        // Names of files that were already considered, so that each file is handled only once.
        let mut files_handled = BTreeSet::new();
        // Files referenced by il2cpp annotations; only filled if `collect_il2cpp` is enabled.
        let mut referenced_files = BTreeSet::new();

        let session = object
            .debug_session()
            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadDebugFile, e))?;

        // Carry over the meta data of the object as attributes of the bundle.
        self.set_attribute("arch", object.arch().to_string());
        self.set_attribute("debug_id", object.debug_id().to_string());
        self.set_attribute("object_name", object_name);
        if let Some(code_id) = object.code_id() {
            self.set_attribute("code_id", code_id.to_string());
        }

        for file_result in session.files() {
            let file = file_result
                .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadDebugFile, e))?;
            let filename = file.abs_path_str();

            if files_handled.contains(&filename) {
                continue;
            }

            // Names like `<...>` are never looked up or read.
            let source = if filename.starts_with('<') && filename.ends_with('>') {
                None
            } else {
                let source_from_object = session
                    .source_by_path(&filename)
                    .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadDebugFile, e))?;
                if filter(&file, &source_from_object) {
                    // Note: we could also use source code directly from the object, but that's not
                    // what happened here previously - only collected locally present files.
                    std::fs::read(&filename).ok()
                } else {
                    None
                }
            };

            if let Some(source) = source {
                let bundle_path = sanitize_bundle_path(&filename);
                let mut info = SourceFileInfo::new();
                info.set_ty(SourceFileType::Source);
                info.set_path(filename.clone());

                if self.collect_il2cpp {
                    collect_il2cpp_sources(&source, &mut referenced_files);
                }

                self.add_file_skip_read_failed(bundle_path, source.as_slice(), info)?;
            }

            // Remember the file even if it was filtered out or could not be read.
            files_handled.insert(filename);
        }

        // Add the files referenced by il2cpp annotations, unless they were handled above.
        for filename in referenced_files {
            if files_handled.contains(&filename) {
                continue;
            }

            if let Some(source) = File::open(&filename).ok().map(BufReader::new) {
                let bundle_path = sanitize_bundle_path(&filename);
                let mut info = SourceFileInfo::new();
                info.set_ty(SourceFileType::Source);
                info.set_path(filename.clone());

                self.add_file_skip_read_failed(bundle_path, source, info)?
            }
        }

        // `finish` consumes the writer, so emptiness has to be checked before calling it.
        let is_empty = self.is_empty();
        self.finish()?;

        Ok(!is_empty)
    }
1412
1413    /// Writes the manifest to the bundle and flushes the underlying file handle.
1414    pub fn finish(mut self) -> Result<(), SourceBundleError> {
1415        self.write_manifest()?;
1416        self.writer
1417            .finish()
1418            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1419        Ok(())
1420    }
1421
1422    /// Returns the full path for a file within the source bundle.
1423    fn file_path(&self, path: &str) -> String {
1424        format!("{FILES_PATH}/{path}")
1425    }
1426
1427    /// Returns a unique path for a file.
1428    ///
1429    /// Returns the path if the file does not exist already. Otherwise, a counter is appended to the
1430    /// file path (e.g. `.1`, `.2`, etc).
1431    fn unique_path(&self, mut path: String) -> String {
1432        let mut duplicates = 0;
1433
1434        while self.manifest.files.contains_key(&path) {
1435            duplicates += 1;
1436            match duplicates {
1437                1 => path.push_str(".1"),
1438                _ => {
1439                    use std::fmt::Write;
1440                    trim_end_matches(&mut path, char::is_numeric);
1441                    write!(path, ".{duplicates}").unwrap();
1442                }
1443            }
1444        }
1445
1446        path
1447    }
1448
1449    /// Flushes the manifest file to the bundle.
1450    fn write_manifest(&mut self) -> Result<(), SourceBundleError> {
1451        self.writer
1452            .start_file(MANIFEST_PATH, default_file_options())
1453            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1454
1455        serde_json::to_writer(&mut self.writer, &self.manifest)
1456            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadManifest, e))?;
1457
1458        Ok(())
1459    }
1460}
1461
/// Scans `source` for `il2cpp` specific reference comments.
///
/// A reference comment is a line which, after trimming whitespace, starts with
/// `//<source_info:`. The referenced file is everything between that prefix and the last `:` of
/// the line. Each referenced file is added to the `referenced_files` set.
///
/// Nothing is collected if `source` is not valid UTF-8.
fn collect_il2cpp_sources(source: &[u8], referenced_files: &mut BTreeSet<String>) {
    let Ok(text) = std::str::from_utf8(source) else {
        return;
    };

    let references = text
        .lines()
        .filter_map(|line| line.trim().strip_prefix("//<source_info:"))
        .filter_map(|reference| reference.rsplit_once(':'));

    for (file, _line_number) in references {
        // Check first, so that no string is allocated for files that are already known.
        if !referenced_files.contains(file) {
            referenced_files.insert(file.to_string());
        }
    }
}
1480
1481impl SourceBundleWriter<BufWriter<File>> {
1482    /// Create a bundle writer that writes its output to the given path.
1483    ///
1484    /// If the file does not exist at the given path, it is created. If the file does exist, it is
1485    /// overwritten.
1486    pub fn create<P>(path: P) -> Result<SourceBundleWriter<BufWriter<File>>, SourceBundleError>
1487    where
1488        P: AsRef<Path>,
1489    {
1490        let file = OpenOptions::new()
1491            .read(true)
1492            .write(true)
1493            .create(true)
1494            .truncate(true)
1495            .open(path)
1496            .map_err(|e| SourceBundleError::new(SourceBundleErrorKind::WriteFailed, e))?;
1497
1498        Self::start(BufWriter::new(file))
1499    }
1500}
1501
1502#[cfg(test)]
1503mod tests {
1504    use super::*;
1505
1506    use std::io::Cursor;
1507
1508    use similar_asserts::assert_eq;
1509    use tempfile::NamedTempFile;
1510
1511    #[test]
1512    fn test_has_file() -> Result<(), SourceBundleError> {
1513        let writer = Cursor::new(Vec::new());
1514        let mut bundle = SourceBundleWriter::start(writer)?;
1515
1516        bundle.add_file("bar.txt", &b"filecontents"[..], SourceFileInfo::default())?;
1517        assert!(bundle.has_file("bar.txt"));
1518
1519        bundle.finish()?;
1520        Ok(())
1521    }
1522
1523    #[test]
1524    fn test_non_utf8() -> Result<(), SourceBundleError> {
1525        let writer = Cursor::new(Vec::new());
1526        let mut bundle = SourceBundleWriter::start(writer)?;
1527
1528        assert!(bundle
1529            .add_file(
1530                "bar.txt",
1531                &[0, 159, 146, 150][..],
1532                SourceFileInfo::default()
1533            )
1534            .is_err());
1535
1536        Ok(())
1537    }
1538
1539    #[test]
1540    fn test_duplicate_files() -> Result<(), SourceBundleError> {
1541        let writer = Cursor::new(Vec::new());
1542        let mut bundle = SourceBundleWriter::start(writer)?;
1543
1544        bundle.add_file("bar.txt", &b"filecontents"[..], SourceFileInfo::default())?;
1545        bundle.add_file("bar.txt", &b"othercontents"[..], SourceFileInfo::default())?;
1546        assert!(bundle.has_file("bar.txt"));
1547        assert!(bundle.has_file("bar.txt.1"));
1548
1549        bundle.finish()?;
1550        Ok(())
1551    }
1552
1553    #[test]
1554    fn debugsession_is_sendsync() {
1555        fn is_sendsync<T: Send + Sync>() {}
1556        is_sendsync::<SourceBundleDebugSession>();
1557    }
1558
1559    #[test]
1560    fn test_normalize_paths() -> Result<(), SourceBundleError> {
1561        let mut writer = Cursor::new(Vec::new());
1562        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1563
1564        for filename in &[
1565            "C:\\users\\martin\\mydebugfile.cs",
1566            "/usr/martin/mydebugfile.h",
1567        ] {
1568            let mut info = SourceFileInfo::new();
1569            info.set_ty(SourceFileType::Source);
1570            info.set_path(filename.to_string());
1571            bundle.add_file_skip_read_failed(
1572                sanitize_bundle_path(filename),
1573                &b"somerandomdata"[..],
1574                info,
1575            )?;
1576        }
1577
1578        bundle.finish()?;
1579        let bundle_bytes = writer.into_inner();
1580        let bundle = SourceBundle::parse(&bundle_bytes)?;
1581
1582        let session = bundle.debug_session().unwrap();
1583
1584        assert!(session
1585            .source_by_path("C:\\users\\martin\\mydebugfile.cs")?
1586            .is_some());
1587        assert!(session
1588            .source_by_path("C:/users/martin/mydebugfile.cs")?
1589            .is_some());
1590        assert!(session
1591            .source_by_path("C:\\users\\martin/mydebugfile.cs")?
1592            .is_some());
1593        assert!(session
1594            .source_by_path("/usr/martin/mydebugfile.h")?
1595            .is_some());
1596        assert!(session
1597            .source_by_path("\\usr\\martin\\mydebugfile.h")?
1598            .is_some());
1599
1600        Ok(())
1601    }
1602
1603    #[test]
1604    fn test_source_descriptor() -> Result<(), SourceBundleError> {
1605        let mut writer = Cursor::new(Vec::new());
1606        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1607
1608        let mut info = SourceFileInfo::default();
1609        info.set_url("https://example.com/bar.js.min".into());
1610        info.set_path("/files/bar.js.min".into());
1611        info.set_ty(SourceFileType::MinifiedSource);
1612        info.add_header(
1613            "debug-id".into(),
1614            "5e618b9f-54a9-4389-b196-519819dd7c47".into(),
1615        );
1616        info.add_header("sourcemap".into(), "bar.js.map".into());
1617        bundle.add_file("bar.js", &b"filecontents"[..], info)?;
1618        assert!(bundle.has_file("bar.js"));
1619
1620        bundle.finish()?;
1621        let bundle_bytes = writer.into_inner();
1622        let bundle = SourceBundle::parse(&bundle_bytes)?;
1623
1624        let sess = bundle.debug_session().unwrap();
1625        let f = sess
1626            .source_by_debug_id(
1627                "5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(),
1628                SourceFileType::MinifiedSource,
1629            )
1630            .unwrap()
1631            .expect("should exist");
1632        assert_eq!(f.contents(), Some("filecontents"));
1633        assert_eq!(f.ty(), SourceFileType::MinifiedSource);
1634        assert_eq!(f.url(), Some("https://example.com/bar.js.min"));
1635        assert_eq!(f.path(), Some("/files/bar.js.min"));
1636        assert_eq!(f.source_mapping_url(), Some("bar.js.map"));
1637
1638        assert!(sess
1639            .source_by_debug_id(
1640                "5e618b9f-54a9-4389-b196-519819dd7c47".parse().unwrap(),
1641                SourceFileType::Source
1642            )
1643            .unwrap()
1644            .is_none());
1645
1646        Ok(())
1647    }
1648
1649    #[test]
1650    fn test_source_mapping_url() -> Result<(), SourceBundleError> {
1651        let mut writer = Cursor::new(Vec::new());
1652        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1653
1654        let mut info = SourceFileInfo::default();
1655        info.set_url("https://example.com/bar.min.js".into());
1656        info.set_ty(SourceFileType::MinifiedSource);
1657        bundle.add_file(
1658            "bar.js",
1659            &b"filecontents\n//# sourceMappingURL=bar.js.map"[..],
1660            info,
1661        )?;
1662
1663        bundle.finish()?;
1664        let bundle_bytes = writer.into_inner();
1665        let bundle = SourceBundle::parse(&bundle_bytes)?;
1666
1667        let sess = bundle.debug_session().unwrap();
1668        let f = sess
1669            .source_by_url("https://example.com/bar.min.js")
1670            .unwrap()
1671            .expect("should exist");
1672        assert_eq!(f.ty(), SourceFileType::MinifiedSource);
1673        assert_eq!(f.url(), Some("https://example.com/bar.min.js"));
1674        assert_eq!(f.source_mapping_url(), Some("bar.js.map"));
1675
1676        Ok(())
1677    }
1678
1679    #[test]
1680    fn test_source_embedded_debug_id() -> Result<(), SourceBundleError> {
1681        let mut writer = Cursor::new(Vec::new());
1682        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1683
1684        let mut info = SourceFileInfo::default();
1685        info.set_url("https://example.com/bar.min.js".into());
1686        info.set_ty(SourceFileType::MinifiedSource);
1687        bundle.add_file(
1688            "bar.js",
1689            &b"filecontents\n//# debugId=5b65abfb23384f0bb3b964c8f734d43f"[..],
1690            info,
1691        )?;
1692
1693        bundle.finish()?;
1694        let bundle_bytes = writer.into_inner();
1695        let bundle = SourceBundle::parse(&bundle_bytes)?;
1696
1697        let sess = bundle.debug_session().unwrap();
1698        let f = sess
1699            .source_by_url("https://example.com/bar.min.js")
1700            .unwrap()
1701            .expect("should exist");
1702        assert_eq!(f.ty(), SourceFileType::MinifiedSource);
1703        assert_eq!(
1704            f.debug_id(),
1705            Some("5b65abfb-2338-4f0b-b3b9-64c8f734d43f".parse().unwrap())
1706        );
1707
1708        Ok(())
1709    }
1710
1711    #[test]
1712    fn test_sourcemap_embedded_debug_id() -> Result<(), SourceBundleError> {
1713        let mut writer = Cursor::new(Vec::new());
1714        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1715
1716        let mut info = SourceFileInfo::default();
1717        info.set_url("https://example.com/bar.js.map".into());
1718        info.set_ty(SourceFileType::SourceMap);
1719        bundle.add_file(
1720            "bar.js.map",
1721            &br#"{"debug_id": "5b65abfb-2338-4f0b-b3b9-64c8f734d43f"}"#[..],
1722            info,
1723        )?;
1724
1725        bundle.finish()?;
1726        let bundle_bytes = writer.into_inner();
1727        let bundle = SourceBundle::parse(&bundle_bytes)?;
1728
1729        let sess = bundle.debug_session().unwrap();
1730        let f = sess
1731            .source_by_url("https://example.com/bar.js.map")
1732            .unwrap()
1733            .expect("should exist");
1734        assert_eq!(f.ty(), SourceFileType::SourceMap);
1735        assert_eq!(
1736            f.debug_id(),
1737            Some("5b65abfb-2338-4f0b-b3b9-64c8f734d43f".parse().unwrap())
1738        );
1739
1740        Ok(())
1741    }
1742
1743    #[test]
1744    fn test_il2cpp_reference() -> Result<(), Box<dyn std::error::Error>> {
1745        let mut cpp_file = NamedTempFile::new()?;
1746        let mut cs_file = NamedTempFile::new()?;
1747
1748        let cpp_contents = format!("foo\n//<source_info:{}:111>\nbar", cs_file.path().display());
1749
1750        // well, a source bundle itself is an `ObjectLike` :-)
1751        let object_buf = {
1752            let mut writer = Cursor::new(Vec::new());
1753            let mut bundle = SourceBundleWriter::start(&mut writer)?;
1754
1755            let path = cpp_file.path().to_string_lossy();
1756            let mut info = SourceFileInfo::new();
1757            info.set_ty(SourceFileType::Source);
1758            info.set_path(path.to_string());
1759            bundle.add_file(path, cpp_contents.as_bytes(), info)?;
1760
1761            bundle.finish()?;
1762            writer.into_inner()
1763        };
1764        let object = SourceBundle::parse(&object_buf)?;
1765
1766        // write file contents to temp files
1767        cpp_file.write_all(cpp_contents.as_bytes())?;
1768        cs_file.write_all(b"some C# source")?;
1769
1770        // write the actual source bundle based on the `object`
1771        let mut output_buf = Cursor::new(Vec::new());
1772        let mut writer = SourceBundleWriter::start(&mut output_buf)?;
1773        writer.collect_il2cpp_sources(true);
1774
1775        let written = writer.write_object(&object, "whatever")?;
1776        assert!(written);
1777        let output_buf = output_buf.into_inner();
1778
1779        // and collect all the included files
1780        let source_bundle = SourceBundle::parse(&output_buf)?;
1781        let session = source_bundle.debug_session()?;
1782        let actual_files: BTreeMap<_, _> = session
1783            .files()
1784            .flatten()
1785            .flat_map(|f| {
1786                let path = f.abs_path_str();
1787                session
1788                    .source_by_path(&path)
1789                    .ok()
1790                    .flatten()
1791                    .map(|source| (path, source.contents().unwrap().to_string()))
1792            })
1793            .collect();
1794
1795        let mut expected_files = BTreeMap::new();
1796        expected_files.insert(cpp_file.path().to_string_lossy().into_owned(), cpp_contents);
1797        expected_files.insert(
1798            cs_file.path().to_string_lossy().into_owned(),
1799            String::from("some C# source"),
1800        );
1801
1802        assert_eq!(actual_files, expected_files);
1803
1804        Ok(())
1805    }
1806
1807    #[test]
1808    fn test_bundle_paths() {
1809        assert_eq!(sanitize_bundle_path("foo"), "foo");
1810        assert_eq!(sanitize_bundle_path("foo/bar"), "foo/bar");
1811        assert_eq!(sanitize_bundle_path("/foo/bar"), "foo/bar");
1812        assert_eq!(sanitize_bundle_path("C:/foo/bar"), "C/foo/bar");
1813        assert_eq!(sanitize_bundle_path("\\foo\\bar"), "foo/bar");
1814        assert_eq!(sanitize_bundle_path("\\\\UNC\\foo\\bar"), "UNC/foo/bar");
1815    }
1816
1817    #[test]
1818    fn test_source_links() -> Result<(), SourceBundleError> {
1819        let mut writer = Cursor::new(Vec::new());
1820        let mut bundle = SourceBundleWriter::start(&mut writer)?;
1821
1822        let mut info = SourceFileInfo::default();
1823        info.set_url("https://example.com/bar/index.min.js".into());
1824        info.set_path("/files/bar/index.min.js".into());
1825        info.set_ty(SourceFileType::MinifiedSource);
1826        bundle.add_file("bar/index.js", &b"filecontents"[..], info)?;
1827        assert!(bundle.has_file("bar/index.js"));
1828
1829        bundle
1830            .manifest
1831            .source_links
1832            .insert("/files/bar/*".to_string(), "https://nope.com/*".into());
1833        bundle
1834            .manifest
1835            .source_links
1836            .insert("/files/foo/*".to_string(), "https://example.com/*".into());
1837
1838        bundle.finish()?;
1839        let bundle_bytes = writer.into_inner();
1840        let bundle = SourceBundle::parse(&bundle_bytes)?;
1841
1842        let sess = bundle.debug_session().unwrap();
1843
1844        // This should be resolved by source link
1845        let foo = sess
1846            .source_by_path("/files/foo/index.min.js")
1847            .unwrap()
1848            .expect("should exist");
1849        assert_eq!(foo.contents(), None);
1850        assert_eq!(foo.ty(), SourceFileType::Source);
1851        assert_eq!(foo.url(), Some("https://example.com/index.min.js"));
1852        assert_eq!(foo.path(), None);
1853
1854        // This should be resolved by embedded file, even though the link also exists
1855        let bar = sess
1856            .source_by_path("/files/bar/index.min.js")
1857            .unwrap()
1858            .expect("should exist");
1859        assert_eq!(bar.contents(), Some("filecontents"));
1860        assert_eq!(bar.ty(), SourceFileType::MinifiedSource);
1861        assert_eq!(bar.url(), Some("https://example.com/bar/index.min.js"));
1862        assert_eq!(bar.path(), Some("/files/bar/index.min.js"));
1863
1864        Ok(())
1865    }
1866}