rattler/install/
link.rs

//! This module contains the logic to link a given file from the package cache into the target directory.
//! See [`link_file`] for more information.
3use fs_err as fs;
4use memmap2::Mmap;
5use once_cell::sync::Lazy;
6use rattler_conda_types::package::{FileMode, PathType, PathsEntry, PrefixPlaceholder};
7use rattler_conda_types::Platform;
8use rattler_digest::Sha256;
9use rattler_digest::{HashingWriter, Sha256Hash};
10use reflink_copy::reflink;
11use regex::Regex;
12use std::borrow::Cow;
13use std::fmt;
14use std::fmt::Formatter;
15use std::fs::Permissions;
16use std::io::{BufWriter, ErrorKind, Read, Seek, Write};
17use std::path::{Path, PathBuf};
18
19use super::apple_codesign::{codesign, AppleCodeSignBehavior};
20use super::Prefix;
21
/// Describes the method used to "link" a file from the source directory (or the cache directory)
/// into the destination directory.
///
/// The [`fmt::Display`] implementation renders a short lowercase name for each method.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum LinkMethod {
    /// A ref link is created from the cache to the destination. This ensures that the file does
    /// not take up more disk-space and that the file is not accidentally modified in the cache.
    Reflink,

    /// A hard link is created from the cache to the destination. This ensures that the file does
    /// not take up more disk-space but has the downside that if the file is accidentally modified
    /// it is also modified in the cache.
    Hardlink,

    /// A soft link is created. The link does not refer to the original file in the cache directory
    /// but instead it points to another file in the destination.
    Softlink,

    /// A copy of a file is created from a file in the cache directory to a file in the destination
    /// directory.
    Copy,

    /// A copy of a file is created and it is also patched: the prefix placeholder in the file is
    /// replaced. The [`FileMode`] records whether the file was patched as text or as binary.
    Patched(FileMode),
}
46
47impl fmt::Display for LinkMethod {
48    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
49        match self {
50            LinkMethod::Hardlink => write!(f, "hardlink"),
51            LinkMethod::Softlink => write!(f, "softlink"),
52            LinkMethod::Reflink => write!(f, "reflink"),
53            LinkMethod::Copy => write!(f, "copy"),
54            LinkMethod::Patched(FileMode::Binary) => write!(f, "binary patched"),
55            LinkMethod::Patched(FileMode::Text) => write!(f, "text patched"),
56        }
57    }
58}
59
60/// Errors that can occur when calling [`link_file`].
61#[derive(Debug, thiserror::Error)]
62pub enum LinkFileError {
63    /// An IO error occurred.
64    #[error("unexpected io operation while {0}")]
65    IoError(String, #[source] std::io::Error),
66
67    /// The source file could not be opened.
68    #[error("could not open source file for reading")]
69    FailedToOpenSourceFile(#[source] std::io::Error),
70
71    /// The source file could not be opened.
72    #[error("failed to read the source file")]
73    FailedToReadSourceFile(#[source] std::io::Error),
74
75    /// Unable to read the contents of a symlink
76    #[error("could not open source file")]
77    FailedToReadSymlink(#[source] std::io::Error),
78
79    /// Linking the file from the source to the destination failed.
80    #[error("failed to {0} file to destination")]
81    FailedToLink(LinkMethod, #[source] std::io::Error),
82
83    /// The source file metadata could not be read.
84    #[error("could not source file metadata")]
85    FailedToReadSourceFileMetadata(#[source] std::io::Error),
86
87    /// The destination file could not be opened.
88    #[error("could not open destination file for writing")]
89    FailedToOpenDestinationFile(#[source] std::io::Error),
90
91    /// The permissions could not be updated on the destination file.
92    #[error("could not update destination file permissions")]
93    FailedToUpdateDestinationFilePermissions(#[source] std::io::Error),
94
95    /// The binary (dylib or executable) could not be signed (codesign -f -s -) on
96    /// macOS ARM64 (Apple Silicon).
97    #[error("failed to sign Apple binary")]
98    FailedToSignAppleBinary,
99
100    /// No Python version was specified when installing a noarch package.
101    #[error("cannot install noarch python files because there is no python version specified ")]
102    MissingPythonInfo,
103
104    /// The hash of the file could not be computed.
105    #[error("failed to compute the sha256 hash of the file")]
106    FailedToComputeSha(#[source] std::io::Error),
107}
108
/// The successful result of calling [`link_file`].
#[derive(Debug)]
pub struct LinkedFile {
    /// True if an existing file already existed and linking overwrote the original file.
    // NOTE(review): `link_file` in this module always sets this to `false`; presumably callers
    // fill it in afterwards — confirm.
    pub clobbered: bool,

    /// The SHA256 hash of the resulting file. For symbolic links `link_file` hashes the path the
    /// link points to instead of the contents of the file it refers to.
    pub sha256: rattler_digest::Sha256Hash,

    /// The size of the final file in bytes.
    pub file_size: u64,

    /// The relative path of the file in the destination directory. This might be different from the
    /// relative path in the source directory for python noarch packages.
    pub relative_path: PathBuf,

    /// The way the file was linked.
    pub method: LinkMethod,

    /// The original prefix placeholder that was replaced, or `None` if the file did not contain a
    /// prefix placeholder.
    pub prefix_placeholder: Option<String>,
}
131
132/// Installs a single file from a `package_dir` to the the `target_dir`. Replaces any
133/// `prefix_placeholder` in the file with the `prefix`.
134///
135/// `relative_path` is the path of the file in the `package_dir` (and the `target_dir`).
136///
137/// Note that usually the `target_prefix` is equal to `target_dir` but it might differ. See
138/// [`crate::install::InstallOptions::target_prefix`] for more information.
139#[allow(clippy::too_many_arguments)] // TODO: Fix this properly
140pub fn link_file(
141    path_json_entry: &PathsEntry,
142    destination_relative_path: PathBuf,
143    package_dir: &Path,
144    target_dir: &Prefix,
145    target_prefix: &str,
146    allow_symbolic_links: bool,
147    allow_hard_links: bool,
148    allow_ref_links: bool,
149    target_platform: Platform,
150    apple_codesign_behavior: AppleCodeSignBehavior,
151) -> Result<LinkedFile, LinkFileError> {
152    let source_path = package_dir.join(&path_json_entry.relative_path);
153
154    let destination_path = target_dir.path().join(&destination_relative_path);
155
156    // Temporary variables to store intermediate computations in. If we already computed the file
157    // size or the sha hash we dont have to recompute them at the end of the function.
158    let mut sha256 = None;
159    let mut file_size = path_json_entry.size_in_bytes;
160
161    let link_method = if let Some(PrefixPlaceholder {
162        file_mode,
163        placeholder,
164    }) = path_json_entry.prefix_placeholder.as_ref()
165    {
166        // Memory map the source file. This provides us with easy access to a continuous stream of
167        // bytes which makes it easier to search for the placeholder prefix.
168        let source = map_or_read_source_file(&source_path)?;
169
170        // Detect file type from the content
171        let file_type = FileType::detect(source.as_ref());
172
173        // Open the destination file
174        let destination = BufWriter::with_capacity(
175            50 * 1024,
176            fs::File::create(&destination_path)
177                .map_err(LinkFileError::FailedToOpenDestinationFile)?,
178        );
179        let mut destination_writer = HashingWriter::<_, rattler_digest::Sha256>::new(destination);
180
181        // Convert back-slashes (\) on windows with forward-slashes (/) to avoid problems with
182        // string escaping. For instance if we replace the prefix in the following text
183        //
184        // ```text
185        // string = "c:\\old_prefix"
186        // ```
187        //
188        // with the path `c:\new_prefix` the text will become:
189        //
190        // ```text
191        // string = "c:\new_prefix"
192        // ```
193        //
194        // In this case the literal string is not properly escape. This is fixed by using
195        // forward-slashes on windows instead.
196        let target_prefix = if target_platform.is_windows() {
197            Cow::Owned(target_prefix.replace('\\', "/"))
198        } else {
199            Cow::Borrowed(target_prefix)
200        };
201
202        // Replace the prefix placeholder in the file with the new placeholder
203        copy_and_replace_placeholders(
204            source.as_ref(),
205            &mut destination_writer,
206            placeholder,
207            &target_prefix,
208            &target_platform,
209            *file_mode,
210        )
211        .map_err(|err| LinkFileError::IoError(String::from("replacing placeholders"), err))?;
212
213        let (mut file, current_hash) = destination_writer.finalize();
214
215        // We computed the hash of the file while writing and from the file we can also infer the
216        // size of it.
217        sha256 = Some(current_hash);
218        file_size = file.stream_position().ok();
219
220        // We no longer need the file.
221        drop(file);
222
223        // Copy over filesystem permissions. We do this to ensure that the destination file has the
224        // same permissions as the source file.
225        let metadata = fs::symlink_metadata(&source_path)
226            .map_err(LinkFileError::FailedToReadSourceFileMetadata)?;
227        fs::set_permissions(&destination_path, metadata.permissions())
228            .map_err(LinkFileError::FailedToUpdateDestinationFilePermissions)?;
229
230        // (re)sign the binary if the file is executable or is a Mach-O binary (e.g., dylib)
231        if (has_executable_permissions(&metadata.permissions())
232            || file_type == Some(FileType::MachO))
233            && target_platform == Platform::OsxArm64
234            && *file_mode == FileMode::Binary
235        {
236            // Did the binary actually change?
237            let mut content_changed = false;
238            if let Some(original_hash) = &path_json_entry.sha256 {
239                content_changed = original_hash != &current_hash;
240            }
241
242            // If the binary changed it requires resigning.
243            if content_changed && apple_codesign_behavior != AppleCodeSignBehavior::DoNothing {
244                match codesign(&destination_path) {
245                    Ok(_) => {}
246                    Err(e) => {
247                        if apple_codesign_behavior == AppleCodeSignBehavior::Fail {
248                            return Err(e);
249                        }
250                    }
251                }
252
253                // The file on disk changed from the original file so the hash and file size
254                // also became invalid. Let's recompute them.
255                sha256 = Some(
256                    rattler_digest::compute_file_digest::<Sha256>(&destination_path)
257                        .map_err(LinkFileError::FailedToComputeSha)?,
258                );
259                file_size = Some(
260                    fs::symlink_metadata(&destination_path)
261                        .map_err(LinkFileError::FailedToOpenDestinationFile)?
262                        .len(),
263                );
264            }
265        }
266        LinkMethod::Patched(*file_mode)
267    } else if path_json_entry.path_type == PathType::HardLink && allow_ref_links {
268        reflink_to_destination(&source_path, &destination_path, allow_hard_links)?
269    } else if path_json_entry.path_type == PathType::HardLink && allow_hard_links {
270        hardlink_to_destination(&source_path, &destination_path)?
271    } else if path_json_entry.path_type == PathType::SoftLink && allow_symbolic_links {
272        symlink_to_destination(&source_path, &destination_path)?
273    } else {
274        copy_to_destination(&source_path, &destination_path)?
275    };
276
277    // Compute the final SHA256 if we didnt already or if its not stored in the paths.json entry.
278    let sha256 = if let Some(sha256) = sha256 {
279        sha256
280    } else if link_method == LinkMethod::Softlink {
281        // we hash the content of the symlink file. Note that this behavior is different from
282        // conda or mamba (where the target of the symlink is hashed). However, hashing the target
283        // of the symlink is more tricky in our case as we link everything in parallel and would have to
284        // potentially "wait" for dependencies to be available.
285        // This needs to be taken into account when verifying an installation.
286        let linked_path = destination_path
287            .read_link()
288            .map_err(LinkFileError::FailedToReadSymlink)?;
289        rattler_digest::compute_bytes_digest::<Sha256>(
290            linked_path.as_os_str().to_string_lossy().as_bytes(),
291        )
292    } else if let Some(sha256) = path_json_entry.sha256 {
293        sha256
294    } else if path_json_entry.path_type == PathType::HardLink {
295        rattler_digest::compute_file_digest::<Sha256>(&destination_path)
296            .map_err(LinkFileError::FailedToComputeSha)?
297    } else {
298        // This is either a softlink or a directory.
299        // Computing the hash for a directory is not possible.
300        // This hash is `0000...0000`
301        Sha256Hash::default()
302    };
303
304    // Compute the final file size if we didnt already.
305    let file_size = if let Some(file_size) = file_size {
306        file_size
307    } else if let Some(size_in_bytes) = path_json_entry.size_in_bytes {
308        size_in_bytes
309    } else {
310        let metadata = fs::symlink_metadata(&destination_path)
311            .map_err(LinkFileError::FailedToOpenDestinationFile)?;
312        metadata.len()
313    };
314
315    let prefix_placeholder: Option<String> = path_json_entry
316        .prefix_placeholder
317        .as_ref()
318        .map(|p| p.placeholder.clone());
319
320    Ok(LinkedFile {
321        clobbered: false,
322        sha256,
323        file_size,
324        relative_path: destination_relative_path,
325        method: link_method,
326        prefix_placeholder,
327    })
328}
329
/// Either a memory mapped file or the complete contents of a file read to memory.
///
/// Both variants expose their contents as a byte slice through `AsRef<[u8]>`.
enum MmapOrBytes {
    /// The contents of the file, accessed through a memory map.
    Mmap(Mmap),
    /// The contents of the file, read into an owned buffer.
    Bytes(Vec<u8>),
}
335
336impl AsRef<[u8]> for MmapOrBytes {
337    fn as_ref(&self) -> &[u8] {
338        match &self {
339            MmapOrBytes::Mmap(mmap) => mmap.as_ref(),
340            MmapOrBytes::Bytes(bytes) => bytes.as_slice(),
341        }
342    }
343}
344
345/// Either memory maps, or reads the contents of the file at the specified location.
346///
347/// This method prefers to memory map the file to reduce the memory load but if memory mapping fails
348/// it falls back to reading the contents of the file.
349///
350/// This fallback exists because we've seen that in some particular situations memory mapping is not
351/// allowed. A particular dubious case we've encountered is described in the this issue:
352/// <https://github.com/prefix-dev/pixi/issues/234>
353#[allow(clippy::verbose_file_reads)]
354fn map_or_read_source_file(source_path: &Path) -> Result<MmapOrBytes, LinkFileError> {
355    let mut file = fs::File::open(source_path).map_err(LinkFileError::FailedToOpenSourceFile)?;
356
357    // Try to memory map the file
358    let mmap = unsafe { Mmap::map(&file) };
359
360    // If memory mapping the file failed for whatever reason, try reading it directly to
361    // memory instead.
362    Ok(match mmap {
363        Ok(memory) => MmapOrBytes::Mmap(memory),
364        Err(err) => {
365            tracing::warn!(
366                "failed to memory map {}: {err}. Reading the file to memory instead.",
367                source_path.display()
368            );
369            let mut bytes = Vec::new();
370            file.read_to_end(&mut bytes)
371                .map_err(LinkFileError::FailedToReadSourceFile)?;
372            MmapOrBytes::Bytes(bytes)
373        }
374    })
375}
376
377/// Reflink (Copy-On-Write) the specified file from the source (or cached) directory. If the file
378/// already exists it is removed and the operation is retried.
379fn reflink_to_destination(
380    source_path: &Path,
381    destination_path: &Path,
382    allow_hard_links: bool,
383) -> Result<LinkMethod, LinkFileError> {
384    loop {
385        match reflink(source_path, destination_path) {
386            Ok(_) => {
387                #[cfg(target_os = "linux")]
388                {
389                    // Copy over filesystem permissions. We do this to ensure that the destination file has the
390                    // same permissions as the source file.
391                    let metadata = fs::metadata(source_path)
392                        .map_err(LinkFileError::FailedToReadSourceFileMetadata)?;
393                    fs::set_permissions(destination_path, metadata.permissions())
394                        .map_err(LinkFileError::FailedToUpdateDestinationFilePermissions)?;
395                }
396                return Ok(LinkMethod::Reflink);
397            }
398            Err(e) if e.kind() == ErrorKind::AlreadyExists => {
399                fs::remove_file(destination_path).map_err(|err| {
400                    LinkFileError::IoError(String::from("removing clobbered file"), err)
401                })?;
402            }
403            Err(e) if e.kind() == ErrorKind::Unsupported && allow_hard_links => {
404                return hardlink_to_destination(source_path, destination_path);
405            }
406            Err(e) if e.kind() == ErrorKind::Unsupported && !allow_hard_links => {
407                return copy_to_destination(source_path, destination_path);
408            }
409            Err(_) => {
410                return if allow_hard_links {
411                    hardlink_to_destination(source_path, destination_path)
412                } else {
413                    copy_to_destination(source_path, destination_path)
414                };
415            }
416        }
417    }
418}
419
420/// Hard link the specified file from the source (or cached) directory. If the file already exists
421/// it is removed and the operation is retried.
422fn hardlink_to_destination(
423    source_path: &Path,
424    destination_path: &Path,
425) -> Result<LinkMethod, LinkFileError> {
426    loop {
427        match fs::hard_link(source_path, destination_path) {
428            Ok(_) => return Ok(LinkMethod::Hardlink),
429            Err(e) if e.kind() == ErrorKind::AlreadyExists => {
430                fs::remove_file(destination_path).map_err(|err| {
431                    LinkFileError::IoError(String::from("removing clobbered file"), err)
432                })?;
433            }
434            Err(e) => {
435                tracing::debug!(
436                    "failed to hardlink {}: {e}, falling back to copying.",
437                    destination_path.display()
438                );
439                return copy_to_destination(source_path, destination_path);
440            }
441        }
442    }
443}
444
445/// Symlink the specified file from the source (or cached) directory. If the file already exists it
446/// is removed and the operation is retried.
447fn symlink_to_destination(
448    source_path: &Path,
449    destination_path: &Path,
450) -> Result<LinkMethod, LinkFileError> {
451    let linked_path = source_path
452        .read_link()
453        .map_err(LinkFileError::FailedToReadSymlink)?;
454    loop {
455        match symlink(&linked_path, destination_path) {
456            Ok(_) => return Ok(LinkMethod::Softlink),
457            Err(e) if e.kind() == ErrorKind::AlreadyExists => {
458                fs::remove_file(destination_path).map_err(|err| {
459                    LinkFileError::IoError(String::from("removing clobbered file"), err)
460                })?;
461            }
462            Err(e) => {
463                tracing::debug!(
464                    "failed to symlink {}: {e}, falling back to copying.",
465                    destination_path.display()
466                );
467                return copy_to_destination(source_path, destination_path);
468            }
469        }
470    }
471}
472
473/// Copy the specified file from the source (or cached) directory. If the file already exists it is
474/// removed and the operation is retried.
475fn copy_to_destination(
476    source_path: &Path,
477    destination_path: &Path,
478) -> Result<LinkMethod, LinkFileError> {
479    loop {
480        match fs::copy(source_path, destination_path) {
481            Err(e) if e.kind() == ErrorKind::AlreadyExists => {
482                // If the file already exists, remove it and try again.
483                fs::remove_file(destination_path).map_err(|err| {
484                    LinkFileError::IoError(String::from("removing clobbered file"), err)
485                })?;
486            }
487            Ok(_) => return Ok(LinkMethod::Copy),
488            Err(e) => return Err(LinkFileError::FailedToLink(LinkMethod::Copy, e)),
489        }
490    }
491}
492
493/// Given the contents of a file copy it to the `destination` and in the process replace the
494/// `prefix_placeholder` text with the `target_prefix` text.
495///
496/// This switches to more specialized functions that handle the replacement of either
497/// textual and binary placeholders, the [`FileMode`] enum switches between the two functions.
498/// See both [`copy_and_replace_cstring_placeholder`] and [`copy_and_replace_textual_placeholder`]
499pub fn copy_and_replace_placeholders(
500    source_bytes: &[u8],
501    mut destination: impl Write,
502    prefix_placeholder: &str,
503    target_prefix: &str,
504    target_platform: &Platform,
505    file_mode: FileMode,
506) -> Result<(), std::io::Error> {
507    match file_mode {
508        FileMode::Text => {
509            copy_and_replace_textual_placeholder(
510                source_bytes,
511                destination,
512                prefix_placeholder,
513                target_prefix,
514                target_platform,
515            )?;
516        }
517        FileMode::Binary => {
518            // conda does not replace the prefix in the binary files on windows
519            // DLLs are loaded quite differently anyways (there is no rpath, for example).
520            if target_platform.is_windows() {
521                destination.write_all(source_bytes)?;
522            } else {
523                copy_and_replace_cstring_placeholder(
524                    source_bytes,
525                    destination,
526                    prefix_placeholder,
527                    target_prefix,
528                )?;
529            }
530        }
531    }
532    Ok(())
533}
534
/// Matches a complete shebang line and splits it into its parts. Capture group 1 is the whole
/// shebang, group 2 is the path of the executable and group 3 is the remainder of the line.
static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| {
    // ^(#!      start of the capture group that spans the
    //           whole shebang
    // (?:[ ]*)  allow spaces between #! and beginning of
    //           the executable path
    // (/(?:\\ |[^ \n\r\t])*)  the executable is the next
    //                         text block that starts with a
    //                         `/` and runs until the first
    //                         unescaped space or other
    //                         whitespace character
    // (.*))$    the rest of the line can contain option
    //           flags and end whole_shebang group
    Regex::new(r"^(#!(?:[ ]*)(/(?:\\ |[^ \n\r\t])*)(.*))$").unwrap()
});
547
/// Matches the name of a Python executable such as `python`, `python3` or `python3.11`.
static PYTHON_REGEX: Lazy<Regex> = Lazy::new(|| {
    // Match a string that consists of exactly `python` followed by an optional version number.
    // The pattern is anchored on both sides, so nothing may precede or follow the name.
    // python matches the string `python`
    // (?:\d+(?:\.\d+)?)? matches an optional version number that consists of a major version
    //                    and an optional minor version
    Regex::new(r"^python(?:\d+(?:\.\d+)?)?$").unwrap()
});
555
556/// Finds if the shebang line length is valid.
557fn is_valid_shebang_length(shebang: &str, platform: &Platform) -> bool {
558    const MAX_SHEBANG_LENGTH_LINUX: usize = 127;
559    const MAX_SHEBANG_LENGTH_MACOS: usize = 512;
560
561    if platform.is_linux() {
562        shebang.len() <= MAX_SHEBANG_LENGTH_LINUX
563    } else if platform.is_osx() {
564        shebang.len() <= MAX_SHEBANG_LENGTH_MACOS
565    } else {
566        true
567    }
568}
569
570/// Convert a shebang to use `/usr/bin/env` to find the executable.
571/// This is useful for long shebangs or shebangs with spaces.
572fn convert_shebang_to_env(shebang: Cow<'_, str>) -> Cow<'_, str> {
573    if let Some(captures) = SHEBANG_REGEX.captures(&shebang) {
574        let path = &captures[2];
575        let exe_name = path.rsplit_once('/').map_or(path, |(_, f)| f);
576        if PYTHON_REGEX.is_match(exe_name) {
577            Cow::Owned(format!(
578                "#!/bin/sh\n'''exec' \"{}\"{} \"$0\" \"$@\" #'''",
579                path, &captures[3]
580            ))
581        } else {
582            Cow::Owned(format!("#!/usr/bin/env {}{}", exe_name, &captures[3]))
583        }
584    } else {
585        shebang
586    }
587}
588
589/// Long shebangs and shebangs with spaces are invalid.
590/// Long shebangs are longer than 127 on Linux or 512 on macOS characters.
591/// Shebangs with spaces are replaced with a shebang that uses `/usr/bin/env` to find the executable.
592/// This function replaces long shebangs with a shebang that uses `/usr/bin/env` to find the
593/// executable.
594fn replace_shebang<'a>(
595    shebang: Cow<'a, str>,
596    old_new: (&str, &str),
597    platform: &Platform,
598) -> Cow<'a, str> {
599    // If the new shebang would contain a space, return a `#!/usr/bin/env` shebang
600    assert!(
601        shebang.starts_with("#!"),
602        "Shebang does not start with #! ({shebang})",
603    );
604
605    if old_new.1.contains(' ') {
606        // Doesn't matter if we don't replace anything
607        if !shebang.contains(old_new.0) {
608            return shebang;
609        }
610        // we convert the shebang without spaces to a new shebang, and only then replace
611        // which is relevant for the Python case
612        let new_shebang = convert_shebang_to_env(shebang).replace(old_new.0, old_new.1);
613        return new_shebang.into();
614    }
615
616    let shebang: Cow<'_, str> = shebang.replace(old_new.0, old_new.1).into();
617
618    if !shebang.starts_with("#!") {
619        tracing::warn!("Shebang does not start with #! ({})", shebang);
620        return shebang;
621    }
622
623    if is_valid_shebang_length(&shebang, platform) {
624        shebang
625    } else {
626        convert_shebang_to_env(shebang)
627    }
628}
629
630/// Given the contents of a file copy it to the `destination` and in the process replace the
631/// `prefix_placeholder` text with the `target_prefix` text.
632///
633/// This is a text based version where the complete string is replaced. This works fine for text
634/// files but will not work correctly for binary files where the length of the string is often
635/// important. See [`copy_and_replace_cstring_placeholder`] when you are dealing with binary
636/// content.
637pub fn copy_and_replace_textual_placeholder(
638    mut source_bytes: &[u8],
639    mut destination: impl Write,
640    prefix_placeholder: &str,
641    target_prefix: &str,
642    target_platform: &Platform,
643) -> Result<(), std::io::Error> {
644    // Get the prefixes as bytes
645    let old_prefix = prefix_placeholder.as_bytes();
646    let new_prefix = target_prefix.as_bytes();
647
648    // check if we have a shebang. We need to handle it differently because it has a maximum length
649    // that can be exceeded in very long target prefix's.
650    if target_platform.is_unix() && source_bytes.starts_with(b"#!") {
651        // extract first line
652        let (first, rest) =
653            source_bytes.split_at(source_bytes.iter().position(|&c| c == b'\n').unwrap_or(0));
654        let first_line = String::from_utf8_lossy(first);
655        let new_shebang = replace_shebang(
656            first_line,
657            (prefix_placeholder, target_prefix),
658            target_platform,
659        );
660        // let replaced = first_line.replace(prefix_placeholder, target_prefix);
661        destination.write_all(new_shebang.as_bytes())?;
662        source_bytes = rest;
663    }
664
665    let mut last_match = 0;
666
667    for index in memchr::memmem::find_iter(source_bytes, old_prefix) {
668        destination.write_all(&source_bytes[last_match..index])?;
669        destination.write_all(new_prefix)?;
670        last_match = index + old_prefix.len();
671    }
672
673    // Write remaining bytes
674    if last_match < source_bytes.len() {
675        destination.write_all(&source_bytes[last_match..])?;
676    }
677
678    Ok(())
679}
680
681/// Given the contents of a file, copies it to the `destination` and in the process replace any
682/// binary c-style string that contains the text `prefix_placeholder` with a binary compatible
683/// c-string where the `prefix_placeholder` text is replaced with the `target_prefix` text.
684///
685/// The length of the input will match the output.
686///
687/// This function replaces binary c-style strings. If you want to simply find-and-replace text in a
688/// file instead use the [`copy_and_replace_textual_placeholder`] function.
689pub fn copy_and_replace_cstring_placeholder(
690    mut source_bytes: &[u8],
691    mut destination: impl Write,
692    prefix_placeholder: &str,
693    target_prefix: &str,
694) -> Result<(), std::io::Error> {
695    // Get the prefixes as bytes
696    let old_prefix = prefix_placeholder.as_bytes();
697    let new_prefix = target_prefix.as_bytes();
698
699    let finder = memchr::memmem::Finder::new(old_prefix);
700
701    loop {
702        if let Some(index) = finder.find(source_bytes) {
703            // write all bytes up to the old prefix, followed by the new prefix.
704            destination.write_all(&source_bytes[..index])?;
705
706            // Find the end of the c-style string. The nul terminator basically.
707            let mut end = index + old_prefix.len();
708            while end < source_bytes.len() && source_bytes[end] != b'\0' {
709                end += 1;
710            }
711
712            let mut out = Vec::new();
713            let mut old_bytes = &source_bytes[index..end];
714            let old_len = old_bytes.len();
715
716            // replace all occurrences of the old prefix with the new prefix
717            while let Some(index) = finder.find(old_bytes) {
718                out.write_all(&old_bytes[..index])?;
719                out.write_all(new_prefix)?;
720                old_bytes = &old_bytes[index + old_prefix.len()..];
721            }
722            out.write_all(old_bytes)?;
723            // write everything up to the old length
724            if out.len() > old_len {
725                destination.write_all(&out[..old_len])?;
726            } else {
727                destination.write_all(&out)?;
728            }
729
730            // Compute the padding required when replacing the old prefix(es) with the new one. If the old
731            // prefix is longer than the new one we need to add padding to ensure that the entire part
732            // will hold the same number of bytes. We do this by adding '\0's (e.g. nul terminators). This
733            // ensures that the text will remain a valid nul-terminated string.
734            let padding = old_len.saturating_sub(out.len());
735            destination.write_all(&vec![0; padding])?;
736
737            // Continue with the rest of the bytes.
738            source_bytes = &source_bytes[end..];
739        } else {
740            // The old prefix was not found in the (remaining) source bytes.
741            // Write the rest of the bytes
742            destination.write_all(source_bytes)?;
743
744            return Ok(());
745        }
746    }
747}
748
749fn symlink(source_path: &Path, destination_path: &Path) -> std::io::Result<()> {
750    #[cfg(windows)]
751    return fs_err::os::windows::fs::symlink_file(source_path, destination_path);
752    #[cfg(unix)]
753    return fs_err::os::unix::fs::symlink(source_path, destination_path);
754}
755
/// Reports whether `permissions` grants execute access to anyone.
///
/// On Unix this is `true` when at least one of the execute bits (mask `0o111`) is set in the
/// file mode. On Windows the argument is ignored and the result is always `false`.
// `permissions` is not read on Windows, hence the lint exemption.
#[allow(unused_variables)]
fn has_executable_permissions(permissions: &Permissions) -> bool {
    #[cfg(windows)]
    {
        false
    }
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;

        // Owner, group and other execute bits combined.
        const ANY_EXECUTE_BIT: u32 = 0o111;

        let mode_bits = permissions.mode();
        (mode_bits & ANY_EXECUTE_BIT) != 0
    }
}
763
/// The kind of file recognised by inspecting the leading bytes of its content.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum FileType {
    /// A Mach-O binary (executable, dylib, bundle, etc.)
    MachO,
}

impl FileType {
    // Mach-O magic numbers, written as the value obtained when the first four bytes of a file
    // are interpreted as a big-endian `u32`. Every `MAGIC` constant has a byte-swapped `CIGAM`
    // counterpart.
    const MACHO_FAT_MAGIC: u32 = 0xcafe_babe; // Fat/Universal binary (big-endian)
    const MACHO_FAT_CIGAM: u32 = 0xbeba_feca; // Fat/Universal binary (little-endian)
    const MACHO_MAGIC_32: u32 = 0xfeed_face; // Mach-O 32-bit (big-endian)
    const MACHO_CIGAM_32: u32 = 0xcefa_edfe; // Mach-O 32-bit (little-endian)
    const MACHO_MAGIC_64: u32 = 0xfeed_facf; // Mach-O 64-bit (big-endian)
    const MACHO_CIGAM_64: u32 = 0xcffa_edfe; // Mach-O 64-bit (little-endian)

    /// Identifies a file type from the first four bytes of `bytes`.
    ///
    /// Returns `Some(FileType::MachO)` when those bytes equal one of the Mach-O magic numbers
    /// listed above, and `None` otherwise. Input shorter than four bytes always yields `None`.
    fn detect(bytes: &[u8]) -> Option<Self> {
        // The slice pattern doubles as the length check: it only matches when at least four
        // bytes are available.
        let magic = match bytes {
            [b0, b1, b2, b3, ..] => u32::from_be_bytes([*b0, *b1, *b2, *b3]),
            _ => return None,
        };

        let is_macho = matches!(
            magic,
            Self::MACHO_FAT_MAGIC
                | Self::MACHO_FAT_CIGAM
                | Self::MACHO_MAGIC_32
                | Self::MACHO_CIGAM_32
                | Self::MACHO_MAGIC_64
                | Self::MACHO_CIGAM_64
        );
        is_macho.then_some(FileType::MachO)
    }
}
800
801#[cfg(test)]
802mod test {
803    use super::PYTHON_REGEX;
804    use fs_err as fs;
805    use rattler_conda_types::Platform;
806    use rstest::rstest;
807    use std::io::Cursor;
808
809    #[rstest]
810    #[case("Hello, cruel world!", "cruel", "fabulous", "Hello, fabulous world!")]
811    #[case(
812        "prefix_placeholder",
813        "prefix_placeholder",
814        "target_prefix",
815        "target_prefix"
816    )]
817    pub fn test_copy_and_replace_textual_placeholder(
818        #[case] input: &str,
819        #[case] prefix_placeholder: &str,
820        #[case] target_prefix: &str,
821        #[case] expected_output: &str,
822    ) {
823        let mut output = Cursor::new(Vec::new());
824        super::copy_and_replace_textual_placeholder(
825            input.as_bytes(),
826            &mut output,
827            prefix_placeholder,
828            target_prefix,
829            &Platform::Linux64,
830        )
831        .unwrap();
832        assert_eq!(
833            &String::from_utf8_lossy(&output.into_inner()),
834            expected_output
835        );
836    }
837
838    #[rstest]
839    #[case(
840        b"12345Hello, fabulous world!\x006789",
841        "fabulous",
842        "cruel",
843        b"12345Hello, cruel world!\x00\x00\x00\x006789"
844    )]
845    #[case(b"short\x00", "short", "verylong", b"veryl\x00")]
846    #[case(b"short1234\x00", "short", "verylong", b"verylong1\x00")]
847    pub fn test_copy_and_replace_binary_placeholder(
848        #[case] input: &[u8],
849        #[case] prefix_placeholder: &str,
850        #[case] target_prefix: &str,
851        #[case] expected_output: &[u8],
852    ) {
853        assert_eq!(
854            expected_output.len(),
855            input.len(),
856            "input and expected output must have the same length"
857        );
858        let mut output = Cursor::new(Vec::new());
859        super::copy_and_replace_cstring_placeholder(
860            input,
861            &mut output,
862            prefix_placeholder,
863            target_prefix,
864        )
865        .unwrap();
866        assert_eq!(&output.into_inner(), expected_output);
867    }
868
869    #[test]
870    fn replace_binary_path_var() {
871        let input =
872            b"beginrandomdataPATH=/placeholder/etc/share:/placeholder/bin/:\x00somemoretext";
873        let mut output = Cursor::new(Vec::new());
874        super::copy_and_replace_cstring_placeholder(input, &mut output, "/placeholder", "/target")
875            .unwrap();
876        let out = &output.into_inner();
877        assert_eq!(out, b"beginrandomdataPATH=/target/etc/share:/target/bin/:\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00somemoretext");
878        assert_eq!(out.len(), input.len());
879    }
880
881    #[test]
882    fn test_replace_shebang() {
883        let shebang_with_spaces = "#!/path/placeholder/executable -o test -x".into();
884        let replaced = super::replace_shebang(
885            shebang_with_spaces,
886            ("placeholder", "with space"),
887            &Platform::Linux64,
888        );
889        assert_eq!(replaced, "#!/usr/bin/env executable -o test -x");
890    }
891
892    #[test]
893    fn test_replace_long_shebang() {
894        let short_shebang = "#!/path/to/executable -x 123".into();
895        let replaced = super::replace_shebang(short_shebang, ("", ""), &Platform::Linux64);
896        assert_eq!(replaced, "#!/path/to/executable -x 123");
897
898        let shebang = "#!/this/is/loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong/executable -o test -x";
899        let replaced = super::replace_shebang(shebang.into(), ("", ""), &Platform::Linux64);
900        assert_eq!(replaced, "#!/usr/bin/env executable -o test -x");
901
902        let replaced = super::replace_shebang(shebang.into(), ("", ""), &Platform::Osx64);
903        assert_eq!(replaced, shebang);
904
905        let shebang_with_escapes = "#!/this/is/loooooooooooooooooooooooooooooooooooooooooooooooooooo\\ oooooo\\ oooooo\\ oooooooooooooooooooooooooooooooooooong/exe\\ cutable -o test -x";
906        let replaced =
907            super::replace_shebang(shebang_with_escapes.into(), ("", ""), &Platform::Linux64);
908        assert_eq!(replaced, "#!/usr/bin/env exe\\ cutable -o test -x");
909
910        let shebang = "#!    /this/is/looooooooooooooooooooooooooooooooooooooooooooo\\ \\ ooooooo\\ oooooo\\ oooooo\\ ooooooooooooooooo\\ ooooooooooooooooooong/exe\\ cutable -o \"te  st\" -x";
911        let replaced = super::replace_shebang(shebang.into(), ("", ""), &Platform::Linux64);
912        assert_eq!(replaced, "#!/usr/bin/env exe\\ cutable -o \"te  st\" -x");
913
914        let shebang = "#!/usr/bin/env perl";
915        let replaced = super::replace_shebang(
916            shebang.into(),
917            ("/placeholder", "/with space"),
918            &Platform::Linux64,
919        );
920        assert_eq!(replaced, shebang);
921
922        let shebang = "#!/placeholder/perl";
923        let replaced = super::replace_shebang(
924            shebang.into(),
925            ("/placeholder", "/with space"),
926            &Platform::Linux64,
927        );
928        assert_eq!(replaced, "#!/usr/bin/env perl");
929    }
930
    /// Snapshot test for `replace_shebang` applied to Python interpreter shebangs whose prefix
    /// is replaced by a path containing spaces.
    ///
    /// The expected strings are not spelled out here; they are compared against `insta`
    /// snapshots. NOTE(review): the snapshots are presumably keyed by this function's name and
    /// the order of the assertions, so keep the two assertions in this order - confirm before
    /// reordering.
    #[test]
    fn replace_python_shebang() {
        // Case 1: bare interpreter path without arguments.
        let short_shebang = "#!/path/to/python3.12".into();
        let replaced = super::replace_shebang(
            short_shebang,
            ("/path/to", "/new/prefix/with spaces/bin"),
            &Platform::Linux64,
        );
        insta::assert_snapshot!(replaced);

        // Case 2: the same interpreter followed by command-line arguments.
        let short_shebang = "#!/path/to/python3.12 -x 123".into();
        let replaced = super::replace_shebang(
            short_shebang,
            ("/path/to", "/new/prefix/with spaces/bin"),
            &Platform::Linux64,
        );
        insta::assert_snapshot!(replaced);
    }
949
950    #[test]
951    fn test_replace_long_prefix_in_text_file() {
952        let test_data_dir =
953            std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../test-data");
954        let test_file = test_data_dir.join("shebang_test.txt");
955        let prefix_placeholder = "/this/is/placeholder";
956        let mut target_prefix = "/super/long/".to_string();
957        for _ in 0..15 {
958            target_prefix.push_str("verylongstring/");
959        }
960        let input = fs::read(test_file).unwrap();
961        let mut output = Cursor::new(Vec::new());
962        super::copy_and_replace_textual_placeholder(
963            &input,
964            &mut output,
965            prefix_placeholder,
966            &target_prefix,
967            &Platform::Linux64,
968        )
969        .unwrap();
970
971        let output = output.into_inner();
972        let replaced = String::from_utf8_lossy(&output);
973        insta::assert_snapshot!(replaced);
974    }
975
976    #[test]
977    fn test_python_regex() {
978        // Test the regex
979        let test_strings = vec!["python", "python3", "python3.12", "python2.7"];
980
981        for s in test_strings {
982            assert!(PYTHON_REGEX.is_match(s));
983        }
984
985        let no_match_strings = vec![
986            "python3.12.1",
987            "python3.12.1.1",
988            "foo",
989            "foo3.2",
990            "pythondoc",
991        ];
992
993        for s in no_match_strings {
994            assert!(!PYTHON_REGEX.is_match(s));
995        }
996    }
997
998    #[test]
999    fn test_detect_file_type() {
1000        use super::FileType;
1001
1002        // Test Mach-O 64-bit magic (big-endian)
1003        let macho_64_be = [0xfe, 0xed, 0xfa, 0xcf, 0x00, 0x00];
1004        assert_eq!(FileType::detect(&macho_64_be), Some(FileType::MachO));
1005
1006        // Test Mach-O 64-bit magic (little-endian)
1007        let macho_64_le = [0xcf, 0xfa, 0xed, 0xfe, 0x00, 0x00];
1008        assert_eq!(FileType::detect(&macho_64_le), Some(FileType::MachO));
1009
1010        // Test Mach-O 32-bit magic (big-endian)
1011        let macho_32_be = [0xfe, 0xed, 0xfa, 0xce, 0x00, 0x00];
1012        assert_eq!(FileType::detect(&macho_32_be), Some(FileType::MachO));
1013
1014        // Test Mach-O 32-bit magic (little-endian)
1015        let macho_32_le = [0xce, 0xfa, 0xed, 0xfe, 0x00, 0x00];
1016        assert_eq!(FileType::detect(&macho_32_le), Some(FileType::MachO));
1017
1018        // Test Fat/Universal binary magic (big-endian)
1019        let fat_be = [0xca, 0xfe, 0xba, 0xbe, 0x00, 0x00];
1020        assert_eq!(FileType::detect(&fat_be), Some(FileType::MachO));
1021
1022        // Test Fat/Universal binary magic (little-endian)
1023        let fat_le = [0xbe, 0xba, 0xfe, 0xca, 0x00, 0x00];
1024        assert_eq!(FileType::detect(&fat_le), Some(FileType::MachO));
1025
1026        // Test non-Mach-O file
1027        let not_macho = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05];
1028        assert_eq!(FileType::detect(&not_macho), None);
1029
1030        // Test short file
1031        let short = [0xfe, 0xed];
1032        assert_eq!(FileType::detect(&short), None);
1033
1034        // Test empty file
1035        let empty: [u8; 0] = [];
1036        assert_eq!(FileType::detect(&empty), None);
1037    }
1038}