objects/util/symlink.rs
1// SPDX-License-Identifier: Apache-2.0
2//! Canonical symlink-target → blob-bytes conversion.
3//!
4//! Git stores a symlink's target as the raw bytes of the link, which on Unix
5//! is an arbitrary byte sequence and need not be valid UTF-8. Every site that
6//! turns a symlink target into a blob (capture, status hashing, diff/rename
7//! similarity, patch generation) must use these raw bytes — a `to_string_lossy`
8//! conversion replaces invalid bytes with U+FFFD, producing a hash that never
9//! matches git's and a patch that recreates the link with a corrupted target.
10//! This is the single source of truth so the platform byte extraction is not
11//! forked across crates.
12
13use std::path::Path;
14
/// Produce the blob content git records for a symlink that points at `target`.
///
/// * Unix: the target's OS bytes are copied verbatim, so a target that is not
///   valid UTF-8 comes back unchanged.
/// * Everywhere else: the target is treated as text and converted with
///   `to_string_lossy`; that is acceptable there because the filesystem does
///   not preserve arbitrary bytes.
pub fn symlink_target_bytes(target: &Path) -> Vec<u8> {
    // Exactly one of the two bindings below survives conditional compilation.
    #[cfg(unix)]
    let blob = {
        use std::os::unix::ffi::OsStrExt;
        let link = target.as_os_str();
        link.as_bytes().to_owned()
    };

    #[cfg(not(unix))]
    let blob = target.to_string_lossy().into_owned().into_bytes();

    blob
}