Skip to main content

socket_patch_core/patch/
file_hash.rs

1use std::path::Path;
2
3use crate::hash::git_sha256::compute_git_sha256_from_reader;
4
5/// Compute Git-compatible SHA256 hash of file contents using streaming.
6///
7/// Opens the file *once* and derives the size from that open handle (an
8/// `fstat`), then streams the same handle through the hasher without loading
9/// the entire file into memory.
10///
11/// Deriving the size from the open file descriptor — rather than `stat`-ing the
12/// path separately and then re-opening it — is what makes this safe under
13/// concurrent mutation. The patch engine hashes files that other processes (or
14/// an attacker) may rename/replace at any moment. If we measured the size of
15/// one path resolution and read the bytes of another, a swap to a *same-sized*
16/// file would slip past the size-mismatch guard in
17/// [`compute_git_sha256_from_reader`] and produce a hash whose Git header (the
18/// size) and body came from different inodes. Reading both from the same `fd`
19/// makes that impossible.
20///
21/// Only regular files are accepted. Following a path to a directory or a
22/// special file (FIFO, device, …) and hashing it is never meaningful here, and
23/// on some platforms a directory can read as zero bytes — which would otherwise
24/// be silently reported as the empty-blob hash.
25pub async fn compute_file_git_sha256(filepath: impl AsRef<Path>) -> Result<String, std::io::Error> {
26    let filepath = filepath.as_ref();
27
28    // Open the file once; everything below operates on this single descriptor.
29    let file = tokio::fs::File::open(filepath).await?;
30
31    // Size comes from the open handle (fstat), so it and the bytes we hash are
32    // guaranteed to refer to the same inode even if the path is replaced.
33    let metadata = file.metadata().await?;
34
35    if !metadata.is_file() {
36        return Err(std::io::Error::new(
37            std::io::ErrorKind::InvalidInput,
38            format!("git sha256: {} is not a regular file", filepath.display()),
39        ));
40    }
41
42    let file_size = metadata.len();
43    let reader = tokio::io::BufReader::new(file);
44
45    compute_git_sha256_from_reader(file_size, reader).await
46}
47
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash::git_sha256::compute_git_sha256_from_bytes;

    /// Hashing a small file must agree with hashing the same bytes in memory.
    #[tokio::test]
    async fn test_compute_file_git_sha256_matches_bytes() {
        let dir = tempfile::tempdir().unwrap();
        let file_path = dir.path().join("test.txt");

        let content = b"Hello, World!";
        tokio::fs::write(&file_path, content).await.unwrap();

        let file_hash = compute_file_git_sha256(&file_path).await.unwrap();
        let bytes_hash = compute_git_sha256_from_bytes(content);

        assert_eq!(file_hash, bytes_hash);
    }

    /// A zero-length regular file is valid input and hashes like empty bytes.
    #[tokio::test]
    async fn test_compute_file_git_sha256_empty_file() {
        let dir = tempfile::tempdir().unwrap();
        let file_path = dir.path().join("empty.txt");

        tokio::fs::write(&file_path, b"").await.unwrap();

        let file_hash = compute_file_git_sha256(&file_path).await.unwrap();
        let bytes_hash = compute_git_sha256_from_bytes(b"");

        assert_eq!(file_hash, bytes_hash);
    }

    /// A missing file must surface the open error (`NotFound`).
    ///
    /// The path lives inside a fresh temporary directory so the test does not
    /// depend on what happens to exist at a fixed location on the host.
    #[tokio::test]
    async fn test_compute_file_git_sha256_not_found() {
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("does-not-exist.txt");

        let err = compute_file_git_sha256(&missing)
            .await
            .expect_err("hashing a missing file must error");

        assert_eq!(err.kind(), std::io::ErrorKind::NotFound);
    }

    /// Content spanning several buffer refills must hash the same as the
    /// in-memory bytes.
    #[tokio::test]
    async fn test_compute_file_git_sha256_large_content() {
        let dir = tempfile::tempdir().unwrap();
        let file_path = dir.path().join("large.bin");

        // Create a file larger than the 8192 byte buffer
        let content: Vec<u8> = (0..20000).map(|i| (i % 256) as u8).collect();
        tokio::fs::write(&file_path, &content).await.unwrap();

        let file_hash = compute_file_git_sha256(&file_path).await.unwrap();
        let bytes_hash = compute_git_sha256_from_bytes(&content);

        assert_eq!(file_hash, bytes_hash);
    }

    /// A directory must be rejected with an error, not silently hashed as the
    /// empty blob. On some platforms reading a directory descriptor yields zero
    /// bytes; without the `is_file` guard that would return the hash of `""`
    /// and the patch engine would compare a real file's expected hash against a
    /// directory's bogus one.
    #[tokio::test]
    async fn test_compute_file_git_sha256_rejects_directory() {
        let dir = tempfile::tempdir().unwrap();

        // `expect_err` already proves no hash (empty-blob or otherwise) was
        // returned; the checks below pin *which* error was produced.
        let err = compute_file_git_sha256(dir.path())
            .await
            .expect_err("hashing a directory must error");

        // On Unix a directory opens successfully and the `is_file` guard
        // rejects it with `InvalidInput`. On Windows `File::open` on a
        // directory fails at the open call itself (a different OS error kind),
        // so the specific kind and message are only pinned off-Windows.
        // `cfg!` (rather than `#[cfg]`) keeps `err` used on every platform.
        if cfg!(not(windows)) {
            assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput);
            assert!(
                err.to_string().contains("is not a regular file"),
                "directory rejection should come from the regular-file guard, got: {}",
                err
            );
        }
    }

    /// A symlink to a regular file follows through `File::open` and hashes the
    /// target's contents (the size also comes from the resolved file via
    /// fstat), matching a direct byte hash of that content.
    #[cfg(unix)]
    #[tokio::test]
    async fn test_compute_file_git_sha256_follows_symlink_to_file() {
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("target.txt");
        let link = dir.path().join("link.txt");

        let content = b"symlinked content";
        tokio::fs::write(&target, content).await.unwrap();
        tokio::fs::symlink(&target, &link).await.unwrap();

        let link_hash = compute_file_git_sha256(&link).await.unwrap();
        let bytes_hash = compute_git_sha256_from_bytes(content);

        assert_eq!(link_hash, bytes_hash);
    }

    /// A symlink whose target is a directory must be rejected, exactly like a
    /// directory passed directly — the `is_file` check operates on the resolved
    /// open handle.
    #[cfg(unix)]
    #[tokio::test]
    async fn test_compute_file_git_sha256_rejects_symlink_to_directory() {
        let dir = tempfile::tempdir().unwrap();
        let subdir = dir.path().join("subdir");
        let link = dir.path().join("dirlink");

        tokio::fs::create_dir(&subdir).await.unwrap();
        tokio::fs::symlink(&subdir, &link).await.unwrap();

        let result = compute_file_git_sha256(&link).await;
        let err = result.expect_err("symlink to a directory must error");
        assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput);
    }

    /// A broken symlink (dangling target) must surface the open error rather
    /// than panicking or returning a hash.
    #[cfg(unix)]
    #[tokio::test]
    async fn test_compute_file_git_sha256_broken_symlink_errors() {
        let dir = tempfile::tempdir().unwrap();
        let link = dir.path().join("dangling");

        tokio::fs::symlink(dir.path().join("does-not-exist"), &link)
            .await
            .unwrap();

        let err = compute_file_git_sha256(&link)
            .await
            .expect_err("dangling symlink must error");

        // Resolving the link fails at open time because the target is absent.
        assert_eq!(err.kind(), std::io::ErrorKind::NotFound);
    }
}