// git_closure/materialize.rs

//! Snapshot consumption: verification and filesystem materialization.
2use std::fs;
3use std::path::{Component, Path, PathBuf};
4
5#[cfg(unix)]
6use std::os::unix::fs::symlink;
7#[cfg(unix)]
8use std::os::unix::fs::PermissionsExt;
9
10use crate::error::GitClosureError;
11use crate::snapshot::hash::{compute_snapshot_hash, sha256_hex};
12use crate::snapshot::serial::parse_snapshot;
13use crate::snapshot::{Result, VerifyReport};
14use crate::utils::{
15    ensure_no_symlink_ancestors, io_error_with_path, lexical_normalize, reject_if_symlink,
16};
17
// ── Public API ────────────────────────────────────────────────────────────────
19
/// Policy profiles for snapshot materialization.
///
/// The policy decides two things during materialization: whether the output
/// directory has to be empty, and whether symlink entries may be created.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum MaterializePolicy {
    /// Default, strict behavior:
    /// - the output directory must be empty
    /// - symlink entries are allowed (platform permitting)
    #[default]
    Strict,
    /// Allow materializing into a non-empty output directory.
    ///
    /// The emptiness check is skipped; existing files may be overwritten if
    /// the snapshot contains matching paths.
    TrustedNonempty,
    /// Reject snapshots that contain symlink entries.
    ///
    /// Useful for environments that cannot or must not create symlinks.
    /// The output directory must still be empty, as with [`Strict`].
    ///
    /// [`Strict`]: MaterializePolicy::Strict
    NoSymlink,
}
38
39/// Options controlling materialization behavior.
40#[derive(Debug, Clone, Default, PartialEq, Eq)]
41pub struct MaterializeOptions {
42    pub policy: MaterializePolicy,
43}
44
45/// Verifies the structural and content integrity of a snapshot file.
46///
47/// Checks performed:
48/// 1. `snapshot-hash` header matches recomputed hash over file metadata.
49/// 2. Each regular file's `:sha256` matches `SHA-256(content)`.
50/// 3. Each regular file's `:size` matches `content.len()`.
51/// 4. Each path is safe (no `..`, no absolute paths).
52/// 5. Each mode string is valid octal.
53pub fn verify_snapshot(snapshot: &Path) -> Result<VerifyReport> {
54    let text = fs::read_to_string(snapshot).map_err(|err| io_error_with_path(err, snapshot))?;
55
56    let (header, files) = parse_snapshot(&text)?;
57
58    let recomputed = compute_snapshot_hash(&files);
59    if recomputed != header.snapshot_hash {
60        return Err(GitClosureError::HashMismatch {
61            expected: header.snapshot_hash,
62            actual: recomputed,
63        });
64    }
65
66    for file in &files {
67        let _ = sanitized_relative_path(&file.path)?;
68
69        if let Some(target) = &file.symlink_target {
70            let synthetic_root = PathBuf::from("/gcl-verify-root");
71            let entry_parent = synthetic_root.join(
72                Path::new(&file.path)
73                    .parent()
74                    .unwrap_or_else(|| Path::new("")),
75            );
76            let effective_target = if Path::new(target).is_absolute() {
77                Path::new(target).to_path_buf()
78            } else {
79                entry_parent.join(target)
80            };
81            let normalized = lexical_normalize(&effective_target)?;
82            if !normalized.starts_with(&synthetic_root) {
83                return Err(GitClosureError::UnsafePath(format!(
84                    "symlink target would escape output root for {}: {}",
85                    file.path, target
86                )));
87            }
88            continue;
89        }
90
91        let digest = sha256_hex(&file.content);
92        if digest != file.sha256 {
93            return Err(GitClosureError::ContentHashMismatch {
94                path: file.path.clone(),
95                expected: file.sha256.clone(),
96                actual: digest,
97            });
98        }
99
100        if file.content.len() as u64 != file.size {
101            return Err(GitClosureError::SizeMismatch {
102                path: file.path.clone(),
103                expected: file.size,
104                actual: file.content.len() as u64,
105            });
106        }
107
108        u32::from_str_radix(&file.mode, 8).map_err(|err| {
109            GitClosureError::Parse(format!(
110                "invalid octal mode for {}: {} ({err})",
111                file.path, file.mode
112            ))
113        })?;
114    }
115
116    Ok(VerifyReport {
117        file_count: files.len(),
118    })
119}
120
121/// Materializes a snapshot into `output`, creating the directory tree and
122/// restoring file contents and permissions.
123///
124/// **Preconditions:**
125/// - `output` must be empty or newly created.  Materializing into a non-empty
126///   directory is rejected to prevent TOCTOU-style symlink-escalation attacks
127///   via pre-planted symlinks that bypass the lexical containment check.
128/// - All paths in the snapshot must be safe (no `..`, no absolute paths).
129/// - Symlink targets must not escape `output` when resolved lexically.
130/// - On non-Unix platforms, mode parsing still occurs but applying POSIX
131///   permissions is intentionally a no-op in v0.1.
132pub fn materialize_snapshot(snapshot: &Path, output: &Path) -> Result<()> {
133    materialize_snapshot_with_options(snapshot, output, &MaterializeOptions::default())
134}
135
136/// Materializes a snapshot with explicit policy controls.
137pub fn materialize_snapshot_with_options(
138    snapshot: &Path,
139    output: &Path,
140    options: &MaterializeOptions,
141) -> Result<()> {
142    let text = fs::read_to_string(snapshot).map_err(|err| io_error_with_path(err, snapshot))?;
143
144    let (header, files) = parse_snapshot(&text)?;
145
146    let recomputed = compute_snapshot_hash(&files);
147    if recomputed != header.snapshot_hash {
148        return Err(GitClosureError::HashMismatch {
149            expected: header.snapshot_hash,
150            actual: recomputed,
151        });
152    }
153
154    fs::create_dir_all(output).map_err(|err| io_error_with_path(err, output))?;
155
156    let output_abs = fs::canonicalize(output).map_err(|err| io_error_with_path(err, output))?;
157
158    if options.policy != MaterializePolicy::TrustedNonempty {
159        // Safety invariant: require an empty output directory.
160        // See module-level doc comment for the security rationale.
161        let is_empty = output_abs
162            .read_dir()
163            .map_err(|err| io_error_with_path(err, &output_abs))?
164            .next()
165            .is_none();
166        if !is_empty {
167            return Err(GitClosureError::Parse(format!(
168                "output directory must be empty: {}",
169                output_abs.display()
170            )));
171        }
172    }
173
174    for file in files {
175        let relative = sanitized_relative_path(&file.path)?;
176        let destination = output_abs.join(relative);
177
178        if !destination.starts_with(&output_abs) {
179            return Err(GitClosureError::UnsafePath(file.path));
180        }
181
182        if let Some(parent) = destination.parent() {
183            ensure_no_symlink_ancestors(&output_abs, parent)?;
184            fs::create_dir_all(parent).map_err(|err| io_error_with_path(err, parent))?;
185        }
186
187        if let Some(target) = &file.symlink_target {
188            if options.policy == MaterializePolicy::NoSymlink {
189                return Err(GitClosureError::Parse(format!(
190                    "symlink entry is disallowed by materialize policy: {}",
191                    file.path
192                )));
193            }
194            let target_path = Path::new(target);
195            let effective_target = if target_path.is_absolute() {
196                target_path.to_path_buf()
197            } else {
198                destination
199                    .parent()
200                    .unwrap_or(&output_abs)
201                    .join(target_path)
202            };
203            let normalized_target = lexical_normalize(&effective_target)?;
204            if !normalized_target.starts_with(&output_abs) {
205                return Err(GitClosureError::UnsafePath(format!(
206                    "symlink target escapes output directory for {}: {}",
207                    file.path, target
208                )));
209            }
210            reject_if_symlink(&destination)?;
211            #[cfg(unix)]
212            {
213                symlink(target_path, &destination)?;
214                continue;
215            }
216            #[cfg(not(unix))]
217            {
218                return Err(GitClosureError::Parse(format!(
219                    "symlink materialization is not supported on this platform: {}",
220                    file.path
221                )));
222            }
223        }
224
225        let digest = sha256_hex(&file.content);
226        if digest != file.sha256 {
227            return Err(GitClosureError::ContentHashMismatch {
228                path: file.path,
229                expected: file.sha256,
230                actual: digest,
231            });
232        }
233
234        ensure_no_symlink_ancestors(&output_abs, &destination)?;
235        fs::write(&destination, &file.content)
236            .map_err(|err| io_error_with_path(err, &destination))?;
237
238        let mode = u32::from_str_radix(&file.mode, 8).map_err(|err| {
239            GitClosureError::Parse(format!(
240                "invalid octal mode for {}: {} ({err})",
241                file.path, file.mode
242            ))
243        })?;
244        #[cfg(unix)]
245        {
246            let permissions = fs::Permissions::from_mode(mode);
247            fs::set_permissions(&destination, permissions)
248                .map_err(|err| io_error_with_path(err, &destination))?;
249        }
250    }
251
252    Ok(())
253}
254
// ── Path safety helpers ───────────────────────────────────────────────────────
256
257/// Validates that a snapshot path is a safe, normalized relative path and
258/// converts it to a `PathBuf` suitable for `output.join(path)`.
259pub(crate) fn sanitized_relative_path(path: &str) -> Result<PathBuf> {
260    if path.is_empty() {
261        return Err(GitClosureError::UnsafePath("path is empty".to_string()));
262    }
263
264    let candidate = Path::new(path);
265
266    if candidate.is_absolute() {
267        return Err(GitClosureError::UnsafePath(path.to_string()));
268    }
269
270    let mut clean = PathBuf::new();
271    for component in candidate.components() {
272        match component {
273            Component::Normal(part) => clean.push(part),
274            Component::CurDir
275            | Component::ParentDir
276            | Component::RootDir
277            | Component::Prefix(_) => {
278                return Err(GitClosureError::UnsafePath(path.to_string()));
279            }
280        }
281    }
282
283    if clean.as_os_str().is_empty() {
284        return Err(GitClosureError::UnsafePath(format!(
285            "path normalizes to empty path: {path}"
286        )));
287    }
288
289    Ok(clean)
290}