Skip to main content

git_closure/snapshot/
build.rs

1/// Snapshot construction from a source directory.
2///
3/// Entry points: [`build_snapshot`], [`build_snapshot_with_options`],
4/// [`build_snapshot_from_source`], [`build_snapshot_from_provider`].
5use std::ffi::OsStr;
6use std::fs;
7use std::io::Write as _;
8use std::path::{Component, Path};
9
10#[cfg(unix)]
11use std::os::unix::fs::PermissionsExt;
12
13use ignore::WalkBuilder;
14
15use crate::error::GitClosureError;
16use crate::git::{
17    ensure_git_source_is_clean, is_within_prefix, tracked_paths_from_index,
18    untracked_paths_from_status, GitRepoContext,
19};
20use crate::providers::{fetch_source, Provider, ProviderKind, SourceSpec};
21use crate::utils::io_error_with_path;
22
23use crate::providers::run_command_output;
24
25use super::hash::{compute_snapshot_hash, sha256_hex};
26use super::serial::serialize_snapshot;
27use super::{BuildOptions, Result, SnapshotFile, SnapshotHeader};
28
29// ── Public API ────────────────────────────────────────────────────────────────
30
31/// Builds a snapshot of `source` using default options.
32pub fn build_snapshot(source: &Path, output: &Path) -> Result<()> {
33    build_snapshot_with_options(source, output, &BuildOptions::default())
34}
35
36/// Builds a snapshot from a URL or source specifier, fetching it via `provider_kind`.
37pub fn build_snapshot_from_source(
38    source: &str,
39    output: &Path,
40    options: &BuildOptions,
41    provider_kind: ProviderKind,
42) -> Result<()> {
43    let mut annotated_options = options.clone();
44    annotated_options.source_annotation = source_annotation_for_source(source, provider_kind)?;
45    let fetched = fetch_source(source, provider_kind)?;
46    build_snapshot_with_options(&fetched.root, output, &annotated_options)
47}
48
49/// Builds a snapshot using a caller-supplied [`Provider`] implementation.
50pub fn build_snapshot_from_provider<P: Provider>(
51    provider: &P,
52    source: &str,
53    output: &Path,
54    options: &BuildOptions,
55) -> Result<()> {
56    let fetched = provider.fetch(source)?;
57    build_snapshot_with_options(&fetched.root, output, options)
58}
59
60/// Core build function: collects, sorts, hashes, and serializes all files.
61pub fn build_snapshot_with_options(
62    source: &Path,
63    output: &Path,
64    options: &BuildOptions,
65) -> Result<()> {
66    let source = fs::canonicalize(source).map_err(|err| io_error_with_path(err, source))?;
67
68    if !source.is_dir() {
69        return Err(GitClosureError::Parse(format!(
70            "source is not a directory: {}",
71            source.display()
72        )));
73    }
74
75    let mut files = collect_files(&source, options)?;
76    files.sort_by(|a, b| a.path.cmp(&b.path));
77
78    let snapshot_hash = compute_snapshot_hash(&files);
79    let (git_rev, git_branch) = read_git_metadata(&source);
80    let mut extra_headers = Vec::new();
81    if let Some((source_uri, source_provider)) = &options.source_annotation {
82        extra_headers.push(("source-uri".to_string(), source_uri.clone()));
83        extra_headers.push(("source-provider".to_string(), source_provider.clone()));
84    }
85    let header = SnapshotHeader {
86        snapshot_hash,
87        file_count: files.len(),
88        git_rev,
89        git_branch,
90        extra_headers,
91    };
92    let serialized = serialize_snapshot(&files, &header);
93
94    if let Some(parent) = output.parent() {
95        fs::create_dir_all(parent).map_err(|err| io_error_with_path(err, parent))?;
96    }
97
98    let mut writer = fs::File::create(output).map_err(|err| io_error_with_path(err, output))?;
99    writer.write_all(serialized.as_bytes())?;
100
101    Ok(())
102}
103
104// ── File collection ───────────────────────────────────────────────────────────
105
106pub(crate) fn collect_files(root: &Path, options: &BuildOptions) -> Result<Vec<SnapshotFile>> {
107    if let Some(repo_context) = GitRepoContext::discover(root)? {
108        return collect_files_from_git_repo(&repo_context, options);
109    }
110
111    collect_files_from_ignore_walk(root)
112}
113
114// ── T-21: Consolidated file-attribute helper (eliminates 6-tuple duplication) ─
115
/// Resolved attributes for a single filesystem entry.
///
/// Unifies the symlink / regular-file classification shared by
/// `collect_files_from_git_repo` and `collect_files_from_ignore_walk`; values
/// are produced by `collect_file_attributes`.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be printed in
/// diagnostics and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct FileAttributes {
    /// Hex SHA-256 of the file bytes; empty string for symlinks.
    pub(crate) sha256: String,
    /// Octal permission bits as text for regular files (`"644"` on non-Unix
    /// targets); the literal `"120000"` for symlinks.
    pub(crate) mode: String,
    /// Number of bytes read from the file; `0` for symlinks.
    pub(crate) size: u64,
    /// `Some("base64")` when the bytes are not valid UTF-8, otherwise `None`.
    pub(crate) encoding: Option<String>,
    /// Link target text for symlinks; `None` for regular files.
    pub(crate) symlink_target: Option<String>,
    /// Raw file bytes; empty for symlinks.
    pub(crate) content: Vec<u8>,
}
129
130/// Reads a single filesystem entry and computes all snapshot attributes.
131///
132/// `path` must point to the actual file (or symlink) on disk.  `metadata` must
133/// be obtained via `symlink_metadata` so that symlinks are not followed.
134pub(crate) fn collect_file_attributes(
135    path: &Path,
136    metadata: &fs::Metadata,
137) -> Result<FileAttributes> {
138    if metadata.file_type().is_symlink() {
139        let target = fs::read_link(path)?;
140        let target = target
141            .to_str()
142            .ok_or_else(|| {
143                GitClosureError::Parse(format!("non-UTF-8 symlink target: {}", path.display()))
144            })?
145            .to_string();
146        Ok(FileAttributes {
147            sha256: String::new(),
148            mode: "120000".to_string(),
149            size: 0,
150            encoding: None,
151            symlink_target: Some(target),
152            content: Vec::new(),
153        })
154    } else {
155        let bytes = fs::read(path)?;
156        let sha256 = sha256_hex(&bytes);
157        #[cfg(unix)]
158        let mode = format!("{:o}", metadata.permissions().mode() & 0o777);
159        #[cfg(not(unix))]
160        let mode = "644".to_string();
161        let size = bytes.len() as u64;
162        let encoding = if std::str::from_utf8(&bytes).is_ok() {
163            None
164        } else {
165            Some("base64".to_string())
166        };
167        Ok(FileAttributes {
168            sha256,
169            mode,
170            size,
171            encoding,
172            symlink_target: None,
173            content: bytes,
174        })
175    }
176}
177
178fn collect_files_from_git_repo(
179    context: &GitRepoContext,
180    options: &BuildOptions,
181) -> Result<Vec<SnapshotFile>> {
182    if options.require_clean {
183        ensure_git_source_is_clean(context)?;
184    }
185
186    let mut repo_relative_paths = tracked_paths_from_index(context)?;
187    if options.include_untracked {
188        let untracked = untracked_paths_from_status(context)?;
189        repo_relative_paths.extend(untracked);
190    }
191
192    repo_relative_paths.sort();
193    repo_relative_paths.dedup();
194
195    let mut files = Vec::new();
196    let source_root = context.workdir.join(&context.source_prefix);
197    for repo_relative in repo_relative_paths {
198        if !is_within_prefix(&repo_relative, &context.source_prefix) {
199            continue;
200        }
201
202        let absolute = context.workdir.join(&repo_relative);
203        let metadata = match fs::symlink_metadata(&absolute) {
204            Ok(metadata) => metadata,
205            Err(_) => continue,
206        };
207
208        if !metadata.is_file() && !metadata.file_type().is_symlink() {
209            continue;
210        }
211
212        let relative = absolute.strip_prefix(&source_root).map_err(|err| {
213            GitClosureError::Parse(format!(
214                "failed to create source-relative path for git entry: {} ({err})",
215                absolute.display(),
216            ))
217        })?;
218
219        let normalized = normalize_relative_path(relative)?;
220        let attrs = collect_file_attributes(&absolute, &metadata)?;
221
222        files.push(SnapshotFile {
223            path: normalized,
224            sha256: attrs.sha256,
225            mode: attrs.mode,
226            size: attrs.size,
227            encoding: attrs.encoding,
228            symlink_target: attrs.symlink_target,
229            content: attrs.content,
230        });
231    }
232
233    Ok(files)
234}
235
236fn collect_files_from_ignore_walk(root: &Path) -> Result<Vec<SnapshotFile>> {
237    let mut collected = Vec::new();
238
239    let walker = WalkBuilder::new(root)
240        .hidden(false)
241        .standard_filters(true)
242        .follow_links(false)
243        .git_ignore(true)
244        .git_global(true)
245        .git_exclude(true)
246        .build();
247
248    for entry in walker {
249        let entry = entry.map_err(|err| {
250            GitClosureError::Parse(format!("failed to walk source directory: {err}"))
251        })?;
252        let path = entry.path();
253
254        if path == root {
255            continue;
256        }
257
258        let metadata = fs::symlink_metadata(path)?;
259
260        if !metadata.is_file() && !metadata.file_type().is_symlink() {
261            continue;
262        }
263
264        let relative = path.strip_prefix(root).map_err(|err| {
265            GitClosureError::Parse(format!(
266                "failed to strip source prefix: {} ({err})",
267                path.display()
268            ))
269        })?;
270
271        let normalized = normalize_relative_path(relative)?;
272        let attrs = collect_file_attributes(path, &metadata)?;
273
274        collected.push(SnapshotFile {
275            path: normalized,
276            sha256: attrs.sha256,
277            mode: attrs.mode,
278            size: attrs.size,
279            encoding: attrs.encoding,
280            symlink_target: attrs.symlink_target,
281            content: attrs.content,
282        });
283    }
284
285    Ok(collected)
286}
287
288// ── Git metadata capture ──────────────────────────────────────────────────────
289
290/// Attempts to read the current git revision and branch from `dir`.
291/// Both fields are best-effort: failures (non-git directory, detached HEAD,
292/// git not on PATH) silently return `None` — they must not abort the build.
293fn read_git_metadata(dir: &Path) -> (Option<String>, Option<String>) {
294    let rev = run_command_output("git", &["rev-parse", "HEAD"], Some(dir))
295        .ok()
296        .filter(|o| o.status.success())
297        .and_then(|o| String::from_utf8(o.stdout).ok())
298        .map(|s| s.trim().to_string())
299        .filter(|s| !s.is_empty());
300
301    let branch = run_command_output("git", &["symbolic-ref", "--short", "HEAD"], Some(dir))
302        .ok()
303        .filter(|o| o.status.success())
304        .and_then(|o| String::from_utf8(o.stdout).ok())
305        .map(|s| s.trim().to_string())
306        .filter(|s| !s.is_empty());
307
308    (rev, branch)
309}
310
311fn source_annotation_for_source(
312    source: &str,
313    provider_kind: ProviderKind,
314) -> Result<Option<(String, String)>> {
315    let selected = selected_provider_kind(source, provider_kind)?;
316    let provider_label = match selected {
317        ProviderKind::Local => return Ok(None),
318        ProviderKind::GitClone => "git-clone",
319        ProviderKind::Nix => "nix",
320        ProviderKind::GithubApi => "github-api",
321        ProviderKind::Auto => unreachable!("provider auto should be resolved"),
322    };
323
324    Ok(Some((source.to_string(), provider_label.to_string())))
325}
326
327fn selected_provider_kind(source: &str, requested: ProviderKind) -> Result<ProviderKind> {
328    if requested != ProviderKind::Auto {
329        return Ok(requested);
330    }
331
332    let spec = SourceSpec::parse(source)?;
333    match spec {
334        SourceSpec::LocalPath(_) => Ok(ProviderKind::Local),
335        SourceSpec::NixFlakeRef(_) => Ok(ProviderKind::Nix),
336        SourceSpec::GitHubRepo { .. } => Ok(ProviderKind::GithubApi),
337        SourceSpec::GitLabRepo { .. } | SourceSpec::GitRemoteUrl(_) => Ok(ProviderKind::GitClone),
338        SourceSpec::Unknown(value) => Err(GitClosureError::Parse(format!(
339            "unsupported source syntax for auto provider: {value}"
340        ))),
341    }
342}
343
344// ── Path normalization ────────────────────────────────────────────────────────
345
346/// Converts a relative filesystem path to a normalized forward-slash UTF-8
347/// string suitable for inclusion in a snapshot.
348///
349/// Rejects absolute paths, `.`, `..`, and any non-UTF-8 component.
350pub(crate) fn normalize_relative_path(path: &Path) -> Result<String> {
351    if path.is_absolute() {
352        return Err(GitClosureError::UnsafePath(path.display().to_string()));
353    }
354
355    let mut components = Vec::new();
356    for component in path.components() {
357        match component {
358            Component::Normal(part) => {
359                if part == OsStr::new(".") || part == OsStr::new("..") {
360                    return Err(GitClosureError::UnsafePath(path.display().to_string()));
361                }
362                components.push(
363                    part.to_str()
364                        .ok_or_else(|| {
365                            GitClosureError::Parse(format!(
366                                "non-UTF-8 path component: {}",
367                                path.display()
368                            ))
369                        })?
370                        .to_string(),
371                );
372            }
373            Component::CurDir
374            | Component::ParentDir
375            | Component::RootDir
376            | Component::Prefix(_) => {
377                return Err(GitClosureError::UnsafePath(path.display().to_string()));
378            }
379        }
380    }
381
382    if components.is_empty() {
383        return Err(GitClosureError::UnsafePath(
384            "empty relative path".to_string(),
385        ));
386    }
387
388    Ok(components.join("/"))
389}
390
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // ── normalize_relative_path ──────────────────────────────────────────

    /// A plain relative path is returned unchanged.
    #[test]
    fn normalize_relative_path_simple() {
        let normalized = normalize_relative_path(Path::new("src/lib.rs")).unwrap();
        assert_eq!(normalized, "src/lib.rs");
    }

    /// Components are joined with `/` whatever the platform separator is.
    #[test]
    fn normalize_relative_path_emits_forward_slashes() {
        let mut nested = Path::new("dir").to_path_buf();
        nested.push("sub");
        nested.push("file.txt");

        let rendered = normalize_relative_path(&nested).expect("normalize nested path");

        assert_eq!(rendered, "dir/sub/file.txt");
        assert!(
            !rendered.contains('\\'),
            "snapshot path must not use backslash separators"
        );
    }

    /// A rooted path is refused.
    #[test]
    fn normalize_relative_path_rejects_absolute() {
        let outcome = normalize_relative_path(Path::new("/etc/passwd"));
        assert!(outcome.is_err());
    }

    /// A leading `..` component is refused.
    #[test]
    fn normalize_relative_path_rejects_parent_traversal() {
        let outcome = normalize_relative_path(Path::new("../etc/passwd"));
        assert!(outcome.is_err());
    }

    // ── collect_file_attributes ──────────────────────────────────────────

    /// A UTF-8 regular file keeps its bytes and size and gets no encoding.
    #[test]
    fn collect_file_attributes_regular_file() {
        let workspace = TempDir::new().unwrap();
        let file_path = workspace.path().join("hello.txt");
        fs::write(&file_path, b"hello\n").unwrap();

        let metadata = fs::symlink_metadata(&file_path).unwrap();
        let FileAttributes {
            symlink_target,
            content,
            size,
            encoding,
            ..
        } = collect_file_attributes(&file_path, &metadata).unwrap();

        assert!(symlink_target.is_none());
        assert_eq!(content, b"hello\n");
        assert_eq!(size, 6);
        assert!(
            encoding.is_none(),
            "UTF-8 file must not have base64 encoding"
        );
    }

    /// Bytes that are not valid UTF-8 are flagged for base64 encoding.
    #[test]
    fn collect_file_attributes_binary_file_gets_base64_encoding() {
        let workspace = TempDir::new().unwrap();
        let blob_path = workspace.path().join("blob.bin");
        fs::write(&blob_path, [0u8, 159, 255]).unwrap();

        let metadata = fs::symlink_metadata(&blob_path).unwrap();
        let attributes = collect_file_attributes(&blob_path, &metadata).unwrap();

        assert_eq!(attributes.encoding.as_deref(), Some("base64"));
    }

    /// A symlink records its target and the fixed link mode, with no content.
    #[cfg(unix)]
    #[test]
    fn collect_file_attributes_symlink() {
        let workspace = TempDir::new().unwrap();
        fs::write(workspace.path().join("target.txt"), b"x").unwrap();
        let link_path = workspace.path().join("link");
        std::os::unix::fs::symlink("target.txt", &link_path).unwrap();

        let metadata = fs::symlink_metadata(&link_path).unwrap();
        let attributes = collect_file_attributes(&link_path, &metadata).unwrap();

        assert_eq!(attributes.symlink_target.as_deref(), Some("target.txt"));
        assert_eq!(attributes.mode, "120000");
        assert!(attributes.content.is_empty());
    }

    // ── source-level and provenance checks ───────────────────────────────

    /// Guards against re-joining the source root inside the collection loop.
    /// The needle is assembled from pieces so that this file never contains
    /// it literally.
    #[test]
    fn collect_files_from_git_repo_precomputes_source_root_once() {
        let file_text = include_str!("build.rs");
        let needle = [
            "strip_prefix(",
            "context.workdir.join(&context.source_prefix)",
            ")",
        ]
        .concat();

        assert!(
            !file_text.contains(&needle),
            "collect_files_from_git_repo should avoid recomputing source root in loop"
        );
    }

    /// An explicit GitHub API provider yields the URI and the provider label.
    #[test]
    fn source_annotation_for_github_api_includes_uri_and_provider() {
        let resolved = source_annotation_for_source("gh:owner/repo@main", ProviderKind::GithubApi)
            .expect("github api source annotation should resolve");

        let expected = Some(("gh:owner/repo@main".to_string(), "github-api".to_string()));
        assert_eq!(resolved, expected);
    }

    /// Provenance headers are written but do not affect the snapshot hash.
    #[test]
    fn build_with_source_annotation_writes_headers_without_changing_snapshot_hash() {
        let source_dir = TempDir::new().expect("create source tempdir");
        let output_dir = TempDir::new().expect("create output tempdir");
        fs::write(source_dir.path().join("a.txt"), b"alpha\n").expect("write source file");

        let plain_path = output_dir.path().join("plain.gcl");
        build_snapshot(source_dir.path(), &plain_path).expect("build plain snapshot");

        let annotated_options = BuildOptions {
            include_untracked: false,
            require_clean: false,
            source_annotation: Some((
                "gh:owner/repo@main".to_string(),
                "github-api".to_string(),
            )),
        };
        let annotated_path = output_dir.path().join("annotated.gcl");
        build_snapshot_with_options(source_dir.path(), &annotated_path, &annotated_options)
            .expect("build annotated snapshot");

        let plain_text = fs::read_to_string(&plain_path).expect("read plain snapshot");
        let annotated_text = fs::read_to_string(&annotated_path).expect("read annotated snapshot");

        let (plain_header, _plain_files) =
            crate::snapshot::serial::parse_snapshot(&plain_text).expect("parse plain snapshot");
        let (annotated_header, _annotated_files) =
            crate::snapshot::serial::parse_snapshot(&annotated_text)
                .expect("parse annotated snapshot");

        assert_eq!(plain_header.snapshot_hash, annotated_header.snapshot_hash);

        let uri_header = ("source-uri".to_string(), "gh:owner/repo@main".to_string());
        let provider_header = ("source-provider".to_string(), "github-api".to_string());
        assert!(annotated_header.extra_headers.contains(&uri_header));
        assert!(annotated_header.extra_headers.contains(&provider_header));
    }

    /// A local build carries no provenance headers.
    #[test]
    fn build_from_local_source_does_not_emit_provenance_headers() {
        let source_dir = TempDir::new().expect("create source tempdir");
        fs::write(source_dir.path().join("x.txt"), b"hello\n").expect("write source file");

        let output_dir = TempDir::new().expect("create output tempdir");
        let snapshot_path = output_dir.path().join("snapshot.gcl");
        let source_text = source_dir.path().to_str().expect("utf-8 source path");
        build_snapshot_from_source(
            source_text,
            &snapshot_path,
            &BuildOptions::default(),
            ProviderKind::Local,
        )
        .expect("build from local provider");

        let snapshot_text = fs::read_to_string(snapshot_path).expect("read snapshot");
        let (header, _files) =
            crate::snapshot::serial::parse_snapshot(&snapshot_text).expect("parse snapshot");

        let has_provenance = header
            .extra_headers
            .iter()
            .any(|(key, _)| matches!(key.as_str(), "source-uri" | "source-provider"));
        assert!(
            !has_provenance,
            "local builds must not emit source provenance headers"
        );
    }
}
561}