1use std::ffi::OsStr;
6use std::fs;
7use std::io::Write as _;
8use std::path::{Component, Path};
9
10#[cfg(unix)]
11use std::os::unix::fs::PermissionsExt;
12
13use ignore::WalkBuilder;
14
15use crate::error::GitClosureError;
16use crate::git::{
17 ensure_git_source_is_clean, is_within_prefix, tracked_paths_from_index,
18 untracked_paths_from_status, GitRepoContext,
19};
20use crate::providers::{fetch_source, Provider, ProviderKind, SourceSpec};
21use crate::utils::io_error_with_path;
22
23use crate::providers::run_command_output;
24
25use super::hash::{compute_snapshot_hash, sha256_hex};
26use super::serial::serialize_snapshot;
27use super::{BuildOptions, Result, SnapshotFile, SnapshotHeader};
28
29pub fn build_snapshot(source: &Path, output: &Path) -> Result<()> {
33 build_snapshot_with_options(source, output, &BuildOptions::default())
34}
35
36pub fn build_snapshot_from_source(
38 source: &str,
39 output: &Path,
40 options: &BuildOptions,
41 provider_kind: ProviderKind,
42) -> Result<()> {
43 let mut annotated_options = options.clone();
44 annotated_options.source_annotation = source_annotation_for_source(source, provider_kind)?;
45 let fetched = fetch_source(source, provider_kind)?;
46 build_snapshot_with_options(&fetched.root, output, &annotated_options)
47}
48
49pub fn build_snapshot_from_provider<P: Provider>(
51 provider: &P,
52 source: &str,
53 output: &Path,
54 options: &BuildOptions,
55) -> Result<()> {
56 let fetched = provider.fetch(source)?;
57 build_snapshot_with_options(&fetched.root, output, options)
58}
59
60pub fn build_snapshot_with_options(
62 source: &Path,
63 output: &Path,
64 options: &BuildOptions,
65) -> Result<()> {
66 let source = fs::canonicalize(source).map_err(|err| io_error_with_path(err, source))?;
67
68 if !source.is_dir() {
69 return Err(GitClosureError::Parse(format!(
70 "source is not a directory: {}",
71 source.display()
72 )));
73 }
74
75 let mut files = collect_files(&source, options)?;
76 files.sort_by(|a, b| a.path.cmp(&b.path));
77
78 let snapshot_hash = compute_snapshot_hash(&files);
79 let (git_rev, git_branch) = read_git_metadata(&source);
80 let mut extra_headers = Vec::new();
81 if let Some((source_uri, source_provider)) = &options.source_annotation {
82 extra_headers.push(("source-uri".to_string(), source_uri.clone()));
83 extra_headers.push(("source-provider".to_string(), source_provider.clone()));
84 }
85 let header = SnapshotHeader {
86 snapshot_hash,
87 file_count: files.len(),
88 git_rev,
89 git_branch,
90 extra_headers,
91 };
92 let serialized = serialize_snapshot(&files, &header);
93
94 if let Some(parent) = output.parent() {
95 fs::create_dir_all(parent).map_err(|err| io_error_with_path(err, parent))?;
96 }
97
98 let mut writer = fs::File::create(output).map_err(|err| io_error_with_path(err, output))?;
99 writer.write_all(serialized.as_bytes())?;
100
101 Ok(())
102}
103
104pub(crate) fn collect_files(root: &Path, options: &BuildOptions) -> Result<Vec<SnapshotFile>> {
107 if let Some(repo_context) = GitRepoContext::discover(root)? {
108 return collect_files_from_git_repo(&repo_context, options);
109 }
110
111 collect_files_from_ignore_walk(root)
112}
113
/// Per-file data gathered by `collect_file_attributes` for one snapshot entry.
///
/// The field values described below are the ones `collect_file_attributes`
/// produces for regular files and for symlinks.
pub(crate) struct FileAttributes {
    /// Hex SHA-256 of the file content; the empty string for symlinks.
    pub(crate) sha256: String,
    /// Mode string: `"120000"` for symlinks; for regular files the octal
    /// permission bits (`mode & 0o777`) on Unix and `"644"` elsewhere.
    pub(crate) mode: String,
    /// Content length in bytes; `0` for symlinks.
    pub(crate) size: u64,
    /// `Some("base64")` when the content is not valid UTF-8, otherwise `None`.
    pub(crate) encoding: Option<String>,
    /// The link target for symlinks, `None` for regular files.
    pub(crate) symlink_target: Option<String>,
    /// Raw file bytes; empty for symlinks.
    pub(crate) content: Vec<u8>,
}
129
130pub(crate) fn collect_file_attributes(
135 path: &Path,
136 metadata: &fs::Metadata,
137) -> Result<FileAttributes> {
138 if metadata.file_type().is_symlink() {
139 let target = fs::read_link(path)?;
140 let target = target
141 .to_str()
142 .ok_or_else(|| {
143 GitClosureError::Parse(format!("non-UTF-8 symlink target: {}", path.display()))
144 })?
145 .to_string();
146 Ok(FileAttributes {
147 sha256: String::new(),
148 mode: "120000".to_string(),
149 size: 0,
150 encoding: None,
151 symlink_target: Some(target),
152 content: Vec::new(),
153 })
154 } else {
155 let bytes = fs::read(path)?;
156 let sha256 = sha256_hex(&bytes);
157 #[cfg(unix)]
158 let mode = format!("{:o}", metadata.permissions().mode() & 0o777);
159 #[cfg(not(unix))]
160 let mode = "644".to_string();
161 let size = bytes.len() as u64;
162 let encoding = if std::str::from_utf8(&bytes).is_ok() {
163 None
164 } else {
165 Some("base64".to_string())
166 };
167 Ok(FileAttributes {
168 sha256,
169 mode,
170 size,
171 encoding,
172 symlink_target: None,
173 content: bytes,
174 })
175 }
176}
177
178fn collect_files_from_git_repo(
179 context: &GitRepoContext,
180 options: &BuildOptions,
181) -> Result<Vec<SnapshotFile>> {
182 if options.require_clean {
183 ensure_git_source_is_clean(context)?;
184 }
185
186 let mut repo_relative_paths = tracked_paths_from_index(context)?;
187 if options.include_untracked {
188 let untracked = untracked_paths_from_status(context)?;
189 repo_relative_paths.extend(untracked);
190 }
191
192 repo_relative_paths.sort();
193 repo_relative_paths.dedup();
194
195 let mut files = Vec::new();
196 let source_root = context.workdir.join(&context.source_prefix);
197 for repo_relative in repo_relative_paths {
198 if !is_within_prefix(&repo_relative, &context.source_prefix) {
199 continue;
200 }
201
202 let absolute = context.workdir.join(&repo_relative);
203 let metadata = match fs::symlink_metadata(&absolute) {
204 Ok(metadata) => metadata,
205 Err(_) => continue,
206 };
207
208 if !metadata.is_file() && !metadata.file_type().is_symlink() {
209 continue;
210 }
211
212 let relative = absolute.strip_prefix(&source_root).map_err(|err| {
213 GitClosureError::Parse(format!(
214 "failed to create source-relative path for git entry: {} ({err})",
215 absolute.display(),
216 ))
217 })?;
218
219 let normalized = normalize_relative_path(relative)?;
220 let attrs = collect_file_attributes(&absolute, &metadata)?;
221
222 files.push(SnapshotFile {
223 path: normalized,
224 sha256: attrs.sha256,
225 mode: attrs.mode,
226 size: attrs.size,
227 encoding: attrs.encoding,
228 symlink_target: attrs.symlink_target,
229 content: attrs.content,
230 });
231 }
232
233 Ok(files)
234}
235
236fn collect_files_from_ignore_walk(root: &Path) -> Result<Vec<SnapshotFile>> {
237 let mut collected = Vec::new();
238
239 let walker = WalkBuilder::new(root)
240 .hidden(false)
241 .standard_filters(true)
242 .follow_links(false)
243 .git_ignore(true)
244 .git_global(true)
245 .git_exclude(true)
246 .build();
247
248 for entry in walker {
249 let entry = entry.map_err(|err| {
250 GitClosureError::Parse(format!("failed to walk source directory: {err}"))
251 })?;
252 let path = entry.path();
253
254 if path == root {
255 continue;
256 }
257
258 let metadata = fs::symlink_metadata(path)?;
259
260 if !metadata.is_file() && !metadata.file_type().is_symlink() {
261 continue;
262 }
263
264 let relative = path.strip_prefix(root).map_err(|err| {
265 GitClosureError::Parse(format!(
266 "failed to strip source prefix: {} ({err})",
267 path.display()
268 ))
269 })?;
270
271 let normalized = normalize_relative_path(relative)?;
272 let attrs = collect_file_attributes(path, &metadata)?;
273
274 collected.push(SnapshotFile {
275 path: normalized,
276 sha256: attrs.sha256,
277 mode: attrs.mode,
278 size: attrs.size,
279 encoding: attrs.encoding,
280 symlink_target: attrs.symlink_target,
281 content: attrs.content,
282 });
283 }
284
285 Ok(collected)
286}
287
288fn read_git_metadata(dir: &Path) -> (Option<String>, Option<String>) {
294 let rev = run_command_output("git", &["rev-parse", "HEAD"], Some(dir))
295 .ok()
296 .filter(|o| o.status.success())
297 .and_then(|o| String::from_utf8(o.stdout).ok())
298 .map(|s| s.trim().to_string())
299 .filter(|s| !s.is_empty());
300
301 let branch = run_command_output("git", &["symbolic-ref", "--short", "HEAD"], Some(dir))
302 .ok()
303 .filter(|o| o.status.success())
304 .and_then(|o| String::from_utf8(o.stdout).ok())
305 .map(|s| s.trim().to_string())
306 .filter(|s| !s.is_empty());
307
308 (rev, branch)
309}
310
311fn source_annotation_for_source(
312 source: &str,
313 provider_kind: ProviderKind,
314) -> Result<Option<(String, String)>> {
315 let selected = selected_provider_kind(source, provider_kind)?;
316 let provider_label = match selected {
317 ProviderKind::Local => return Ok(None),
318 ProviderKind::GitClone => "git-clone",
319 ProviderKind::Nix => "nix",
320 ProviderKind::GithubApi => "github-api",
321 ProviderKind::Auto => unreachable!("provider auto should be resolved"),
322 };
323
324 Ok(Some((source.to_string(), provider_label.to_string())))
325}
326
327fn selected_provider_kind(source: &str, requested: ProviderKind) -> Result<ProviderKind> {
328 if requested != ProviderKind::Auto {
329 return Ok(requested);
330 }
331
332 let spec = SourceSpec::parse(source)?;
333 match spec {
334 SourceSpec::LocalPath(_) => Ok(ProviderKind::Local),
335 SourceSpec::NixFlakeRef(_) => Ok(ProviderKind::Nix),
336 SourceSpec::GitHubRepo { .. } => Ok(ProviderKind::GithubApi),
337 SourceSpec::GitLabRepo { .. } | SourceSpec::GitRemoteUrl(_) => Ok(ProviderKind::GitClone),
338 SourceSpec::Unknown(value) => Err(GitClosureError::Parse(format!(
339 "unsupported source syntax for auto provider: {value}"
340 ))),
341 }
342}
343
344pub(crate) fn normalize_relative_path(path: &Path) -> Result<String> {
351 if path.is_absolute() {
352 return Err(GitClosureError::UnsafePath(path.display().to_string()));
353 }
354
355 let mut components = Vec::new();
356 for component in path.components() {
357 match component {
358 Component::Normal(part) => {
359 if part == OsStr::new(".") || part == OsStr::new("..") {
360 return Err(GitClosureError::UnsafePath(path.display().to_string()));
361 }
362 components.push(
363 part.to_str()
364 .ok_or_else(|| {
365 GitClosureError::Parse(format!(
366 "non-UTF-8 path component: {}",
367 path.display()
368 ))
369 })?
370 .to_string(),
371 );
372 }
373 Component::CurDir
374 | Component::ParentDir
375 | Component::RootDir
376 | Component::Prefix(_) => {
377 return Err(GitClosureError::UnsafePath(path.display().to_string()));
378 }
379 }
380 }
381
382 if components.is_empty() {
383 return Err(GitClosureError::UnsafePath(
384 "empty relative path".to_string(),
385 ));
386 }
387
388 Ok(components.join("/"))
389}
390
// Unit tests for path normalization, per-file attribute collection, source
// annotation, and end-to-end snapshot building against temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // A plain relative path is returned unchanged.
    #[test]
    fn normalize_relative_path_simple() {
        assert_eq!(
            normalize_relative_path(Path::new("src/lib.rs")).unwrap(),
            "src/lib.rs"
        );
    }

    // Paths built with the platform separator must come out '/'-separated.
    #[test]
    fn normalize_relative_path_emits_forward_slashes() {
        let nested = Path::new("dir").join("sub").join("file.txt");
        let normalized = normalize_relative_path(&nested).expect("normalize nested path");
        assert_eq!(normalized, "dir/sub/file.txt");
        assert!(
            !normalized.contains('\\'),
            "snapshot path must not use backslash separators"
        );
    }

    // Absolute paths are rejected.
    #[test]
    fn normalize_relative_path_rejects_absolute() {
        assert!(normalize_relative_path(Path::new("/etc/passwd")).is_err());
    }

    // A leading `..` component is rejected.
    #[test]
    fn normalize_relative_path_rejects_parent_traversal() {
        assert!(normalize_relative_path(Path::new("../etc/passwd")).is_err());
    }

    // A UTF-8 regular file keeps its bytes and size and gets no encoding marker.
    #[test]
    fn collect_file_attributes_regular_file() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("hello.txt");
        fs::write(&file, b"hello\n").unwrap();
        let meta = fs::symlink_metadata(&file).unwrap();
        let attrs = collect_file_attributes(&file, &meta).unwrap();
        assert!(attrs.symlink_target.is_none());
        assert_eq!(attrs.content, b"hello\n");
        assert_eq!(attrs.size, 6);
        assert!(
            attrs.encoding.is_none(),
            "UTF-8 file must not have base64 encoding"
        );
    }

    // Content that is not valid UTF-8 is flagged as base64.
    #[test]
    fn collect_file_attributes_binary_file_gets_base64_encoding() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("blob.bin");
        fs::write(&file, [0u8, 159, 255]).unwrap();
        let meta = fs::symlink_metadata(&file).unwrap();
        let attrs = collect_file_attributes(&file, &meta).unwrap();
        assert_eq!(attrs.encoding.as_deref(), Some("base64"));
    }

    // A symlink records its target and the fixed link mode, with no content.
    #[cfg(unix)]
    #[test]
    fn collect_file_attributes_symlink() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("target.txt"), b"x").unwrap();
        std::os::unix::fs::symlink("target.txt", dir.path().join("link")).unwrap();
        let link = dir.path().join("link");
        let meta = fs::symlink_metadata(&link).unwrap();
        let attrs = collect_file_attributes(&link, &meta).unwrap();
        assert_eq!(attrs.symlink_target.as_deref(), Some("target.txt"));
        assert_eq!(attrs.mode, "120000");
        assert!(attrs.content.is_empty());
    }

    // Source-text guard: scans this file (via `include_str!`) for the legacy
    // pattern of joining the source root inline at the prefix-stripping call.
    // The needle is assembled from fragments so that this test's own source
    // never contains it.
    #[test]
    fn collect_files_from_git_repo_precomputes_source_root_once() {
        let source = include_str!("build.rs");
        let legacy = [
            "strip_prefix(",
            "context.workdir.join(&context.source_prefix)",
            ")",
        ]
        .join("");
        assert!(
            !source.contains(&legacy),
            "collect_files_from_git_repo should avoid recomputing source root in loop"
        );
    }

    // An explicitly requested GitHub API provider yields (source, "github-api").
    #[test]
    fn source_annotation_for_github_api_includes_uri_and_provider() {
        let annotation =
            source_annotation_for_source("gh:owner/repo@main", ProviderKind::GithubApi)
                .expect("github api source annotation should resolve");
        assert_eq!(
            annotation,
            Some(("gh:owner/repo@main".to_string(), "github-api".to_string()))
        );
    }

    // Builds the same tree with and without an annotation: the provenance
    // headers must be present in the annotated snapshot, and the snapshot
    // hash must be identical in both.
    #[test]
    fn build_with_source_annotation_writes_headers_without_changing_snapshot_hash() {
        let source = TempDir::new().expect("create source tempdir");
        let out = TempDir::new().expect("create output tempdir");
        fs::write(source.path().join("a.txt"), b"alpha\n").expect("write source file");

        let plain_snapshot = out.path().join("plain.gcl");
        build_snapshot(source.path(), &plain_snapshot).expect("build plain snapshot");

        let annotated_snapshot = out.path().join("annotated.gcl");
        build_snapshot_with_options(
            source.path(),
            &annotated_snapshot,
            &BuildOptions {
                include_untracked: false,
                require_clean: false,
                source_annotation: Some((
                    "gh:owner/repo@main".to_string(),
                    "github-api".to_string(),
                )),
            },
        )
        .expect("build annotated snapshot");

        let plain_text = fs::read_to_string(&plain_snapshot).expect("read plain snapshot");
        let annotated_text =
            fs::read_to_string(&annotated_snapshot).expect("read annotated snapshot");

        let (plain_header, _plain_files) =
            crate::snapshot::serial::parse_snapshot(&plain_text).expect("parse plain snapshot");
        let (annotated_header, _annotated_files) =
            crate::snapshot::serial::parse_snapshot(&annotated_text)
                .expect("parse annotated snapshot");

        assert_eq!(plain_header.snapshot_hash, annotated_header.snapshot_hash);
        assert!(annotated_header
            .extra_headers
            .contains(&("source-uri".to_string(), "gh:owner/repo@main".to_string())));
        assert!(annotated_header
            .extra_headers
            .contains(&("source-provider".to_string(), "github-api".to_string())));
    }

    // Building through the local provider must not add provenance headers.
    #[test]
    fn build_from_local_source_does_not_emit_provenance_headers() {
        let source = TempDir::new().expect("create source tempdir");
        fs::write(source.path().join("x.txt"), b"hello\n").expect("write source file");

        let out = TempDir::new().expect("create output tempdir");
        let snapshot = out.path().join("snapshot.gcl");
        build_snapshot_from_source(
            source.path().to_str().expect("utf-8 source path"),
            &snapshot,
            &BuildOptions::default(),
            ProviderKind::Local,
        )
        .expect("build from local provider");

        let text = fs::read_to_string(snapshot).expect("read snapshot");
        let (header, _files) =
            crate::snapshot::serial::parse_snapshot(&text).expect("parse snapshot");
        assert!(
            !header
                .extra_headers
                .iter()
                .any(|(k, _)| k == "source-uri" || k == "source-provider"),
            "local builds must not emit source provenance headers"
        );
    }
}