soar_dl/oci.rs
1use std::{
2 fs::{File, OpenOptions, Permissions},
3 io::{Read as _, Seek as _, SeekFrom, Write as _},
4 os::unix::fs::PermissionsExt as _,
5 path::{Path, PathBuf},
6 sync::{Arc, Mutex},
7 thread,
8};
9
10use serde::Deserialize;
11use soar_utils::fs::is_elf;
12use tracing::{debug, trace};
13use ureq::http::header::{ACCEPT, AUTHORIZATION, ETAG, IF_RANGE, RANGE};
14
15use crate::{
16 download::Download,
17 error::DownloadError,
18 filter::Filter,
19 http_client::SHARED_AGENT,
20 types::{OverwriteMode, Progress, ResumeInfo},
21 xattr::{read_resume, remove_resume, write_resume},
22};
23
/// A parsed reference to an OCI artifact: registry host, package path, and a tag or digest.
///
/// `tag` holds either a plain tag (for example `latest`) or a digest string
/// (for example `sha256:<hex>`), depending on how the reference was written.
///
/// The type is comparable and hashable so references can be deduplicated or
/// used as map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OciReference {
    /// Registry host name, e.g. `ghcr.io`.
    pub registry: String,
    /// Package path inside the registry, e.g. `org/repo`.
    pub package: String,
    /// Tag or digest that selects the artifact version.
    pub tag: String,
}
30
31impl From<&str> for OciReference {
32 /// Parses an OCI/GHCR reference string into an `OciReference`.
33 ///
34 /// Accepts strings with optional `ghcr.io/` prefix and extracts the package and tag or digest:
35 /// - `package@sha256:<digest>` → digest used as `tag`
36 /// - `package:<tag>` → tag used as `tag`
37 /// - otherwise the full path is treated as `package` and `tag` is set to `"latest"`.
38 ///
39 /// # Examples
40 ///
41 /// ```
42 /// use soar_dl::oci::OciReference;
43 ///
44 /// let r = OciReference::from("ghcr.io/org/repo@sha256:deadbeef");
45 /// assert_eq!(r.registry, "ghcr.io");
46 /// assert_eq!(r.package, "org/repo");
47 /// assert_eq!(r.tag, "sha256:deadbeef");
48 ///
49 /// let r2 = OciReference::from("org/repo:1.2.3");
50 /// assert_eq!(r2.registry, "ghcr.io");
51 /// assert_eq!(r2.package, "org/repo");
52 /// assert_eq!(r2.tag, "1.2.3");
53 ///
54 /// let r3 = OciReference::from("org/repo");
55 /// assert_eq!(r3.registry, "ghcr.io");
56 /// assert_eq!(r3.package, "org/repo");
57 /// assert_eq!(r3.tag, "latest");
58 /// ```
59 fn from(value: &str) -> Self {
60 let paths = value.trim_start_matches("ghcr.io/");
61
62 // <package>@sha256:<digest>
63 if let Some((package, digest)) = paths.split_once('@') {
64 return Self {
65 registry: "ghcr.io".to_string(),
66 package: package.to_string(),
67 tag: digest.to_string(),
68 };
69 }
70
71 // <package>:<tag>
72 if let Some((package, tag)) = paths.split_once(':') {
73 return Self {
74 registry: "ghcr.io".to_string(),
75 package: package.to_string(),
76 tag: tag.to_string(),
77 };
78 }
79
80 Self {
81 registry: "ghcr.io".to_string(),
82 package: paths.to_string(),
83 tag: "latest".to_string(),
84 }
85 }
86}
87
88#[derive(Debug, Clone, Deserialize)]
89pub struct OciManifest {
90 #[serde(rename = "mediaType")]
91 pub media_type: String,
92 pub config: OciConfig,
93 pub layers: Vec<OciLayer>,
94}
95
96#[derive(Debug, Clone, Deserialize)]
97pub struct OciConfig {
98 #[serde(rename = "mediaType")]
99 pub media_type: String,
100 pub digest: String,
101 pub size: u64,
102}
103
104#[derive(Debug, Clone, Deserialize)]
105pub struct OciLayer {
106 #[serde(rename = "mediaType")]
107 pub media_type: String,
108 pub digest: String,
109 pub size: u64,
110 #[serde(default)]
111 pub annotations: std::collections::HashMap<String, String>,
112}
113
114impl OciLayer {
115 /// Extracts the human-friendly title from the layer's annotations.
116 ///
117 /// Returns `Some(&str)` containing the value of the `"org.opencontainers.image.title"`
118 /// annotation if present, or `None` if that annotation is absent.
119 ///
120 /// # Examples
121 ///
122 /// ```
123 /// use std::collections::HashMap;
124 /// use soar_dl::oci::OciLayer;
125 ///
126 /// let mut annotations = HashMap::new();
127 /// annotations.insert(
128 /// "org.opencontainers.image.title".to_string(),
129 /// "example.txt".to_string(),
130 /// );
131 ///
132 /// let layer = OciLayer {
133 /// media_type: String::new(),
134 /// digest: String::new(),
135 /// size: 0,
136 /// annotations,
137 /// };
138 ///
139 /// assert_eq!(layer.title(), Some("example.txt"));
140 /// ```
141 pub fn title(&self) -> Option<&str> {
142 self.annotations
143 .get("org.opencontainers.image.title")
144 .map(|s| s.as_str())
145 }
146}
147
148#[derive(Clone)]
149pub struct OciDownload {
150 reference: OciReference,
151 api: String,
152 filter: Filter,
153 output: Option<String>,
154 overwrite: OverwriteMode,
155 extract: bool,
156 extract_to: Option<PathBuf>,
157 parallel: usize,
158 on_progress: Option<Arc<dyn Fn(Progress) + Send + Sync>>,
159}
160
161impl OciDownload {
162 /// Creates a new `OciDownload` for the given OCI reference using sensible defaults.
163 ///
164 /// Defaults:
165 /// - `api` = "https://ghcr.io/v2"
166 /// - `filter` = `Filter::default()`
167 /// - no output path (downloads to current working directory unless `output` is set)
168 /// - `overwrite` = `OverwriteMode::Prompt`
169 /// - `extract` = `false`
170 /// - `parallel` = 1 (sequential downloads)
171 /// - no progress callback
172 ///
173 /// # Examples
174 ///
175 /// ```
176 /// use soar_dl::oci::OciDownload;
177 ///
178 /// let dl = OciDownload::new("ghcr.io/myorg/myrepo:latest");
179 /// // configure and run:
180 /// // let result = dl.output("out").execute();
181 /// ```
182 pub fn new(reference: impl Into<OciReference>) -> Self {
183 Self {
184 reference: reference.into(),
185 api: "https://ghcr.io/v2".into(),
186 filter: Filter::default(),
187 output: None,
188 overwrite: OverwriteMode::Prompt,
189 extract: false,
190 extract_to: None,
191 parallel: 1,
192 on_progress: None,
193 }
194 }
195
196 /// Sets the base API URL used for manifest and blob requests and returns the downloader for chaining.
197 ///
198 /// # Examples
199 ///
200 /// ```
201 /// use soar_dl::oci::OciDownload;
202 ///
203 /// let dl = OciDownload::new("ghcr.io/foo/bar:latest").api("https://ghcr.example.com/v2");
204 /// ```
205 pub fn api(mut self, api: impl Into<String>) -> Self {
206 self.api = api.into();
207 self
208 }
209
210 /// Sets the layer filter used to select which OCI image layers will be downloaded.
211 ///
212 /// The provided `filter` is applied when a manifest is inspected to decide which layers are included
213 /// in the download operation. Returns the downloader with the filter updated for further chaining.
214 ///
215 /// # Examples
216 ///
217 /// ```
218 /// use soar_dl::oci::OciDownload;
219 /// use soar_dl::filter::Filter;
220 ///
221 /// let downloader = OciDownload::new("owner/repo:tag")
222 /// .filter(Filter::default())
223 /// .output("out/dir");
224 /// ```
225 pub fn filter(mut self, filter: Filter) -> Self {
226 self.filter = filter;
227 self
228 }
229
230 /// Sets the output directory path where downloaded files will be written.
231 ///
232 /// The provided path is stored and used as the destination for downloaded blobs and extracted contents.
233 ///
234 /// # Examples
235 ///
236 /// ```
237 /// use soar_dl::oci::OciDownload;
238 ///
239 /// let dl = OciDownload::new("ghcr.io/org/repo:tag").output("downloads/");
240 /// ```
241 pub fn output(mut self, path: impl Into<String>) -> Self {
242 self.output = Some(path.into());
243 self
244 }
245
246 /// Sets how existing files are handled when writing downloads.
247 ///
248 /// This configures the downloader's overwrite behavior to the provided `mode`.
249 ///
250 /// # Examples
251 ///
252 /// ```
253 /// use soar_dl::oci::OciDownload;
254 /// use soar_dl::types::OverwriteMode;
255 ///
256 /// let dl = OciDownload::new("ghcr.io/example/repo:tag").overwrite(OverwriteMode::Prompt);
257 /// ```
258 /// Returns the modified downloader with the given overwrite mode.
259 pub fn overwrite(mut self, mode: OverwriteMode) -> Self {
260 self.overwrite = mode;
261 self
262 }
263
264 /// Enable or disable extraction of downloaded layers.
265 ///
266 /// When `true`, downloaded archive layers will be extracted after download into the extraction
267 /// destination (if set via `extract_to`) or into the configured output directory. When `false`,
268 /// archives are left as downloaded.
269 ///
270 /// # Examples
271 ///
272 /// ```
273 /// use soar_dl::oci::OciDownload;
274 ///
275 /// let dl = OciDownload::new("ghcr.io/org/pkg:1.0")
276 /// .extract(true)
277 /// .extract_to("/tmp/out");
278 /// ```
279 pub fn extract(mut self, extract: bool) -> Self {
280 self.extract = extract;
281 self
282 }
283
284 /// Sets the destination directory for extracting downloaded archives.
285 ///
286 /// When extraction is enabled, extracted files will be written to this path instead of the download output directory.
287 ///
288 /// # Examples
289 ///
290 /// ```
291 /// use soar_dl::oci::OciDownload;
292 ///
293 /// let downloader = OciDownload::new("ghcr.io/org/pkg:1.0")
294 /// .extract(true)
295 /// .extract_to("/tmp/extracted");
296 /// ```
297 pub fn extract_to(mut self, path: impl Into<PathBuf>) -> Self {
298 self.extract_to = Some(path.into());
299 self
300 }
301
302 /// Set the number of parallel download workers (at least 1).
303 ///
304 /// If `n` is less than 1, the value will be clamped to 1. Returns the updated downloader to allow chaining.
305 ///
306 /// # Examples
307 ///
308 /// ```
309 /// use soar_dl::oci::OciDownload;
310 ///
311 /// let _ = OciDownload::new("owner/repo:tag").parallel(4);
312 /// ```
313 pub fn parallel(mut self, n: usize) -> Self {
314 self.parallel = n.max(1);
315 self
316 }
317
318 /// Registers a progress callback to receive download `Progress` events.
319 ///
320 /// The provided callback will be invoked with progress updates (e.g., Starting, Chunk, Complete)
321 /// from the download workers and must be thread-safe (`Send + Sync`) and `'static`.
322 ///
323 /// # Examples
324 ///
325 /// ```
326 /// use soar_dl::oci::OciDownload;
327 ///
328 /// let downloader = OciDownload::new("ghcr.io/owner/repo:tag")
329 /// .progress(|progress| {
330 /// println!("progress: {:?}", progress);
331 /// });
332 /// ```
333 pub fn progress<F>(mut self, f: F) -> Self
334 where
335 F: Fn(Progress) + Send + Sync + 'static,
336 {
337 self.on_progress = Some(Arc::new(f));
338 self
339 }
340
341 /// Downloads the OCI reference according to the configured options and returns the downloaded file paths.
342 ///
343 /// Attempts a direct blob download if the reference tag is a digest (e.g., `sha256:...`); otherwise it
344 /// fetches the image manifest, selects layers whose titles match the configured filter, creates the
345 /// output directory if provided, and downloads the selected layers.
346 ///
347 /// Emits `Progress::Starting` and `Progress::Complete` events via the registered progress callback when present.
348 ///
349 /// # Returns
350 ///
351 /// A `Vec<PathBuf>` containing the filesystem paths of the downloaded files on success. Returns a
352 /// `DownloadError::LayerNotFound` if no manifest layers match the configured filter, or other
353 /// `DownloadError` variants for network, IO, or extraction failures.
354 ///
355 /// # Examples
356 ///
357 /// ```no_run
358 /// # use std::path::PathBuf;
359 /// # use soar_dl::oci::OciDownload;
360 /// // Download layers from an OCI reference into "out" directory.
361 /// let paths: Vec<PathBuf> = OciDownload::new("ghcr.io/owner/repo:latest")
362 /// .output("out")
363 /// .execute()
364 /// .unwrap();
365 /// assert!(!paths.is_empty());
366 /// ```
367 pub fn execute(self) -> Result<Vec<PathBuf>, DownloadError> {
368 debug!(
369 registry = self.reference.registry,
370 package = self.reference.package,
371 tag = self.reference.tag,
372 "starting OCI download"
373 );
374
375 // If it's a blob digest, download directly
376 if self.reference.tag.starts_with("sha256:") {
377 trace!("tag is digest, downloading blob directly");
378 return self.download_blob();
379 }
380
381 let manifest = self.fetch_manifest()?;
382
383 let layers: Vec<_> = manifest
384 .layers
385 .iter()
386 .filter(|layer| {
387 layer
388 .title()
389 .map(|t| self.filter.matches(t))
390 .unwrap_or(false)
391 })
392 .collect();
393
394 if layers.is_empty() {
395 debug!("no matching layers found in manifest");
396 return Err(DownloadError::LayerNotFound);
397 }
398
399 let total_size: u64 = layers.iter().map(|l| l.size).sum();
400 debug!(
401 layer_count = layers.len(),
402 total_size = total_size,
403 "downloading layers"
404 );
405
406 if let Some(cb) = &self.on_progress {
407 cb(Progress::Starting {
408 total: total_size,
409 });
410 }
411
412 let output_dir = self
413 .output
414 .as_deref()
415 .map(PathBuf::from)
416 .unwrap_or_default();
417 if !output_dir.as_os_str().is_empty() {
418 std::fs::create_dir_all(&output_dir)?;
419 }
420
421 let paths = if self.parallel > 1 {
422 self.download_layers_parallel(&layers, &output_dir)?
423 } else {
424 self.download_layers_sequential(&layers, &output_dir)?
425 };
426
427 if let Some(cb) = &self.on_progress {
428 cb(Progress::Complete {
429 total: total_size,
430 });
431 }
432
433 Ok(paths)
434 }
435
436 /// Downloads the provided OCI layers one after another into `output_dir` and returns the paths of the saved files.
437 ///
438 /// Each layer is written using its `title()` as the filename. If `self.extract` is enabled, each downloaded archive
439 /// is extracted into `self.extract_to` (if set) or into `output_dir`. The method updates an internal downloaded byte
440 /// counter while performing transfers.
441 ///
442 /// # Errors
443 ///
444 /// Returns a `DownloadError` if any individual layer download or extraction fails.
445 ///
446 /// # Parameters
447 ///
448 /// - `layers`: slice of layer references to download (order is preserved).
449 /// - `output_dir`: destination directory for downloaded files.
450 ///
451 /// # Returns
452 ///
453 /// A `Vec<PathBuf>` containing the full paths to the downloaded (and optionally extracted) files.
454 fn download_layers_sequential(
455 &self,
456 layers: &[&OciLayer],
457 output_dir: &Path,
458 ) -> Result<Vec<PathBuf>, DownloadError> {
459 let mut paths = Vec::new();
460 let mut downloaded = 0u64;
461 let total_size: u64 = layers.iter().map(|l| l.size).sum();
462
463 for layer in layers {
464 let filename = layer.title().unwrap();
465 let path = output_dir.join(filename);
466
467 if path.is_file() {
468 if let Ok(metadata) = path.metadata() {
469 if metadata.len() == layer.size {
470 downloaded += layer.size;
471 if let Some(ref cb) = self.on_progress {
472 cb(Progress::Chunk {
473 current: downloaded,
474 total: total_size,
475 });
476 }
477 paths.push(path);
478 continue;
479 }
480 }
481 }
482
483 self.download_layer(layer, &path, &mut downloaded, total_size)?;
484
485 if self.extract {
486 let extract_dir = self
487 .extract_to
488 .clone()
489 .unwrap_or_else(|| output_dir.to_path_buf());
490 compak::extract_archive(&path, &extract_dir)?;
491 }
492
493 paths.push(path);
494 }
495
496 Ok(paths)
497 }
498
499 /// Download multiple OCI layers concurrently and return the downloaded file paths.
500 ///
501 /// This method downloads the provided layers using up to `self.parallel` worker threads,
502 /// writes each layer into `output_dir` (using the layer title as filename), optionally
503 /// extracts archives when extraction is enabled, and aggregates any errors that occur
504 /// across worker threads into a single `DownloadError::Multiple`.
505 ///
506 /// - If a layer has no title, it is skipped.
507 /// - On success, returns a `Vec<PathBuf>` containing the paths to successfully downloaded files.
508 /// - On failure, returns `DownloadError::Multiple` with stringified error messages from workers.
509 ///
510 /// # Parameters
511 ///
512 /// - `layers`: slice of layer references to download. Each layer's title is used as the filename.
513 /// - `output_dir`: directory where downloaded files will be written.
514 ///
515 /// # Returns
516 ///
517 /// `Ok(Vec<PathBuf>)` with the paths of successfully downloaded files, or `Err(DownloadError::Multiple)`
518 /// if one or more worker threads report errors.
519 ///
520 /// # Examples
521 ///
522 /// ```no_run
523 /// use soar_dl::oci::OciDownload;
524 ///
525 /// let dl = OciDownload::new("ghcr.io/owner/repo:tag")
526 /// .output("out")
527 /// .parallel(4);
528 /// let _ = dl.execute(); // invokes the parallel download path when appropriate
529 /// ```
530 pub fn download_layers_parallel(
531 &self,
532 layers: &[&OciLayer],
533 output_dir: &Path,
534 ) -> Result<Vec<PathBuf>, DownloadError> {
535 let downloaded = Arc::new(Mutex::new(0u64));
536 let paths = Arc::new(Mutex::new(Vec::new()));
537 let errors = Arc::new(Mutex::new(Vec::new()));
538
539 let total_size: u64 = layers.iter().map(|l| l.size).sum();
540
541 let owned_layers: Vec<OciLayer> = layers.iter().map(|&layer| layer.clone()).collect();
542 let chunks: Vec<_> = owned_layers
543 .chunks(layers.len().div_ceil(self.parallel))
544 .map(|chunk| chunk.to_vec())
545 .collect();
546
547 let handles: Vec<_> = chunks
548 .into_iter()
549 .map(|chunk| {
550 let api = self.api.clone();
551 let reference = self.reference.clone();
552 let output_dir = output_dir.to_path_buf();
553 let downloaded = Arc::clone(&downloaded);
554 let paths = Arc::clone(&paths);
555 let errors = Arc::clone(&errors);
556 let extract = self.extract;
557 let extract_to = self.extract_to.clone();
558 let on_progress = self.on_progress.clone();
559
560 thread::spawn(move || {
561 for layer in chunk {
562 let filename = match layer.title() {
563 Some(f) => f,
564 None => continue,
565 };
566 let path = output_dir.join(filename);
567
568 if path.is_file() {
569 if let Ok(metadata) = path.metadata() {
570 if metadata.len() == layer.size {
571 {
572 let mut shared = downloaded.lock().unwrap();
573 *shared += layer.size;
574 let current = *shared;
575 if let Some(ref cb) = on_progress {
576 cb(Progress::Chunk {
577 current,
578 total: total_size,
579 });
580 }
581 }
582 paths.lock().unwrap().push(path);
583 continue;
584 }
585 }
586 }
587
588 let mut local_downloaded = 0u64;
589 let result = download_layer_impl(
590 &api,
591 &reference,
592 &layer,
593 &path,
594 &mut local_downloaded,
595 on_progress.as_ref(),
596 &downloaded,
597 total_size,
598 );
599
600 match result {
601 Ok(()) => {
602 if extract {
603 let extract_dir =
604 extract_to.clone().unwrap_or_else(|| output_dir.clone());
605 if let Err(e) = compak::extract_archive(&path, &extract_dir) {
606 errors.lock().unwrap().push(format!("{e}"));
607 continue;
608 }
609 }
610 paths.lock().unwrap().push(path);
611 }
612 Err(e) => {
613 errors.lock().unwrap().push(format!("{e}"));
614 }
615 }
616 }
617 })
618 })
619 .collect();
620
621 for handle in handles {
622 if let Err(err) = handle.join() {
623 errors
624 .lock()
625 .unwrap()
626 .push(format!("Worker thread panicked: {err:?}"));
627 }
628 }
629
630 let errors = errors.lock().unwrap();
631 if !errors.is_empty() {
632 return Err(DownloadError::Multiple {
633 errors: errors.clone(),
634 });
635 }
636
637 let paths = paths.lock().unwrap().clone();
638 Ok(paths)
639 }
640
641 /// Fetches the OCI/Docker manifest for the configured reference and returns it deserialized as an `OciManifest`.
642 ///
643 /// The request is made against the download instance's `api` base and the reference's `package`/`tag`. On
644 /// non-success HTTP status codes this returns `DownloadError::HttpError`; if the response body cannot be
645 /// parsed as a manifest JSON this returns `DownloadError::InvalidResponse`.
646 ///
647 /// # Examples
648 ///
649 /// ```no_run
650 /// use soar_dl::oci::OciDownload;
651 ///
652 /// let dl = OciDownload::new("ghcr.io/example/repo:latest");
653 /// let manifest = dl.fetch_manifest().unwrap();
654 /// ```
655 pub fn fetch_manifest(&self) -> Result<OciManifest, DownloadError> {
656 let url = format!(
657 "{}/{}/manifests/{}",
658 self.api.trim_end_matches('/'),
659 self.reference.package,
660 self.reference.tag
661 );
662 debug!(url = url, "fetching OCI manifest");
663
664 let mut resp = SHARED_AGENT
665 .get(&url)
666 .header(
667 ACCEPT,
668 "application/vnd.docker.distribution.manifest.v2+json, \
669 application/vnd.docker.distribution.manifest.list.v2+json, \
670 application/vnd.oci.image.manifest.v1+json, \
671 application/vnd.oci.image.index.v1+json",
672 )
673 .header(AUTHORIZATION, "Bearer QQ==")
674 .call()?;
675
676 trace!(
677 status = resp.status().as_u16(),
678 "manifest response received"
679 );
680
681 if !resp.status().is_success() {
682 debug!(status = resp.status().as_u16(), "manifest fetch failed");
683 return Err(DownloadError::HttpError {
684 status: resp.status().as_u16(),
685 url,
686 });
687 }
688
689 let manifest: OciManifest = resp
690 .body_mut()
691 .read_json()
692 .map_err(|_| DownloadError::InvalidResponse)?;
693
694 trace!(
695 layers = manifest.layers.len(),
696 media_type = manifest.media_type,
697 "manifest parsed successfully"
698 );
699
700 Ok(manifest)
701 }
702
703 /// Downloads the single blob identified by the downloader's reference into the configured output location.
704 ///
705 /// The resulting file is written using the configured output path (or a name derived from the reference if no output is set), and download options such as overwrite mode and the registered progress callback are respected.
706 ///
707 /// # Examples
708 ///
709 /// ```no_run
710 /// use soar_dl::oci::OciDownload;
711 ///
712 /// let dl = OciDownload::new("ghcr.io/org/package:tag");
713 /// let paths = dl.download_blob().unwrap();
714 /// assert_eq!(paths.len(), 1);
715 /// let downloaded = &paths[0];
716 /// println!("Downloaded to: {:?}", downloaded);
717 /// ```
718 pub fn download_blob(&self) -> Result<Vec<PathBuf>, DownloadError> {
719 let filename = self
720 .reference
721 .package
722 .rsplit_once('/')
723 .map(|(_, name)| name)
724 .unwrap_or(&self.reference.tag);
725
726 let output = self.output.as_deref().unwrap_or(filename);
727
728 let url = format!(
729 "{}/{}/blobs/{}",
730 self.api.trim_end_matches('/'),
731 self.reference.package,
732 self.reference.tag
733 );
734
735 let dl = Download::new(url)
736 .output(output)
737 .overwrite(self.overwrite)
738 .ghcr_blob();
739
740 let dl = if let Some(ref cb) = self.on_progress {
741 let cb = cb.clone();
742 dl.progress(move |p| cb(p))
743 } else {
744 dl
745 };
746
747 let path = dl.execute()?;
748
749 Ok(vec![path])
750 }
751
752 /// Downloads a single OCI layer blob to the given file path, updating the provided
753 /// cumulative `downloaded` byte counter and emitting progress events if configured.
754 ///
755 /// # Parameters
756 ///
757 /// - `layer`: The manifest layer to download.
758 /// - `path`: Destination filesystem path for the downloaded blob.
759 /// - `downloaded`: Mutable cumulative byte counter; this function increments it by the
760 /// number of bytes written for this layer.
761 ///
762 /// # Returns
763 ///
764 /// `Ok(())` on success, or a `DownloadError` describing the failure.
765 ///
766 /// # Examples
767 ///
768 /// ```no_run
769 /// use std::path::Path;
770 /// use soar_dl::oci::{OciDownload, OciLayer};
771 ///
772 /// let downloader = OciDownload::new("ghcr.io/owner/repo:tag");
773 /// let layer = OciLayer {
774 /// media_type: "application/vnd.oci.image.layer.v1.tar".to_string(),
775 /// digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
776 /// size: 1024,
777 /// annotations: Default::default(),
778 /// };
779 /// let mut downloaded = 0u64;
780 /// let dest = Path::new("/tmp/layer.tar");
781 /// downloader.download_layer(&layer, dest, &mut downloaded, 1024).unwrap();
782 /// assert!(downloaded > 0);
783 /// ```
784 pub fn download_layer(
785 &self,
786 layer: &OciLayer,
787 path: &Path,
788 downloaded: &mut u64,
789 total_size: u64,
790 ) -> Result<(), DownloadError> {
791 download_layer_impl(
792 &self.api,
793 &self.reference,
794 layer,
795 path,
796 downloaded,
797 self.on_progress.as_ref(),
798 &Arc::new(Mutex::new(0u64)),
799 total_size,
800 )
801 }
802}
803
804/// Downloads a single OCI layer blob to the given file path with resume support, progress reporting, and post-download handling.
805///
806/// This function:
807/// - Resumes partially downloaded blobs when resume metadata exists (uses Range and If-Range headers).
808/// - Appends to the existing file when a partial response (206) is returned, otherwise creates a new file.
809/// - Periodically persists resume metadata while downloading (every 1 MiB).
810/// - Emits `Progress::Chunk` updates via the optional `on_progress` callback using the shared downloaded counter.
811/// - Marks the file executable (0o755) if it appears to be an ELF binary and removes any resume metadata on success.
812///
813/// Returns `Ok(())` on success or a `DownloadError` on failure.
814#[allow(clippy::too_many_arguments)]
815fn download_layer_impl(
816 api: &str,
817 reference: &OciReference,
818 layer: &OciLayer,
819 path: &Path,
820 local_downloaded: &mut u64,
821 on_progress: Option<&Arc<dyn Fn(Progress) + Send + Sync>>,
822 shared_downloaded: &Arc<Mutex<u64>>,
823 total_size: u64,
824) -> Result<(), DownloadError> {
825 let url = format!(
826 "{}/{}/blobs/{}",
827 api.trim_end_matches('/'),
828 reference.package,
829 layer.digest
830 );
831
832 trace!(
833 digest = layer.digest,
834 size = layer.size,
835 path = %path.display(),
836 "downloading layer"
837 );
838
839 let resume_info = read_resume(path);
840 let (resume_from, etag) = resume_info
841 .as_ref()
842 .map(|r| (Some(r.downloaded), r.etag.as_deref()))
843 .unwrap_or((None, None));
844
845 let mut req = SHARED_AGENT.get(&url).header(AUTHORIZATION, "Bearer QQ==");
846
847 if let Some(pos) = resume_from {
848 trace!(resume_from = pos, "attempting to resume download");
849 req = req.header(RANGE, &format!("bytes={}-", pos));
850 if let Some(tag) = etag {
851 req = req.header(IF_RANGE, tag);
852 }
853 }
854
855 let resp = req.call()?;
856
857 if !resp.status().is_success() {
858 debug!(
859 status = resp.status().as_u16(),
860 digest = layer.digest,
861 "layer download failed"
862 );
863 return Err(DownloadError::HttpError {
864 status: resp.status().as_u16(),
865 url,
866 });
867 }
868
869 let is_resuming = resume_from.is_some() && resp.status() == 206;
870 if is_resuming {
871 trace!(resumed_from = resume_from.unwrap(), "resuming download");
872 }
873 let mut file = if is_resuming {
874 let resume_pos = resume_from.unwrap();
875 if let Some(cb) = on_progress {
876 cb(Progress::Resuming {
877 current: resume_pos,
878 total: total_size,
879 });
880 }
881 // Truncate file to resume position to avoid duplicated bytes
882 // (file may have more bytes than last checkpoint due to writes between checkpoints)
883 let mut file = OpenOptions::new().write(true).open(path)?;
884 file.set_len(resume_pos)?;
885 file.seek(SeekFrom::End(0))?;
886 file
887 } else {
888 File::create(path)?
889 };
890
891 let new_etag = resp
892 .headers()
893 .get(ETAG)
894 .and_then(|h| h.to_str().ok())
895 .map(String::from);
896 let mut reader = resp.into_body().into_reader();
897 let mut buffer = [0u8; 8192];
898 let resume_offset = resume_from.unwrap_or(0);
899 *local_downloaded = resume_offset;
900 let mut last_checkpoint = *local_downloaded / (1024 * 1024);
901
902 // Add resume offset to shared counter so progress shows correct cumulative total
903 if is_resuming {
904 let mut shared = shared_downloaded.lock().unwrap();
905 *shared += resume_offset;
906 }
907
908 loop {
909 let n = reader.read(&mut buffer)?;
910 if n == 0 {
911 break;
912 }
913
914 file.write_all(&buffer[..n])?;
915 *local_downloaded += n as u64;
916
917 let current_total = {
918 let mut shared = shared_downloaded.lock().unwrap();
919 *shared += n as u64;
920 *shared
921 };
922
923 let checkpoint = *local_downloaded / (1024 * 1024);
924 if checkpoint > last_checkpoint {
925 last_checkpoint = checkpoint;
926 write_resume(
927 path,
928 &ResumeInfo {
929 downloaded: *local_downloaded,
930 total: layer.size,
931 etag: new_etag.clone(),
932 last_modified: None,
933 },
934 )?;
935 }
936
937 if let Some(cb) = on_progress {
938 cb(Progress::Chunk {
939 current: current_total,
940 total: total_size,
941 });
942 }
943 }
944
945 if is_elf(path) {
946 trace!(path = %path.display(), "setting executable permissions on ELF binary");
947 std::fs::set_permissions(path, Permissions::from_mode(0o755))?;
948 }
949
950 remove_resume(path)?;
951 trace!(
952 path = %path.display(),
953 bytes = *local_downloaded,
954 "layer download complete"
955 );
956 Ok(())
957}
958
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::*;

    /// Annotation key that carries a layer's title.
    const TITLE_KEY: &str = "org.opencontainers.image.title";
    /// Media type used by the layer fixtures.
    const LAYER_MEDIA_TYPE: &str = "application/vnd.oci.image.layer.v1.tar";

    /// Parses `input` and checks the resulting package and tag; the registry
    /// is expected to be `ghcr.io` in every case.
    #[track_caller]
    fn assert_reference(input: &str, package: &str, tag: &str) {
        let parsed = OciReference::from(input);
        assert_eq!(parsed.registry, "ghcr.io");
        assert_eq!(parsed.package, package);
        assert_eq!(parsed.tag, tag);
    }

    /// Builds a 1 KiB tar layer fixture carrying the given annotations.
    fn tar_layer(annotations: HashMap<String, String>) -> OciLayer {
        OciLayer {
            media_type: LAYER_MEDIA_TYPE.to_string(),
            digest: "sha256:abc123".to_string(),
            size: 1024,
            annotations,
        }
    }

    #[test]
    fn test_oci_reference_from_str_simple() {
        assert_reference("org/repo:tag", "org/repo", "tag");
    }

    #[test]
    fn test_oci_reference_from_str_with_prefix() {
        assert_reference("ghcr.io/org/repo:latest", "org/repo", "latest");
    }

    #[test]
    fn test_oci_reference_from_str_with_digest() {
        assert_reference(
            "org/repo@sha256:deadbeef1234567890",
            "org/repo",
            "sha256:deadbeef1234567890",
        );
    }

    #[test]
    fn test_oci_reference_from_str_no_tag() {
        assert_reference("org/repo", "org/repo", "latest");
    }

    #[test]
    fn test_oci_reference_from_str_nested_package() {
        assert_reference("org/team/repo:v1.0", "org/team/repo", "v1.0");
    }

    #[test]
    fn test_oci_reference_from_str_digest_with_prefix() {
        assert_reference("ghcr.io/org/repo@sha256:abc123", "org/repo", "sha256:abc123");
    }

    #[test]
    fn test_oci_reference_clone() {
        let original = OciReference::from("org/repo:tag");
        let copy = original.clone();
        assert_eq!(original.registry, copy.registry);
        assert_eq!(original.package, copy.package);
        assert_eq!(original.tag, copy.tag);
    }

    #[test]
    fn test_oci_layer_title_present() {
        let annotations =
            HashMap::from([(TITLE_KEY.to_string(), "myfile.tar.gz".to_string())]);

        assert_eq!(tar_layer(annotations).title(), Some("myfile.tar.gz"));
    }

    #[test]
    fn test_oci_layer_title_absent() {
        assert_eq!(tar_layer(HashMap::new()).title(), None);
    }

    #[test]
    fn test_oci_layer_clone() {
        let original = OciLayer {
            media_type: "type".to_string(),
            digest: "digest".to_string(),
            size: 100,
            annotations: HashMap::from([("key".to_string(), "value".to_string())]),
        };

        let copy = original.clone();
        assert_eq!(original.media_type, copy.media_type);
        assert_eq!(original.digest, copy.digest);
        assert_eq!(original.size, copy.size);
    }

    #[test]
    fn test_oci_download_new() {
        let dl = OciDownload::new("org/repo:tag");
        assert_eq!(dl.reference.registry, "ghcr.io");
        assert_eq!(dl.reference.package, "org/repo");
        assert_eq!(dl.reference.tag, "tag");
        assert_eq!(dl.parallel, 1);
        assert!(!dl.extract);
    }

    #[test]
    fn test_oci_download_builder_pattern() {
        let dl = OciDownload::new("org/repo:tag")
            .api("https://custom.registry/v2")
            .output("downloads")
            .extract(true)
            .extract_to("/tmp/extract")
            .parallel(4);

        assert_eq!(dl.api, "https://custom.registry/v2");
        assert_eq!(dl.output, Some("downloads".to_string()));
        assert!(dl.extract);
        assert_eq!(dl.extract_to, Some(PathBuf::from("/tmp/extract")));
        assert_eq!(dl.parallel, 4);
    }

    #[test]
    fn test_oci_download_parallel_clamped() {
        // Zero is raised to one worker; larger values are kept as given.
        for (requested, expected) in [(0usize, 1usize), (100, 100)] {
            let dl = OciDownload::new("org/repo:tag").parallel(requested);
            assert_eq!(dl.parallel, expected);
        }
    }

    #[test]
    fn test_oci_manifest_deserialize() {
        let json = r#"{
            "mediaType": "application/vnd.oci.image.manifest.v1+json",
            "config": {
                "mediaType": "application/vnd.oci.image.config.v1+json",
                "digest": "sha256:config123",
                "size": 512
            },
            "layers": [
                {
                    "mediaType": "application/vnd.oci.image.layer.v1.tar",
                    "digest": "sha256:layer123",
                    "size": 1024,
                    "annotations": {
                        "org.opencontainers.image.title": "file.tar.gz"
                    }
                }
            ]
        }"#;

        let manifest: OciManifest = serde_json::from_str(json).unwrap();
        assert_eq!(
            manifest.media_type,
            "application/vnd.oci.image.manifest.v1+json"
        );
        assert_eq!(manifest.config.digest, "sha256:config123");
        assert_eq!(manifest.layers.len(), 1);
        assert_eq!(manifest.layers[0].title(), Some("file.tar.gz"));
    }

    #[test]
    fn test_oci_layer_deserialize_without_annotations() {
        let json = r#"{
            "mediaType": "application/vnd.oci.image.layer.v1.tar",
            "digest": "sha256:abc",
            "size": 2048
        }"#;

        let layer: OciLayer = serde_json::from_str(json).unwrap();
        assert_eq!(layer.media_type, LAYER_MEDIA_TYPE);
        assert_eq!(layer.digest, "sha256:abc");
        assert_eq!(layer.size, 2048);
        assert!(layer.annotations.is_empty());
    }

    #[test]
    fn test_oci_config_deserialize() {
        let json = r#"{
            "mediaType": "application/vnd.oci.image.config.v1+json",
            "digest": "sha256:xyz789",
            "size": 256
        }"#;

        let config: OciConfig = serde_json::from_str(json).unwrap();
        assert_eq!(
            config.media_type,
            "application/vnd.oci.image.config.v1+json"
        );
        assert_eq!(config.digest, "sha256:xyz789");
        assert_eq!(config.size, 256);
    }
}
1165}