Skip to main content

soar_dl/
oci.rs

1use std::{
2    fs::{File, OpenOptions, Permissions},
3    io::{Read as _, Seek as _, SeekFrom, Write as _},
4    os::unix::fs::PermissionsExt as _,
5    path::{Path, PathBuf},
6    sync::{Arc, Mutex},
7    thread,
8};
9
10use serde::Deserialize;
11use soar_utils::fs::is_elf;
12use tracing::{debug, trace};
13use ureq::http::header::{ACCEPT, AUTHORIZATION, ETAG, IF_RANGE, RANGE};
14
15use crate::{
16    download::Download,
17    error::DownloadError,
18    filter::Filter,
19    http_client::SHARED_AGENT,
20    types::{OverwriteMode, Progress, ResumeInfo},
21    xattr::{read_resume, remove_resume, write_resume},
22};
23
/// A parsed reference to an artifact stored in an OCI registry.
///
/// The registry is always `ghcr.io`. `tag` holds either a tag name or a digest
/// string (for example `sha256:<hex>`), depending on what the reference contained.
#[derive(Debug, Clone)]
pub struct OciReference {
    /// Registry host name.
    pub registry: String,
    /// Repository path inside the registry, e.g. `org/repo`.
    pub package: String,
    /// Tag name or digest selecting the artifact.
    pub tag: String,
}

impl From<&str> for OciReference {
    /// Parses an OCI/GHCR reference string into an `OciReference`.
    ///
    /// A single leading `ghcr.io/` is optional and is removed before parsing. The rest is
    /// interpreted as follows:
    /// - `package@<digest>` → the digest becomes `tag`
    /// - `package:<tag>@<digest>` → the digest becomes `tag` and the tag name is dropped,
    ///   because the digest already pins the exact artifact
    /// - `package:<tag>` → the tag becomes `tag`
    /// - anything else → the whole path is `package` and `tag` is `"latest"`
    ///
    /// # Examples
    ///
    /// ```
    /// use soar_dl::oci::OciReference;
    ///
    /// let r = OciReference::from("ghcr.io/org/repo@sha256:deadbeef");
    /// assert_eq!(r.registry, "ghcr.io");
    /// assert_eq!(r.package, "org/repo");
    /// assert_eq!(r.tag, "sha256:deadbeef");
    ///
    /// let r2 = OciReference::from("org/repo:1.2.3");
    /// assert_eq!(r2.registry, "ghcr.io");
    /// assert_eq!(r2.package, "org/repo");
    /// assert_eq!(r2.tag, "1.2.3");
    ///
    /// let r3 = OciReference::from("org/repo");
    /// assert_eq!(r3.registry, "ghcr.io");
    /// assert_eq!(r3.package, "org/repo");
    /// assert_eq!(r3.tag, "latest");
    ///
    /// let r4 = OciReference::from("org/repo:1.2.3@sha256:deadbeef");
    /// assert_eq!(r4.package, "org/repo");
    /// assert_eq!(r4.tag, "sha256:deadbeef");
    /// ```
    fn from(value: &str) -> Self {
        // Remove the registry prefix at most once. `trim_start_matches` would keep
        // stripping and swallow a leading path segment that is itself named `ghcr.io`.
        let path = value.strip_prefix("ghcr.io/").unwrap_or(value);

        let (package, tag) = if let Some((name, digest)) = path.split_once('@') {
            // <package>[:<tag>]@<digest> — a `:tag` left inside the package name would
            // end up in the request URL, so cut it off here.
            let package = name.split_once(':').map_or(name, |(package, _)| package);
            (package, digest)
        } else if let Some((package, tag)) = path.split_once(':') {
            // <package>:<tag>
            (package, tag)
        } else {
            (path, "latest")
        };

        Self {
            registry: "ghcr.io".to_string(),
            package: package.to_string(),
            tag: tag.to_string(),
        }
    }
}
87
88#[derive(Debug, Clone, Deserialize)]
89pub struct OciManifest {
90    #[serde(rename = "mediaType")]
91    pub media_type: String,
92    pub config: OciConfig,
93    pub layers: Vec<OciLayer>,
94}
95
96#[derive(Debug, Clone, Deserialize)]
97pub struct OciConfig {
98    #[serde(rename = "mediaType")]
99    pub media_type: String,
100    pub digest: String,
101    pub size: u64,
102}
103
104#[derive(Debug, Clone, Deserialize)]
105pub struct OciLayer {
106    #[serde(rename = "mediaType")]
107    pub media_type: String,
108    pub digest: String,
109    pub size: u64,
110    #[serde(default)]
111    pub annotations: std::collections::HashMap<String, String>,
112}
113
114impl OciLayer {
115    /// Extracts the human-friendly title from the layer's annotations.
116    ///
117    /// Returns `Some(&str)` containing the value of the `"org.opencontainers.image.title"`
118    /// annotation if present, or `None` if that annotation is absent.
119    ///
120    /// # Examples
121    ///
122    /// ```
123    /// use std::collections::HashMap;
124    /// use soar_dl::oci::OciLayer;
125    ///
126    /// let mut annotations = HashMap::new();
127    /// annotations.insert(
128    ///     "org.opencontainers.image.title".to_string(),
129    ///     "example.txt".to_string(),
130    /// );
131    ///
132    /// let layer = OciLayer {
133    ///     media_type: String::new(),
134    ///     digest: String::new(),
135    ///     size: 0,
136    ///     annotations,
137    /// };
138    ///
139    /// assert_eq!(layer.title(), Some("example.txt"));
140    /// ```
141    pub fn title(&self) -> Option<&str> {
142        self.annotations
143            .get("org.opencontainers.image.title")
144            .map(|s| s.as_str())
145    }
146}
147
148#[derive(Clone)]
149pub struct OciDownload {
150    reference: OciReference,
151    api: String,
152    filter: Filter,
153    output: Option<String>,
154    overwrite: OverwriteMode,
155    extract: bool,
156    extract_to: Option<PathBuf>,
157    parallel: usize,
158    on_progress: Option<Arc<dyn Fn(Progress) + Send + Sync>>,
159}
160
161impl OciDownload {
162    /// Creates a new `OciDownload` for the given OCI reference using sensible defaults.
163    ///
164    /// Defaults:
165    /// - `api` = "https://ghcr.io/v2"
166    /// - `filter` = `Filter::default()`
167    /// - no output path (downloads to current working directory unless `output` is set)
168    /// - `overwrite` = `OverwriteMode::Prompt`
169    /// - `extract` = `false`
170    /// - `parallel` = 1 (sequential downloads)
171    /// - no progress callback
172    ///
173    /// # Examples
174    ///
175    /// ```
176    /// use soar_dl::oci::OciDownload;
177    ///
178    /// let dl = OciDownload::new("ghcr.io/myorg/myrepo:latest");
179    /// // configure and run:
180    /// // let result = dl.output("out").execute();
181    /// ```
182    pub fn new(reference: impl Into<OciReference>) -> Self {
183        Self {
184            reference: reference.into(),
185            api: "https://ghcr.io/v2".into(),
186            filter: Filter::default(),
187            output: None,
188            overwrite: OverwriteMode::Prompt,
189            extract: false,
190            extract_to: None,
191            parallel: 1,
192            on_progress: None,
193        }
194    }
195
196    /// Sets the base API URL used for manifest and blob requests and returns the downloader for chaining.
197    ///
198    /// # Examples
199    ///
200    /// ```
201    /// use soar_dl::oci::OciDownload;
202    ///
203    /// let dl = OciDownload::new("ghcr.io/foo/bar:latest").api("https://ghcr.example.com/v2");
204    /// ```
205    pub fn api(mut self, api: impl Into<String>) -> Self {
206        self.api = api.into();
207        self
208    }
209
210    /// Sets the layer filter used to select which OCI image layers will be downloaded.
211    ///
212    /// The provided `filter` is applied when a manifest is inspected to decide which layers are included
213    /// in the download operation. Returns the downloader with the filter updated for further chaining.
214    ///
215    /// # Examples
216    ///
217    /// ```
218    /// use soar_dl::oci::OciDownload;
219    /// use soar_dl::filter::Filter;
220    ///
221    /// let downloader = OciDownload::new("owner/repo:tag")
222    ///     .filter(Filter::default())
223    ///     .output("out/dir");
224    /// ```
225    pub fn filter(mut self, filter: Filter) -> Self {
226        self.filter = filter;
227        self
228    }
229
230    /// Sets the output directory path where downloaded files will be written.
231    ///
232    /// The provided path is stored and used as the destination for downloaded blobs and extracted contents.
233    ///
234    /// # Examples
235    ///
236    /// ```
237    /// use soar_dl::oci::OciDownload;
238    ///
239    /// let dl = OciDownload::new("ghcr.io/org/repo:tag").output("downloads/");
240    /// ```
241    pub fn output(mut self, path: impl Into<String>) -> Self {
242        self.output = Some(path.into());
243        self
244    }
245
246    /// Sets how existing files are handled when writing downloads.
247    ///
248    /// This configures the downloader's overwrite behavior to the provided `mode`.
249    ///
250    /// # Examples
251    ///
252    /// ```
253    /// use soar_dl::oci::OciDownload;
254    /// use soar_dl::types::OverwriteMode;
255    ///
256    /// let dl = OciDownload::new("ghcr.io/example/repo:tag").overwrite(OverwriteMode::Prompt);
257    /// ```
258    ///  Returns the modified downloader with the given overwrite mode.
259    pub fn overwrite(mut self, mode: OverwriteMode) -> Self {
260        self.overwrite = mode;
261        self
262    }
263
264    /// Enable or disable extraction of downloaded layers.
265    ///
266    /// When `true`, downloaded archive layers will be extracted after download into the extraction
267    /// destination (if set via `extract_to`) or into the configured output directory. When `false`,
268    /// archives are left as downloaded.
269    ///
270    /// # Examples
271    ///
272    /// ```
273    /// use soar_dl::oci::OciDownload;
274    ///
275    /// let dl = OciDownload::new("ghcr.io/org/pkg:1.0")
276    ///     .extract(true)
277    ///     .extract_to("/tmp/out");
278    /// ```
279    pub fn extract(mut self, extract: bool) -> Self {
280        self.extract = extract;
281        self
282    }
283
284    /// Sets the destination directory for extracting downloaded archives.
285    ///
286    /// When extraction is enabled, extracted files will be written to this path instead of the download output directory.
287    ///
288    /// # Examples
289    ///
290    /// ```
291    /// use soar_dl::oci::OciDownload;
292    ///
293    /// let downloader = OciDownload::new("ghcr.io/org/pkg:1.0")
294    ///     .extract(true)
295    ///     .extract_to("/tmp/extracted");
296    /// ```
297    pub fn extract_to(mut self, path: impl Into<PathBuf>) -> Self {
298        self.extract_to = Some(path.into());
299        self
300    }
301
302    /// Set the number of parallel download workers (at least 1).
303    ///
304    /// If `n` is less than 1, the value will be clamped to 1. Returns the updated downloader to allow chaining.
305    ///
306    /// # Examples
307    ///
308    /// ```
309    /// use soar_dl::oci::OciDownload;
310    ///
311    /// let _ = OciDownload::new("owner/repo:tag").parallel(4);
312    /// ```
313    pub fn parallel(mut self, n: usize) -> Self {
314        self.parallel = n.max(1);
315        self
316    }
317
318    /// Registers a progress callback to receive download `Progress` events.
319    ///
320    /// The provided callback will be invoked with progress updates (e.g., Starting, Chunk, Complete)
321    /// from the download workers and must be thread-safe (`Send + Sync`) and `'static`.
322    ///
323    /// # Examples
324    ///
325    /// ```
326    /// use soar_dl::oci::OciDownload;
327    ///
328    /// let downloader = OciDownload::new("ghcr.io/owner/repo:tag")
329    ///     .progress(|progress| {
330    ///         println!("progress: {:?}", progress);
331    ///     });
332    /// ```
333    pub fn progress<F>(mut self, f: F) -> Self
334    where
335        F: Fn(Progress) + Send + Sync + 'static,
336    {
337        self.on_progress = Some(Arc::new(f));
338        self
339    }
340
341    /// Downloads the OCI reference according to the configured options and returns the downloaded file paths.
342    ///
343    /// Attempts a direct blob download if the reference tag is a digest (e.g., `sha256:...`); otherwise it
344    /// fetches the image manifest, selects layers whose titles match the configured filter, creates the
345    /// output directory if provided, and downloads the selected layers.
346    ///
347    /// Emits `Progress::Starting` and `Progress::Complete` events via the registered progress callback when present.
348    ///
349    /// # Returns
350    ///
351    /// A `Vec<PathBuf>` containing the filesystem paths of the downloaded files on success. Returns a
352    /// `DownloadError::LayerNotFound` if no manifest layers match the configured filter, or other
353    /// `DownloadError` variants for network, IO, or extraction failures.
354    ///
355    /// # Examples
356    ///
357    /// ```no_run
358    /// # use std::path::PathBuf;
359    /// # use soar_dl::oci::OciDownload;
360    /// // Download layers from an OCI reference into "out" directory.
361    /// let paths: Vec<PathBuf> = OciDownload::new("ghcr.io/owner/repo:latest")
362    ///     .output("out")
363    ///     .execute()
364    ///     .unwrap();
365    /// assert!(!paths.is_empty());
366    /// ```
367    pub fn execute(self) -> Result<Vec<PathBuf>, DownloadError> {
368        debug!(
369            registry = self.reference.registry,
370            package = self.reference.package,
371            tag = self.reference.tag,
372            "starting OCI download"
373        );
374
375        // If it's a blob digest, download directly
376        if self.reference.tag.starts_with("sha256:") {
377            trace!("tag is digest, downloading blob directly");
378            return self.download_blob();
379        }
380
381        let manifest = self.fetch_manifest()?;
382
383        let layers: Vec<_> = manifest
384            .layers
385            .iter()
386            .filter(|layer| {
387                layer
388                    .title()
389                    .map(|t| self.filter.matches(t))
390                    .unwrap_or(false)
391            })
392            .collect();
393
394        if layers.is_empty() {
395            debug!("no matching layers found in manifest");
396            return Err(DownloadError::LayerNotFound);
397        }
398
399        let total_size: u64 = layers.iter().map(|l| l.size).sum();
400        debug!(
401            layer_count = layers.len(),
402            total_size = total_size,
403            "downloading layers"
404        );
405
406        if let Some(cb) = &self.on_progress {
407            cb(Progress::Starting {
408                total: total_size,
409            });
410        }
411
412        let output_dir = self
413            .output
414            .as_deref()
415            .map(PathBuf::from)
416            .unwrap_or_default();
417        if !output_dir.as_os_str().is_empty() {
418            std::fs::create_dir_all(&output_dir)?;
419        }
420
421        let paths = if self.parallel > 1 {
422            self.download_layers_parallel(&layers, &output_dir)?
423        } else {
424            self.download_layers_sequential(&layers, &output_dir)?
425        };
426
427        if let Some(cb) = &self.on_progress {
428            cb(Progress::Complete {
429                total: total_size,
430            });
431        }
432
433        Ok(paths)
434    }
435
436    /// Downloads the provided OCI layers one after another into `output_dir` and returns the paths of the saved files.
437    ///
438    /// Each layer is written using its `title()` as the filename. If `self.extract` is enabled, each downloaded archive
439    /// is extracted into `self.extract_to` (if set) or into `output_dir`. The method updates an internal downloaded byte
440    /// counter while performing transfers.
441    ///
442    /// # Errors
443    ///
444    /// Returns a `DownloadError` if any individual layer download or extraction fails.
445    ///
446    /// # Parameters
447    ///
448    /// - `layers`: slice of layer references to download (order is preserved).
449    /// - `output_dir`: destination directory for downloaded files.
450    ///
451    /// # Returns
452    ///
453    /// A `Vec<PathBuf>` containing the full paths to the downloaded (and optionally extracted) files.
454    fn download_layers_sequential(
455        &self,
456        layers: &[&OciLayer],
457        output_dir: &Path,
458    ) -> Result<Vec<PathBuf>, DownloadError> {
459        let mut paths = Vec::new();
460        let mut downloaded = 0u64;
461        let total_size: u64 = layers.iter().map(|l| l.size).sum();
462
463        for layer in layers {
464            let filename = layer.title().unwrap();
465            let path = output_dir.join(filename);
466
467            if path.is_file() {
468                if let Ok(metadata) = path.metadata() {
469                    if metadata.len() == layer.size {
470                        downloaded += layer.size;
471                        if let Some(ref cb) = self.on_progress {
472                            cb(Progress::Chunk {
473                                current: downloaded,
474                                total: total_size,
475                            });
476                        }
477                        paths.push(path);
478                        continue;
479                    }
480                }
481            }
482
483            self.download_layer(layer, &path, &mut downloaded, total_size)?;
484
485            if self.extract {
486                let extract_dir = self
487                    .extract_to
488                    .clone()
489                    .unwrap_or_else(|| output_dir.to_path_buf());
490                compak::extract_archive(&path, &extract_dir)?;
491            }
492
493            paths.push(path);
494        }
495
496        Ok(paths)
497    }
498
499    /// Download multiple OCI layers concurrently and return the downloaded file paths.
500    ///
501    /// This method downloads the provided layers using up to `self.parallel` worker threads,
502    /// writes each layer into `output_dir` (using the layer title as filename), optionally
503    /// extracts archives when extraction is enabled, and aggregates any errors that occur
504    /// across worker threads into a single `DownloadError::Multiple`.
505    ///
506    /// - If a layer has no title, it is skipped.
507    /// - On success, returns a `Vec<PathBuf>` containing the paths to successfully downloaded files.
508    /// - On failure, returns `DownloadError::Multiple` with stringified error messages from workers.
509    ///
510    /// # Parameters
511    ///
512    /// - `layers`: slice of layer references to download. Each layer's title is used as the filename.
513    /// - `output_dir`: directory where downloaded files will be written.
514    ///
515    /// # Returns
516    ///
517    /// `Ok(Vec<PathBuf>)` with the paths of successfully downloaded files, or `Err(DownloadError::Multiple)`
518    /// if one or more worker threads report errors.
519    ///
520    /// # Examples
521    ///
522    /// ```no_run
523    /// use soar_dl::oci::OciDownload;
524    ///
525    /// let dl = OciDownload::new("ghcr.io/owner/repo:tag")
526    ///     .output("out")
527    ///     .parallel(4);
528    /// let _ = dl.execute(); // invokes the parallel download path when appropriate
529    /// ```
530    pub fn download_layers_parallel(
531        &self,
532        layers: &[&OciLayer],
533        output_dir: &Path,
534    ) -> Result<Vec<PathBuf>, DownloadError> {
535        let downloaded = Arc::new(Mutex::new(0u64));
536        let paths = Arc::new(Mutex::new(Vec::new()));
537        let errors = Arc::new(Mutex::new(Vec::new()));
538
539        let total_size: u64 = layers.iter().map(|l| l.size).sum();
540
541        let owned_layers: Vec<OciLayer> = layers.iter().map(|&layer| layer.clone()).collect();
542        let chunks: Vec<_> = owned_layers
543            .chunks(layers.len().div_ceil(self.parallel))
544            .map(|chunk| chunk.to_vec())
545            .collect();
546
547        let handles: Vec<_> = chunks
548            .into_iter()
549            .map(|chunk| {
550                let api = self.api.clone();
551                let reference = self.reference.clone();
552                let output_dir = output_dir.to_path_buf();
553                let downloaded = Arc::clone(&downloaded);
554                let paths = Arc::clone(&paths);
555                let errors = Arc::clone(&errors);
556                let extract = self.extract;
557                let extract_to = self.extract_to.clone();
558                let on_progress = self.on_progress.clone();
559
560                thread::spawn(move || {
561                    for layer in chunk {
562                        let filename = match layer.title() {
563                            Some(f) => f,
564                            None => continue,
565                        };
566                        let path = output_dir.join(filename);
567
568                        if path.is_file() {
569                            if let Ok(metadata) = path.metadata() {
570                                if metadata.len() == layer.size {
571                                    {
572                                        let mut shared = downloaded.lock().unwrap();
573                                        *shared += layer.size;
574                                        let current = *shared;
575                                        if let Some(ref cb) = on_progress {
576                                            cb(Progress::Chunk {
577                                                current,
578                                                total: total_size,
579                                            });
580                                        }
581                                    }
582                                    paths.lock().unwrap().push(path);
583                                    continue;
584                                }
585                            }
586                        }
587
588                        let mut local_downloaded = 0u64;
589                        let result = download_layer_impl(
590                            &api,
591                            &reference,
592                            &layer,
593                            &path,
594                            &mut local_downloaded,
595                            on_progress.as_ref(),
596                            &downloaded,
597                            total_size,
598                        );
599
600                        match result {
601                            Ok(()) => {
602                                if extract {
603                                    let extract_dir =
604                                        extract_to.clone().unwrap_or_else(|| output_dir.clone());
605                                    if let Err(e) = compak::extract_archive(&path, &extract_dir) {
606                                        errors.lock().unwrap().push(format!("{e}"));
607                                        continue;
608                                    }
609                                }
610                                paths.lock().unwrap().push(path);
611                            }
612                            Err(e) => {
613                                errors.lock().unwrap().push(format!("{e}"));
614                            }
615                        }
616                    }
617                })
618            })
619            .collect();
620
621        for handle in handles {
622            if let Err(err) = handle.join() {
623                errors
624                    .lock()
625                    .unwrap()
626                    .push(format!("Worker thread panicked: {err:?}"));
627            }
628        }
629
630        let errors = errors.lock().unwrap();
631        if !errors.is_empty() {
632            return Err(DownloadError::Multiple {
633                errors: errors.clone(),
634            });
635        }
636
637        let paths = paths.lock().unwrap().clone();
638        Ok(paths)
639    }
640
641    /// Fetches the OCI/Docker manifest for the configured reference and returns it deserialized as an `OciManifest`.
642    ///
643    /// The request is made against the download instance's `api` base and the reference's `package`/`tag`. On
644    /// non-success HTTP status codes this returns `DownloadError::HttpError`; if the response body cannot be
645    /// parsed as a manifest JSON this returns `DownloadError::InvalidResponse`.
646    ///
647    /// # Examples
648    ///
649    /// ```no_run
650    /// use soar_dl::oci::OciDownload;
651    ///
652    /// let dl = OciDownload::new("ghcr.io/example/repo:latest");
653    /// let manifest = dl.fetch_manifest().unwrap();
654    /// ```
655    pub fn fetch_manifest(&self) -> Result<OciManifest, DownloadError> {
656        let url = format!(
657            "{}/{}/manifests/{}",
658            self.api.trim_end_matches('/'),
659            self.reference.package,
660            self.reference.tag
661        );
662        debug!(url = url, "fetching OCI manifest");
663
664        let mut resp = SHARED_AGENT
665            .get(&url)
666            .header(
667                ACCEPT,
668                "application/vnd.docker.distribution.manifest.v2+json, \
669                application/vnd.docker.distribution.manifest.list.v2+json, \
670                application/vnd.oci.image.manifest.v1+json, \
671                application/vnd.oci.image.index.v1+json",
672            )
673            .header(AUTHORIZATION, "Bearer QQ==")
674            .call()?;
675
676        trace!(
677            status = resp.status().as_u16(),
678            "manifest response received"
679        );
680
681        if !resp.status().is_success() {
682            debug!(status = resp.status().as_u16(), "manifest fetch failed");
683            return Err(DownloadError::HttpError {
684                status: resp.status().as_u16(),
685                url,
686            });
687        }
688
689        let manifest: OciManifest = resp
690            .body_mut()
691            .read_json()
692            .map_err(|_| DownloadError::InvalidResponse)?;
693
694        trace!(
695            layers = manifest.layers.len(),
696            media_type = manifest.media_type,
697            "manifest parsed successfully"
698        );
699
700        Ok(manifest)
701    }
702
703    /// Downloads the single blob identified by the downloader's reference into the configured output location.
704    ///
705    /// The resulting file is written using the configured output path (or a name derived from the reference if no output is set), and download options such as overwrite mode and the registered progress callback are respected.
706    ///
707    /// # Examples
708    ///
709    /// ```no_run
710    /// use soar_dl::oci::OciDownload;
711    ///
712    /// let dl = OciDownload::new("ghcr.io/org/package:tag");
713    /// let paths = dl.download_blob().unwrap();
714    /// assert_eq!(paths.len(), 1);
715    /// let downloaded = &paths[0];
716    /// println!("Downloaded to: {:?}", downloaded);
717    /// ```
718    pub fn download_blob(&self) -> Result<Vec<PathBuf>, DownloadError> {
719        let filename = self
720            .reference
721            .package
722            .rsplit_once('/')
723            .map(|(_, name)| name)
724            .unwrap_or(&self.reference.tag);
725
726        let output = self.output.as_deref().unwrap_or(filename);
727
728        let url = format!(
729            "{}/{}/blobs/{}",
730            self.api.trim_end_matches('/'),
731            self.reference.package,
732            self.reference.tag
733        );
734
735        let dl = Download::new(url)
736            .output(output)
737            .overwrite(self.overwrite)
738            .ghcr_blob();
739
740        let dl = if let Some(ref cb) = self.on_progress {
741            let cb = cb.clone();
742            dl.progress(move |p| cb(p))
743        } else {
744            dl
745        };
746
747        let path = dl.execute()?;
748
749        Ok(vec![path])
750    }
751
752    /// Downloads a single OCI layer blob to the given file path, updating the provided
753    /// cumulative `downloaded` byte counter and emitting progress events if configured.
754    ///
755    /// # Parameters
756    ///
757    /// - `layer`: The manifest layer to download.
758    /// - `path`: Destination filesystem path for the downloaded blob.
759    /// - `downloaded`: Mutable cumulative byte counter; this function increments it by the
760    ///   number of bytes written for this layer.
761    ///
762    /// # Returns
763    ///
764    /// `Ok(())` on success, or a `DownloadError` describing the failure.
765    ///
766    /// # Examples
767    ///
768    /// ```no_run
769    /// use std::path::Path;
770    /// use soar_dl::oci::{OciDownload, OciLayer};
771    ///
772    /// let downloader = OciDownload::new("ghcr.io/owner/repo:tag");
773    /// let layer = OciLayer {
774    ///     media_type: "application/vnd.oci.image.layer.v1.tar".to_string(),
775    ///     digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
776    ///     size: 1024,
777    ///     annotations: Default::default(),
778    /// };
779    /// let mut downloaded = 0u64;
780    /// let dest = Path::new("/tmp/layer.tar");
781    /// downloader.download_layer(&layer, dest, &mut downloaded, 1024).unwrap();
782    /// assert!(downloaded > 0);
783    /// ```
784    pub fn download_layer(
785        &self,
786        layer: &OciLayer,
787        path: &Path,
788        downloaded: &mut u64,
789        total_size: u64,
790    ) -> Result<(), DownloadError> {
791        download_layer_impl(
792            &self.api,
793            &self.reference,
794            layer,
795            path,
796            downloaded,
797            self.on_progress.as_ref(),
798            &Arc::new(Mutex::new(0u64)),
799            total_size,
800        )
801    }
802}
803
804/// Downloads a single OCI layer blob to the given file path with resume support, progress reporting, and post-download handling.
805///
806/// This function:
807/// - Resumes partially downloaded blobs when resume metadata exists (uses Range and If-Range headers).
808/// - Appends to the existing file when a partial response (206) is returned, otherwise creates a new file.
809/// - Periodically persists resume metadata while downloading (every 1 MiB).
810/// - Emits `Progress::Chunk` updates via the optional `on_progress` callback using the shared downloaded counter.
811/// - Marks the file executable (0o755) if it appears to be an ELF binary and removes any resume metadata on success.
812///
813/// Returns `Ok(())` on success or a `DownloadError` on failure.
814#[allow(clippy::too_many_arguments)]
815fn download_layer_impl(
816    api: &str,
817    reference: &OciReference,
818    layer: &OciLayer,
819    path: &Path,
820    local_downloaded: &mut u64,
821    on_progress: Option<&Arc<dyn Fn(Progress) + Send + Sync>>,
822    shared_downloaded: &Arc<Mutex<u64>>,
823    total_size: u64,
824) -> Result<(), DownloadError> {
825    let url = format!(
826        "{}/{}/blobs/{}",
827        api.trim_end_matches('/'),
828        reference.package,
829        layer.digest
830    );
831
832    trace!(
833        digest = layer.digest,
834        size = layer.size,
835        path = %path.display(),
836        "downloading layer"
837    );
838
839    let resume_info = read_resume(path);
840    let (resume_from, etag) = resume_info
841        .as_ref()
842        .map(|r| (Some(r.downloaded), r.etag.as_deref()))
843        .unwrap_or((None, None));
844
845    let mut req = SHARED_AGENT.get(&url).header(AUTHORIZATION, "Bearer QQ==");
846
847    if let Some(pos) = resume_from {
848        trace!(resume_from = pos, "attempting to resume download");
849        req = req.header(RANGE, &format!("bytes={}-", pos));
850        if let Some(tag) = etag {
851            req = req.header(IF_RANGE, tag);
852        }
853    }
854
855    let resp = req.call()?;
856
857    if !resp.status().is_success() {
858        debug!(
859            status = resp.status().as_u16(),
860            digest = layer.digest,
861            "layer download failed"
862        );
863        return Err(DownloadError::HttpError {
864            status: resp.status().as_u16(),
865            url,
866        });
867    }
868
869    let is_resuming = resume_from.is_some() && resp.status() == 206;
870    if is_resuming {
871        trace!(resumed_from = resume_from.unwrap(), "resuming download");
872    }
873    let mut file = if is_resuming {
874        let resume_pos = resume_from.unwrap();
875        if let Some(cb) = on_progress {
876            cb(Progress::Resuming {
877                current: resume_pos,
878                total: total_size,
879            });
880        }
881        // Truncate file to resume position to avoid duplicated bytes
882        // (file may have more bytes than last checkpoint due to writes between checkpoints)
883        let mut file = OpenOptions::new().write(true).open(path)?;
884        file.set_len(resume_pos)?;
885        file.seek(SeekFrom::End(0))?;
886        file
887    } else {
888        File::create(path)?
889    };
890
891    let new_etag = resp
892        .headers()
893        .get(ETAG)
894        .and_then(|h| h.to_str().ok())
895        .map(String::from);
896    let mut reader = resp.into_body().into_reader();
897    let mut buffer = [0u8; 8192];
898    let resume_offset = resume_from.unwrap_or(0);
899    *local_downloaded = resume_offset;
900    let mut last_checkpoint = *local_downloaded / (1024 * 1024);
901
902    // Add resume offset to shared counter so progress shows correct cumulative total
903    if is_resuming {
904        let mut shared = shared_downloaded.lock().unwrap();
905        *shared += resume_offset;
906    }
907
908    loop {
909        let n = reader.read(&mut buffer)?;
910        if n == 0 {
911            break;
912        }
913
914        file.write_all(&buffer[..n])?;
915        *local_downloaded += n as u64;
916
917        let current_total = {
918            let mut shared = shared_downloaded.lock().unwrap();
919            *shared += n as u64;
920            *shared
921        };
922
923        let checkpoint = *local_downloaded / (1024 * 1024);
924        if checkpoint > last_checkpoint {
925            last_checkpoint = checkpoint;
926            write_resume(
927                path,
928                &ResumeInfo {
929                    downloaded: *local_downloaded,
930                    total: layer.size,
931                    etag: new_etag.clone(),
932                    last_modified: None,
933                },
934            )?;
935        }
936
937        if let Some(cb) = on_progress {
938            cb(Progress::Chunk {
939                current: current_total,
940                total: total_size,
941            });
942        }
943    }
944
945    if is_elf(path) {
946        trace!(path = %path.display(), "setting executable permissions on ELF binary");
947        std::fs::set_permissions(path, Permissions::from_mode(0o755))?;
948    }
949
950    remove_resume(path)?;
951    trace!(
952        path = %path.display(),
953        bytes = *local_downloaded,
954        "layer download complete"
955    );
956    Ok(())
957}
958
#[cfg(test)]
mod tests {
    use super::*;

    /// Media type used by the hand-built layer fixtures.
    const TAR_LAYER_MEDIA_TYPE: &str = "application/vnd.oci.image.layer.v1.tar";

    /// Parses `input` and checks the resulting package and tag.
    ///
    /// Every reference exercised in this module is expected to resolve to the
    /// `ghcr.io` registry, so that check is folded in here.
    fn assert_reference(input: &str, package: &str, tag: &str) {
        let parsed = OciReference::from(input);
        assert_eq!(parsed.registry, "ghcr.io");
        assert_eq!(parsed.package, package);
        assert_eq!(parsed.tag, tag);
    }

    /// Builds a 1024-byte tar layer fixture carrying the given annotations.
    fn tar_layer(annotations: std::collections::HashMap<String, String>) -> OciLayer {
        OciLayer {
            media_type: TAR_LAYER_MEDIA_TYPE.to_string(),
            digest: "sha256:abc123".to_string(),
            size: 1024,
            annotations,
        }
    }

    // --- OciReference parsing -------------------------------------------------

    #[test]
    fn test_oci_reference_from_str_simple() {
        assert_reference("org/repo:tag", "org/repo", "tag");
    }

    #[test]
    fn test_oci_reference_from_str_with_prefix() {
        assert_reference("ghcr.io/org/repo:latest", "org/repo", "latest");
    }

    #[test]
    fn test_oci_reference_from_str_with_digest() {
        assert_reference(
            "org/repo@sha256:deadbeef1234567890",
            "org/repo",
            "sha256:deadbeef1234567890",
        );
    }

    #[test]
    fn test_oci_reference_from_str_no_tag() {
        assert_reference("org/repo", "org/repo", "latest");
    }

    #[test]
    fn test_oci_reference_from_str_nested_package() {
        assert_reference("org/team/repo:v1.0", "org/team/repo", "v1.0");
    }

    #[test]
    fn test_oci_reference_from_str_digest_with_prefix() {
        assert_reference("ghcr.io/org/repo@sha256:abc123", "org/repo", "sha256:abc123");
    }

    #[test]
    fn test_oci_reference_clone() {
        let original = OciReference::from("org/repo:tag");
        let OciReference {
            registry,
            package,
            tag,
        } = original.clone();
        assert_eq!(original.registry, registry);
        assert_eq!(original.package, package);
        assert_eq!(original.tag, tag);
    }

    // --- OciLayer -------------------------------------------------------------

    #[test]
    fn test_oci_layer_title_present() {
        let titled = tar_layer(std::collections::HashMap::from([(
            "org.opencontainers.image.title".to_string(),
            "myfile.tar.gz".to_string(),
        )]));

        assert_eq!(titled.title(), Some("myfile.tar.gz"));
    }

    #[test]
    fn test_oci_layer_title_absent() {
        let untitled = tar_layer(std::collections::HashMap::new());

        assert_eq!(untitled.title(), None);
    }

    #[test]
    fn test_oci_layer_clone() {
        let source = OciLayer {
            media_type: "type".to_string(),
            digest: "digest".to_string(),
            size: 100,
            annotations: std::collections::HashMap::from([(
                "key".to_string(),
                "value".to_string(),
            )]),
        };

        let copy = source.clone();
        assert_eq!(source.media_type, copy.media_type);
        assert_eq!(source.digest, copy.digest);
        assert_eq!(source.size, copy.size);
    }

    // --- OciDownload builder --------------------------------------------------

    #[test]
    fn test_oci_download_new() {
        let download = OciDownload::new("org/repo:tag");
        assert_eq!(download.reference.registry, "ghcr.io");
        assert_eq!(download.reference.package, "org/repo");
        assert_eq!(download.reference.tag, "tag");
        assert_eq!(download.parallel, 1);
        assert!(!download.extract);
    }

    #[test]
    fn test_oci_download_builder_pattern() {
        let configured = OciDownload::new("org/repo:tag")
            .api("https://custom.registry/v2")
            .output("downloads")
            .extract(true)
            .extract_to("/tmp/extract")
            .parallel(4);

        assert_eq!(configured.api, "https://custom.registry/v2");
        assert_eq!(configured.output, Some("downloads".to_string()));
        assert!(configured.extract);
        assert_eq!(configured.extract_to, Some(PathBuf::from("/tmp/extract")));
        assert_eq!(configured.parallel, 4);
    }

    #[test]
    fn test_oci_download_parallel_clamped() {
        // Zero is raised to the minimum of one worker.
        let minimum = OciDownload::new("org/repo:tag").parallel(0);
        assert_eq!(minimum.parallel, 1);

        // Large values are passed through unchanged.
        let large = OciDownload::new("org/repo:tag").parallel(100);
        assert_eq!(large.parallel, 100);
    }

    // --- Deserialization ------------------------------------------------------

    #[test]
    fn test_oci_manifest_deserialize() {
        let json = r#"{
            "mediaType": "application/vnd.oci.image.manifest.v1+json",
            "config": {
                "mediaType": "application/vnd.oci.image.config.v1+json",
                "digest": "sha256:config123",
                "size": 512
            },
            "layers": [
                {
                    "mediaType": "application/vnd.oci.image.layer.v1.tar",
                    "digest": "sha256:layer123",
                    "size": 1024,
                    "annotations": {
                        "org.opencontainers.image.title": "file.tar.gz"
                    }
                }
            ]
        }"#;

        let parsed: OciManifest = serde_json::from_str(json).unwrap();
        assert_eq!(
            parsed.media_type,
            "application/vnd.oci.image.manifest.v1+json"
        );
        assert_eq!(parsed.config.digest, "sha256:config123");
        assert_eq!(parsed.layers.len(), 1);
        assert_eq!(parsed.layers[0].title(), Some("file.tar.gz"));
    }

    #[test]
    fn test_oci_layer_deserialize_without_annotations() {
        let json = r#"{
            "mediaType": "application/vnd.oci.image.layer.v1.tar",
            "digest": "sha256:abc",
            "size": 2048
        }"#;

        let parsed: OciLayer = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.media_type, "application/vnd.oci.image.layer.v1.tar");
        assert_eq!(parsed.digest, "sha256:abc");
        assert_eq!(parsed.size, 2048);
        assert!(parsed.annotations.is_empty());
    }

    #[test]
    fn test_oci_config_deserialize() {
        let json = r#"{
            "mediaType": "application/vnd.oci.image.config.v1+json",
            "digest": "sha256:xyz789",
            "size": 256
        }"#;

        let parsed: OciConfig = serde_json::from_str(json).unwrap();
        assert_eq!(
            parsed.media_type,
            "application/vnd.oci.image.config.v1+json"
        );
        assert_eq!(parsed.digest, "sha256:xyz789");
        assert_eq!(parsed.size, 256);
    }
}