microsandbox_core/management/
image.rs

1//! Container image management for Microsandbox.
2//!
3//! This module provides functionality for managing container images from various
4//! registries. It supports pulling images from Docker and Sandboxes.io registries,
5//! handling image layers, and managing the local image cache.
6
7use crate::{
8    management::db::{self, OCI_DB_MIGRATOR},
9    oci::{DockerRegistry, OciRegistryPull, Reference},
10    MicrosandboxError, MicrosandboxResult,
11};
12#[cfg(feature = "cli")]
13use flate2::read::GzDecoder;
14use futures::future;
15#[cfg(feature = "cli")]
16use indicatif::{ProgressBar, ProgressStyle};
17#[cfg(feature = "cli")]
18use microsandbox_utils::term::{self, MULTI_PROGRESS};
19use microsandbox_utils::{env, EXTRACTED_LAYER_SUFFIX, LAYERS_SUBDIR, OCI_DB_FILENAME};
20use sqlx::{Pool, Sqlite};
21#[cfg(feature = "cli")]
22use std::io::{Read, Result as IoResult};
23use std::path::{Path, PathBuf};
24#[cfg(feature = "cli")]
25use tar::Archive;
26use tempfile::tempdir;
27use tokio::fs;
28#[cfg(not(feature = "cli"))]
29use tokio::process::Command;
30#[cfg(feature = "cli")]
31use tokio::task::spawn_blocking;
32
33//--------------------------------------------------------------------------------------------------
34// Constants
35//--------------------------------------------------------------------------------------------------
36
/// Registry host that marks an image reference as belonging to the Docker registry.
const DOCKER_REGISTRY: &str = "docker.io";

/// Registry host that marks an image reference as belonging to the Sandboxes registry.
const SANDBOXES_REGISTRY: &str = "sandboxes.io";

/// Label shown on the spinner while downloaded layers are being extracted.
#[cfg(feature = "cli")]
const EXTRACT_LAYERS_MSG: &str = "Extracting layers";
46
47//--------------------------------------------------------------------------------------------------
48// Functions
49//--------------------------------------------------------------------------------------------------
50
51/// Pulls an image or image group from a supported registry (Docker or Sandboxes.io).
52///
53/// This function handles pulling container images from different registries based on the provided
54/// parameters. It supports both single image pulls and image group pulls (for Sandboxes.io registry only).
55///
56/// For Sandboxes.io registry:
57/// - Library repository images are pulled from Docker registry for compatibility
58/// - Other namespaces are also pulled from Docker registry with a warning about potential future changes
59///
60/// ## Arguments
61///
62/// * `name` - The reference to the image or image group to pull
63/// * `image` - If true, indicates that a single image should be pulled
64/// * `image_group` - If true, indicates that an image group should be pulled (Sandboxes.io only)
65/// * `layer_path` - The path to store the layer files
66///
67/// ## Errors
68///
69/// Returns an error in the following cases:
70/// * Both `image` and `image_group` are true (invalid combination)
71/// * Image group pull is requested for a non-Sandboxes.io registry
72/// * Unsupported registry is specified
73/// * Registry-specific pull operations fail
74///
75/// # Examples
76///
77/// ```no_run
78/// use microsandbox_core::management::image;
79/// use microsandbox_core::oci::Reference;
80/// use std::path::PathBuf;
81///
82/// # #[tokio::main]
83/// # async fn main() -> anyhow::Result<()> {
84/// // Pull a single image from Docker registry
85/// image::pull("docker.io/library/ubuntu:latest".parse().unwrap(), true, false, None).await?;
86///
87/// // Pull an image from Sandboxes.io registry
88/// image::pull("sandboxes.io/library/alpine:latest".parse().unwrap(), true, false, None).await?;
89///
90/// // Pull an image from the default registry (when no registry is specified in the reference)
91/// image::pull("nginx:latest".parse().unwrap(), true, false, None).await?;
92///
93/// // You can set the OCI_REGISTRY_DOMAIN environment variable to specify your default registry
94/// std::env::set_var("OCI_REGISTRY_DOMAIN", "docker.io");
95/// image::pull("alpine:latest".parse().unwrap(), true, false, None).await?;
96///
97/// // Pull an image from Docker registry and store the layers in a custom directory
98/// image::pull("docker.io/library/ubuntu:latest".parse().unwrap(), true, false, Some(PathBuf::from("/custom/path"))).await?;
99/// # Ok(())
100/// # }
101/// ```
102pub async fn pull(
103    name: Reference,
104    image: bool,
105    image_group: bool,
106    layer_path: Option<PathBuf>,
107) -> MicrosandboxResult<()> {
108    // Both cannot be true
109    if image && image_group {
110        return Err(MicrosandboxError::InvalidArgument(
111            "both image and image_group cannot be true".to_string(),
112        ));
113    }
114
115    if image_group {
116        return Err(MicrosandboxError::InvalidArgument(
117            "image group pull is currently not supported".to_string(),
118        ));
119    }
120
121    // Single image pull mode (default if both flags are false, or if image is true)
122    let registry = name.to_string().split('/').next().unwrap_or("").to_string();
123    let temp_download_dir = tempdir()?.into_path();
124
125    tracing::info!(
126        "temporary download directory: {}",
127        temp_download_dir.display()
128    );
129
130    if registry == DOCKER_REGISTRY {
131        pull_from_docker_registry(&name, &temp_download_dir, layer_path).await
132    } else if registry == SANDBOXES_REGISTRY {
133        pull_from_sandboxes_registry(&name, &temp_download_dir, layer_path).await
134    } else {
135        Err(MicrosandboxError::InvalidArgument(format!(
136            "Unsupported registry: {}",
137            registry
138        )))
139    }
140}
141
142/// Pulls a single image from the Docker registry.
143///
144/// ## Arguments
145///
146/// * `image` - The reference to the Docker image to pull
147/// * `download_dir` - The directory to download the image layers to
148/// * `layer_path` - Optional custom path to store layers
149///
150/// ## Errors
151///
152/// Returns an error if:
153/// * Failed to create temporary directories
154/// * Failed to initialize Docker registry client
155/// * Failed to pull the image from Docker registry
156pub async fn pull_from_docker_registry(
157    image: &Reference,
158    download_dir: impl AsRef<Path>,
159    layer_path: Option<PathBuf>,
160) -> MicrosandboxResult<()> {
161    let download_dir = download_dir.as_ref();
162    let microsandbox_home_path = env::get_microsandbox_home_path();
163    let db_path = microsandbox_home_path.join(OCI_DB_FILENAME);
164
165    // Use custom layer_path if specified, otherwise use default microsandbox layers directory
166    let layers_dir = match layer_path {
167        Some(path) => path,
168        None => microsandbox_home_path.join(LAYERS_SUBDIR),
169    };
170
171    // Create layers directory if it doesn't exist
172    fs::create_dir_all(&layers_dir).await?;
173
174    let docker_registry = DockerRegistry::new(download_dir, &db_path).await?;
175
176    // Get or create a connection pool to the database
177    let pool = db::get_or_create_pool(&db_path, &OCI_DB_MIGRATOR).await?;
178
179    // Check if we need to pull the image
180    if check_image_layers(&pool, image, &layers_dir).await? {
181        tracing::info!("image {} and all its layers exist, skipping pull", image);
182        return Ok(());
183    }
184
185    docker_registry
186        .pull_image(image.get_repository(), image.get_selector().clone())
187        .await?;
188
189    // Find and extract layers in parallel
190    let layer_paths = collect_layer_files(download_dir).await?;
191
192    #[cfg(feature = "cli")]
193    let extract_layers_sp = term::create_spinner(
194        EXTRACT_LAYERS_MSG.to_string(),
195        None,
196        Some(layer_paths.len() as u64),
197    );
198
199    let extraction_futures: Vec<_> = layer_paths
200        .into_iter()
201        .map(|path| {
202            let layers_dir = layers_dir.clone();
203            #[cfg(feature = "cli")]
204            let extract_layers_sp = extract_layers_sp.clone();
205            async move {
206                let result = extract_layer(path, &layers_dir).await;
207                #[cfg(feature = "cli")]
208                extract_layers_sp.inc(1);
209                result
210            }
211        })
212        .collect();
213
214    // Wait for all extractions to complete
215    for result in future::join_all(extraction_futures).await {
216        result?;
217    }
218
219    #[cfg(feature = "cli")]
220    extract_layers_sp.finish();
221
222    Ok(())
223}
224
225/// Pulls a single image from the Sandboxes.io registry.
226///
227/// For library repository images, this function delegates to `pull_from_docker_registry` for compatibility.
228/// For other namespaces, it also uses Docker registry but displays a warning about potential future changes.
229///
230/// ## Arguments
231///
232/// * `image` - The reference to the Sandboxes.io image to pull
233/// * `download_dir` - The directory to download the image layers to
234/// * `layer_path` - Optional custom path to store layers
235///
236/// ## Errors
237///
238/// Returns an error if the underlying Docker registry pull fails
239pub async fn pull_from_sandboxes_registry(
240    image: &Reference,
241    download_dir: impl AsRef<Path>,
242    layer_path: Option<PathBuf>,
243) -> MicrosandboxResult<()> {
244    // Check if this is a library repository image
245    let repository = image.get_repository();
246
247    // Create a Docker reference string using the original repository but with docker.io registry
248    // Format: docker.io/repository:tag
249    let docker_ref_str = format!(
250        "{}/{}",
251        DOCKER_REGISTRY,
252        image
253            .to_string()
254            .split('/')
255            .skip(1)
256            .collect::<Vec<&str>>()
257            .join("/")
258    );
259    let docker_reference: Reference = docker_ref_str.parse()?;
260
261    if repository.starts_with("library/") {
262        tracing::info!("pulling library image from Docker registry for compatibility");
263    } else {
264        tracing::warn!(
265            "Non-library namespace image requested from Sandboxes registry: {}",
266            repository
267        );
268        tracing::warn!(
269            "Currently using Docker registry for compatibility, but namespace mappings may change in the future"
270        );
271        tracing::info!(
272            "To ensure consistent behavior, consider setting OCI_REGISTRY_DOMAIN=docker.io if you want to use Docker registry consistently"
273        );
274    }
275
276    pull_from_docker_registry(&docker_reference, download_dir, layer_path).await
277}
278
279/// Pulls an image group from the Sandboxes.io registry.
280///
281/// ## Arguments
282///
283/// * `group` - The reference to the image group to pull
284/// ## Errors
285///
286/// Returns an error if:
287/// * Sandboxes registry image group pull is not implemented
288pub async fn pull_group_from_sandboxes_registry(_group: &Reference) -> MicrosandboxResult<()> {
289    return Err(MicrosandboxError::NotImplemented(
290        "Sandboxes registry image group pull is not implemented".to_string(),
291    ));
292}
293
294//--------------------------------------------------------------------------------------------------
295// Functions: Helpers
296//--------------------------------------------------------------------------------------------------
297
298/// Checks if all layers for an image exist in both the database and the layers directory.
299///
300/// ## Arguments
301///
302/// * `pool` - The database connection pool
303/// * `image` - The reference to the image to check
304/// * `layers_dir` - The directory where layers should be stored
305///
306/// ## Returns
307///
308/// Returns Ok(true) if all layers exist and are valid, Ok(false) if any layers are missing
309/// or invalid. Any errors during the check process will return Ok(false) with a warning log.
310async fn check_image_layers(
311    pool: &Pool<Sqlite>,
312    image: &Reference,
313    layers_dir: impl AsRef<Path>,
314) -> MicrosandboxResult<bool> {
315    let layers_dir = layers_dir.as_ref();
316
317    // Check if the image exists in the database
318    match db::image_exists(pool, &image.to_string()).await {
319        Ok(true) => {
320            // Image exists, get all layer digests for this image
321            match db::get_image_layer_digests(pool, &image.to_string()).await {
322                Ok(layer_digests) => {
323                    tracing::info!("layer_digests: {:?}", layer_digests);
324                    if layer_digests.is_empty() {
325                        tracing::warn!("no layers found for image {}", image);
326                        return Ok(false);
327                    }
328
329                    // Check if all layers exist in the layers directory
330                    for digest in &layer_digests {
331                        let layer_path =
332                            layers_dir.join(format!("{}.{}", digest, EXTRACTED_LAYER_SUFFIX));
333                        if !layer_path.exists() {
334                            tracing::warn!("layer {} not found in layers directory", digest);
335                            return Ok(false);
336                        }
337
338                        // Also check that the layer directory actually has content
339                        let mut read_dir = fs::read_dir(&layer_path).await?;
340                        let dir_empty = read_dir.next_entry().await?.is_none();
341                        if dir_empty {
342                            tracing::warn!("layer {} exists but is empty", digest);
343                        }
344
345                        tracing::info!("layer {} found in layers directory", digest);
346                    }
347
348                    // Get the layers from database to verify database records exist for all digests
349                    let db_layers = db::get_layers_by_digest(pool, &layer_digests).await?;
350
351                    if db_layers.len() < layer_digests.len() {
352                        tracing::warn!(
353                            "some layers for image {} exist on disk but missing in db",
354                            image
355                        );
356                        return Ok(false);
357                    }
358
359                    tracing::info!("all layers for image {} exist and are valid", image);
360                    Ok(true)
361                }
362                Err(e) => {
363                    tracing::warn!("error checking layer digests: {}, will pull image", e);
364                    Ok(false)
365                }
366            }
367        }
368        Ok(false) => {
369            tracing::warn!("image {} does not exist in db, will pull image", image);
370            Ok(false)
371        }
372        Err(e) => {
373            tracing::warn!("error checking image existence: {}, will pull image", e);
374            Ok(false)
375        }
376    }
377}
378
379/// Extracts a layer from the downloaded tar.gz file into an extracted directory.
380/// The extracted directory will be named as <layer-name>.extracted
381async fn extract_layer(
382    layer_path: impl AsRef<std::path::Path>,
383    extract_base_dir: impl AsRef<Path>,
384) -> MicrosandboxResult<()> {
385    let layer_path = layer_path.as_ref();
386    let file_name = layer_path
387        .file_name()
388        .and_then(|n| n.to_str())
389        .ok_or_else(|| MicrosandboxError::LayerHandling {
390            source: std::io::Error::new(std::io::ErrorKind::NotFound, "invalid layer file name"),
391            layer: layer_path.display().to_string(),
392        })?;
393
394    // Create the extraction directory with name <layer-name>.extracted
395    let extract_dir = extract_base_dir
396        .as_ref()
397        .join(format!("{}.{}", file_name, EXTRACTED_LAYER_SUFFIX));
398
399    // Check if the layer is already extracted
400    if extract_dir.exists() {
401        // Check if the directory has content (not empty)
402        let mut read_dir =
403            fs::read_dir(&extract_dir)
404                .await
405                .map_err(|e| MicrosandboxError::LayerHandling {
406                    source: e,
407                    layer: file_name.to_string(),
408                })?;
409
410        if read_dir.next_entry().await?.is_some() {
411            tracing::info!(
412                "layer {} already extracted at {}, skipping extraction",
413                file_name,
414                extract_dir.display()
415            );
416            return Ok(());
417        }
418    }
419
420    fs::create_dir_all(&extract_dir)
421        .await
422        .map_err(|e| MicrosandboxError::LayerHandling {
423            source: e,
424            layer: file_name.to_string(),
425        })?;
426
427    tracing::info!(
428        "extracting layer {} to {}",
429        file_name,
430        extract_dir.display()
431    );
432
433    #[cfg(feature = "cli")]
434    struct ProgressReader<R> {
435        inner: R,
436        bar: ProgressBar,
437    }
438    #[cfg(feature = "cli")]
439    impl<R: Read> Read for ProgressReader<R> {
440        fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> {
441            let n = self.inner.read(buf)?;
442            if n > 0 {
443                self.bar.inc(n as u64);
444            }
445            Ok(n)
446        }
447    }
448
449    #[cfg(feature = "cli")]
450    {
451        let total_bytes = fs::metadata(layer_path).await?.len();
452        let pb = MULTI_PROGRESS.add(ProgressBar::new(total_bytes));
453        pb.set_style(
454            ProgressStyle::with_template(
455                "{prefix:.bold.dim} {bar:40.green/green.dim} {bytes:.bold}/{total_bytes:.dim}",
456            )
457            .unwrap()
458            .progress_chars("=+-"),
459        );
460        let digest_short = if let Some(rest) = file_name.strip_prefix("sha256:") {
461            &rest[..8.min(rest.len())]
462        } else {
463            &file_name[..8.min(file_name.len())]
464        };
465        pb.set_prefix(format!("{}", digest_short));
466
467        let layer_path_clone = layer_path.to_path_buf();
468        let extract_dir_clone = extract_dir.clone();
469        let pb_clone = pb.clone();
470
471        spawn_blocking(move || -> MicrosandboxResult<()> {
472            let file = std::fs::File::open(&layer_path_clone)?;
473            let reader = ProgressReader {
474                inner: file,
475                bar: pb_clone.clone(),
476            };
477            let decoder = GzDecoder::new(reader);
478            let mut archive = Archive::new(decoder);
479            archive.unpack(&extract_dir_clone)?;
480            Ok(())
481        })
482        .await
483        .map_err(|e| MicrosandboxError::LayerExtraction(format!("{:?}", e)))??;
484
485        pb.finish_and_clear();
486    }
487
488    #[cfg(not(feature = "cli"))]
489    {
490        // Fallback to system tar when cli disabled
491        let output = Command::new("tar")
492            .arg("-xzf")
493            .arg(layer_path)
494            .arg("-C")
495            .arg(&extract_dir)
496            .output()
497            .await
498            .map_err(|e| MicrosandboxError::LayerHandling {
499                source: e,
500                layer: file_name.to_string(),
501            })?;
502
503        if !output.status.success() {
504            let error_msg = String::from_utf8_lossy(&output.stderr);
505            return Err(MicrosandboxError::LayerExtraction(format!(
506                "Failed to extract layer {}: {}",
507                file_name, error_msg
508            )));
509        }
510    }
511
512    tracing::info!(
513        "successfully extracted layer {} to {}",
514        file_name,
515        extract_dir.display()
516    );
517    Ok(())
518}
519
520/// Collects all layer files in the given directory that start with "sha256:".
521async fn collect_layer_files(dir: impl AsRef<Path>) -> MicrosandboxResult<Vec<PathBuf>> {
522    let mut layer_paths = Vec::new();
523    let mut read_dir = fs::read_dir(dir).await?;
524
525    while let Ok(Some(entry)) = read_dir.next_entry().await {
526        let path = entry.path();
527        if path.is_file() {
528            if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
529                if file_name.starts_with("sha256:") {
530                    layer_paths.push(path.clone());
531                }
532            }
533        }
534    }
535
536    tracing::info!("found {} layers to extract", layer_paths.len());
537    Ok(layer_paths)
538}
539
540//--------------------------------------------------------------------------------------------------
541// Tests
542//--------------------------------------------------------------------------------------------------
543
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// End-to-end pull of a small nginx image: checks the database record, the extracted
    /// layer directories, and a few well-known nginx files. Ignored by default because it
    /// needs network access.
    #[test_log::test(tokio::test)]
    #[ignore = "makes network requests to Docker registry to pull an image"]
    async fn test_image_pull_from_docker_registry() -> MicrosandboxResult<()> {
        // Isolated scratch space holding both the microsandbox home and the download dir
        let scratch = TempDir::new()?;
        let microsandbox_home = scratch.path().join("microsandbox_home");
        let download_dir = scratch.path().join("download");
        fs::create_dir_all(&microsandbox_home).await?;
        fs::create_dir_all(&download_dir).await?;

        // Point the code under test at the scratch home directory
        std::env::set_var("MICROSANDBOX_HOME", microsandbox_home.to_str().unwrap());

        // A small image keeps the test reasonably fast
        let image_ref: Reference = "docker.io/library/nginx:stable-alpine".parse().unwrap();

        pull_from_docker_registry(&image_ref, &download_dir, None).await?;

        // The image must have been recorded in the database
        let db_path = microsandbox_home.join(OCI_DB_FILENAME);
        let pool = db::get_or_create_pool(&db_path, &OCI_DB_MIGRATOR).await?;
        let image_exists = db::image_exists(&pool, &image_ref.to_string()).await?;
        assert!(image_exists, "Image should exist in database");

        // The layers directory must exist ...
        let layers_dir = microsandbox_home.join(LAYERS_SUBDIR);
        assert!(layers_dir.exists(), "Layers directory should exist");

        // ... and contain at least one extracted layer, each of which is a directory
        let mut extracted_count = 0usize;
        let mut entries = fs::read_dir(&layers_dir).await?;
        while let Some(entry) = entries.next_entry().await? {
            let is_extracted = entry
                .file_name()
                .to_string_lossy()
                .ends_with(EXTRACTED_LAYER_SUFFIX);
            if !is_extracted {
                continue;
            }

            extracted_count += 1;
            assert!(
                entry.path().is_dir(),
                "Extracted layer path should be a directory"
            );
        }
        assert!(
            extracted_count > 0,
            "Should have found extracted layer directories"
        );

        // Finally, the expected nginx files must be present in the extracted layers
        helper::verify_nginx_files(&layers_dir).await?;

        Ok(())
    }
}
607
#[cfg(test)]
mod helper {
    use super::*;

    /// Asserts that `nginx.conf`, `conf.d/default.conf` and the `nginx` binary each exist
    /// in at least one extracted layer directory under `layers_dir`.
    pub(super) async fn verify_nginx_files(layers_dir: impl AsRef<Path>) -> MicrosandboxResult<()> {
        let (mut found_nginx_conf, mut found_default_conf, mut found_nginx_binary) =
            (false, false, false);

        let mut entries = fs::read_dir(layers_dir).await?;
        while let Some(entry) = entries.next_entry().await? {
            // Only extracted layer directories are of interest
            let is_extracted = entry
                .file_name()
                .to_string_lossy()
                .ends_with(EXTRACTED_LAYER_SUFFIX);
            if is_extracted {
                let layer_path = entry.path();
                tracing::info!("checking layer: {}", layer_path.display());

                let nginx_dir = layer_path.join("etc").join("nginx");

                // etc/nginx/nginx.conf
                let nginx_conf = nginx_dir.join("nginx.conf");
                if nginx_conf.exists() {
                    found_nginx_conf = true;
                    tracing::info!("found nginx.conf at {}", nginx_conf.display());
                }

                // etc/nginx/conf.d/default.conf
                let default_conf = nginx_dir.join("conf.d").join("default.conf");
                if default_conf.exists() {
                    found_default_conf = true;
                    tracing::info!("found default.conf at {}", default_conf.display());
                }

                // usr/sbin/nginx
                let nginx_binary = layer_path.join("usr").join("sbin").join("nginx");
                if nginx_binary.exists() {
                    found_nginx_binary = true;
                    tracing::info!("found nginx binary at {}", nginx_binary.display());
                }

                // No need to look at further layers once everything has been seen
                let all_found = found_nginx_conf && found_default_conf && found_nginx_binary;
                if all_found {
                    break;
                }
            }
        }

        assert!(
            found_nginx_conf,
            "nginx.conf should exist in one of the layers"
        );
        assert!(
            found_default_conf,
            "default.conf should exist in one of the layers"
        );
        assert!(
            found_nginx_binary,
            "nginx binary should exist in one of the layers"
        );

        Ok(())
    }
}