use bv_core::lockfile::{CondaPackagePin, LayerDescriptor};
use crate::catalog::LayerCatalog;
use crate::popularity::PopularityMap;
use crate::spec::ResolvedPackage;
/// Policy used by [`pack`] to distribute resolved packages across layer groups.
///
/// The default is [`PackingStrategy::OnePerPackage`].
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum PackingStrategy {
/// Every package is placed in its own group; input order is preserved.
#[default]
OnePerPackage,
/// Packages are ranked by their score in the supplied `PopularityMap`
/// (highest first, ties broken by name). The top `max_layers - 2` packages
/// get a group each and the remainder share one trailing group.
// NOTE(review): why two slots are held back (rather than one for the shared
// tail) is not visible in this file — confirm against the image builder.
PopularityBased { max_layers: usize },
/// Same layout as `PopularityBased`, but packages are ranked by the `count`
/// of their exact `(name, version, build)` entry in the supplied
/// `LayerCatalog`; packages without an entry rank as 0.
CatalogAware { max_layers: usize },
}
/// A set of packages that the packer has assigned to the same group.
// NOTE(review): each group presumably becomes one image layer (going by the
// type name) — confirm with the code that consumes `pack`'s output.
#[derive(Debug, Clone)]
pub struct LayerGroup {
/// Packages in this group, in the order the packing strategy produced them.
pub packages: Vec<ResolvedPackage>,
}
/// Splits `packages` into [`LayerGroup`]s according to `strategy`.
///
/// * `popularity` is consulted only by [`PackingStrategy::PopularityBased`].
/// * `catalog` is consulted only by [`PackingStrategy::CatalogAware`].
///
/// Either may be `None`, in which case the corresponding strategy treats every
/// package as having a rank of 0.
pub fn pack(
    packages: &[ResolvedPackage],
    strategy: &PackingStrategy,
    popularity: Option<&PopularityMap>,
    catalog: Option<&LayerCatalog>,
) -> Vec<LayerGroup> {
    match *strategy {
        PackingStrategy::PopularityBased { max_layers } => {
            pack_by_popularity(packages, max_layers, popularity)
        }
        PackingStrategy::CatalogAware { max_layers } => {
            pack_by_catalog(packages, max_layers, catalog)
        }
        PackingStrategy::OnePerPackage => {
            // One single-package group per input, keeping the caller's order.
            let mut groups = Vec::with_capacity(packages.len());
            for package in packages {
                groups.push(LayerGroup {
                    packages: vec![package.clone()],
                });
            }
            groups
        }
    }
}
/// Groups `packages` so that the most popular ones get a group of their own.
///
/// Packages are ranked by descending popularity score; ties are broken by
/// ascending name so the result is deterministic. The top `max_layers - 2`
/// packages each become a single-package group, and whatever is left shares
/// one trailing group.
///
/// Special cases:
/// * Empty `packages` yields no groups at all (never a group with zero
///   packages), matching `OnePerPackage` and the non-empty-tail rule below.
/// * `max_layers < 3` leaves no room for solo groups, so every package is
///   returned in one group, in input order.
/// * `popularity == None` scores every package as 0, i.e. pure name ordering.
fn pack_by_popularity(
    packages: &[ResolvedPackage],
    max_layers: usize,
    popularity: Option<&PopularityMap>,
) -> Vec<LayerGroup> {
    if packages.is_empty() {
        return Vec::new();
    }
    if max_layers < 3 {
        return vec![LayerGroup {
            packages: packages.to_vec(),
        }];
    }

    let score = |pkg: &ResolvedPackage| popularity.map_or(0, |p| p.score(&pkg.name));

    let mut ranked = packages.to_vec();
    ranked.sort_by(|a, b| {
        score(b)
            .cmp(&score(a))
            .then_with(|| a.name.cmp(&b.name))
    });

    // `max_layers >= 3` here, so the subtraction cannot underflow.
    let solo_count = (max_layers - 2).min(ranked.len());
    // Move the packages out of `ranked` instead of cloning them a second time.
    let tail = ranked.split_off(solo_count);

    let mut groups: Vec<LayerGroup> = ranked
        .into_iter()
        .map(|pkg| LayerGroup {
            packages: vec![pkg],
        })
        .collect();
    if !tail.is_empty() {
        groups.push(LayerGroup { packages: tail });
    }
    groups
}
/// Groups `packages` so that those already known to the layer catalog get a
/// group of their own.
///
/// Packages are ranked by the `count` of their exact `(name, version, build)`
/// catalog entry, highest first; packages with no entry rank as 0. Ties are
/// broken by ascending name so the result is deterministic. The top
/// `max_layers - 2` packages each become a single-package group, and whatever
/// is left shares one trailing group.
///
/// Special cases:
/// * Empty `packages` yields no groups at all (never a group with zero
///   packages), matching `OnePerPackage` and the non-empty-tail rule below.
/// * `max_layers < 3` leaves no room for solo groups, so every package is
///   returned in one group, in input order.
/// * `catalog == None` ranks every package as 0, i.e. pure name ordering.
fn pack_by_catalog(
    packages: &[ResolvedPackage],
    max_layers: usize,
    catalog: Option<&LayerCatalog>,
) -> Vec<LayerGroup> {
    if packages.is_empty() {
        return Vec::new();
    }
    if max_layers < 3 {
        return vec![LayerGroup {
            packages: packages.to_vec(),
        }];
    }

    let seen_count = |pkg: &ResolvedPackage| {
        catalog
            .and_then(|c| c.get(&pkg.name, &pkg.version, &pkg.build))
            .map_or(0, |entry| entry.count)
    };

    let mut ranked = packages.to_vec();
    ranked.sort_by(|a, b| {
        seen_count(b)
            .cmp(&seen_count(a))
            .then_with(|| a.name.cmp(&b.name))
    });

    // `max_layers >= 3` here, so the subtraction cannot underflow.
    let solo_count = (max_layers - 2).min(ranked.len());
    // Move the packages out of `ranked` instead of cloning them a second time.
    let tail = ranked.split_off(solo_count);

    let mut groups: Vec<LayerGroup> = ranked
        .into_iter()
        .map(|pkg| LayerGroup {
            packages: vec![pkg],
        })
        .collect();
    if !tail.is_empty() {
        groups.push(LayerGroup { packages: tail });
    }
    groups
}
pub fn placeholder_descriptor(pkg: &ResolvedPackage) -> LayerDescriptor {
LayerDescriptor {
digest: String::new(),
size: 0,
media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
conda_package: Some(CondaPackagePin {
name: pkg.name.clone(),
version: pkg.version.clone(),
build: pkg.build.clone(),
channel: pkg.channel.clone(),
sha256: pkg.sha256.clone(),
}),
}
}
// Unit tests for the packing strategies exposed through `pack`.
#[cfg(test)]
mod tests {
use super::*;
// Fixture: a package called `name` with fixed version "1.0.0", build "h0_0",
// channel "conda-forge", sha256 "abc" and no dependencies.
fn pkg(name: &str) -> ResolvedPackage {
crate::spec::ResolvedPackage {
name: name.into(),
version: "1.0.0".into(),
build: "h0_0".into(),
channel: "conda-forge".into(),
url: format!("https://example.com/{name}.conda"),
sha256: "abc".into(),
filename: format!("{name}-1.0.0-h0_0.conda"),
depends: vec![],
}
}
// `OnePerPackage` yields one group per input package, in input order.
#[test]
fn one_per_package_gives_n_groups() {
let pkgs = vec![pkg("openssl"), pkg("zlib"), pkg("samtools")];
let groups = pack(&pkgs, &PackingStrategy::OnePerPackage, None, None);
assert_eq!(groups.len(), 3);
assert_eq!(groups[0].packages[0].name, "openssl");
}
// Ten packages with `max_layers: 5` give 5 - 2 = 3 solo groups plus one
// shared tail group holding the remaining 7 packages. No popularity map is
// supplied, so every score is 0 and ordering falls back to the name.
#[test]
fn popularity_packing_respects_max_layers() {
let pkgs: Vec<_> = (0..10).map(|i| pkg(&format!("pkg{i:02}"))).collect();
let groups = pack(
&pkgs,
&PackingStrategy::PopularityBased { max_layers: 5 },
None,
None,
);
assert_eq!(groups.len(), 4);
assert_eq!(groups.last().unwrap().packages.len(), 7); }
// A single package with a generous budget ends up as exactly one solo group
// and no tail group.
#[test]
fn popularity_packing_degenerate_small_input() {
let pkgs = vec![pkg("samtools")];
let groups = pack(
&pkgs,
&PackingStrategy::PopularityBased { max_layers: 64 },
None,
None,
);
assert_eq!(groups.len(), 1);
assert_eq!(groups[0].packages[0].name, "samtools");
}
// openssl is recorded 10 times, zlib 3 times and rare once; the groups must
// come out in that order regardless of the input order.
#[test]
fn popular_packages_placed_before_rare_ones() {
let mut pop = PopularityMap::new();
for _ in 0..10 {
pop.record_tool(&["openssl".into()]);
}
for _ in 0..3 {
pop.record_tool(&["zlib".into()]);
}
pop.record_tool(&["rare".into()]);
let pkgs = vec![pkg("rare"), pkg("zlib"), pkg("openssl")];
let groups = pack(
&pkgs,
&PackingStrategy::PopularityBased { max_layers: 64 },
Some(&pop),
None,
);
assert_eq!(groups.len(), 3);
assert_eq!(groups[0].packages[0].name, "openssl");
assert_eq!(groups[1].packages[0].name, "zlib");
assert_eq!(groups[2].packages[0].name, "rare");
}
// Five packages with `max_layers: 5`: three solo groups (openssl first, as
// it is recorded by both tools) and a tail group of two packages.
#[test]
fn rare_packages_land_in_long_tail() {
let mut pop = PopularityMap::new();
pop.record_tool(&["openssl".into(), "zlib".into()]);
pop.record_tool(&["openssl".into(), "bz2".into()]);
let pkgs = vec![
pkg("openssl"),
pkg("zlib"),
pkg("bz2"),
pkg("rare1"),
pkg("rare2"),
];
let groups = pack(
&pkgs,
&PackingStrategy::PopularityBased { max_layers: 5 },
Some(&pop),
None,
);
assert_eq!(groups.len(), 4);
assert_eq!(groups[0].packages[0].name, "openssl");
let tail = groups.last().unwrap();
assert_eq!(tail.packages.len(), 2);
}
// All three packages are recorded by the same single tool; the expected
// output is name order (aa, bb, cc), identical across two runs.
#[test]
fn packing_is_deterministic_for_same_scores() {
let mut pop = PopularityMap::new();
pop.record_tool(&["aa".into(), "bb".into(), "cc".into()]);
let pkgs = vec![pkg("cc"), pkg("aa"), pkg("bb")];
let groups1 = pack(
&pkgs,
&PackingStrategy::PopularityBased { max_layers: 64 },
Some(&pop),
None,
);
let groups2 = pack(
&pkgs,
&PackingStrategy::PopularityBased { max_layers: 64 },
Some(&pop),
None,
);
let names1: Vec<_> = groups1
.iter()
.map(|g| g.packages[0].name.as_str())
.collect();
let names2: Vec<_> = groups2
.iter()
.map(|g| g.packages[0].name.as_str())
.collect();
assert_eq!(names1, names2, "packing must be deterministic");
assert_eq!(names1, vec!["aa", "bb", "cc"]);
}
// Simulates 100 tools that each depend on the same 7 shared packages plus
// one tool-specific package. For tools 0, 42 and 99, every shared package
// must appear in a single-package group.
#[test]
fn shared_popular_packages_get_solo_layers_across_tools() {
const NUM_TOOLS: usize = 100;
const MAX_LAYERS: usize = 64;
const SHARED_PKGS: &[&str] = &[
"openssl",
"zlib",
"libgcc",
"libstdcxx",
"ncurses",
"xz",
"bzip2",
];
const UNIQUE_SUFFIX: &str = "tool-specific-pkg";
// Package-name list per tool: the shared set followed by one unique name.
let all_tool_packages: Vec<Vec<String>> = (0..NUM_TOOLS)
.map(|i| {
let mut pkgs: Vec<String> = SHARED_PKGS.iter().map(|s| s.to_string()).collect();
pkgs.push(format!("{UNIQUE_SUFFIX}-{i}"));
pkgs
})
.collect();
let mut pop = PopularityMap::new();
for tool_pkgs in &all_tool_packages {
pop.record_tool(tool_pkgs);
}
for tool_idx in [0usize, 42, 99] {
// Built inline rather than via `pkg` so each package gets a distinct sha256.
let pkgs: Vec<_> = all_tool_packages[tool_idx]
.iter()
.map(|name| crate::spec::ResolvedPackage {
name: name.clone(),
version: "1.0.0".into(),
build: "h0_0".into(),
channel: "conda-forge".into(),
url: format!("https://example.com/{name}.conda"),
sha256: format!("sha256-{name}"),
filename: format!("{name}-1.0.0-h0_0.conda"),
depends: vec![],
})
.collect();
let groups = pack(
&pkgs,
&PackingStrategy::PopularityBased {
max_layers: MAX_LAYERS,
},
Some(&pop),
None,
);
for shared in SHARED_PKGS {
let solo = groups
.iter()
.any(|g| g.packages.len() == 1 && g.packages[0].name == *shared);
assert!(
solo,
"shared package '{}' must get its own layer in tool-{tool_idx}",
shared
);
}
}
}
// openssl is recorded by both tools, so it must be the first group in each
// tool's packing and carry the same sha256 in both.
#[test]
fn shared_package_has_same_solo_group_across_tools() {
let mut pop = PopularityMap::new();
pop.record_tool(&["openssl".into(), "samtools".into()]);
pop.record_tool(&["openssl".into(), "bwa".into()]);
let samtools_pkgs = vec![pkg("openssl"), pkg("samtools")];
let bwa_pkgs = vec![pkg("openssl"), pkg("bwa")];
let groups_s = pack(
&samtools_pkgs,
&PackingStrategy::PopularityBased { max_layers: 64 },
Some(&pop),
None,
);
let groups_b = pack(
&bwa_pkgs,
&PackingStrategy::PopularityBased { max_layers: 64 },
Some(&pop),
None,
);
assert_eq!(groups_s[0].packages[0].name, "openssl");
assert_eq!(groups_b[0].packages[0].name, "openssl");
assert_eq!(
groups_s[0].packages[0].sha256,
groups_b[0].packages[0].sha256,
);
}
// Fixture: like `pkg`, but with a caller-chosen version and build string so
// the package can be matched against catalog entries.
fn pkg_versioned(name: &str, version: &str, build: &str) -> ResolvedPackage {
ResolvedPackage {
name: name.into(),
version: version.into(),
build: build.into(),
channel: "conda-forge".into(),
url: format!("https://example.com/{name}.conda"),
sha256: "abc".into(),
filename: format!("{name}-{version}-{build}.conda"),
depends: vec![],
}
}
// openssl is recorded in the catalog twice, zlib once and rare never; the
// groups must come out in that order.
#[test]
fn catalog_aware_prioritizes_known_packages() {
let mut cat = LayerCatalog::new();
cat.record("openssl", "1.0.0", "h0_0", "sha256:aaa");
cat.record("openssl", "1.0.0", "h0_0", "sha256:aaa");
cat.record("zlib", "1.0.0", "h0_0", "sha256:bbb");
let pkgs = vec![
pkg_versioned("rare", "1.0.0", "h0_0"),
pkg_versioned("zlib", "1.0.0", "h0_0"),
pkg_versioned("openssl", "1.0.0", "h0_0"),
];
let groups = pack(
&pkgs,
&PackingStrategy::CatalogAware { max_layers: 64 },
None,
Some(&cat),
);
assert_eq!(groups.len(), 3);
assert_eq!(groups[0].packages[0].name, "openssl"); assert_eq!(groups[1].packages[0].name, "zlib"); assert_eq!(groups[2].packages[0].name, "rare"); }
// With `max_layers: 5` there are three solo slots: the three catalogued
// packages take them, and the two uncatalogued ones share the tail group.
#[test]
fn catalog_aware_pushes_unknown_to_long_tail_when_budget_tight() {
let mut cat = LayerCatalog::new();
cat.record("openssl", "1.0.0", "h0_0", "sha256:aaa");
cat.record("zlib", "1.0.0", "h0_0", "sha256:bbb");
cat.record("libgcc", "1.0.0", "h0_0", "sha256:ccc");
let pkgs = vec![
pkg_versioned("rare1", "1.0.0", "h0_0"),
pkg_versioned("rare2", "1.0.0", "h0_0"),
pkg_versioned("openssl", "1.0.0", "h0_0"),
pkg_versioned("zlib", "1.0.0", "h0_0"),
pkg_versioned("libgcc", "1.0.0", "h0_0"),
];
let groups = pack(
&pkgs,
&PackingStrategy::CatalogAware { max_layers: 5 },
None,
Some(&cat),
);
assert_eq!(groups.len(), 4);
let solo_names: Vec<_> = groups[..3]
.iter()
.map(|g| g.packages[0].name.as_str())
.collect();
assert!(solo_names.contains(&"openssl"));
assert!(solo_names.contains(&"zlib"));
assert!(solo_names.contains(&"libgcc"));
let tail = groups.last().unwrap();
assert_eq!(tail.packages.len(), 2);
let tail_names: Vec<_> = tail.packages.iter().map(|p| p.name.as_str()).collect();
assert!(tail_names.contains(&"rare1"));
assert!(tail_names.contains(&"rare2"));
}
}