use bv_core::lockfile::{CondaPackagePin, LayerDescriptor};
use crate::popularity::PopularityMap;
use crate::spec::ResolvedPackage;
/// Strategy used by [`pack`] to split a resolved package list into layer groups.
///
/// The default strategy is [`PackingStrategy::OnePerPackage`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum PackingStrategy {
    /// Every package is placed in a group of its own, in input order.
    #[default]
    OnePerPackage,
    /// Packages are ranked by popularity score; the top-ranked ones each get a
    /// solo group and the remaining ones share a single trailing group.
    PopularityBased {
        /// Layer budget handed to the popularity packer. At most
        /// `max_layers - 2` packages receive a solo group.
        max_layers: usize,
    },
}
/// A group of resolved packages produced by [`pack`].
///
/// NOTE(review): presumably each group is materialised as one image layer
/// (inferred from the type name) — confirm against the callers.
#[derive(Debug, Clone)]
pub struct LayerGroup {
/// Members of the group, in the order chosen by the packing strategy.
pub packages: Vec<ResolvedPackage>,
}
/// Splits `packages` into layer groups according to `strategy`.
///
/// * [`PackingStrategy::OnePerPackage`] yields one single-package group per
///   input element, preserving input order; `popularity` is not consulted.
/// * [`PackingStrategy::PopularityBased`] forwards the package list, the layer
///   budget and the optional popularity map to `pack_by_popularity`.
pub fn pack(
    packages: &[ResolvedPackage],
    strategy: &PackingStrategy,
    popularity: Option<&PopularityMap>,
) -> Vec<LayerGroup> {
    match *strategy {
        PackingStrategy::PopularityBased { max_layers } => {
            pack_by_popularity(packages, max_layers, popularity)
        }
        PackingStrategy::OnePerPackage => {
            let mut groups = Vec::with_capacity(packages.len());
            for package in packages {
                groups.push(LayerGroup {
                    packages: vec![package.clone()],
                });
            }
            groups
        }
    }
}
/// Groups `packages` by popularity score.
///
/// Packages are ranked by descending score (taken from `popularity`, or `0`
/// for every package when no map is supplied), with ties broken by ascending
/// name. The first `max_layers - 2` ranked packages each get a solo group and
/// everything after them shares one trailing group, so at most
/// `max_layers - 1` groups are produced.
///
/// Edge cases:
/// * empty `packages` yields no groups at all (the same result
///   `OnePerPackage` gives), rather than a single group with no packages;
/// * `max_layers < 3` leaves no room for solo groups, so all packages are
///   returned as one group in their input order.
fn pack_by_popularity(
    packages: &[ResolvedPackage],
    max_layers: usize,
    popularity: Option<&PopularityMap>,
) -> Vec<LayerGroup> {
    if packages.is_empty() {
        return Vec::new();
    }
    if max_layers < 3 {
        return vec![LayerGroup {
            packages: packages.to_vec(),
        }];
    }

    let score_of = |pkg: &ResolvedPackage| popularity.map_or(0, |map| map.score(&pkg.name));

    // Clone once up front; the owned values are moved into groups below.
    let mut ranked = packages.to_vec();
    // Stable sort: entries that compare equal keep their input order.
    ranked.sort_by(|a, b| {
        score_of(b)
            .cmp(&score_of(a))
            .then_with(|| a.name.cmp(&b.name))
    });

    // `max_layers >= 3` here, so the subtraction cannot underflow.
    let solo_count = (max_layers - 2).min(ranked.len());
    let tail = ranked.split_off(solo_count);

    let mut groups: Vec<LayerGroup> = ranked
        .into_iter()
        .map(|pkg| LayerGroup {
            packages: vec![pkg],
        })
        .collect();
    if !tail.is_empty() {
        groups.push(LayerGroup { packages: tail });
    }
    groups
}
pub fn placeholder_descriptor(pkg: &ResolvedPackage) -> LayerDescriptor {
LayerDescriptor {
digest: String::new(),
size: 0,
media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
conda_package: Some(CondaPackagePin {
name: pkg.name.clone(),
version: pkg.version.clone(),
build: pkg.build.clone(),
channel: pkg.channel.clone(),
sha256: pkg.sha256.clone(),
}),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a package named `name` with fixed version, build, channel and checksum.
    fn pkg(name: &str) -> ResolvedPackage {
        ResolvedPackage {
            name: name.into(),
            version: "1.0.0".into(),
            build: "h0_0".into(),
            channel: "conda-forge".into(),
            url: format!("https://example.com/{name}.conda"),
            sha256: "abc".into(),
            filename: format!("{name}-1.0.0-h0_0.conda"),
        }
    }

    /// Fixture: one package per entry of `names`, in the same order.
    fn pkgs(names: &[&str]) -> Vec<ResolvedPackage> {
        names.iter().map(|name| pkg(name)).collect()
    }

    /// Shorthand for the popularity strategy with the given layer budget.
    fn by_popularity(max_layers: usize) -> PackingStrategy {
        PackingStrategy::PopularityBased { max_layers }
    }

    /// Name of the first package of every group, in group order.
    fn leading_names(groups: &[LayerGroup]) -> Vec<&str> {
        groups
            .iter()
            .map(|group| group.packages[0].name.as_str())
            .collect()
    }

    #[test]
    fn one_per_package_gives_n_groups() {
        let input = pkgs(&["openssl", "zlib", "samtools"]);

        let groups = pack(&input, &PackingStrategy::OnePerPackage, None);

        assert_eq!(groups.len(), 3);
        assert_eq!(groups[0].packages[0].name, "openssl");
    }

    #[test]
    fn popularity_packing_respects_max_layers() {
        let input: Vec<_> = (0..10).map(|i| pkg(&format!("pkg{i:02}"))).collect();

        let groups = pack(&input, &by_popularity(5), None);

        // A budget of 5 gives 3 solo groups plus one tail holding the other 7.
        assert_eq!(groups.len(), 4);
        let tail = groups.last().unwrap();
        assert_eq!(tail.packages.len(), 7);
    }

    #[test]
    fn popularity_packing_degenerate_small_input() {
        let input = pkgs(&["samtools"]);

        let groups = pack(&input, &by_popularity(64), None);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].packages[0].name, "samtools");
    }

    #[test]
    fn popular_packages_placed_before_rare_ones() {
        let mut pop = PopularityMap::new();
        for (name, uses) in [("openssl", 10), ("zlib", 3), ("rare", 1)] {
            for _ in 0..uses {
                pop.record_tool(&[name.into()]);
            }
        }
        let input = pkgs(&["rare", "zlib", "openssl"]);

        let groups = pack(&input, &by_popularity(64), Some(&pop));

        assert_eq!(groups.len(), 3);
        assert_eq!(leading_names(&groups), ["openssl", "zlib", "rare"]);
    }

    #[test]
    fn rare_packages_land_in_long_tail() {
        let mut pop = PopularityMap::new();
        pop.record_tool(&["openssl".into(), "zlib".into()]);
        pop.record_tool(&["openssl".into(), "bz2".into()]);
        let input = pkgs(&["openssl", "zlib", "bz2", "rare1", "rare2"]);

        let groups = pack(&input, &by_popularity(5), Some(&pop));

        assert_eq!(groups.len(), 4);
        assert_eq!(groups[0].packages[0].name, "openssl");
        let tail = groups.last().unwrap();
        assert_eq!(tail.packages.len(), 2);
    }

    #[test]
    fn packing_is_deterministic_for_same_scores() {
        let mut pop = PopularityMap::new();
        pop.record_tool(&["aa".into(), "bb".into(), "cc".into()]);
        let input = pkgs(&["cc", "aa", "bb"]);
        let strategy = by_popularity(64);

        let first_run = pack(&input, &strategy, Some(&pop));
        let second_run = pack(&input, &strategy, Some(&pop));

        let first_names = leading_names(&first_run);
        let second_names = leading_names(&second_run);
        assert_eq!(first_names, second_names, "packing must be deterministic");
        assert_eq!(first_names, vec!["aa", "bb", "cc"]);
    }

    #[test]
    fn shared_popular_packages_get_solo_layers_across_tools() {
        const NUM_TOOLS: usize = 100;
        const MAX_LAYERS: usize = 64;
        const SHARED_PKGS: &[&str] = &[
            "openssl", "zlib", "libgcc", "libstdcxx", "ncurses", "xz", "bzip2",
        ];
        const UNIQUE_SUFFIX: &str = "tool-specific-pkg";

        // Every tool depends on all shared packages plus one package of its own.
        let tool_package_names: Vec<Vec<String>> = (0..NUM_TOOLS)
            .map(|tool| {
                SHARED_PKGS
                    .iter()
                    .map(|shared| shared.to_string())
                    .chain(std::iter::once(format!("{UNIQUE_SUFFIX}-{tool}")))
                    .collect()
            })
            .collect();

        let mut pop = PopularityMap::new();
        for names in &tool_package_names {
            pop.record_tool(names);
        }

        let strategy = by_popularity(MAX_LAYERS);
        for tool_idx in [0usize, 42, 99] {
            let input: Vec<_> = tool_package_names[tool_idx]
                .iter()
                .map(|name| ResolvedPackage {
                    sha256: format!("sha256-{name}"),
                    ..pkg(name)
                })
                .collect();

            let groups = pack(&input, &strategy, Some(&pop));

            for shared in SHARED_PKGS {
                let solo = groups.iter().any(|group| {
                    matches!(group.packages.as_slice(), [only] if only.name == *shared)
                });
                assert!(
                    solo,
                    "shared package '{}' must get its own layer in tool-{tool_idx}",
                    shared
                );
            }
        }
    }

    #[test]
    fn shared_package_has_same_solo_group_across_tools() {
        let mut pop = PopularityMap::new();
        pop.record_tool(&["openssl".into(), "samtools".into()]);
        pop.record_tool(&["openssl".into(), "bwa".into()]);
        let strategy = by_popularity(64);

        let samtools_groups = pack(&pkgs(&["openssl", "samtools"]), &strategy, Some(&pop));
        let bwa_groups = pack(&pkgs(&["openssl", "bwa"]), &strategy, Some(&pop));

        let shared_in_samtools = &samtools_groups[0].packages[0];
        let shared_in_bwa = &bwa_groups[0].packages[0];
        assert_eq!(shared_in_samtools.name, "openssl");
        assert_eq!(shared_in_bwa.name, "openssl");
        assert_eq!(shared_in_samtools.sha256, shared_in_bwa.sha256);
    }
}