1use bv_core::lockfile::{CondaPackagePin, LayerDescriptor};
2
3use crate::popularity::PopularityMap;
4use crate::spec::ResolvedPackage;
5
/// Strategy used by [`pack`] to split resolved packages into layer groups.
///
/// The default strategy is [`PackingStrategy::OnePerPackage`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum PackingStrategy {
    /// Every package is placed in a group of its own.
    #[default]
    OnePerPackage,
    /// The highest-scoring packages get a group each and the remaining
    /// packages share a single trailing group.
    PopularityBased {
        /// Layer budget that bounds how many packages receive a group of
        /// their own.
        max_layers: usize,
    },
}
24
25#[derive(Debug, Clone)]
27pub struct LayerGroup {
28 pub packages: Vec<ResolvedPackage>,
29}
30
31pub fn pack(
41 packages: &[ResolvedPackage],
42 strategy: &PackingStrategy,
43 popularity: Option<&PopularityMap>,
44) -> Vec<LayerGroup> {
45 match strategy {
46 PackingStrategy::OnePerPackage => packages
47 .iter()
48 .map(|p| LayerGroup {
49 packages: vec![p.clone()],
50 })
51 .collect(),
52
53 PackingStrategy::PopularityBased { max_layers } => {
54 pack_by_popularity(packages, *max_layers, popularity)
55 }
56 }
57}
58
59fn pack_by_popularity(
72 packages: &[ResolvedPackage],
73 max_layers: usize,
74 popularity: Option<&PopularityMap>,
75) -> Vec<LayerGroup> {
76 if max_layers < 3 || packages.is_empty() {
77 return vec![LayerGroup {
78 packages: packages.to_vec(),
79 }];
80 }
81
82 let mut sorted = packages.to_vec();
84 sorted.sort_by(|a, b| {
85 let sa = popularity.map(|p| p.score(&a.name)).unwrap_or(0);
86 let sb = popularity.map(|p| p.score(&b.name)).unwrap_or(0);
87 sb.cmp(&sa).then(a.name.cmp(&b.name))
88 });
89
90 let solo_count = max_layers.saturating_sub(2).min(sorted.len());
91 let (solo, tail) = sorted.split_at(solo_count);
92
93 let mut groups: Vec<LayerGroup> = solo
94 .iter()
95 .map(|p| LayerGroup {
96 packages: vec![p.clone()],
97 })
98 .collect();
99
100 if !tail.is_empty() {
101 groups.push(LayerGroup {
102 packages: tail.to_vec(),
103 });
104 }
105 groups
106}
107
108pub fn placeholder_descriptor(pkg: &ResolvedPackage) -> LayerDescriptor {
112 LayerDescriptor {
113 digest: String::new(),
114 size: 0,
115 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
116 conda_package: Some(CondaPackagePin {
117 name: pkg.name.clone(),
118 version: pkg.version.clone(),
119 build: pkg.build.clone(),
120 channel: pkg.channel.clone(),
121 sha256: pkg.sha256.clone(),
122 }),
123 }
124}
125
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fixture package that differs from its siblings only by name.
    fn pkg(name: &str) -> ResolvedPackage {
        ResolvedPackage {
            name: name.into(),
            version: "1.0.0".into(),
            build: "h0_0".into(),
            channel: "conda-forge".into(),
            url: format!("https://example.com/{name}.conda"),
            sha256: "abc".into(),
            filename: format!("{name}-1.0.0-h0_0.conda"),
        }
    }

    /// Builds one fixture package per entry of `names`, preserving order.
    fn pkgs(names: &[&str]) -> Vec<ResolvedPackage> {
        names.iter().copied().map(pkg).collect()
    }

    /// Shorthand for the popularity-based strategy with the given budget.
    fn by_popularity(max_layers: usize) -> PackingStrategy {
        PackingStrategy::PopularityBased { max_layers }
    }

    /// Records a single tool whose package list is exactly `names`.
    fn record(pop: &mut PopularityMap, names: &[&str]) {
        let owned: Vec<String> = names.iter().map(|name| (*name).to_owned()).collect();
        pop.record_tool(&owned);
    }

    /// Name of the first package of every group, in group order.
    fn lead_names(groups: &[LayerGroup]) -> Vec<&str> {
        groups
            .iter()
            .map(|group| group.packages[0].name.as_str())
            .collect()
    }

    #[test]
    fn one_per_package_gives_n_groups() {
        let input = pkgs(&["openssl", "zlib", "samtools"]);

        let groups = pack(&input, &PackingStrategy::OnePerPackage, None);

        assert_eq!(groups.len(), 3);
        assert_eq!(groups[0].packages[0].name, "openssl");
    }

    #[test]
    fn popularity_packing_respects_max_layers() {
        let mut input = Vec::with_capacity(10);
        for i in 0..10 {
            input.push(pkg(&format!("pkg{i:02}")));
        }

        let groups = pack(&input, &by_popularity(5), None);

        // A budget of 5 yields 3 solo groups plus one tail holding the other 7.
        assert_eq!(groups.len(), 4);
        let tail = groups.last().unwrap();
        assert_eq!(tail.packages.len(), 7);
    }

    #[test]
    fn popularity_packing_degenerate_small_input() {
        let input = pkgs(&["samtools"]);

        let groups = pack(&input, &by_popularity(64), None);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].packages[0].name, "samtools");
    }

    #[test]
    fn popular_packages_placed_before_rare_ones() {
        let mut pop = PopularityMap::new();
        // Each package is recorded as the sole dependency of `tool_count` tools.
        for (name, tool_count) in [("openssl", 10), ("zlib", 3), ("rare", 1)] {
            for _ in 0..tool_count {
                record(&mut pop, &[name]);
            }
        }

        let input = pkgs(&["rare", "zlib", "openssl"]);
        let groups = pack(&input, &by_popularity(64), Some(&pop));

        assert_eq!(groups.len(), 3);
        assert_eq!(lead_names(&groups), ["openssl", "zlib", "rare"]);
    }

    #[test]
    fn rare_packages_land_in_long_tail() {
        let mut pop = PopularityMap::new();
        record(&mut pop, &["openssl", "zlib"]);
        record(&mut pop, &["openssl", "bz2"]);

        let input = pkgs(&["openssl", "zlib", "bz2", "rare1", "rare2"]);
        let groups = pack(&input, &by_popularity(5), Some(&pop));

        assert_eq!(groups.len(), 4);
        assert_eq!(groups[0].packages[0].name, "openssl");
        assert_eq!(groups.last().unwrap().packages.len(), 2);
    }

    #[test]
    fn packing_is_deterministic_for_same_scores() {
        let mut pop = PopularityMap::new();
        record(&mut pop, &["aa", "bb", "cc"]);

        let input = pkgs(&["cc", "aa", "bb"]);
        let strategy = by_popularity(64);
        let first_run = pack(&input, &strategy, Some(&pop));
        let second_run = pack(&input, &strategy, Some(&pop));

        let names1 = lead_names(&first_run);
        let names2 = lead_names(&second_run);
        assert_eq!(names1, names2, "packing must be deterministic");
        assert_eq!(names1, vec!["aa", "bb", "cc"]);
    }

    #[test]
    fn shared_popular_packages_get_solo_layers_across_tools() {
        const NUM_TOOLS: usize = 100;
        const MAX_LAYERS: usize = 64;
        const SHARED_PKGS: &[&str] = &[
            "openssl",
            "zlib",
            "libgcc",
            "libstdcxx",
            "ncurses",
            "xz",
            "bzip2",
        ];
        const UNIQUE_SUFFIX: &str = "tool-specific-pkg";

        // Every tool depends on all shared packages plus one package of its own.
        let mut all_tool_packages: Vec<Vec<String>> = Vec::with_capacity(NUM_TOOLS);
        for i in 0..NUM_TOOLS {
            let unique = format!("{UNIQUE_SUFFIX}-{i}");
            let tool_pkgs = SHARED_PKGS
                .iter()
                .map(|s| s.to_string())
                .chain(std::iter::once(unique))
                .collect();
            all_tool_packages.push(tool_pkgs);
        }

        let mut pop = PopularityMap::new();
        for tool_pkgs in &all_tool_packages {
            pop.record_tool(tool_pkgs);
        }

        for tool_idx in [0usize, 42, 99] {
            let input: Vec<ResolvedPackage> = all_tool_packages[tool_idx]
                .iter()
                .map(|name| ResolvedPackage {
                    name: name.clone(),
                    version: "1.0.0".into(),
                    build: "h0_0".into(),
                    channel: "conda-forge".into(),
                    url: format!("https://example.com/{name}.conda"),
                    sha256: format!("sha256-{name}"),
                    filename: format!("{name}-1.0.0-h0_0.conda"),
                })
                .collect();

            let groups = pack(&input, &by_popularity(MAX_LAYERS), Some(&pop));

            for shared in SHARED_PKGS {
                // True when some group consists of exactly this shared package.
                let solo = groups.iter().any(|group| {
                    matches!(group.packages.as_slice(), [only] if only.name == *shared)
                });
                assert!(
                    solo,
                    "shared package '{}' must get its own layer in tool-{tool_idx}",
                    shared
                );
            }
        }
    }

    #[test]
    fn shared_package_has_same_solo_group_across_tools() {
        let mut pop = PopularityMap::new();
        record(&mut pop, &["openssl", "samtools"]);
        record(&mut pop, &["openssl", "bwa"]);

        let strategy = by_popularity(64);
        let groups_s = pack(&pkgs(&["openssl", "samtools"]), &strategy, Some(&pop));
        let groups_b = pack(&pkgs(&["openssl", "bwa"]), &strategy, Some(&pop));

        assert_eq!(groups_s[0].packages[0].name, "openssl");
        assert_eq!(groups_b[0].packages[0].name, "openssl");

        let lead_s = &groups_s[0].packages[0];
        let lead_b = &groups_b[0].packages[0];
        assert_eq!(lead_s.sha256, lead_b.sha256);
    }
}