1use bv_core::lockfile::{CondaPackagePin, LayerDescriptor};
2
3use crate::catalog::LayerCatalog;
4use crate::popularity::PopularityMap;
5use crate::spec::ResolvedPackage;
6
/// Selects how [`pack`] splits a package list into [`LayerGroup`]s.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum PackingStrategy {
    /// Default strategy: every package is placed in a group of its own, in
    /// input order.
    #[default]
    OnePerPackage,
    /// Rank packages by their `PopularityMap` score (highest first, ties by
    /// name). The top `max_layers - 2` packages each get a solo group and all
    /// remaining packages share one trailing group. With `max_layers < 3`
    /// everything is placed in a single group.
    PopularityBased { max_layers: usize },
    /// Same grouping scheme as `PopularityBased`, but packages are ranked by
    /// the `count` of their `(name, version, build)` entry in the
    /// `LayerCatalog`; packages without an entry rank as 0.
    CatalogAware { max_layers: usize },
}
36
/// A set of packages that [`pack`] assigned to the same group.
// NOTE(review): presumably each group is materialized as one image layer by
// the caller — confirm against the code that consumes `pack`'s output.
#[derive(Debug, Clone)]
pub struct LayerGroup {
    /// Packages belonging to this group, in the order the packer emitted them.
    pub packages: Vec<ResolvedPackage>,
}
42
43pub fn pack(
48 packages: &[ResolvedPackage],
49 strategy: &PackingStrategy,
50 popularity: Option<&PopularityMap>,
51 catalog: Option<&LayerCatalog>,
52) -> Vec<LayerGroup> {
53 match strategy {
54 PackingStrategy::OnePerPackage => packages
55 .iter()
56 .map(|p| LayerGroup {
57 packages: vec![p.clone()],
58 })
59 .collect(),
60
61 PackingStrategy::PopularityBased { max_layers } => {
62 pack_by_popularity(packages, *max_layers, popularity)
63 }
64
65 PackingStrategy::CatalogAware { max_layers } => {
66 pack_by_catalog(packages, *max_layers, catalog)
67 }
68 }
69}
70
71fn pack_by_popularity(
84 packages: &[ResolvedPackage],
85 max_layers: usize,
86 popularity: Option<&PopularityMap>,
87) -> Vec<LayerGroup> {
88 if max_layers < 3 || packages.is_empty() {
89 return vec![LayerGroup {
90 packages: packages.to_vec(),
91 }];
92 }
93
94 let mut sorted = packages.to_vec();
96 sorted.sort_by(|a, b| {
97 let sa = popularity.map(|p| p.score(&a.name)).unwrap_or(0);
98 let sb = popularity.map(|p| p.score(&b.name)).unwrap_or(0);
99 sb.cmp(&sa).then(a.name.cmp(&b.name))
100 });
101
102 let solo_count = max_layers.saturating_sub(2).min(sorted.len());
103 let (solo, tail) = sorted.split_at(solo_count);
104
105 let mut groups: Vec<LayerGroup> = solo
106 .iter()
107 .map(|p| LayerGroup {
108 packages: vec![p.clone()],
109 })
110 .collect();
111
112 if !tail.is_empty() {
113 groups.push(LayerGroup {
114 packages: tail.to_vec(),
115 });
116 }
117 groups
118}
119
120fn pack_by_catalog(
131 packages: &[ResolvedPackage],
132 max_layers: usize,
133 catalog: Option<&LayerCatalog>,
134) -> Vec<LayerGroup> {
135 if max_layers < 3 || packages.is_empty() {
136 return vec![LayerGroup {
137 packages: packages.to_vec(),
138 }];
139 }
140
141 let mut sorted = packages.to_vec();
142 sorted.sort_by(|a, b| {
143 let ca = catalog
144 .and_then(|c| c.get(&a.name, &a.version, &a.build))
145 .map(|e| e.count)
146 .unwrap_or(0);
147 let cb = catalog
148 .and_then(|c| c.get(&b.name, &b.version, &b.build))
149 .map(|e| e.count)
150 .unwrap_or(0);
151 cb.cmp(&ca).then(a.name.cmp(&b.name))
153 });
154
155 let solo_count = max_layers.saturating_sub(2).min(sorted.len());
156 let (solo, tail) = sorted.split_at(solo_count);
157
158 let mut groups: Vec<LayerGroup> = solo
159 .iter()
160 .map(|p| LayerGroup {
161 packages: vec![p.clone()],
162 })
163 .collect();
164
165 if !tail.is_empty() {
166 groups.push(LayerGroup {
167 packages: tail.to_vec(),
168 });
169 }
170 groups
171}
172
173pub fn placeholder_descriptor(pkg: &ResolvedPackage) -> LayerDescriptor {
177 LayerDescriptor {
178 digest: String::new(),
179 size: 0,
180 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
181 conda_package: Some(CondaPackagePin {
182 name: pkg.name.clone(),
183 version: pkg.version.clone(),
184 build: pkg.build.clone(),
185 channel: pkg.channel.clone(),
186 sha256: pkg.sha256.clone(),
187 }),
188 }
189}
190
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a conda-forge package fixture with the given identity triple.
    fn pkg_versioned(name: &str, version: &str, build: &str) -> ResolvedPackage {
        ResolvedPackage {
            name: name.into(),
            version: version.into(),
            build: build.into(),
            channel: "conda-forge".into(),
            url: format!("https://example.com/{name}.conda"),
            sha256: "abc".into(),
            filename: format!("{name}-{version}-{build}.conda"),
            depends: vec![],
        }
    }

    /// Package fixture pinned to version `1.0.0`, build `h0_0`.
    fn pkg(name: &str) -> ResolvedPackage {
        pkg_versioned(name, "1.0.0", "h0_0")
    }

    /// Shorthand for the popularity-based strategy with the given budget.
    fn by_popularity(max_layers: usize) -> PackingStrategy {
        PackingStrategy::PopularityBased { max_layers }
    }

    /// Shorthand for the catalog-aware strategy with the given budget.
    fn by_catalog(max_layers: usize) -> PackingStrategy {
        PackingStrategy::CatalogAware { max_layers }
    }

    /// Name of the first package of every group, in group order.
    fn lead_names(groups: &[LayerGroup]) -> Vec<&str> {
        groups
            .iter()
            .map(|g| g.packages[0].name.as_str())
            .collect()
    }

    #[test]
    fn one_per_package_gives_n_groups() {
        let input = vec![pkg("openssl"), pkg("zlib"), pkg("samtools")];

        let groups = pack(&input, &PackingStrategy::OnePerPackage, None, None);

        assert_eq!(groups.len(), 3);
        assert_eq!(groups[0].packages[0].name, "openssl");
    }

    #[test]
    fn popularity_packing_respects_max_layers() {
        let input: Vec<ResolvedPackage> =
            (0..10).map(|i| pkg(&format!("pkg{i:02}"))).collect();

        let groups = pack(&input, &by_popularity(5), None, None);

        // A budget of 5 gives 3 solo groups plus one shared tail group.
        assert_eq!(groups.len(), 4);
        assert_eq!(groups.last().unwrap().packages.len(), 7);
    }

    #[test]
    fn popularity_packing_degenerate_small_input() {
        let input = vec![pkg("samtools")];

        let groups = pack(&input, &by_popularity(64), None, None);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].packages[0].name, "samtools");
    }

    #[test]
    fn popular_packages_placed_before_rare_ones() {
        let mut pop = PopularityMap::new();
        // (package, number of single-package tools that use it)
        for (name, uses) in [("openssl", 10), ("zlib", 3), ("rare", 1)] {
            for _ in 0..uses {
                pop.record_tool(&[name.into()]);
            }
        }

        let input = vec![pkg("rare"), pkg("zlib"), pkg("openssl")];
        let groups = pack(&input, &by_popularity(64), Some(&pop), None);

        assert_eq!(groups.len(), 3);
        assert_eq!(groups[0].packages[0].name, "openssl");
        assert_eq!(groups[1].packages[0].name, "zlib");
        assert_eq!(groups[2].packages[0].name, "rare");
    }

    #[test]
    fn rare_packages_land_in_long_tail() {
        let mut pop = PopularityMap::new();
        pop.record_tool(&["openssl".into(), "zlib".into()]);
        pop.record_tool(&["openssl".into(), "bz2".into()]);

        let input: Vec<ResolvedPackage> = ["openssl", "zlib", "bz2", "rare1", "rare2"]
            .iter()
            .map(|name| pkg(name))
            .collect();
        let groups = pack(&input, &by_popularity(5), Some(&pop), None);

        // 3 solo groups, then the two unrecorded packages share the tail.
        assert_eq!(groups.len(), 4);
        assert_eq!(groups[0].packages[0].name, "openssl");
        let tail = groups.last().unwrap();
        assert_eq!(tail.packages.len(), 2);
    }

    #[test]
    fn packing_is_deterministic_for_same_scores() {
        let mut pop = PopularityMap::new();
        pop.record_tool(&["aa".into(), "bb".into(), "cc".into()]);

        let input = vec![pkg("cc"), pkg("aa"), pkg("bb")];
        let strategy = by_popularity(64);
        let first_run = pack(&input, &strategy, Some(&pop), None);
        let second_run = pack(&input, &strategy, Some(&pop), None);

        let names1 = lead_names(&first_run);
        let names2 = lead_names(&second_run);
        assert_eq!(names1, names2, "packing must be deterministic");
        // Equal scores fall back to ascending name order.
        assert_eq!(names1, vec!["aa", "bb", "cc"]);
    }

    #[test]
    fn shared_popular_packages_get_solo_layers_across_tools() {
        const NUM_TOOLS: usize = 100;
        const MAX_LAYERS: usize = 64;
        const SHARED_PKGS: &[&str] = &[
            "openssl",
            "zlib",
            "libgcc",
            "libstdcxx",
            "ncurses",
            "xz",
            "bzip2",
        ];
        const UNIQUE_SUFFIX: &str = "tool-specific-pkg";

        // Every tool depends on all shared packages plus one package of its own.
        let all_tool_packages: Vec<Vec<String>> = (0..NUM_TOOLS)
            .map(|i| {
                SHARED_PKGS
                    .iter()
                    .map(|s| s.to_string())
                    .chain(std::iter::once(format!("{UNIQUE_SUFFIX}-{i}")))
                    .collect()
            })
            .collect();

        let mut pop = PopularityMap::new();
        for tool_pkgs in &all_tool_packages {
            pop.record_tool(tool_pkgs);
        }

        for tool_idx in [0usize, 42, 99] {
            let input: Vec<ResolvedPackage> = all_tool_packages[tool_idx]
                .iter()
                .map(|name| ResolvedPackage {
                    sha256: format!("sha256-{name}"),
                    ..pkg(name)
                })
                .collect();

            let groups = pack(&input, &by_popularity(MAX_LAYERS), Some(&pop), None);

            for shared in SHARED_PKGS {
                let solo = groups
                    .iter()
                    .any(|g| g.packages.len() == 1 && g.packages[0].name == *shared);
                assert!(
                    solo,
                    "shared package '{}' must get its own layer in tool-{tool_idx}",
                    shared
                );
            }
        }
    }

    #[test]
    fn shared_package_has_same_solo_group_across_tools() {
        let mut pop = PopularityMap::new();
        pop.record_tool(&["openssl".into(), "samtools".into()]);
        pop.record_tool(&["openssl".into(), "bwa".into()]);

        let samtools_pkgs = vec![pkg("openssl"), pkg("samtools")];
        let bwa_pkgs = vec![pkg("openssl"), pkg("bwa")];
        let strategy = by_popularity(64);

        let groups_s = pack(&samtools_pkgs, &strategy, Some(&pop), None);
        let groups_b = pack(&bwa_pkgs, &strategy, Some(&pop), None);

        // The shared package leads both tools' group lists...
        assert_eq!(groups_s[0].packages[0].name, "openssl");
        assert_eq!(groups_b[0].packages[0].name, "openssl");

        // ...and it is the same package content in both.
        assert_eq!(
            groups_s[0].packages[0].sha256,
            groups_b[0].packages[0].sha256,
        );
    }

    #[test]
    fn catalog_aware_prioritizes_known_packages() {
        let mut cat = LayerCatalog::new();
        // openssl is recorded twice, zlib once, "rare" never.
        for (name, digest) in [
            ("openssl", "sha256:aaa"),
            ("openssl", "sha256:aaa"),
            ("zlib", "sha256:bbb"),
        ] {
            cat.record(name, "1.0.0", "h0_0", digest);
        }

        let input = vec![
            pkg_versioned("rare", "1.0.0", "h0_0"),
            pkg_versioned("zlib", "1.0.0", "h0_0"),
            pkg_versioned("openssl", "1.0.0", "h0_0"),
        ];
        let groups = pack(&input, &by_catalog(64), None, Some(&cat));

        assert_eq!(groups.len(), 3);
        assert_eq!(groups[0].packages[0].name, "openssl");
        assert_eq!(groups[1].packages[0].name, "zlib");
        assert_eq!(groups[2].packages[0].name, "rare");
    }

    #[test]
    fn catalog_aware_pushes_unknown_to_long_tail_when_budget_tight() {
        let mut cat = LayerCatalog::new();
        for (name, digest) in [
            ("openssl", "sha256:aaa"),
            ("zlib", "sha256:bbb"),
            ("libgcc", "sha256:ccc"),
        ] {
            cat.record(name, "1.0.0", "h0_0", digest);
        }

        let input: Vec<ResolvedPackage> = ["rare1", "rare2", "openssl", "zlib", "libgcc"]
            .iter()
            .map(|name| pkg_versioned(name, "1.0.0", "h0_0"))
            .collect();
        let groups = pack(&input, &by_catalog(5), None, Some(&cat));

        assert_eq!(groups.len(), 4);

        // The three catalogued packages occupy the solo groups.
        let solo_names = lead_names(&groups[..3]);
        assert!(solo_names.contains(&"openssl"));
        assert!(solo_names.contains(&"zlib"));
        assert!(solo_names.contains(&"libgcc"));

        // Both unknown packages share the trailing group.
        let tail = groups.last().unwrap();
        assert_eq!(tail.packages.len(), 2);
        let tail_names: Vec<&str> = tail.packages.iter().map(|p| p.name.as_str()).collect();
        assert!(tail_names.contains(&"rare1"));
        assert!(tail_names.contains(&"rare2"));
    }
}