1use bv_core::lockfile::{CondaPackagePin, LayerDescriptor};
2
3use crate::popularity::PopularityMap;
4use crate::spec::ResolvedPackage;
5
/// Policy that [`pack`] follows when distributing resolved packages across
/// layer groups.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum PackingStrategy {
    /// Put every package into a group of its own. This is the default.
    #[default]
    OnePerPackage,
    /// Rank packages by popularity score: the highest-ranked packages each get
    /// a group of their own and all remaining packages share one final group.
    PopularityBased {
        /// Layer budget forwarded to the popularity-based packer.
        max_layers: usize,
    },
}
19
20#[derive(Debug, Clone)]
22pub struct LayerGroup {
23 pub packages: Vec<ResolvedPackage>,
24}
25
26pub fn pack(
36 packages: &[ResolvedPackage],
37 strategy: &PackingStrategy,
38 popularity: Option<&PopularityMap>,
39) -> Vec<LayerGroup> {
40 match strategy {
41 PackingStrategy::OnePerPackage => packages
42 .iter()
43 .map(|p| LayerGroup {
44 packages: vec![p.clone()],
45 })
46 .collect(),
47
48 PackingStrategy::PopularityBased { max_layers } => {
49 pack_by_popularity(packages, *max_layers, popularity)
50 }
51 }
52}
53
54fn pack_by_popularity(
67 packages: &[ResolvedPackage],
68 max_layers: usize,
69 popularity: Option<&PopularityMap>,
70) -> Vec<LayerGroup> {
71 if max_layers < 3 || packages.is_empty() {
72 return vec![LayerGroup {
73 packages: packages.to_vec(),
74 }];
75 }
76
77 let mut sorted = packages.to_vec();
79 sorted.sort_by(|a, b| {
80 let sa = popularity.map(|p| p.score(&a.name)).unwrap_or(0);
81 let sb = popularity.map(|p| p.score(&b.name)).unwrap_or(0);
82 sb.cmp(&sa).then(a.name.cmp(&b.name))
83 });
84
85 let solo_count = max_layers.saturating_sub(2).min(sorted.len());
86 let (solo, tail) = sorted.split_at(solo_count);
87
88 let mut groups: Vec<LayerGroup> = solo
89 .iter()
90 .map(|p| LayerGroup {
91 packages: vec![p.clone()],
92 })
93 .collect();
94
95 if !tail.is_empty() {
96 groups.push(LayerGroup {
97 packages: tail.to_vec(),
98 });
99 }
100 groups
101}
102
103pub fn placeholder_descriptor(pkg: &ResolvedPackage) -> LayerDescriptor {
107 LayerDescriptor {
108 digest: String::new(),
109 size: 0,
110 media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
111 conda_package: Some(CondaPackagePin {
112 name: pkg.name.clone(),
113 version: pkg.version.clone(),
114 build: pkg.build.clone(),
115 channel: pkg.channel.clone(),
116 sha256: pkg.sha256.clone(),
117 }),
118 }
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a `ResolvedPackage` called `name` with fixed version, build,
    /// channel and checksum, and no dependencies.
    fn pkg(name: &str) -> ResolvedPackage {
        ResolvedPackage {
            name: name.into(),
            version: "1.0.0".into(),
            build: "h0_0".into(),
            channel: "conda-forge".into(),
            url: format!("https://example.com/{name}.conda"),
            sha256: "abc".into(),
            filename: format!("{name}-1.0.0-h0_0.conda"),
            depends: Vec::new(),
        }
    }

    /// Shorthand for the popularity strategy with the given layer budget.
    fn popularity_based(max_layers: usize) -> PackingStrategy {
        PackingStrategy::PopularityBased { max_layers }
    }

    /// Records one tool made up of `names` in `pop`.
    fn record(pop: &mut PopularityMap, names: &[&str]) {
        let owned: Vec<String> = names.iter().map(|name| name.to_string()).collect();
        pop.record_tool(&owned);
    }

    /// Name of the first package of every group, in group order.
    fn leading_names(groups: &[LayerGroup]) -> Vec<&str> {
        groups
            .iter()
            .map(|group| group.packages[0].name.as_str())
            .collect()
    }

    #[test]
    fn one_per_package_gives_n_groups() {
        let pkgs = ["openssl", "zlib", "samtools"].map(pkg);

        let groups = pack(&pkgs, &PackingStrategy::OnePerPackage, None);

        assert_eq!(groups.len(), 3);
        assert_eq!(groups[0].packages[0].name, "openssl");
    }

    #[test]
    fn popularity_packing_respects_max_layers() {
        let pkgs: Vec<_> = (0..10).map(|i| pkg(&format!("pkg{i:02}"))).collect();

        let groups = pack(&pkgs, &popularity_based(5), None);

        // Three solo groups plus one shared tail holding the other seven packages.
        assert_eq!(groups.len(), 4);
        let tail = groups.last().unwrap();
        assert_eq!(tail.packages.len(), 7);
    }

    #[test]
    fn popularity_packing_degenerate_small_input() {
        let pkgs = [pkg("samtools")];

        let groups = pack(&pkgs, &popularity_based(64), None);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].packages[0].name, "samtools");
    }

    #[test]
    fn popular_packages_placed_before_rare_ones() {
        let mut pop = PopularityMap::new();
        for (name, uses) in [("openssl", 10), ("zlib", 3), ("rare", 1)] {
            for _ in 0..uses {
                record(&mut pop, &[name]);
            }
        }

        let pkgs = ["rare", "zlib", "openssl"].map(pkg);
        let groups = pack(&pkgs, &popularity_based(64), Some(&pop));

        assert_eq!(groups.len(), 3);
        assert_eq!(groups[0].packages[0].name, "openssl");
        assert_eq!(groups[1].packages[0].name, "zlib");
        assert_eq!(groups[2].packages[0].name, "rare");
    }

    #[test]
    fn rare_packages_land_in_long_tail() {
        let mut pop = PopularityMap::new();
        record(&mut pop, &["openssl", "zlib"]);
        record(&mut pop, &["openssl", "bz2"]);

        let pkgs = ["openssl", "zlib", "bz2", "rare1", "rare2"].map(pkg);
        let groups = pack(&pkgs, &popularity_based(5), Some(&pop));

        assert_eq!(groups.len(), 4);
        assert_eq!(groups[0].packages[0].name, "openssl");
        let tail = groups.last().unwrap();
        assert_eq!(tail.packages.len(), 2);
    }

    #[test]
    fn packing_is_deterministic_for_same_scores() {
        let mut pop = PopularityMap::new();
        record(&mut pop, &["aa", "bb", "cc"]);

        let pkgs = ["cc", "aa", "bb"].map(pkg);
        let strategy = popularity_based(64);
        let groups1 = pack(&pkgs, &strategy, Some(&pop));
        let groups2 = pack(&pkgs, &strategy, Some(&pop));

        let names1 = leading_names(&groups1);
        let names2 = leading_names(&groups2);
        assert_eq!(names1, names2, "packing must be deterministic");
        assert_eq!(names1, vec!["aa", "bb", "cc"]);
    }

    #[test]
    fn shared_popular_packages_get_solo_layers_across_tools() {
        const NUM_TOOLS: usize = 100;
        const MAX_LAYERS: usize = 64;
        const SHARED_PKGS: &[&str] = &[
            "openssl",
            "zlib",
            "libgcc",
            "libstdcxx",
            "ncurses",
            "xz",
            "bzip2",
        ];
        const UNIQUE_SUFFIX: &str = "tool-specific-pkg";

        // Every tool depends on all shared packages plus one package unique to it.
        let all_tool_packages: Vec<Vec<String>> = (0..NUM_TOOLS)
            .map(|i| {
                let mut names: Vec<String> = Vec::with_capacity(SHARED_PKGS.len() + 1);
                names.extend(SHARED_PKGS.iter().map(|shared| shared.to_string()));
                names.push(format!("{UNIQUE_SUFFIX}-{i}"));
                names
            })
            .collect();

        let mut pop = PopularityMap::new();
        for tool_pkgs in &all_tool_packages {
            pop.record_tool(tool_pkgs);
        }

        let strategy = popularity_based(MAX_LAYERS);
        for tool_idx in [0usize, 42, 99] {
            // Same fixture as `pkg`, except the checksum is derived from the name.
            let pkgs: Vec<ResolvedPackage> = all_tool_packages[tool_idx]
                .iter()
                .map(|name| ResolvedPackage {
                    sha256: format!("sha256-{name}"),
                    ..pkg(name)
                })
                .collect();

            let groups = pack(&pkgs, &strategy, Some(&pop));

            for shared in SHARED_PKGS {
                let solo = groups.iter().any(|group| {
                    matches!(group.packages.as_slice(), [only] if only.name == *shared)
                });
                assert!(
                    solo,
                    "shared package '{}' must get its own layer in tool-{tool_idx}",
                    shared
                );
            }
        }
    }

    #[test]
    fn shared_package_has_same_solo_group_across_tools() {
        let mut pop = PopularityMap::new();
        record(&mut pop, &["openssl", "samtools"]);
        record(&mut pop, &["openssl", "bwa"]);

        let samtools_pkgs = ["openssl", "samtools"].map(pkg);
        let bwa_pkgs = ["openssl", "bwa"].map(pkg);

        let strategy = popularity_based(64);
        let groups_s = pack(&samtools_pkgs, &strategy, Some(&pop));
        let groups_b = pack(&bwa_pkgs, &strategy, Some(&pop));

        let first_s = &groups_s[0].packages[0];
        let first_b = &groups_b[0].packages[0];
        assert_eq!(first_s.name, "openssl");
        assert_eq!(first_b.name, "openssl");

        assert_eq!(first_s.sha256, first_b.sha256);
    }
}