1use std::fmt::Debug;
7
8#[cfg(feature = "native")]
9mod segment_manager;
10#[cfg(feature = "native")]
11pub use segment_manager::SegmentManager;
12
/// Minimal description of one index segment, as consumed by a merge policy.
#[derive(Clone, Debug)]
pub struct SegmentInfo {
    /// Identifier of the segment; merge candidates refer to segments by this value.
    pub id: String,
    /// Number of documents stored in the segment.
    pub num_docs: u32,
}
21
/// One proposed merge: the set of segments that should be combined together.
#[derive(Clone, Debug)]
pub struct MergeCandidate {
    /// Ids of the segments taking part in the merge.
    pub segment_ids: Vec<String>,
}
28
29pub trait MergePolicy: Send + Sync + Debug {
33 fn find_merges(&self, segments: &[SegmentInfo]) -> Vec<MergeCandidate>;
36
37 fn clone_box(&self) -> Box<dyn MergePolicy>;
39
40 fn max_segment_docs(&self) -> Option<u32> {
43 None
44 }
45}
46
47impl Clone for Box<dyn MergePolicy> {
48 fn clone(&self) -> Self {
49 self.clone_box()
50 }
51}
52
53#[derive(Debug, Clone, Default)]
55pub struct NoMergePolicy;
56
57impl MergePolicy for NoMergePolicy {
58 fn find_merges(&self, _segments: &[SegmentInfo]) -> Vec<MergeCandidate> {
59 Vec::new()
60 }
61
62 fn clone_box(&self) -> Box<dyn MergePolicy> {
63 Box::new(self.clone())
64 }
65}
66
/// Size-tiered merge policy configuration.
///
/// Segments are sorted by document count and grouped with similarly sized
/// neighbours; a group becomes a merge candidate once it is large enough and
/// satisfies the limits below.
#[derive(Clone, Debug)]
pub struct TieredMergePolicy {
    /// Minimum number of segments a group must contain before it is proposed
    /// as a merge (a group always needs at least two segments).
    pub segments_per_tier: usize,
    /// Maximum number of segments placed in a single merge candidate.
    pub max_merge_at_once: usize,
    /// Size ratio between tiers. Used as the logarithm base when estimating the
    /// ideal segment count and, truncated to an integer, as the largest allowed
    /// ratio between the next segment and the documents already in a group.
    pub tier_factor: f64,
    /// Document-count floor of the lowest tier.
    ///
    /// NOTE(review): the selection logic in this file does not read this field
    /// (only the unit tests do); confirm whether other code consults it.
    pub tier_floor: u32,
    /// Maximum number of documents a merge output may contain. The effective
    /// cap is the smaller of this value and `max_segment_docs`.
    pub max_merged_docs: u32,

    /// Segments smaller than this are treated as having this many documents
    /// when estimating the segment budget and when scoring candidates
    /// (a value of 0 is treated as 1).
    pub floor_segment_docs: u32,
    /// Fraction of the effective document cap above which a segment is excluded
    /// from merging. If the resulting limit is 0, no segment is excluded.
    pub oversized_threshold: f64,
    /// Minimum growth required from a merge: the merged total must be at least
    /// `(1 + min_growth_ratio)` times the largest input. Values `<= 0` disable
    /// the check.
    pub min_growth_ratio: f64,
    /// When `true`, merges are only proposed if the number of eligible segments
    /// exceeds the ideal segment count estimated for the index.
    pub budget_trigger: bool,
    /// When `true`, candidate groups are scored and the best-scoring
    /// non-overlapping groups are chosen; otherwise groups are built greedily.
    pub scored_selection: bool,
    /// Maximum number of documents per segment; also reported through
    /// `MergePolicy::max_segment_docs`.
    pub max_segment_docs: u32,
}
118
119impl Default for TieredMergePolicy {
120 fn default() -> Self {
121 Self {
122 segments_per_tier: 10,
123 max_merge_at_once: 10,
124 tier_factor: 10.0,
125 tier_floor: 1000,
126 max_merged_docs: 5_000_000,
127 floor_segment_docs: 1000,
128 oversized_threshold: 0.5,
129 min_growth_ratio: 0.0,
130 budget_trigger: false,
131 scored_selection: false,
132 max_segment_docs: 10_000_000,
133 }
134 }
135}
136
137impl TieredMergePolicy {
138 pub fn new() -> Self {
140 Self::default()
141 }
142
143 pub fn aggressive() -> Self {
149 Self {
150 segments_per_tier: 3,
151 max_merge_at_once: 10,
152 tier_factor: 10.0,
153 tier_floor: 500,
154 max_merged_docs: 10_000_000,
155 max_segment_docs: 10_000_000,
156 ..Default::default()
157 }
158 }
159
160 pub fn large_scale() -> Self {
168 Self {
169 segments_per_tier: 10,
170 max_merge_at_once: 10,
171 tier_factor: 10.0,
172 tier_floor: 50_000,
173 max_merged_docs: 20_000_000,
174 floor_segment_docs: 50_000,
175 oversized_threshold: 0.5,
176 min_growth_ratio: 0.5,
177 budget_trigger: true,
178 scored_selection: true,
179 max_segment_docs: 20_000_000,
180 }
181 }
182
183 pub fn bulk_indexing() -> Self {
188 Self {
189 segments_per_tier: 20,
190 max_merge_at_once: 20,
191 tier_factor: 10.0,
192 tier_floor: 100_000,
193 max_merged_docs: 50_000_000,
194 floor_segment_docs: 100_000,
195 oversized_threshold: 0.5,
196 min_growth_ratio: 0.75,
197 budget_trigger: true,
198 scored_selection: true,
199 max_segment_docs: 50_000_000,
200 }
201 }
202}
203
204impl TieredMergePolicy {
205 fn effective_max_docs(&self) -> u32 {
208 self.max_merged_docs.min(self.max_segment_docs)
209 }
210
211 fn compute_ideal_segment_count(&self, total_docs: u64) -> usize {
215 if total_docs == 0 {
216 return 0;
217 }
218 let floor = self.floor_segment_docs.max(1) as f64;
219 let num_tiers = ((total_docs as f64 / floor).max(1.0))
221 .log(self.tier_factor)
222 .ceil() as usize;
223 let num_tiers = num_tiers.max(1);
224 num_tiers * self.segments_per_tier
225 }
226
227 fn score_candidate(&self, group: &[usize], sorted: &[&SegmentInfo]) -> f64 {
232 let floor = self.floor_segment_docs.max(1) as f64;
233 let mut total_floored = 0.0f64;
234 let mut largest_floored = 0.0f64;
235 for &idx in group {
236 let floored = (sorted[idx].num_docs as f64).max(floor);
237 total_floored += floored;
238 if floored > largest_floored {
239 largest_floored = floored;
240 }
241 }
242 if total_floored == 0.0 {
243 return f64::MAX;
244 }
245 let skew = largest_floored / total_floored;
246 skew * total_floored.powf(0.05)
247 }
248
249 fn passes_min_growth(&self, group: &[usize], sorted: &[&SegmentInfo]) -> bool {
252 if self.min_growth_ratio <= 0.0 || group.len() < 2 {
253 return true;
254 }
255 let largest = group
256 .iter()
257 .map(|&i| sorted[i].num_docs as u64)
258 .max()
259 .unwrap_or(0);
260 let total: u64 = group.iter().map(|&i| sorted[i].num_docs as u64).sum();
261 total as f64 >= (1.0 + self.min_growth_ratio) * largest as f64
262 }
263
264 fn find_merges_greedy(&self, sorted: &[&SegmentInfo]) -> Vec<MergeCandidate> {
266 let mut candidates = Vec::new();
267 let mut used = vec![false; sorted.len()];
268 let max_ratio = self.tier_factor as u64;
269 let effective_max = self.effective_max_docs() as u64;
270
271 let mut start = 0;
272 loop {
273 while start < sorted.len() && used[start] {
274 start += 1;
275 }
276 if start >= sorted.len() {
277 break;
278 }
279
280 let mut group = vec![start];
281 let mut total_docs: u64 = sorted[start].num_docs as u64;
282
283 for j in (start + 1)..sorted.len() {
284 if used[j] {
285 continue;
286 }
287 if group.len() >= self.max_merge_at_once {
288 break;
289 }
290 let next_docs = sorted[j].num_docs as u64;
291 if total_docs + next_docs > effective_max {
292 break;
293 }
294 if next_docs > total_docs.max(1) * max_ratio {
295 break;
296 }
297 group.push(j);
298 total_docs += next_docs;
299 }
300
301 if group.len() >= self.segments_per_tier
302 && group.len() >= 2
303 && self.passes_min_growth(&group, sorted)
304 {
305 for &i in &group {
306 used[i] = true;
307 }
308 candidates.push(MergeCandidate {
309 segment_ids: group.iter().map(|&i| sorted[i].id.clone()).collect(),
310 });
311 }
312
313 start += 1;
314 }
315
316 candidates
317 }
318
319 fn find_merges_scored(&self, sorted: &[&SegmentInfo]) -> Vec<MergeCandidate> {
322 let max_ratio = self.tier_factor as u64;
323 let effective_max = self.effective_max_docs() as u64;
324
325 let mut scored_groups: Vec<(f64, Vec<usize>)> = Vec::new();
327
328 for start in 0..sorted.len() {
329 let mut group = vec![start];
330 let mut total_docs: u64 = sorted[start].num_docs as u64;
331
332 for j in (start + 1)..sorted.len() {
333 if group.len() >= self.max_merge_at_once {
334 break;
335 }
336 let next_docs = sorted[j].num_docs as u64;
337 if total_docs + next_docs > effective_max {
338 break;
339 }
340 if next_docs > total_docs.max(1) * max_ratio {
341 break;
342 }
343 group.push(j);
344 total_docs += next_docs;
345
346 if group.len() >= self.segments_per_tier
348 && group.len() >= 2
349 && self.passes_min_growth(&group, sorted)
350 {
351 let score = self.score_candidate(&group, sorted);
352 scored_groups.push((score, group.clone()));
353 }
354 }
355 }
356
357 scored_groups.sort_by(|a, b| a.0.total_cmp(&b.0));
359
360 let mut used = vec![false; sorted.len()];
362 let mut candidates = Vec::new();
363
364 for (_score, group) in scored_groups {
365 if group.iter().any(|&i| used[i]) {
366 continue;
367 }
368 for &i in &group {
369 used[i] = true;
370 }
371 candidates.push(MergeCandidate {
372 segment_ids: group.iter().map(|&i| sorted[i].id.clone()).collect(),
373 });
374 }
375
376 candidates
377 }
378}
379
380impl MergePolicy for TieredMergePolicy {
381 fn find_merges(&self, segments: &[SegmentInfo]) -> Vec<MergeCandidate> {
382 if segments.len() < 2 {
383 return Vec::new();
384 }
385
386 let effective_max = self.effective_max_docs();
388 let oversized_limit = (effective_max as f64 * self.oversized_threshold) as u64;
389 let eligible: Vec<&SegmentInfo> = segments
390 .iter()
391 .filter(|s| (s.num_docs as u64) <= oversized_limit || oversized_limit == 0)
392 .collect();
393
394 if eligible.len() < 2 {
395 return Vec::new();
396 }
397
398 if self.budget_trigger {
400 let total_docs: u64 = segments.iter().map(|s| s.num_docs as u64).sum();
401 let ideal = self.compute_ideal_segment_count(total_docs);
402 if eligible.len() <= ideal {
403 return Vec::new();
404 }
405 }
406
407 let mut sorted = eligible;
409 sorted.sort_by_key(|s| s.num_docs);
410
411 if self.scored_selection {
413 self.find_merges_scored(&sorted)
414 } else {
415 self.find_merges_greedy(&sorted)
416 }
417 }
418
419 fn clone_box(&self) -> Box<dyn MergePolicy> {
420 Box::new(self.clone())
421 }
422
423 fn max_segment_docs(&self) -> Option<u32> {
424 Some(self.max_segment_docs)
425 }
426}
427
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a segment with the given id and document count.
    fn seg(id: impl Into<String>, num_docs: u32) -> SegmentInfo {
        SegmentInfo {
            id: id.into(),
            num_docs,
        }
    }

    /// Builds one segment per index in `indices`, named `{prefix}_{i}` and
    /// sized by `docs_for(i)`.
    fn numbered(
        prefix: &str,
        indices: std::ops::Range<u32>,
        docs_for: impl Fn(u32) -> u32,
    ) -> Vec<SegmentInfo> {
        indices
            .map(|i| seg(format!("{}_{}", prefix, i), docs_for(i)))
            .collect()
    }

    /// Document counts of the segments referenced by `candidate`, in order.
    /// Panics if the candidate names a segment that is not in `segments`.
    fn doc_counts(candidate: &MergeCandidate, segments: &[SegmentInfo]) -> Vec<u64> {
        candidate
            .segment_ids
            .iter()
            .map(|id| segments.iter().find(|s| s.id == *id).unwrap().num_docs as u64)
            .collect()
    }

    /// Whether `candidate` includes the segment called `id`.
    fn includes(candidate: &MergeCandidate, id: &str) -> bool {
        candidate.segment_ids.iter().any(|s| s.as_str() == id)
    }

    /// Reference tier computation: tier 0 up to `tier_floor`, then one tier per
    /// factor of `tier_factor`.
    fn compute_tier(policy: &TieredMergePolicy, num_docs: u32) -> usize {
        if num_docs <= policy.tier_floor {
            return 0;
        }
        let ratio = num_docs as f64 / policy.tier_floor as f64;
        (ratio.log(policy.tier_factor).floor() as usize) + 1
    }

    #[test]
    fn test_tiered_policy_compute_tier() {
        let policy = TieredMergePolicy::default();

        // At or below the floor.
        assert_eq!(compute_tier(&policy, 500), 0);
        assert_eq!(compute_tier(&policy, 1000), 0);

        // Between the floor and ten times the floor.
        assert_eq!(compute_tier(&policy, 1001), 1);
        assert_eq!(compute_tier(&policy, 5000), 1);
        assert_eq!(compute_tier(&policy, 9999), 1);

        assert_eq!(compute_tier(&policy, 10000), 2);
        assert_eq!(compute_tier(&policy, 50000), 2);

        assert_eq!(compute_tier(&policy, 100000), 3);
    }

    #[test]
    fn test_tiered_policy_no_merge_few_segments() {
        let policy = TieredMergePolicy::default();
        let segments = vec![seg("a", 100), seg("b", 200)];

        assert!(policy.find_merges(&segments).is_empty());
    }

    #[test]
    fn test_tiered_policy_merge_same_size() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            ..Default::default()
        };

        // Five segments of nearly the same size.
        let segments = numbered("seg", 0..5, |i| 100 + i * 10);

        let candidates = policy.find_merges(&segments);
        assert_eq!(candidates.len(), 1);
        assert_eq!(candidates[0].segment_ids.len(), 5);
    }

    #[test]
    fn test_tiered_policy_cross_tier_promotion() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            tier_factor: 10.0,
            tier_floor: 1000,
            max_merge_at_once: 20,
            max_merged_docs: 5_000_000,
            ..Default::default()
        };

        // Four small segments followed by three medium ones, all within the
        // ratio guard of the running total.
        let mut segments = numbered("small", 0..4, |i| 100 + i * 10);
        segments.extend(numbered("medium", 0..3, |i| 2000 + i * 500));

        let candidates = policy.find_merges(&segments);
        assert_eq!(
            candidates.len(),
            1,
            "should merge all into one cross-tier group"
        );
        assert_eq!(
            candidates[0].segment_ids.len(),
            7,
            "all 7 segments should be in the merge"
        );
    }

    #[test]
    fn test_tiered_policy_ratio_guard_separates_groups() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            tier_factor: 10.0,
            tier_floor: 100,
            max_merge_at_once: 20,
            max_merged_docs: 5_000_000,
            ..Default::default()
        };

        // The large segments are far more than 10x the combined tiny ones.
        let mut segments = numbered("tiny", 0..4, |_| 10);
        segments.extend(numbered("large", 0..4, |i| 100_000 + i * 100));

        let candidates = policy.find_merges(&segments);
        assert_eq!(candidates.len(), 2, "should produce two separate groups");

        assert_eq!(candidates[0].segment_ids.len(), 4);
        assert!(candidates[0].segment_ids[0].starts_with("tiny_"));

        assert_eq!(candidates[1].segment_ids.len(), 4);
        assert!(candidates[1].segment_ids[0].starts_with("large_"));
    }

    #[test]
    fn test_tiered_policy_small_segments_skip_to_large_group() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            tier_factor: 10.0,
            tier_floor: 1000,
            max_merge_at_once: 10,
            max_merged_docs: 5_000_000,
            ..Default::default()
        };

        // Two tiny segments cannot form a group of three on their own.
        let mut segments = vec![seg("tiny_0", 10), seg("tiny_1", 20)];
        segments.extend(numbered("medium", 0..5, |i| 5000 + i * 100));

        let candidates = policy.find_merges(&segments);
        assert!(
            !candidates.is_empty(),
            "should find a merge even though tiny segments can't form a group"
        );
        let total_segs: usize = candidates.iter().map(|c| c.segment_ids.len()).sum();
        assert!(
            total_segs >= 5,
            "should merge at least the 5 medium segments"
        );
    }

    #[test]
    fn test_tiered_policy_respects_max_merged_docs() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            max_merge_at_once: 100,
            tier_factor: 10.0,
            tier_floor: 1000,
            max_merged_docs: 500,
            ..Default::default()
        };

        let segments = numbered("seg", 0..10, |_| 100);

        for candidate in &policy.find_merges(&segments) {
            let total: u64 = doc_counts(candidate, &segments).iter().sum();
            assert!(
                total <= 500,
                "merge total {} exceeds max_merged_docs 500",
                total
            );
        }
    }

    #[test]
    fn test_tiered_policy_large_segment_not_remerged_with_small() {
        let policy = TieredMergePolicy::default();

        // One previously merged large segment plus five fresh small ones.
        let mut segments = vec![seg("large_merged", 50_000)];
        segments.extend(numbered("new", 0..5, |_| 500));

        let candidates = policy.find_merges(&segments);
        assert!(
            candidates.is_empty(),
            "should not re-merge large segment with 5 small ones: {:?}",
            candidates
        );

        // Five more small segments bring the small group up to ten.
        segments.extend(numbered("new", 5..10, |_| 500));

        let candidates = policy.find_merges(&segments);
        assert_eq!(candidates.len(), 1, "should merge the 10 small segments");
        assert!(
            !includes(&candidates[0], "large_merged"),
            "large segment must NOT be in the merge group"
        );
        assert_eq!(
            candidates[0].segment_ids.len(),
            10,
            "all 10 small segments should be merged"
        );
    }

    #[test]
    fn test_no_merge_policy() {
        let policy = NoMergePolicy;
        let segments = vec![seg("a", 100), seg("b", 200)];

        assert!(policy.find_merges(&segments).is_empty());
    }

    #[test]
    fn test_oversized_exclusion() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            max_merged_docs: 1_000_000,
            oversized_threshold: 0.5,
            ..Default::default()
        };

        // Both oversized segments exceed 50% of the 1M cap.
        let mut segments = numbered("small", 0..4, |_| 1000);
        segments.push(seg("oversized_0", 600_000));
        segments.push(seg("oversized_1", 700_000));

        for candidate in &policy.find_merges(&segments) {
            assert!(
                !includes(candidate, "oversized_0"),
                "oversized_0 should be excluded"
            );
            assert!(
                !includes(candidate, "oversized_1"),
                "oversized_1 should be excluded"
            );
        }
    }

    #[test]
    fn test_budget_trigger_prevents_unnecessary_merge() {
        let policy = TieredMergePolicy {
            segments_per_tier: 10,
            tier_factor: 10.0,
            tier_floor: 1000,
            floor_segment_docs: 1000,
            budget_trigger: true,
            ..Default::default()
        };

        // Five segments are well within the budget for 50K documents.
        let segments = numbered("seg", 0..5, |_| 10_000);

        let candidates = policy.find_merges(&segments);
        assert!(
            candidates.is_empty(),
            "should not merge when under budget: {:?}",
            candidates
        );
    }

    #[test]
    fn test_budget_trigger_allows_merge_when_over_budget() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            tier_factor: 10.0,
            tier_floor: 1000,
            floor_segment_docs: 1000,
            budget_trigger: true,
            ..Default::default()
        };

        // Ten segments exceed the budget for 10K documents.
        let segments = numbered("seg", 0..10, |_| 1000);

        let candidates = policy.find_merges(&segments);
        assert!(!candidates.is_empty(), "should merge when over budget");
    }

    #[test]
    fn test_min_growth_ratio_rejects_wasteful_merge() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            min_growth_ratio: 0.5,
            max_merge_at_once: 10,
            ..Default::default()
        };

        // Merging three tiny segments into "big" would barely grow it.
        let mut segments = vec![seg("big", 100_000)];
        segments.extend(numbered("tiny", 0..3, |_| 10));

        for candidate in &policy.find_merges(&segments) {
            if !includes(candidate, "big") {
                continue;
            }
            let counts = doc_counts(candidate, &segments);
            let total: u64 = counts.iter().sum();
            let largest: u64 = counts.iter().copied().max().unwrap();
            assert!(
                total as f64 >= 1.5 * largest as f64,
                "merge with 'big' segment violates min_growth_ratio: total={}, largest={}",
                total,
                largest
            );
        }
    }

    #[test]
    fn test_scored_selection_prefers_balanced_merge() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            max_merge_at_once: 5,
            scored_selection: true,
            ..Default::default()
        };

        let segments = vec![
            seg("unbal_0", 100),
            seg("unbal_1", 100),
            seg("bal_0", 1000),
            seg("bal_1", 1100),
            seg("bal_2", 1200),
            seg("unbal_2", 5000),
        ];

        let candidates = policy.find_merges(&segments);
        assert!(!candidates.is_empty(), "should find at least one merge");

        let first = &candidates[0];
        let has_balanced = first.segment_ids.iter().any(|id| id.starts_with("bal_"));
        assert!(
            has_balanced,
            "scored selection should prefer balanced group, got: {:?}",
            first.segment_ids
        );
    }

    #[test]
    fn test_large_scale_preset_values() {
        let p = TieredMergePolicy::large_scale();
        assert_eq!(p.tier_floor, 50_000);
        assert_eq!(p.max_merged_docs, 20_000_000);
        assert_eq!(p.max_segment_docs, 20_000_000);
        assert_eq!(p.floor_segment_docs, 50_000);
        assert!(p.budget_trigger);
        assert!(p.scored_selection);
        assert_eq!(p.segments_per_tier, 10);
        assert!((p.min_growth_ratio - 0.5).abs() < f64::EPSILON);
        assert!((p.oversized_threshold - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_bulk_indexing_preset_values() {
        let p = TieredMergePolicy::bulk_indexing();
        assert_eq!(p.segments_per_tier, 20);
        assert_eq!(p.max_merge_at_once, 20);
        assert_eq!(p.tier_floor, 100_000);
        assert_eq!(p.max_merged_docs, 50_000_000);
        assert_eq!(p.max_segment_docs, 50_000_000);
        assert_eq!(p.floor_segment_docs, 100_000);
        assert!(p.budget_trigger);
        assert!(p.scored_selection);
        assert!((p.min_growth_ratio - 0.75).abs() < f64::EPSILON);
    }

    #[test]
    fn test_default_max_segment_docs() {
        let p = TieredMergePolicy::default();
        assert_eq!(p.max_segment_docs, 10_000_000);
        assert_eq!(p.max_segment_docs().unwrap(), 10_000_000);
    }

    #[test]
    fn test_max_segment_docs_caps_merge_output() {
        let policy = TieredMergePolicy {
            segments_per_tier: 3,
            max_merge_at_once: 100,
            max_merged_docs: 50_000_000,
            max_segment_docs: 5_000_000,
            ..Default::default()
        };

        // max_segment_docs is the tighter of the two caps here.
        let segments = numbered("seg", 0..10, |_| 1_000_000);

        for candidate in &policy.find_merges(&segments) {
            let total: u64 = doc_counts(candidate, &segments).iter().sum();
            assert!(
                total <= 5_000_000,
                "merge total {} exceeds max_segment_docs 5M",
                total
            );
        }
    }
}