use std::fmt::Debug;
use crate::vector::index::hnsw::segment::manager::{ManagedSegmentInfo, SegmentManagerConfig};
/// Strategy for deciding which segments should be merged together.
///
/// Implementors must also be `Debug`, `Send` and `Sync`.
pub trait MergePolicy: Debug + Send + Sync {
/// Returns the IDs of the segments selected for merging, or `None` when
/// no merge should happen.
///
/// `segments` is the set of segments to choose from and `config` carries
/// the segment-manager settings a policy may consult.
fn candidates(
&self,
segments: &[ManagedSegmentInfo],
config: &SegmentManagerConfig,
) -> Option<Vec<String>>;
}
/// Merge policy that selects the segments holding the fewest vectors once
/// the number of segments reaches the configured maximum.
///
/// The policy carries no state of its own.
#[derive(Debug, Default)]
pub struct SimpleMergePolicy;

impl SimpleMergePolicy {
    /// Creates a new `SimpleMergePolicy`; equivalent to `SimpleMergePolicy::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
impl MergePolicy for SimpleMergePolicy {
    /// Picks the `merge_factor` segments with the fewest vectors.
    ///
    /// Returns `None` when:
    /// - there are fewer segments than `config.max_segments`,
    /// - there are fewer segments than `config.merge_factor`, or
    /// - the selection ends up empty (a merge factor of zero).
    ///
    /// Otherwise returns the selected segment IDs ordered by ascending
    /// vector count; segments with equal counts keep their input order.
    ///
    /// NOTE(review): a merge factor of 1 yields a single-segment candidate
    /// list — confirm the caller handles that sensibly.
    fn candidates(
        &self,
        segments: &[ManagedSegmentInfo],
        config: &SegmentManagerConfig,
    ) -> Option<Vec<String>> {
        let segment_count = segments.len();
        let max_segments = config.max_segments as usize;
        let merge_factor = config.merge_factor as usize;

        // Nothing to do until the segment count reaches the limit, and a
        // merge needs at least `merge_factor` segments to pick from.
        if segment_count < max_segments || segment_count < merge_factor {
            return None;
        }

        // Sort references rather than the caller's slice. `sort_by_key` is
        // stable, so ties are resolved by original position.
        let mut by_size: Vec<&ManagedSegmentInfo> = segments.iter().collect();
        by_size.sort_by_key(|info| info.vector_count);

        let ids: Vec<String> = by_size
            .into_iter()
            .take(merge_factor)
            .map(|info| info.segment_id.clone())
            .collect();

        // An empty selection is reported as "no merge".
        if ids.is_empty() {
            None
        } else {
            Some(ids)
        }
    }
}
/// Merge policy that selects every available segment, regardless of the
/// manager configuration.
///
/// The policy carries no state of its own.
#[derive(Debug, Default)]
pub struct ForceMergePolicy;

impl ForceMergePolicy {
    /// Creates a new `ForceMergePolicy`; equivalent to `ForceMergePolicy::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
impl MergePolicy for ForceMergePolicy {
    /// Selects all segments for merging.
    ///
    /// Returns `None` for an empty `segments` slice; otherwise returns every
    /// segment ID in input order. The configuration is not consulted.
    fn candidates(
        &self,
        segments: &[ManagedSegmentInfo],
        _config: &SegmentManagerConfig,
    ) -> Option<Vec<String>> {
        match segments {
            // Nothing to merge.
            [] => None,
            all => Some(all.iter().map(|info| info.segment_id.clone()).collect()),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vector::index::hnsw::segment::manager::ManagedSegmentInfo;

    /// Builds a segment descriptor with the given ID and vector count; the
    /// remaining fields get fixed placeholder values.
    fn create_info(id: &str, count: u64) -> ManagedSegmentInfo {
        ManagedSegmentInfo {
            segment_id: id.to_string(),
            vector_count: count,
            vector_offset: 0,
            generation: 1,
            has_deletions: false,
            size_bytes: count * 100,
        }
    }

    #[test]
    fn test_simple_merge_policy_candidates() {
        let policy = SimpleMergePolicy::new();
        let config = SegmentManagerConfig {
            max_segments: 5,
            merge_factor: 3,
            ..Default::default()
        };

        // Below `max_segments`: no merge is proposed.
        let segments = vec![create_info("1", 100), create_info("2", 100)];
        assert!(policy.candidates(&segments, &config).is_none());

        // At or above `max_segments`: the three smallest segments are chosen.
        let segments = vec![
            create_info("1", 1000),
            create_info("2", 100),
            create_info("3", 100),
            create_info("4", 100),
            create_info("5", 1000),
            create_info("6", 1000),
        ];
        let candidates = policy.candidates(&segments, &config).unwrap();
        assert_eq!(candidates.len(), 3);
        assert!(candidates.contains(&"2".to_string()));
        assert!(candidates.contains(&"3".to_string()));
        assert!(candidates.contains(&"4".to_string()));
    }

    #[test]
    fn test_simple_merge_policy_requires_merge_factor_segments() {
        let policy = SimpleMergePolicy::new();
        let config = SegmentManagerConfig {
            max_segments: 2,
            merge_factor: 5,
            ..Default::default()
        };

        // Enough segments to reach `max_segments`, but fewer than `merge_factor`.
        let segments = vec![
            create_info("1", 10),
            create_info("2", 20),
            create_info("3", 30),
        ];
        assert!(policy.candidates(&segments, &config).is_none());
    }

    #[test]
    fn test_force_merge_policy_candidates() {
        let policy = ForceMergePolicy::new();
        let config = SegmentManagerConfig::default();

        // No segments: nothing to merge.
        assert!(policy.candidates(&[], &config).is_none());

        // Every segment is selected, in input order, whatever its size.
        let segments = vec![create_info("a", 10), create_info("b", 5000)];
        assert_eq!(
            policy.candidates(&segments, &config),
            Some(vec!["a".to_string(), "b".to_string()])
        );
    }
}