use crate::error::ShardexError;
use crate::identifiers::DocumentId;
use bytemuck::{Pod, Zeroable};
use serde::{Deserialize, Serialize};
use std::fmt::{self, Display, Formatter};
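/// A document posting: a span of a document (`start`/`length`) together with its
/// embedding vector. The vector dimension is validated against the index dimension
/// at construction time.
///
/// Example (illustrative only; a 3-dimensional index is assumed for brevity):
/// ```ignore
/// let posting = Posting::new(DocumentId::new(), 0, 100, vec![0.1, 0.2, 0.3], 3).unwrap();
/// assert_eq!(posting.vector_dimension(), 3);
/// ```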
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Posting {
pub document_id: DocumentId,
pub start: u32,
pub length: u32,
pub vector: Vec<f32>,
}
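/// A posting returned from a search, together with its similarity score. The score is
/// validated to lie in `[0.0, 1.0]` (NaN is rejected) when constructed through
/// [`SearchResult::new`] or [`SearchResult::from_posting`].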
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SearchResult {
pub document_id: DocumentId,
pub start: u32,
pub length: u32,
pub vector: Vec<f32>,
pub similarity_score: f32,
}
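/// Fixed-size `#[repr(C)]` header describing a [`Posting`] for zero-copy (bytemuck)
/// storage. The embedding itself is stored out of line; `vector_offset` records where
/// the `vector_len` components can be found in the backing storage.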
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(C)]
pub struct PostingHeader {
pub document_id: DocumentId,
pub start: u32,
pub length: u32,
pub vector_len: u32,
pub vector_offset: u64,
}
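/// Fixed-size `#[repr(C)]` header describing a [`SearchResult`], laid out like
/// [`PostingHeader`] with the similarity score appended.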
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(C)]
pub struct SearchResultHeader {
pub document_id: DocumentId,
pub start: u32,
pub length: u32,
pub vector_len: u32,
pub vector_offset: u64,
pub similarity_score: f32,
}
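/// Aggregate statistics for an index: shard and posting counts, memory/disk usage,
/// search latency percentiles, write throughput, and bloom filter hit rate. Built
/// incrementally with the `with_*` methods.
///
/// Example (illustrative values only):
/// ```ignore
/// let stats = IndexStats::new()
///     .with_total_shards(4)
///     .with_total_postings(1_000)
///     .with_vector_dimension(128);
/// assert!(!stats.has_pending_operations());
/// ```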
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct IndexStats {
pub total_shards: usize,
pub total_postings: usize,
pub pending_operations: usize,
pub memory_usage: usize,
pub active_postings: usize,
pub deleted_postings: usize,
pub average_shard_utilization: f32,
pub vector_dimension: usize,
pub disk_usage: usize,
pub search_latency_p50: std::time::Duration,
pub search_latency_p95: std::time::Duration,
pub search_latency_p99: std::time::Duration,
pub write_throughput: f64,
pub bloom_filter_hit_rate: f64,
}
// SAFETY: these impls require that the headers are #[repr(C)], that every field is
// itself Pod/Zeroable, and that the layout contains no uninitialized padding bytes.
unsafe impl Pod for PostingHeader {}
unsafe impl Zeroable for PostingHeader {}
unsafe impl Pod for SearchResultHeader {}
unsafe impl Zeroable for SearchResultHeader {}
impl Posting {
pub fn new(
document_id: DocumentId,
start: u32,
length: u32,
vector: Vec<f32>,
expected_dimension: usize,
) -> Result<Self, ShardexError> {
if vector.len() != expected_dimension {
return Err(ShardexError::InvalidDimension {
expected: expected_dimension,
actual: vector.len(),
});
}
Ok(Self {
document_id,
start,
length,
vector,
})
}
pub fn vector_dimension(&self) -> usize {
self.vector.len()
}
pub fn to_header(&self, vector_offset: u64) -> PostingHeader {
PostingHeader {
document_id: self.document_id,
start: self.start,
length: self.length,
vector_len: self.vector.len() as u32,
vector_offset,
}
}
pub fn validate_dimension(&self, expected_dimension: usize) -> Result<(), ShardexError> {
if self.vector.len() != expected_dimension {
return Err(ShardexError::invalid_dimension_with_context(
expected_dimension,
self.vector.len(),
"posting_vector",
));
}
Ok(())
}
}
impl SearchResult {
pub fn new(
document_id: DocumentId,
start: u32,
length: u32,
vector: Vec<f32>,
similarity_score: f32,
expected_dimension: usize,
) -> Result<Self, ShardexError> {
if vector.len() != expected_dimension {
return Err(ShardexError::invalid_dimension_with_context(
expected_dimension,
vector.len(),
"search_result",
));
}
if !(0.0..=1.0).contains(&similarity_score) || similarity_score.is_nan() {
return Err(ShardexError::invalid_similarity_score_with_suggestion(similarity_score));
}
Ok(Self {
document_id,
start,
length,
vector,
similarity_score,
})
}
pub fn from_posting(posting: Posting, similarity_score: f32) -> Result<Self, ShardexError> {
if !(0.0..=1.0).contains(&similarity_score) || similarity_score.is_nan() {
return Err(ShardexError::InvalidSimilarityScore {
score: similarity_score,
});
}
Ok(Self {
document_id: posting.document_id,
start: posting.start,
length: posting.length,
vector: posting.vector,
similarity_score,
})
}
pub fn vector_dimension(&self) -> usize {
self.vector.len()
}
pub fn to_header(&self, vector_offset: u64) -> SearchResultHeader {
SearchResultHeader {
document_id: self.document_id,
start: self.start,
length: self.length,
vector_len: self.vector.len() as u32,
vector_offset,
similarity_score: self.similarity_score,
}
}
pub fn validate_dimension(&self, expected_dimension: usize) -> Result<(), ShardexError> {
if self.vector.len() != expected_dimension {
return Err(ShardexError::invalid_dimension_with_context(
expected_dimension,
self.vector.len(),
"search_result",
));
}
Ok(())
}
pub fn is_highly_similar(&self) -> bool {
self.similarity_score >= 0.8
}
pub fn is_moderately_similar(&self) -> bool {
self.similarity_score >= 0.5
}
pub fn is_weakly_similar(&self) -> bool {
self.similarity_score >= 0.3
}
pub fn is_more_similar_than(&self, other: &SearchResult) -> bool {
self.similarity_score > other.similarity_score
}
pub fn is_similar_to(&self, other: &SearchResult, threshold: f32) -> bool {
(self.similarity_score - other.similarity_score).abs() <= threshold
}
pub fn similarity_tier(&self) -> &'static str {
if self.similarity_score >= 0.8 {
"high"
} else if self.similarity_score >= 0.5 {
"moderate"
} else if self.similarity_score >= 0.3 {
"weak"
} else {
"low"
}
}
}
impl PostingHeader {
pub fn new_zero() -> Self {
Self::zeroed()
}
pub fn is_valid(&self) -> bool {
self.vector_len > 0 && self.length > 0
}
}
impl SearchResultHeader {
pub fn new_zero() -> Self {
Self::zeroed()
}
pub fn is_valid(&self) -> bool {
self.vector_len > 0 && self.length > 0
}
}
impl IndexStats {
pub fn new() -> Self {
Self {
total_shards: 0,
total_postings: 0,
pending_operations: 0,
memory_usage: 0,
active_postings: 0,
deleted_postings: 0,
average_shard_utilization: 0.0,
vector_dimension: 0,
disk_usage: 0,
search_latency_p50: std::time::Duration::ZERO,
search_latency_p95: std::time::Duration::ZERO,
search_latency_p99: std::time::Duration::ZERO,
write_throughput: 0.0,
bloom_filter_hit_rate: 0.0,
}
}
pub fn shard_utilization_percent(&self) -> f32 {
self.average_shard_utilization * 100.0
}
pub fn deletion_ratio(&self) -> f32 {
if self.total_postings == 0 {
0.0
} else {
self.deleted_postings as f32 / self.total_postings as f32
}
}
pub fn has_pending_operations(&self) -> bool {
self.pending_operations > 0
}
pub fn memory_usage_mb(&self) -> f32 {
self.memory_usage as f32 / (1024.0 * 1024.0)
}
pub fn disk_usage_mb(&self) -> f32 {
self.disk_usage as f32 / (1024.0 * 1024.0)
}
pub fn search_latency_p50_ms(&self) -> u64 {
self.search_latency_p50.as_millis() as u64
}
pub fn search_latency_p95_ms(&self) -> u64 {
self.search_latency_p95.as_millis() as u64
}
pub fn search_latency_p99_ms(&self) -> u64 {
self.search_latency_p99.as_millis() as u64
}
pub fn write_ops_per_second(&self) -> f64 {
self.write_throughput
}
pub fn bloom_filter_hit_rate_percent(&self) -> f32 {
(self.bloom_filter_hit_rate * 100.0) as f32
}
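/// Rough health heuristic: p95 search latency under 1s, bloom filter hit rate above
/// 70%, and write throughput above 10 ops/s. A metric that is still `0.0` is treated
/// as "not yet measured" and does not count against health.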
pub fn is_performance_healthy(&self) -> bool {
let latency_healthy = self.search_latency_p95 < std::time::Duration::from_millis(1000);
let bloom_healthy = self.bloom_filter_hit_rate == 0.0 || self.bloom_filter_hit_rate > 0.7;
let throughput_healthy = self.write_throughput == 0.0 || self.write_throughput > 10.0;
latency_healthy && bloom_healthy && throughput_healthy
}
pub fn with_total_shards(mut self, total_shards: usize) -> Self {
self.total_shards = total_shards;
self
}
pub fn with_total_postings(mut self, total_postings: usize) -> Self {
self.total_postings = total_postings;
self
}
pub fn with_pending_operations(mut self, pending_operations: usize) -> Self {
self.pending_operations = pending_operations;
self
}
pub fn with_memory_usage(mut self, memory_usage: usize) -> Self {
self.memory_usage = memory_usage;
self
}
pub fn with_active_postings(mut self, active_postings: usize) -> Self {
self.active_postings = active_postings;
self
}
pub fn with_deleted_postings(mut self, deleted_postings: usize) -> Self {
self.deleted_postings = deleted_postings;
self
}
pub fn with_average_shard_utilization(mut self, average_shard_utilization: f32) -> Self {
self.average_shard_utilization = average_shard_utilization;
self
}
pub fn with_vector_dimension(mut self, vector_dimension: usize) -> Self {
self.vector_dimension = vector_dimension;
self
}
pub fn with_disk_usage(mut self, disk_usage: usize) -> Self {
self.disk_usage = disk_usage;
self
}
pub fn with_search_latency_p50(mut self, latency: std::time::Duration) -> Self {
self.search_latency_p50 = latency;
self
}
pub fn with_search_latency_p95(mut self, latency: std::time::Duration) -> Self {
self.search_latency_p95 = latency;
self
}
pub fn with_search_latency_p99(mut self, latency: std::time::Duration) -> Self {
self.search_latency_p99 = latency;
self
}
pub fn with_write_throughput(mut self, throughput: f64) -> Self {
self.write_throughput = throughput;
self
}
pub fn with_bloom_filter_hit_rate(mut self, hit_rate: f64) -> Self {
self.bloom_filter_hit_rate = hit_rate;
self
}
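/// Fully populated stats with fixed, representative values, intended for tests.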
pub fn for_test() -> Self {
Self::new()
.with_total_shards(5)
.with_total_postings(1000)
.with_active_postings(900)
.with_deleted_postings(100)
.with_vector_dimension(128)
.with_memory_usage(1024 * 1024)
.with_disk_usage(2 * 1024 * 1024)
.with_average_shard_utilization(0.75)
.with_pending_operations(10)
.with_search_latency_p50(std::time::Duration::from_millis(50))
.with_search_latency_p95(std::time::Duration::from_millis(150))
.with_search_latency_p99(std::time::Duration::from_millis(300))
.with_write_throughput(100.0)
.with_bloom_filter_hit_rate(0.85)
}
}
impl Display for IndexStats {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
f,
"IndexStats {{ shards: {}, postings: {} (active: {}, deleted: {}), \
pending: {}, memory: {:.2}MB, disk: {:.2}MB, utilization: {:.1}%, \
latency: p50={}ms/p95={}ms/p99={}ms, write_throughput: {:.1}ops/s, \
bloom_hit_rate: {:.1}% }}",
self.total_shards,
self.total_postings,
self.active_postings,
self.deleted_postings,
self.pending_operations,
self.memory_usage_mb(),
self.disk_usage_mb(),
self.shard_utilization_percent(),
self.search_latency_p50.as_millis(),
self.search_latency_p95.as_millis(),
self.search_latency_p99.as_millis(),
self.write_throughput,
self.bloom_filter_hit_rate * 100.0
)
}
}
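/// Timing and volume statistics collected for a single flush: per-phase durations
/// (WAL flush, shard apply, shard sync, validation), the number of shards and
/// operations involved, and the number of bytes synced.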
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct FlushStats {
pub wal_flush_duration: std::time::Duration,
pub shard_apply_duration: std::time::Duration,
pub shard_sync_duration: std::time::Duration,
pub validation_duration: std::time::Duration,
pub total_duration: std::time::Duration,
pub shards_synced: usize,
pub operations_applied: usize,
pub bytes_synced: u64,
}
impl FlushStats {
pub fn total_duration_ms(&self) -> u64 {
self.total_duration.as_millis() as u64
}
pub fn wal_flush_duration_ms(&self) -> u64 {
self.wal_flush_duration.as_millis() as u64
}
pub fn shard_sync_duration_ms(&self) -> u64 {
self.shard_sync_duration.as_millis() as u64
}
pub fn validation_duration_ms(&self) -> u64 {
self.validation_duration.as_millis() as u64
}
pub fn sync_throughput_mbps(&self) -> f64 {
if self.shard_sync_duration.is_zero() {
0.0
} else {
let mb_synced = self.bytes_synced as f64 / (1024.0 * 1024.0);
let seconds = self.shard_sync_duration.as_secs_f64();
mb_synced / seconds
}
}
pub fn operations_per_second(&self) -> f64 {
if self.shard_apply_duration.is_zero() {
0.0
} else {
let seconds = self.shard_apply_duration.as_secs_f64();
self.operations_applied as f64 / seconds
}
}
pub fn is_fast_flush(&self) -> bool {
self.total_duration < std::time::Duration::from_millis(100)
}
pub fn is_slow_flush(&self) -> bool {
self.total_duration > std::time::Duration::from_millis(1000)
}
pub fn slowest_phase(&self) -> &'static str {
let durations = [
("wal_flush", self.wal_flush_duration),
("shard_apply", self.shard_apply_duration),
("shard_sync", self.shard_sync_duration),
("validation", self.validation_duration),
];
durations
.iter()
.max_by_key(|(_, duration)| *duration)
.map(|(name, _)| *name)
.unwrap_or("unknown")
}
}
impl Display for FlushStats {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
f,
"FlushStats {{ total: {}ms, wal: {}ms, apply: {}ms, sync: {}ms, validation: {}ms, \
shards: {}, ops: {}, throughput: {:.2}MB/s }}",
self.total_duration_ms(),
self.wal_flush_duration_ms(),
self.shard_apply_duration.as_millis(),
self.shard_sync_duration_ms(),
self.validation_duration_ms(),
self.shards_synced,
self.operations_applied,
self.sync_throughput_mbps()
)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_posting_creation() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0];
let posting = Posting::new(doc_id, 0, 100, vector.clone(), 3).unwrap();
assert_eq!(posting.document_id, doc_id);
assert_eq!(posting.start, 0);
assert_eq!(posting.length, 100);
assert_eq!(posting.vector, vector);
assert_eq!(posting.vector_dimension(), 3);
}
#[test]
fn test_posting_dimension_validation() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0];
let result = Posting::new(doc_id, 0, 100, vector.clone(), 4);
assert!(result.is_err());
if let Err(ShardexError::InvalidDimension { expected, actual }) = result {
assert_eq!(expected, 4);
assert_eq!(actual, 3);
} else {
panic!("Expected InvalidDimension error");
}
}
#[test]
fn test_search_result_creation() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0];
let result = SearchResult::new(doc_id, 0, 100, vector.clone(), 0.85, 3).unwrap();
assert_eq!(result.document_id, doc_id);
assert_eq!(result.start, 0);
assert_eq!(result.length, 100);
assert_eq!(result.vector, vector);
assert_eq!(result.similarity_score, 0.85);
assert_eq!(result.vector_dimension(), 3);
}
#[test]
fn test_search_result_from_posting() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0];
let posting = Posting::new(doc_id, 10, 50, vector.clone(), 3).unwrap();
let search_result = SearchResult::from_posting(posting.clone(), 0.75).unwrap();
assert_eq!(search_result.document_id, posting.document_id);
assert_eq!(search_result.start, posting.start);
assert_eq!(search_result.length, posting.length);
assert_eq!(search_result.vector, posting.vector);
assert_eq!(search_result.similarity_score, 0.75);
}
#[test]
fn test_search_result_similarity_score_validation() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0];
assert!(SearchResult::new(doc_id, 0, 10, vector.clone(), 0.0, 3).is_ok());
assert!(SearchResult::new(doc_id, 0, 10, vector.clone(), 0.5, 3).is_ok());
assert!(SearchResult::new(doc_id, 0, 10, vector.clone(), 1.0, 3).is_ok());
let result = SearchResult::new(doc_id, 0, 10, vector.clone(), -0.1, 3);
assert!(
matches!(result.unwrap_err(), ShardexError::InvalidInput { field, reason, .. } if field == "similarity_score" && reason.contains("Negative similarity scores"))
);
let result = SearchResult::new(doc_id, 0, 10, vector.clone(), 1.5, 3);
assert!(
matches!(result.unwrap_err(), ShardexError::InvalidInput { field, reason, .. } if field == "similarity_score" && reason.contains("Similarity score too large"))
);
let result = SearchResult::new(doc_id, 0, 10, vector.clone(), f32::NAN, 3);
assert!(matches!(
result.unwrap_err(),
ShardexError::InvalidInput { field, reason, .. } if field == "similarity_score" && reason.contains("NaN values are not allowed")
));
let posting = Posting::new(doc_id, 0, 10, vector.clone(), 3).unwrap();
assert!(SearchResult::from_posting(posting.clone(), 0.8).is_ok());
let result = SearchResult::from_posting(posting.clone(), 2.0);
assert!(matches!(result.unwrap_err(), ShardexError::InvalidSimilarityScore { score } if score == 2.0));
let result = SearchResult::from_posting(posting, -1.0);
assert!(matches!(result.unwrap_err(), ShardexError::InvalidSimilarityScore { score } if score == -1.0));
}
#[test]
fn test_posting_header_creation() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0, 4.0];
let posting = Posting::new(doc_id, 20, 80, vector, 4).unwrap();
let header = posting.to_header(1024);
assert_eq!(header.document_id, doc_id);
assert_eq!(header.start, 20);
assert_eq!(header.length, 80);
assert_eq!(header.vector_len, 4);
assert_eq!(header.vector_offset, 1024);
assert!(header.is_valid());
}
#[test]
fn test_search_result_header_creation() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0, 4.0];
let result = SearchResult::new(doc_id, 20, 80, vector, 0.92, 4).unwrap();
let header = result.to_header(2048);
assert_eq!(header.document_id, doc_id);
assert_eq!(header.start, 20);
assert_eq!(header.length, 80);
assert_eq!(header.vector_len, 4);
assert_eq!(header.vector_offset, 2048);
assert_eq!(header.similarity_score, 0.92);
assert!(header.is_valid());
}
#[test]
fn test_posting_header_bytemuck() {
let header = PostingHeader {
document_id: DocumentId::new(),
start: 100,
length: 200,
vector_len: 384,
vector_offset: 4096,
};
let bytes: &[u8] = bytemuck::bytes_of(&header);
assert_eq!(bytes.len(), std::mem::size_of::<PostingHeader>());
let header_restored: PostingHeader = bytemuck::pod_read_unaligned(bytes);
assert_eq!(header, header_restored);
}
#[test]
fn test_search_result_header_bytemuck() {
let header = SearchResultHeader {
document_id: DocumentId::new(),
start: 100,
length: 200,
vector_len: 384,
vector_offset: 4096,
similarity_score: 0.88,
};
let bytes: &[u8] = bytemuck::bytes_of(&header);
assert_eq!(bytes.len(), std::mem::size_of::<SearchResultHeader>());
let header_restored: SearchResultHeader = bytemuck::pod_read_unaligned(bytes);
assert_eq!(header, header_restored);
}
#[test]
fn test_search_result_similarity_convenience_methods() {
let doc_id = DocumentId::new();
let vector = vec![0.1, 0.2, 0.3];
let high_result = SearchResult::new(doc_id, 0, 10, vector.clone(), 0.85, 3).unwrap();
assert!(high_result.is_highly_similar());
assert!(high_result.is_moderately_similar());
assert!(high_result.is_weakly_similar());
assert_eq!(high_result.similarity_tier(), "high");
let moderate_result = SearchResult::new(doc_id, 10, 10, vector.clone(), 0.65, 3).unwrap();
assert!(!moderate_result.is_highly_similar());
assert!(moderate_result.is_moderately_similar());
assert!(moderate_result.is_weakly_similar());
assert_eq!(moderate_result.similarity_tier(), "moderate");
let weak_result = SearchResult::new(doc_id, 20, 10, vector.clone(), 0.45, 3).unwrap();
assert!(!weak_result.is_highly_similar());
assert!(!weak_result.is_moderately_similar());
assert!(weak_result.is_weakly_similar());
assert_eq!(weak_result.similarity_tier(), "weak");
let low_result = SearchResult::new(doc_id, 30, 10, vector, 0.15, 3).unwrap();
assert!(!low_result.is_highly_similar());
assert!(!low_result.is_moderately_similar());
assert!(!low_result.is_weakly_similar());
assert_eq!(low_result.similarity_tier(), "low");
assert!(high_result.is_more_similar_than(&moderate_result));
assert!(moderate_result.is_more_similar_than(&weak_result));
assert!(weak_result.is_more_similar_than(&low_result));
assert!(high_result.is_similar_to(&high_result, 0.0));
assert!(!high_result.is_similar_to(&low_result, 0.1));
assert!(high_result.is_similar_to(&moderate_result, 0.25));
}
#[test]
fn test_index_stats_creation() {
let stats = IndexStats::new();
assert_eq!(stats.total_shards, 0);
assert_eq!(stats.total_postings, 0);
assert_eq!(stats.pending_operations, 0);
assert_eq!(stats.memory_usage, 0);
assert_eq!(stats.active_postings, 0);
assert_eq!(stats.deleted_postings, 0);
assert_eq!(stats.average_shard_utilization, 0.0);
assert_eq!(stats.vector_dimension, 0);
assert_eq!(stats.disk_usage, 0);
}
#[test]
fn test_index_stats_calculations() {
let mut stats = IndexStats::new();
stats.total_postings = 1000;
stats.active_postings = 800;
stats.deleted_postings = 200;
stats.average_shard_utilization = 0.75;
stats.memory_usage = 2048 * 1024;
stats.disk_usage = 5 * 1024 * 1024;
assert_eq!(stats.shard_utilization_percent(), 75.0);
assert_eq!(stats.deletion_ratio(), 0.2);
assert!(!stats.has_pending_operations());
assert_eq!(stats.memory_usage_mb(), 2.0);
assert_eq!(stats.disk_usage_mb(), 5.0);
}
#[test]
fn test_index_stats_display() {
let mut stats = IndexStats::new();
stats.total_shards = 5;
stats.total_postings = 1000;
stats.active_postings = 950;
stats.deleted_postings = 50;
stats.pending_operations = 10;
stats.memory_usage = 1024 * 1024;
stats.disk_usage = 2 * 1024 * 1024;
stats.average_shard_utilization = 0.8;
let display_str = format!("{}", stats);
assert!(display_str.contains("shards: 5"));
assert!(display_str.contains("postings: 1000"));
assert!(display_str.contains("active: 950"));
assert!(display_str.contains("deleted: 50"));
assert!(display_str.contains("pending: 10"));
assert!(display_str.contains("memory: 1.00MB"));
assert!(display_str.contains("disk: 2.00MB"));
assert!(display_str.contains("utilization: 80.0%"));
}
#[test]
fn test_index_stats_builder_methods() {
let stats = IndexStats::new()
.with_total_shards(10)
.with_total_postings(5000)
.with_active_postings(4500)
.with_deleted_postings(500)
.with_vector_dimension(256)
.with_memory_usage(5 * 1024 * 1024)
.with_disk_usage(10 * 1024 * 1024)
.with_average_shard_utilization(0.9)
.with_pending_operations(25);
assert_eq!(stats.total_shards, 10);
assert_eq!(stats.total_postings, 5000);
assert_eq!(stats.active_postings, 4500);
assert_eq!(stats.deleted_postings, 500);
assert_eq!(stats.vector_dimension, 256);
assert_eq!(stats.memory_usage, 5 * 1024 * 1024);
assert_eq!(stats.disk_usage, 10 * 1024 * 1024);
assert_eq!(stats.average_shard_utilization, 0.9);
assert_eq!(stats.pending_operations, 25);
}
#[test]
fn test_index_stats_for_test_builder() {
let stats = IndexStats::for_test();
assert_eq!(stats.total_shards, 5);
assert_eq!(stats.total_postings, 1000);
assert_eq!(stats.active_postings, 900);
assert_eq!(stats.deleted_postings, 100);
assert_eq!(stats.vector_dimension, 128);
assert_eq!(stats.memory_usage, 1024 * 1024);
assert_eq!(stats.disk_usage, 2 * 1024 * 1024);
assert_eq!(stats.average_shard_utilization, 0.75);
assert_eq!(stats.pending_operations, 10);
assert_eq!(stats.shard_utilization_percent(), 75.0);
assert_eq!(stats.deletion_ratio(), 0.1);
assert!(stats.has_pending_operations());
assert_eq!(stats.memory_usage_mb(), 1.0);
assert_eq!(stats.disk_usage_mb(), 2.0);
}
#[test]
fn test_header_validity() {
let valid_posting = PostingHeader {
document_id: DocumentId::new(),
start: 0,
length: 100,
vector_len: 384,
vector_offset: 0,
};
assert!(valid_posting.is_valid());
let valid_search_result = SearchResultHeader {
document_id: DocumentId::new(),
start: 0,
length: 100,
vector_len: 384,
vector_offset: 0,
similarity_score: 0.5,
};
assert!(valid_search_result.is_valid());
let invalid_posting = PostingHeader {
document_id: DocumentId::new(),
start: 0,
length: 0,
vector_len: 384,
vector_offset: 0,
};
assert!(!invalid_posting.is_valid());
let invalid_search_result = SearchResultHeader {
document_id: DocumentId::new(),
start: 0,
length: 100,
vector_len: 0,
vector_offset: 0,
similarity_score: 0.5,
};
assert!(!invalid_search_result.is_valid());
}
#[test]
fn test_zero_initialized_headers() {
let zero_posting = PostingHeader::new_zero();
let zero_search_result = SearchResultHeader::new_zero();
assert!(!zero_posting.is_valid());
assert!(!zero_search_result.is_valid());
let zero_posting_bytemuck: PostingHeader = PostingHeader::zeroed();
let zero_search_result_bytemuck: SearchResultHeader = SearchResultHeader::zeroed();
assert_eq!(zero_posting_bytemuck.start, 0);
assert_eq!(zero_posting_bytemuck.length, 0);
assert_eq!(zero_posting_bytemuck.vector_len, 0);
assert_eq!(zero_posting_bytemuck.vector_offset, 0);
assert_eq!(zero_search_result_bytemuck.start, 0);
assert_eq!(zero_search_result_bytemuck.length, 0);
assert_eq!(zero_search_result_bytemuck.vector_len, 0);
assert_eq!(zero_search_result_bytemuck.vector_offset, 0);
assert_eq!(zero_search_result_bytemuck.similarity_score, 0.0);
}
#[test]
fn test_memory_layout() {
use std::mem;
let posting_size = mem::size_of::<PostingHeader>();
let search_result_size = mem::size_of::<SearchResultHeader>();
assert!(
posting_size >= 36,
"PostingHeader size {} should be at least 36 bytes",
posting_size
);
assert!(
posting_size % mem::align_of::<PostingHeader>() == 0,
"PostingHeader should be properly aligned"
);
assert!(
search_result_size >= posting_size,
"SearchResultHeader size {} should be at least as large as PostingHeader size {}",
search_result_size,
posting_size
);
assert!(
search_result_size % mem::align_of::<SearchResultHeader>() == 0,
"SearchResultHeader should be properly aligned"
);
assert!(mem::align_of::<PostingHeader>() >= 8);
assert!(mem::align_of::<SearchResultHeader>() >= 8);
}
#[test]
fn test_dimension_validation() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0];
let posting = Posting::new(doc_id, 0, 100, vector, 3).unwrap();
assert!(posting.validate_dimension(3).is_ok());
assert!(posting.validate_dimension(4).is_err());
let vector = vec![1.0, 2.0, 3.0, 4.0];
let search_result = SearchResult::new(doc_id, 0, 100, vector, 0.8, 4).unwrap();
assert!(search_result.validate_dimension(4).is_ok());
assert!(search_result.validate_dimension(3).is_err());
}
#[test]
fn test_serialization() {
let doc_id = DocumentId::new();
let vector = vec![1.0, 2.0, 3.0];
let posting = Posting::new(doc_id, 10, 50, vector.clone(), 3).unwrap();
let posting_json = serde_json::to_string(&posting).unwrap();
let posting_restored: Posting = serde_json::from_str(&posting_json).unwrap();
assert_eq!(posting, posting_restored);
let search_result = SearchResult::new(doc_id, 10, 50, vector, 0.9, 3).unwrap();
let result_json = serde_json::to_string(&search_result).unwrap();
let result_restored: SearchResult = serde_json::from_str(&result_json).unwrap();
assert_eq!(search_result, result_restored);
let mut stats = IndexStats::new();
stats.total_shards = 10;
stats.total_postings = 1000;
let stats_json = serde_json::to_string(&stats).unwrap();
let stats_restored: IndexStats = serde_json::from_str(&stats_json).unwrap();
assert_eq!(stats, stats_restored);
}
#[test]
fn test_index_stats_default() {
let default_stats = IndexStats::default();
let new_stats = IndexStats::new();
assert_eq!(default_stats, new_stats);
assert_eq!(default_stats.total_shards, 0);
assert_eq!(default_stats.total_postings, 0);
assert_eq!(default_stats.pending_operations, 0);
assert_eq!(default_stats.memory_usage, 0);
assert_eq!(default_stats.active_postings, 0);
assert_eq!(default_stats.deleted_postings, 0);
assert_eq!(default_stats.average_shard_utilization, 0.0);
assert_eq!(default_stats.vector_dimension, 0);
assert_eq!(default_stats.disk_usage, 0);
assert_eq!(default_stats.search_latency_p50, std::time::Duration::ZERO);
assert_eq!(default_stats.search_latency_p95, std::time::Duration::ZERO);
assert_eq!(default_stats.search_latency_p99, std::time::Duration::ZERO);
assert_eq!(default_stats.write_throughput, 0.0);
assert_eq!(default_stats.bloom_filter_hit_rate, 0.0);
}
}