use crate::api::context::ShardexContext;
use std::sync::OnceLock;
/// Lazily-initialized Tokio runtime shared by every blocking API wrapper.
static RUNTIME: OnceLock<tokio::runtime::Runtime> = OnceLock::new();
/// Runs `future` to completion on the shared runtime, blocking the calling
/// thread until the result is ready.
///
/// # Panics
///
/// Panics if the shared Tokio runtime cannot be created on first use.
fn execute_sync<F, T>(future: F) -> Result<T, ShardexError>
where
    F: std::future::Future<Output = Result<T, ShardexError>>,
{
    let runtime = RUNTIME
        .get_or_init(|| tokio::runtime::Runtime::new().expect("Failed to create shared Tokio runtime"));
    runtime.block_on(future)
}
/// Summary of a single batched posting-insertion operation.
#[derive(Debug, Clone)]
pub struct BatchStats {
    /// Number of postings submitted in this batch.
    pub postings_added: usize,
    /// Wall-clock time spent processing the batch.
    pub processing_time: Duration,
    /// Derived rate in postings per second (0.0 when the duration is zero).
    pub throughput_docs_per_sec: f64,
    /// Number of WAL operations applied by an immediate flush, if any.
    pub operations_flushed: u64,
}
impl BatchStats {
    /// Builds stats, deriving throughput from the count and elapsed time.
    pub fn new(postings_added: usize, processing_time: Duration, operations_flushed: u64) -> Self {
        let secs = processing_time.as_secs_f64();
        // Guard against a zero duration to avoid a division by zero.
        let throughput_docs_per_sec = match secs > 0.0 {
            true => postings_added as f64 / secs,
            false => 0.0,
        };
        BatchStats {
            postings_added,
            processing_time,
            throughput_docs_per_sec,
            operations_flushed,
        }
    }
}
/// High-level performance snapshot combining tracker data with index stats.
#[derive(Debug, Clone)]
pub struct PerformanceStats {
    /// Total number of operations accounted for.
    pub total_operations: u64,
    /// Mean latency across those operations.
    pub average_latency: Duration,
    /// Operations per second.
    pub throughput: f64,
    /// Index memory usage in bytes.
    pub memory_usage: u64,
    /// Optional fine-grained breakdown; `None` unless detail was requested.
    pub detailed_metrics: Option<DetailedPerformanceMetrics>,
}
impl PerformanceStats {
    /// Builds a snapshot without the detailed breakdown.
    pub fn basic(total_operations: u64, average_latency: Duration, throughput: f64, memory_usage: u64) -> Self {
        PerformanceStats {
            total_operations,
            average_latency,
            throughput,
            memory_usage,
            detailed_metrics: None,
        }
    }
    /// Builds a snapshot carrying a detailed per-operation breakdown.
    pub fn with_details(
        total_operations: u64,
        average_latency: Duration,
        throughput: f64,
        memory_usage: u64,
        detailed_metrics: DetailedPerformanceMetrics,
    ) -> Self {
        let mut snapshot = Self::basic(total_operations, average_latency, throughput, memory_usage);
        snapshot.detailed_metrics = Some(detailed_metrics);
        snapshot
    }
}
/// Fine-grained timing breakdown keyed by operation category.
#[derive(Debug, Clone)]
pub struct DetailedPerformanceMetrics {
    /// Accumulated time spent on indexing-style operations.
    pub index_time: Duration,
    /// Accumulated time spent flushing.
    pub flush_time: Duration,
    /// Accumulated time spent searching.
    pub search_time: Duration,
    /// Invocation counts per operation label.
    pub operations_breakdown: HashMap<String, u64>,
}
impl DetailedPerformanceMetrics {
    /// Creates an empty metrics accumulator.
    pub fn new() -> Self {
        DetailedPerformanceMetrics {
            index_time: Duration::ZERO,
            flush_time: Duration::ZERO,
            search_time: Duration::ZERO,
            operations_breakdown: HashMap::new(),
        }
    }
    /// Records one occurrence of `operation_type`, accumulating its duration
    /// into the matching category bucket. Unknown labels are counted in the
    /// breakdown but contribute to no timing bucket.
    pub fn record_operation(&mut self, operation_type: &str, duration: Duration) {
        let count = self
            .operations_breakdown
            .entry(operation_type.to_string())
            .or_insert(0);
        *count += 1;
        if matches!(operation_type, "index" | "add_postings" | "batch_add") {
            self.index_time += duration;
        } else if operation_type == "flush" {
            self.flush_time += duration;
        } else if operation_type == "search" {
            self.search_time += duration;
        }
    }
}
impl Default for DetailedPerformanceMetrics {
    fn default() -> Self {
        Self::new()
    }
}
/// Outcome of a single incremental add operation.
#[derive(Debug, Clone)]
pub struct IncrementalStats {
    /// Caller-supplied batch label, if one was provided.
    pub batch_id: Option<String>,
    /// Postings added by this call.
    pub postings_added: usize,
    /// Total postings reported for this batch.
    pub total_postings: usize,
    /// Wall-clock time the operation took.
    pub processing_time: Duration,
}
impl IncrementalStats {
    /// Bundles the raw measurements into a stats value.
    pub fn new(
        batch_id: Option<String>,
        postings_added: usize,
        total_postings: usize,
        processing_time: Duration,
    ) -> Self {
        IncrementalStats {
            batch_id,
            postings_added,
            total_postings,
            processing_time,
        }
    }
}
/// Outcome of a document-removal operation.
#[derive(Debug, Clone)]
pub struct RemovalStats {
    /// Documents reported as removed.
    pub documents_removed: usize,
    /// Documents that were requested but not present.
    pub documents_not_found: usize,
    /// Wall-clock time the removal took.
    pub processing_time: Duration,
}
impl RemovalStats {
    /// Bundles removal counts and timing into a stats value.
    pub fn new(documents_removed: usize, documents_not_found: usize, processing_time: Duration) -> Self {
        RemovalStats {
            documents_removed,
            documents_not_found,
            processing_time,
        }
    }
    /// Total number of documents examined (removed plus missing).
    pub fn total_processed(&self) -> usize {
        self.documents_not_found + self.documents_removed
    }
}
/// Outcome of a batched document-text storage operation.
#[derive(Debug, Clone)]
pub struct BatchDocumentTextStats {
    /// Number of documents written in the batch.
    pub documents_stored: usize,
    /// Combined size of all stored text, in bytes.
    pub total_text_size: usize,
    /// Wall-clock time the batch took.
    pub processing_time: Duration,
    /// Mean document size in bytes (0 for an empty batch).
    pub average_document_size: usize,
    /// Total number of postings attached across all documents.
    pub total_postings: usize,
    /// WAL operations applied by an immediate flush, if any.
    pub operations_flushed: u64,
}
impl BatchDocumentTextStats {
    /// Builds stats, deriving the average document size from the totals.
    pub fn new(
        documents_stored: usize,
        total_text_size: usize,
        processing_time: Duration,
        total_postings: usize,
        operations_flushed: u64,
    ) -> Self {
        // checked_div yields None only when documents_stored == 0.
        let average_document_size = total_text_size.checked_div(documents_stored).unwrap_or(0);
        BatchDocumentTextStats {
            documents_stored,
            total_text_size,
            processing_time,
            average_document_size,
            total_postings,
            operations_flushed,
        }
    }
    /// Documents stored per second (0.0 when the duration is zero).
    pub fn throughput_docs_per_sec(&self) -> f64 {
        let secs = self.processing_time.as_secs_f64();
        if secs > 0.0 {
            self.documents_stored as f64 / secs
        } else {
            0.0
        }
    }
    /// Text bytes stored per second (0.0 when the duration is zero).
    pub fn throughput_bytes_per_sec(&self) -> f64 {
        let secs = self.processing_time.as_secs_f64();
        if secs > 0.0 {
            self.total_text_size as f64 / secs
        } else {
            0.0
        }
    }
}
/// A search hit optionally enriched with stored document text.
#[derive(Debug, Clone)]
pub struct SearchResultWithText {
    /// The underlying similarity-search hit.
    pub search_result: SearchResult,
    /// Full document text, when available.
    pub document_text: Option<String>,
    /// Extracted snippet for the hit's span, when available.
    pub snippet: Option<String>,
}
use crate::api::parameters::{
AddPostingsParams, BatchAddPostingsParams, BatchStoreDocumentTextParams, CreateIndexParams, ExtractSnippetParams,
FlushParams, GetDocumentTextParams, GetPerformanceStatsParams, GetStatsParams, IncrementalAddParams,
OpenIndexParams, RemoveDocumentsParams, SearchParams, StoreDocumentTextParams, ValidateConfigParams,
};
use crate::config::ShardexConfig;
use crate::error::ShardexError;
use crate::shardex::{Shardex, ShardexImpl};
use crate::structures::{FlushStats, IndexStats, SearchResult};
use apithing::ApiOperation;
use std::collections::HashMap;
use std::time::Duration;
/// Operation that creates a brand-new index in the configured directory.
pub struct CreateIndex;
impl ApiOperation<ShardexContext, CreateIndexParams> for CreateIndex {
    type Output = ();
    type Error = ShardexError;
    /// Validates the parameters, builds a `ShardexConfig` from them, creates
    /// the index on disk, and stores it in `context`.
    ///
    /// # Errors
    ///
    /// Returns a config error if the context already holds an index, and
    /// propagates validation or creation failures.
    fn execute(context: &mut ShardexContext, parameters: &CreateIndexParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        if context.is_initialized() {
            return Err(ShardexError::config_error(
                "context",
                "index already initialized",
                "Use a new context or clear the existing index before creating a new one",
            ));
        }
        let config = ShardexConfig::new()
            // Fixed mojibake: `&parameters` had been corrupted to `¶meters`.
            .directory_path(&parameters.directory_path)
            .vector_size(parameters.vector_size)
            .shard_size(parameters.shard_size)
            .batch_write_interval_ms(parameters.batch_write_interval_ms)
            .wal_segment_size(parameters.wal_segment_size)
            .bloom_filter_size(parameters.bloom_filter_size);
        let index = execute_sync(ShardexImpl::create(config))?;
        context.set_index(index);
        Ok(())
    }
}
/// Operation that opens an existing index from a directory on disk.
pub struct OpenIndex;
impl ApiOperation<ShardexContext, OpenIndexParams> for OpenIndex {
    type Output = ();
    type Error = ShardexError;
    /// Validates the directory, opens the index stored there, and places it
    /// in `context`.
    ///
    /// # Errors
    ///
    /// Returns a config error when the context already holds an index or the
    /// path is missing / not a directory, and propagates open failures.
    fn execute(context: &mut ShardexContext, parameters: &OpenIndexParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        if context.is_initialized() {
            return Err(ShardexError::config_error(
                "context",
                "index already initialized",
                "Use a new context or clear the existing index before opening another one",
            ));
        }
        if !parameters.directory_path.exists() {
            return Err(ShardexError::config_error(
                "directory_path",
                format!("directory does not exist: {:?}", parameters.directory_path),
                "Ensure the directory path points to an existing index directory",
            ));
        }
        if !parameters.directory_path.is_dir() {
            return Err(ShardexError::config_error(
                "directory_path",
                format!("path is not a directory: {:?}", parameters.directory_path),
                "Provide a path to a directory containing a Shardex index",
            ));
        }
        // Fixed mojibake: `&parameters` had been corrupted to `¶meters`.
        let index = execute_sync(ShardexImpl::open(&parameters.directory_path))?;
        context.set_index(index);
        Ok(())
    }
}
/// Operation that checks whether a supplied configuration is valid.
pub struct ValidateConfig;
impl ApiOperation<ShardexContext, ValidateConfigParams> for ValidateConfig {
    type Output = bool;
    type Error = ShardexError;
    /// Returns `Ok(true)` when the embedded config validates, `Ok(false)`
    /// otherwise; the concrete validation error is intentionally discarded.
    fn execute(_context: &mut ShardexContext, parameters: &ValidateConfigParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        Ok(parameters.get_config().validate().is_ok())
    }
}
/// Operation that appends postings to the active index.
pub struct AddPostings;
impl ApiOperation<ShardexContext, AddPostingsParams> for AddPostings {
    type Output = ();
    type Error = ShardexError;
    /// Validates the input and forwards the postings to the index.
    ///
    /// # Errors
    ///
    /// Fails when no index is initialized or the underlying add fails.
    fn execute(context: &mut ShardexContext, parameters: &AddPostingsParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        let Some(index) = context.get_index_mut() else {
            return Err(ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before adding postings",
            ));
        };
        execute_sync(index.add_postings(parameters.postings.clone()))
    }
}
/// Operation that performs a k-nearest-neighbour search.
pub struct Search;
impl ApiOperation<ShardexContext, SearchParams> for Search {
    type Output = Vec<SearchResult>;
    type Error = ShardexError;
    /// Validates the query and runs the search against the active index,
    /// forwarding the optional slop factor.
    ///
    /// # Errors
    ///
    /// Fails when no index is initialized or the underlying search fails.
    fn execute(context: &mut ShardexContext, parameters: &SearchParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        let index = context.get_index().ok_or_else(|| {
            ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before performing searches",
            )
        })?;
        // Fixed mojibake: `&parameters` had been corrupted to `¶meters`.
        let results = execute_sync(index.search(
            &parameters.query_vector,
            parameters.k,
            parameters.slop_factor.map(|s| s as usize),
        ))?;
        Ok(results)
    }
}
/// Operation that flushes pending writes, optionally reporting statistics.
pub struct Flush;
impl ApiOperation<ShardexContext, FlushParams> for Flush {
    type Output = Option<FlushStats>;
    type Error = ShardexError;
    /// Flushes the index; returns `Some(stats)` only when stats were requested.
    fn execute(context: &mut ShardexContext, parameters: &FlushParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        let Some(index) = context.get_index_mut() else {
            return Err(ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before performing flush operations",
            ));
        };
        match parameters.with_stats {
            true => Ok(Some(execute_sync(index.flush_with_stats())?)),
            false => {
                execute_sync(index.flush())?;
                Ok(None)
            }
        }
    }
}
/// Operation that returns the index's current statistics.
pub struct GetStats;
impl ApiOperation<ShardexContext, GetStatsParams> for GetStats {
    type Output = IndexStats;
    type Error = ShardexError;
    /// Retrieves `IndexStats` from the active index.
    fn execute(context: &mut ShardexContext, parameters: &GetStatsParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        match context.get_index() {
            Some(index) => execute_sync(index.stats()),
            None => Err(ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before retrieving statistics",
            )),
        }
    }
}
/// Operation that adds postings as a batch, timing the work and optionally
/// flushing immediately.
pub struct BatchAddPostings;
impl ApiOperation<ShardexContext, BatchAddPostingsParams> for BatchAddPostings {
    type Output = BatchStats;
    type Error = ShardexError;
    /// Adds the batch, optionally flushes, and reports timing/throughput.
    /// The measurement is also recorded on the context when performance
    /// tracking is active there.
    fn execute(context: &mut ShardexContext, parameters: &BatchAddPostingsParams) -> Result<Self::Output, Self::Error> {
        use std::time::Instant;
        parameters.validate()?;
        let index = context.get_index_mut().ok_or_else(|| {
            ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before performing batch operations",
            )
        })?;
        let started = Instant::now();
        execute_sync(index.add_postings(parameters.postings.clone()))?;
        // Only a requested immediate flush contributes to operations_flushed.
        let operations_flushed = if parameters.flush_immediately {
            execute_sync(index.flush_with_stats())?.operations_applied
        } else {
            0
        };
        let elapsed = started.elapsed();
        let stats = BatchStats::new(parameters.postings.len(), elapsed, operations_flushed as u64);
        if context.is_performance_tracking_active() {
            context.record_operation("BatchAddPostings", elapsed);
        }
        Ok(stats)
    }
}
/// Operation that assembles a performance snapshot, preferring live tracking
/// data from the context and falling back to figures derived from the
/// index's own statistics.
pub struct GetPerformanceStats;
impl ApiOperation<ShardexContext, GetPerformanceStatsParams> for GetPerformanceStats {
    type Output = PerformanceStats;
    type Error = ShardexError;
    /// Builds a `PerformanceStats` snapshot.
    ///
    /// # Errors
    ///
    /// Fails when no index is initialized or stats retrieval fails.
    fn execute(
        context: &mut ShardexContext,
        parameters: &GetPerformanceStatsParams,
    ) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        let index = context.get_index().ok_or_else(|| {
            ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before retrieving performance statistics",
            )
        })?;
        let index_stats = execute_sync(index.stats())?;
        let (total_operations, overall_latency, throughput) = if context.is_performance_tracking_active() {
            (
                context.get_total_operations(),
                context.get_average_latency(),
                context.get_throughput(),
            )
        } else {
            (0, Duration::default(), 0.0)
        };
        // Fall back to index-derived figures whenever tracking reported nothing.
        let final_total_operations = if total_operations > 0 {
            total_operations
        } else {
            u64::try_from(index_stats.total_postings).unwrap_or(u64::MAX)
        };
        let memory_usage = u64::try_from(index_stats.memory_usage).unwrap_or(u64::MAX);
        let final_average_latency = if overall_latency > Duration::default() {
            overall_latency
        } else {
            index_stats.search_latency_p50
        };
        let final_throughput = if throughput > 0.0 {
            throughput
        } else if final_total_operations > 0 && final_average_latency > Duration::ZERO {
            // Derive ops/sec from the latency at full precision. The previous
            // form divided by integer milliseconds clamped to >= 1, which
            // capped the derived throughput at 1000 ops/sec and reported
            // 1000.0 for any sub-millisecond latency.
            1.0 / final_average_latency.as_secs_f64()
        } else {
            0.0
        };
        let stats = if parameters.include_detailed {
            // NOTE(review): the detailed breakdown starts empty here — no
            // per-operation timings are copied from the context. Confirm
            // whether that is intended.
            PerformanceStats::with_details(
                final_total_operations,
                final_average_latency,
                final_throughput,
                memory_usage,
                DetailedPerformanceMetrics::new(),
            )
        } else {
            PerformanceStats::basic(
                final_total_operations,
                final_average_latency,
                final_throughput,
                memory_usage,
            )
        };
        Ok(stats)
    }
}
/// Operation that adds postings incrementally, tagged with an optional batch id.
pub struct IncrementalAdd;
impl ApiOperation<ShardexContext, IncrementalAddParams> for IncrementalAdd {
    type Output = IncrementalStats;
    type Error = ShardexError;
    /// Adds the postings and reports per-call statistics.
    ///
    /// NOTE(review): `total_postings` is reported as the size of this call's
    /// batch, not an index-wide total — confirm that is the intended meaning.
    fn execute(context: &mut ShardexContext, parameters: &IncrementalAddParams) -> Result<Self::Output, Self::Error> {
        use std::time::Instant;
        parameters.validate()?;
        let Some(index) = context.get_index_mut() else {
            return Err(ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before performing incremental operations",
            ));
        };
        let started = Instant::now();
        execute_sync(index.add_postings(parameters.postings.clone()))?;
        let added = parameters.postings.len();
        Ok(IncrementalStats::new(
            parameters.batch_id.clone(),
            added,
            added,
            started.elapsed(),
        ))
    }
}
/// Operation that removes all postings for the given documents.
pub struct RemoveDocuments;
impl ApiOperation<ShardexContext, RemoveDocumentsParams> for RemoveDocuments {
    type Output = RemovalStats;
    type Error = ShardexError;
    /// Removes the requested documents and reports timing.
    ///
    /// NOTE(review): every requested id is counted as removed and
    /// `documents_not_found` is always 0 — the underlying call does not
    /// report misses; confirm this is acceptable to callers.
    fn execute(context: &mut ShardexContext, parameters: &RemoveDocumentsParams) -> Result<Self::Output, Self::Error> {
        use std::time::Instant;
        parameters.validate()?;
        let Some(index) = context.get_index_mut() else {
            return Err(ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before performing removal operations",
            ));
        };
        let started = Instant::now();
        execute_sync(index.remove_documents(parameters.document_ids.clone()))?;
        Ok(RemovalStats::new(
            parameters.document_ids.len(),
            0,
            started.elapsed(),
        ))
    }
}
/// Operation that stores a document's text together with its postings.
pub struct StoreDocumentText;
impl ApiOperation<ShardexContext, StoreDocumentTextParams> for StoreDocumentText {
    type Output = ();
    type Error = ShardexError;
    /// Replaces the document's text and postings via the index.
    fn execute(
        context: &mut ShardexContext,
        parameters: &StoreDocumentTextParams,
    ) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        let Some(index) = context.get_index_mut() else {
            return Err(ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before storing document text",
            ));
        };
        execute_sync(index.replace_document_with_postings(
            parameters.document_id,
            parameters.text.clone(),
            parameters.postings.clone(),
        ))
    }
}
/// Operation that retrieves the stored text for a document.
pub struct GetDocumentText;
impl ApiOperation<ShardexContext, GetDocumentTextParams> for GetDocumentText {
    type Output = String;
    type Error = ShardexError;
    /// Looks up the document's full text in the active index.
    fn execute(context: &mut ShardexContext, parameters: &GetDocumentTextParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        match context.get_index() {
            Some(index) => execute_sync(index.get_document_text(parameters.document_id)),
            None => Err(ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before retrieving document text",
            )),
        }
    }
}
/// Operation that extracts a text snippet for a span within a document.
pub struct ExtractSnippet;
impl ApiOperation<ShardexContext, ExtractSnippetParams> for ExtractSnippet {
    type Output = String;
    type Error = ShardexError;
    /// Builds a synthetic posting for the requested span and asks the index
    /// for the corresponding text.
    fn execute(context: &mut ShardexContext, parameters: &ExtractSnippetParams) -> Result<Self::Output, Self::Error> {
        parameters.validate()?;
        let index = context.get_index().ok_or_else(|| {
            ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before extracting text snippets",
            )
        })?;
        // The vector is irrelevant for text extraction, so it is left empty.
        let span = crate::structures::Posting {
            document_id: parameters.document_id,
            start: parameters.start,
            length: parameters.length,
            vector: Vec::new(),
        };
        execute_sync(index.extract_text(&span))
    }
}
/// Operation that stores many documents' text and postings in one batch.
pub struct BatchStoreDocumentText;
impl ApiOperation<ShardexContext, BatchStoreDocumentTextParams> for BatchStoreDocumentText {
    type Output = BatchDocumentTextStats;
    type Error = ShardexError;
    /// Stages each document replacement, optionally flushes, and reports
    /// batch-level statistics. When `track_performance` is set on the
    /// parameters, the timing is also recorded on the context.
    ///
    /// # Errors
    ///
    /// Fails when no index is initialized or any staging/flush step fails;
    /// documents staged before a failure are not rolled back here.
    fn execute(
        context: &mut ShardexContext,
        parameters: &BatchStoreDocumentTextParams,
    ) -> Result<Self::Output, Self::Error> {
        use std::time::Instant;
        parameters.validate()?;
        let index = context.get_index_mut().ok_or_else(|| {
            ShardexError::config_error(
                "context",
                "index not initialized",
                "Create or open an index before performing batch document text operations",
            )
        })?;
        let start_time = Instant::now();
        // Fixed mojibake: `&parameters` had been corrupted to `¶meters`.
        for entry in &parameters.documents {
            execute_sync(index.replace_document_with_postings_staged(
                entry.document_id,
                entry.text.clone(),
                entry.postings.clone(),
            ))?;
        }
        let mut operations_flushed = 0;
        if parameters.flush_immediately {
            let flush_stats = execute_sync(index.flush_with_stats())?;
            operations_flushed = flush_stats.operations_applied;
        }
        let processing_time = start_time.elapsed();
        let stats = BatchDocumentTextStats::new(
            parameters.documents.len(),
            parameters.total_text_size(),
            processing_time,
            parameters.total_postings(),
            operations_flushed as u64,
        );
        if parameters.track_performance {
            context.record_operation("batch_store_document_text", processing_time);
        }
        Ok(stats)
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for the synchronous API operations and their stats types.
    //! Fixed throughout: `&params` / `&parameters` had been corrupted to
    //! `¶ms` / `¶meters` by an encoding round-trip.
    use super::*;
    use crate::identifiers::DocumentId;
    use crate::structures::Posting;
    use std::path::PathBuf;
    use tempfile::tempdir;
    #[test]
    fn test_create_index_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(128)
            .shard_size(1000)
            .build()
            .unwrap();
        let result = CreateIndex::execute(&mut context, &params);
        assert!(result.is_ok());
        assert!(context.is_initialized());
    }
    #[test]
    fn test_create_index_already_initialized() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(128)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &params).unwrap();
        let result = CreateIndex::execute(&mut context, &params);
        assert!(result.is_err());
    }
    #[test]
    fn test_add_postings_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let posting = Posting {
            document_id: DocumentId::from_raw(1),
            start: 0,
            length: 100,
            vector: vec![0.1, 0.2, 0.3],
        };
        let params = AddPostingsParams::new(vec![posting]).unwrap();
        let result = AddPostings::execute(&mut context, &params);
        assert!(result.is_ok());
    }
    #[test]
    fn test_add_postings_no_index() {
        let mut context = ShardexContext::new();
        let posting = Posting {
            document_id: DocumentId::from_raw(1),
            start: 0,
            length: 100,
            vector: vec![0.1, 0.2, 0.3],
        };
        let params = AddPostingsParams::new(vec![posting]).unwrap();
        let result = AddPostings::execute(&mut context, &params);
        assert!(result.is_err());
    }
    #[test]
    fn test_search_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .shard_size(1000)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let posting = Posting {
            document_id: DocumentId::from_raw(1),
            start: 0,
            length: 100,
            vector: vec![0.1, 0.2, 0.3],
        };
        let add_params = AddPostingsParams::new(vec![posting]).unwrap();
        AddPostings::execute(&mut context, &add_params).unwrap();
        let search_params = SearchParams::builder()
            .query_vector(vec![0.1, 0.2, 0.3])
            .k(10)
            .build()
            .unwrap();
        let result = Search::execute(&mut context, &search_params);
        assert!(result.is_ok(), "Search failed: {:?}", result.err());
        let results = result.unwrap();
        assert!(
            results.len() <= 1,
            "Expected at most 1 search result, got {}",
            results.len()
        );
    }
    #[test]
    fn test_flush_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let params = FlushParams::new();
        let result = Flush::execute(&mut context, &params);
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        let params = FlushParams::with_stats();
        let result = Flush::execute(&mut context, &params);
        assert!(result.is_ok());
        assert!(result.unwrap().is_some());
    }
    #[test]
    fn test_get_stats_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let params = GetStatsParams::new();
        let result = GetStats::execute(&mut context, &params);
        assert!(result.is_ok());
        let stats = result.unwrap();
        assert_eq!(stats.vector_dimension, 3);
    }
    #[test]
    fn test_open_index_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context1 = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(128)
            .shard_size(1000)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context1, &create_params).unwrap();
        let posting = Posting {
            document_id: DocumentId::from_raw(1),
            start: 0,
            length: 100,
            vector: vec![0.1; 128],
        };
        let add_params = AddPostingsParams::new(vec![posting]).unwrap();
        AddPostings::execute(&mut context1, &add_params).unwrap();
        let flush_params = FlushParams::new();
        Flush::execute(&mut context1, &flush_params).unwrap();
        drop(context1);
        let mut context2 = ShardexContext::new();
        let open_params = OpenIndexParams::new(temp_dir.path().to_path_buf());
        let result = OpenIndex::execute(&mut context2, &open_params);
        assert!(result.is_ok());
        assert!(context2.is_initialized());
        let stats_params = GetStatsParams::new();
        let stats_result = GetStats::execute(&mut context2, &stats_params);
        assert!(stats_result.is_ok());
        let stats = stats_result.unwrap();
        assert_eq!(stats.vector_dimension, 128);
    }
    #[test]
    fn test_open_index_nonexistent_directory() {
        let mut context = ShardexContext::new();
        let open_params = OpenIndexParams::new(PathBuf::from("./nonexistent_directory"));
        let result = OpenIndex::execute(&mut context, &open_params);
        assert!(result.is_err());
        assert!(!context.is_initialized());
    }
    #[test]
    fn test_open_index_already_initialized() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(128)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let open_params = OpenIndexParams::new(temp_dir.path().to_path_buf());
        let result = OpenIndex::execute(&mut context, &open_params);
        assert!(result.is_err());
    }
    #[test]
    fn test_validate_config_operation_valid() {
        let mut context = ShardexContext::new();
        let config = ShardexConfig::new()
            .vector_size(384)
            .shard_size(10000)
            .directory_path("./test_valid");
        let params = ValidateConfigParams::new(config);
        let result = ValidateConfig::execute(&mut context, &params);
        assert!(result.is_ok());
        assert!(result.unwrap());
    }
    #[test]
    fn test_validate_config_operation_invalid() {
        let mut context = ShardexContext::new();
        // A zero vector size must fail validation.
        let config = ShardexConfig::new()
            .vector_size(0)
            .shard_size(10000)
            .directory_path("./test_invalid");
        let params = ValidateConfigParams::new(config);
        let result = ValidateConfig::execute(&mut context, &params);
        assert!(result.is_ok());
        assert!(!result.unwrap());
    }
    #[test]
    fn test_batch_add_postings_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let posting = Posting {
            document_id: DocumentId::from_raw(1),
            start: 0,
            length: 100,
            vector: vec![0.1, 0.2, 0.3],
        };
        let params = BatchAddPostingsParams::with_performance_tracking(vec![posting]).unwrap();
        let result = BatchAddPostings::execute(&mut context, &params);
        assert!(result.is_ok());
        let stats = result.unwrap();
        assert_eq!(stats.postings_added, 1);
    }
    #[test]
    fn test_batch_add_postings_with_flush() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let posting = Posting {
            document_id: DocumentId::from_raw(1),
            start: 0,
            length: 100,
            vector: vec![0.1, 0.2, 0.3],
        };
        let params = BatchAddPostingsParams::with_flush_and_tracking(vec![posting]).unwrap();
        let result = BatchAddPostings::execute(&mut context, &params);
        assert!(result.is_ok());
        let stats = result.unwrap();
        assert_eq!(stats.postings_added, 1);
    }
    #[test]
    fn test_get_performance_stats_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let params = GetPerformanceStatsParams::new();
        let result = GetPerformanceStats::execute(&mut context, &params);
        assert!(result.is_ok());
        let stats = result.unwrap();
        assert!(stats.detailed_metrics.is_none());
        let detailed_params = GetPerformanceStatsParams::detailed();
        let detailed_result = GetPerformanceStats::execute(&mut context, &detailed_params);
        assert!(detailed_result.is_ok());
        let detailed_stats = detailed_result.unwrap();
        assert!(detailed_stats.detailed_metrics.is_some());
    }
    #[test]
    fn test_incremental_add_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let posting = Posting {
            document_id: DocumentId::from_raw(1),
            start: 0,
            length: 100,
            vector: vec![0.1, 0.2, 0.3],
        };
        let params = IncrementalAddParams::with_batch_id(vec![posting], "batch_001".to_string()).unwrap();
        let result = IncrementalAdd::execute(&mut context, &params);
        if let Err(ref e) = result {
            panic!("IncrementalAdd failed: {:?}", e);
        }
        let stats = result.unwrap();
        assert_eq!(stats.postings_added, 1);
        assert_eq!(stats.batch_id.as_ref().unwrap(), "batch_001");
        assert_eq!(stats.total_postings, 1);
    }
    #[test]
    fn test_remove_documents_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let document_id = DocumentId::from_raw(1);
        let posting = Posting {
            document_id,
            start: 0,
            length: 100,
            vector: vec![0.1, 0.2, 0.3],
        };
        let add_params = AddPostingsParams::new(vec![posting]).unwrap();
        AddPostings::execute(&mut context, &add_params).unwrap();
        let flush_params = crate::api::parameters::FlushParams::new();
        crate::api::operations::Flush::execute(&mut context, &flush_params).unwrap();
        let raw_id = document_id.raw();
        let params = RemoveDocumentsParams::new(vec![raw_id]).unwrap();
        let result = RemoveDocuments::execute(&mut context, &params);
        assert!(result.is_ok());
        let stats = result.unwrap();
        assert_eq!(stats.total_processed(), 1);
    }
    #[test]
    fn test_batch_stats() {
        let stats = BatchStats::new(100, Duration::from_millis(1000), 50);
        assert_eq!(stats.postings_added, 100);
        assert_eq!(stats.processing_time, Duration::from_millis(1000));
        assert_eq!(stats.throughput_docs_per_sec, 100.0);
        assert_eq!(stats.operations_flushed, 50);
    }
    #[test]
    fn test_detailed_performance_metrics() {
        let mut metrics = DetailedPerformanceMetrics::new();
        metrics.record_operation("index", Duration::from_millis(100));
        metrics.record_operation("flush", Duration::from_millis(50));
        metrics.record_operation("search", Duration::from_millis(25));
        assert_eq!(metrics.index_time, Duration::from_millis(100));
        assert_eq!(metrics.flush_time, Duration::from_millis(50));
        assert_eq!(metrics.search_time, Duration::from_millis(25));
        assert_eq!(metrics.operations_breakdown.get("index"), Some(&1));
        assert_eq!(metrics.operations_breakdown.get("flush"), Some(&1));
        assert_eq!(metrics.operations_breakdown.get("search"), Some(&1));
    }
    #[test]
    fn test_removal_stats() {
        let stats = RemovalStats::new(5, 2, Duration::from_millis(200));
        assert_eq!(stats.documents_removed, 5);
        assert_eq!(stats.documents_not_found, 2);
        assert_eq!(stats.total_processed(), 7);
        assert_eq!(stats.processing_time, Duration::from_millis(200));
    }
    #[test]
    fn test_batch_document_text_stats() {
        let stats = BatchDocumentTextStats::new(5, 1000, Duration::from_millis(500), 25, 15);
        assert_eq!(stats.documents_stored, 5);
        assert_eq!(stats.total_text_size, 1000);
        assert_eq!(stats.processing_time, Duration::from_millis(500));
        assert_eq!(stats.average_document_size, 200);
        assert_eq!(stats.total_postings, 25);
        assert_eq!(stats.operations_flushed, 15);
        assert_eq!(stats.throughput_docs_per_sec(), 10.0);
        assert_eq!(stats.throughput_bytes_per_sec(), 2000.0);
    }
    #[test]
    fn test_store_document_text_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let doc_id = DocumentId::from_raw(1);
        let text = "Hello world test".to_string();
        let posting = Posting {
            document_id: doc_id,
            start: 0,
            length: 5,
            vector: vec![0.1, 0.2, 0.3],
        };
        let params = StoreDocumentTextParams::new(doc_id, text, vec![posting]).unwrap();
        let result = StoreDocumentText::execute(&mut context, &params);
        match result {
            Ok(()) => {
                println!("Document text stored successfully");
            }
            Err(e) => {
                println!("Expected error for text storage: {}", e);
            }
        }
    }
    #[test]
    fn test_store_document_text_no_index() {
        let mut context = ShardexContext::new();
        let doc_id = DocumentId::from_raw(1);
        let posting = Posting {
            document_id: doc_id,
            start: 0,
            length: 5,
            vector: vec![0.1, 0.2, 0.3],
        };
        let params = StoreDocumentTextParams::new(doc_id, "Hello".to_string(), vec![posting]).unwrap();
        let result = StoreDocumentText::execute(&mut context, &params);
        assert!(result.is_err());
    }
    #[test]
    fn test_get_document_text_operation() {
        let mut context = ShardexContext::new();
        let doc_id = DocumentId::from_raw(1);
        let params = GetDocumentTextParams::new(doc_id);
        let result = GetDocumentText::execute(&mut context, &params);
        assert!(result.is_err());
    }
    #[test]
    fn test_extract_snippet_operation() {
        let mut context = ShardexContext::new();
        let doc_id = DocumentId::from_raw(1);
        let params = ExtractSnippetParams::new(doc_id, 5, 10).unwrap();
        let result = ExtractSnippet::execute(&mut context, &params);
        assert!(result.is_err());
    }
    #[test]
    fn test_extract_snippet_from_posting() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        let doc_id = DocumentId::from_raw(1);
        let posting = Posting {
            document_id: doc_id,
            start: 4,
            length: 5,
            vector: vec![0.1, 0.2, 0.3],
        };
        let params = ExtractSnippetParams::from_posting(&posting);
        assert_eq!(params.document_id, doc_id);
        assert_eq!(params.start, 4);
        assert_eq!(params.length, 5);
        let result = ExtractSnippet::execute(&mut context, &params);
        assert!(result.is_err());
    }
    #[test]
    fn test_batch_store_document_text_operation() {
        let temp_dir = tempdir().unwrap();
        let mut context = ShardexContext::new();
        let create_params = CreateIndexParams::builder()
            .directory_path(temp_dir.path().to_path_buf())
            .vector_size(3)
            .build()
            .unwrap();
        CreateIndex::execute(&mut context, &create_params).unwrap();
        use crate::api::parameters::DocumentTextEntry;
        let entries = vec![
            DocumentTextEntry::new(
                DocumentId::from_raw(1),
                "First document".to_string(),
                vec![Posting {
                    document_id: DocumentId::from_raw(1),
                    start: 0,
                    length: 5,
                    vector: vec![0.1, 0.2, 0.3],
                }],
            ),
            DocumentTextEntry::new(
                DocumentId::from_raw(2),
                "Second document".to_string(),
                vec![Posting {
                    document_id: DocumentId::from_raw(2),
                    start: 0,
                    length: 6,
                    vector: vec![0.4, 0.5, 0.6],
                }],
            ),
        ];
        let params = BatchStoreDocumentTextParams::with_performance_tracking(entries).unwrap();
        let result = BatchStoreDocumentText::execute(&mut context, &params);
        match result {
            Ok(stats) => {
                assert_eq!(stats.documents_stored, 2);
                assert_eq!(stats.total_postings, 2);
                assert!(stats.total_text_size > 0);
            }
            Err(e) => {
                println!("Expected error for batch text storage: {}", e);
            }
        }
    }
    #[test]
    fn test_batch_store_document_text_no_index() {
        let mut context = ShardexContext::new();
        use crate::api::parameters::DocumentTextEntry;
        let entries = vec![DocumentTextEntry::new(
            DocumentId::from_raw(1),
            "Test document".to_string(),
            vec![],
        )];
        let params = BatchStoreDocumentTextParams::simple(entries).unwrap();
        let result = BatchStoreDocumentText::execute(&mut context, &params);
        assert!(result.is_err());
    }
}