use apithing::ApiOperation;
use shardex::api::{
AddPostings, AddPostingsParams, CreateIndex, CreateIndexParams, Flush, FlushParams, GetStats, GetStatsParams,
Search, SearchParams, ShardexContext,
};
use shardex::{DocumentId, Posting};
use std::error::Error;
/// Demonstrates the basic Shardex workflow end to end: create an index,
/// add postings, flush to disk, inspect statistics, run similarity
/// searches, and clean up.
fn main() -> Result<(), Box<dyn Error>> {
    println!("Shardex Basic Usage Example");
    println!("===========================");

    // Work in a throwaway directory and start from a clean slate each run.
    let temp_dir = std::env::temp_dir().join("shardex_basic_example");
    if temp_dir.exists() {
        std::fs::remove_dir_all(&temp_dir)?;
    }
    std::fs::create_dir_all(&temp_dir)?;

    println!("Creating new index at: {}", temp_dir.display());
    let mut context = ShardexContext::new();
    let create_params = CreateIndexParams::builder()
        .directory_path(temp_dir.clone())
        // Must match the dimension produced by `generate_text_vector` (128).
        .vector_size(128)
        .shard_size(10000)
        .batch_write_interval_ms(100)
        .build()?;
    CreateIndex::execute(&mut context, &create_params)?;

    // Sample corpus: (document text, embedding vector) pairs.
    let sample_documents = [
        (
            "Document about cats and dogs",
            generate_text_vector("cats dogs pets animals"),
        ),
        (
            "Article on machine learning",
            generate_text_vector("machine learning AI neural networks"),
        ),
        (
            "Cooking recipe for pasta",
            generate_text_vector("pasta cooking recipe italian food"),
        ),
        (
            "Travel guide to Japan",
            generate_text_vector("japan travel guide tokyo culture"),
        ),
        (
            "Programming tutorial",
            generate_text_vector("programming tutorial code software development"),
        ),
    ];

    // Build one posting per document, spanning the full document text.
    let mut postings = Vec::with_capacity(sample_documents.len());
    for (i, (text, vector)) in sample_documents.iter().enumerate() {
        // Ids start at 1 (i + 1), so no document gets the raw id 0.
        let document_id = DocumentId::from_raw((i + 1) as u128);
        let posting = Posting {
            document_id,
            start: 0,
            length: text.len() as u32,
            vector: vector.clone(),
        };
        postings.push(posting);
        println!("Added document {}: {}", i + 1, text);
    }

    println!("\nIndexing {} documents...", postings.len());
    let add_params = AddPostingsParams::new(postings)?;
    AddPostings::execute(&mut context, &add_params)?;

    // Flush pending writes and report how many operations were applied.
    let flush_params = FlushParams::with_stats();
    let flush_stats = Flush::execute(&mut context, &flush_params)?;
    if let Some(stats) = flush_stats {
        println!("Flushed to disk - Operations: {}", stats.operations_applied);
    }

    let stats_params = GetStatsParams::new();
    let stats = GetStats::execute(&mut context, &stats_params)?;
    println!("\nIndex Statistics:");
    println!("- Total shards: {}", stats.total_shards);
    println!("- Total postings: {}", stats.total_postings);
    println!("- Active postings: {}", stats.active_postings);
    println!("- Vector dimension: {}", stats.vector_dimension);
    println!("- Memory usage: {:.2} MB", stats.memory_usage as f64 / 1024.0 / 1024.0);

    println!("\nPerforming similarity searches:");
    println!("==============================");
    // (human-readable description, query terms fed to the embedding).
    let search_queries = [
        ("pets and animals", "cats dogs pets animals"),
        ("artificial intelligence", "artificial intelligence machine learning"),
        ("cooking and food", "cooking food recipes"),
        ("travel and tourism", "travel tourism destinations"),
        ("software engineering", "programming software engineering"),
    ];
    for (query_desc, query_terms) in search_queries {
        println!("\nSearching for: {}", query_desc);
        let query_vector = generate_text_vector(query_terms);
        let search_params = SearchParams::builder()
            .query_vector(query_vector)
            .k(3)
            // None = use the index's default slop factor.
            .slop_factor(None)
            .build()?;
        let results = Search::execute(&mut context, &search_params)?;
        for (i, result) in results.iter().enumerate() {
            println!(
                "  {}. Document {} (similarity: {:.3})",
                i + 1,
                result.document_id.raw(),
                result.similarity_score
            );
        }
    }

    // Same search API, but with an explicit slop factor to widen the scan.
    println!("\nSearching with custom slop factor (broader search):");
    let query_vector = generate_text_vector("food cooking");
    let search_params = SearchParams::builder()
        .query_vector(query_vector)
        .k(2)
        .slop_factor(Some(3))
        .build()?;
    let results = Search::execute(&mut context, &search_params)?;
    println!("Results with slop factor 3:");
    for result in results {
        println!(
            "  Document {} (similarity: {:.3})",
            result.document_id.raw(),
            result.similarity_score
        );
    }

    // Remove the example's on-disk artifacts.
    std::fs::remove_dir_all(&temp_dir)?;
    println!("\nExample completed successfully!");
    Ok(())
}
/// Builds a deterministic 128-dimensional embedding for `text`.
///
/// Each whitespace-separated word is hashed into one of 128 buckets and
/// contributes a weight of `1 / (position + 1)`, so earlier words count
/// more. The result is L2-normalized; an input with no words yields the
/// all-zero vector unchanged.
fn generate_text_vector(text: &str) -> Vec<f32> {
    let mut embedding = vec![0.0f32; 128];
    for (position, word) in text.split_whitespace().enumerate() {
        let bucket = (simple_hash(word) % 128) as usize;
        embedding[bucket] += 1.0 / (position + 1) as f32;
    }
    // L2-normalize; skip the zero vector to avoid dividing by zero.
    let norm: f32 = embedding.iter().map(|v| v * v).sum::<f32>().sqrt();
    if norm > 0.0 {
        embedding.iter_mut().for_each(|v| *v /= norm);
    }
    embedding
}
/// Deterministic polynomial hash (base 31, wrapping) over the UTF-8 bytes
/// of `s`. Returns 0 for the empty string.
fn simple_hash(s: &str) -> u32 {
    let mut acc: u32 = 0;
    for byte in s.bytes() {
        acc = acc.wrapping_mul(31).wrapping_add(u32::from(byte));
    }
    acc
}