use memista::{AppState, Config, InsertChunkRequest, SearchRequest, ChunkData, ensure_table_exists, load_or_create_index};
use async_sqlite::{PoolBuilder, JournalMode};
use std::sync::Arc;
fn create_embedding(text: &str) -> Vec<f32> {
let word_count = text.split_whitespace().count() as f32;
let char_count = text.chars().count() as f32;
vec![word_count / 100.0, char_count / 1000.0] }
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("Demonstrating advanced Memista library usage...");
let config = Config {
database_path: "memista_advanced_example.db".to_string(),
server_host: "127.0.0.1".to_string(),
server_port: 8086,
log_level: "info".to_string(),
};
let db_pool = PoolBuilder::new()
.path(&config.database_path)
.journal_mode(JournalMode::Wal)
.open()
.await
.expect("Failed to create database pool");
let app_state = Arc::new(AppState { db_pool });
let database_id = "documents_db";
ensure_table_exists(&app_state.db_pool, database_id).await?;
let documents = vec![
"Rust is a systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.",
"Python is a high-level programming language known for its simplicity and readability. It's widely used in data science and web development.",
"JavaScript is a versatile programming language primarily used for web development. It enables interactive web pages and is an essential part of web applications.",
"Java is a class-based, object-oriented programming language that is designed to have as few implementation dependencies as possible.",
"Go is an open source programming language that makes it easy to build simple, reliable, and efficient software."
];
let chunks: Vec<ChunkData> = documents.iter().enumerate().map(|(i, doc)| {
ChunkData {
embedding: create_embedding(doc),
text: doc.to_string(),
metadata: format!("{{\"doc_id\": \"doc_{}\", \"source\": \"example\"}}", i),
}
}).collect();
println!("Processing {} documents...", chunks.len());
let mut index = load_or_create_index(database_id)?;
let insert_request = InsertChunkRequest {
database_id: database_id.to_string(),
chunks: chunks.clone(),
};
let table_name = format!("chunks_{}", database_id);
let mut inserted_ids = Vec::new();
for chunk in &insert_request.chunks {
let chunk = chunk.clone();
let table_name = table_name.clone();
let chunk_id: i64 = app_state.db_pool.conn(move |conn| {
conn.query_row(
&format!("INSERT INTO {} (text, metadata) VALUES (?, ?) RETURNING chunk_id", table_name),
[&chunk.text, &chunk.metadata],
|row| row.get(0),
)
}).await?;
index.add(chunk_id as u64, &chunk.embedding)?;
inserted_ids.push(chunk_id);
}
let index_file = format!("{}.usearch", database_id);
index.save(&index_file)?;
println!("Successfully processed and stored {} documents", chunks.len());
println!("\nSearching for documents similar to: \"Programming languages are used to build software applications.\"");
let query_text = "Programming languages are used to build software applications.";
let query_embedding = create_embedding(query_text);
let search_request = SearchRequest {
database_id: database_id.to_string(),
embeddings: vec![query_embedding],
num_results: 3, };
let index = load_or_create_index(&search_request.database_id)?;
let query_embedding = &search_request.embeddings[0];
let results = index.search(query_embedding, search_request.num_results)?;
println!("\nTop {} similar documents:", results.keys.len());
for (i, (chunk_id, score)) in results.keys.iter().zip(results.distances.iter()).enumerate() {
let chunk_id = *chunk_id;
let score = *score;
let table_name = table_name.clone();
let chunk = app_state.db_pool.conn(move |conn| {
conn.query_row(
&format!("SELECT text, metadata FROM {} WHERE chunk_id = ?", table_name),
[chunk_id.to_string()],
|row| Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?)),
)
}).await?;
println!("\n{}. Similarity Score: {:.4}", i + 1, score);
println!(" Text: {}", chunk.0);
if let Some(metadata) = chunk.1 {
println!(" Metadata: {}", metadata);
}
}
println!("\nAdvanced usage example completed successfully!");
Ok(())
}