use tcvectordb::{
document::{AnnSearch, KeywordSearch, Rerank},
enums::{FieldType, IndexType, MetricType, ReadConsistency},
index::HNSWParams,
Document, Embedding, FilterIndex, Index, Result, SparseIndex, VectorDBClient, VectorIndex,
};
/// Runs the hybrid-search demo end to end against a VectorDB instance.
///
/// The steps, in order:
/// 1. build a client from environment variables,
/// 2. create the `db-test` database and the `book-emb` collection (if missing),
/// 3. upsert three sample documents that carry a sparse vector each,
/// 4. run a hybrid search (ANN + keyword search + weighted rerank) and print the hits,
/// 5. drop the collection and the database again,
/// 6. print a Python-to-Rust API mapping table.
///
/// # Environment
/// - `VECTORDB_URL` — falls back to `http://localhost:8100`.
/// - `VECTORDB_USERNAME` — falls back to `root`.
/// - `VECTORDB_API_KEY` — required; when it cannot be read the program prints a
///   message to stderr and exits with status 1.
///
/// # Errors
/// Propagates the error when client construction, database creation, index
/// definition or collection creation fails. Failures of the upsert, the search and
/// the cleanup calls are only printed; the demo keeps going.
#[tokio::main]
async fn main() -> Result<()> {
    println!("🔍 Python Hybrid Search Complete Example - Python 混合搜索完整示例");

    // Connection settings come from the environment; only the API key has no default.
    let url = std::env::var("VECTORDB_URL").unwrap_or_else(|_| "http://localhost:8100".to_string());
    let username = std::env::var("VECTORDB_USERNAME").unwrap_or_else(|_| "root".to_string());
    let api_key = match std::env::var("VECTORDB_API_KEY") {
        Ok(key) => key,
        Err(err) => {
            // A missing credential is a configuration problem, not a bug: report it
            // plainly instead of panicking.
            eprintln!("❌ VECTORDB_API_KEY environment variable is required: {}", err);
            std::process::exit(1);
        }
    };

    // NOTE(review): the trailing `30` is presumably a timeout in seconds — confirm
    // against `VectorDBClient::new`.
    let client = VectorDBClient::new(
        &url,
        &username,
        &api_key,
        ReadConsistency::EventualConsistency,
        30,
    )?;
    println!("✅ VectorDB client created successfully");

    let db_name = "db-test";
    let db = client.create_database_if_not_exists(db_name).await?;
    println!("✅ Database '{}' ready", db_name);

    // Index layout: primary key, dense vector, sparse vector, and a filter on the raw text.
    let mut index = Index::new();
    index.add_filter_index(FilterIndex::new(
        "id",
        FieldType::String,
        IndexType::PRIMARY_KEY,
    ))?;
    // NOTE(review): `HNSWParams::new(16, 200)` is presumably (M, efConstruction) — confirm.
    index.add_vector_index(VectorIndex::new(
        "vector",
        768,
        IndexType::HNSW,
        MetricType::COSINE,
        Some(tcvectordb::index::IndexParams::HNSW(HNSWParams::new(
            16, 200,
        ))),
    ))?;
    index.add_sparse_index(SparseIndex::new(
        "sparse_vector",
        IndexType::SPARSE_INVERTED,
        MetricType::IP,
    ))?;
    index.add_filter_index(FilterIndex::new(
        "text",
        FieldType::String,
        IndexType::FILTER,
    ))?;

    // Embedding configuration linking the "vector" and "text" fields with the
    // `bge-base-zh` model.
    let embedding = Embedding::new("vector", "text").with_model_name("bge-base-zh");

    println!("🔧 Creating collection with hybrid search support...");
    let collection_name = "book-emb";
    // NOTE(review): `1, 0` are presumably the shard and replica counts — confirm
    // against `create_collection_if_not_exists`.
    let collection = db
        .create_collection_if_not_exists(
            collection_name,
            1,
            0,
            Some("Collection for Python hybrid search equivalent demo".to_string()),
            Some(index),
            Some(embedding),
            None,
        )
        .await?;
    println!(
        "✅ Collection '{}' ready for hybrid search",
        collection.name()
    );

    println!("\n📝 Inserting test documents with sparse vectors...");
    // Hand-written placeholder sparse vectors; they are not produced by a BM25 encoder.
    let sparse_vector_1 = vec![vec![0.8, 0.9, 0.7, 0.5]];
    let sparse_vector_2 = vec![vec![0.9, 0.8, 0.7, 0.4]];
    let sparse_vector_3 = vec![vec![0.8, 0.7, 0.6, 0.3]];
    let documents = vec![
        Document::new()
            .with_id("doc1")
            .with_field("text", "腾讯云向量数据库是一种高性能的向量存储和检索服务")
            .with_field("sparse_vector", sparse_vector_1),
        Document::new()
            .with_id("doc2")
            .with_field("text", "机器学习算法在人工智能领域发挥重要作用")
            .with_field("sparse_vector", sparse_vector_2),
        Document::new()
            .with_id("doc3")
            .with_field("text", "云计算服务器提供可靠的数据存储解决方案")
            .with_field("sparse_vector", sparse_vector_3),
    ];

    // Take the count before `documents` is moved into `upsert`, so the success
    // message always matches the data that was actually sent.
    let document_count = documents.len();
    // NOTE(review): the meaning of the `None, true` arguments is not visible here —
    // confirm against `upsert`.
    match collection.upsert(documents, None, true).await {
        Ok(_) => println!("✅ {} documents inserted successfully", document_count),
        Err(e) => {
            // Deliberately best-effort: the search below still shows the API shape.
            println!("❌ Failed to insert documents: {}", e);
            println!("💡 Continuing with search demo anyway...");
        }
    }

    println!("\n⏳ Waiting for index to build...");
    // Fixed pause before querying; nothing here checks that indexing has finished.
    tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;

    // Placeholder for the output of a BM25 query encoder (see the mapping table below).
    let bm25_encoded_query = vec![vec![vec![0.8, 0.6, 0.4, 0.3]]];

    println!("\n🔍 Executing hybrid search...");
    println!("📝 Query: '什么是腾讯云向量数据库'");
    println!("🔑 BM25 Keywords: '向量数据库'");

    // Dense side: a text query against the "text" field.
    let ann_search = vec![AnnSearch::new()
        .with_field_name("text")
        .with_text("什么是腾讯云向量数据库")
        .with_limit(2)];
    // Sparse side: keyword search over the "sparse_vector" field.
    let keyword_search = KeywordSearch::new()
        .with_field_name("sparse_vector")
        .with_terminate_after(4000)
        .with_cutoff_frequency(0.1)
        .with_data(bm25_encoded_query)
        .with_limit(2);
    // Weighted rerank; presumably fields and weights pair up by position
    // ("vector" -> 0.9, "sparse_vector" -> 0.1) — confirm against `Rerank::weighted`.
    let rerank = Rerank::weighted(
        vec!["vector".to_string(), "sparse_vector".to_string()],
        vec![0.9, 0.1],
    );

    // NOTE(review): `3` is presumably the overall result limit (the mapping table
    // pairs it with Python's `limit=3`); the two `None`s are not explained here.
    match collection
        .hybrid_search(
            ann_search,
            Some(keyword_search),
            Some(rerank),
            3,
            None,
            None,
        )
        .await
    {
        Ok(documents) => {
            println!("✅ Hybrid search successful!");
            println!("\n📋 Found {} documents", documents.len());
            for (i, doc) in documents.iter().enumerate() {
                println!("\n 📄 Document {}:", i);
                println!(" ID: {:?}", doc.get_id());
                if let Some(text) = doc.get("text") {
                    println!(" Text: {:?}", text);
                }
                if let Some(score) = doc.get("_score") {
                    println!(" Score: {:?}", score);
                }
                println!(" Full Document: {:?}", doc);
            }
            println!("\n🎯 Search completed successfully!");
        }
        Err(e) => {
            println!("❌ Hybrid search failed: {}", e);
            println!("\n💡 This demonstrates the API structure even if the search fails.");
            println!(" In a real scenario with proper BM25 encoding, this would work.");
        }
    }

    // Cleanup is best-effort: a failed drop is reported but does not fail the demo.
    println!("\n🧹 Cleaning up...");
    match db.drop_collection(collection_name).await {
        Ok(_) => println!("✅ Collection '{}' dropped", collection_name),
        Err(e) => println!("⚠️ Failed to drop collection: {}", e),
    }
    match client.drop_database(db_name).await {
        Ok(_) => println!("✅ Database '{}' dropped", db_name),
        Err(e) => println!("⚠️ Failed to drop database: {}", e),
    }

    println!("\n🎉 Python hybrid search equivalent demo completed!");
    print_mapping_summary();
    Ok(())
}

/// Prints the static table that maps the Python SDK calls to their Rust counterparts.
fn print_mapping_summary() {
    println!("\n📚 Python to Rust Mapping Summary:");
    println!("+-------------------------------------------------------------+");
    println!("| Python Code | Rust Equivalent |");
    println!("+-------------------------------------------------------------+");
    println!("| from tcvectordb.model.document| use tcvectordb::document:: |");
    println!("| import AnnSearch, KeywordSearch| {{AnnSearch, KeywordSearch}} |");
    println!("| | |");
    println!("| bm25 = BM25Encoder.default() | // Use external BM25 lib |");
    println!("| data = bm25.encode_queries() | let data = vec![vec![...]]; |");
    println!("| | |");
    println!("| AnnSearch(field_name=\"text\", | AnnSearch::new() |");
    println!("| data='query') | .with_field_name(\"text\") |");
    println!("| | .with_text(\"query\") |");
    println!("| | |");
    println!("| KeywordSearch(field_name=..., | KeywordSearch::new() |");
    println!("| data=bm25_data) | .with_field_name(...) |");
    println!("| | .with_data(bm25_data) |");
    println!("| | |");
    println!("| WeightedRerank(field_list=[], | Rerank::weighted( |");
    println!("| weight=[]) | vec![...], vec![...]) |");
    println!("| | |");
    println!("| client.hybrid_search( | collection.hybrid_search( |");
    println!("| database_name='db-test', | ann_search, |");
    println!("| collection_name='book-emb', | Some(keyword_search), |");
    println!("| ann=ann, match=match, | Some(rerank), |");
    println!("| rerank=rerank, limit=3) | 3, None, None).await |");
    println!("| | |");
    println!("| for i, docs in enumerate(): | for (i, doc) in |");
    println!("| for doc in docs: | documents.iter().enum(): |");
    println!("| print(doc) | println!(\"{{:?}}\", doc) |");
    println!("+-------------------------------------------------------------+");
}