use anyhow::Result;
use rag_module::*;
/// Debugging entry point that compares what is stored for a collection with
/// what the search paths return for it.
///
/// The run is split into five printed steps:
/// 1. dump the first entries of the on-disk vector index JSON file,
/// 2. list the documents reported by `get_collection_documents`,
/// 3. run a raw vector search and check each hit against the collection,
/// 4. compute the set overlap between collection IDs and search-result IDs,
/// 5. run `search_estate_resources` and check each hit against the collection.
///
/// A final diagnosis is printed based on the overlap computed in step 4.
///
/// # Errors
/// Returns the first error produced by module creation/initialisation, file
/// reading, JSON parsing, or any of the search calls.
#[tokio::main]
async fn main() -> Result<()> {
    // Single source of truth for the values that were previously repeated as
    // string literals (including inside the hard-coded index file path).
    const DATA_DIR: &str = "./test_data";
    const COLLECTION: &str = "aws_estate";

    println!("🔍 DEBUGGING VECTOR STORE INDEXING VS SEARCH");
    println!("============================================\n");

    let rag = create_rag_module(DATA_DIR).await?;
    rag.initialize().await?;
    let user_id = "test_user_123";

    println!("📋 STEP 1: Check Vector Store Raw Contents");
    println!("==========================================");
    // Layout: <data dir>/qdrant-data/<user id>/<collection>-vector-index.json.
    // Built from `user_id` and `COLLECTION` so it cannot drift from the values
    // used by the queries below.
    let vector_index_file = std::path::Path::new(DATA_DIR)
        .join("qdrant-data")
        .join(user_id)
        .join(format!("{}-vector-index.json", COLLECTION));
    print_vector_index_summary(&vector_index_file)?;

    println!("\n📋 STEP 2: Check Document Collection");
    println!("====================================");
    let collection_docs = rag.get_collection_documents(COLLECTION, user_id).await?;
    println!(" 📊 Collection documents: {}", collection_docs.len());
    for (i, doc) in collection_docs.iter().take(5).enumerate() {
        println!(" {}. Document ID: {}", i + 1, doc.id);
    }

    println!("\n📋 STEP 3: Perform Vector Search and Trace IDs");
    println!("==============================================");
    let search_options = rag_module::types::SearchOptions {
        limit: Some(10),
        score_threshold: Some(0.001),
        filter: None,
        collection_name: None,
        privacy_level: None,
        with_payload: Some(true),
    };
    // Constant dummy query vector of length 1024.
    // NOTE(review): assumes the stored vectors are 1024-dimensional — confirm.
    let search_results = rag
        .vector_store
        .search(COLLECTION, vec![0.1; 1024], search_options)
        .await?;
    println!(" 📊 Vector search returned {} results", search_results.len());
    for (i, result) in search_results.iter().take(5).enumerate() {
        println!(
            " {}. Vector search result ID: {} (score: {:.4})",
            i + 1,
            result.id,
            result.score
        );
        let exists_in_collection = collection_docs.iter().any(|doc| doc.id == result.id);
        println!(" Exists in collection: {}", exists_in_collection);
    }

    println!("\n📋 STEP 4: Cross-Reference Analysis");
    println!("===================================");
    let collection_ids: std::collections::HashSet<_> =
        collection_docs.iter().map(|doc| &doc.id).collect();
    let search_result_ids: std::collections::HashSet<_> =
        search_results.iter().map(|result| &result.id).collect();
    let intersection: Vec<_> = collection_ids.intersection(&search_result_ids).collect();
    let collection_only: Vec<_> = collection_ids.difference(&search_result_ids).collect();
    let search_only: Vec<_> = search_result_ids.difference(&collection_ids).collect();
    println!(" 📊 ID Analysis:");
    println!(" Documents in both collection & search: {}", intersection.len());
    println!(" Documents only in collection: {}", collection_only.len());
    println!(" Documents only in search results: {}", search_only.len());
    if !collection_only.is_empty() {
        println!("\n 🔍 Collection-only documents (first 3):");
        for (i, id) in collection_only.iter().take(3).enumerate() {
            println!(" {}. {}", i + 1, id);
        }
    }
    if !search_only.is_empty() {
        println!("\n 🔍 Search-only documents (first 3):");
        for (i, id) in search_only.iter().take(3).enumerate() {
            println!(" {}. {}", i + 1, id);
        }
    }

    println!("\n📋 STEP 5: Debug Estate Search Function");
    println!("======================================");
    let estate_search_options = rag_module::services::search_service::EstateSearchOptions {
        resource_types: None,
        account_ids: None,
        regions: None,
        services: None,
        states: None,
        environment: None,
        application: None,
        synced_after: None,
        limit: Some(5),
        score_threshold: Some(0.001),
        include_metadata: true,
        use_anonymous_ids: false,
    };
    let estate_results = rag
        .search_service
        .search_estate_resources("aws resources", estate_search_options, None, user_id)
        .await?;
    println!(" 📊 Estate search returned {} results", estate_results.len());
    for (i, result) in estate_results.iter().take(3).enumerate() {
        let result_id = result.get("id").and_then(|v| v.as_str()).unwrap_or("N/A");
        let service = result.get("service").and_then(|v| v.as_str()).unwrap_or("null");
        let resource_type = result
            .get("resource_type")
            .and_then(|v| v.as_str())
            .unwrap_or("null");
        println!(
            " {}. Estate result ID: {} (service: {}, type: {})",
            i + 1,
            result_id,
            service,
            resource_type
        );
        let exists_in_collection = collection_docs.iter().any(|doc| doc.id == result_id);
        println!(" Exists in collection: {}", exists_in_collection);
    }

    println!("\n🏁 DIAGNOSIS:");
    println!("=============");
    // An empty side makes the overlap trivially empty; that is not evidence of
    // a dataset mismatch, so report it separately instead of as CRITICAL.
    if collection_docs.is_empty() {
        println!("⚠️ INCONCLUSIVE: The collection contains no documents,");
        println!(" so there is nothing to cross-reference against the search results.");
    } else if search_results.is_empty() {
        println!("⚠️ INCONCLUSIVE: Vector search returned no results,");
        println!(" so there is nothing to cross-reference against the collection.");
    } else if intersection.is_empty() {
        println!("❌ CRITICAL: No overlap between indexed documents and search results!");
        println!(" This indicates the vector search is accessing a different dataset");
        println!(" than what was indexed. Possible causes:");
        println!(" - User context isolation bug");
        println!(" - Collection name mismatch");
        println!(" - Vector store corruption");
    } else {
        println!("✅ Some documents found in both collection and search");
    }
    Ok(())
}

/// Prints the vector count and the first five entries of the JSON vector
/// index stored at `path`.
///
/// A missing file, or a file without a top-level `"vectors"` array, is
/// reported on stdout and treated as a non-error so the remaining steps
/// still run.
///
/// # Errors
/// Returns an error if the file exists but cannot be read or is not valid JSON.
fn print_vector_index_summary(path: &std::path::Path) -> Result<()> {
    if !path.exists() {
        println!(" ⚠️ Vector index file not found: {}", path.display());
        return Ok(());
    }

    let content = std::fs::read_to_string(path)?;
    let index_data: serde_json::Value = serde_json::from_str(&content)?;

    match index_data.get("vectors").and_then(|v| v.as_array()) {
        Some(vectors) => {
            println!(" 📊 Vector index contains {} vectors", vectors.len());
            for (i, vector) in vectors.iter().take(5).enumerate() {
                // Missing or mistyped fields fall back to placeholders rather
                // than aborting the dump.
                let doc_id = vector.get("documentId").and_then(|v| v.as_str()).unwrap_or("N/A");
                let vector_id = vector.get("vectorId").and_then(|v| v.as_str()).unwrap_or("N/A");
                let position = vector.get("position").and_then(|v| v.as_u64()).unwrap_or(0);
                println!(
                    " {}. Doc: {} → Vector: {} (pos: {})",
                    i + 1,
                    doc_id,
                    vector_id,
                    position
                );
            }
        }
        None => {
            println!(
                " ⚠️ Vector index file has no \"vectors\" array: {}",
                path.display()
            );
        }
    }
    Ok(())
}