use anyhow::Result;
use rag_module::*;
/// Step-by-step diagnostic for ID mismatches between the documents persisted on
/// disk, a direct vector-store search, and the estate search service.
///
/// The run prints five sections:
/// 1. the document IDs found in the on-disk documents file,
/// 2. the IDs returned by a direct vector-store search,
/// 3. the IDs returned by `search_estate_resources`,
/// 4. a cross-reference of the three ID sets,
/// 5. the doc-ID → vector-ID mappings from the on-disk vector index,
///
/// followed by a diagnosis derived from the overlaps.
///
/// # Errors
///
/// Returns an error if the RAG module cannot be created or initialized, if
/// either search fails, or if one of the JSON files exists but cannot be
/// parsed. A file that cannot be read at all is reported and treated as empty,
/// so the remaining steps and the diagnosis still run.
#[tokio::main]
async fn main() -> Result<()> {
    // Length of the dummy query vector used for the direct search.
    // NOTE(review): presumably this must match the collection's embedding
    // dimension — confirm against the vector store configuration.
    const QUERY_VECTOR_LEN: usize = 1024;

    println!("🔍 DEBUGGING UUID MISMATCH STEP BY STEP");
    println!("======================================\n");

    let rag = create_rag_module("./test_data").await?;
    rag.initialize().await?;

    let user_id = "test_user_123";
    let collection = "aws_estate";

    println!("📋 STEP 1: Check Raw Documents in Collection");
    println!("============================================");

    // Select the user context once, before any file or search access. Nothing
    // below changes it through this handle, so a second call before the direct
    // search would be redundant.
    if let Some(embedded_store) = rag
        .vector_store
        .as_any()
        .downcast_ref::<crate::db::EmbeddedQdrantVectorStore>()
    {
        embedded_store.set_user_context(user_id).await;
    }

    // Read and parse the documents file exactly once; every later step that
    // needs the stored IDs borrows from this parsed value.
    let documents_file = format!(
        "test_data/qdrant-data/{}/{}-documents.json",
        user_id, collection
    );
    let documents_json: Option<serde_json::Value> = match std::fs::read_to_string(&documents_file)
    {
        Ok(content) => Some(serde_json::from_str::<serde_json::Value>(&content)?),
        Err(err) => {
            println!(
                " ❌ Could not read documents file: {} ({})",
                documents_file, err
            );
            None
        }
    };
    let stored_docs: &[serde_json::Value] = documents_json
        .as_ref()
        .and_then(|json| json.get("documents"))
        .and_then(|docs| docs.as_array())
        .map(|docs| docs.as_slice())
        .unwrap_or_default();

    println!(" 📊 Total documents in file system: {}", stored_docs.len());
    if !stored_docs.is_empty() {
        println!(" 🔍 First 3 document IDs from file system:");
        for (i, doc) in stored_docs.iter().take(3).enumerate() {
            let doc_id = doc.get("id").and_then(|v| v.as_str()).unwrap_or("N/A");
            println!(" {}. {}", i + 1, doc_id);
        }
    }

    // IDs actually persisted on disk. These drive the per-result "exists"
    // checks and the cross-reference analysis, so they must come from the file
    // rather than from an empty placeholder set.
    let collection_ids: std::collections::HashSet<&str> = stored_docs
        .iter()
        .filter_map(|doc| doc.get("id").and_then(|v| v.as_str()))
        .collect();

    println!("\n📋 STEP 2: Test Vector Store Search Directly");
    println!("===========================================");
    let direct_options = rag_module::types::SearchOptions {
        limit: Some(5),
        score_threshold: Some(0.001),
        filter: None,
        collection_name: None,
        privacy_level: None,
        with_payload: Some(true),
    };
    println!(" 🔍 Performing direct vector search...");
    let direct_results = rag
        .vector_store
        .search(collection, vec![0.1; QUERY_VECTOR_LEN], direct_options)
        .await?;
    let direct_ids: std::collections::HashSet<&str> =
        direct_results.iter().map(|r| r.id.as_str()).collect();

    println!(
        " 📊 Direct vector search returned {} results",
        direct_results.len()
    );
    println!(" 🔍 Direct search result IDs:");
    for (i, result) in direct_results.iter().take(3).enumerate() {
        println!(" {}. {} (score: {:.4})", i + 1, result.id, result.score);
        // Check this specific ID against the stored documents.
        let exists = collection_ids.contains(result.id.as_str());
        println!(" Exists in file system: {}", exists);
    }

    println!("\n📋 STEP 3: Test Estate Search Function");
    println!("=====================================");
    let estate_options = rag_module::services::search_service::EstateSearchOptions {
        resource_types: None,
        account_ids: None,
        regions: None,
        services: None,
        states: None,
        environment: None,
        application: None,
        synced_after: None,
        limit: Some(3),
        score_threshold: Some(0.001),
        include_metadata: true,
        use_anonymous_ids: false,
    };
    println!(" 🔍 Performing estate search...");
    let estate_results = rag
        .search_service
        .search_estate_resources("aws resource", estate_options, None, user_id)
        .await?;
    let estate_ids: std::collections::HashSet<&str> = estate_results
        .iter()
        .filter_map(|r| r.get("id").and_then(|v| v.as_str()))
        .collect();

    println!(
        " 📊 Estate search returned {} results",
        estate_results.len()
    );
    println!(" 🔍 Estate search result IDs:");
    for (i, result) in estate_results.iter().take(3).enumerate() {
        let result_id = result.get("id").and_then(|v| v.as_str()).unwrap_or("N/A");
        println!(" {}. {}", i + 1, result_id);
        println!(
            " Exists in file system: {}",
            collection_ids.contains(result_id)
        );
        println!(
            " Matches direct search: {}",
            direct_ids.contains(result_id)
        );
    }

    println!("\n📋 STEP 4: Cross-Reference Analysis");
    println!("===================================");
    println!(" 📊 ID Set Sizes:");
    println!(" Collection IDs: {}", collection_ids.len());
    println!(" Direct search IDs: {}", direct_ids.len());
    println!(" Estate search IDs: {}", estate_ids.len());

    let collection_direct_overlap = collection_ids.intersection(&direct_ids).count();
    let direct_estate_overlap = direct_ids.intersection(&estate_ids).count();
    let collection_estate_overlap = collection_ids.intersection(&estate_ids).count();
    println!("\n 📊 ID Overlaps:");
    println!(" Collection ∩ Direct: {} IDs", collection_direct_overlap);
    println!(" Direct ∩ Estate: {} IDs", direct_estate_overlap);
    println!(" Collection ∩ Estate: {} IDs", collection_estate_overlap);

    // An empty overlap only indicates a mismatch when both sides actually
    // contain IDs; otherwise there was simply nothing to compare.
    let direct_mismatch =
        !collection_ids.is_empty() && !direct_ids.is_empty() && collection_direct_overlap == 0;
    let estate_mismatch =
        !direct_ids.is_empty() && !estate_ids.is_empty() && direct_estate_overlap == 0;

    if direct_mismatch {
        println!(" ❌ CRITICAL: No overlap between collection and direct search!");
        println!(" This means direct vector search returns different IDs than stored docs");
    }
    if estate_mismatch {
        println!(" ❌ CRITICAL: No overlap between direct and estate search!");
        println!(" This means estate search transforms/generates new IDs");
    }

    println!("\n📋 STEP 5: Examine Vector Index Mapping");
    println!("=======================================");
    // Build the path from `user_id` so it stays in sync with STEP 1, and do not
    // abort on a missing index file: the diagnosis below is still useful.
    let vector_index_file = format!(
        "test_data/qdrant-data/{}/{}-vector-index.json",
        user_id, collection
    );
    match std::fs::read_to_string(&vector_index_file) {
        Ok(content) => {
            let vector_index: serde_json::Value = serde_json::from_str(&content)?;
            if let Some(vectors) = vector_index.get("vectors").and_then(|v| v.as_array()) {
                println!(" 📊 Vector index contains {} mappings", vectors.len());
                println!(" 🔍 First 3 vector mappings:");
                for (i, vector) in vectors.iter().take(3).enumerate() {
                    let doc_id = vector
                        .get("documentId")
                        .and_then(|v| v.as_str())
                        .unwrap_or("N/A");
                    let vector_id = vector
                        .get("vectorId")
                        .and_then(|v| v.as_str())
                        .unwrap_or("N/A");
                    println!(" {}. Doc: {} → Vector: {}", i + 1, doc_id, vector_id);
                    println!(
                        " Vector ID in direct results: {}",
                        direct_ids.contains(vector_id)
                    );
                    println!(
                        " Vector ID in estate results: {}",
                        estate_ids.contains(vector_id)
                    );
                    println!(
                        " Doc ID in direct results: {}",
                        direct_ids.contains(doc_id)
                    );
                    println!(
                        " Doc ID in estate results: {}",
                        estate_ids.contains(doc_id)
                    );
                }
            }
        }
        Err(err) => println!(
            " ❌ Could not read vector index file: {} ({})",
            vector_index_file, err
        ),
    }

    println!("\n🏁 DIAGNOSIS:");
    println!("=============");
    if collection_ids.is_empty() || direct_ids.is_empty() {
        // Without stored IDs or direct results, neither bug can be confirmed.
        println!("⚠️ INCONCLUSIVE: no stored document IDs or no direct search results to compare.");
    } else if direct_mismatch {
        println!("❌ BUG: Vector search is returning vector IDs instead of document IDs!");
        println!(" The issue is in the vector store search implementation.");
        println!(" It should return document IDs but is returning vector UUIDs.");
    } else if estate_mismatch {
        // NOTE(review): the two searches use different queries, so a disjoint
        // result set may also be legitimate — confirm before acting on this.
        println!("❌ BUG: Estate search is transforming IDs incorrectly!");
        println!(" The vector search works but estate processing breaks the IDs.");
    } else {
        println!("✅ ID mapping appears correct - issue might be elsewhere");
    }

    Ok(())
}