use super::*;
/// Vector index over text documents that keeps the original documents and any
/// associated triples alongside the embedding index.
pub struct EnhancedVectorIndex {
/// Produces the embeddings for document content and for search queries.
embedding_manager: EmbeddingManager,
/// Underlying vector index; `new` configures it as HNSW with cosine distance.
index: AdvancedVectorIndex,
/// Every added document, keyed by the id passed to `add_document`.
document_mapping: HashMap<String, RagDocument>,
/// Triples keyed by document id; populated only for documents added with a triple.
triple_index: HashMap<String, Triple>,
}
impl EnhancedVectorIndex {
    /// Creates an empty index backed by an HNSW vector index with cosine distance.
    ///
    /// Every other index setting comes from `IndexConfig::default()`.
    ///
    /// # Errors
    ///
    /// The current body contains no failing step, so this always returns `Ok`.
    pub async fn new() -> Result<Self> {
        let index_config = IndexConfig {
            index_type: IndexType::Hnsw,
            distance_metric: DistanceMetric::Cosine,
            ..Default::default()
        };
        Ok(Self {
            embedding_manager: super::embedding::EmbeddingManager::new(),
            index: AdvancedVectorIndex::new(index_config),
            document_mapping: HashMap::new(),
            triple_index: HashMap::new(),
        })
    }

    /// Embeds `content`, inserts the embedding into the vector index under `id`,
    /// and records the document (and its triple, if any) for later lookup.
    ///
    /// Re-adding an existing `id` replaces the stored document. If the new
    /// document carries no triple, any triple previously recorded for that id is
    /// dropped so that [`get_triples`](Self::get_triples) stays in sync with the
    /// stored documents.
    ///
    /// # Errors
    ///
    /// Returns the error from the embedding manager or from the vector index
    /// insert. In both cases the document and triple maps are left untouched.
    pub async fn add_document(
        &mut self,
        id: String,
        content: String,
        triple: Option<Triple>,
        metadata: HashMap<String, String>,
    ) -> Result<()> {
        let vector = self.embedding_manager.get_embedding(&content, None).await?;
        // Take the f32 copy first so the vector can be moved into the index
        // instead of being cloned.
        let embedding = vector.as_f32();
        self.index.insert(id.clone(), vector)?;

        match &triple {
            Some(triple) => {
                self.triple_index.insert(id.clone(), triple.clone());
            }
            None => {
                // Without this, a triple from an earlier document with the same
                // id would survive the overwrite below.
                self.triple_index.remove(&id);
            }
        }

        let document = RagDocument {
            id: id.clone(),
            content,
            triple,
            metadata,
            embedding: Some(embedding),
        };
        self.document_mapping.insert(id, document);
        Ok(())
    }

    /// Embeds `query` and returns the stored documents matching the index hits.
    ///
    /// `limit` is forwarded unchanged to the underlying vector index. Hits whose
    /// uri has no entry in the document map are skipped. A document stored
    /// without a triple is represented by a synthesized triple
    /// `<uri> rdfs:label "content"`.
    ///
    /// `score` is copied verbatim from the hit's `distance` field.
    /// NOTE(review): with a distance metric, smaller values presumably mean
    /// closer matches — confirm before treating `score` as "higher is better".
    ///
    /// # Errors
    ///
    /// Returns the error from the embedding manager or from the index search.
    pub async fn search(&mut self, query: &str, limit: usize) -> Result<Vec<SearchDocument>> {
        let query_vector = self.embedding_manager.get_embedding(query, None).await?;
        let hits: Vec<VecSearchResult> = self.index.search(&query_vector.as_f32(), limit)?;

        let documents: Vec<SearchDocument> = hits
            .into_iter()
            .filter_map(|hit| {
                let stored = self.document_mapping.get(&hit.uri)?;
                let triple = stored.triple.clone().unwrap_or_else(|| {
                    // NOTE(review): the uri is wrapped without validation
                    // (`new_unchecked`); assumes ids are valid IRIs — confirm.
                    Triple::new(
                        Subject::NamedNode(NamedNode::new_unchecked(&hit.uri)),
                        NamedNode::new_unchecked("http://www.w3.org/2000/01/rdf-schema#label"),
                        Object::Literal(stored.content.clone().into()),
                    )
                });
                Some(SearchDocument {
                    document: triple,
                    score: hit.distance,
                })
            })
            .collect();
        Ok(documents)
    }

    /// Number of documents currently stored.
    pub fn len(&self) -> usize {
        self.document_mapping.len()
    }

    /// Returns `true` when no documents are stored.
    pub fn is_empty(&self) -> bool {
        self.document_mapping.is_empty()
    }

    /// Returns clones of all triples recorded for stored documents.
    ///
    /// The order follows `HashMap` iteration and is therefore unspecified.
    pub fn get_triples(&self) -> Vec<Triple> {
        self.triple_index.values().cloned().collect()
    }
}
/// A single search hit returned by `EnhancedVectorIndex::search`.
#[derive(Debug, Clone)]
pub struct SearchDocument {
    /// The triple stored with the matching document, or a synthesized
    /// `rdfs:label` triple built from the document's uri and content when none
    /// was stored.
    pub document: Triple,
    /// Value copied from the vector index hit's `distance` field.
    pub score: f32,
}
/// A document as stored in the index, together with its embedding.
#[derive(Debug, Clone)]
pub struct RagDocument {
/// Identifier under which the document was added; also its key in the vector index.
pub id: String,
/// Text content; this is the text that gets embedded when the document is added.
pub content: String,
/// Triple supplied with the document, if any.
pub triple: Option<Triple>,
/// Caller-provided string key/value metadata.
pub metadata: HashMap<String, String>,
/// Embedding as `f32` values; `EnhancedVectorIndex::add_document` always sets this to `Some`.
pub embedding: Option<Vec<f32>>,
}
/// Wrapper that exposes a subset of `EnhancedVectorIndex` by delegating to it.
pub struct RagIndex {
/// The wrapped index that performs all of the actual work.
enhanced_index: EnhancedVectorIndex,
}
impl RagIndex {
    /// Builds a `RagIndex` around a freshly constructed `EnhancedVectorIndex`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `EnhancedVectorIndex::new`.
    pub async fn new() -> Result<Self> {
        let enhanced_index = EnhancedVectorIndex::new().await?;
        Ok(Self { enhanced_index })
    }

    /// Adds a document by forwarding all arguments to the wrapped index.
    ///
    /// # Errors
    ///
    /// Propagates any error from `EnhancedVectorIndex::add_document`.
    pub async fn add_document(
        &mut self,
        id: String,
        content: String,
        triple: Option<Triple>,
        metadata: HashMap<String, String>,
    ) -> Result<()> {
        let inner = &mut self.enhanced_index;
        inner.add_document(id, content, triple, metadata).await
    }

    /// Searches the wrapped index for `query`, passing `limit` through unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error from `EnhancedVectorIndex::search`.
    pub async fn search(&mut self, query: &str, limit: usize) -> Result<Vec<SearchDocument>> {
        let inner = &mut self.enhanced_index;
        inner.search(query, limit).await
    }

    /// Number of documents held by the wrapped index.
    pub fn len(&self) -> usize {
        let Self { enhanced_index } = self;
        enhanced_index.len()
    }

    /// Returns `true` when the wrapped index holds no documents.
    pub fn is_empty(&self) -> bool {
        let Self { enhanced_index } = self;
        enhanced_index.is_empty()
    }
}