// reasonkit-core 0.1.8
//
// The Reasoning Engine — Auditable Reasoning for Production AI | Rust-Native | Turn Prompts into Protocols
//! Hypothetical Document Embeddings (HyDE) query expansion
//!
//! Implementation of the HyDE technique from "Precise Zero-Shot Dense Retrieval without Relevance Labels"
//! (arXiv:2212.10496).
//!
//! Process:
//! 1. Generate hypothetical answer document using LLM
//! 2. Embed the hypothetical document
//! 3. Use embedding for similarity search (document-to-document)
//!
//! This module provides a query expander that can be integrated into the RAG engine.

use crate::thinktool::{LlmClient, LlmRequest, UnifiedLlmClient};
use crate::{Error, Result};

/// HyDE query expander.
///
/// Wraps an LLM client and turns a user query into a hypothetical answer
/// document (see module docs); the caller embeds that document and uses
/// the embedding for document-to-document similarity search.
pub struct HyDEExpander {
    /// LLM client used to generate the hypothetical answer document.
    llm_client: UnifiedLlmClient,
}

impl HyDEExpander {
    /// Create a new HyDE expander with the given LLM client
    pub fn new(llm_client: UnifiedLlmClient) -> Self {
        Self { llm_client }
    }

    /// Expand a query using HyDE technique
    ///
    /// Generates a hypothetical answer document that would answer the query,
    /// then returns that document as the expanded query.
    pub async fn expand_query(&self, query: &str) -> Result<String> {
        let prompt = format!(
            r#"Given the question below, write a detailed paragraph that would
answer this question. This paragraph will be used for document retrieval,
so include specific technical terms and concepts that would appear in
authoritative sources.

Question: {query}

Hypothetical Answer Document:"#
        );

        let request = LlmRequest::new(&prompt)
            .with_max_tokens(500)
            .with_temperature(0.7);

        let response = self
            .llm_client
            .complete(request)
            .await
            .map_err(|e| Error::network(format!("LLM generation failed: {}", e)))?;

        Ok(response.content)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::thinktool::MockLlmClient;

    /// Expanding a query returns the LLM's generated content verbatim.
    #[tokio::test]
    async fn test_hyde_expansion() {
        // Mock LLM client that returns a canned answer regardless of prompt.
        // NOTE(review): mockall-style `expect_*` methods take `&mut self`,
        // so the binding must be `mut` — the original immutable `let`
        // would not compile. Confirm MockLlmClient is mockall-generated.
        let mut mock_client = MockLlmClient::new();
        mock_client.expect_complete().returning(|_| {
            Ok(crate::thinktool::LlmResponse {
                content: "Hypothetical answer about machine learning.".to_string(),
                usage: crate::thinktool::LlmUsage {
                    prompt_tokens: 10,
                    completion_tokens: 20,
                    total_tokens: 30,
                },
            })
        });

        let expander = HyDEExpander::new(UnifiedLlmClient::Mock(mock_client));
        let expanded = expander
            .expand_query("What is machine learning?")
            .await
            .unwrap();

        // The expanded query is exactly the mocked completion content.
        assert!(expanded.contains("Hypothetical answer"));
    }
}