graphrag-core 0.2.0

//! PageRank-based retrieval system for GraphRAG
//!
//! This module is only available when the "pagerank" feature is enabled.
#![cfg(feature = "pagerank")]

use crate::{
    core::traits::Retriever,
    core::{ChunkId, EntityId, GraphRAGError, KnowledgeGraph, Result},
    graph::pagerank::{MultiModalScores, PageRankConfig, PersonalizedPageRank, ScoreWeights},
    vector::VectorIndex,
};
use lru::LruCache;
use parking_lot::RwLock;
use rayon::prelude::*;
use std::collections::HashMap;
use std::num::NonZeroUsize;
use std::sync::Arc;

/// High-performance PageRank-based retrieval system implementing fast-GraphRAG approach
pub struct PageRankRetrievalSystem {
    vector_index: Option<VectorIndex>,
    score_weights: ScoreWeights,
    max_results: usize,
    min_score_threshold: f64,
    pagerank_config: PageRankConfig,
    /// Query cache for fast repeated lookups
    query_cache: Arc<RwLock<LruCache<String, Vec<ScoredResult>>>>,
    /// Entity rank cache for personalized rankings
    entity_rank_cache: Arc<RwLock<LruCache<String, HashMap<EntityId, f64>>>>,
    /// Enable incremental PageRank updates
    incremental_mode: bool,
    /// Pre-computed global PageRank scores
    global_pagerank: Option<HashMap<EntityId, f64>>,
    /// Optional in-memory knowledge graph used by trait-based search_with_context
    graph: Option<Arc<KnowledgeGraph>>,
}

/// A result with multiple scoring signals
#[derive(Debug, Clone)]
pub struct ScoredResult {
    /// The entity identifier
    pub entity_id: EntityId,
    /// The chunk identifier
    pub chunk_id: ChunkId,
    /// The text content
    pub content: String,
    /// Overall score
    pub score: f64,
    /// Vector similarity score
    pub vector_score: f64,
    /// PageRank importance score
    pub pagerank_score: f64,
    /// Combined weighted score
    pub combined_score: f64,
}

impl PageRankRetrievalSystem {
    /// Create a new PageRank-based retrieval system
    pub fn new(max_results: usize) -> Self {
        let cache_size = NonZeroUsize::new(1000).expect("non-zero literal");

        Self {
            vector_index: None,
            score_weights: ScoreWeights::default(),
            max_results,
            min_score_threshold: 0.1,
            pagerank_config: PageRankConfig::default(),
            query_cache: Arc::new(RwLock::new(LruCache::new(cache_size))),
            entity_rank_cache: Arc::new(RwLock::new(LruCache::new(cache_size))),
            incremental_mode: true,
            global_pagerank: None,
            graph: None,
        }
    }

    /// Create new system with custom PageRank configuration
    pub fn with_pagerank_config(mut self, config: PageRankConfig) -> Self {
        self.pagerank_config = config;
        self
    }

    /// Enable or disable incremental mode
    pub fn with_incremental_mode(mut self, enabled: bool) -> Self {
        self.incremental_mode = enabled;
        self
    }

    /// Configure scoring weights for combining signals
    pub fn with_score_weights(mut self, weights: ScoreWeights) -> Self {
        self.score_weights = weights;
        self
    }

    /// Set minimum score threshold for filtering results
    pub fn with_min_threshold(mut self, threshold: f64) -> Self {
        self.min_score_threshold = threshold;
        self
    }

    /// Initialize vector index from the knowledge graph
    pub fn initialize_vector_index(&mut self, graph: &KnowledgeGraph) -> Result<()> {
        // Extract all text content for vector indexing
        let mut content_items = Vec::new();

        // Add entity names and descriptions as content
        for entity in graph.entities() {
            let content = format!("{} {}", entity.name, entity.entity_type);
            content_items.push((entity.id.to_string(), content));
        }

        // Add chunk content
        for chunk in graph.chunks() {
            content_items.push((chunk.id.to_string(), chunk.content.clone()));
        }

        // For now, we'll create a simple mock vector index
        // In a real implementation, this would use actual embeddings
        self.vector_index = Some(VectorIndex::new());

        println!(
            "🔍 Vector index initialized with {} items",
            content_items.len()
        );

        Ok(())
    }

    /// Provide a knowledge graph handle for trait-based search methods
    pub fn set_graph(&mut self, graph: Arc<KnowledgeGraph>) {
        self.graph = Some(graph);
    }

    /// Execute comprehensive search using both vector similarity and PageRank
    pub fn search_with_pagerank(
        &self,
        query: &str,
        graph: &KnowledgeGraph,
        max_results: Option<usize>,
    ) -> Result<Vec<ScoredResult>> {
        let max_results = max_results.unwrap_or(self.max_results);

        println!("🔍 Starting PageRank-enhanced search for: '{query}'");

        // Step 1: Vector similarity search
        let vector_scores = self.vector_similarity_search(query, graph)?;
        println!("📊 Vector search found {} candidates", vector_scores.len());

        if vector_scores.is_empty() {
            return Ok(Vec::new());
        }

        // Step 2: Build PageRank calculator and compute scores
        let pagerank_calculator = graph.build_pagerank_calculator()?;
        let pagerank_scores =
            self.compute_personalized_pagerank(&vector_scores, &pagerank_calculator)?;
        println!("📈 PageRank computation completed");

        // Step 3: Combine scores using multi-modal approach
        let mut multi_scores = MultiModalScores::new();
        multi_scores.vector_scores = vector_scores;
        multi_scores.pagerank_scores = pagerank_scores;

        let combined_scores = multi_scores.combine_scores(&self.score_weights);

        // Step 4: Create scored results with detailed information
        let mut scored_results = Vec::new();
        for (entity_id, combined_score) in combined_scores {
            if combined_score < self.min_score_threshold {
                continue;
            }

            // Find related chunks for this entity
            for chunk in graph.chunks() {
                if chunk.entities.contains(&entity_id) {
                    let vector_score = multi_scores.vector_scores.get(&entity_id).unwrap_or(&0.0);
                    let pagerank_score =
                        multi_scores.pagerank_scores.get(&entity_id).unwrap_or(&0.0);

                    let result = ScoredResult {
                        entity_id: entity_id.clone(),
                        chunk_id: chunk.id.clone(),
                        content: chunk.content.clone(),
                        score: combined_score,
                        vector_score: *vector_score,
                        pagerank_score: *pagerank_score,
                        combined_score,
                    };

                    scored_results.push(result);
                }
            }

            // If no chunks found, create result with entity information
            if scored_results.is_empty() || !scored_results.iter().any(|r| r.entity_id == entity_id)
            {
                if let Some(entity) = graph.get_entity(&entity_id) {
                    let vector_score = multi_scores.vector_scores.get(&entity_id).unwrap_or(&0.0);
                    let pagerank_score =
                        multi_scores.pagerank_scores.get(&entity_id).unwrap_or(&0.0);

                    let result = ScoredResult {
                        entity_id: entity_id.clone(),
                        chunk_id: ChunkId::new(format!("entity_{entity_id}")),
                        content: format!("{}: {}", entity.name, entity.entity_type),
                        score: combined_score,
                        vector_score: *vector_score,
                        pagerank_score: *pagerank_score,
                        combined_score,
                    };

                    scored_results.push(result);
                }
            }
        }

        // Step 5: Sort by combined score and limit results
        scored_results.sort_by(|a, b| {
            b.score
                .partial_cmp(&a.score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        scored_results.truncate(max_results);

        println!(
            "✅ Search completed: {} results returned",
            scored_results.len()
        );

        Ok(scored_results)
    }

    fn vector_similarity_search(
        &self,
        query: &str,
        graph: &KnowledgeGraph,
    ) -> Result<HashMap<EntityId, f64>> {
        let mut scores = HashMap::new();

        // Simple text-based similarity for demonstration
        // In a real implementation, this would use actual vector embeddings
        let query_lower = query.to_lowercase();
        let query_words: Vec<&str> = query_lower.split_whitespace().collect();

        for entity in graph.entities() {
            let entity_text = format!(
                "{} {}",
                entity.name.to_lowercase(),
                entity.entity_type.to_lowercase()
            );
            let entity_words: Vec<&str> = entity_text.split_whitespace().collect();

            // Calculate Jaccard similarity as a proxy for vector similarity
            let intersection_count = query_words
                .iter()
                .filter(|word| entity_words.contains(word))
                .count();

            if intersection_count > 0 {
                let union_count = query_words.len() + entity_words.len() - intersection_count;
                let similarity = intersection_count as f64 / union_count as f64;

                if similarity > 0.1 {
                    scores.insert(entity.id.clone(), similarity);
                }
            }

            // Also check entity mentions in chunks
            for mention in &entity.mentions {
                if let Some(chunk) = graph.get_chunk(&mention.chunk_id) {
                    let chunk_lower = chunk.content.to_lowercase();
                    if chunk_lower.contains(&query_lower) {
                        let bonus_score = 0.3;
                        let current_score = scores.get(&entity.id).unwrap_or(&0.0);
                        scores.insert(entity.id.clone(), current_score + bonus_score);
                    }
                }
            }
        }

        Ok(scores)
    }

    fn compute_personalized_pagerank(
        &self,
        vector_scores: &HashMap<EntityId, f64>,
        pagerank_calculator: &PersonalizedPageRank,
    ) -> Result<HashMap<EntityId, f64>> {
        if vector_scores.is_empty() {
            return Ok(HashMap::new());
        }

        // Use vector scores as reset probabilities for personalized PageRank
        let reset_probabilities = self.normalize_reset_probabilities(vector_scores);

        // Calculate PageRank scores
        let pagerank_scores = pagerank_calculator.calculate_scores(&reset_probabilities)?;

        Ok(pagerank_scores)
    }

    fn normalize_reset_probabilities(
        &self,
        vector_scores: &HashMap<EntityId, f64>,
    ) -> HashMap<EntityId, f64> {
        let total_score: f64 = vector_scores.values().sum();

        if total_score > 0.0 {
            vector_scores
                .iter()
                .map(|(id, score)| (id.clone(), score / total_score))
                .collect()
        } else {
            HashMap::new()
        }
    }

    /// Get search statistics
    pub fn get_search_statistics(&self) -> SearchStatistics {
        SearchStatistics {
            has_vector_index: self.vector_index.is_some(),
            score_weights: self.score_weights.clone(),
            max_results: self.max_results,
            min_score_threshold: self.min_score_threshold,
        }
    }

    /// Update score weights for different search scenarios
    pub fn update_score_weights(&mut self, weights: ScoreWeights) {
        self.score_weights = weights;
    }

    /// Perform a quick entity lookup by name
    pub fn quick_entity_search(
        &self,
        entity_name: &str,
        graph: &KnowledgeGraph,
    ) -> Vec<ScoredResult> {
        let name_lower = entity_name.to_lowercase();
        let mut results = Vec::new();

        for entity in graph.entities() {
            if entity.name.to_lowercase().contains(&name_lower) {
                let score = if entity.name.to_lowercase() == name_lower {
                    1.0 // Exact match
                } else {
                    0.8 // Partial match
                };

                let result = ScoredResult {
                    entity_id: entity.id.clone(),
                    chunk_id: ChunkId::new(format!("entity_{}", entity.id)),
                    content: format!("{}: {}", entity.name, entity.entity_type),
                    score,
                    vector_score: score,
                    pagerank_score: 0.0,
                    combined_score: score,
                };

                results.push(result);
            }
        }

        results.sort_by(|a, b| {
            b.score
                .partial_cmp(&a.score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        results.truncate(self.max_results);

        results
    }

    /// Pre-compute global PageRank scores for faster query processing
    pub fn precompute_global_pagerank(&mut self, graph: &KnowledgeGraph) -> Result<()> {
        println!("🚀 Pre-computing global PageRank scores...");

        let pagerank_calculator = graph.build_pagerank_calculator()?;
        let empty_reset = HashMap::new();
        let global_scores = pagerank_calculator.calculate_scores(&empty_reset)?;

        self.global_pagerank = Some(global_scores);

        println!(
            "✅ Global PageRank scores computed for {} entities",
            self.global_pagerank.as_ref().expect("checked above").len()
        );

        Ok(())
    }

    /// High-performance batch search for multiple queries
    pub fn batch_search(
        &self,
        queries: &[&str],
        graph: &KnowledgeGraph,
        max_results_per_query: Option<usize>,
    ) -> Result<Vec<Vec<ScoredResult>>> {
        if queries.is_empty() {
            return Ok(Vec::new());
        }

        println!("🔍 Starting batch search for {} queries", queries.len());

        // Process queries in parallel for maximum performance
        let results: Result<Vec<_>> = queries
            .par_iter()
            .map(|&query| self.search_with_pagerank(query, graph, max_results_per_query))
            .collect();

        let batch_results = results?;

        println!("✅ Batch search completed");

        Ok(batch_results)
    }

    /// Execute search using dynamic edge weighting based on query context (Phase 2.2)
    ///
    /// This method applies dynamic weights to relationships based on:
    /// - Semantic similarity between relationship and query
    /// - Temporal relevance of relationships
    /// - Conceptual matching with query concepts
    /// - Causal strength for causal relationships
    ///
    /// # Arguments
    ///
    /// * `query` - The search query
    /// * `graph` - Reference to the knowledge graph
    /// * `query_embedding` - Optional embedding vector for the query
    /// * `max_results` - Optional maximum number of results
    ///
    /// # Returns
    ///
    /// Scored results with dynamically weighted relationships
    pub fn search_with_dynamic_weights(
        &self,
        query: &str,
        graph: &KnowledgeGraph,
        query_embedding: Option<&[f32]>,
        max_results: Option<usize>,
    ) -> Result<Vec<ScoredResult>> {
        let max_results = max_results.unwrap_or(self.max_results);

        #[cfg(feature = "tracing")]
        tracing::debug!(
            query = %query,
            has_embedding = query_embedding.is_some(),
            "Starting dynamic weight search"
        );

        // Extract concepts from query
        let query_concepts = extract_query_concepts(query);

        #[cfg(feature = "tracing")]
        tracing::debug!(
            concepts = ?query_concepts,
            "Extracted query concepts"
        );

        // Step 1: Perform initial vector similarity search
        let vector_scores = self.vector_similarity_search(query, graph)?;

        if vector_scores.is_empty() {
            return Ok(Vec::new());
        }

        // Step 2: Calculate dynamic weights for all relationships
        // This creates a modified view of the graph with context-aware weights
        let mut weighted_edges: HashMap<(EntityId, EntityId), f32> = HashMap::new();

        for rel in graph.get_all_relationships() {
            let dynamic_weight = graph.dynamic_weight(rel, query_embedding, &query_concepts);

            weighted_edges.insert((rel.source.clone(), rel.target.clone()), dynamic_weight);

            #[cfg(feature = "tracing")]
            if dynamic_weight > rel.confidence * 1.5 {
                tracing::trace!(
                    source = %rel.source.0,
                    target = %rel.target.0,
                    original_weight = rel.confidence,
                    dynamic_weight = dynamic_weight,
                    "Applied significant boost"
                );
            }
        }

        // Step 3: Build PageRank calculator (using base graph structure)
        let pagerank_calculator = graph.build_pagerank_calculator()?;

        // Step 4: Compute personalized PageRank with vector scores as reset probabilities
        let pagerank_scores =
            self.compute_personalized_pagerank(&vector_scores, &pagerank_calculator)?;

        // Step 5: Apply additional boost based on dynamic weights
        // Entities connected via high-weight edges get extra score
        let mut boosted_scores = pagerank_scores.clone();
        for ((source_id, target_id), weight) in &weighted_edges {
            if let Some(target_score) = boosted_scores.get_mut(target_id) {
                if let Some(&source_pr) = pagerank_scores.get(source_id) {
                    // Propagate score along high-weight edges
                    let boost = source_pr * (*weight as f64 - 1.0).max(0.0) * 0.1;
                    *target_score += boost;
                }
            }
        }

        // Step 6: Combine scores
        let mut multi_scores = MultiModalScores::new();
        multi_scores.vector_scores = vector_scores;
        multi_scores.pagerank_scores = boosted_scores;

        let combined_scores = multi_scores.combine_scores(&self.score_weights);

        // Step 7: Create scored results
        let mut scored_results = Vec::new();
        for (entity_id, combined_score) in combined_scores {
            if combined_score < self.min_score_threshold {
                continue;
            }

            // Find related chunks for this entity
            for chunk in graph.chunks() {
                if chunk.entities.contains(&entity_id) {
                    let vector_score = multi_scores.vector_scores.get(&entity_id).unwrap_or(&0.0);
                    let pagerank_score =
                        multi_scores.pagerank_scores.get(&entity_id).unwrap_or(&0.0);

                    let result = ScoredResult {
                        entity_id: entity_id.clone(),
                        chunk_id: chunk.id.clone(),
                        content: chunk.content.clone(),
                        score: combined_score,
                        vector_score: *vector_score,
                        pagerank_score: *pagerank_score,
                        combined_score,
                    };

                    scored_results.push(result);
                }
            }
        }

        // Step 8: Sort by combined score and limit results
        scored_results.sort_by(|a, b| {
            b.score
                .partial_cmp(&a.score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        scored_results.truncate(max_results);

        #[cfg(feature = "tracing")]
        tracing::info!(
            results_count = scored_results.len(),
            "Dynamic weight search completed"
        );

        Ok(scored_results)
    }
}

/// Extract conceptual keywords from a query for dynamic weighting
///
/// Simple heuristic that extracts meaningful words (>3 chars, not stopwords)
fn extract_query_concepts(query: &str) -> Vec<String> {
    let stopwords = [
        "what", "when", "where", "who", "which", "how", "does", "the", "a", "an", "is", "are",
        "was", "were", "been", "be",
    ];

    query
        .to_lowercase()
        .split_whitespace()
        .filter(|word| word.len() > 3 && !stopwords.contains(word))
        .map(|word| {
            // Remove punctuation
            word.trim_matches(|c: char| !c.is_alphanumeric())
                .to_string()
        })
        .filter(|word| !word.is_empty())
        .collect()
}

/// Implementation of the Retriever trait for PageRankRetrievalSystem
impl Retriever for PageRankRetrievalSystem {
    type Query = String;
    type Result = ScoredResult;
    type Error = GraphRAGError;

    fn search(&self, _query: String, _k: usize) -> Result<Vec<ScoredResult>> {
        // This is a simplified implementation since we need a KnowledgeGraph
        // In practice, this would be stored as part of the retriever state
        Err(GraphRAGError::Retrieval {
            message: "Use search_with_pagerank method with KnowledgeGraph parameter".to_string(),
        })
    }

    fn search_with_context(
        &self,
        query: String,
        context: &str,
        k: usize,
    ) -> Result<Vec<ScoredResult>> {
        // Enhanced query with context
        let enhanced_query = format!("{query} {context}");

        // Use in-memory graph if available; otherwise, return a clear error
        if let Some(graph) = &self.graph {
            // Ensure vector index is initialized lazily if not set
            if self.vector_index.is_none() {
                // It's safe to initialize here; ignore index content size for now
                // as initialize_vector_index requires &mut self, we can't call it here.
                // So, require callers to initialize explicitly.
            }
            // Delegate to PageRank-enhanced search
            self.search_with_pagerank(&enhanced_query, graph, Some(k))
        } else {
            Err(GraphRAGError::Retrieval {
                message: "No KnowledgeGraph set. Call set_graph(Arc<KnowledgeGraph>) or use search_with_pagerank(query, &graph, ...)".to_string(),
            })
        }
    }

    fn update(&mut self, _content: Vec<String>) -> Result<()> {
        // Clear caches when content is updated
        {
            let mut query_cache = self.query_cache.write();
            query_cache.clear();
        }
        {
            let mut entity_cache = self.entity_rank_cache.write();
            entity_cache.clear();
        }

        Ok(())
    }
}

/// Statistics about the search system configuration
#[derive(Debug, Clone)]
pub struct SearchStatistics {
    /// Whether a vector index is available
    pub has_vector_index: bool,
    /// Weights used for combining scores
    pub score_weights: ScoreWeights,
    /// Maximum number of results to return
    pub max_results: usize,
    /// Minimum score threshold for filtering
    pub min_score_threshold: f64,
}

impl SearchStatistics {
    /// Print search statistics to stdout
    pub fn print(&self) {
        println!("🔍 PageRank Retrieval Statistics");
        println!(
            "  Vector index: {}",
            if self.has_vector_index {
                "Available"
            } else {
                "Not initialized"
            }
        );
        println!("  Score weights:");
        println!("    Vector: {:.2}", self.score_weights.vector_weight);
        println!("    PageRank: {:.2}", self.score_weights.pagerank_weight);
        println!("    Chunk: {:.2}", self.score_weights.chunk_weight);
        println!(
            "    Relationship: {:.2}",
            self.score_weights.relationship_weight
        );
        println!("  Max results: {}", self.max_results);
        println!("  Min threshold: {:.3}", self.min_score_threshold);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{
        ChunkId, DocumentId, Entity, EntityId, KnowledgeGraph, Relationship, TextChunk,
    };

    fn create_test_graph() -> KnowledgeGraph {
        let mut graph = KnowledgeGraph::new();

        // Add test entities
        let entity1 = Entity::new(
            EntityId::new("entity1".to_string()),
            "Apple Inc".to_string(),
            "ORGANIZATION".to_string(),
            0.9,
        );
        let entity2 = Entity::new(
            EntityId::new("entity2".to_string()),
            "iPhone".to_string(),
            "PRODUCT".to_string(),
            0.8,
        );
        let entity3 = Entity::new(
            EntityId::new("entity3".to_string()),
            "Steve Jobs".to_string(),
            "PERSON".to_string(),
            0.9,
        );

        graph.add_entity(entity1).unwrap();
        graph.add_entity(entity2).unwrap();
        graph.add_entity(entity3).unwrap();

        // Add test relationship
        let relationship = Relationship {
            source: EntityId::new("entity1".to_string()),
            target: EntityId::new("entity2".to_string()),
            relation_type: "PRODUCES".to_string(),
            confidence: 0.8,
            context: vec![],
            embedding: None,
            temporal_type: None,
            temporal_range: None,
            causal_strength: None,
        };
        graph.add_relationship(relationship).unwrap();

        // Add test chunks
        let chunk1 = TextChunk::new(
            ChunkId::new("chunk1".to_string()),
            DocumentId::new("doc1".to_string()),
            "Apple Inc is a technology company that produces the iPhone.".to_string(),
            0,
            56,
        )
        .with_entities(vec![
            EntityId::new("entity1".to_string()),
            EntityId::new("entity2".to_string()),
        ]);

        graph.add_chunk(chunk1).unwrap();

        graph
    }

    #[test]
    fn test_pagerank_retrieval_system_creation() {
        let retrieval = PageRankRetrievalSystem::new(10);
        let stats = retrieval.get_search_statistics();

        assert_eq!(stats.max_results, 10);
        assert!(!stats.has_vector_index);
    }

    #[test]
    fn test_vector_similarity_search() {
        let graph = create_test_graph();
        let retrieval = PageRankRetrievalSystem::new(10);

        let scores = retrieval
            .vector_similarity_search("Apple technology", &graph)
            .unwrap();

        // Should find Apple Inc entity
        assert!(!scores.is_empty());
        assert!(scores.contains_key(&EntityId::new("entity1".to_string())));
    }

    #[test]
    fn test_quick_entity_search() {
        let graph = create_test_graph();
        let retrieval = PageRankRetrievalSystem::new(10);

        let results = retrieval.quick_entity_search("Apple", &graph);

        assert!(!results.is_empty());
        assert_eq!(results[0].entity_id, EntityId::new("entity1".to_string()));
        assert!(results[0].score > 0.7);
    }

    #[test]
    fn test_search_with_pagerank() {
        let graph = create_test_graph();
        let mut retrieval = PageRankRetrievalSystem::new(10);

        retrieval.initialize_vector_index(&graph).unwrap();

        let results = retrieval
            .search_with_pagerank("Apple iPhone", &graph, None)
            .unwrap();

        // Should return results with both vector and PageRank scores
        if !results.is_empty() {
            assert!(results[0].vector_score >= 0.0);
            assert!(results[0].pagerank_score >= 0.0);
            assert!(results[0].combined_score > 0.0);
        }
    }

    #[test]
    fn test_precompute_global_pagerank() {
        let graph = create_test_graph();
        let mut retrieval = PageRankRetrievalSystem::new(10);

        retrieval.precompute_global_pagerank(&graph).unwrap();
        assert!(retrieval.global_pagerank.is_some());

        let global_scores = retrieval.global_pagerank.as_ref().unwrap();
        assert!(!global_scores.is_empty());

        // Verify scores are normalized (sum approximately 1.0)
        let total_score: f64 = global_scores.values().sum();
        assert!((total_score - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_batch_search() {
        let graph = create_test_graph();
        let mut retrieval = PageRankRetrievalSystem::new(5);

        retrieval.initialize_vector_index(&graph).unwrap();

        let queries = vec!["Apple", "iPhone", "Steve Jobs"];
        let results = retrieval.batch_search(&queries, &graph, Some(3)).unwrap();

        assert_eq!(results.len(), 3);
        for query_results in &results {
            assert!(query_results.len() <= 3);
        }
    }

    #[test]
    fn test_pagerank_config_performance() {
        let graph = create_test_graph();

        // Test parallel vs sequential
        let parallel_config = PageRankConfig {
            parallel_enabled: true,
            cache_size: 100,
            ..PageRankConfig::default()
        };

        let sequential_config = PageRankConfig {
            parallel_enabled: false,
            cache_size: 100,
            ..PageRankConfig::default()
        };

        let mut parallel_retrieval =
            PageRankRetrievalSystem::new(5).with_pagerank_config(parallel_config);
        let mut sequential_retrieval =
            PageRankRetrievalSystem::new(5).with_pagerank_config(sequential_config);

        parallel_retrieval.initialize_vector_index(&graph).unwrap();
        sequential_retrieval
            .initialize_vector_index(&graph)
            .unwrap();

        let query = "Apple iPhone";
        let parallel_results = parallel_retrieval
            .search_with_pagerank(query, &graph, None)
            .unwrap();
        let sequential_results = sequential_retrieval
            .search_with_pagerank(query, &graph, None)
            .unwrap();

        // Both should return valid results
        assert!(!parallel_results.is_empty());
        assert!(!sequential_results.is_empty());
    }

    #[test]
    fn test_cache_effectiveness() {
        let graph = create_test_graph();
        let mut retrieval = PageRankRetrievalSystem::new(5).with_pagerank_config(PageRankConfig {
            cache_size: 1000,
            ..PageRankConfig::default()
        });

        retrieval.initialize_vector_index(&graph).unwrap();

        let query = "Apple";

        // First query should populate cache
        let start_time = std::time::Instant::now();
        let _results1 = retrieval.search_with_pagerank(query, &graph, None).unwrap();
        let first_duration = start_time.elapsed();

        // Second query should be faster due to cache
        let start_time = std::time::Instant::now();
        let _results2 = retrieval.search_with_pagerank(query, &graph, None).unwrap();
        let second_duration = start_time.elapsed();

        // Cache should make the second query faster (though this might be flaky in tests)
        // At minimum, both queries should complete successfully
        assert!(first_duration > std::time::Duration::from_nanos(0));
        assert!(second_duration > std::time::Duration::from_nanos(0));
    }

    #[test]
    fn test_incremental_mode() {
        let graph = create_test_graph();
        let mut retrieval = PageRankRetrievalSystem::new(5).with_incremental_mode(true);

        retrieval.initialize_vector_index(&graph).unwrap();

        // Test that incremental mode doesn't break functionality
        let results = retrieval
            .search_with_pagerank("Apple", &graph, None)
            .unwrap();
        assert!(!results.is_empty());

        // Update content should clear caches
        let update_result = retrieval.update(vec!["new content".to_string()]);
        assert!(update_result.is_ok());
    }
}