graphrag-core 0.2.0

#![cfg_attr(not(feature = "async"), allow(unused_imports))]

//! LLM-based entity and relationship extraction
//!
//! This module provides TRUE LLM-based extraction using Ollama or any other LLM service.
//! Unlike pattern-based extraction, this uses actual language model inference to extract
//! entities and relationships from text with deep semantic understanding.

use crate::{
    core::{ChunkId, Entity, EntityId, EntityMention, Relationship, TextChunk},
    entity::prompts::{EntityData, ExtractionOutput, PromptBuilder, RelationshipData},
    ollama::OllamaClient,
    GraphRAGError, Result,
};
use serde_json;

/// LLM-based entity extractor that uses actual language model calls
pub struct LLMEntityExtractor {
    #[cfg_attr(not(feature = "async"), allow(dead_code))]
    ollama_client: OllamaClient,
    #[cfg_attr(not(feature = "async"), allow(dead_code))]
    prompt_builder: PromptBuilder,
    temperature: f32,
    max_tokens: usize,
    /// keep_alive value forwarded to every Ollama request (e.g. "1h").
    /// When set, Ollama keeps the model in VRAM between requests so the KV
    /// cache for the static prompt prefix is preserved across all chunks.
    keep_alive: Option<String>,
}

impl LLMEntityExtractor {
    /// Create a new LLM-based entity extractor
    ///
    /// # Arguments
    /// * `ollama_client` - Ollama client for LLM inference
    /// * `entity_types` - List of entity types to extract (e.g., ["PERSON", "LOCATION", "ORGANIZATION"])
    pub fn new(ollama_client: OllamaClient, entity_types: Vec<String>) -> Self {
        Self {
            ollama_client,
            prompt_builder: PromptBuilder::new(entity_types),
            temperature: 0.0, // Zero temperature for deterministic JSON extraction
            max_tokens: 1500,
            keep_alive: None,
        }
    }

    /// Set temperature for LLM generation (default: 0.1)
    pub fn with_temperature(mut self, temperature: f32) -> Self {
        self.temperature = temperature;
        self
    }

    /// Set maximum tokens for LLM generation (default: 1500)
    pub fn with_max_tokens(mut self, max_tokens: usize) -> Self {
        self.max_tokens = max_tokens;
        self
    }

    /// Set keep_alive for KV cache persistence (e.g. "1h", "30m").
    pub fn with_keep_alive(mut self, keep_alive: Option<String>) -> Self {
        self.keep_alive = keep_alive;
        self
    }

    /// Estimate token count from text length (≈ 1 token per 4 chars).
    pub fn estimate_tokens(text: &str) -> u32 {
        (text.len() / 4) as u32
    }

    /// Calculate the required `num_ctx` for one entity-extraction LLM call.
    ///
    /// Formula (mirrors `ContextualEnricher::calculate_num_ctx`):
    /// ```text
    /// num_ctx = tokens(built_prompt)   // instructions + entity types + chunk text
    ///         + max_output_tokens       // JSON with entities + relationships
    ///         + 20% safety margin       // avoids silent truncation
    /// ```
    /// Result is rounded up to the nearest 1024 and clamped to [4096, 131072].
    pub fn calculate_entity_num_ctx(built_prompt: &str, max_output_tokens: u32) -> u32 {
        let prompt_tokens = Self::estimate_tokens(built_prompt);
        let total = prompt_tokens + max_output_tokens;
        let with_margin = (total as f32 * 1.20) as u32;
        let rounded = ((with_margin + 1023) / 1024) * 1024;
        rounded.clamp(4096, 131_072)
    }

    /// Extract entities and relationships from a text chunk using LLM
    ///
    /// This is the REAL extraction that makes actual LLM API calls.
    /// Expected time: 15-30 seconds per chunk depending on chunk size and model.
    #[cfg(feature = "async")]
    pub async fn extract_from_chunk(
        &self,
        chunk: &TextChunk,
    ) -> Result<(Vec<Entity>, Vec<Relationship>)> {
        #[cfg(feature = "tracing")]
        tracing::debug!(
            "LLM extraction for chunk: {} (size: {} chars)",
            chunk.id,
            chunk.content.len()
        );

        // Build extraction prompt
        let prompt = self.prompt_builder.build_extraction_prompt(&chunk.content);

        // Call LLM for extraction (THIS IS THE REAL LLM CALL!)
        let llm_response = self.call_llm_with_retry(&prompt).await?;

        // Parse response into structured data
        let extraction_output = self.parse_extraction_response(&llm_response)?;

        // Convert to domain entities and relationships
        let entities =
            self.convert_to_entities(&extraction_output.entities, &chunk.id, &chunk.content)?;
        let relationships =
            self.convert_to_relationships(&extraction_output.relationships, &entities)?;

        #[cfg(feature = "tracing")]
        tracing::info!(
            "LLM extracted {} entities and {} relationships from chunk {}",
            entities.len(),
            relationships.len(),
            chunk.id
        );

        Ok((entities, relationships))
    }

    /// Extract additional entities in a gleaning round (continuation)
    ///
    /// This is used after the initial extraction to catch missed entities.
    #[cfg(feature = "async")]
    pub async fn extract_additional(
        &self,
        chunk: &TextChunk,
        previous_entities: &[EntityData],
        previous_relationships: &[RelationshipData],
    ) -> Result<(Vec<Entity>, Vec<Relationship>)> {
        #[cfg(feature = "tracing")]
        tracing::debug!("LLM gleaning round for chunk: {}", chunk.id);

        // Build continuation prompt with previous extraction
        let prompt = self.prompt_builder.build_continuation_prompt(
            &chunk.content,
            previous_entities,
            previous_relationships,
        );

        // Call LLM for additional extraction
        let llm_response = self.call_llm_with_retry(&prompt).await?;

        // Parse response
        let extraction_output = self.parse_extraction_response(&llm_response)?;

        // Convert to domain entities
        let entities =
            self.convert_to_entities(&extraction_output.entities, &chunk.id, &chunk.content)?;
        let relationships =
            self.convert_to_relationships(&extraction_output.relationships, &entities)?;

        #[cfg(feature = "tracing")]
        tracing::info!(
            "LLM gleaning extracted {} additional entities and {} relationships",
            entities.len(),
            relationships.len()
        );

        Ok((entities, relationships))
    }

    /// Check if extraction is complete using LLM judgment
    ///
    /// Uses the LLM to determine if all significant entities have been extracted.
    #[cfg(feature = "async")]
    pub async fn check_completion(
        &self,
        chunk: &TextChunk,
        entities: &[EntityData],
        relationships: &[RelationshipData],
    ) -> Result<bool> {
        #[cfg(feature = "tracing")]
        tracing::debug!("LLM completion check for chunk: {}", chunk.id);

        // Build completion check prompt
        let prompt =
            self.prompt_builder
                .build_completion_prompt(&chunk.content, entities, relationships);

        // Call LLM with logit bias for YES/NO response
        let llm_response = self.call_llm_completion_check(&prompt).await?;

        // Parse YES/NO response
        let response_trimmed = llm_response.trim().to_uppercase();
        let is_complete = response_trimmed.starts_with("YES") || response_trimmed.contains("YES");

        #[cfg(feature = "tracing")]
        tracing::debug!(
            "LLM completion check result: {} (response: {})",
            if is_complete {
                "COMPLETE"
            } else {
                "INCOMPLETE"
            },
            llm_response.trim()
        );

        Ok(is_complete)
    }

    /// Call LLM with dynamic num_ctx and retry logic for extraction.
    ///
    /// `num_ctx` is calculated from the built prompt via
    /// [`calculate_entity_num_ctx`] before this call, ensuring Ollama never
    /// silently truncates the prompt. `keep_alive` keeps the model loaded in
    /// VRAM between chunk requests so the KV cache is preserved.
    #[cfg(feature = "async")]
    async fn call_llm_with_retry(&self, prompt: &str) -> Result<String> {
        use crate::ollama::OllamaGenerationParams;
        let num_ctx = Self::calculate_entity_num_ctx(prompt, self.max_tokens as u32);
        #[cfg(feature = "tracing")]
        tracing::debug!(
            "Entity extraction: prompt_len={} num_ctx={} keep_alive={:?}",
            prompt.len(),
            num_ctx,
            self.keep_alive,
        );
        let params = OllamaGenerationParams {
            num_predict: Some(self.max_tokens as u32),
            temperature: Some(self.temperature),
            num_ctx: Some(num_ctx),
            keep_alive: self.keep_alive.clone(),
            ..Default::default()
        };
        match self
            .ollama_client
            .generate_with_params(prompt, params.clone())
            .await
        {
            Ok(response) => Ok(response),
            Err(e) => {
                #[cfg(feature = "tracing")]
                tracing::warn!("LLM call failed, retrying: {}", e);
                tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
                self.ollama_client
                    .generate_with_params(prompt, params)
                    .await
            },
        }
    }

    /// Call LLM for completion check (expects a short YES/NO answer).
    ///
    /// Uses a much smaller `num_predict` (50 tokens) and a deterministic
    /// temperature of 0.0 to get a reliable YES/NO response.
    #[cfg(feature = "async")]
    async fn call_llm_completion_check(&self, prompt: &str) -> Result<String> {
        use crate::ollama::OllamaGenerationParams;
        let num_ctx = Self::calculate_entity_num_ctx(prompt, 50);
        let params = OllamaGenerationParams {
            num_predict: Some(50),
            temperature: Some(0.0),
            num_ctx: Some(num_ctx),
            keep_alive: self.keep_alive.clone(),
            ..Default::default()
        };
        self.ollama_client
            .generate_with_params(prompt, params)
            .await
    }

    /// Parse LLM response into structured extraction output
    ///
    /// Handles multiple JSON formats and attempts repair if needed
    #[cfg(feature = "async")]
    fn parse_extraction_response(&self, response: &str) -> Result<ExtractionOutput> {
        // Strategy 1: Try direct JSON parsing
        if let Ok(output) = serde_json::from_str::<ExtractionOutput>(response) {
            return Ok(output);
        }

        // Strategy 2: Try to extract JSON from markdown code blocks
        if let Some(json_str) = Self::extract_json_from_markdown(response) {
            if let Ok(output) = serde_json::from_str::<ExtractionOutput>(json_str) {
                return Ok(output);
            }
        }

        // Strategy 3: Try JSON repair using jsonfixer
        match self.repair_and_parse_json(response) {
            Ok(output) => return Ok(output),
            Err(_e) => {
                #[cfg(feature = "tracing")]
                tracing::warn!("JSON repair failed: {}", _e);
            },
        }

        // Strategy 4: Look for JSON anywhere in the response
        if let Some(json_str) = Self::find_json_in_text(response) {
            if let Ok(output) = serde_json::from_str::<ExtractionOutput>(json_str) {
                return Ok(output);
            }

            // Try repairing the extracted JSON
            if let Ok(output) = self.repair_and_parse_json(json_str) {
                return Ok(output);
            }
        }

        // If all strategies fail, return empty extraction
        #[cfg(feature = "tracing")]
        tracing::error!(
            "Failed to parse LLM response as JSON. Response preview: {}",
            &response.chars().take(200).collect::<String>()
        );
        Ok(ExtractionOutput {
            entities: vec![],
            relationships: vec![],
        })
    }

    /// Extract JSON from markdown code blocks
    #[cfg(feature = "async")]
    fn extract_json_from_markdown(text: &str) -> Option<&str> {
        // Look for ```json ... ``` or ``` ... ```
        if let Some(start) = text.find("```json") {
            let json_start = start + 7; // length of ```json
            if let Some(end) = text[json_start..].find("```") {
                return Some(text[json_start..json_start + end].trim());
            }
        }

        if let Some(start) = text.find("```") {
            let json_start = start + 3;
            if let Some(end) = text[json_start..].find("```") {
                let candidate = &text[json_start..json_start + end].trim();
                // Check if it looks like JSON
                if candidate.starts_with('{') || candidate.starts_with('[') {
                    return Some(candidate);
                }
            }
        }

        None
    }

    /// Find JSON object or array anywhere in text
    #[cfg(feature = "async")]
    fn find_json_in_text(text: &str) -> Option<&str> {
        // Find first { and last }
        if let Some(start) = text.find('{') {
            if let Some(end) = text.rfind('}') {
                if end > start {
                    return Some(&text[start..=end]);
                }
            }
        }
        None
    }

    /// Attempt to repair malformed JSON using jsonfixer
    #[cfg(feature = "async")]
    fn repair_and_parse_json(&self, json_str: &str) -> Result<ExtractionOutput> {
        // jsonfixer::repair_json returns Result<String, Error>
        let options = jsonfixer::JsonRepairOptions::default();
        let fixed_json =
            jsonfixer::repair_json(json_str, options).map_err(|e| GraphRAGError::Generation {
                message: format!("JSON repair failed: {:?}", e),
            })?;

        serde_json::from_str::<ExtractionOutput>(&fixed_json).map_err(|e| {
            GraphRAGError::Generation {
                message: format!("Failed to parse repaired JSON: {}", e),
            }
        })
    }

    /// Convert EntityData to domain Entity objects
    #[cfg(feature = "async")]
    fn convert_to_entities(
        &self,
        entity_data: &[EntityData],
        chunk_id: &ChunkId,
        chunk_text: &str,
    ) -> Result<Vec<Entity>> {
        let mut entities = Vec::new();

        for entity_item in entity_data {
            // Generate entity ID
            let entity_id = EntityId::new(format!(
                "{}_{}",
                entity_item.entity_type,
                self.normalize_name(&entity_item.name)
            ));

            // Find mentions in chunk
            let mentions = self.find_mentions(&entity_item.name, chunk_id, chunk_text);

            // Create entity with mentions
            // Note: Description is stored in the entity but not used in current Entity struct
            // We store it in the entity name or as a separate field if needed
            let entity = Entity::new(
                entity_id,
                entity_item.name.clone(),
                entity_item.entity_type.clone(),
                0.9, // High confidence since it's LLM-extracted
            )
            .with_mentions(mentions);

            entities.push(entity);
        }

        Ok(entities)
    }

    /// Find all mentions of an entity name in the chunk text
    #[cfg(feature = "async")]
    fn find_mentions(&self, name: &str, chunk_id: &ChunkId, text: &str) -> Vec<EntityMention> {
        let mut mentions = Vec::new();
        let mut start = 0;

        while let Some(pos) = text[start..].find(name) {
            let actual_pos = start + pos;
            mentions.push(EntityMention {
                chunk_id: chunk_id.clone(),
                start_offset: actual_pos,
                end_offset: actual_pos + name.len(),
                confidence: 0.9,
            });
            start = actual_pos + name.len();
        }

        // If no exact matches, try case-insensitive
        if mentions.is_empty() {
            let name_lower = name.to_lowercase();
            let text_lower = text.to_lowercase();
            let mut start = 0;

            while let Some(pos) = text_lower[start..].find(&name_lower) {
                let actual_pos = start + pos;
                mentions.push(EntityMention {
                    chunk_id: chunk_id.clone(),
                    start_offset: actual_pos,
                    end_offset: actual_pos + name.len(),
                    confidence: 0.85, // Slightly lower confidence for case-insensitive match
                });
                start = actual_pos + name.len();
            }
        }

        mentions
    }

    /// Convert RelationshipData to domain Relationship objects
    #[cfg(feature = "async")]
    fn convert_to_relationships(
        &self,
        relationship_data: &[RelationshipData],
        entities: &[Entity],
    ) -> Result<Vec<Relationship>> {
        let mut relationships = Vec::new();

        // Build entity name to ID mapping
        let mut name_to_entity: std::collections::HashMap<String, &Entity> =
            std::collections::HashMap::new();
        for entity in entities {
            name_to_entity.insert(entity.name.to_lowercase(), entity);
        }

        for rel_item in relationship_data {
            // Find source and target entities
            let source_entity = name_to_entity.get(&rel_item.source.to_lowercase());
            let target_entity = name_to_entity.get(&rel_item.target.to_lowercase());

            if let (Some(source), Some(target)) = (source_entity, target_entity) {
                let relationship = Relationship {
                    source: source.id.clone(),
                    target: target.id.clone(),
                    relation_type: rel_item.description.clone(),
                    confidence: rel_item.strength as f32,
                    context: vec![], // No context chunks for this relationship
                    embedding: None,
                    temporal_type: None,
                    temporal_range: None,
                    causal_strength: None,
                };

                relationships.push(relationship);
            } else {
                #[cfg(feature = "tracing")]
                tracing::warn!(
                    "Skipping relationship: entity not found. Source: {}, Target: {}",
                    rel_item.source,
                    rel_item.target
                );
            }
        }

        Ok(relationships)
    }

    /// Normalize entity name for ID generation
    #[cfg(feature = "async")]
    fn normalize_name(&self, name: &str) -> String {
        name.to_lowercase()
            .chars()
            .filter(|c| c.is_alphanumeric() || *c == '_')
            .collect::<String>()
            .replace(' ', "_")
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{core::DocumentId, ollama::OllamaConfig};

    fn create_test_chunk() -> TextChunk {
        TextChunk::new(
            ChunkId::new("chunk_001".to_string()),
            DocumentId::new("doc_001".to_string()),
            "Tom Sawyer is a young boy who lives in St. Petersburg with his Aunt Polly. \
             Tom is best friends with Huckleberry Finn. They often go on adventures together."
                .to_string(),
            0,
            150,
        )
    }

    #[test]
    fn test_extract_json_from_markdown() {
        let markdown = r#"
Here's the extraction:
```json
{
  "entities": [],
  "relationships": []
}
```
"#;
        let json = LLMEntityExtractor::extract_json_from_markdown(markdown);
        assert!(json.is_some());
        assert!(json.unwrap().contains("entities"));
    }

    #[test]
    fn test_find_json_in_text() {
        let text = "Some text before { \"entities\": [] } some text after";
        let json = LLMEntityExtractor::find_json_in_text(text);
        assert!(json.is_some());
        assert_eq!(json.unwrap(), "{ \"entities\": [] }");
    }

    #[test]
    fn test_parse_valid_json() {
        let ollama_config = OllamaConfig::default();
        let ollama_client = OllamaClient::new(ollama_config);
        let extractor = LLMEntityExtractor::new(
            ollama_client,
            vec!["PERSON".to_string(), "LOCATION".to_string()],
        );

        let response = r#"
{
  "entities": [
    {
      "name": "Tom Sawyer",
      "type": "PERSON",
      "description": "A young boy"
    }
  ],
  "relationships": []
}
"#;

        let result = extractor.parse_extraction_response(response);
        assert!(result.is_ok());
        let output = result.unwrap();
        assert_eq!(output.entities.len(), 1);
        assert_eq!(output.entities[0].name, "Tom Sawyer");
    }

    #[test]
    fn test_convert_to_entities() {
        let ollama_config = OllamaConfig::default();
        let ollama_client = OllamaClient::new(ollama_config);
        let extractor = LLMEntityExtractor::new(ollama_client, vec!["PERSON".to_string()]);

        let chunk = create_test_chunk();
        let entity_data = vec![EntityData {
            name: "Tom Sawyer".to_string(),
            entity_type: "PERSON".to_string(),
            description: "A young boy".to_string(),
        }];

        let entities = extractor
            .convert_to_entities(&entity_data, &chunk.id, &chunk.content)
            .unwrap();

        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].name, "Tom Sawyer");
        assert_eq!(entities[0].entity_type, "PERSON");
        assert!(!entities[0].mentions.is_empty());
    }

    #[test]
    fn test_find_mentions() {
        let ollama_config = OllamaConfig::default();
        let ollama_client = OllamaClient::new(ollama_config);
        let extractor = LLMEntityExtractor::new(ollama_client, vec!["PERSON".to_string()]);

        let chunk = create_test_chunk();
        let mentions = extractor.find_mentions("Tom", &chunk.id, &chunk.content);

        assert!(!mentions.is_empty());
        assert!(mentions.len() >= 2); // "Tom Sawyer" and "Tom is best friends"
    }
}