simplify_baml 0.2.0

Simplified BAML runtime for structured LLM outputs using native Rust types with macros
//! Main Runtime API
//!
//! This is the primary interface for executing BAML functions.
//! It orchestrates: template rendering -> LLM call -> response parsing.

use std::collections::HashMap;
use std::sync::Arc;

use anyhow::{Context, Result};
use thiserror::Error;

use crate::{
    client::{DynLLMClient, LLMClientTrait},
    ir::{BamlValue, FieldType, IR},
    parser::Parser,
    partial_parser::try_parse_partial_json,
    renderer::PromptRenderer,
    streaming_value::StreamingBamlValue,
};

/// Error types for BAML execution
#[derive(Debug, Error)]
pub enum BamlError {
    /// Network/transport errors (not retryable by default)
    #[error("Network error: {0}")]
    Network(String),

    /// LLM response parsing errors (retryable)
    #[error("Parse error: {0}")]
    Parse(String),

    /// Validation errors (retryable)
    #[error("Validation error: {0}")]
    Validation(String),

    /// Other errors
    #[error("{0}")]
    Other(String),
}

impl BamlError {
    /// Returns true if this error type should be retried
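    ///
    /// For example (mirroring the unit tests below):
    /// ```rust,ignore
    /// assert!(BamlError::Parse("invalid json".to_string()).is_retryable());
    /// assert!(!BamlError::Network("connection refused".to_string()).is_retryable());
    /// ```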
    pub fn is_retryable(&self) -> bool {
        matches!(self, BamlError::Parse(_) | BamlError::Validation(_))
    }
}

/// Configuration for retry behavior
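///
/// For example, using the constructors below:
/// ```rust,ignore
/// let config = RetryConfig::new(5).without_empty_array_retry();
/// assert_eq!(config.max_retries, 5);
/// assert!(!config.retry_empty_arrays);
/// ```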
#[derive(Debug, Clone)]
pub struct RetryConfig {
    /// Maximum number of retry attempts
    pub max_retries: usize,
    /// Whether to retry when LLM returns empty arrays
    pub retry_empty_arrays: bool,
}

impl Default for RetryConfig {
    fn default() -> Self {
        Self {
            max_retries: 3,
            retry_empty_arrays: true,
        }
    }
}

impl RetryConfig {
    /// Create a new RetryConfig with specified max retries
    pub fn new(max_retries: usize) -> Self {
        Self {
            max_retries,
            ..Default::default()
        }
    }

    /// Disable empty array retries
    pub fn without_empty_array_retry(mut self) -> Self {
        self.retry_empty_arrays = false;
        self
    }
}

/// Validate that a parsed result meets expectations
///
/// This recursively walks class fields and list items, warning (without
/// failing) when the LLM returns an empty array, which may indicate
/// incomplete output. Missing fields are not checked here.
fn validate_result(ir: &IR, value: &BamlValue, expected_type: &FieldType) -> Result<()> {
    match expected_type {
        FieldType::Class(class_name) => {
            if let Some(class) = ir.find_class(class_name) {
                if let BamlValue::Map(map) = value {
                    // Check each field
                    for field in &class.fields {
                        if let Some(field_value) = map.get(&field.name) {
                            // Recursively validate nested structures
                            validate_result(ir, field_value, &field.field_type)?;
                        }
                    }
                }
            }
        }
        FieldType::List(inner) => {
            if let BamlValue::List(items) = value {
                if items.is_empty() {
                    // Log a warning but don't fail - some arrays might legitimately be empty
                    eprintln!("Warning: LLM returned empty array. This might indicate incomplete output.");
                }
                // Recursively validate each element against the list's inner type
                for item in items {
                    validate_result(ir, item, inner)?;
                }
            }
        }
        _ => {}
    }
    Ok(())
}

/// Generate a prompt from IR, template, and parameters
///
/// This function takes an IR (Intermediate Representation), a Jinja2 template,
/// input parameters, and an output type, and generates the final prompt string
/// that will be sent to the LLM. It automatically injects the schema based on
/// the output type.
///
/// # Arguments
/// * `ir` - The Intermediate Representation containing type definitions
/// * `template` - The Jinja2 template string
/// * `params` - Input parameters as a HashMap of BamlValues
/// * `output_type` - The expected output type for schema generation
///
/// # Returns
/// The rendered prompt string with schema appended
///
/// # Example
/// ```rust,ignore
/// use simplify_baml::*;
/// use std::collections::HashMap;
///
/// let ir = IR::new(); // in practice, an IR that defines a `Person` class
/// let template = "Extract person info from: {{ text }}";
/// let mut params = HashMap::new();
/// params.insert("text".to_string(), BamlValue::String("John is 30".to_string()));
///
/// let prompt = generate_prompt_from_ir(
///     &ir,
///     template,
///     &params,
///     &FieldType::Class("Person".to_string())
/// ).unwrap();
/// ```
pub fn generate_prompt_from_ir(
    ir: &IR,
    template: &str,
    params: &HashMap<String, BamlValue>,
    output_type: &FieldType,
) -> Result<String> {
    let renderer = PromptRenderer::new(ir);
    renderer.render(template, params, output_type)
        .context("Failed to render prompt from IR")
}

/// Parse an LLM response using IR type definitions
///
/// This function takes a raw LLM response string and parses it into a typed
/// BamlValue based on the IR (Intermediate Representation). It handles:
/// - Extracting JSON from markdown code blocks
/// - Lenient JSON parsing
/// - Type coercion (e.g., string "30" → int 30)
/// - Enum validation with case-insensitive matching
/// - Nested structure validation
///
/// # Arguments
/// * `ir` - The Intermediate Representation containing type definitions
/// * `raw_response` - The raw string response from the LLM
/// * `target_type` - The expected output type to parse into
///
/// # Returns
/// The parsed and type-coerced BamlValue
///
/// # Example
/// ```rust,ignore
/// use simplify_baml::*;
///
/// let ir = IR::new(); // in practice, an IR that defines a `Person` class
/// let raw_response = r#"```json
/// {"name": "John", "age": "30"}
/// ```"#;
///
/// let result = parse_llm_response_with_ir(
///     &ir,
///     raw_response,
///     &FieldType::Class("Person".to_string())
/// ).unwrap();
/// ```
pub fn parse_llm_response_with_ir(
    ir: &IR,
    raw_response: &str,
    target_type: &FieldType,
) -> Result<BamlValue> {
    let parser = Parser::new(ir);
    parser.parse(raw_response, target_type)
        .context("Failed to parse LLM response using IR")
}

/// Try to parse a partial LLM response from streaming
///
/// This function attempts to parse potentially incomplete JSON from streaming
/// LLM responses. It uses heuristics to auto-close incomplete structures and
/// will return None if the JSON is too incomplete to parse.
///
/// # Arguments
/// * `ir` - The Intermediate Representation containing type definitions
/// * `partial_response` - The potentially incomplete response from streaming
/// * `target_type` - The expected output type to parse into
///
/// # Returns
/// * `Ok(Some(BamlValue))` - Successfully parsed partial response
/// * `Ok(None)` - Too incomplete to parse, need more data
/// * `Err(...)` - Parsing error
///
/// # Example
/// ```rust,ignore
/// use simplify_baml::*;
///
/// let ir = IR::new(); // in practice, an IR that defines a `Person` class
/// let target_type = FieldType::Class("Person".to_string());
///
/// // Streaming chunks
/// let chunk1 = r#"{"name": "Joh"#;
/// let chunk2 = r#"{"name": "John", "age": 3"#;
/// let chunk3 = r#"{"name": "John", "age": 30}"#;
///
/// // Try parsing each chunk
/// assert!(try_parse_partial_response(&ir, chunk1, &target_type).unwrap().is_some());
/// assert!(try_parse_partial_response(&ir, chunk2, &target_type).unwrap().is_some());
/// assert!(try_parse_partial_response(&ir, chunk3, &target_type).unwrap().is_some());
/// ```
pub fn try_parse_partial_response(
    ir: &IR,
    partial_response: &str,
    target_type: &FieldType,
) -> Result<Option<BamlValue>> {
    // First, try to extract and auto-close partial JSON
    match try_parse_partial_json(partial_response)? {
        Some(json_value) => {
            // We got a JSON value, now try to coerce it using the parser
            let json_str = serde_json::to_string(&json_value)?;
            match parse_llm_response_with_ir(ir, &json_str, target_type) {
                Ok(baml_value) => Ok(Some(baml_value)),
                Err(_) => Ok(None), // Coercion failed, need more data
            }
        }
        None => Ok(None), // Not enough data yet
    }
}

/// Parse streaming response with schema-aware structure (RECOMMENDED for UIs)
///
/// This function provides the best UX for streaming by always returning the full
/// schema structure. Fields are filled in as data arrives, but the structure
/// never changes. This makes UI rendering much simpler and more predictable.
///
/// # Arguments
/// * `streaming_value` - The streaming value to update (create with `StreamingBamlValue::from_ir_skeleton`)
/// * `ir` - The Intermediate Representation
/// * `partial_response` - The current accumulated response
/// * `target_type` - The expected output type
/// * `is_final` - Whether this is the final chunk (marks as complete)
///
/// # Example
/// ```rust,ignore
/// use simplify_baml::*;
///
/// // Create skeleton with full structure
/// let mut streaming = StreamingBamlValue::from_ir_skeleton(&ir, &target_type);
/// let mut accumulated = String::new();
///
/// // As chunks arrive from the LLM stream, update in place
/// while let Some(chunk) = stream.next().await {
///     accumulated.push_str(&chunk?);
///
///     update_streaming_response(&mut streaming, &ir, &accumulated, &target_type, false)?;
///
///     // UI always gets full structure!
///     println!("{}", serde_json::to_string_pretty(&streaming)?);
///     // {
///     //   "value": {"name": "John", "age": null, "occupation": null},
///     //   "state": "partial"
///     // }
/// }
///
/// // Mark final
/// update_streaming_response(&mut streaming, &ir, &accumulated, &target_type, true)?;
/// ```
pub fn update_streaming_response(
    streaming_value: &mut StreamingBamlValue,
    ir: &IR,
    partial_response: &str,
    target_type: &FieldType,
    is_final: bool,
) -> Result<()> {
    if is_final {
        let final_value = parse_llm_response_with_ir(ir, partial_response, target_type)
            .context("Final streaming chunk failed to parse")?;
        streaming_value.update_from_partial(ir, final_value, target_type);
        streaming_value.mark_complete();
    } else if let Some(partial_baml) = try_parse_partial_response(ir, partial_response, target_type)? {
        streaming_value.update_from_partial(ir, partial_baml, target_type);
    }

    Ok(())
}

/// Runtime that executes BAML functions, holding the IR and registered LLM clients.
pub struct BamlRuntime {
    ir: IR,
    clients: HashMap<String, DynLLMClient>,
}

impl BamlRuntime {
    /// Create a new runtime with the given IR
    pub fn new(ir: IR) -> Self {
        Self {
            ir,
            clients: HashMap::new(),
        }
    }

    /// Register an LLM client with a name (accepts any LLMClientTrait impl)
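    ///
    /// For example, with the built-in `LLMClient` (here `api_key` is a placeholder):
    /// ```rust,ignore
    /// runtime.register_client("openai", LLMClient::openai(api_key, "gpt-4".to_string()));
    /// ```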
    pub fn register_client<C: LLMClientTrait + 'static>(&mut self, name: impl Into<String>, client: C) {
        self.clients.insert(name.into(), Arc::new(client));
    }

    /// Register a pre-wrapped Arc client
    pub fn register_dyn_client(&mut self, name: impl Into<String>, client: DynLLMClient) {
        self.clients.insert(name.into(), client);
    }

    /// Execute a BAML function
    ///
    /// # Arguments
    /// * `function_name` - Name of the function to execute
    /// * `params` - Input parameters as a HashMap
    ///
    /// # Returns
    /// The parsed result as a BamlValue
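    ///
    /// # Example
    /// A sketch assuming an IR with an `ExtractPerson` function and a registered
    /// client, as in `test_mock_client_with_runtime` below:
    /// ```rust,ignore
    /// let mut params = HashMap::new();
    /// params.insert("text".to_string(), BamlValue::String("Alice is 25".to_string()));
    /// let result = runtime.execute("ExtractPerson", params).await?;
    /// ```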
    pub async fn execute(
        &self,
        function_name: &str,
        params: HashMap<String, BamlValue>,
    ) -> Result<BamlValue> {
        // Find the function
        let function = self.ir.find_function(function_name)
            .ok_or_else(|| anyhow::anyhow!("Function '{}' not found", function_name))?;

        // Get the client
        let client = self.clients.get(&function.client)
            .ok_or_else(|| anyhow::anyhow!("Client '{}' not found", function.client))?;

        // Generate the prompt using the extracted function
        let prompt = generate_prompt_from_ir(
            &self.ir,
            &function.prompt_template,
            &params,
            &function.output
        )?;

        // Call the LLM
        let raw_response = client.call(&prompt)
            .await
            .context("Failed to call LLM")?;

        // Parse the response using the extracted function
        let result = parse_llm_response_with_ir(
            &self.ir,
            &raw_response,
            &function.output
        )?;

        // Validate the result (logs warnings for suspicious patterns)
        validate_result(&self.ir, &result, &function.output)?;

        Ok(result)
    }

    /// Execute a BAML function with smart retry logic
    ///
    /// This is like `execute()` but will retry up to `max_retries` times
    /// only for retryable errors (parse/validation errors). Network errors
    /// are not retried by default.
    ///
    /// # Arguments
    /// * `function_name` - Name of the function to execute
    /// * `params` - Input parameters as a HashMap
    /// * `max_retries` - Maximum number of retries (0 = no retries)
    ///
    /// # Returns
    /// The parsed result as a BamlValue
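    ///
    /// # Example
    /// A sketch, reusing the `runtime` and `params` from the `execute` example:
    /// ```rust,ignore
    /// // Equivalent to execute_with_retry_config(..., RetryConfig::new(3)).
    /// let result = runtime.execute_with_retry("ExtractPerson", params, 3).await?;
    /// ```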
    pub async fn execute_with_retry(
        &self,
        function_name: &str,
        params: HashMap<String, BamlValue>,
        max_retries: usize,
    ) -> Result<BamlValue> {
        self.execute_with_retry_config(function_name, params, RetryConfig::new(max_retries))
            .await
    }

    /// Execute a BAML function with configurable retry behavior
    ///
    /// # Arguments
    /// * `function_name` - Name of the function to execute
    /// * `params` - Input parameters as a HashMap
    /// * `config` - Retry configuration
    ///
    /// # Returns
    /// The parsed result as a BamlValue
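    ///
    /// # Example
    /// A sketch, reusing the `runtime` and `params` from the `execute` example:
    /// ```rust,ignore
    /// // Retry up to 5 times, but don't treat empty arrays as a retry trigger.
    /// let config = RetryConfig::new(5).without_empty_array_retry();
    /// let result = runtime
    ///     .execute_with_retry_config("ExtractPerson", params, config)
    ///     .await?;
    /// ```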
    pub async fn execute_with_retry_config(
        &self,
        function_name: &str,
        params: HashMap<String, BamlValue>,
        config: RetryConfig,
    ) -> Result<BamlValue> {
        let mut attempts = 0;

        loop {
            match self.try_execute(function_name, params.clone()).await {
                Ok(result) => {
                    if config.retry_empty_arrays && has_empty_arrays(&result) && attempts < config.max_retries {
                        eprintln!("Attempt {}: LLM returned empty arrays, retrying...", attempts + 1);
                        attempts += 1;
                        continue;
                    }
                    return Ok(result);
                }
                Err(baml_error) => {
                    if !baml_error.is_retryable() || attempts >= config.max_retries {
                        return Err(anyhow::anyhow!(baml_error));
                    }
                    attempts += 1;
                    eprintln!("Attempt {} failed ({}), retrying...", attempts, baml_error);
                }
            }
        }
    }

    /// Internal execute that returns typed BamlError
    async fn try_execute(
        &self,
        function_name: &str,
        params: HashMap<String, BamlValue>,
    ) -> std::result::Result<BamlValue, BamlError> {
        let function = self.ir.find_function(function_name)
            .ok_or_else(|| BamlError::Other(format!("Function '{}' not found", function_name)))?;

        let client = self.clients.get(&function.client)
            .ok_or_else(|| BamlError::Other(format!("Client '{}' not found", function.client)))?;

        let prompt = generate_prompt_from_ir(
            &self.ir,
            &function.prompt_template,
            &params,
            &function.output
        ).map_err(|e| BamlError::Other(e.to_string()))?;

        let raw_response = client.call(&prompt)
            .await
            .map_err(|e| BamlError::Network(e.to_string()))?;

        let result = parse_llm_response_with_ir(
            &self.ir,
            &raw_response,
            &function.output
        ).map_err(|e| BamlError::Parse(e.to_string()))?;

        validate_result(&self.ir, &result, &function.output)
            .map_err(|e| BamlError::Validation(e.to_string()))?;

        Ok(result)
    }

    /// Get the IR (for inspection/debugging)
    pub fn ir(&self) -> &IR {
        &self.ir
    }
}

/// Check if a BamlValue contains any empty arrays
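///
/// For example, a value like `{"matches": []}` yields `true` at any nesting depth.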
fn has_empty_arrays(value: &BamlValue) -> bool {
    match value {
        // An empty list counts; non-empty lists are checked element by element
        BamlValue::List(items) => items.is_empty() || items.iter().any(has_empty_arrays),
        BamlValue::Map(map) => map.values().any(has_empty_arrays),
        _ => false,
    }
}

/// Builder for constructing a BamlRuntime
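///
/// # Example
/// A sketch assuming an `ir` and a `client` built elsewhere (see the tests below):
/// ```rust,ignore
/// let runtime = RuntimeBuilder::new()
///     .ir(ir)
///     .client("openai", client)
///     .build();
/// ```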
pub struct RuntimeBuilder {
    ir: IR,
    clients: HashMap<String, DynLLMClient>,
}

impl RuntimeBuilder {
    pub fn new() -> Self {
        Self {
            ir: IR::new(),
            clients: HashMap::new(),
        }
    }

    pub fn ir(mut self, ir: IR) -> Self {
        self.ir = ir;
        self
    }

    pub fn client<C: LLMClientTrait + 'static>(mut self, name: impl Into<String>, client: C) -> Self {
        self.clients.insert(name.into(), Arc::new(client));
        self
    }

    pub fn dyn_client(mut self, name: impl Into<String>, client: DynLLMClient) -> Self {
        self.clients.insert(name.into(), client);
        self
    }

    pub fn build(self) -> BamlRuntime {
        let mut runtime = BamlRuntime::new(self.ir);
        for (name, client) in self.clients {
            runtime.register_dyn_client(name, client);
        }
        runtime
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::client::LLMClient;
    use crate::ir::*;

    #[tokio::test]
    async fn test_runtime_execution() {
        // Build IR
        let mut ir = IR::new();
        ir.classes.push(Class {
            name: "Person".to_string(),
            description: None,
            fields: vec![
                Field {
                    name: "name".to_string(),
                    field_type: FieldType::String,
                    optional: false,
                    description: None,
                },
                Field {
                    name: "age".to_string(),
                    field_type: FieldType::Int,
                    optional: false,
                    description: None,
                },
            ],
        });

        ir.functions.push(Function {
            name: "ExtractPerson".to_string(),
            inputs: vec![
                Field {
                    name: "text".to_string(),
                    field_type: FieldType::String,
                    optional: false,
                    description: None,
                }
            ],
            output: FieldType::Class("Person".to_string()),
            prompt_template: "Extract person info from: {{ text }}".to_string(),
            client: "test_client".to_string(),
        });

        // Execution requires a registered client; end-to-end execution with a
        // mock client is covered by test_mock_client_with_runtime below.
        // Here we only verify that the function is discoverable in the IR.

        let runtime = BamlRuntime::new(ir);

        // Verify function exists
        assert!(runtime.ir().find_function("ExtractPerson").is_some());
    }

    #[test]
    fn test_runtime_builder() {
        let ir = IR::new();
        let client = LLMClient::openai("test-key".to_string(), "gpt-4".to_string());

        let runtime = RuntimeBuilder::new()
            .ir(ir)
            .client("openai", client)
            .build();

        assert!(runtime.clients.contains_key("openai"));
    }

    #[test]
    fn test_final_chunk_with_invalid_json_returns_error() {
        use crate::streaming_value::StreamingBamlValue;

        let mut ir = IR::new();
        ir.classes.push(Class {
            name: "Person".to_string(),
            description: None,
            fields: vec![
                Field {
                    name: "name".to_string(),
                    field_type: FieldType::String,
                    optional: false,
                    description: None,
                },
                Field {
                    name: "age".to_string(),
                    field_type: FieldType::Int,
                    optional: false,
                    description: None,
                },
            ],
        });

        let target_type = FieldType::Class("Person".to_string());
        let mut streaming = StreamingBamlValue::from_ir_skeleton(&ir, &target_type);

        let invalid_json = r#"{"name": "John", "age":"#;
        let result = update_streaming_response(
            &mut streaming,
            &ir,
            invalid_json,
            &target_type,
            true,
        );

        assert!(result.is_err(), "Final chunk with invalid JSON should return error");
        assert_ne!(
            streaming.completion_state,
            crate::streaming_value::CompletionState::Complete,
            "Should not mark as complete on parse failure"
        );
    }

    #[test]
    fn test_final_chunk_with_valid_json_succeeds() {
        use crate::streaming_value::StreamingBamlValue;

        let mut ir = IR::new();
        ir.classes.push(Class {
            name: "Person".to_string(),
            description: None,
            fields: vec![
                Field {
                    name: "name".to_string(),
                    field_type: FieldType::String,
                    optional: false,
                    description: None,
                },
                Field {
                    name: "age".to_string(),
                    field_type: FieldType::Int,
                    optional: false,
                    description: None,
                },
            ],
        });

        let target_type = FieldType::Class("Person".to_string());
        let mut streaming = StreamingBamlValue::from_ir_skeleton(&ir, &target_type);

        let valid_json = r#"{"name": "John", "age": 30}"#;
        let result = update_streaming_response(
            &mut streaming,
            &ir,
            valid_json,
            &target_type,
            true,
        );

        assert!(result.is_ok(), "Final chunk with valid JSON should succeed");
        assert_eq!(
            streaming.completion_state,
            crate::streaming_value::CompletionState::Complete,
            "Should mark as complete on success"
        );
    }

    #[test]
    fn test_baml_error_retryable() {
        assert!(BamlError::Parse("invalid json".to_string()).is_retryable());
        assert!(BamlError::Validation("missing field".to_string()).is_retryable());
        assert!(!BamlError::Network("connection refused".to_string()).is_retryable());
        assert!(!BamlError::Other("unknown error".to_string()).is_retryable());
    }

    #[test]
    fn test_retry_config_default() {
        let config = RetryConfig::default();
        assert_eq!(config.max_retries, 3);
        assert!(config.retry_empty_arrays);
    }

    #[test]
    fn test_retry_config_builder() {
        let config = RetryConfig::new(5).without_empty_array_retry();
        assert_eq!(config.max_retries, 5);
        assert!(!config.retry_empty_arrays);
    }

    #[tokio::test]
    async fn test_mock_client_with_runtime() {
        use crate::client::MockLLMClient;

        let mut ir = IR::new();
        ir.classes.push(Class {
            name: "Person".to_string(),
            description: None,
            fields: vec![
                Field {
                    name: "name".to_string(),
                    field_type: FieldType::String,
                    optional: false,
                    description: None,
                },
                Field {
                    name: "age".to_string(),
                    field_type: FieldType::Int,
                    optional: false,
                    description: None,
                },
            ],
        });

        ir.functions.push(Function {
            name: "ExtractPerson".to_string(),
            inputs: vec![Field {
                name: "text".to_string(),
                field_type: FieldType::String,
                optional: false,
                description: None,
            }],
            output: FieldType::Class("Person".to_string()),
            prompt_template: "Extract person info from: {{ text }}".to_string(),
            client: "mock".to_string(),
        });

        let mut mock_client = MockLLMClient::new();
        mock_client.add_response("Extract person", r#"{"name": "Alice", "age": 25}"#);

        let runtime = RuntimeBuilder::new()
            .ir(ir)
            .client("mock", mock_client)
            .build();

        let mut params = HashMap::new();
        params.insert("text".to_string(), BamlValue::String("Alice is 25".to_string()));

        let result = runtime.execute("ExtractPerson", params).await.unwrap();
        let map = result.as_map().unwrap();
        assert_eq!(map.get("name").unwrap().as_string(), Some("Alice"));
        assert_eq!(map.get("age").unwrap().as_int(), Some(25));
    }
}