// backdisco 0.4.0

use anyhow::{Context, Result};
use crate::pattern::Pattern;
use crate::wordlist::HostnameStructure;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use std::io::{self, Write};
use std::collections::{HashMap, HashSet};
use regex::Regex;
use once_cell::sync::Lazy;
use std::sync::Arc;
use indicatif::ProgressBar;

/// Flavor of LLM HTTP API that a base URL is assumed to speak.
///
/// `detect_api_type` produces this value from the URL text; the request
/// functions in this file use it to choose endpoint paths and response shapes.
#[derive(Debug, Clone, PartialEq)]
enum ApiType {
    OpenAICompatible,  // OpenAI-compatible API (most common); also the default when nothing else matches
    Ollama,            // Ollama-specific endpoints (e.g. /api/tags for model listing)
    Anthropic,         // Anthropic Claude API
    Unknown,           // Treated like OpenAICompatible where it is matched; NOTE(review): not returned by detect_api_type in the code visible here
}

// URL fragments that mark an endpoint as Ollama. The regexes are compiled
// lazily on first access and reused afterwards.
static OLLAMA_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    [
        r"(?i)ollama", // the word "ollama" anywhere in the URL
        r":11434",     // literal port 11434
        r"/api/[^v]",  // /api/ followed by something that's not 'v'
        r"/api$",      // /api at end of URL
    ]
    .iter()
    .map(|source| Regex::new(source).unwrap())
    .collect()
});

// URL fragments that mark an endpoint as Anthropic (compiled lazily, once).
static ANTHROPIC_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    [
        r"(?i)anthropic",       // the word "anthropic" anywhere in the URL
        r"api\.anthropic\.com", // the api.anthropic.com host name
    ]
    .iter()
    .map(|source| Regex::new(source).unwrap())
    .collect()
});

// URL fragments that mark an endpoint as OpenAI-compatible (compiled lazily, once).
static OPENAI_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    [
        r"(?i)openai",       // the word "openai" anywhere in the URL
        r"/v1(?:/|$)",       // /v1 at end or followed by /
        r"api\.openai\.com", // the api.openai.com host name
    ]
    .iter()
    .map(|source| Regex::new(source).unwrap())
    .collect()
});

/// Detect API type from URL using regex patterns
fn detect_api_type(url: &str) -> ApiType {
    let url_lower = url.to_lowercase();
    
    // Special check for /api/v1 - this should be OpenAI-compatible, not Ollama
    // We need to check this before the general /api/ pattern
    if url_lower.contains("/api/v1") {
        return ApiType::OpenAICompatible;
    }
    
    // Check Ollama patterns
    for pattern in OLLAMA_PATTERNS.iter() {
        if pattern.is_match(&url_lower) {
            return ApiType::Ollama;
        }
    }
    
    // Check Anthropic patterns
    for pattern in ANTHROPIC_PATTERNS.iter() {
        if pattern.is_match(&url_lower) {
            return ApiType::Anthropic;
        }
    }
    
    // Check OpenAI patterns
    for pattern in OPENAI_PATTERNS.iter() {
        if pattern.is_match(&url_lower) {
            return ApiType::OpenAICompatible;
        }
    }
    
    // Default to OpenAI-compatible (most common standard)
    ApiType::OpenAICompatible
}

/// Fetch the model identifiers advertised by the API at `base_url`.
///
/// The API flavor is guessed from the URL via `detect_api_type`:
/// - Ollama: queries `/api/tags` on the server root and returns each model's
///   `name`. If that request does not succeed, the OpenAI-compatible `/models`
///   endpoint is tried instead.
/// - Anthropic: no listing is attempted; an empty list is returned and the
///   user has to name a model explicitly.
/// - Anything else: queries the OpenAI-compatible `/models` endpoint.
///
/// # Errors
/// Returns an error when the HTTP client cannot be built, a request fails,
/// the OpenAI-compatible endpoint answers with a non-success status, or a
/// response body cannot be decoded.
pub async fn fetch_available_models(base_url: &str) -> Result<Vec<String>> {
    #[derive(Debug, Deserialize)]
    struct OllamaModelsResponse {
        models: Vec<OllamaModelInfo>,
    }

    #[derive(Debug, Deserialize)]
    struct OllamaModelInfo {
        name: String,
    }

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(10))
        .build()?;

    match detect_api_type(base_url) {
        ApiType::Ollama => {
            // Normalise trailing slashes first so ".../v1/" and ".../api/"
            // are recognised, and strip only the *trailing* "/v1":
            // `str::replace` would rewrite every "/v1" in the URL.
            let root = base_url.trim_end_matches('/');
            let url = if let Some(server) = root.strip_suffix("/v1") {
                format!("{}/api/tags", server)
            } else if root.ends_with("/api") {
                format!("{}/tags", root)
            } else {
                format!("{}/api/tags", root)
            };

            let resp = client.get(&url).send().await?;
            if !resp.status().is_success() {
                // Fallback to OpenAI-compatible endpoint
                return fetch_openai_models(&client, base_url).await;
            }

            let listing: OllamaModelsResponse = resp.json().await?;
            Ok(listing.models.into_iter().map(|m| m.name).collect())
        }
        // No model listing is attempted for Anthropic; the caller must ask
        // the user for an explicit model.
        ApiType::Anthropic => Ok(Vec::new()),
        ApiType::OpenAICompatible | ApiType::Unknown => {
            fetch_openai_models(&client, base_url).await
        }
    }
}

/// List model ids from an OpenAI-compatible `<base_url>/models` endpoint.
///
/// Fails when the request cannot be sent, the status is not a success, or the
/// body does not decode as a `ModelsResponse`.
async fn fetch_openai_models(client: &reqwest::Client, base_url: &str) -> Result<Vec<String>> {
    // Insert a separator only when the base URL does not already end in one.
    let separator = if base_url.ends_with('/') { "" } else { "/" };
    let endpoint = format!("{}{}models", base_url, separator);

    let response = client.get(&endpoint).send().await?;
    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("Failed to fetch models: {}", status);
    }

    let listing: ModelsResponse = response.json().await?;
    let mut ids = Vec::with_capacity(listing.data.len());
    for entry in listing.data {
        ids.push(entry.id);
    }
    Ok(ids)
}

/// Print `models` as a 1-based menu on stdout and read the user's pick from stdin.
///
/// Returns a clone of the chosen entry. Errors when `models` is empty, when
/// stdout/stdin I/O fails, when the input is not an unsigned number, or when
/// the number falls outside `1..=models.len()`.
pub fn prompt_model_selection(models: &[String]) -> Result<String> {
    let count = models.len();
    if count == 0 {
        anyhow::bail!("No models available. Please specify a model with --model");
    }

    println!("\nAvailable models:");
    for (number, name) in (1usize..).zip(models) {
        println!("  {}. {}", number, name);
    }

    print!("\nSelect a model (1-{}): ", count);
    // The prompt has no trailing newline, so push it out before blocking on stdin.
    io::stdout().flush()?;

    let mut line = String::new();
    io::stdin().read_line(&mut line)?;

    let choice = line
        .trim()
        .parse::<usize>()
        .context("Invalid selection. Please enter a number.")?;

    // `checked_sub` rejects 0, `get` rejects anything past the end.
    match choice.checked_sub(1).and_then(|index| models.get(index)) {
        Some(picked) => Ok(picked.clone()),
        None => anyhow::bail!("Selection out of range. Please choose 1-{}", count),
    }
}

/// One message inside a chat-completion request body.
#[derive(Debug, Serialize)]
struct ChatMessage {
    /// Speaker role; the requests built in this file always send "user".
    role: String,
    /// Message text (the prompt).
    content: String,
}

/// JSON body POSTed to the chat-completions endpoint.
#[derive(Debug, Serialize)]
struct ChatRequest {
    /// Model identifier to run the request against.
    model: String,
    /// Conversation to complete; callers in this file send a single user message.
    messages: Vec<ChatMessage>,
    /// Sampling temperature (callers here use 0.1 for pattern derivation and 0.7 for word expansion).
    temperature: f64,
}

/// Subset of the chat-completions response that this file reads.
#[derive(Debug, Deserialize)]
struct ChatResponse {
    /// Returned completions; callers here only look at the first entry.
    choices: Vec<ChatChoice>,
}

/// One completion entry inside a `ChatResponse`.
#[derive(Debug, Deserialize)]
struct ChatChoice {
    /// The generated message for this choice.
    message: ChatMessageResponse,
}

/// Message payload of a `ChatChoice`.
#[derive(Debug, Deserialize)]
struct ChatMessageResponse {
    /// Generated text; later passed through `extract_json` by the callers in this file.
    content: String,
}

/// Response body of the OpenAI-compatible `/models` endpoint (only the part read here).
#[derive(Debug, Deserialize)]
struct ModelsResponse {
    /// One entry per available model.
    data: Vec<ModelInfo>,
}

/// Single model entry inside a `ModelsResponse`.
#[derive(Debug, Deserialize)]
struct ModelInfo {
    /// Model identifier; this is the string compared against the requested model name.
    id: String,
}

/// Verify that the LLM endpoint at `base_url` is reachable and offers `model`.
///
/// The model-listing endpoint is chosen from the detected API flavor
/// (`/api/tags` for Ollama, `/models` otherwise) and `model` must appear in
/// the returned list with an exact string match.
///
/// # Errors
/// Returns an error when the HTTP client cannot be built, the request fails,
/// the endpoint answers with a non-success status, the body cannot be
/// decoded, or `model` is not in the list.
///
/// NOTE(review): URLs detected as Anthropic are probed through `/models` like
/// OpenAI-compatible ones, while `fetch_available_models` does not query
/// Anthropic at all — confirm this is intended.
pub async fn preflight_check(base_url: &str, model: &str) -> Result<()> {
    #[derive(Debug, Deserialize)]
    struct OllamaModelsResponse {
        models: Vec<OllamaModelInfo>,
    }

    #[derive(Debug, Deserialize)]
    struct OllamaModelInfo {
        name: String,
    }

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(10))
        .build()?;

    let api_type = detect_api_type(base_url);

    // Normalise trailing slashes once so the suffix checks below also see
    // ".../v1/" and ".../api/".
    let root = base_url.trim_end_matches('/');
    let models_url = match api_type {
        ApiType::Ollama => {
            // Strip only the trailing "/v1"; `str::replace` would rewrite
            // every "/v1" occurrence in the URL.
            if let Some(server) = root.strip_suffix("/v1") {
                format!("{}/api/tags", server)
            } else if root.ends_with("/api") {
                format!("{}/tags", root)
            } else {
                format!("{}/api/tags", root)
            }
        }
        ApiType::OpenAICompatible | ApiType::Anthropic | ApiType::Unknown => {
            format!("{}/models", root)
        }
    };

    let resp = client.get(&models_url).send().await?;

    if !resp.status().is_success() {
        anyhow::bail!("LLM endpoint returned {}", resp.status());
    }

    // Verify model is available
    let available_models: Vec<String> = match api_type {
        ApiType::Ollama => {
            let listing: OllamaModelsResponse = resp.json().await?;
            listing.models.into_iter().map(|m| m.name).collect()
        }
        ApiType::OpenAICompatible | ApiType::Anthropic | ApiType::Unknown => {
            let listing: ModelsResponse = resp.json().await?;
            listing.data.into_iter().map(|m| m.id).collect()
        }
    };

    if !available_models.iter().any(|m| m == model) {
        anyhow::bail!(
            "Model '{}' not found in available models",
            model
        );
    }

    Ok(())
}

/// Deterministic pattern derivation by analyzing structural differences between
/// frontend and backend hostnames. Used as primary approach and as fallback when
/// LLM-derived patterns fail validation.
///
/// Trailing dots are stripped from both names before comparison. Patterns are
/// returned in the order the cases below are checked, and the same
/// `(find, replace, position)` rule is never emitted twice:
///
/// 1. Prefix added: `www.shopify.com -> origin.www.shopify.com` yields
///    `find="" replace="origin." position="prefix"`. The added text must end
///    in `.` or `-`.
/// 2. Suffix added: `api.example.com -> api.example.com.cdn.internal` yields
///    `find="" replace=".cdn.internal" position="suffix"`. The added text
///    must start with `.` or `-`.
/// 3. First label replaced (same label count, same last two labels):
///    `cdn.example.com -> origin.example.com` yields
///    `find="cdn." replace="origin." position="prefix"`.
/// 4. First label embedded (same preconditions as 3): when the backend label
///    contains the frontend label after a `-`-terminated lead-in, e.g.
///    `shop.att.com -> origin-shop-alpha.att.com`, the lead-in is also emitted
///    as a general prefix rule (`find="" replace="origin-"`).
///
/// Returns an empty vector when no case applies.
pub fn derive_patterns_deterministic(front: &str, back: &str) -> Vec<Pattern> {
    /// Append a rule unless an identical one is already in `patterns`.
    fn push_unique(patterns: &mut Vec<Pattern>, find: String, replace: String, position: &str) {
        let duplicate = patterns
            .iter()
            .any(|p| p.find == find && p.replace == replace && p.position == position);
        if !duplicate {
            patterns.push(Pattern {
                find,
                replace,
                position: position.to_string(),
            });
        }
    }

    let mut patterns = Vec::new();

    // Normalize: strip trailing dots
    let front = front.trim_end_matches('.');
    let back = back.trim_end_matches('.');

    if back.len() > front.len() {
        // Case 1: backend is the frontend with text prepended.
        if let Some(prefix) = back.strip_suffix(front) {
            // Only accept additions that end on a separator.
            if prefix.ends_with('.') || prefix.ends_with('-') {
                push_unique(&mut patterns, String::new(), prefix.to_string(), "prefix");
            }
        }

        // Case 2: backend is the frontend with text appended.
        if let Some(suffix) = back.strip_prefix(front) {
            if suffix.starts_with('.') || suffix.starts_with('-') {
                push_unique(&mut patterns, String::new(), suffix.to_string(), "suffix");
            }
        }
    }

    // Cases 3 and 4 only apply when both names have the same number of labels
    // (at least two) and share their last two labels.
    let front_labels: Vec<&str> = front.split('.').collect();
    let back_labels: Vec<&str> = back.split('.').collect();
    let same_shape = front_labels.len() >= 2
        && front_labels.len() == back_labels.len()
        && front_labels[front_labels.len() - 2..] == back_labels[back_labels.len() - 2..];
    if !same_shape {
        return patterns;
    }

    let front_first = front_labels[0];
    let back_first = back_labels[0];
    if front_first == back_first {
        return patterns;
    }

    // Case 3: whole first label replaced. This rule also covers the
    // "embedded label" transformation of Case 4 (shop. -> origin-shop-alpha.),
    // so Case 4 does not need to emit it again.
    push_unique(
        &mut patterns,
        format!("{}.", front_first),
        format!("{}.", back_first),
        "prefix",
    );

    // Case 4: the backend label embeds the frontend label; expose the lead-in
    // as a general prefix. Labels cannot contain '.', so '-' is the only
    // separator that can terminate the lead-in. When the whole hostname
    // already matched Case 1 (origin-myquotes.dell.com) this is the same rule,
    // and push_unique keeps it from being listed twice.
    if let Some(pos) = back_first.find(front_first) {
        let lead_in = &back_first[..pos];
        if lead_in.ends_with('-') {
            push_unique(&mut patterns, String::new(), lead_in.to_string(), "prefix");
        }
    }

    patterns
}

/// Combine several (frontend, backend) seed pairs into first-label templates.
///
/// For every pair whose last two labels agree, the backend label that embeds
/// the frontend's first label is split into the text before it (prefix) and
/// after it (suffix). When all usable pairs share one non-empty prefix and at
/// least two different suffixes were seen, one `label_template` pattern per
/// suffix is returned (suffixes in sorted order), with `find` set to
/// `<prefix>{label}<suffix>` and `replace` to `<prefix><suffix>`.
///
/// Example: (www -> origin-www-alpha, shop -> origin-shop-alpha, nobf -> origin-nobf)
/// gives the templates `origin-{label}` and `origin-{label}-alpha`.
///
/// Returns an empty vector for fewer than two pairs or when no such template
/// can be formed.
pub fn derive_multi_pair_templates(pairs: &[(String, String)]) -> Vec<Pattern> {
    if pairs.len() < 2 {
        return Vec::new();
    }

    // (prefix, suffix) wrapped around the frontend's first label, one entry
    // per pair that could be analysed.
    let affixes: Vec<(String, String)> = pairs
        .iter()
        .filter_map(|(front, back)| {
            let f: Vec<&str> = front.trim_end_matches('.').split('.').collect();
            let b: Vec<&str> = back.trim_end_matches('.').split('.').collect();

            if f.len() < 2 || b.len() < 2 {
                return None;
            }
            // Both names must share their last two labels.
            if f[f.len() - 2..] != b[b.len() - 2..] {
                return None;
            }

            let needle = f[0];
            if f.len() == b.len() {
                // Same depth: the first backend label wraps the frontend label.
                let host = b[0];
                let at = host.find(needle)?;
                Some((
                    host[..at].to_string(),
                    host[at + needle.len()..].to_string(),
                ))
            } else if b.len() == f.len() + 1 {
                // One extra leading label: it becomes part of the prefix.
                let host = b[1];
                if host == needle {
                    // Plain label prepend (origin.www.example.com) is left to
                    // derive_patterns_deterministic.
                    return None;
                }
                let at = host.find(needle)?;
                Some((
                    format!("{}.{}", b[0], &host[..at]),
                    host[at + needle.len()..].to_string(),
                ))
            } else {
                None
            }
        })
        .collect();

    // A template needs one non-empty prefix shared by every analysed pair.
    let shared_prefix = match affixes.first() {
        Some((lead, _)) if !lead.is_empty() && affixes.iter().all(|(p, _)| p == lead) => {
            lead.clone()
        }
        _ => return Vec::new(),
    };

    // Distinct suffix variants in sorted order.
    let mut tails: Vec<String> = affixes.iter().map(|(_, tail)| tail.clone()).collect();
    tails.sort();
    tails.dedup();
    if tails.len() < 2 {
        return Vec::new();
    }

    tails
        .into_iter()
        .map(|tail| Pattern {
            find: format!("{}{{label}}{}", shared_prefix, tail),
            replace: format!("{}{}", shared_prefix, tail),
            position: "label_template".to_string(),
        })
        .collect()
}

/// Apply label_template patterns to targets.
/// Template patterns transform the first label: prefix + original_label + suffix
pub fn apply_label_templates(targets: &[String], templates: &[Pattern]) -> Vec<String> {
    let mut candidates = std::collections::HashSet::new();

    let templates: Vec<&Pattern> = templates.iter()
        .filter(|p| p.position == "label_template")
        .collect();

    if templates.is_empty() {
        return Vec::new();
    }

    for target in targets {
        let labels: Vec<&str> = target.split('.').collect();
        if labels.len() < 2 {
            continue;
        }

        let first_label = labels[0];
        let rest = labels[1..].join(".");

        for template in &templates {
            // template.find contains the template like "origin-{label}-alpha"
            // template.replace contains "prefix" + "suffix" parts
            // We parse the find field to extract prefix and suffix around {label}
            if let Some(label_pos) = template.find.find("{label}") {
                let prefix = &template.find[..label_pos];
                let suffix = &template.find[label_pos + 7..]; // 7 = "{label}".len()
                let new_label = format!("{}{}{}", prefix, first_label, suffix);
                let candidate = format!("{}.{}", new_label, rest);
                candidates.insert(candidate);
            }
        }
    }

    let mut result: Vec<String> = candidates.into_iter().collect();
    result.sort();
    result
}

/// Keep only the patterns that reproduce the known seed pair.
///
/// Each pattern is applied to `front` with `apply_single_pattern`; it is
/// retained (cloned, original order preserved) when the result equals `back`
/// exactly. Patterns that do not apply at all are dropped.
pub fn validate_patterns(patterns: &[Pattern], front: &str, back: &str) -> Vec<Pattern> {
    patterns
        .iter()
        .filter(|candidate| apply_single_pattern(front, candidate).as_deref() == Some(back))
        .cloned()
        .collect()
}

/// Apply one find/replace rule to `target` (mirrors pattern.rs logic but available in llm.rs).
///
/// - `"prefix"`: `find` must start the target; it is swapped for `replace`
///   (an empty `find` prepends `replace`).
/// - `"suffix"`: `find` must end the target; it is swapped for `replace`
///   (an empty `find` appends `replace`).
/// - `"contains"`: the first occurrence of `find` is swapped for `replace`.
///
/// Returns `None` when the rule does not match or the position is unknown.
fn apply_single_pattern(target: &str, pattern: &Pattern) -> Option<String> {
    let needle = pattern.find.as_str();
    let replacement = pattern.replace.as_str();

    match pattern.position.as_str() {
        "prefix" => target
            .strip_prefix(needle)
            .map(|rest| format!("{}{}", replacement, rest)),
        "suffix" => target
            .strip_suffix(needle)
            .map(|head| format!("{}{}", head, replacement)),
        "contains" => target.find(needle).map(|at| {
            let after = at + needle.len();
            format!("{}{}{}", &target[..at], replacement, &target[after..])
        }),
        _ => None,
    }
}

/// Derive find/replace patterns that map `front` (CDN frontend hostname) onto
/// `back` (its backend origin).
///
/// The deterministic analysis (`derive_patterns_deterministic`) runs first
/// and its result is returned directly when it yields anything. Only when it
/// yields nothing is the LLM at `base_url` asked (model `model`, chat
/// completions API). LLM patterns are validated against the seed pair; if
/// none validates, the unvalidated LLM patterns are returned with a warning
/// on stderr.
///
/// # Errors
/// Returns an error when the HTTP client cannot be built, the request fails
/// or returns a non-success status, the response has no choices, no JSON can
/// be extracted from the reply, or the extracted JSON does not decode as
/// `{"patterns": [...]}`.
pub async fn query_patterns(base_url: &str, model: &str, front: &str, back: &str) -> Result<Vec<Pattern>> {
    // First try deterministic approach (fast, reliable)
    let deterministic = derive_patterns_deterministic(front, back);
    if !deterministic.is_empty() {
        // Deterministic patterns are derived from structural analysis and are
        // reliable by construction. Return all of them (both specific validated
        // patterns and general patterns for broader discovery).
        return Ok(deterministic);
    }

    // Deterministic analysis produced nothing - ask the LLM.
    let prompt = format!(
        r#"You are a hostname pattern analyzer. Given a CDN frontend hostname and its backend origin, derive find/replace rules to transform OTHER frontend hostnames into potential backend origins.

Position types:
- "prefix": match/replace at the START of the hostname. Use find="" to prepend text.
- "suffix": match/replace at the END of the hostname. Use find="" to append text.
- "contains": match/replace ANYWHERE in the hostname (first occurrence only).

Examples:
Frontend: www.example.com → Backend: origin.www.example.com
Answer: {{"patterns": [{{"find": "", "replace": "origin.", "position": "prefix"}}]}}

Frontend: cdn.example.com → Backend: origin.example.com
Answer: {{"patterns": [{{"find": "cdn.", "replace": "origin.", "position": "prefix"}}]}}

Frontend: api.example.com → Backend: api-origin.example.com
Answer: {{"patterns": [{{"find": "api.", "replace": "api-origin.", "position": "prefix"}}]}}

Now analyze this pair. The pattern must transform the frontend into the backend:
Frontend: {front}
Backend: {back}

Respond ONLY with JSON: {{"patterns": [{{"find": "...", "replace": "...", "position": "prefix|suffix|contains"}}]}}"#,
        front = front, back = back
    );

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(30))
        .build()?;

    let request = ChatRequest {
        model: model.to_string(),
        messages: vec![ChatMessage {
            role: "user".to_string(),
            content: prompt,
        }],
        temperature: 0.1, // Low temperature for more deterministic output
    };

    // Normalise trailing slashes once so suffix checks also see ".../v1/".
    let root = base_url.trim_end_matches('/');
    let url = match detect_api_type(base_url) {
        ApiType::Ollama => {
            // The request body and the response parsing below use the OpenAI
            // chat format, so always target Ollama's OpenAI-compatible
            // /v1/chat/completions route. The native /api/generate route takes
            // a different body and returns a different shape, so a base URL
            // ending in /api is mapped back to the server root.
            let server = root
                .strip_suffix("/v1")
                .or_else(|| root.strip_suffix("/api"))
                .unwrap_or(root);
            format!("{}/v1/chat/completions", server)
        }
        ApiType::OpenAICompatible | ApiType::Anthropic | ApiType::Unknown => {
            format!("{}/chat/completions", root)
        }
    };

    let resp = client.post(&url).json(&request).send().await?;

    if !resp.status().is_success() {
        anyhow::bail!("LLM API returned status: {}", resp.status());
    }

    let chat_response: ChatResponse = resp.json().await?;
    let content = chat_response
        .choices
        .first()
        .map(|c| c.message.content.clone())
        .context("No response content from LLM")?;

    // Extract JSON from response (may have markdown code blocks)
    let json_content = extract_json(&content)?;

    // Parse patterns. There is nothing to fall back to here: this point is
    // only reached when the deterministic analysis already came back empty,
    // and it is a pure function of (front, back).
    let pattern_response: PatternResponse = match serde_json::from_str(&json_content) {
        Ok(parsed) => parsed,
        Err(e) => {
            eprintln!("Warning: Failed to parse LLM JSON response: {}.", e);
            return Err(anyhow::anyhow!("Failed to parse LLM response as JSON and deterministic fallback produced no patterns"));
        }
    };

    // Validate LLM patterns against the known seed pair
    let validated = validate_patterns(&pattern_response.patterns, front, back);
    if !validated.is_empty() {
        return Ok(validated);
    }

    // Nothing validated and no deterministic patterns exist for this pair:
    // hand back the unvalidated LLM patterns - they might still be useful.
    eprintln!("Warning: LLM-derived patterns failed validation against seed pair and no deterministic patterns could be derived. Using unvalidated LLM patterns.");
    Ok(pattern_response.patterns)
}

/// JSON object the LLM is asked to answer with in `query_patterns`:
/// `{"patterns": [{"find": ..., "replace": ..., "position": ...}]}`.
#[derive(Debug, Deserialize)]
struct PatternResponse {
    /// Find/replace rules proposed by the LLM (not yet validated).
    patterns: Vec<Pattern>,
}

/// Expand words at a specific position using LLM
/// Context-aware expansion that understands position semantics
/// Example: "dev" at environment position -> ["dev", "prod", "test", "staging", "qa", "uat"]
pub async fn expand_position_words(
    base_url: &str,
    model: &str,
    words: &[String],
    position: usize,
    total_positions: usize,
    hostname_context: &str,
) -> Result<Vec<String>> {
    if words.is_empty() {
        return Ok(Vec::new());
    }

    // Determine position context for better LLM prompts
    let position_context = if position == 0 {
        "service/application name"
    } else if position == total_positions - 1 {
        "organization/team identifier"
    } else if words.iter().any(|w| ["dev", "prod", "test", "staging", "qa", "uat"].contains(&w.as_str())) {
        "environment identifier"
    } else if words.iter().any(|w| ["api", "app", "web", "backend"].contains(&w.as_str())) {
        "service type"
    } else {
        "subdomain segment"
    };

    let prompt = format!(
        r#"You are helping discover backend server hostnames. Given these words from position {} (context: {}) in hostname "{}":
{:?}

Generate related words that might appear in the same position in other backend/internal server hostnames.
Think about:
- For environment positions: dev -> prod, test, staging, qa, uat, preprod, sandbox
- For service positions: api -> rest, graphql, rpc, service, gateway, app, web
- For organization positions: corp -> internal, int, private, team, org
- Synonyms and variations
- Common abbreviations
- Related concepts

Respond ONLY with a JSON array of unique strings, no duplicates, no explanations:
["word1", "word2", "word3", ...]"#,
        position, position_context, hostname_context, words
    );

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(60))
        .build()?;

    let request = ChatRequest {
        model: model.to_string(),
        messages: vec![ChatMessage {
            role: "user".to_string(),
            content: prompt,
        }],
        temperature: 0.7, // Higher temperature for more creativity
    };

    let api_type = detect_api_type(base_url);
    let url = match api_type {
        ApiType::Ollama => {
            if base_url.ends_with("/v1") {
                format!("{}/chat/completions", base_url)
            } else if base_url.ends_with("/api") {
                format!("{}/generate", base_url)
            } else {
                format!("{}/v1/chat/completions", base_url.trim_end_matches('/'))
            }
        }
        _ => {
            if base_url.ends_with('/') {
                format!("{}chat/completions", base_url)
            } else {
                format!("{}/chat/completions", base_url)
            }
        }
    };

    let resp = client.post(&url).json(&request).send().await?;

    if !resp.status().is_success() {
        anyhow::bail!("LLM API returned status: {}", resp.status());
    }

    let chat_response: ChatResponse = resp.json().await?;
    let content = chat_response
        .choices
        .first()
        .and_then(|c| Some(c.message.content.clone()))
        .context("No response content from LLM")?;

    // Extract JSON array from response
    let json_content = extract_json(&content)?;
    
    // Parse as array of strings
    let mut words_result: Vec<String> = serde_json::from_str(&json_content)
        .unwrap_or_else(|_| Vec::new());

    // Always include original words
    for word in words {
        if !words_result.contains(word) {
            words_result.push(word.clone());
        }
    }

    // Clean and filter words
    let cleaned: Vec<String> = words_result
        .into_iter()
        .map(|w| w.to_lowercase().trim().to_string())
        .filter(|w| !w.is_empty() && w.len() < 50 && !w.contains(' ') && !w.contains('.'))
        .collect();

    Ok(cleaned)
}

/// Batch expand positions across multiple hostname structures using batched LLM calls.
///
/// The unique words found at each subdomain position (across all `structures`) are
/// split into chunks of at most `batch_size` words, and every chunk is expanded with
/// one call to `expand_position_words_batch`. A `batch_size` of 0 is treated as 1.
///
/// When a batch fails, a warning is printed to stderr and the affected positions keep
/// only their original word. If `progress_bar` is provided, its position is set to the
/// number of batches processed so far after each batch.
///
/// Returns a 3D array: `[hostname_index][position_index][alternatives]`, in the same
/// order as `structures` and their `subdomain_segments`.
pub async fn batch_expand_positions(
    base_url: &str,
    model: &str,
    structures: &[HostnameStructure],
    progress_bar: Option<Arc<ProgressBar>>,
    batch_size: usize,
) -> Result<Vec<Vec<Vec<String>>>> {
    const ENVIRONMENT_WORDS: [&str; 6] = ["dev", "prod", "test", "staging", "qa", "uat"];
    const SERVICE_WORDS: [&str; 4] = ["api", "app", "web", "backend"];

    if structures.is_empty() {
        return Ok(Vec::new());
    }

    // `chunks(0)` panics, so a zero batch size degrades to one word per request.
    let batch_size = batch_size.max(1);

    // Find maximum position depth across all structures
    let max_positions = structures
        .iter()
        .map(|s| s.subdomain_segments.len())
        .max()
        .unwrap_or(0);

    if max_positions == 0 {
        return Ok(structures.iter().map(|_| Vec::new()).collect());
    }

    // position -> unique word -> indices of the structures using that word there
    let mut position_words: Vec<HashMap<String, Vec<usize>>> = vec![HashMap::new(); max_positions];
    for (struct_idx, structure) in structures.iter().enumerate() {
        for (pos_idx, segment) in structure.subdomain_segments.iter().enumerate() {
            position_words[pos_idx]
                .entry(segment.clone())
                .or_default()
                .push(struct_idx);
        }
    }

    // The first structure serves as the reference for position labels and as the
    // representative hostname shown to the LLM; both are invariant across batches.
    let sample_structure = &structures[0];
    let sample_len = sample_structure.subdomain_segments.len();
    let context_hostname = format!(
        "{}.{}",
        sample_structure.subdomain_segments.join("."),
        sample_structure.base_domain
    );

    // (struct_idx, pos_idx) -> expanded words
    let mut expansion_map: HashMap<(usize, usize), Vec<String>> = HashMap::new();
    let mut completed = 0usize;

    for (pos_idx, words_map) in position_words.iter().enumerate() {
        // Sorted so that batch composition does not depend on hash iteration order.
        let mut unique_words: Vec<String> = words_map.keys().cloned().collect();
        unique_words.sort_unstable();

        // `pos_idx + 1 == sample_len` avoids the usize underflow of `len - 1` when
        // the reference structure has no segments.
        let position_context = if pos_idx == 0 {
            "service/application name"
        } else if pos_idx + 1 == sample_len {
            "organization/team identifier"
        } else if unique_words.iter().any(|w| ENVIRONMENT_WORDS.contains(&w.as_str())) {
            "environment identifier"
        } else if unique_words.iter().any(|w| SERVICE_WORDS.contains(&w.as_str())) {
            "service type"
        } else {
            "subdomain segment"
        };

        for batch in unique_words.chunks(batch_size) {
            match expand_position_words_batch(
                base_url,
                model,
                batch,
                pos_idx,
                max_positions,
                position_context,
                &context_hostname,
            )
            .await
            {
                Ok(expanded_map) => {
                    // Map expanded words back to every structure that uses them
                    for word in batch {
                        if let (Some(expanded), Some(struct_indices)) =
                            (expanded_map.get(word), words_map.get(word))
                        {
                            for &struct_idx in struct_indices {
                                expansion_map.insert((struct_idx, pos_idx), expanded.clone());
                            }
                        }
                    }
                }
                Err(e) => {
                    // Nothing is recorded for this batch; the reconstruction step
                    // below falls back to the original word for missing entries.
                    eprintln!("Warning: Failed to expand batch at position {}: {}", pos_idx, e);
                }
            }

            completed += 1;
            if let Some(ref pb) = progress_bar {
                pb.set_position(completed as u64);
            }
        }
    }

    // Reconstruct results in original order; each key is consumed exactly once.
    let results: Vec<Vec<Vec<String>>> = structures
        .iter()
        .enumerate()
        .map(|(struct_idx, structure)| {
            structure
                .subdomain_segments
                .iter()
                .enumerate()
                .map(|(pos_idx, segment)| {
                    expansion_map
                        .remove(&(struct_idx, pos_idx))
                        .unwrap_or_else(|| vec![segment.clone()])
                })
                .collect()
        })
        .collect();

    Ok(results)
}

/// Expand a batch of words at a specific position using LLM.
///
/// Sends one chat request containing all `words` and expects a JSON object that maps
/// each input word to an array of related words.
///
/// Returns a map from original word to expanded alternatives. Every input word is
/// guaranteed to have a non-empty entry: alternatives are lowercased, trimmed,
/// filtered (non-empty, shorter than 50 bytes, no spaces, no dots) and de-duplicated
/// with the original word kept first. If the LLM reply cannot be parsed as the
/// expected object, each word maps to itself only.
///
/// # Errors
///
/// Fails when the HTTP client cannot be built, the request fails, the API answers
/// with a non-success status, or the response has no choices.
async fn expand_position_words_batch(
    base_url: &str,
    model: &str,
    words: &[String],
    position: usize,
    total_positions: usize,
    position_context: &str,
    hostname_context: &str,
) -> Result<HashMap<String, Vec<String>>> {
    if words.is_empty() {
        return Ok(HashMap::new());
    }

    let prompt = format!(
        r#"You are helping discover backend server hostnames. Given these words from position {} (context: {}) in hostname "{}":
{:?}

For EACH word in the list, generate related words that might appear in the same position in other backend/internal server hostnames.
Think about:
- For environment positions: dev -> prod, test, staging, qa, uat, preprod, sandbox
- For service positions: api -> rest, graphql, rpc, service, gateway, app, web
- For organization positions: corp -> internal, int, private, team, org
- Synonyms and variations
- Common abbreviations
- Related concepts

Respond ONLY with a JSON object mapping each input word to an array of related words:
{{"word1": ["related1", "related2", ...], "word2": ["related1", "related2", ...], ...}}

Include the original word in each array. No explanations, only the JSON."#,
        position, position_context, hostname_context, words
    );

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(120)) // Longer timeout for batch processing
        .build()?;

    let request = ChatRequest {
        model: model.to_string(),
        messages: vec![ChatMessage {
            role: "user".to_string(),
            content: prompt,
        }],
        temperature: 0.7,
    };

    // Strip trailing slashes before inspecting the suffix, otherwise a base URL such
    // as ".../v1/" would be turned into ".../v1/v1/chat/completions".
    // NOTE(review): Ollama's native `/api/generate` endpoint is documented with a
    // prompt/response schema; confirm `ChatRequest`/`ChatResponse` are compatible.
    let base = base_url.trim_end_matches('/');
    let url = match detect_api_type(base_url) {
        ApiType::Ollama if base.ends_with("/v1") => format!("{}/chat/completions", base),
        ApiType::Ollama if base.ends_with("/api") => format!("{}/generate", base),
        ApiType::Ollama => format!("{}/v1/chat/completions", base),
        _ => format!("{}/chat/completions", base),
    };

    let resp = client.post(&url).json(&request).send().await?;

    if !resp.status().is_success() {
        anyhow::bail!("LLM API returned status: {}", resp.status());
    }

    let chat_response: ChatResponse = resp.json().await?;
    let content = chat_response
        .choices
        .first()
        .map(|c| c.message.content.clone())
        .context("No response content from LLM")?;

    // Extract JSON from response
    let json_content = extract_json(&content)?;

    // Best effort: an unparseable reply degrades to "original words only".
    let mut result_map: HashMap<String, Vec<String>> =
        serde_json::from_str(&json_content).unwrap_or_default();

    // Ensure all input words are in the result, with the original word first.
    for word in words {
        let suggestions = result_map.remove(word).unwrap_or_default();
        let mut seen: HashSet<String> = HashSet::with_capacity(suggestions.len() + 1);
        let mut cleaned: Vec<String> = Vec::with_capacity(suggestions.len() + 1);

        // Order-preserving de-duplication keeps the output deterministic.
        for candidate in std::iter::once(word).chain(suggestions.iter()) {
            let normalized = candidate.trim().to_lowercase();
            let acceptable = !normalized.is_empty()
                && normalized.len() < 50
                && !normalized.contains(' ')
                && !normalized.contains('.');
            if acceptable && seen.insert(normalized.clone()) {
                cleaned.push(normalized);
            }
        }

        // If even the original word was filtered out, keep it verbatim so the
        // position never ends up without any candidate.
        if cleaned.is_empty() {
            cleaned.push(word.clone());
        }

        result_map.insert(word.clone(), cleaned);
    }

    Ok(result_map)
}

/// Expand seed words using LLM to generate related subdomain words.
/// For example, "api" might expand to ["rest", "graphql", "rpc", "service", "services", "gateway"]
///
/// Asks the model for `count_per_word` related words per seed word and expects a
/// flat JSON array of strings in reply. The returned words are lowercased and
/// trimmed; empty words, words of 50 bytes or more, and words containing a space
/// are dropped. Returns an empty list when `seed_words` is empty, when
/// `count_per_word` is 0, or when the reply cannot be parsed as an array of strings.
///
/// # Errors
///
/// Fails when the HTTP client cannot be built, the request fails, the API answers
/// with a non-success status, or the response has no choices.
pub async fn expand_words_with_llm(base_url: &str, model: &str, seed_words: &[String], count_per_word: usize) -> Result<Vec<String>> {
    if seed_words.is_empty() || count_per_word == 0 {
        return Ok(Vec::new());
    }

    let prompt = format!(
        r#"You are helping discover backend server hostnames. Given these subdomain words extracted from a known backend URL: {:?}

For each word, generate {} related words that might appear in backend/internal server hostnames.
Think about:
- Synonyms (api -> rest, graphql, rpc)
- Versions (api -> api-v1, api-v2, api2)
- Related concepts (dev -> staging, test, qa, uat)
- Common abbreviations (development -> dev, production -> prod)
- Internal naming patterns (backend -> origin, internal, int)

Respond ONLY with a JSON array of unique strings, no duplicates, no explanations:
["word1", "word2", "word3", ...]"#,
        seed_words, count_per_word
    );

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(60))
        .build()?;

    let request = ChatRequest {
        model: model.to_string(),
        messages: vec![ChatMessage {
            role: "user".to_string(),
            content: prompt,
        }],
        temperature: 0.7, // Higher temperature for more creativity
    };

    // Strip trailing slashes before inspecting the suffix, otherwise a base URL such
    // as ".../v1/" would be turned into ".../v1/v1/chat/completions".
    // NOTE(review): Ollama's native `/api/generate` endpoint is documented with a
    // prompt/response schema; confirm `ChatRequest`/`ChatResponse` are compatible.
    let base = base_url.trim_end_matches('/');
    let url = match detect_api_type(base_url) {
        ApiType::Ollama if base.ends_with("/v1") => format!("{}/chat/completions", base),
        ApiType::Ollama if base.ends_with("/api") => format!("{}/generate", base),
        ApiType::Ollama => format!("{}/v1/chat/completions", base),
        _ => format!("{}/chat/completions", base),
    };

    let resp = client.post(&url).json(&request).send().await?;

    if !resp.status().is_success() {
        anyhow::bail!("LLM API returned status: {}", resp.status());
    }

    let chat_response: ChatResponse = resp.json().await?;
    let content = chat_response
        .choices
        .first()
        .map(|c| c.message.content.clone())
        .context("No response content from LLM")?;

    // Extract JSON array from response
    let json_content = extract_json(&content)?;

    // Best effort: an unparseable reply yields no words rather than an error.
    let words: Vec<String> = serde_json::from_str(&json_content).unwrap_or_default();

    // Clean and filter words.
    // NOTE(review): the position-based expanders in this file also reject words
    // containing '.'; confirm whether dots are intentionally allowed here.
    let cleaned: Vec<String> = words
        .into_iter()
        .map(|w| w.trim().to_lowercase())
        .filter(|w| !w.is_empty() && w.len() < 50 && !w.contains(' '))
        .collect();

    Ok(cleaned)
}

/// Pull the JSON payload out of a raw LLM reply.
///
/// Handles three shapes, in this order:
/// 1. A reply that starts with a markdown code fence (three backticks, optionally
///    followed by a language tag): the lines between the opening fence and the next
///    fence line are returned; anything after the closing fence is ignored.
/// 2. A JSON object or array embedded in surrounding prose: the span from the first
///    opening bracket (`{` or `[`, whichever comes first) to the last matching
///    closing bracket is returned.
/// 3. Anything else: the trimmed input is returned unchanged.
///
/// The result is not validated as JSON; this function never returns `Err`.
fn extract_json(content: &str) -> Result<String> {
    let trimmed = content.trim();

    // Fenced reply: skip the opening fence line and stop at the closing fence so
    // trailing commentary does not leak into the payload.
    if trimmed.starts_with("```") {
        let fenced: Vec<&str> = trimmed
            .lines()
            .skip(1)
            .take_while(|line| !line.trim().starts_with("```"))
            .collect();

        if !fenced.is_empty() {
            return Ok(fenced.join("\n"));
        }
    }

    // Bracketed payload inside prose. Whichever opener appears first is tried first
    // so that arrays (including arrays of objects) are kept intact; the other kind
    // is still tried if the first has no closing bracket after it.
    let mut candidates = [(trimmed.find('{'), '}'), (trimmed.find('['), ']')];
    candidates.sort_by_key(|&(start, _)| start.unwrap_or(usize::MAX));

    for &(start, closer) in candidates.iter() {
        if let (Some(start), Some(end)) = (start, trimmed.rfind(closer)) {
            if end > start {
                // Both indices point at single-byte ASCII brackets, so the
                // inclusive slice always falls on character boundaries.
                return Ok(trimmed[start..=end].to_string());
            }
        }
    }

    // Fallback: return as-is
    Ok(trimmed.to_string())
}