swarm-engine-llm 0.1.6

//! LLM レスポンスのパース処理（共通モジュール）
//!
//! 複数の LLM バックエンド（Ollama, llama-server, llama-cpp）で共通の
//! レスポンスパース・修復ロジックを提供する。
//!
//! # 機能
//!
//! - JSON 抽出（マークダウンブロック対応）
//! - JSON 構文エラー修復（末尾カンマ等）
//! - フィールド名タイポ修復（taget → target）
//! - ツール名タイポ修復（Raed → Read）

use std::collections::HashMap;

use fuzzy_parser::distance::{find_closest, Algorithm};
use fuzzy_parser::{repair_object_fields, sanitize_json, FuzzyOptions, ObjectSchema};

use crate::decider::{ActionCandidate, DecisionResponse, LlmError};

/// Valid field names for an action-selection JSON object.
///
/// `parse_json` passes this schema to `repair_object_fields` so that
/// misspelled keys in the LLM output (e.g. `taget`) can be mapped back onto
/// one of `tool`, `target`, `args` or `confidence`.
pub const ACTION_FIELDS: ObjectSchema =
    ObjectSchema::new(&["tool", "target", "args", "confidence"]);

/// Collects the `name` of every candidate, in input order.
///
/// The resulting list is what `parse_json` / `parse_response` expect as the
/// scope for fuzzy tool-name repair.
pub fn candidate_names(candidates: &[ActionCandidate]) -> Vec<String> {
    let mut names = Vec::with_capacity(candidates.len());
    for candidate in candidates {
        names.push(candidate.name.clone());
    }
    names
}

/// Extracts the JSON portion of an LLM response.
///
/// Two strategies are tried in order:
///
/// 1. The content of the first Markdown fenced block opened with "```json"
///    (the closing fence may or may not be preceded by a newline). An empty
///    fenced block is ignored.
/// 2. The first brace-balanced `{ ... }` span anywhere in the text.
///
/// # Errors
///
/// Returns a permanent [`LlmError`] containing the full response text when
/// neither strategy finds anything.
pub fn extract_json(text: &str) -> Result<String, LlmError> {
    const FENCE_OPEN: &str = "```json";
    const FENCE_CLOSE: &str = "```";

    // Strategy 1: fenced ```json ... ``` block.
    let fenced = text
        .find(FENCE_OPEN)
        .and_then(|open_at| {
            let body = &text[open_at + FENCE_OPEN.len()..];
            body.find(FENCE_CLOSE).map(|close_at| body[..close_at].trim())
        })
        .filter(|json| !json.is_empty());

    if let Some(json) = fenced {
        return Ok(json.to_string());
    }

    // Strategy 2: first balanced { ... } object in the raw text.
    match extract_balanced_json(text) {
        Some(json) => Ok(json),
        None => Err(LlmError::permanent(format!(
            "No JSON found in response: {}",
            text
        ))),
    }
}

/// Returns the first brace-balanced `{ ... }` span in `text`, if any.
///
/// Scanning starts at the first `{`. Braces inside double-quoted strings are
/// ignored, and a backslash inside a string escapes the following character
/// (so `\"` does not terminate the string). Returns `None` when the text has
/// no `{` or when the object opened by the first `{` is never closed.
fn extract_balanced_json(text: &str) -> Option<String> {
    let open_at = text.find('{')?;
    let tail = &text[open_at..];

    let mut nesting = 0;
    let mut inside_string = false;
    let mut skip_escaped = false;

    for (offset, c) in tail.char_indices() {
        // The character after a backslash (inside a string) is taken literally.
        if skip_escaped {
            skip_escaped = false;
            continue;
        }

        if inside_string {
            match c {
                '\\' => skip_escaped = true,
                '"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match c {
            '"' => inside_string = true,
            '{' => nesting += 1,
            '}' => {
                nesting -= 1;
                if nesting == 0 {
                    // Include the closing brace itself in the slice.
                    return Some(tail[..offset + c.len_utf8()].to_string());
                }
            }
            _ => {}
        }
    }

    None
}

/// Adds the missing double quotes around bare object keys.
///
/// `{tool: "value"}` → `{"tool": "value"}`
///
/// A bare key is recognised only *outside* string literals: directly after a
/// `{` or `,` (optionally separated by whitespace), an identifier that starts
/// with an alphabetic character or `_`, continues with alphanumerics or `_`,
/// and is followed (optionally after whitespace) by `:`. Whitespace between
/// the key and its `:` is dropped when the key is rewritten.
///
/// Everything else is copied through unchanged. In particular, the contents of
/// double-quoted strings are never modified, so values such as
/// `"a, b: c"` or `"hello, world foo"` survive intact, and an identifier that
/// turns out not to be a key (e.g. `true` in an array) keeps its trailing
/// whitespace.
fn fix_unquoted_keys(json: &str) -> String {
    let chars: Vec<char> = json.chars().collect();
    let len = chars.len();
    // Each repaired key adds two bytes; leave a little headroom.
    let mut result = String::with_capacity(json.len() + 16);

    let mut in_string = false;
    let mut escape_next = false;
    let mut i = 0;

    while i < len {
        let ch = chars[i];
        result.push(ch);
        i += 1;

        // Inside a string literal: copy verbatim, only watch for its end.
        if in_string {
            if escape_next {
                escape_next = false;
            } else if ch == '\\' {
                escape_next = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        if ch == '"' {
            in_string = true;
            continue;
        }

        // Only `{` and `,` can be followed by an object key.
        if ch != '{' && ch != ',' {
            continue;
        }

        // Keep the whitespace that separates the delimiter from what follows.
        while i < len && chars[i].is_whitespace() {
            result.push(chars[i]);
            i += 1;
        }

        // A bare key must start with a letter or underscore; anything else
        // (including an already-quoted key) is handled by the main loop.
        if i >= len || !(chars[i].is_alphabetic() || chars[i] == '_') {
            continue;
        }

        let key_start = i;
        let mut key_end = i;
        while key_end < len && (chars[key_end].is_alphanumeric() || chars[key_end] == '_') {
            key_end += 1;
        }

        // Look ahead past whitespace without consuming it yet.
        let mut after_key = key_end;
        while after_key < len && chars[after_key].is_whitespace() {
            after_key += 1;
        }

        if after_key < len && chars[after_key] == ':' {
            // It is a key: emit it quoted and consume through the colon.
            result.push('"');
            result.extend(chars[key_start..key_end].iter());
            result.push('"');
            result.push(':');
            i = after_key + 1;
        } else {
            // Not a key (e.g. `true`, `null`): copy the identifier only and
            // let the main loop copy the following whitespace untouched.
            result.extend(chars[key_start..key_end].iter());
            i = key_end;
        }
    }

    result
}

/// Parses a JSON action object into a [`DecisionResponse`], repairing common
/// LLM mistakes along the way.
///
/// Processing steps:
///
/// 1. Bare object keys are quoted (`{tool: ...}` → `{"tool": ...}`).
/// 2. The text is parsed with `serde_json`.
/// 3. Misspelled field names are repaired against [`ACTION_FIELDS`].
/// 4. The `tool` value is kept as-is when it exactly matches one of
///    `candidates`; otherwise it is replaced by the closest candidate
///    (Jaro-Winkler, threshold 0.6) when one exists. With no candidates, or
///    no close enough match, the value is left untouched.
///
/// `confidence` defaults to `0.5` when absent or not a number. Only string
/// values inside `args` are kept.
///
/// # Errors
///
/// Returns a permanent [`LlmError`] when the text is not valid JSON, or when
/// the `tool` or `target` field is missing or not a string.
pub fn parse_json(json: &str, candidates: &[String]) -> Result<DecisionResponse, LlmError> {
    // Quote bare keys first so serde_json has a chance to accept the text.
    let requoted = fix_unquoted_keys(json);

    let mut parsed: serde_json::Value = match serde_json::from_str(&requoted) {
        Ok(value) => value,
        Err(e) => {
            return Err(LlmError::permanent(format!(
                "JSON parse error: {} (json: {})",
                e, json
            )));
        }
    };

    let options = FuzzyOptions::default();

    // Step 1: repair misspelled field names (taget → target, etc.).
    if let Some(obj) = parsed.as_object_mut() {
        let corrections = repair_object_fields(obj, &ACTION_FIELDS, "$", &options);
        if !corrections.is_empty() {
            tracing::debug!(
                corrections = ?corrections.iter().map(|c| format!("{} → {}", c.original, c.corrected)).collect::<Vec<_>>(),
                "Fuzzy repaired field names"
            );
        }
    }

    // Step 2: repair the tool value, restricted to the given candidates.
    let tool = match parsed["tool"].as_str() {
        None => return Err(LlmError::permanent("Missing 'tool' field")),
        // Exact hit: nothing to repair.
        Some(raw) if candidates.iter().any(|c| c == raw) => raw.to_string(),
        // No scope to repair against: keep what the model said.
        Some(raw) if candidates.is_empty() => raw.to_string(),
        Some(raw) => {
            let pool: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
            match find_closest(raw, pool, 0.6, Algorithm::JaroWinkler) {
                Some(m) => {
                    tracing::debug!(
                        original = raw,
                        corrected = %m.candidate,
                        similarity = m.similarity,
                        "Fuzzy repaired tool name"
                    );
                    m.candidate.to_string()
                }
                // Nothing similar enough: keep the original value.
                None => raw.to_string(),
            }
        }
    };

    let target = match parsed["target"].as_str() {
        Some(value) => value.to_string(),
        None => return Err(LlmError::permanent("Missing 'target' field")),
    };

    let confidence = parsed["confidence"].as_f64().unwrap_or(0.5);

    // Keep only the string-valued entries of `args`.
    let args: HashMap<String, String> = parsed["args"]
        .as_object()
        .map(|entries| {
            entries
                .iter()
                .filter_map(|(key, value)| value.as_str().map(|v| (key.clone(), v.to_string())))
                .collect()
        })
        .unwrap_or_default();

    Ok(DecisionResponse {
        tool,
        target,
        args,
        reasoning: None,
        confidence,
        prompt: None,
        raw_response: None,
    })
}

/// Parses a raw LLM response into a [`DecisionResponse`].
///
/// Full pipeline: extract the JSON portion with `extract_json`, run it
/// through `sanitize_json` to fix syntax slips such as trailing commas, then
/// hand it to `parse_json` for parsing and typo repair scoped to
/// `candidates`.
///
/// # Errors
///
/// Propagates the [`LlmError`] from `extract_json` or `parse_json`.
pub fn parse_response(text: &str, candidates: &[String]) -> Result<DecisionResponse, LlmError> {
    tracing::debug!(raw_output = %text, "LLM raw response");

    extract_json(text).and_then(|extracted| {
        // Fix JSON syntax errors before the strict parse.
        let cleaned = sanitize_json(&extracted);
        tracing::debug!(sanitized = %cleaned, "Sanitized JSON");

        parse_json(&cleaned, candidates)
    })
}

// ============================================================================
// Tests
// ============================================================================

// Unit tests for the response-parsing helpers. Each section exercises one
// public entry point; the literal inputs mimic typical LLM output shapes.
#[cfg(test)]
mod tests {
    use super::*;

    // =========================================================================
    // candidate_names Tests
    // =========================================================================

    // Names come back in the same order as the candidates.
    #[test]
    fn test_candidate_names() {
        let candidates = vec![
            ActionCandidate {
                name: "Read".to_string(),
                description: "Read a file".to_string(),
                params: vec![],
                example: None,
            },
            ActionCandidate {
                name: "Write".to_string(),
                description: "Write a file".to_string(),
                params: vec![],
                example: None,
            },
        ];

        let names = candidate_names(&candidates);
        assert_eq!(names, vec!["Read".to_string(), "Write".to_string()]);
    }

    // An empty candidate slice yields an empty name list.
    #[test]
    fn test_candidate_names_empty() {
        let candidates: Vec<ActionCandidate> = vec![];
        let names = candidate_names(&candidates);
        assert!(names.is_empty());
    }

    // =========================================================================
    // extract_json Tests
    // =========================================================================

    // Input that is exactly one JSON object is returned unchanged.
    #[test]
    fn test_extract_json_direct() {
        let text = r#"{"tool": "Read", "target": "src/main.rs", "confidence": 0.8}"#;
        let extracted = extract_json(text).unwrap();
        assert_eq!(extracted, text);
    }

    // Prose before the object is stripped: the result starts at `{`.
    #[test]
    fn test_extract_json_with_prefix() {
        let text =
            r#"Here is the action: {"tool": "Read", "target": "file.rs", "confidence": 0.8}"#;
        let extracted = extract_json(text).unwrap();
        assert!(extracted.starts_with('{'));
        assert!(extracted.ends_with('}'));
        assert!(extracted.contains("Read"));
    }

    // An object followed by trailing prose is still found.
    #[test]
    fn test_extract_json_with_suffix() {
        let text = r#"{"tool": "Grep", "target": "pattern", "confidence": 0.9} That's the action."#;
        let extracted = extract_json(text).unwrap();
        assert!(extracted.contains("Grep"));
    }

    // Fenced ```json block with newlines around the payload.
    #[test]
    fn test_extract_json_markdown_block_with_newline() {
        let text = "```json\n{\"tool\": \"Read\", \"target\": \"file.rs\"}\n```";
        let extracted = extract_json(text).unwrap();
        assert!(extracted.contains("\"tool\": \"Read\""));
    }

    // Fenced ```json block where the payload touches both fences.
    #[test]
    fn test_extract_json_markdown_block_without_newline() {
        let text = "```json{\"tool\": \"Read\", \"target\": \"file.rs\"}```";
        let extracted = extract_json(text).unwrap();
        assert!(extracted.contains("\"tool\": \"Read\""));
    }

    // Text with neither a fence nor braces produces a "No JSON found" error.
    #[test]
    fn test_extract_json_no_json() {
        let text = "This is just plain text without any JSON.";
        let result = extract_json(text);
        assert!(result.is_err());
        assert!(result.unwrap_err().message().contains("No JSON found"));
    }

    // Extraction must not stop at the inner `}` of a nested object.
    #[test]
    fn test_extract_json_nested_braces() {
        let text = r#"{"tool": "Read", "target": "src/main.rs", "args": {"encoding": "utf8"}, "confidence": 0.8}"#;
        let extracted = extract_json(text).unwrap();
        assert!(extracted.contains("args"));
        assert!(extracted.contains("encoding"));
    }

    // =========================================================================
    // parse_json Tests
    // =========================================================================

    // Well-formed input: tool, target and confidence are read as given.
    #[test]
    fn test_parse_json_basic() {
        let candidates = vec!["Read".to_string(), "Write".to_string(), "Grep".to_string()];
        let json = r#"{"tool": "Read", "target": "src/main.rs", "confidence": 0.85}"#;

        let response = parse_json(json, &candidates).unwrap();

        assert_eq!(response.tool, "Read");
        assert_eq!(response.target, "src/main.rs");
        assert!((response.confidence - 0.85).abs() < 0.01);
    }

    // String-valued entries of `args` end up in the response's args map.
    #[test]
    fn test_parse_json_with_args() {
        let candidates = vec!["Grep".to_string()];
        let json =
            r#"{"tool": "Grep", "target": "fn main", "args": {"path": "src/"}, "confidence": 0.9}"#;

        let response = parse_json(json, &candidates).unwrap();

        assert_eq!(response.tool, "Grep");
        assert_eq!(response.target, "fn main");
        assert_eq!(response.args.get("path"), Some(&"src/".to_string()));
    }

    // A missing `confidence` field falls back to 0.5.
    #[test]
    fn test_parse_json_default_confidence() {
        let candidates = vec!["Read".to_string()];
        let json = r#"{"tool": "Read", "target": "file.rs"}"#;

        let response = parse_json(json, &candidates).unwrap();
        assert!((response.confidence - 0.5).abs() < 0.01);
    }

    // `tool` is mandatory; the error message names the missing field.
    #[test]
    fn test_parse_json_missing_tool() {
        let candidates = vec!["Read".to_string()];
        let json = r#"{"target": "file.rs", "confidence": 0.8}"#;

        let result = parse_json(json, &candidates);
        assert!(result.is_err());
        assert!(result.unwrap_err().message().contains("tool"));
    }

    // `target` is mandatory; the error message names the missing field.
    #[test]
    fn test_parse_json_missing_target() {
        let candidates = vec!["Read".to_string()];
        let json = r#"{"tool": "Read", "confidence": 0.8}"#;

        let result = parse_json(json, &candidates);
        assert!(result.is_err());
        assert!(result.unwrap_err().message().contains("target"));
    }

    // =========================================================================
    // Fuzzy Repair Tests
    // =========================================================================

    // Transposed letters in the tool value are corrected to a candidate name.
    #[test]
    fn test_fuzzy_repair_tool_typo() {
        let candidates = vec![
            "Read".to_string(),
            "Grep".to_string(),
            "Dir".to_string(),
            "Write".to_string(),
        ];

        // "Raed" → "Read"
        let json = r#"{"tool": "Raed", "target": "src/main.rs", "confidence": 0.8}"#;
        let response = parse_json(json, &candidates).unwrap();
        assert_eq!(response.tool, "Read");

        // "Grpe" → "Grep"
        let json = r#"{"tool": "Grpe", "target": "pattern", "confidence": 0.8}"#;
        let response = parse_json(json, &candidates).unwrap();
        assert_eq!(response.tool, "Grep");
    }

    // Misspelled field names are mapped back to the schema's field names.
    #[test]
    fn test_fuzzy_repair_field_typo() {
        let candidates = vec!["Read".to_string()];

        // "taget" → "target"
        let json = r#"{"tool": "Read", "taget": "src/main.rs", "confidence": 0.8}"#;
        let response = parse_json(json, &candidates).unwrap();
        assert_eq!(response.target, "src/main.rs");

        // "confindence" → "confidence"
        let json = r#"{"tool": "Read", "target": "file.rs", "confindence": 0.9}"#;
        let response = parse_json(json, &candidates).unwrap();
        assert!((response.confidence - 0.9).abs() < 0.01);
    }

    // Repair only ever picks from the supplied candidates, never from a
    // global tool list.
    #[test]
    fn test_fuzzy_repair_scoped_to_candidates() {
        // "Writ" should match "Wait" when candidates = ["Wait", "Start"]
        let candidates = vec!["Wait".to_string(), "Start".to_string()];
        let json = r#"{"tool": "Writ", "target": "task", "confidence": 0.8}"#;
        let response = parse_json(json, &candidates).unwrap();
        assert_eq!(response.tool, "Wait"); // Not "Write"
    }

    // With no candidates there is nothing to repair against, so the tool
    // value passes through unchanged.
    #[test]
    fn test_fuzzy_repair_no_candidates() {
        let empty: Vec<String> = vec![];
        let json = r#"{"tool": "Raed", "target": "file.rs", "confidence": 0.8}"#;
        let response = parse_json(json, &empty).unwrap();
        assert_eq!(response.tool, "Raed"); // Not repaired
    }

    // =========================================================================
    // parse_response (Full Pipeline) Tests
    // =========================================================================

    // Leading prose on the same line as the object is ignored.
    #[test]
    fn test_parse_response_with_prefix() {
        let candidates = vec!["Read".to_string(), "Grep".to_string()];
        let text =
            r#"I'll read the file: {"tool": "Read", "target": "src/main.rs", "confidence": 0.85}"#;

        let response = parse_response(text, &candidates).unwrap();
        assert_eq!(response.tool, "Read");
    }

    // A trailing comma before the closing brace does not prevent parsing.
    #[test]
    fn test_parse_response_trailing_comma() {
        let candidates = vec!["Read".to_string()];
        let text = r#"{"tool": "Read", "target": "file.rs", "confidence": 0.8,}"#;

        let response = parse_response(text, &candidates).unwrap();
        assert_eq!(response.tool, "Read");
    }

    // A reasoning sentence on its own line before the object is ignored.
    #[test]
    fn test_parse_response_with_reasoning() {
        let candidates = vec!["Grep".to_string()];
        let text = r#"Based on the task, I should search for the pattern.
{"tool": "Grep", "target": "fn main", "confidence": 0.9}"#;

        let response = parse_response(text, &candidates).unwrap();
        assert_eq!(response.tool, "Grep");
        assert_eq!(response.target, "fn main");
    }

    // All repairs applied together in a single response.
    #[test]
    fn test_parse_response_combined_repairs() {
        let candidates = vec!["Read".to_string(), "Write".to_string()];
        // Multiple typos: tool name + field name + trailing comma
        let text = r#"{"tool": "Raed", "taget": "src/lib.rs", "confindence": 0.75,}"#;

        let response = parse_response(text, &candidates).unwrap();
        assert_eq!(response.tool, "Read");
        assert_eq!(response.target, "src/lib.rs");
        assert!((response.confidence - 0.75).abs() < 0.01);
    }
}