reasonkit-core 0.1.8

//! # Proof Parser
//!
//! Parse mathematical proofs from various formats (MathML, LaTeX, structured JSON).

use crate::error::{Error, Result};
use crate::verification::types::*;
use serde_json::Value;

/// Supported proof formats
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProofFormat {
    /// Structured JSON format
    JSON,
    /// LaTeX mathematical notation
    LaTeX,
    /// MathML XML format
    MathML,
    /// Plain text with LaTeX inline expressions
    Text,
}

/// Proof parser for mathematical proofs
pub struct ProofParser {
    format: ProofFormat,
}

impl ProofParser {
    /// Create new parser with specified format
    pub fn new(format: ProofFormat) -> Self {
        Self { format }
    }

    /// Create parser that auto-detects format
    pub fn auto(content: &str) -> Self {
        let format = if content.starts_with('{') && content.contains("\"statements\"") {
            ProofFormat::JSON
        } else if content.contains("<math>") || content.contains("[itex]") {
            ProofFormat::MathML
        } else if content.contains("$$") || content.contains("\\[") {
            ProofFormat::LaTeX
        } else {
            ProofFormat::Text
        };

        Self { format }
    }

    /// Parse proof from string
    pub fn parse(&self, content: &str) -> Result<Proof> {
        match self.format {
            ProofFormat::JSON => self.parse_json(content),
            ProofFormat::LaTeX => self.parse_latex(content),
            ProofFormat::MathML => self.parse_mathml(content),
            ProofFormat::Text => self.parse_text(content),
        }
    }

    /// Parse JSON formatted proof
    fn parse_json(&self, content: &str) -> Result<Proof> {
        let json: Value = serde_json::from_str(content)
            .map_err(|e| Error::Verification(format!("Failed to parse JSON proof: {}", e)))?;

        let problem = json["problem"]
            .as_str()
            .ok_or_else(|| Error::Verification("Missing 'problem' field".to_string()))?
            .to_string();

        let method = json["method"].as_str().unwrap_or("unknown").to_string();

        let answer = json["answer"].as_str().map(|s| s.to_string());

        let statements: Vec<MathStatement> = json["statements"]
            .as_array()
            .ok_or_else(|| Error::Verification("Missing 'statements' array".to_string()))?
            .iter()
            .map(|stmt| self.parse_statement(stmt))
            .collect::<Vec<_>>()
            .into_iter()
            .collect::<Vec<_>>();

        let theorems: Vec<String> = json["theorems_used"]
            .as_array()
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.as_str())
                    .map(String::from)
                    .collect()
            })
            .unwrap_or_default();

        let confidence = json["confidence"].as_f64().unwrap_or(0.8);

        Ok(Proof {
            id: uuid::Uuid::new_v4().to_string(),
            problem,
            method,
            statements,
            answer,
            theorems,
            confidence,
            metadata: ProofMetadata {
                generated_at: json["generated_at"]
                    .as_str()
                    .unwrap_or_else(|| chrono::Utc::now().to_rfc3339())
                    .to_string(),
                generated_by: json["generated_by"]
                    .as_str()
                    .unwrap_or("unknown")
                    .to_string(),
                step_count: statements.len(),
                variant: json["variant"].as_str().map(String::from),
                execution_time_ms: json["execution_time_ms"].as_u64().unwrap_or(0),
                token_usage: TokenUsage {
                    input_tokens: json["token_usage"]
                        .get("input_tokens")
                        .and_then(|v| v.as_u64())
                        .unwrap_or(0) as u32,
                    output_tokens: json["token_usage"]
                        .get("output_tokens")
                        .and_then(|v| v.as_u64())
                        .unwrap_or(0) as u32,
                    total_tokens: json["token_usage"]
                        .get("total_tokens")
                        .and_then(|v| v.as_u64())
                        .unwrap_or(0) as u32,
                },
            },
        })
    }

    /// Parse single statement from JSON
    fn parse_statement(&self, stmt: &Value) -> Result<MathStatement> {
        let stmt_type = stmt["type"]
            .as_str()
            .ok_or_else(|| Error::Verification("Missing statement type".to_string()))?;

        let latex = stmt["latex"]
            .as_str()
            .unwrap_or_else(|| stmt["statement"].as_str().unwrap_or(""))
            .to_string();

        match stmt_type {
            "axiom" => Ok(MathStatement::Axiom {
                name: stmt["name"].as_str().unwrap_or("unknown").to_string(),
                latex,
            }),
            "conclusion" => Ok(MathStatement::Conclusion {
                statement: stmt["statement"].as_str().unwrap_or("").to_string(),
                latex,
            }),
            "step" | _ => Ok(MathStatement::Step {
                statement: stmt["statement"].as_str().unwrap_or("").to_string(),
                latex,
                justification: stmt["justification"]
                    .as_str()
                    .unwrap_or("unknown")
                    .to_string(),
                theorems: stmt["theorems"]
                    .as_array()
                    .map(|arr| {
                        arr.iter()
                            .filter_map(|v| v.as_str())
                            .map(String::from)
                            .collect()
                    })
                    .unwrap_or_default(),
            }),
        }
    }

    /// Parse LaTeX formatted proof
    fn parse_latex(&self, content: &str) -> Result<Proof> {
        let lines: Vec<&str> = content.lines().collect();
        let mut statements = Vec::new();
        let mut theorems = Vec::new();
        let mut current_block = String::new();
        let mut in_display_math = false;

        // Extract problem id (simple heuristic)
        let problem = if content.contains("Problem") {
            content
                .lines()
                .find(|l| l.contains("Problem"))
                .map(|l| l.trim().to_string())
                .unwrap_or_else(|| "unknown".to_string())
        } else {
            "unknown".to_string()
        };

        for line in &lines {
            let line = line.trim();

            // Detect display math blocks
            if line.contains("$$") || line.contains("\\[") {
                in_display_math = !in_display_math;
                continue;
            }

            // Extract inline math
            let latex_parts: Vec<&str> = line.split('$').collect();
            let latex = if latex_parts.len() >= 3 {
                latex_parts[1] // Get content between first and second $
            } else {
                ""
            };

            // Build statement
            if !line.is_empty() && !line.starts_with("%") && !line.starts_with("#") {
                let statement = MathStatement::Step {
                    statement: line.replace('$', "").to_string(),
                    latex: if !latex.is_empty() {
                        format!("$${}$$", latex)
                    } else {
                        line.to_string()
                    },
                    justification: "LaTeX proof".to_string(),
                    theorems: if line.to_lowercase().contains("theorem") {
                        vec!["theorem".to_string()]
                    } else {
                        Vec::new()
                    },
                };
                statements.push(statement);
            }

            // Extract theorem references
            if line.to_lowercase().contains("theorem") {
                let words: Vec<&str> = line.split_whitespace().collect();
                if let Some(idx) = words.iter().position(|&w| w.to_lowercase() == "theorem") {
                    if idx + 1 < words.len() {
                        let theorem_name = words[idx + 1].trim_matches(&['.', ',', ':'][..]);
                        if !theorem_name.is_empty() && !theorems.contains(&theorem_name.to_string())
                        {
                            theorems.push(theorem_name.to_string());
                        }
                    }
                }
            }
        }

        Ok(Proof {
            id: uuid::Uuid::new_v4().to_string(),
            problem,
            method: "LaTeX proof".to_string(),
            statements,
            answer: None, // Would need more sophisticated extraction
            theorems,
            confidence: 0.8, // Placeholder
            metadata: ProofMetadata {
                generated_at: chrono::Utc::now().to_rfc3339(),
                generated_by: "LaTeX parser".to_string(),
                step_count: statements.len(),
                variant: None,
                execution_time_ms: 0,
                token_usage: TokenUsage {
                    input_tokens: 0,
                    output_tokens: 0,
                    total_tokens: 0,
                },
            },
        })
    }

    /// Parse MathML formatted proof (basic implementation)
    fn parse_mathml(&self, content: &str) -> Result<Proof> {
        // This is a simplified MathML parser
        // A full implementation would use an XML parser

        let statements = vec![MathStatement::Axiom {
            name: "MathML".to_string(),
            latex: content.to_string(),
        }];

        Ok(Proof {
            id: uuid::Uuid::new_v4().to_string(),
            problem: "MathML proof".to_string(),
            method: "MathML".to_string(),
            statements,
            answer: None,
            theorems: vec![],
            confidence: 0.7,
            metadata: ProofMetadata {
                generated_at: chrono::Utc::now().to_rfc3339(),
                generated_by: "MathML parser".to_string(),
                step_count: statements.len(),
                variant: None,
                execution_time_ms: 0,
                token_usage: TokenUsage::default(),
            },
        })
    }

    /// Parse plain text proof
    fn parse_text(&self, content: &str) -> Result<Proof> {
        let statements: Vec<MathStatement> = content
            .lines()
            .filter(|line| !line.trim().is_empty() && !line.trim().starts_with("#"))
            .map(|line| MathStatement::Step {
                statement: line.trim().to_string(),
                latex: line.trim().to_string(),
                justification: "Text proof".to_string(),
                theorems: Vec::new(),
            })
            .collect();

        Ok(Proof {
            id: uuid::Uuid::new_v4().to_string(),
            problem: "Text proof".to_string(),
            method: "text".to_string(),
            statements,
            answer: None,
            theorems: vec![],
            confidence: 0.75,
            metadata: ProofMetadata {
                generated_at: chrono::Utc::now().to_rfc3339(),
                generated_by: "Text parser".to_string(),
                step_count: statements.len(),
                variant: None,
                execution_time_ms: 0,
                token_usage: TokenUsage::default(),
            },
        })
    }
}

impl Default for TokenUsage {
    fn default() -> Self {
        Self {
            input_tokens: 0,
            output_tokens: 0,
            total_tokens: 0,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_json_parser() {
        let json = r#"{
            "problem": "Test problem",
            "method": "direct",
            "statements": [
                {
                    "type": "step",
                    "statement": "x = 2",
                    "latex": "$x = 2$",
                    "justification": "Given",
                    "theorems": []
                }
            ],
            "answer": "2",
            "confidence": 0.9
        }"#;

        let parser = ProofParser::new(ProofFormat::JSON);
        let proof = parser.parse(json).unwrap();

        assert_eq!(proof.problem, "Test problem");
        assert_eq!(proof.statements.len(), 1);
        assert_eq!(proof.confidence, 0.9);
    }

    #[test]
    fn test_auto_format_detection() {
        let json_content = r#"{"problem": "test", "statements": []}"#;
        let latex_content = r#"$$x^2 + y^2$$"#;
        let text_content = "Plain text proof";

        assert!(matches!(
            ProofParser::auto(json_content).format,
            ProofFormat::JSON
        ));
        assert!(matches!(
            ProofParser::auto(latex_content).format,
            ProofFormat::LaTeX
        ));
        assert!(matches!(
            ProofParser::auto(text_content).format,
            ProofFormat::Text
        ));
    }

    #[test]
    fn test_latex_parser() {
        let latex = r#"Problem: Quadratic equation

Let $ax^2 + bx + c = 0$.

The quadratic formula is:
$$x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}$$

By the theorem of quadratics.

Answer: 2"#;

        let parser = ProofParser::new(ProofFormat::LaTeX);
        let proof = parser.parse(latex).unwrap();

        assert!(!proof.statements.is_empty());
        assert!(!proof.theorems.is_empty());
    }
}