car-reason 0.6.0

//! Problem classification — determines what kind of code problem this is.
//!
//! Classification is currently performed by keyword heuristics (`classify_by_keywords`).
//! `classify_problem` accepts an inference engine but does not consult a model yet.

use car_inference::InferenceEngine;

use crate::types::ProblemClass;
use crate::ReasonError;

/// Classify a problem description into a [`ProblemClass`].
///
/// The result comes entirely from [`classify_by_keywords`], which is fast and
/// deterministic. The `_engine` argument is part of the signature but is not
/// consulted by this function, so no model inference takes place here.
///
/// # Errors
///
/// The current implementation never fails; it always returns `Ok`.
pub async fn classify_problem(
    _engine: &InferenceEngine,
    problem: &str,
) -> Result<ProblemClass, ReasonError> {
    Ok(classify_by_keywords(problem))
}

/// Keyword-based classification fallback.
pub fn classify_by_keywords(problem: &str) -> ProblemClass {
    let lower = problem.to_lowercase();

    // Question patterns — strong signal for explanation/architecture
    let question_starters = ["why ", "why does", "how does", "what is", "what are",
                              "what's the", "can you explain", "describe how",
                              "trade-off", "tradeoff", "pros and cons",
                              "compared to", "versus", "vs "];
    let is_question = question_starters.iter().any(|q| lower.contains(q));

    // "How would you" is a design/feature question, not explanation
    let is_how_would = lower.contains("how would") || lower.contains("how to add")
        || lower.contains("how can we") || lower.contains("how do you");

    // If it's clearly a question about design/reasoning, classify early
    if is_question && !is_how_would {
        // Check if it's about architecture specifically
        let arch_terms = ["architecture", "design", "trade-off", "tradeoff", "pattern",
                          "approach", "decision", "structure", "versus", "vs "];
        if arch_terms.iter().any(|t| lower.contains(t)) {
            return ProblemClass::Architecture;
        }
        return ProblemClass::Explanation;
    }

    let scores = [
        (ProblemClass::BugFix, &["bug", "fix", "broken", "error", "crash", "wrong", "fail", "issue", "debug"][..]),
        (ProblemClass::Refactor, &["refactor", "clean", "simplify", "restructure", "rename", "extract"]),
        (ProblemClass::Performance, &["slow", "optimize", "performance", "latency", "throughput", "bottleneck"]),
        (ProblemClass::TestWriting, &["test", "coverage", "assert", "spec", "unit test", "integration test"]),
        (ProblemClass::Architecture, &["architecture", "design", "pattern", "structure", "approach", "trade-off"]),
        // NewFeature: removed overly generic words ("add", "new", "build", "support")
        // that match too many non-feature queries
        (ProblemClass::NewFeature, &["implement", "create", "feature", "endpoint", "module", "component"]),
        (ProblemClass::Explanation, &["explain", "understand", "describe", "clarify", "meaning"]),
    ];

    let mut best = ProblemClass::Unknown;
    let mut best_score = 0usize;

    for (class, keywords) in &scores {
        let score = keywords.iter().filter(|kw| lower.contains(*kw)).count();
        if score > best_score {
            best_score = score;
            best = *class;
        }
    }

    // "How would you add/implement X" → NewFeature even without the generic keywords
    if best == ProblemClass::Unknown && is_how_would {
        best = ProblemClass::NewFeature;
    }

    best
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `classify_by_keywords` on each input and checks it against the
    /// paired expected class, reporting the offending input on failure.
    fn assert_classified(cases: &[(&str, ProblemClass)]) {
        for (input, expected) in cases {
            assert_eq!(classify_by_keywords(input), *expected, "input: {:?}", input);
        }
    }

    #[test]
    fn keyword_classification() {
        // One representative phrase per problem class.
        assert_classified(&[
            ("fix this broken test", ProblemClass::BugFix),
            ("refactor the authentication module", ProblemClass::Refactor),
            ("optimize the database query performance", ProblemClass::Performance),
            ("implement a new endpoint for user profiles", ProblemClass::NewFeature),
            ("explain how the routing works", ProblemClass::Explanation),
            ("write unit tests for the parser", ProblemClass::TestWriting),
            ("design the architecture for the new service", ProblemClass::Architecture),
        ]);
    }

    #[test]
    fn ambiguous_defaults_to_strongest() {
        assert_classified(&[("fix this error in the code", ProblemClass::BugFix)]);
    }

    #[test]
    fn question_patterns_classify_correctly() {
        assert_classified(&[
            // "Why does X use Y" → Explanation (not NewFeature)
            (
                "Why does car-reason use memgine skills for reasoning actions instead of a hardcoded pipeline?",
                ProblemClass::Explanation,
            ),
            // "What are the trade-offs" → Architecture
            ("What are the trade-offs of this design?", ProblemClass::Architecture),
            // "How would you add X" → NewFeature (not Explanation)
            (
                "How would you add action-aware routing so simple actions use small models?",
                ProblemClass::NewFeature,
            ),
        ]);
    }
}