bito-lint-core 0.1.1

//! Grammar issue detection.
//!
//! Checks for common grammar issues: subject-verb agreement, double negatives,
//! run-on sentences, comma splices, double spaces, and missing punctuation.

use std::sync::LazyLock;

use regex::Regex;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

/// A detected grammar issue.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct GrammarIssue {
    /// The type of grammar issue.
    pub issue_type: GrammarIssueType,
    /// Human-readable description of the issue.
    pub message: String,
    /// The sentence number (1-indexed) where the issue was found.
    pub sentence_num: usize,
    /// Severity of the issue.
    pub severity: Severity,
}

/// Types of grammar issues that can be detected.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub enum GrammarIssueType {
    /// Singular subject with plural verb or vice versa.
    SubjectVerbAgreement,
    /// Two negatives in the same clause.
    DoubleNegative,
    /// Overly long sentence with multiple independent clauses.
    RunOnSentence,
    /// Two independent clauses joined only by a comma.
    CommaSplice,
    /// Multiple consecutive spaces.
    DoubleSpace,
    /// Sentence missing terminal punctuation.
    MissingPunctuation,
}

/// Issue severity.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, JsonSchema)]
pub enum Severity {
    /// Style suggestion — not necessarily wrong.
    Low,
    /// Likely issue worth addressing.
    Medium,
    /// Clear grammar error.
    High,
}

// -- Regex patterns --------------------------------------------------------

/// Subject-verb agreement patterns and their descriptions.
static SUBJECT_VERB_PATTERNS: LazyLock<Vec<(Regex, &'static str)>> = LazyLock::new(|| {
    vec![
        // Singular subject with plural verb
        (
            Regex::new(r"\b(he|she|it)\s+(are|were|have)\b").expect("valid regex"),
            "Singular subject with plural verb",
        ),
        (
            Regex::new(r"\b(the\s+\w+)\s+(are|were|have)\b").expect("valid regex"),
            "Possible singular subject with plural verb",
        ),
        // Plural subject with singular verb
        (
            Regex::new(r"\b(they|we|you)\s+(is|was|has)\b").expect("valid regex"),
            "Plural subject with singular verb",
        ),
        (
            Regex::new(r"\b(the\s+\w+s)\s+(is|was|has)\b").expect("valid regex"),
            "Possible plural subject with singular verb",
        ),
    ]
});

/// Double negative pattern.
static DOUBLE_NEGATIVE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"\b(don't|doesn't|didn't|won't|can't|couldn't|shouldn't|wouldn't)\s+\w+\s+(no|nothing|nobody|never|nowhere|neither)\b",
    )
    .expect("valid regex")
});

/// Run-on sentence indicators (repeated conjunction patterns).
static RUN_ON_INDICATORS: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r",\s+(and|but|or|so)\s+\w+\s+\w+\s+,\s+(and|but|or|so)").expect("valid regex")
});

/// Multiple consecutive spaces.
static DOUBLE_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"  +").expect("valid regex"));

/// Check a list of sentences for grammar issues.
///
/// Returns all detected issues across all sentences.
#[tracing::instrument(skip_all, fields(sentence_count = sentences.len()))]
pub fn check_grammar(sentences: &[String]) -> Vec<GrammarIssue> {
    let mut issues = Vec::new();

    for (idx, sentence) in sentences.iter().enumerate() {
        let sentence_num = idx + 1;
        let lower = sentence.to_lowercase();

        // Double spaces (Low)
        if DOUBLE_SPACE.is_match(sentence) {
            issues.push(GrammarIssue {
                issue_type: GrammarIssueType::DoubleSpace,
                message: "Multiple consecutive spaces found".to_string(),
                sentence_num,
                severity: Severity::Low,
            });
        }

        // Missing end punctuation (Medium)
        let trimmed = sentence.trim();
        if !trimmed.is_empty()
            && !trimmed.ends_with('.')
            && !trimmed.ends_with('!')
            && !trimmed.ends_with('?')
        {
            issues.push(GrammarIssue {
                issue_type: GrammarIssueType::MissingPunctuation,
                message: "Sentence missing terminal punctuation".to_string(),
                sentence_num,
                severity: Severity::Medium,
            });
        }

        // Subject-verb agreement (High)
        for (pattern, desc) in SUBJECT_VERB_PATTERNS.iter() {
            if pattern.is_match(&lower) {
                issues.push(GrammarIssue {
                    issue_type: GrammarIssueType::SubjectVerbAgreement,
                    message: (*desc).to_string(),
                    sentence_num,
                    severity: Severity::High,
                });
            }
        }

        // Double negatives (High)
        if DOUBLE_NEGATIVE.is_match(&lower) {
            issues.push(GrammarIssue {
                issue_type: GrammarIssueType::DoubleNegative,
                message: "Double negative detected".to_string(),
                sentence_num,
                severity: Severity::High,
            });
        }

        // Run-on sentences (Medium)
        if RUN_ON_INDICATORS.is_match(&lower) {
            issues.push(GrammarIssue {
                issue_type: GrammarIssueType::RunOnSentence,
                message: "Possible run-on sentence (multiple conjunction clauses)".to_string(),
                sentence_num,
                severity: Severity::Medium,
            });
        }

        // Comma splices (Medium)
        if check_comma_splice(sentence) {
            issues.push(GrammarIssue {
                issue_type: GrammarIssueType::CommaSplice,
                message: "Possible comma splice (two independent clauses joined by a comma)"
                    .to_string(),
                sentence_num,
                severity: Severity::Medium,
            });
        }
    }

    issues
}

/// Check for comma splice: two independent clauses joined only by a comma.
fn check_comma_splice(sentence: &str) -> bool {
    let parts: Vec<&str> = sentence.split(',').collect();
    if parts.len() < 2 {
        return false;
    }

    // Count how many comma-separated parts look like independent clauses
    let clause_count = parts
        .iter()
        .filter(|part| has_subject_and_verb(part.trim()))
        .count();

    // Two or more independent clauses separated by commas = comma splice
    clause_count >= 2
}

/// Simplified check for whether text has both a subject and verb.
fn has_subject_and_verb(text: &str) -> bool {
    if text.split_whitespace().count() < 3 {
        return false;
    }

    let has_subject = text.split_whitespace().any(|w| {
        matches!(
            w.to_lowercase().as_str(),
            "i" | "you"
                | "he"
                | "she"
                | "it"
                | "we"
                | "they"
                | "the"
                | "a"
                | "an"
                | "this"
                | "that"
        )
    });

    let has_verb = text.split_whitespace().any(|w| {
        matches!(
            w.to_lowercase().as_str(),
            "is" | "are"
                | "was"
                | "were"
                | "be"
                | "been"
                | "being"
                | "have"
                | "has"
                | "had"
                | "do"
                | "does"
                | "did"
                | "will"
                | "would"
                | "could"
                | "should"
                | "may"
                | "might"
                | "must"
                | "can"
                | "shall"
                | "go"
                | "goes"
                | "went"
                | "gone"
                | "make"
                | "makes"
                | "made"
                | "get"
                | "gets"
                | "got"
                | "say"
                | "says"
                | "said"
                | "know"
                | "knows"
                | "knew"
                | "think"
                | "thinks"
                | "thought"
                | "come"
                | "comes"
                | "came"
                | "take"
                | "takes"
                | "took"
                | "see"
                | "sees"
                | "saw"
                | "want"
                | "wants"
                | "wanted"
                | "look"
                | "looks"
                | "looked"
                | "use"
                | "uses"
                | "used"
                | "find"
                | "finds"
                | "found"
                | "give"
                | "gives"
                | "gave"
                | "tell"
                | "tells"
                | "told"
                | "work"
                | "works"
                | "worked"
                | "call"
                | "calls"
                | "called"
                | "try"
                | "tries"
                | "tried"
                | "ask"
                | "asks"
                | "asked"
                | "need"
                | "needs"
                | "needed"
                | "feel"
                | "feels"
                | "felt"
                | "become"
                | "becomes"
                | "became"
                | "leave"
                | "leaves"
                | "left"
                | "put"
                | "puts"
                | "run"
                | "runs"
                | "ran"
                | "keep"
                | "keeps"
                | "kept"
                | "let"
                | "lets"
                | "begin"
                | "begins"
                | "began"
                | "show"
                | "shows"
                | "showed"
                | "hear"
                | "hears"
                | "heard"
                | "play"
                | "plays"
                | "played"
                | "move"
                | "moves"
                | "moved"
                | "live"
                | "lives"
                | "lived"
                | "happen"
                | "happens"
                | "happened"
                | "write"
                | "writes"
                | "wrote"
                | "provide"
                | "provides"
                | "provided"
                | "read"
                | "reads"
                | "stand"
                | "stands"
                | "stood"
        )
    });

    has_subject && has_verb
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detects_subject_verb_agreement() {
        let sentences = vec!["He are going to the store.".to_string()];
        let issues = check_grammar(&sentences);
        assert!(
            issues
                .iter()
                .any(|i| i.issue_type == GrammarIssueType::SubjectVerbAgreement),
            "should detect subject-verb disagreement"
        );
    }

    #[test]
    fn detects_double_negative() {
        let sentences = vec!["She didn't do nothing wrong.".to_string()];
        let issues = check_grammar(&sentences);
        assert!(
            issues
                .iter()
                .any(|i| i.issue_type == GrammarIssueType::DoubleNegative),
            "should detect double negative"
        );
    }

    #[test]
    fn detects_double_space() {
        let sentences = vec!["There are  two spaces here.".to_string()];
        let issues = check_grammar(&sentences);
        assert!(
            issues
                .iter()
                .any(|i| i.issue_type == GrammarIssueType::DoubleSpace),
            "should detect double spaces"
        );
    }

    #[test]
    fn detects_missing_punctuation() {
        let sentences = vec!["This sentence has no ending".to_string()];
        let issues = check_grammar(&sentences);
        assert!(
            issues
                .iter()
                .any(|i| i.issue_type == GrammarIssueType::MissingPunctuation),
            "should detect missing punctuation"
        );
    }

    #[test]
    fn clean_sentence_no_issues() {
        let sentences = vec!["The cat sat on the mat.".to_string()];
        let issues = check_grammar(&sentences);
        // May have some false positives from comma splice check, but
        // should NOT have subject-verb, double-neg, or double-space
        let has_serious = issues.iter().any(|i| {
            matches!(
                i.issue_type,
                GrammarIssueType::SubjectVerbAgreement
                    | GrammarIssueType::DoubleNegative
                    | GrammarIssueType::DoubleSpace
            )
        });
        assert!(
            !has_serious,
            "clean sentence should have no serious grammar issues"
        );
    }
}