use std::sync::LazyLock;
use regex::Regex;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
/// A single grammar problem reported for one sentence of the checked input.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct GrammarIssue {
/// Category of the problem that was detected.
pub issue_type: GrammarIssueType,
/// Human-readable description of the problem.
pub message: String,
/// 1-based position of the offending sentence within the checked slice.
pub sentence_num: usize,
/// How serious the problem is considered to be.
pub severity: Severity,
}
/// The categories of grammar problem the checker can report.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub enum GrammarIssueType {
/// The subject and verb appear to disagree in number (e.g. "he are").
SubjectVerbAgreement,
/// A negated auxiliary is followed by a second negative word.
DoubleNegative,
/// The sentence chains multiple conjunction clauses.
RunOnSentence,
/// Two independent clauses appear to be joined by a comma.
CommaSplice,
/// The sentence contains multiple consecutive spaces.
DoubleSpace,
/// The sentence does not end with terminal punctuation.
MissingPunctuation,
}
/// How serious a reported grammar issue is.
///
/// Variants are declared in ascending order, so the derived `Ord`
/// implementation yields `Low < Medium < High`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, JsonSchema)]
pub enum Severity {
/// Lowest severity; used by `check_grammar` for double spaces.
Low,
/// Middle severity; used by `check_grammar` for missing punctuation,
/// run-on sentences and comma splices.
Medium,
/// Highest severity; used by `check_grammar` for subject-verb
/// disagreement and double negatives.
High,
}
/// Subject-verb disagreement heuristics, each paired with the message that is
/// reported when the pattern matches.
///
/// The patterns are written in lowercase and are meant to be run against
/// lowercased text. The two `the <noun>` patterns only guess at the noun's
/// number from a trailing `s`, which is why their messages say "Possible".
static SUBJECT_VERB_PATTERNS: LazyLock<Vec<(Regex, &'static str)>> = LazyLock::new(|| {
    [
        (
            r"\b(he|she|it)\s+(are|were|have)\b",
            "Singular subject with plural verb",
        ),
        (
            // `[\w--s]` is "any word character except `s`": the noun must not
            // end in `s`, otherwise every regular plural ("the dogs are")
            // would be reported as a singular subject.
            r"\b(the\s+\w*[\w--s])\s+(are|were|have)\b",
            "Possible singular subject with plural verb",
        ),
        (
            r"\b(they|we|you)\s+(is|was|has)\b",
            "Plural subject with singular verb",
        ),
        (
            r"\b(the\s+\w+s)\s+(is|was|has)\b",
            "Possible plural subject with singular verb",
        ),
    ]
    .into_iter()
    .map(|(pattern, description)| (Regex::new(pattern).expect("valid regex"), description))
    .collect()
});
/// Detects a double negative: a negated auxiliary, one intervening word, and
/// then a second negative word (e.g. "didn't do nothing").
///
/// The pattern is lowercase and is meant to be run against lowercased text.
static DOUBLE_NEGATIVE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = concat!(
        // A negated auxiliary verb ...
        r"\b(don't|doesn't|didn't|won't|can't|couldn't|shouldn't|wouldn't)",
        // ... then exactly one word in between ...
        r"\s+\w+\s+",
        // ... then the second negative.
        r"(no|nothing|nobody|never|nowhere|neither)\b",
    );
    Regex::new(pattern).expect("valid regex")
});
/// Heuristic for run-on sentences: a comma-plus-conjunction clause of exactly
/// two words that is immediately followed by another comma-plus-conjunction
/// (e.g. ", and he ran, but she ...").
///
/// The pattern is lowercase and is meant to be run against lowercased text.
static RUN_ON_INDICATORS: LazyLock<Regex> = LazyLock::new(|| {
    // `\s*` (not `\s+`) before the second comma: ordinary prose has no
    // whitespace in front of a comma, so requiring some meant the pattern
    // could never match normally punctuated text.
    // The trailing `\b` stops words that merely start with a conjunction
    // (", sometimes", ", orange") from counting as one.
    Regex::new(r",\s+(and|but|or|so)\s+\w+\s+\w+\s*,\s+(and|but|or|so)\b").expect("valid regex")
});
/// Matches a run of two or more consecutive space characters.
///
/// The repetition is spelled `{2,}` rather than as two literal spaces
/// followed by `+`: a single-space pattern would match every multi-word
/// sentence, and literal consecutive spaces are easy to lose to reformatting.
static DOUBLE_SPACE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r" {2,}").expect("valid regex"));
/// Runs every heuristic grammar check over `sentences` and collects the
/// findings.
///
/// Each element of `sentences` is treated as one sentence. Issues are returned
/// grouped by sentence in input order, and within a sentence in the order the
/// checks run: double space, missing punctuation, subject-verb agreement,
/// double negative, run-on sentence, comma splice. `sentence_num` in each
/// issue is the 1-based index of the sentence in `sentences`.
///
/// The checks are pattern-based heuristics, so both false positives and
/// false negatives are expected.
#[tracing::instrument(skip_all, fields(sentence_count = sentences.len()))]
pub fn check_grammar(sentences: &[String]) -> Vec<GrammarIssue> {
    /// Characters that end a sentence.
    const TERMINATORS: &[char] = &['.', '!', '?'];
    /// Closing quotes and brackets that may legitimately follow the
    /// terminator, as in `He said "stop!"` or `(See above.)`.
    const CLOSERS: &[char] = &['"', '\'', ')', ']', '\u{201D}', '\u{2019}'];

    let mut issues = Vec::new();
    for (idx, sentence) in sentences.iter().enumerate() {
        let sentence_num = idx + 1;
        // Single place where issues are built, so every check below only
        // states what it found.
        let mut report = |issue_type, message: &str, severity| {
            issues.push(GrammarIssue {
                issue_type,
                message: message.to_string(),
                sentence_num,
                severity,
            });
        };
        // The word-based patterns are written in lowercase.
        let lower = sentence.to_lowercase();

        if DOUBLE_SPACE.is_match(sentence) {
            report(
                GrammarIssueType::DoubleSpace,
                "Multiple consecutive spaces found",
                Severity::Low,
            );
        }

        let trimmed = sentence.trim();
        // Look past trailing quotes/brackets so a terminator inside them
        // still counts. Blank sentences are not reported.
        if !trimmed.is_empty() && !trimmed.trim_end_matches(CLOSERS).ends_with(TERMINATORS) {
            report(
                GrammarIssueType::MissingPunctuation,
                "Sentence missing terminal punctuation",
                Severity::Medium,
            );
        }

        // Every matching pattern is reported, so one sentence can yield
        // several subject-verb issues.
        for (pattern, desc) in SUBJECT_VERB_PATTERNS.iter() {
            if pattern.is_match(&lower) {
                report(GrammarIssueType::SubjectVerbAgreement, *desc, Severity::High);
            }
        }

        if DOUBLE_NEGATIVE.is_match(&lower) {
            report(
                GrammarIssueType::DoubleNegative,
                "Double negative detected",
                Severity::High,
            );
        }

        if RUN_ON_INDICATORS.is_match(&lower) {
            report(
                GrammarIssueType::RunOnSentence,
                "Possible run-on sentence (multiple conjunction clauses)",
                Severity::Medium,
            );
        }

        if check_comma_splice(sentence) {
            report(
                GrammarIssueType::CommaSplice,
                "Possible comma splice (two independent clauses joined by a comma)",
                Severity::Medium,
            );
        }
    }
    issues
}
/// Heuristically decides whether `sentence` contains a comma splice, i.e. two
/// independent clauses joined by nothing but a comma.
///
/// The sentence is split on commas and each piece is tested with
/// `has_subject_and_verb`. A piece after the first comma that opens with a
/// coordinating conjunction is not counted, because "clause, and clause" is a
/// correctly joined compound sentence rather than a splice. Returns `true`
/// when at least two pieces count as independent clauses.
fn check_comma_splice(sentence: &str) -> bool {
    /// Whether `part` opens with a coordinating conjunction. "for" is left
    /// out on purpose: after a comma it is far more often a preposition.
    fn starts_with_coordinating_conjunction(part: &str) -> bool {
        const CONJUNCTIONS: [&str; 6] = ["and", "but", "or", "nor", "so", "yet"];
        part.split_whitespace().next().is_some_and(|first| {
            CONJUNCTIONS
                .iter()
                .any(|conjunction| first.eq_ignore_ascii_case(conjunction))
        })
    }

    sentence
        .split(',')
        .map(str::trim)
        .enumerate()
        .filter(|&(position, part)| {
            // The first piece is not preceded by a comma, so a leading
            // conjunction there does not join it to anything.
            (position == 0 || !starts_with_coordinating_conjunction(part))
                && has_subject_and_verb(part)
        })
        // Two clauses are enough to decide; stop scanning after that.
        .take(2)
        .count()
        == 2
}
/// Heuristically decides whether `text` looks like an independent clause.
///
/// Returns `true` when `text` has at least three whitespace-separated tokens
/// and contains both a subject marker (a pronoun, article or demonstrative)
/// and one of a fixed list of common verbs. Matching is case-insensitive and
/// ignores punctuation attached to a token, so `"is."` and `"\"They"` count
/// as `is` and `they`.
fn has_subject_and_verb(text: &str) -> bool {
    /// Pronouns, articles and demonstratives that typically open a subject.
    fn is_subject_marker(word: &str) -> bool {
        matches!(
            word,
            "i" | "you" | "he" | "she" | "it" | "we" | "they" | "the" | "a" | "an" | "this" | "that"
        )
    }

    /// Auxiliaries plus a fixed list of common verbs in their usual forms.
    fn is_common_verb(word: &str) -> bool {
        matches!(
            word,
            "is" | "are" | "was" | "were" | "be" | "been" | "being"
                | "have" | "has" | "had"
                | "do" | "does" | "did"
                | "will" | "would" | "could" | "should"
                | "may" | "might" | "must" | "can" | "shall"
                | "go" | "goes" | "went" | "gone"
                | "make" | "makes" | "made"
                | "get" | "gets" | "got"
                | "say" | "says" | "said"
                | "know" | "knows" | "knew"
                | "think" | "thinks" | "thought"
                | "come" | "comes" | "came"
                | "take" | "takes" | "took"
                | "see" | "sees" | "saw"
                | "want" | "wants" | "wanted"
                | "look" | "looks" | "looked"
                | "use" | "uses" | "used"
                | "find" | "finds" | "found"
                | "give" | "gives" | "gave"
                | "tell" | "tells" | "told"
                | "work" | "works" | "worked"
                | "call" | "calls" | "called"
                | "try" | "tries" | "tried"
                | "ask" | "asks" | "asked"
                | "need" | "needs" | "needed"
                | "feel" | "feels" | "felt"
                | "become" | "becomes" | "became"
                | "leave" | "leaves" | "left"
                | "put" | "puts"
                | "run" | "runs" | "ran"
                | "keep" | "keeps" | "kept"
                | "let" | "lets"
                | "begin" | "begins" | "began"
                | "show" | "shows" | "showed"
                | "hear" | "hears" | "heard"
                | "play" | "plays" | "played"
                | "move" | "moves" | "moved"
                | "live" | "lives" | "lived"
                | "happen" | "happens" | "happened"
                | "write" | "writes" | "wrote"
                | "provide" | "provides" | "provided"
                | "read" | "reads"
                | "stand" | "stands" | "stood"
        )
    }

    let mut token_count = 0usize;
    let mut has_subject = false;
    let mut has_verb = false;
    // One pass, one lowercase allocation per token.
    for token in text.split_whitespace() {
        token_count += 1;
        // Strip surrounding punctuation so "is." or "(the" still match;
        // interior characters such as the apostrophe in "don't" are kept.
        let word = token
            .trim_matches(|c: char| !c.is_alphanumeric())
            .to_lowercase();
        has_subject = has_subject || is_subject_marker(&word);
        has_verb = has_verb || is_common_verb(&word);
    }

    // Fewer than three tokens is too short to be treated as a clause.
    token_count >= 3 && has_subject && has_verb
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks a single sentence and reports whether any issue of the
    /// `expected` type was produced for it.
    fn has_issue(sentence: &str, expected: GrammarIssueType) -> bool {
        check_grammar(&[sentence.to_string()])
            .iter()
            .any(|issue| issue.issue_type == expected)
    }

    #[test]
    fn detects_subject_verb_agreement() {
        assert!(
            has_issue(
                "He are going to the store.",
                GrammarIssueType::SubjectVerbAgreement
            ),
            "should detect subject-verb disagreement"
        );
    }

    #[test]
    fn detects_double_negative() {
        assert!(
            has_issue(
                "She didn't do nothing wrong.",
                GrammarIssueType::DoubleNegative
            ),
            "should detect double negative"
        );
    }

    #[test]
    fn detects_double_space() {
        // The two spaces are written as `\x20\x20` so they stay visible in
        // review and survive whitespace-normalising tools.
        assert!(
            has_issue(
                "There are\x20\x20two spaces here.",
                GrammarIssueType::DoubleSpace
            ),
            "should detect double spaces"
        );
    }

    #[test]
    fn detects_missing_punctuation() {
        assert!(
            has_issue(
                "This sentence has no ending",
                GrammarIssueType::MissingPunctuation
            ),
            "should detect missing punctuation"
        );
    }

    #[test]
    fn clean_sentence_no_issues() {
        let issues = check_grammar(&["The cat sat on the mat.".to_string()]);
        let has_serious = issues.iter().any(|issue| {
            matches!(
                issue.issue_type,
                GrammarIssueType::SubjectVerbAgreement
                    | GrammarIssueType::DoubleNegative
                    | GrammarIssueType::DoubleSpace
            )
        });
        assert!(
            !has_serious,
            "clean sentence should have no serious grammar issues"
        );
    }
}