harper-core 2.0.0

The language checker for developers.
Documentation
use crate::{Span, Token, patterns::WordSet};

use super::{Expr, SequenceExpr};

/// An expression pairing a pronoun with a form of "be".
///
/// The `Default` impl in this file builds two alternatives: a sequence built
/// with `then_subject_pronoun`, whitespace, and one of "am", "are", "is",
/// "was", "were"; or a single contracted word such as "i'm" or "they're".
pub struct PronounBe {
    /// The composed expression that `run` delegates to.
    expr: SequenceExpr,
}

impl Default for PronounBe {
    /// Builds the matcher from two alternatives: the spelled-out form and the
    /// contracted form.
    fn default() -> Self {
        // Spelled-out form: pronoun, whitespace, then a form of "be".
        let spelled_out = SequenceExpr::default()
            .then_subject_pronoun()
            .t_ws()
            .t_set(&["am", "are", "is", "was", "were"]);

        // Contracted form: a single word fusing the pronoun and the verb.
        let contracted = WordSet::new(&[
            "i'm", "we're", "you're", "he's", "she's", "it's", "they're",
        ]);

        let expr = SequenceExpr::default()
            .then_any_of(vec![Box::new(spelled_out), Box::new(contracted)]);

        Self { expr }
    }
}

impl Expr for PronounBe {
    /// Runs the inner expression at `cursor`.
    ///
    /// Returns `None` without consulting the inner expression when `toks` is
    /// empty; otherwise returns whatever the inner expression returns.
    fn run(&self, cursor: usize, toks: &[Token], src: &[char]) -> Option<Span<Token>> {
        match toks {
            // Nothing to match against.
            [] => None,
            _ => self.expr.run(cursor, toks, src),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::Document;
    use crate::expr::{ExprExt, PronounBe};

    /// Asserts that a default `PronounBe` yields exactly `expected_count`
    /// matches in `text`.
    fn assert_count(text: &str, expected_count: usize) {
        let doc = Document::new_plain_english_curated(text);
        let found = PronounBe::default().iter_matches_in_doc(&doc).count();
        assert_eq!(found, expected_count);
    }

    // The sample sentences below are kept verbatim, including their typos.

    // First person singular, spelled out.
    #[test]
    fn ok_i_am() {
        assert_count("I am an auditor of software in M&A.", 1);
    }

    // First person singular, contracted.
    #[test]
    fn ok_i_m() {
        assert_count(
            "I'm using Beads in every new project and adding it to every old project I visit with an agent.",
            1,
        );
    }

    // First person plural, spelled out.
    #[test]
    fn ok_we_are() {
        assert_count(
            "Error: we are enable to complete your request at this time.",
            1,
        );
    }

    // First person plural, contracted.
    #[test]
    fn ok_we_re() {
        assert_count(
            "We're currently experiencing high demand, which may cause temporary errors.",
            1,
        );
    }

    // Second person, spelled out.
    #[test]
    fn ok_you_are() {
        assert_count(
            "You are Dolphin, an uncensored and unbiased AI assistant.",
            1,
        );
    }

    // Second person, contracted.
    #[test]
    fn ok_you_re() {
        assert_count(
            "You're trying to use a SD1 LoRA model with a SDXL Stable Diffusion model.",
            1,
        );
    }

    // Third person masculine, spelled out.
    #[test]
    fn ok_he_is() {
        assert_count("He is just a modulary guy for checking service health.", 1);
    }

    // One contracted and one spelled-out match in the same sentence.
    #[test]
    fn ok_he_s_and_it_is() {
        assert_count(
            "He's Dead, Jim is a link checking program, specifically it is a command-line tool for finding and reporting dead links",
            2,
        );
    }

    // Third person feminine, spelled out; the pronoun is upper-case here.
    #[test]
    fn ok_she_is() {
        assert_count(
            "SHE is designed by following the SHE functional specification.",
            1,
        );
    }

    // Third person feminine, contracted.
    #[test]
    fn ok_she_s() {
        assert_count(
            "She's Coding is an open-source website project currently under development in cooperation with the documentary film CODE: Debugging the Gender Gap.",
            1,
        );
    }

    // Third person neuter, spelled out.
    #[test]
    fn ok_it_is() {
        assert_count("It is same as ms-dos command where.exe.", 1);
    }

    // One contracted and one spelled-out match in the same text.
    #[test]
    fn ok_it_s_and_you_are() {
        assert_count(
            "It's not working · Microcontrollers have only a limited amount of RAM: Verify that you are not running out of available RAM!",
            2,
        );
    }

    // Third person plural, spelled out.
    #[test]
    fn ok_they_are() {
        assert_count(
            "Multidist binaries ignore the way they are called and check only the running binary name, making them hard to invoke from a single one.",
            1,
        );
    }

    // Third person plural, contracted.
    #[test]
    fn ok_they_re() {
        assert_count("Cannot load dataset, they're greyed out.", 1);
    }

    // Straight and typographic apostrophes are both expected to match; a
    // semicolon or an acute accent in the apostrophe position is not.
    #[test]
    fn various_apostrophes() {
        assert_count("it's/it’s", 2);
        assert_count("it;s/it´s", 0);
    }
}