harper_core/patterns/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners.
//!
//! Through the [`PatternLinter`](crate::linting::PatternLinter) trait, they make it much easier to
//! build Harper [rules](crate::linting::Linter).
//!
//! See the page about [`SequencePattern`] for a concrete example of their use.

use std::collections::VecDeque;

use crate::{Document, Span, Token, VecExt};

mod all;
mod any_capitalization;
mod any_pattern;
mod consumes_remaining_pattern;
mod either_pattern;
mod exact_phrase;
mod invert;
mod is_not_title_case;
mod naive_pattern_group;
mod noun_phrase;
mod repeating_pattern;
mod sequence_pattern;
mod similar_to_phrase;
mod split_compound_word;
mod token_kind_pattern_group;
mod whitespace_pattern;
mod within_edit_distance;
mod word_pattern_group;
mod word_set;

pub use all::All;
pub use any_capitalization::AnyCapitalization;
pub use any_pattern::AnyPattern;
use blanket::blanket;
pub use consumes_remaining_pattern::ConsumesRemainingPattern;
pub use either_pattern::EitherPattern;
pub use exact_phrase::ExactPhrase;
pub use invert::Invert;
pub use is_not_title_case::IsNotTitleCase;
pub use naive_pattern_group::NaivePatternGroup;
pub use noun_phrase::NounPhrase;
pub use repeating_pattern::RepeatingPattern;
pub use sequence_pattern::SequencePattern;
pub use similar_to_phrase::SimilarToPhrase;
pub use split_compound_word::SplitCompoundWord;
pub use token_kind_pattern_group::TokenKindPatternGroup;
pub use whitespace_pattern::WhitespacePattern;
pub use word_pattern_group::WordPatternGroup;
pub use word_set::WordSet;

#[cfg(not(feature = "concurrent"))]
#[blanket(derive(Rc, Arc))]
pub trait Pattern {
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}

#[cfg(feature = "concurrent")]
#[blanket(derive(Arc))]
pub trait Pattern: Send + Sync {
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize;
}

pub trait PatternExt {
    /// Search through all tokens to locate all non-overlapping pattern matches.
    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
}

impl<P> PatternExt for P
where
    P: Pattern,
{
    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
        let mut found = Vec::new();

        for i in 0..tokens.len() {
            let len = self.matches(&tokens[i..], source);

            if len > 0 {
                found.push(Span::new_with_len(i, len));
            }
        }

        if found.len() < 2 {
            return found;
        }

        let mut remove_indices = VecDeque::new();

        for i in 0..found.len() - 1 {
            let cur = &found[i];
            let next = &found[i + 1];

            if cur.overlaps_with(*next) {
                remove_indices.push_back(i + 1);
            }
        }

        found.remove_indices(remove_indices);

        found
    }
}

#[cfg(feature = "concurrent")]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool,
    F: Send + Sync,
{
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        if tokens.is_empty() {
            return 0;
        }

        let tok = &tokens[0];

        if self(tok, source) {
            1
        } else {
            0
        }
    }
}

#[cfg(not(feature = "concurrent"))]
impl<F> Pattern for F
where
    F: Fn(&Token, &[char]) -> bool,
{
    fn matches(&self, tokens: &[Token], source: &[char]) -> usize {
        if tokens.is_empty() {
            return 0;
        }

        let tok = &tokens[0];

        if self(tok, source) {
            1
        } else {
            0
        }
    }
}

pub trait DocPattern {
    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span>;
}

impl<P: PatternExt> DocPattern for P {
    fn find_all_matches_in_doc(&self, document: &Document) -> Vec<Span> {
        self.find_all_matches(document.get_tokens(), document.get_source())
    }
}