kmpm/
lib.rs

1//! # kmpm
2//! 
3//! [![github]](https://github.com/Tom-game-project/kmpm) [![crates-io]](https://crates.io/crates/kmpm) [![docs-rs]](https://docs.rs/kmpm/latest/kmpm/)
4//! 
5//! [github]: https://img.shields.io/badge/github-30363d?style=for-the-badge&labelColor=555555&logo=github
6//! [crates-io]: https://img.shields.io/badge/crates.io-253323?style=for-the-badge&labelColor=555555&logo=rust
7//! [docs-rs]: https://img.shields.io/badge/docs.rs-2a2a2a?style=for-the-badge&labelColor=555555&logo=docs.rs
8//! 
9//! KMPM (Knuth-Morris-Pratt algorithm) library. KMPM is one of effective character query algorithm.
10//! 
11//! If the length of the text is n and the length of the pattern is m, the KMP algorithm processes in O(n+m) time
12//! 
13
14
15/// # kmpm_str 
16/// 
17/// return the first and only match found
18/// 
19/// ```
20/// use kmpm::kmpm_str;
21/// 
22/// fn main(){
23///   let text =    "hello world !";
24///   let pattern = "world";
25///   let ctr = kmpm_str(text, pattern);
26///   match ctr {
27///       Some(cursor)=>{
28///           println!("matched index {}",cursor)
29///       }
30///       None=>{
31///           println!("\"{}\" does not match",pattern);
32///       }
33///   }
34/// }
35/// ```
36/// 
37/// ```text
38/// matched index 6
39/// 
40/// ========================
41/// 
42/// "hello world !" :text
43///       "world"   :pattern
44///  ------^^^^^
45///        |
46///        #6 -> return 6
47/// ```
48pub fn kmpm_str(text:&str,pattern:&str)->Option<usize>{
49    let sm_list = skipmap(pattern);
50    let mut cursor:usize= 0;
51    let mut prev_skip_step:usize = 0;
52    let endpoint = text.chars().count() - pattern.chars().count();
53    loop{
54        let mut skipstep:usize = 1;
55        let mut elseflag = false;
56        // 不必要なチェックを省きつつ調べていく
57        for i in 0..pattern.chars().count()-prev_skip_step{
58            // 文字同士が一致しない場合
59            if text.chars().nth(cursor+prev_skip_step+i)!=pattern.chars().nth(prev_skip_step+i){
60                skipstep=sm_list[i];
61                prev_skip_step = skipstep;
62                elseflag=true;
63                break;
64            }
65        }
66        if !elseflag{
67            return Some(cursor)
68        }
69        if cursor>endpoint{
70            return None
71        }
72        else {
73            cursor += skipstep;
74        }
75    }
76}
77
78
79/// # kmpm_str_all
80/// 
81/// find all matching.allow duplicates.
82/// 
83/// ## Example
84/// 
85/// ```
86/// use kmpm::kmpm_str_all;
87/// 
88/// fn main(){
89///   let text = "abababa";
90///   let pattern = "aba";
91///   let match_list = kmpm_str_all(text,pattern);
92///   println!("output {:?}",match_list);
93/// }
94/// ```
95/// 
96/// ```text
97/// output [0,2,4]
98/// 
99/// ========================
100/// 
101/// "abababa" :text
102/// "aba"     :pattern
103///  ^^^
104///  |"aba" <- This matching is allowed
105///  | ^^^
106///  | |"aba"
107///  | | ^^^
108///  | | |
109///  #0#2#4 -> return [0,2,4]
110/// ```
111/// 
112pub fn kmpm_str_all(text:&str,pattern:&str)->Vec<usize>{
113    let mut rarr:Vec<usize> = Vec::new();
114    let sm_list = skipmap(pattern);
115    let mut cursor:usize= 0;
116    let mut prev_skip_step:usize = 0;
117    let endpoint = text.chars().count() - pattern.chars().count();
118    loop{
119        let mut skipstep:usize = 1;
120        let mut elseflag = false;
121        for i in 0..pattern.chars().count()-prev_skip_step{
122            if text.chars().nth(cursor+prev_skip_step+i)!=pattern.chars().nth(prev_skip_step+i){
123                skipstep=sm_list[i];
124                prev_skip_step = skipstep;
125                elseflag = true;
126                break;
127            }
128        }
129        if !elseflag{
130            rarr.push(cursor);
131        }
132        if cursor>endpoint{
133            return rarr
134        }else {
135            cursor += skipstep;
136        }
137    }
138}
139
140/// # kmpm_str_nad
141/// 
142/// find all matching.not allow duplicates (nad).
143/// 
144/// ## Example
145/// 
146/// ```
147/// use kmpm::kmpm_str_nad;
148/// 
149/// fn main(){
150///   let text="abababa";
151///   let pattern = "aba";
152///   println!("{:?}",kmpm_str_nad(text, pattern,0));
153/// }
154/// ```
155/// 
156/// ```text
157/// "abababa" :text
158/// "aba"     :pattern :cursor_start = 0
159///  ^^^
160///  |"aba" <- This matching is **not** allowed
161///  +-***
162///  |  "aba"
163///  +---^^^
164///  |   |
165///  #0  #4 -> return [0,4]
166/// ```
167/// 
168pub fn kmpm_str_nad(text:&str,pattern:&str,cursor_start:usize)->Vec<usize>{
169    let mut rarr:Vec<usize> = Vec::new();
170    let sm_list = skipmap(pattern);
171    let mut cursor:usize= cursor_start;
172    let mut prev_skip_step:usize = 0;
173    let endpoint = text.chars().count() - pattern.chars().count();
174    loop{
175        let mut skipstep:usize = 1;
176        let mut elseflag = false;
177        // ここでパターンが一致しているかどうか最初から調べる
178        // prev_skip_step変数はさらに効率を上げるため
179        for i in 0..pattern.chars().count()-prev_skip_step{
180            if text.chars().nth(cursor+prev_skip_step+i)!=pattern.chars().nth(prev_skip_step+i){
181                skipstep=sm_list[i];
182                prev_skip_step = skipstep;
183                elseflag = true;
184                break;
185            }
186        }
187        if !elseflag{
188            println!("cursor {}\nskipstep {}\nprev_skip_step {}\n========",cursor,skipstep,prev_skip_step);
189            rarr.push(cursor);
190        }else {
191            println!("cursor {}\nskipstep {}\nprev_skip_step {}\n========",cursor,skipstep,prev_skip_step);
192        }
193        if cursor>endpoint{
194            return rarr;
195        }else {
196            if !elseflag{
197                cursor += skipstep + pattern.len();
198            }else {
199                cursor += skipstep;
200            }
201        }
202    }
203}
204
205
206fn dup(txt0:&Vec<char>,txt1:&Vec<char>,gap:usize)->bool{
207    let diflen = txt0.len()-gap;
208    let dig_txt0 = &txt0[gap..];
209    let dig_txt1 = &txt1[..diflen];
210    dig_txt0==dig_txt1
211}
212
213fn capable_gap(txt0:&Vec<char>,txt1:&Vec<char>)->usize{
214    let txt0_len = txt0.len();
215    if txt0_len==0{
216        return 1;
217    }
218    for i in 1..txt0_len{
219        if dup(&txt0, &txt1, i){
220            return i;
221        }
222    }
223    txt0_len
224}
225
226fn skipmap(txt:&str)->Vec<usize>{
227    let txt_vec:Vec<char>= txt.chars().collect();
228    let arr:Vec<usize> = (0..txt.chars().count())
229    .map(
230        |i|capable_gap(
231            &txt_vec[..i].to_vec(),
232            &txt_vec
233            )
234        )
235    .collect();
236    return arr;
237}
238
239
240#[cfg(test)]
241mod tests {
242
243    use super::*;
244
245    #[test]
246    fn test00(){
247        let text =   "hello world";
248        let pattern ="world";
249        let ctr = kmpm_str(text, pattern);
250        match ctr {
251            Some(cursor)=>{
252                assert!(cursor==6);
253                println!("matched index {}",cursor)
254            }
255            None=>{
256                println!("\"{}\" does not match",pattern);
257            }
258        }
259    }
260
261    #[test]
262    fn test01(){
263        let text =   "........................失敗...fosososos";
264        let pattern ="失敗";
265        let ctr = kmpm_str(text, pattern);
266        match ctr {
267            Some(cursor)=>{
268                assert!(cursor==24);
269                println!("matched index {}",cursor)
270            }
271            None=>{
272                println!("\"{}\" does not match",pattern);
273            }
274        }
275    }
276
277    #[test]
278    fn test02() {
279        let text="hello こんにちは world 世界";
280        let pattern = "こんにちは";
281
282        let ctr = kmpm_str(text, pattern).unwrap();
283        assert!(ctr==6);
284    }
285
286    #[test]
287    fn test03(){
288        let text="hello こんにちは world 世界";
289        let pattern = "2024";
290
291        assert!(kmpm_str(text, pattern).is_none());
292    }
293
294    #[test]
295    fn test04(){
296        let text="abababa";
297        let pattern = "aba";
298        println!("{:?}",kmpm_str_all(text, pattern));
299    }
300
301    #[test]
302    fn test05(){
303        let text="abababa";
304        let pattern = "aba";
305        println!("{:?}",kmpm_str_nad(text, pattern,0));//[0,4]
306        println!("{:?}",kmpm_str_nad(text, pattern,1));//[2]
307    }
308}