1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
//! Rule-based grammatical error correction through parsing LanguageTool rules.
//!
//! # Overview
//!
//! nlprule has the following core abstractions:
//! - A [Tokenizer][tokenizer::Tokenizer] to split a text into tokens and analyze it by chunking, lemmatizing and part-of-speech tagging. Can also be used independently of the grammatical rules.
//! - A [Rules][rules::Rules] structure containing a set of grammatical error correction rules.
//!
//! # Examples
//!
//! Correct a text:
//!
//! ```no_run
//! use nlprule::{Tokenizer, Rules};
//!
//! let tokenizer = Tokenizer::new("path/to/en_tokenizer.bin")?;
//! let rules = Rules::new("path/to/en_rules.bin")?;
//!
//! assert_eq!(
//!     rules.correct("She was not been here since Monday.", &tokenizer),
//!     String::from("She was not here since Monday.")
//! );
//! # Ok::<(), nlprule::Error>(())
//! ```
//!
//! Get suggestions and correct a text:
//!
//! ```no_run
//! use nlprule::{Tokenizer, Rules, types::Suggestion, rules::apply_suggestions};
//!
//! let tokenizer = Tokenizer::new("path/to/en_tokenizer.bin")?;
//! let rules = Rules::new("path/to/en_rules.bin")?;
//!
//! let text = "She was not been here since Monday.";
//!
//! let suggestions = rules.suggest(text, &tokenizer);
//! assert_eq!(*suggestions[0].span().char(), 4usize..16);
//! assert_eq!(suggestions[0].replacements(), vec!["was not", "has not been"]);
//! assert_eq!(suggestions[0].source(), "GRAMMAR/WAS_BEEN/1");
//! assert_eq!(suggestions[0].message(), "Did you mean was not or has not been?");
//!
//! let corrected = apply_suggestions(text, &suggestions);
//!
//! assert_eq!(corrected, "She was not here since Monday.");
//! # Ok::<(), nlprule::Error>(())
//! ```
//!
//! Tokenize & analyze a text:
//!
//! ```no_run
//! use nlprule::Tokenizer;
//!
//! let tokenizer = Tokenizer::new("path/to/en_tokenizer.bin")?;
//!
//! let text = "A brief example is shown.";
//!
//! // returns an iterator over sentences
//! let sentence = tokenizer.pipe(text).next().expect("`text` contains one sentence.");
//!
//! println!("{:#?}", sentence);
//! assert_eq!(sentence.tokens()[1].word().text().as_str(), "brief");
//! assert_eq!(sentence.tokens()[1].word().tags()[0].pos().as_str(), "JJ");
//! assert_eq!(sentence.tokens()[1].chunks(), vec!["I-NP-singular"]);
//! // some other information like char / byte span, lemmas etc. is also set!
//! # Ok::<(), nlprule::Error>(())
//! ```
//! ---
//! Binaries are distributed with [Github releases](https://github.com/bminixhofer/nlprule/releases).
use io;
use Error;
pub
pub use Rules;
pub use Tokenizer;
/// Gets the canonical filename for the tokenizer binary for a language code in ISO 639-1 (two-letter) format.
/// Gets the canonical filename for the rules binary for a language code in ISO 639-1 (two-letter) format.
/// Gets the canonical filename for the tokenizer binary for a language code in ISO 639-1 (two-letter) format.
/// Gets the canonical filename for the rules binary for a language code in ISO 639-1 (two-letter) format.