1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
//! # `scnr`
//! The `scnr` crate is a library that provides a lexical scanner for programming languages.
//! It is designed to be used in a parser of a compiler or interpreter for a programming language
//! or in similar tools that require lexical analysis, e.g. in a language server.
//! It provides multiple scanner modes out of the box, which can be switched at runtime depending
//! on the context of the input.
//! A parser can use different modes for different parts of the input, e.g. to scan comments in one
//! mode and code in another.
//! The scanner is designed to be fast and efficient, and it is implemented with the help of
//! finite state machines.
//! To parse the given regular expressions, the crate uses the `regex-syntax` crate.
//!
//! # Example with a simple pattern list
//! ```rust
//! use scnr::ScannerBuilder;
//!
//! static PATTERNS: &[&str] = &[
//! r";", // Semicolon
//! r"0|[1-9][0-9]*", // Number
//! r"//.*(\r\n|\r|\n)", // Line comment
//! r"/\*([^*]|\*[^/])*\*/", // Block comment
//! r"[a-zA-Z_]\w*", // Identifier
//! r"=", // Assignment
//! ];
//!
//! const INPUT: &str = r#"
//! // This is a comment
//! a = 10;
//! b = 20;
//! /* This is a block comment
//! that spans multiple lines */
//! c = a;
//! "#;
//!
//! fn main() {
//! let scanner = ScannerBuilder::new()
//! .add_patterns(PATTERNS)
//! .build()
//! .expect("ScannerBuilder error");
//! let find_iter = scanner.find_iter(INPUT);
//! for ma in find_iter {
//! println!("Match: {:?}: '{}'", ma, &INPUT[ma.span().range()]);
//! }
//! }
//! ```
//! The output of the example is:
//! ```text
//! Match: Match { token_type: 2, span: Span { start: 1, end: 22 } }: '// This is a comment
//! '
//! Match: Match { token_type: 4, span: Span { start: 22, end: 23 } }: 'a'
//! Match: Match { token_type: 5, span: Span { start: 24, end: 25 } }: '='
//! Match: Match { token_type: 1, span: Span { start: 26, end: 28 } }: '10'
//! Match: Match { token_type: 0, span: Span { start: 28, end: 29 } }: ';'
//! Match: Match { token_type: 4, span: Span { start: 30, end: 31 } }: 'b'
//! Match: Match { token_type: 5, span: Span { start: 32, end: 33 } }: '='
//! Match: Match { token_type: 1, span: Span { start: 34, end: 36 } }: '20'
//! Match: Match { token_type: 0, span: Span { start: 36, end: 37 } }: ';'
//! Match: Match { token_type: 3, span: Span { start: 38, end: 96 } }: '/* This is a block comment
//! that spans multiple lines */'
//! Match: Match { token_type: 4, span: Span { start: 97, end: 98 } }: 'c'
//! Match: Match { token_type: 5, span: Span { start: 99, end: 100 } }: '='
//! Match: Match { token_type: 4, span: Span { start: 101, end: 102 } }: 'a'
//! Match: Match { token_type: 0, span: Span { start: 102, end: 103 } }: ';'
//! ```
//!
//! # Example with scanner modes and position information
//! ```rust
//! use std::sync::LazyLock;
//!
//! use scnr::{MatchExtIterator, Pattern, ScannerBuilder, ScannerMode};
//!
//! static SCANNER_MODES: LazyLock<Vec<ScannerMode>> = LazyLock::new(|| {
//! vec![
//! ScannerMode::new(
//! "INITIAL",
//! vec![
//! Pattern::new(r"\r\n|\r|\n".to_string(), 0), // Newline
//! Pattern::new(r"[a-zA-Z_]\w*".to_string(), 4), // Identifier
//! Pattern::new(r#"""#.to_string(), 6), // String delimiter
//! ],
//! vec![
//! (6, 1), // Token "String delimiter" -> Mode "STRING"
//! ],
//! ),
//! ScannerMode::new(
//! "STRING",
//! vec![
//! Pattern::new(r#"""#.to_string(), 6), // String delimiter
//! Pattern::new(r#"[^"]+"#.to_string(), 5), // String content
//! ],
//! vec![
//! (6, 0), // Token "String delimiter" -> Mode "INITIAL"
//! ],
//! ),
//! ]
//! });
//!
//! const INPUT: &str = r#"Id1 "1. String" "2. String""#;
//!
//! fn main() {
//! let scanner = ScannerBuilder::new()
//! .add_scanner_modes(&SCANNER_MODES)
//! .build()
//! .expect("ScannerBuilder error");
//! let find_iter = scanner.find_iter(INPUT).with_positions();
//! for ma in find_iter {
//! println!("{:?}: '{}'", ma, &INPUT[ma.span().range()]);
//! }
//! }
//! ```
//!
//! The output of this example is:
//! ```text
//! MatchExt { token_type: 4, span: Span { start: 0, end: 3 }, start_position: Position { line: 1, column: 1 }, end_position: Position { line: 1, column: 4 } }: 'Id1'
//! MatchExt { token_type: 6, span: Span { start: 4, end: 5 }, start_position: Position { line: 1, column: 5 }, end_position: Position { line: 1, column: 6 } }: '"'
//! MatchExt { token_type: 5, span: Span { start: 5, end: 14 }, start_position: Position { line: 1, column: 6 }, end_position: Position { line: 1, column: 15 } }: '1. String'
//! MatchExt { token_type: 6, span: Span { start: 14, end: 15 }, start_position: Position { line: 1, column: 15 }, end_position: Position { line: 1, column: 16 } }: '"'
//! MatchExt { token_type: 6, span: Span { start: 16, end: 17 }, start_position: Position { line: 1, column: 17 }, end_position: Position { line: 1, column: 18 } }: '"'
//! MatchExt { token_type: 5, span: Span { start: 17, end: 26 }, start_position: Position { line: 1, column: 18 }, end_position: Position { line: 1, column: 27 } }: '2. String'
//! MatchExt { token_type: 6, span: Span { start: 26, end: 27 }, start_position: Position { line: 1, column: 27 }, end_position: Position { line: 1, column: 28 } }: '"'
//! ```
//!
//! # Crate features
//! The crate has the following features:
//! - `default`: This is the default feature set. When it is enabled, `scnr` uses its own
//! regex engine.
//!
//! - `regex_automata`: This feature is not enabled by default. It instructs the library to use
//! the `regex-automata` crate as the regex engine.
//!
//! Both features are mutually exclusive. You can enable one of them, but not both at the same time.
//!
//! Enabling the default feature usually results in a slower scanner, but it is faster at compiling
//! the regexes. The `regex_automata` feature is faster at scanning the input, but it is possibly
//! slower at compiling the regexes. This depends on the size of your scanner modes, i.e. the number
//! of regexes you use.
/// Module with error definitions
pub use ;
/// Module that provides a FindMatches type
pub use ;
/// The module with internal implementation details.
/// Module that provides a Match type
pub use ;
/// Module that provides a Pattern type and a Lookahead type
pub use ;
/// Module that provides a position type
pub use ;
/// The module with the scanner.
pub use ;
/// The module with the scanner builder.
pub use ScannerBuilder;
/// The module with the scanner mode.
pub use ScannerMode;
/// Module that provides a Span type
pub use Span;
/// Module that provides a WithPositions type
pub use ;