1use std::fmt::{Display, Formatter};
2
/// A 1-based line/column position within the parsed source text.
///
/// `Location::default()` is `(0, 0)` and marks synthetic tokens that do not
/// originate from source text (see the `From` conversions on `Token`).
//
// `Ord` and `Hash` are derived in addition to `PartialOrd`/`Eq`: both fields
// are `usize`, so the type has a natural total order (line first, then
// column) and can be used as a map key or sorted without extra work.
#[derive(Debug, Default, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct Location {
    line: usize,
    column: usize,
}

impl Location {
    /// Creates a location at the given (1-based) line and column.
    pub const fn at(line: usize, column: usize) -> Location {
        Self { line, column }
    }

    /// The (1-based) line number.
    pub const fn line(&self) -> usize {
        self.line
    }

    /// The (1-based) column number.
    pub const fn column(&self) -> usize {
        self.column
    }
}
22
/// A lexical token produced by the tokenizer, tagged with the source
/// [`Location`] of its first character (1-based; default for tokens built
/// via the `From` conversions below).
#[derive(Debug, PartialOrd, PartialEq, Eq, Clone)]
pub enum Token {
    /// A run of adjacent non-separator, non-whitespace characters.
    Text(Location, String),
    /// A single separator character such as `:`, `;`, `=`, braces or quotes.
    Separator(Location, char),
}
28
29impl From<char> for Token {
30 fn from(separator: char) -> Self {
31 Token::Separator(Location::default(), separator)
32 }
33}
34
35impl From<String> for Token {
36 fn from(text: String) -> Self {
37 Token::Text(Location::default(), text)
38 }
39}
40
41impl Display for Token {
42 fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
43 match self {
44 Token::Text(_, text) => write!(f, "\"{}\"", text),
45 Token::Separator(_, separator) => write!(f, "\'{}\'", separator),
46 }
47 }
48}
49
50impl Token {
51 fn append(self, other: Token) -> (Token, Option<Token>) {
52 match (self, other) {
53 (Token::Text(location, mut text), Token::Text(_, other)) => (
54 Token::Text(location, {
55 text.push_str(&other);
56 text
57 }),
58 None,
59 ),
60 (a, b) => (a, Some(b)),
61 }
62 }
63
64 pub fn location(&self) -> Location {
65 match self {
66 Token::Text(location, _) => *location,
67 Token::Separator(location, _) => *location,
68 }
69 }
70
71 pub fn eq_text(&self, text: &str) -> bool {
72 self.text().map(|t| t.eq(text)).unwrap_or(false)
73 }
74
75 pub fn eq_text_ignore_ascii_case(&self, text: &str) -> bool {
76 self.text()
77 .map(|t| t.eq_ignore_ascii_case(text))
78 .unwrap_or(false)
79 }
80
81 pub fn eq_separator(&self, separator: char) -> bool {
82 self.separator().map(|s| s == separator).unwrap_or(false)
83 }
84
85 pub fn text(&self) -> Option<&str> {
86 match self {
87 Token::Text(_, text) => Some(text),
88 _ => None,
89 }
90 }
91
92 pub fn separator(&self) -> Option<char> {
93 match self {
94 Token::Separator(_, char) => Some(*char),
95 _ => None,
96 }
97 }
98
99 pub fn is_text(&self) -> bool {
100 self.text().is_some()
101 }
102
103 pub fn is_separator(&self) -> bool {
104 self.separator().is_some()
105 }
106
107 pub fn into_text(self) -> Option<String> {
108 if let Token::Text(_, text) = self {
109 Some(text)
110 } else {
111 None
112 }
113 }
114
115 pub fn into_text_or_else<E, F: FnOnce(Token) -> E>(self, f: F) -> Result<String, E> {
116 match self {
117 Token::Text(_, text) => Ok(text),
118 token => Err(f(token)),
119 }
120 }
121
122 pub fn into_separator_or_else<E, F: FnOnce(Token) -> E>(self, f: F) -> Result<char, E> {
123 match self {
124 Token::Separator(_, separator) => Ok(separator),
125 token => Err(f(token)),
126 }
127 }
128}
129
/// Splits ASN.1 source text into a flat stream of [`Token`]s,
/// skipping `--` line comments and (nested) `/* … */` block comments.
#[derive(Default)]
pub struct Tokenizer;
132
133impl Tokenizer {
134 pub fn parse(&self, asn: &str) -> Vec<Token> {
141 let mut previous = None;
142 let mut tokens = Vec::new();
143 let mut nest_lvl = 0; for (line_0, line) in asn.lines().enumerate() {
146 let mut token = None;
147 let mut content_iterator = line.chars().enumerate().peekable();
148
149 while let Some((column_0, char)) = content_iterator.next() {
150 if nest_lvl > 0 {
151 match char {
152 '*' => {
153 if let Some((_, '/')) = content_iterator.peek() {
154 nest_lvl -= 1;
155 content_iterator.next(); }
157 }
158 '/' => {
159 if let Some((_, '*')) = content_iterator.peek() {
160 nest_lvl += 1;
161 content_iterator.next(); }
163 }
164 _ => {
165 if content_iterator.peek().is_none()
166 && line_0 == asn.lines().count() - 1
167 {
168 panic!("The file has unclosed comment blocks. Nested comment blocks are counted.");
169 } else {
170 continue;
171 }
172 }
173 }
174 continue;
175 }
176 if nest_lvl == 0
178 && char == '-'
179 && content_iterator.peek().map(|&(_, ch)| ch) == Some('-')
180 {
181 content_iterator.next(); break; }
184 match char {
185 '/' if content_iterator.peek().map(|&(_, ch)| ch) == Some('*') => {
186 content_iterator.next(); nest_lvl += 1;
188 }
189 ':' | ';' | '=' | '(' | ')' | '{' | '}' | '.' | ',' | '[' | ']' | '\''
191 | '"' => {
192 token = Some(Token::Separator(
193 Location::at(line_0 + 1, column_0 + 1),
194 char,
195 ))
196 }
197 c if !c.is_control() && c != ' ' => {
199 token = Some(Token::Text(
200 Location::at(line_0 + 1, column_0 + 1),
201 format!("{}", c),
202 ));
203 }
204 ' ' | '\r' | '\n' | '\t' => {
206 if let Some(token) = previous.take() {
207 tokens.push(token);
208 }
209 }
210 c => eprintln!(
211 "Ignoring unexpected character: {}-0x{:02x}-{:03}",
212 c, c as u8, c as u8
213 ),
214 }
215
216 if let Some(token) = token.take() {
217 previous = match previous {
218 None => Some(token),
219 Some(current) => {
220 let (token, second) = current.append(token);
221 match second {
222 None => Some(token),
223 Some(next) => {
224 tokens.push(token);
225 Some(next)
226 }
227 }
228 }
229 }
230 }
231 }
232
233 if let Some(token) = previous.take() {
234 tokens.push(token);
235 }
236 }
237
238 if let Some(token) = previous {
239 tokens.push(token);
240 }
241
242 tokens
243 }
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    pub fn test_separator_tokens_not_merged() {
        let result = Tokenizer.parse(":;=(){}.,[]");
        let mut iter = result.into_iter();
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator(';'));
        assert!(iter.next().unwrap().eq_separator('='));
        assert!(iter.next().unwrap().eq_separator('('));
        assert!(iter.next().unwrap().eq_separator(')'));
        assert!(iter.next().unwrap().eq_separator('{'));
        assert!(iter.next().unwrap().eq_separator('}'));
        assert!(iter.next().unwrap().eq_separator('.'));
        assert!(iter.next().unwrap().eq_separator(','));
        assert!(iter.next().unwrap().eq_separator('['));
        assert!(iter.next().unwrap().eq_separator(']'));
        assert!(iter.next().is_none());
    }

    // Name fixed: was `test_text_between_seapators_...` (typo).
    #[test]
    pub fn test_text_between_separators_is_represented_as_one_text_token() {
        let result = Tokenizer.parse("::=ASN{");
        let mut iter = result.into_iter();
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator('='));
        assert!(iter.next().unwrap().eq_text("ASN"));
        assert!(iter.next().unwrap().eq_separator('{'));
        assert!(iter.next().is_none());
    }

    #[test]
    pub fn test_invisible_separator_characters() {
        let result = Tokenizer.parse("a b\rc\nd\te AB\rCD\nEF\tGH aa bb\r\rcc\n\ndd\t\tee");
        let mut iter = result.into_iter();
        assert!(iter.next().unwrap().eq_text("a"));
        assert!(iter.next().unwrap().eq_text("b"));
        assert!(iter.next().unwrap().eq_text("c"));
        assert!(iter.next().unwrap().eq_text("d"));
        assert!(iter.next().unwrap().eq_text("e"));
        assert!(iter.next().unwrap().eq_text("AB"));
        assert!(iter.next().unwrap().eq_text("CD"));
        assert!(iter.next().unwrap().eq_text("EF"));
        assert!(iter.next().unwrap().eq_text("GH"));
        assert!(iter.next().unwrap().eq_text("aa"));
        assert!(iter.next().unwrap().eq_text("bb"));
        assert!(iter.next().unwrap().eq_text("cc"));
        assert!(iter.next().unwrap().eq_text("dd"));
        assert!(iter.next().unwrap().eq_text("ee"));
        assert!(iter.next().is_none());
    }

    #[test]
    pub fn test_token_text() {
        let token = Token::from("some text".to_string());
        assert_eq!(token.text(), Some("some text"));
        assert_eq!(token.separator(), None);
    }

    // Name fixed: this body (a NUL byte inside text) previously carried the
    // name `test_token_separator`, while the separator test below was named
    // `test_control_char_is_ignored` — the two names were swapped.
    #[test]
    pub fn test_control_char_is_ignored() {
        let result = Tokenizer.parse("AS\x00N");
        let mut iter = result.into_iter();
        assert!(iter.next().unwrap().eq_text("ASN"));
        assert!(iter.next().is_none());
    }

    #[test]
    pub fn test_token_separator() {
        let token = Token::from(':');
        assert_eq!(token.text(), None);
        assert_eq!(token.separator(), Some(':'))
    }

    #[test]
    pub fn test_ignores_line_comments() {
        let result = Tokenizer::default().parse(
            r"
            Some ::= None -- very clever
            -- ignore true ::= false
            ",
        );
        let mut iter = result.into_iter();
        assert!(iter.next().unwrap().eq_text("Some"));
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator('='));
        assert!(iter.next().unwrap().eq_text("None"));
        assert!(iter.next().is_none());
    }

    #[test]
    pub fn test_ignores_multiline_comments() {
        let result = Tokenizer::default().parse(
            r"
            ASN1 DEFINITION ::= BEGIN
            /* This is a comment */
            -- This is also a comment
            SomeTypeDef ::= SEQUENCE {
                /* Nested comment level 1
                /* Nested comment -- level 2 */
                still in level 1 comment */
                integer INTEGER
            }
            END",
        );
        let mut iter = result.into_iter();
        assert!(iter.next().unwrap().eq_text("ASN1"));
        assert!(iter.next().unwrap().eq_text("DEFINITION"));
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator('='));
        assert!(iter.next().unwrap().eq_text("BEGIN"));
        assert!(iter.next().unwrap().eq_text("SomeTypeDef"));
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator(':'));
        assert!(iter.next().unwrap().eq_separator('='));
        assert!(iter.next().unwrap().eq_text("SEQUENCE"));
        assert!(iter.next().unwrap().eq_separator('{'));
        assert!(iter.next().unwrap().eq_text("integer"));
        assert!(iter.next().unwrap().eq_text("INTEGER"));
        assert!(iter.next().unwrap().eq_separator('}'));
        assert!(iter.next().unwrap().eq_text("END"));
        assert!(iter.next().is_none());
    }

    #[test]
    #[should_panic(
        expected = "The file has unclosed comment blocks. Nested comment blocks are counted."
    )]
    pub fn test_unclosed_comment() {
        let _ = Tokenizer::default().parse(
            r"
            ASN1 DEFINITION ::= BEGIN
            /* This is a comment
            SomeTypeDef ::= SEQUENCE {
                /* Nested comment level 1
                /* Nested comment -- level 2 */
                still in level 1 comment */
                integer INTEGER
            }
            END",
        );
    }

    #[test]
    pub fn test_token_is_separator() {
        assert!(Token::Separator(Location::default(), ',').is_separator());
    }

    #[test]
    pub fn test_token_is_text() {
        assert!(Token::Text(Location::default(), String::default()).is_text());
    }

    #[test]
    pub fn test_token_location_separator() {
        let location = Location::at(42, 1337);
        assert_eq!(location, Token::Separator(location, ',').location());
    }

    #[test]
    pub fn test_token_location_text() {
        let location = Location::at(42, 1337);
        assert_eq!(
            location,
            Token::Text(location, String::default()).location()
        );
    }

    #[test]
    pub fn test_token_eq_text() {
        assert!(Token::Text(Location::default(), "aBc".to_string()).eq_text("aBc"));
        assert!(!Token::Text(Location::default(), "aBc".to_string()).eq_text("abc"));
        assert!(!Token::Text(Location::default(), "aBc".to_string()).eq_text("cde"));
    }

    #[test]
    pub fn test_token_eq_text_ignore_ascii_case() {
        assert!(
            Token::Text(Location::default(), "aBc".to_string()).eq_text_ignore_ascii_case("aBc")
        );
        assert!(
            Token::Text(Location::default(), "aBc".to_string()).eq_text_ignore_ascii_case("abc")
        );
        assert!(
            !Token::Text(Location::default(), "aBc".to_string()).eq_text_ignore_ascii_case("cde")
        );
    }

    #[test]
    pub fn test_token_display_text() {
        assert_eq!(
            "\"The text\"",
            format!(
                "{}",
                Token::Text(Location::default(), "The text".to_string())
            )
        );
    }

    #[test]
    pub fn test_token_display_separator() {
        assert_eq!(
            "'.'",
            format!("{}", Token::Separator(Location::default(), '.'))
        );
    }

    #[test]
    pub fn test_token_into_text_none() {
        assert_eq!(None, Token::Separator(Location::default(), '.').into_text());
    }

    #[test]
    pub fn test_token_into_text_or_else_succeed() {
        assert_eq!(
            Ok("SEQUENCE".to_string()),
            Token::Text(Location::default(), "SEQUENCE".to_string())
                .into_text_or_else(|_| unreachable!())
        );
    }

    #[test]
    pub fn test_token_into_text_or_else_fail() {
        assert_eq!(
            Err(()),
            Token::Separator(Location::default(), '.').into_text_or_else(|_| ())
        );
    }

    #[test]
    pub fn test_token_into_separator_or_else_succeed() {
        assert_eq!(
            Ok('.'),
            Token::Separator(Location::default(), '.').into_separator_or_else(|_| unreachable!())
        );
    }

    #[test]
    pub fn test_token_into_separator_or_else_fail() {
        assert_eq!(
            Err(()),
            Token::Text(Location::default(), String::default()).into_separator_or_else(|_| ())
        );
    }
}