//! <p align="center">
//!      <img src="https://raw.github.com/maciejhirsz/logos/master/logos.png?sanitize=true" width="60%" alt="Logos">
//! </p>
//!
//! ## Create ridiculously fast Lexers.
//!
//! **Logos** works by:
//! + Resolving all logical branching of token definitions into a tree.
//! + Optimizing complex patterns into [Lookup Tables](https://en.wikipedia.org/wiki/Lookup_table).
//! + Always using a Lookup Table for the first byte of a token.
//! + Producing code that never backtracks, thus running in linear time or close to it.
//!
//! In practice, this means that for most grammars the lexing performance is virtually
//! unaffected by the number of tokens defined in the grammar. In other words, **it is really fast**.
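//!
//! The first-byte lookup table is the core trick. As a loose, hand-written
//! illustration of the idea (this is **not** the code Logos generates; `Kind`
//! and `first_byte_table` are made up purely for this sketch), dispatching on
//! the first byte of input might look like this:
//!
//! ```rust
//! // A 256-entry table mapping every possible first byte to a token kind.
//! #[derive(Clone, Copy, PartialEq, Debug)]
//! enum Kind { Word, Number, Unknown }
//!
//! fn first_byte_table() -> [Kind; 256] {
//!     let mut table = [Kind::Unknown; 256];
//!     let mut byte = b'a';
//!     while byte <= b'z' {
//!         table[byte as usize] = Kind::Word;
//!         byte += 1;
//!     }
//!     let mut byte = b'0';
//!     while byte <= b'9' {
//!         table[byte as usize] = Kind::Number;
//!         byte += 1;
//!     }
//!     table
//! }
//!
//! // Classifying a token's first byte is then a single indexing operation,
//! // no matter how many patterns are defined.
//! let table = first_byte_table();
//! assert_eq!(table[b'x' as usize], Kind::Word);
//! assert_eq!(table[b'7' as usize], Kind::Number);
//! assert_eq!(table[b'.' as usize], Kind::Unknown);
//! ```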
//!
//! ## Example
//!
//! ```rust
//! extern crate logos;
//! #[macro_use]
//! extern crate logos_derive;
//!
//! use logos::Logos;
//!
//! #[derive(Debug, PartialEq, Logos)]
//! enum Token {
//!     // Logos requires that we define two default variants:
//!     // one for the end of the input source,
//!     #[end]
//!     End,
//!
//!     // ...and one for errors. Those can be named anything
//!     // you wish as long as the attributes are there.
//!     #[error]
//!     Error,
//!
//!     // Tokens can be literal strings of any length.
//!     #[token = "fast"]
//!     Fast,
//!
//!     #[token = "."]
//!     Period,
//!
//!     // Or regular expressions.
//!     #[regex = "[a-zA-Z]+"]
//!     Text,
//! }
//!
//! fn main() {
//!     let mut lexer = Token::lexer("Create ridiculously fast Lexers.");
//!
//!     assert_eq!(lexer.token, Token::Text);
//!     assert_eq!(lexer.slice(), "Create");
//!     assert_eq!(lexer.range(), 0..6);
//!
//!     lexer.advance();
//!
//!     assert_eq!(lexer.token, Token::Text);
//!     assert_eq!(lexer.slice(), "ridiculously");
//!     assert_eq!(lexer.range(), 7..19);
//!
//!     lexer.advance();
//!
//!     assert_eq!(lexer.token, Token::Fast);
//!     assert_eq!(lexer.slice(), "fast");
//!     assert_eq!(lexer.range(), 20..24);
//!
//!     lexer.advance();
//!
//!     assert_eq!(lexer.token, Token::Text);
//!     assert_eq!(lexer.slice(), "Lexers");
//!     assert_eq!(lexer.range(), 25..31);
//!
//!     lexer.advance();
//!
//!     assert_eq!(lexer.token, Token::Period);
//!     assert_eq!(lexer.slice(), ".");
//!     assert_eq!(lexer.range(), 31..32);
//!
//!     lexer.advance();
//!
//!     assert_eq!(lexer.token, Token::End);
//! }
//! ```

#![warn(missing_docs)]

#[cfg(feature = "nul_term_source")]
extern crate toolshed;

mod lexer;
mod source;

#[doc(hidden)]
pub mod internal;

pub use lexer::{Lexer, Lexicon, Extras};
pub use source::Source;

/// Trait implemented for an enum representing all tokens. You should never have
/// to implement it manually, use the `#[derive(Logos)]` attribute on your enum.
pub trait Logos: Sized {
    /// Associated `Extras` type for the particular lexer. These can handle things
    /// that aren't necessarily tokens, such as comments or Automatic Semicolon
    /// Insertion in JavaScript.
    type Extras: self::Extras;

    /// `SIZE` is simply the number of possible variants of the `Logos` enum. The
    /// `derive` macro will make sure that no variant holds a value greater than
    /// or equal to `SIZE`.
    ///
    /// This can be extremely useful for creating `Logos` Lookup Tables.
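    ///
    /// As a hedged sketch (the `Token` enum below is illustrative, not part of
    /// the crate), `SIZE` can size a per-variant table that is indexed by the
    /// token's value:
    ///
    /// ```rust
    /// extern crate logos;
    /// #[macro_use]
    /// extern crate logos_derive;
    ///
    /// use logos::Logos;
    ///
    /// #[derive(Debug, PartialEq, Logos)]
    /// enum Token {
    ///     #[end]
    ///     End,
    ///
    ///     #[error]
    ///     Error,
    ///
    ///     #[token = "let"]
    ///     Let,
    ///
    ///     #[regex = "[a-zA-Z]+"]
    ///     Ident,
    /// }
    ///
    /// fn main() {
    ///     // One slot per token variant, indexed by the variant's value.
    ///     let mut is_keyword = [false; Token::SIZE];
    ///     is_keyword[Token::Let as usize] = true;
    ///
    ///     assert!(is_keyword[Token::Let as usize]);
    ///     assert!(!is_keyword[Token::Ident as usize]);
    /// }
    /// ```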
    const SIZE: usize;

    /// Helper `const` set to the variant marked as `#[error]`.
    const ERROR: Self;

    /// Returns a lookup table for the `Lexer`.
    fn lexicon<S: Source>() -> Lexicon<Lexer<Self, S>>;

    /// Creates a new `Lexer` that will produce tokens of this `Logos` type.
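    ///
    /// A minimal usage sketch (the `Token` enum here is illustrative, not part
    /// of the crate):
    ///
    /// ```rust
    /// extern crate logos;
    /// #[macro_use]
    /// extern crate logos_derive;
    ///
    /// use logos::Logos;
    ///
    /// #[derive(Debug, PartialEq, Logos)]
    /// enum Token {
    ///     #[end]
    ///     End,
    ///
    ///     #[error]
    ///     Error,
    ///
    ///     #[regex = "[0-9]+"]
    ///     Number,
    /// }
    ///
    /// fn main() {
    ///     // The lexer reads the first token as soon as it is created.
    ///     let mut lexer = Token::lexer("42");
    ///
    ///     assert_eq!(lexer.token, Token::Number);
    ///     assert_eq!(lexer.slice(), "42");
    ///
    ///     lexer.advance();
    ///
    ///     assert_eq!(lexer.token, Token::End);
    /// }
    /// ```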
    fn lexer<S: Source>(source: S) -> Lexer<Self, S> {
        Lexer::new(source)
    }
}