//! Print line and column positions for each word in a file.
//!
//! Usage:
//!     cargo run --example extras <path/to/file>
//!
//! Example:
//!     cargo run --example extras Cargo.toml
//!
//! This is a small example of how to use
//! [`Extras`](https://docs.rs/logos2/latest/logos/trait.Logos.html#associatedtype.Extras)
//! to convey some (mutable) internal state from token to token.
//!
//! Here, the extras will be a tuple with the following fields:
//!
//! + 0. the line number;
//! + 1. the char index at which the current line starts.
//!
//! From there, one can easily compute the column number of a token as follows:
//!
//! ```rust,no_run,no_playground
//! fn get_column(lex: &Lexer<Token>) -> usize {
//!     lex.span().start - lex.extras.1
//! }
//! ```
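//!
//! For example (illustrative numbers): if the current line starts at char index 12
//! (so `lex.extras.1 == 12`) and a token's span starts at char index 15, its
//! column is `15 - 12 = 3` (columns are zero-based).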

/* ANCHOR: all */
use logos::{Lexer, Logos, Skip};
use std::env;
use std::fs;

/* ANCHOR: callbacks */
/// Update the line count and the char index.
fn newline_callback(lex: &mut Lexer<Token>) -> Skip {
    lex.extras.0 += 1;
    lex.extras.1 = lex.span().end;
    Skip
}

/// Compute the line and column position for the current word.
fn word_callback(lex: &mut Lexer<Token>) -> (usize, usize) {
    let line = lex.extras.0;
    let column = lex.span().start - lex.extras.1;

    (line, column)
}
/* ANCHOR_END: callbacks */

/* ANCHOR: tokens */
/// Simple tokens to retrieve words and their location.
#[derive(Debug, Logos)]
#[logos(extras = (usize, usize))]
enum Token {
    #[regex(r"\n", newline_callback)]
    Newline,

    #[regex(r"\w+", word_callback)]
    Word((usize, usize)),
}
/* ANCHOR_END: tokens */

fn main() {
    let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument"))
        .expect("Failed to read file");

    let mut lex = Token::lexer(src.as_str());

    while let Some(token) = lex.next() {
        if let Ok(Token::Word((line, column))) = token {
            println!("Word '{}' found at ({}, {})", lex.slice(), line, column);
        }
    }
}
/* ANCHOR_END: all */
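
// A minimal sketch of how the reported positions come out in practice, using a
// small hypothetical input. It assumes the extras start at the default
// `(0, 0)`, so line and column numbers are both zero-based.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn words_carry_line_and_column() {
        let mut lex = Token::lexer("hello world\nfoo");
        let mut words = Vec::new();

        while let Some(token) = lex.next() {
            if let Ok(Token::Word(pos)) = token {
                words.push((lex.slice().to_owned(), pos));
            }
        }

        // "hello" and "world" sit on line 0; "foo" starts right after the
        // newline, so its column resets to 0 on line 1.
        assert_eq!(
            words,
            vec![
                ("hello".to_owned(), (0, 0)),
                ("world".to_owned(), (0, 6)),
                ("foo".to_owned(), (1, 0)),
            ]
        );
    }
}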