sqlite3_parser/lexer/scan.rs
1//! Adaptation/port of [Go scanner](http://tip.golang.org/pkg/bufio/#Scanner).
2
3use log::debug;
4
5use std::error::Error;
6use std::fmt;
7use std::io;
8
/// Error that can be tagged with the position at which it occurred.
///
/// Implementors must be ordinary errors and must be constructible from an
/// [`io::Error`].
pub trait ScanError
where
    Self: Error + From<io::Error> + Sized,
{
    /// Record the `line` and `column` where the error occurred.
    fn position(&mut self, line: u64, column: usize);
}
14
/// Outcome of a single split attempt.
///
/// On success the pair holds the optional token (its bytes together with its
/// type `T`) followed by the number of bytes to consume.
type SplitResult<'input, T, E> = Result<(Option<(&'input [u8], T)>, usize), E>;
19
20/// Split function used to tokenize the input
21pub trait Splitter: Sized {
22 /// Potential error raised
23 type Error: ScanError;
24 //type Item: ?Sized;
25 /// Token generated
26 type TokenType;
27
28 /// The arguments are an initial substring of the remaining unprocessed
29 /// data.
30 ///
31 /// If the returned error is non-nil, scanning stops and the error
32 /// is returned to the client.
33 ///
34 /// The function is never called with an empty data slice.
35 fn split<'input>(
36 &mut self,
37 data: &'input [u8],
38 ) -> SplitResult<'input, Self::TokenType, Self::Error>;
39}
40
/// Steps through the 'tokens' of an input byte slice.
///
/// Successive calls to the `scan` method will step through the tokens of the
/// input, skipping the bytes between the tokens.
///
/// The scanner holds no buffer of its own: the input is handed to each `scan`
/// call and the scanner only remembers how far it has advanced, together
/// with the matching line and column.
///
/// No trait bound is placed on `S` here; the `impl` blocks that actually call
/// into the splitter require `S: Splitter`.
pub struct Scanner<S> {
    /// offset in `input`
    offset: usize,
    /// saved `(offset, line, column)`, written by `mark` and restored by
    /// `reset_to_mark`
    mark: (usize, u64, usize),
    /// The function to tokenize the input.
    splitter: S,
    /// current line number
    line: u64,
    /// current column number (byte offset, not char offset)
    column: usize,
}
60
61impl<S: Splitter> Scanner<S> {
62 /// Constructor
63 pub fn new(splitter: S) -> Self {
64 Self {
65 offset: 0,
66 mark: (0, 0, 0),
67 splitter,
68 line: 1,
69 column: 1,
70 }
71 }
72
73 /// Current line number
74 pub fn line(&self) -> u64 {
75 self.line
76 }
77
78 /// Current column number (byte offset, not char offset)
79 pub fn column(&self) -> usize {
80 self.column
81 }
82 /// Associated splitter
83 pub fn splitter(&self) -> &S {
84 &self.splitter
85 }
86 /// Mark current position
87 pub fn mark(&mut self) {
88 self.mark = (self.offset, self.line, self.column);
89 }
90 /// Reset to mark
91 pub fn reset_to_mark(&mut self) {
92 (self.offset, self.line, self.column) = self.mark;
93 }
94
95 /// Reset the scanner such that it behaves as if it had never been used.
96 pub fn reset(&mut self) {
97 self.offset = 0;
98 self.line = 1;
99 self.column = 1;
100 }
101}
102
/// Outcome of a single `scan` call.
///
/// On success the triple holds the offset at which the scan started
/// consuming, the optional token (its bytes together with its type `T`), and
/// the offset reached after consuming.
type ScanResult<'input, T, E> = Result<(usize, Option<(&'input [u8], T)>, usize), E>;
105
106impl<S: Splitter> Scanner<S> {
107 /// Advance the Scanner to next token.
108 /// Return the token as a byte slice.
109 /// Return `None` when the end of the input is reached.
110 /// Return any error that occurs while reading the input.
111 pub fn scan<'input>(
112 &mut self,
113 input: &'input [u8],
114 ) -> ScanResult<'input, S::TokenType, S::Error> {
115 debug!(target: "scanner", "scan(line: {}, column: {})", self.line, self.column);
116 // Loop until we have a token.
117 loop {
118 // See if we can get a token with what we already have.
119 if self.offset < input.len() {
120 let data = &input[self.offset..];
121 match self.splitter.split(data) {
122 Err(mut e) => {
123 e.position(self.line, self.column);
124 return Err(e);
125 }
126 Ok((None, 0)) => {
127 // Done
128 }
129 Ok((None, amt)) => {
130 // Ignore/skip this data
131 self.consume(data, amt);
132 continue;
133 }
134 Ok((tok, amt)) => {
135 let start = self.offset;
136 self.consume(data, amt);
137 return Ok((start, tok, self.offset));
138 }
139 }
140 }
141 // We cannot generate a token with what we are holding.
142 // we are done.
143 return Ok((self.offset, None, self.offset));
144 }
145 }
146
147 /// Consume `amt` bytes of the buffer.
148 fn consume(&mut self, data: &[u8], amt: usize) {
149 debug!(target: "scanner", "consume({})", amt);
150 debug_assert!(amt <= data.len());
151 for byte in &data[..amt] {
152 if *byte == b'\n' {
153 self.line += 1;
154 self.column = 1;
155 } else {
156 self.column += 1;
157 }
158 }
159 self.offset += amt;
160 }
161}
162
163impl<S: Splitter> fmt::Debug for Scanner<S> {
164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 f.debug_struct("Scanner")
166 .field("offset", &self.offset)
167 .field("mark", &self.mark)
168 .field("line", &self.line)
169 .field("column", &self.column)
170 .finish()
171 }
172}