limbo_sqlite3_parser/lexer/scan.rs
1//! Adaptation/port of [Go scanner](http://tip.golang.org/pkg/bufio/#Scanner).
2
3use std::error::Error;
4use std::fmt;
5use std::io;
6
/// Error type that can be annotated with the location at which it occurred.
///
/// Implementors must be ordinary [`Error`]s and must be constructible from an
/// [`io::Error`].
pub trait ScanError: Sized + Error + From<io::Error> {
    /// Records where the error happened.
    ///
    /// * `line` - line number reported by the scanner.
    /// * `column` - column number reported by the scanner (counted in bytes).
    /// * `offset` - byte offset into the scanned input.
    fn position(&mut self, line: u64, column: usize, offset: usize);
}
12
/// Outcome of a single split attempt.
///
/// On success the pair holds:
/// * the token, if one was recognised, as `(bytes, token type)`;
/// * the number of bytes of the input to consume.
type SplitResult<'input, Token, Failure> =
    Result<(Option<(&'input [u8], Token)>, usize), Failure>;
17
18/// Split function used to tokenize the input
19pub trait Splitter: Sized {
20 /// Potential error raised
21 type Error: ScanError;
22 //type Item: ?Sized;
23 /// Token generated
24 type TokenType;
25
26 /// The arguments are an initial substring of the remaining unprocessed
27 /// data.
28 ///
29 /// If the returned error is non-nil, scanning stops and the error
30 /// is returned to the client.
31 ///
32 /// The function is never called with an empty data slice.
33 fn split<'input>(
34 &mut self,
35 data: &'input [u8],
36 ) -> SplitResult<'input, Self::TokenType, Self::Error>;
37}
38
39/// Like a `BufReader` but with a growable buffer.
40/// Successive calls to the `scan` method will step through the 'tokens'
41/// of a file, skipping the bytes between the tokens.
42///
43/// Scanning stops unrecoverably at EOF, the first I/O error, or a token too
44/// large to fit in the buffer. When a scan stops, the reader may have
45/// advanced arbitrarily far past the last token.
46pub struct Scanner<S: Splitter> {
47 /// offset in `input`
48 offset: usize,
49 /// mark
50 mark: (usize, u64, usize),
51 /// The function to tokenize the input.
52 splitter: S,
53 /// current line number
54 line: u64,
55 /// current column number (byte offset, not char offset)
56 column: usize,
57}
58
59impl<S: Splitter> Scanner<S> {
60 /// Constructor
61 pub fn new(splitter: S) -> Self {
62 Self {
63 offset: 0,
64 mark: (0, 0, 0),
65 splitter,
66 line: 1,
67 column: 1,
68 }
69 }
70
71 /// Current line number
72 pub fn line(&self) -> u64 {
73 self.line
74 }
75
76 /// Current column number (byte offset, not char offset)
77 pub fn column(&self) -> usize {
78 self.column
79 }
80
81 /// Current byte offset in the source string
82 pub fn offset(&self) -> usize {
83 self.offset
84 }
85
86 /// Associated splitter
87 pub fn splitter(&self) -> &S {
88 &self.splitter
89 }
90 /// Mark current position
91 pub fn mark(&mut self) {
92 self.mark = (self.offset, self.line, self.column);
93 }
94 /// Reset to mark
95 pub fn reset_to_mark(&mut self) {
96 (self.offset, self.line, self.column) = self.mark;
97 }
98
99 /// Reset the scanner such that it behaves as if it had never been used.
100 pub fn reset(&mut self) {
101 self.offset = 0;
102 self.line = 1;
103 self.column = 1;
104 }
105}
106
/// Outcome of one scan step.
///
/// On success the triple holds:
/// * the byte offset at which the token starts;
/// * the token as `(bytes, token type)`, or `None` when no token was produced;
/// * the byte offset reached after consuming the token.
type ScanResult<'input, Token, Failure> =
    Result<(usize, Option<(&'input [u8], Token)>, usize), Failure>;
109
110impl<S: Splitter> Scanner<S> {
111 /// Advance the Scanner to next token.
112 /// Return the token as a byte slice.
113 /// Return `None` when the end of the input is reached.
114 /// Return any error that occurs while reading the input.
115 pub fn scan<'input>(
116 &mut self,
117 input: &'input [u8],
118 ) -> ScanResult<'input, S::TokenType, S::Error> {
119 // Loop until we have a token.
120 loop {
121 // See if we can get a token with what we already have.
122 if self.offset < input.len() {
123 let data = &input[self.offset..];
124 match self.splitter.split(data) {
125 Err(mut e) => {
126 e.position(self.line, self.column, self.offset);
127 return Err(e);
128 }
129 Ok((None, 0)) => {
130 // Done
131 }
132 Ok((None, amt)) => {
133 // Ignore/skip this data
134 self.consume(data, amt);
135 continue;
136 }
137 Ok((tok, amt)) => {
138 let start = self.offset;
139 self.consume(data, amt);
140 return Ok((start, tok, self.offset));
141 }
142 }
143 }
144 // We cannot generate a token with what we are holding.
145 // we are done.
146 return Ok((self.offset, None, self.offset));
147 }
148 }
149
150 /// Consume `amt` bytes of the buffer.
151 fn consume(&mut self, data: &[u8], amt: usize) {
152 debug_assert!(amt <= data.len());
153 for byte in &data[..amt] {
154 if *byte == b'\n' {
155 self.line += 1;
156 self.column = 1;
157 } else {
158 self.column += 1;
159 }
160 }
161 self.offset += amt;
162 }
163}
164
165impl<S: Splitter> fmt::Debug for Scanner<S> {
166 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
167 f.debug_struct("Scanner")
168 .field("offset", &self.offset)
169 .field("mark", &self.mark)
170 .field("line", &self.line)
171 .field("column", &self.column)
172 .finish()
173 }
174}