1pub mod token_type;
2use crate::language::CsvLanguage;
3use oak_core::{Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
4pub use token_type::CsvTokenType;
5
6type State<'a, S> = LexerState<'a, S, CsvLanguage>;
7
8#[derive(Clone)]
9pub struct CsvLexer<'config> {
10 _config: &'config CsvLanguage,
11 field_separator: char,
12 quote_char: char,
13}
14
15impl<'config> Lexer<CsvLanguage> for CsvLexer<'config> {
16 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl oak_core::LexerCache<CsvLanguage>) -> LexOutput<CsvLanguage> {
17 let mut state = State::new(text);
18 let result = self.run(&mut state);
19 if result.is_ok() {
20 state.add_eof();
21 }
22 state.finish_with_cache(result, cache)
23 }
24}
25
26impl<'config> CsvLexer<'config> {
27 pub fn new(config: &'config CsvLanguage) -> Self {
28 Self { _config: config, field_separator: ',', quote_char: '"' }
29 }
30
31 pub fn with_separator(mut self, separator: char) -> Self {
32 self.field_separator = separator;
33 self
34 }
35
36 pub fn with_quote_char(mut self, quote: char) -> Self {
37 self.quote_char = quote;
38 self
39 }
40
41 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
43 let start_pos = state.get_position();
44 let mut found_whitespace = false;
45
46 while let Some(ch) = state.peek() {
47 if ch == ' ' || ch == '\t' {
48 state.advance(ch.len_utf8());
49 found_whitespace = true;
50 }
51 else {
52 break;
53 }
54 }
55
56 if found_whitespace {
57 state.add_token(CsvTokenType::Whitespace, start_pos, state.get_position());
58 true
59 }
60 else {
61 false
62 }
63 }
64
65 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
67 let start_pos = state.get_position();
68
69 if let Some(ch) = state.peek() {
70 if ch == '\r' {
71 state.advance(1);
72 if state.peek() == Some('\n') {
74 state.advance(1);
75 }
76 state.add_token(CsvTokenType::Newline, start_pos, state.get_position());
77 true
78 }
79 else if ch == '\n' {
80 state.advance(1);
81 state.add_token(CsvTokenType::Newline, start_pos, state.get_position());
82 true
83 }
84 else {
85 false
86 }
87 }
88 else {
89 false
90 }
91 }
92
93 fn lex_quoted_field<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
95 let start_pos = state.get_position();
96
97 if let Some(ch) = state.peek() {
98 if ch == self.quote_char {
99 state.advance(ch.len_utf8()); while let Some(ch) = state.peek() {
101 if ch == self.quote_char {
102 state.advance(ch.len_utf8());
103 if state.peek() == Some(self.quote_char) {
105 state.advance(self.quote_char.len_utf8()); }
107 else {
108 break;
110 }
111 }
112 else {
113 state.advance(ch.len_utf8());
114 }
115 }
116 state.add_token(CsvTokenType::Field, start_pos, state.get_position());
117 true
118 }
119 else {
120 false
121 }
122 }
123 else {
124 false
125 }
126 }
127
128 fn lex_unquoted_field<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
130 let start_pos = state.get_position();
131 let mut found_char = false;
132
133 while let Some(ch) = state.peek() {
134 if ch == self.field_separator || ch == '\n' || ch == '\r' {
135 break;
136 }
137 else {
138 state.advance(ch.len_utf8());
139 found_char = true;
140 }
141 }
142
143 if found_char {
144 state.add_token(CsvTokenType::Field, start_pos, state.get_position());
145 true
146 }
147 else {
148 false
149 }
150 }
151
152 fn lex_comma<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
154 let start_pos = state.get_position();
155
156 if let Some(ch) = state.peek() {
157 if ch == self.field_separator {
158 state.advance(ch.len_utf8());
159 state.add_token(CsvTokenType::Comma, start_pos, state.get_position());
160 true
161 }
162 else {
163 false
164 }
165 }
166 else {
167 false
168 }
169 }
170
171 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
172 while state.not_at_end() {
173 if self.skip_whitespace(state) {
175 continue;
176 }
177
178 if self.lex_newline(state) {
179 continue;
180 }
181
182 if self.lex_comma(state) {
183 continue;
184 }
185
186 if self.lex_quoted_field(state) {
187 continue;
188 }
189
190 if self.lex_unquoted_field(state) {
191 continue;
192 }
193
194 let start_pos = state.get_position();
196 if let Some(ch) = state.peek() {
197 state.advance(ch.len_utf8());
198 state.add_token(CsvTokenType::Error, start_pos, state.get_position());
199 }
200 }
201 Ok(())
202 }
203}