use crate::tokenize::{TokType, TokenSpan, TokenizationError};

/// Position of a token within its source document.
///
/// Line and column numbers are 1-based; byte offsets and the codepoint
/// index are 0-based, and `end_byte_offset` is exclusive.
#[derive(Debug, PartialEq, Clone)]
pub struct TokenContext {
    pub start_lineno: usize,
    pub start_colno: usize,
    pub start_byte_offset: usize,
    pub start_char_index: usize,
    pub end_byte_offset: usize,
}

/// A single token: its type, its verbatim source text, and (optionally)
/// where it appeared in the document.
#[derive(Debug, PartialEq, Clone)]
pub struct Token {
    /// The exact source text of the token, preserved verbatim so a token
    /// stream can be reassembled into the original document.
    pub lexeme: String,

    pub tok_type: TokType,

    /// Source position, when the token came from a parsed document.
    pub context: Option<TokenContext>,
}

/// Tokenize `text`, pairing every token with its verbatim lexeme and its
/// position in the source.
///
/// The returned stream always ends with a `TokType::EOF` token, and the
/// concatenation of all lexemes reproduces `text` exactly.
pub fn source_to_tokens(text: &str) -> Result<Vec<Token>, TokenizationError> {
    use crate::tokenize::tokenize_rt_str;
    let tokens = tokenize_rt_str(text)?;
    let mut source_tokens: Vec<Token> = Vec::with_capacity(tokens.tok_spans.len());
    let mut spans = tokens.tok_spans.iter();
    // The tokenizer always emits at least an EOF span, so the first span
    // is guaranteed to exist.
    let mut current_span = spans.next().expect("token stream is missing its EOF span");
    let mut next_span: &TokenSpan;
    match spans.next() {
        None => {
            // Only the EOF span exists, so the document is empty.
            source_tokens.push(Token {
                lexeme: String::new(),
                tok_type: TokType::EOF,
                context: Some(TokenContext {
                    start_lineno: 1,
                    start_colno: 1,
                    start_byte_offset: 0,
                    start_char_index: 0,
                    end_byte_offset: 0,
                }),
            });
            return Ok(source_tokens);
        }
        Some(span) => {
            next_span = span;
        }
    }
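    // Scan the text one char at a time, accumulating characters into the
    // current token's buffer; a token is complete once the buffer holds
    // exactly the span's byte length. Line and column counters are
    // maintained manually so each flushed token carries its position.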
    let mut lineno: usize = 1;
    let mut colno: usize = 0; // incremented before each char, so the first column is 1

    let mut start_lineno = lineno;
    let mut start_colno = 1;
    let mut start_char_index = 0;

    // Track the expected byte length of the current token explicitly:
    // `String::with_capacity` guarantees only *at least* the requested
    // capacity, so comparing `len()` to `capacity()` as a completeness
    // test is unreliable.
    let mut current_token_len = current_span.2 - current_span.0;
    let mut current_token_buffer = String::with_capacity(current_token_len);
    for (codepoint_off, (byte_off, ch)) in text.char_indices().enumerate() {
        colno += 1;
        current_token_buffer.push(ch);
        if ch == '\n' {
            if current_token_buffer.len() == current_token_len {
                // The newline completes the current token: flush it and
                // advance to the next span.
                let context = TokenContext { start_byte_offset: current_span.0, start_lineno, start_colno, start_char_index, end_byte_offset: current_span.2 };
                let token = Token { lexeme: current_token_buffer, tok_type: current_span.1.clone(), context: Some(context) };
                source_tokens.push(token);
                match spans.next() {
                    None => {
                        assert_eq!(next_span.1, TokType::EOF, "Unexpected end of document with token remaining {:?}", next_span);
                        let context = TokenContext { start_lineno: lineno, start_colno: colno, start_byte_offset: next_span.0, start_char_index: codepoint_off, end_byte_offset: next_span.0 };
                        let token = Token { lexeme: String::new(), context: Some(context), tok_type: TokType::EOF };
                        source_tokens.push(token);
                        return Ok(source_tokens);
                    }
                    Some(span) => {
                        current_span = next_span;
                        next_span = span;
                        current_token_len = current_span.2 - current_span.0;
                        current_token_buffer = String::with_capacity(current_token_len);
                        lineno += 1;
                        colno = 0;
                        start_lineno = lineno;
                        start_colno = 1;
                        start_char_index = codepoint_off + 1;
                        continue;
                    }
                }
            } else {
                // Newline inside a multi-line token: just track the position.
                lineno += 1;
                colno = 0;
            }
        }
        if current_token_buffer.len() < current_token_len {
            continue;
        }
        if current_token_buffer.len() == current_token_len {
            // The current token is complete: flush it and advance to the
            // next span.
            let context = TokenContext { start_byte_offset: current_span.0, start_lineno, start_colno, start_char_index, end_byte_offset: current_span.2 };
            let token = Token { lexeme: current_token_buffer, tok_type: current_span.1.clone(), context: Some(context) };
            source_tokens.push(token);
            match spans.next() {
                None => {
                    assert_eq!(next_span.1, TokType::EOF, "Unexpected end of document with token remaining {:?}", next_span);
                    let context = TokenContext { start_lineno: lineno, start_colno: colno, start_byte_offset: next_span.0, start_char_index: codepoint_off, end_byte_offset: next_span.0 };
                    let token = Token { lexeme: String::new(), context: Some(context), tok_type: TokType::EOF };
                    source_tokens.push(token);
                    return Ok(source_tokens);
                }
                Some(span) => {
                    current_span = next_span;
                    next_span = span;
                    current_token_len = current_span.2 - current_span.0;
                    current_token_buffer = String::with_capacity(current_token_len);
                    start_lineno = lineno;
                    start_colno = colno + 1;
                    start_char_index = codepoint_off + 1;
                    continue;
                }
            }
        }
        if byte_off > current_span.2 {
            unreachable!("moved beyond current span")
        }
    }
    unreachable!("ran out of characters before reaching the EOF span");
}

/// Concatenate the lexemes of `tokens` back into source text.
///
/// Because every token stores its lexeme verbatim, feeding the output of
/// `source_to_tokens` through this function reproduces the original
/// document exactly.
pub fn tokens_to_source(tokens: &[Token]) -> String {
    let size: usize = tokens.iter().map(|tok| tok.lexeme.len()).sum();
    let mut ret = String::with_capacity(size);
    for tok in tokens.iter() {
        ret.push_str(&tok.lexeme);
    }
    ret
}
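
// A minimal usage sketch, not part of the module's interface: walk a
// document's token stream and print where each token starts. It uses only
// the items defined above.
#[allow(dead_code)]
fn describe_tokens(text: &str) -> Result<(), TokenizationError> {
    for tok in source_to_tokens(text)? {
        if let Some(ctx) = &tok.context {
            println!(
                "{:?} at line {}, column {} (bytes {}..{})",
                tok.tok_type,
                ctx.start_lineno,
                ctx.start_colno,
                ctx.start_byte_offset,
                ctx.end_byte_offset
            );
        }
    }
    Ok(())
}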

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_roundtrip() {
        let text = "{\"foo\":\"bar\"}";
        let tokens = source_to_tokens(text).unwrap();
        assert_eq!(text, tokens_to_source(&tokens));
    }

    #[test]
    fn test_rt() {
        let text = r#"// A JSON5 document
{my: "value",
// ^^^^^
// There is trailing whitespace above that will be formatted
another: "value"
} // there is also trailing whitespace at the end of the doc
"#;
        let tokens = source_to_tokens(text).unwrap();
        println!("{:?}", tokens);
        assert_eq!(text, tokens_to_source(&tokens));
    }
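
    // A hedged extra check: per `source_to_tokens`, an empty document
    // yields a single EOF token and round-trips to the empty string
    // (assuming `tokenize_rt_str` accepts empty input).
    #[test]
    fn test_empty_document() {
        let tokens = source_to_tokens("").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].tok_type, TokType::EOF);
        assert_eq!(tokens_to_source(&tokens), "");
    }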
}