1use std::sync::Arc;
25
/// A lexical token produced by [`tokenize`]. Every variant carries the
/// 0-based character index at which the token starts in the input.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Token {
    /// Opening brace `{`.
    OBra(usize),
    /// Closing brace `}`.
    CBra(usize),
    /// List separator `,`.
    Comma(usize),
    /// A run of literal (non-digit) characters.
    Text(Arc<String>, usize),
    /// A run of ASCII digits.
    Number(Arc<String>, usize),
    /// The `..` range operator (position of the first dot).
    Range(usize),
}
53
/// Errors that can occur while tokenizing brace-expansion input.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TokenizationError {
    /// The input string was empty.
    EmptyContent,
    /// Only opening braces or only closing braces were present.
    FormatNotSupported,
    /// The input contained no braces at all.
    NoBraces,
}
75
76impl std::fmt::Display for TokenizationError {
77 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78 match self {
79 TokenizationError::EmptyContent => write!(f, "Content is empty."),
80 TokenizationError::FormatNotSupported => {
81 write!(f, "Only opening brace or closing brace is used.")
82 }
83 TokenizationError::NoBraces => write!(f, "No braces have been used."),
84 }
85 }
86}
87
// Marker impl: `Debug` and `Display` already satisfy all of `Error`'s
// provided defaults, so no methods need overriding.
impl std::error::Error for TokenizationError {}
89
90pub fn tokenize(content: &str) -> Result<Vec<Token>, TokenizationError> {
138 if content.is_empty() {
139 return Err(TokenizationError::EmptyContent);
140 }
141 let mut tokens = Vec::<Token>::new();
142 let mut is_escape = false;
143 let mut count = (0_usize, 0_usize);
145 let mut buffers = (String::new(), String::new());
147 let mut iter = content.chars().enumerate();
148 let tokenize_text_buffer = |tokens: &mut Vec<Token>, buffers: &mut (String, String), i| {
149 if !buffers.0.is_empty() {
150 tokens.push(Token::Text(
151 Arc::new(buffers.0.clone()),
152 i - buffers.0.len(),
153 ));
154 buffers.0.clear();
155 }
156 };
157 let tokenize_number_buffer = |tokens: &mut Vec<Token>, buffers: &mut (String, String), i| {
158 if !buffers.1.is_empty() {
159 tokens.push(Token::Number(
160 Arc::new(buffers.1.clone()),
161 i - buffers.1.len(),
162 ));
163 buffers.1.clear();
164 }
165 };
166 let tokenize_buffers = |tokens: &mut Vec<Token>, buffers: &mut (String, String), i| {
168 tokenize_text_buffer(tokens, buffers, i);
169 tokenize_number_buffer(tokens, buffers, i);
170 };
171 while let Some((i, c)) = iter.next() {
172 match (c, is_escape) {
173 (_, true) => {
174 if !buffers.1.is_empty() {
175 buffers.0.push_str(&buffers.1);
176 buffers.1.clear();
177 }
178 buffers.0.push(c);
179 buffers.1.clear();
180 is_escape = false;
181 }
182 ('\\', false) => is_escape = true,
183 ('{' | '}' | ',', _) => {
188 tokenize_buffers(&mut tokens, &mut buffers, i);
189 match c {
190 '{' => {
191 count.0 += 1;
192 tokens.push(Token::OBra(i));
193 }
194 '}' => {
195 count.1 += 1;
196 tokens.push(Token::CBra(i));
197 }
198 ',' => tokens.push(Token::Comma(i)),
199 _ => unreachable!(),
205 }
206 }
207 ('.', _) => {
208 let mut r_iter = iter.clone();
209 if let Some((_ix, cx)) = r_iter.next() {
210 match cx {
211 '.' if count.0 == count.1 => {
212 buffers.0.push(c);
213 buffers.0.push(cx);
214 tokenize_buffers(&mut tokens, &mut buffers, i + 2);
215 iter = r_iter;
216 }
217 '.' => {
218 tokenize_buffers(&mut tokens, &mut buffers, i);
219 tokens.push(Token::Range(i));
220 iter = r_iter;
221 continue;
222 }
223 _ => {
224 tokenize_number_buffer(&mut tokens, &mut buffers, i);
225 buffers.0.push(c);
226 }
227 }
228 } else {
229 buffers.0.push(c);
230 }
231 }
232 ('0'..='9', _) => {
233 tokenize_text_buffer(&mut tokens, &mut buffers, i);
234 buffers.1.push(c);
235 }
236 _ => {
237 tokenize_number_buffer(&mut tokens, &mut buffers, i);
238 buffers.0.push(c);
239 }
240 }
241 }
242 match count {
243 (0, 0) => return Err(TokenizationError::NoBraces),
244 (0, _) | (_, 0) => return Err(TokenizationError::FormatNotSupported),
245 (_, _) => (),
246 }
247 tokenize_buffers(&mut tokens, &mut buffers, content.len());
248 Ok(tokens)
249}
250
#[cfg(test)]
mod tests {
    use super::*;

    // Empty input must be rejected up front with `EmptyContent`.
    #[test]
    fn test_empty_content() {
        assert_eq!(tokenize(""), Err(TokenizationError::EmptyContent));
        assert_eq!(
            tokenize(String::new().as_str()),
            Err(TokenizationError::EmptyContent)
        );
    }

    // A leading `..` outside braces is literal text, not a range operator.
    #[test]
    fn test_double_dots_noerror() {
        assert_eq!(
            tokenize("..{a,b}",),
            Ok(vec![
                Token::Text(Arc::new("..".to_string()), 0),
                Token::OBra(2),
                Token::Text(Arc::new("a".to_string()), 3),
                Token::Comma(4),
                Token::Text(Arc::new("b".to_string()), 5),
                Token::CBra(6),
            ])
        )
    }

    // Inputs with no braces at all are rejected with `NoBraces`.
    #[test]
    fn test_no_braces() {
        assert_eq!(tokenize("a"), Err(TokenizationError::NoBraces));
        assert_eq!(tokenize("1..3"), Err(TokenizationError::NoBraces));
        assert_eq!(tokenize("a,b"), Err(TokenizationError::NoBraces));
        assert_eq!(
            tokenize("arst1..3.(arst)xt"),
            Err(TokenizationError::NoBraces)
        );
    }

    // Inputs with only opening or only closing braces are rejected.
    #[test]
    fn test_format_not_supported() {
        assert_eq!(
            tokenize("{a, b, c, d"),
            Err(TokenizationError::FormatNotSupported)
        );
        assert_eq!(
            tokenize("{{a, b, c, d"),
            Err(TokenizationError::FormatNotSupported)
        );
        assert_eq!(
            tokenize("a, b, c, d}}"),
            Err(TokenizationError::FormatNotSupported)
        );
        assert_eq!(
            tokenize("a{, b{, c{, d{"),
            Err(TokenizationError::FormatNotSupported)
        );
    }

    // A single brace pair: range form, text-then-number, number-then-text.
    #[test]
    fn test_tokenize_single_brace_expansion() {
        let content = "A{1..3}";
        let expected_result: Result<Vec<Token>, TokenizationError> = Ok(vec![
            Token::Text(Arc::new("A".to_string()), 0),
            Token::OBra(1),
            Token::Number(Arc::new("1".to_string()), 2),
            Token::Range(3),
            Token::Number(Arc::new("3".to_string()), 5),
            Token::CBra(6),
        ]);
        assert_eq!(tokenize(content), expected_result);
        let content = "{AB12}";
        let expected_result: Result<Vec<Token>, TokenizationError> = Ok(vec![
            Token::OBra(0),
            Token::Text(Arc::new("AB".to_string()), 1),
            Token::Number(Arc::new("12".to_string()), 3),
            Token::CBra(5),
        ]);
        assert_eq!(tokenize(content), expected_result);
        let content = "{12AB}";
        let expected_result: Result<Vec<Token>, TokenizationError> = Ok(vec![
            Token::OBra(0),
            Token::Number(Arc::new("12".to_string()), 1),
            Token::Text(Arc::new("AB".to_string()), 3),
            Token::CBra(5),
        ]);
        assert_eq!(tokenize(content), expected_result);
    }

    // Two brace groups joined by `..`: the joining dots (outside braces)
    // must lex as literal text, not as a range.
    #[test]
    fn test_tokenize_multiple_brace_expansions() {
        let content = "A{1,2}..B{3,4}";
        let expected_result: Result<Vec<Token>, TokenizationError> = Ok(vec![
            Token::Text(Arc::new("A".to_string()), 0),
            Token::OBra(1),
            Token::Number(Arc::new("1".to_string()), 2),
            Token::Comma(3),
            Token::Number(Arc::new("2".to_string()), 4),
            Token::CBra(5),
            Token::Text(Arc::new("..".to_string()), 6),
            Token::Text(Arc::new("B".to_string()), 8),
            Token::OBra(9),
            Token::Number(Arc::new("3".to_string()), 10),
            Token::Comma(11),
            Token::Number(Arc::new("4".to_string()), 12),
            Token::CBra(13),
        ]);
        assert_eq!(tokenize(content), expected_result);
    }

    // General smoke tests: range, comma list, and a mixed expression.
    #[test]
    fn test_tokenize() {
        assert_eq!(
            tokenize("{1..3}"),
            Ok(vec![
                Token::OBra(0),
                Token::Number(Arc::new("1".to_owned()), 1),
                Token::Range(2),
                Token::Number(Arc::new("3".to_owned()), 4),
                Token::CBra(5)
            ])
        );

        assert_eq!(
            tokenize("{a,b,c}"),
            Ok(vec![
                Token::OBra(0),
                Token::Text(Arc::new("a".to_owned()), 1),
                Token::Comma(2),
                Token::Text(Arc::new("b".to_owned()), 3),
                Token::Comma(4),
                Token::Text(Arc::new("c".to_owned()), 5),
                Token::CBra(6)
            ])
        );

        assert_eq!(
            tokenize("A{1..3}..B{2,5}"),
            Ok(vec![
                Token::Text(Arc::new("A".to_owned()), 0),
                Token::OBra(1),
                Token::Number(Arc::new("1".to_owned()), 2),
                Token::Range(3),
                Token::Number(Arc::new("3".to_owned()), 5),
                Token::CBra(6),
                Token::Text(Arc::new("..".to_owned()), 7),
                Token::Text(Arc::new("B".to_owned()), 9),
                Token::OBra(10),
                Token::Number(Arc::new("2".to_owned()), 11),
                Token::Comma(12),
                Token::Number(Arc::new("5".to_owned()), 13),
                Token::CBra(14)
            ])
        );
    }

    // Dot handling: `..` inside braces is a range; single dots split
    // digit runs but merge into a surrounding text run.
    #[test]
    fn test_dots() {
        assert_eq!(
            tokenize("{1..3}"),
            Ok(vec![
                Token::OBra(0),
                Token::Number(Arc::new("1".to_owned()), 1),
                Token::Range(2),
                Token::Number(Arc::new("3".to_owned()), 4),
                Token::CBra(5),
            ])
        );
        assert_eq!(
            tokenize("{1.2.3,b}"),
            Ok(vec![
                Token::OBra(0),
                Token::Number(Arc::new("1".to_owned()), 1),
                Token::Text(Arc::new(".".to_owned()), 2),
                Token::Number(Arc::new("2".to_owned()), 3),
                Token::Text(Arc::new(".".to_owned()), 4),
                Token::Number(Arc::new("3".to_owned()), 5),
                Token::Comma(6),
                Token::Text(Arc::new("b".to_owned()), 7),
                Token::CBra(8),
            ])
        );
        assert_eq!(
            tokenize("{a.b.c,d}"),
            Ok(vec![
                Token::OBra(0),
                Token::Text(Arc::new("a.b.c".to_owned()), 1),
                Token::Comma(6),
                Token::Text(Arc::new("d".to_owned()), 7),
                Token::CBra(8),
            ])
        );
    }

    // Escapes force literal text: a digit followed by an escape sequence
    // must lex as one Text token (the backslash itself is consumed).
    #[test]
    fn test_numbers_with_proceeding_escapees_are_text_now() {
        assert_eq!(
            tokenize("1\\\\{a,b}"),
            Ok(vec![
                Token::Text(Arc::new("1\\".into()), 1),
                Token::OBra(3),
                Token::Text(Arc::new("a".into()), 4),
                Token::Comma(5),
                Token::Text(Arc::new("b".into()), 6),
                Token::CBra(7),
            ])
        );
        assert_eq!(
            tokenize("1\\a{b,c}"),
            Ok(vec![
                Token::Text(Arc::new("1a".into()), 1),
                Token::OBra(3),
                Token::Text(Arc::new("b".into()), 4),
                Token::Comma(5),
                Token::Text(Arc::new("c".into()), 6),
                Token::CBra(7),
            ])
        );
        assert_eq!(
            tokenize("{1\\2,3\\\\{4\\5,6\\7}}"),
            Ok(vec![
                Token::OBra(0),
                Token::Text(Arc::new("12".into()), 2),
                Token::Comma(4),
                Token::Text(Arc::new("3\\".into()), 6),
                Token::OBra(8),
                Token::Text(Arc::new("45".into()), 10),
                Token::Comma(12),
                Token::Text(Arc::new("67".into()), 14),
                Token::CBra(16),
                Token::CBra(17),
            ])
        );
    }
}