1#![doc = include_str!("../README.md")]
2
3use std::borrow::Cow;
4use std::collections::VecDeque;
5use std::error::Error;
6use std::fmt::Display;
7use std::iter::Enumerate;
8use std::mem::take;
9use std::str::CharIndices;
10
11const NEWLINE: char = '\u{000A}';
12
13pub fn parse(source_text: &str) -> Result<Vec<Vec<Option<Cow<'_, str>>>>, WSVError> {
33 parse_with_col_count(source_text, 0)
35}
36
37pub fn parse_with_col_count(
41 source_text: &str,
42 col_count: usize,
43) -> Result<Vec<Vec<Option<Cow<'_, str>>>>, WSVError> {
44 let mut result = Vec::new();
45 result.push(Vec::with_capacity(col_count));
46 let mut last_line_num = 0;
47
48 for fallible_token in WSVTokenizer::new(source_text) {
49 let token = fallible_token?;
50 match token {
51 WSVToken::LF => {
52 result.push(Vec::with_capacity(col_count));
53 last_line_num += 1;
54 }
55 WSVToken::Null => {
56 result[last_line_num].push(None);
57 }
58 WSVToken::Value(value) => {
59 result[last_line_num].push(Some(value));
60 }
61 WSVToken::Comment(_) => {}
62 }
63 }
64
65 if result[last_line_num].len() == 0 {
68 result.pop();
69 }
70
71 Ok(result)
72}
73
74pub fn parse_lazy<Chars: IntoIterator<Item = char>>(source_text: Chars) -> WSVLineIterator<Chars> {
80 WSVLineIterator::new(source_text)
81}
82
/// Iterator that groups the tokens of a [`WSVLazyTokenizer`] into lines,
/// yielding each line as a `Vec` of optional owned strings.
pub struct WSVLineIterator<Chars>
where
    Chars: IntoIterator<Item = char>,
{
    /// Source of tokens for the lines produced by this iterator.
    tokenizer: WSVLazyTokenizer<Chars>,
    /// Error held back so the values collected before it on the same line
    /// can be yielded first; it is returned by the following call to `next`.
    lookahead_error: Option<WSVError>,
    /// Set once the tokenizer has reported an error; after the error has been
    /// yielded, `next` returns `None`.
    errored: bool,
    /// Checked at the start of `next` to stop iteration.
    /// NOTE(review): never set to `true` in the code visible here.
    finished: bool,
}
94
95impl<Chars> WSVLineIterator<Chars>
96where
97 Chars: IntoIterator<Item = char>,
98{
99 fn new(source_text: Chars) -> Self {
100 Self {
101 tokenizer: WSVLazyTokenizer::new(source_text),
102 lookahead_error: None,
103 errored: false,
104 finished: false,
105 }
106 }
107}
108
109impl<Chars> Iterator for WSVLineIterator<Chars>
110where
111 Chars: IntoIterator<Item = char>,
112{
113 type Item = Result<Vec<Option<String>>, WSVError>;
114
115 fn next(&mut self) -> Option<Self::Item> {
116 if self.finished {
117 return None;
118 }
119
120 if let Some(err) = take(&mut self.lookahead_error) {
121 return Some(Err(err));
122 }
123
124 if self.errored {
125 return None;
126 }
127
128 let mut line = Vec::new();
129 loop {
130 let token = self.tokenizer.next();
131 match token {
132 None => {
133 if line.is_empty() {
134 return None;
135 } else {
136 return Some(Ok(line));
137 }
138 }
139 Some(token) => match token {
140 Err(err) => {
141 self.errored = true;
142 if line.is_empty() {
143 return Some(Err(err));
144 } else {
145 self.lookahead_error = Some(err);
146 return Some(Ok(line));
147 }
148 }
149 Ok(token) => match token {
150 OwnedWSVToken::Comment(_) => {}
151 OwnedWSVToken::LF => return Some(Ok(line)),
152 OwnedWSVToken::Null => line.push(None),
153 OwnedWSVToken::Value(val) => line.push(Some(val)),
154 },
155 },
156 }
157 }
158 }
159}
160
/// Serializes lines of optional string-like values as WSV text.
///
/// The writer is itself an iterator of `char`s (see its `Iterator` impl) and
/// can also render everything at once through its `to_string` method.
pub struct WSVWriter<OuterIter, InnerIter, BorrowStr>
where
    OuterIter: IntoIterator<Item = InnerIter>,
    InnerIter: IntoIterator<Item = Option<BorrowStr>>,
    BorrowStr: AsRef<str>,
{
    /// How `to_string` lays out columns.
    align_columns: ColumnAlignment,
    /// Remaining lines, paired with their zero-based index.
    values: Enumerate<OuterIter::IntoIter>,
    /// Values of the line currently being written, if any.
    current_inner: Option<InnerIter::IntoIter>,
    /// Characters already produced for the current value but not yet yielded.
    lookahead_chars: VecDeque<char>,
}
173
174impl<OuterIter, InnerIter, BorrowStr> WSVWriter<OuterIter, InnerIter, BorrowStr>
175where
176 OuterIter: Iterator<Item = InnerIter>,
177 InnerIter: IntoIterator<Item = Option<BorrowStr>>,
178 BorrowStr: AsRef<str> + ToString,
179{
180 pub fn new<OuterInto>(values: OuterInto) -> Self
181 where
182 OuterInto: IntoIterator<Item = InnerIter, IntoIter = OuterIter>,
183 {
184 let outer_into = values.into_iter();
185
186 Self {
187 align_columns: ColumnAlignment::default(),
188 values: outer_into.enumerate(),
189 current_inner: None,
190 lookahead_chars: VecDeque::new(),
191 }
192 }
193
194 pub fn align_columns(mut self, alignment: ColumnAlignment) -> Self {
199 self.align_columns = alignment;
200 self
201 }
202
203 pub fn to_string(self) -> String {
204 match self.align_columns {
205 ColumnAlignment::Packed => self.collect::<String>(),
206 ColumnAlignment::Left | ColumnAlignment::Right => {
207 let mut max_col_widths = Vec::new();
208
209 let vecs = self
210 .values
211 .map(|(line_num, inner)| {
212 (
213 line_num,
214 inner
215 .into_iter()
216 .enumerate()
217 .map(|(index, value)| {
218 let mut needs_quotes = false;
222 let mut value_len = 0;
223 match value.as_ref() {
224 None => value_len = 1,
225 Some(val) => {
226 for ch in val.as_ref().chars() {
227 match ch {
228 '\n' => {
230 value_len += 3;
231 needs_quotes = true;
232 }
233 '"' => {
234 value_len += 2;
235 needs_quotes = true;
236 }
237 '#' => {
238 value_len += 1;
239 needs_quotes = true;
240 }
241 ch => {
242 value_len += 1;
243 needs_quotes |= ch == '#'
244 || WSVTokenizer::is_whitespace(ch);
245 }
246 }
247 }
248 }
249 }
250
251 if needs_quotes {
252 value_len += 2;
253 }
254 match max_col_widths.get_mut(index) {
255 None => max_col_widths.push(value_len),
256 Some(longest_len) => {
257 if value_len > *longest_len {
258 *longest_len = value_len
259 }
260 }
261 }
262 return (needs_quotes, value_len, value);
263 })
264 .collect::<Vec<_>>(),
265 )
266 })
267 .collect::<Vec<_>>();
268
269 let mut result = String::new();
270 for (line_num, line) in vecs {
271 if line_num != 0 {
272 result.push('\n');
273 }
274
275 for (i, col) in line.into_iter().enumerate() {
276 if i != 0 {
277 result.push(' ');
278 }
279
280 let value = match col.2.as_ref() {
281 None => "-",
282 Some(string) => string.as_ref(),
283 };
284
285 if let &ColumnAlignment::Right = &self.align_columns {
286 for _ in col.1..max_col_widths[i] {
287 result.push(' ');
288 }
289 }
290
291 if col.0 {
292 result.push('"');
293 }
294
295 for ch in value.chars() {
296 if ch == '\n' {
297 result.push('"');
298 result.push('/');
299 result.push('"');
300 } else if ch == '"' {
301 result.push('"');
302 result.push('"');
303 } else {
304 result.push(ch);
305 }
306 }
307
308 if col.0 {
309 result.push('"');
310 }
311
312 if let &ColumnAlignment::Left = &self.align_columns {
313 for _ in col.1..max_col_widths[i] {
314 result.push(' ');
315 }
316 }
317 }
318 }
319
320 result
321 }
322 }
323 }
324}
325
326impl<OuterIter, InnerIter, BorrowStr> Iterator for WSVWriter<OuterIter, InnerIter, BorrowStr>
327where
328 OuterIter: Iterator<Item = InnerIter>,
329 InnerIter: IntoIterator<Item = Option<BorrowStr>>,
330 BorrowStr: AsRef<str> + ToString,
331{
332 type Item = char;
333 fn next(&mut self) -> Option<Self::Item> {
334 loop {
335 if let Some(ch) = self.lookahead_chars.pop_front() {
336 return Some(ch);
337 }
338
339 if let Some(inner_mut) = self.current_inner.as_mut() {
340 match inner_mut.next() {
341 None => {
342 self.current_inner = None;
343 }
344 Some(next_string_like) => match next_string_like {
345 None => {
346 self.lookahead_chars.push_back(' ');
347 return Some('-');
348 }
349 Some(string_like) => {
350 let mut needs_quotes = false;
351 for ch in string_like.as_ref().chars() {
352 match ch {
353 '\n' => {
354 self.lookahead_chars.push_back('"');
355 self.lookahead_chars.push_back('/');
356 self.lookahead_chars.push_back('"');
357 needs_quotes = true;
358 }
359 '"' => {
360 self.lookahead_chars.push_back('"');
361 self.lookahead_chars.push_back('"');
362 needs_quotes = true;
363 }
364 ch => {
365 self.lookahead_chars.push_back(ch);
366 needs_quotes |=
367 ch == '#' || WSVTokenizer::is_whitespace(ch);
368 }
369 }
370 }
371 if needs_quotes {
372 self.lookahead_chars.push_front('"');
373 self.lookahead_chars.push_back('"');
374 }
375 self.lookahead_chars.push_back(' ');
376 continue;
377 }
378 },
379 }
380 }
381
382 match self.values.next() {
383 None => return None,
384 Some((i, inner)) => {
385 self.current_inner = Some(inner.into_iter());
386 if i != 0 {
387 return Some('\n');
388 }
389 }
390 }
391 }
392 }
393}
/// Column layout used by the writer's `to_string` method.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ColumnAlignment {
    /// Pad each value with trailing spaces up to its column's width.
    Left,
    /// Pad each value with leading spaces up to its column's width.
    Right,
    /// No padding; this is the default.
    #[default]
    Packed,
}
401
/// Tokenizer over a borrowed `&str`; yields [`WSVToken`]s that borrow from
/// the source text where possible.
pub struct WSVTokenizer<'wsv> {
    /// The complete input; tokens are sliced out of it by byte index.
    source: &'wsv str,
    /// Iterator over the input's characters together with their byte offsets.
    chars: CharIndices<'wsv>,
    /// One character of lookahead that has been pulled from `chars` but not
    /// yet consumed.
    peeked: Option<(usize, char)>,
    /// Position bookkeeping, updated whenever a character is consumed.
    current_location: Location,
    /// Error detected while producing a token; it is returned by the next
    /// call to `next` instead of a token.
    lookahead_error: Option<WSVError>,
    /// Once set, `next` returns `None`.
    errored: bool,
}
414
impl<'wsv> WSVTokenizer<'wsv> {
    /// Creates a tokenizer positioned at the start of `source_text`.
    pub fn new(source_text: &'wsv str) -> Self {
        Self {
            source: source_text,
            chars: source_text.char_indices(),
            peeked: None,
            current_location: Location::default(),
            lookahead_error: None,
            errored: false,
        }
    }

    /// Tries to read a double-quoted string starting at the current position.
    ///
    /// Returns `None` (consuming nothing) when the next character is not `"`.
    /// Otherwise returns the string's value, or an error when the string
    /// contains a raw line feed, an incomplete `"/"` sequence, or is still
    /// open at the end of input.
    ///
    /// The value is assembled from slices of `source` ("chunks"). When a
    /// single chunk suffices the result borrows from `source`; otherwise the
    /// chunks are concatenated into an owned `String`.
    fn match_string(&mut self) -> Option<Result<WSVToken<'wsv>, WSVError>> {
        if self.match_char('"').is_none() {
            return None;
        }
        let mut chunks = Vec::with_capacity(1);
        // Byte index where the chunk currently being scanned begins.
        let mut chunk_start = None;
        loop {
            if self.match_char('"').is_some() {
                if self.match_char('"').is_some() {
                    // Escaped quote (`""`): close the chunk before the first
                    // quote and start the next chunk AT the second quote, so
                    // exactly one `"` ends up in the value.
                    let end_location = self.current_location.byte_index - 1;
                    chunks.push(&self.source[chunk_start.unwrap_or(end_location)..end_location]);
                    chunk_start = Some(self.current_location.byte_index);
                } else if self.match_char('/').is_some() {
                    if self.match_char('"').is_none() {
                        self.errored = true;
                        return Some(Err(WSVError {
                            err_type: WSVErrorType::InvalidStringLineBreak,
                            location: self.current_location.clone(),
                        }));
                    }
                    // Line break (`"/"`): `byte_index` is at the closing quote
                    // of the sequence, so the chunk ends two bytes earlier and
                    // the next chunk starts one byte later.
                    let end_index = self.current_location.byte_index - 2;
                    chunks.push(&self.source[chunk_start.unwrap_or(end_index)..end_index]);
                    chunks.push("\n");
                    chunk_start = Some(self.current_location.byte_index + 1);
                } else {
                    // A lone quote closes the string.
                    chunks.push(
                        &self.source[chunk_start.unwrap_or(self.current_location.byte_index)
                            ..self.current_location.byte_index],
                    );
                    break;
                }
            } else if let Some(NEWLINE) = self.peek() {
                // A raw line feed is not allowed inside a string. (The inner
                // check repeats the outer one and is always true here.)
                if let Some(NEWLINE) = self.peek() {
                    self.errored = true;
                    return Some(Err(WSVError {
                        err_type: WSVErrorType::StringNotClosed,
                        location: self.current_location.clone(),
                    }));
                }
            } else if let None = chunk_start {
                // First ordinary character: remember where the chunk begins.
                // The character itself is consumed on the next iteration.
                chunk_start = Some(match self.peek_location() {
                    None => self.source.len(),
                    Some(val) => val.byte_index,
                });
            } else if self.match_char_if(&mut |_| true).is_none() {
                // Nothing left to consume: the input ended inside the string.
                return Some(Err(WSVError {
                    err_type: WSVErrorType::StringNotClosed,
                    location: self.peek_location().into_iter().next().unwrap_or_else(|| {
                        let mut loc = self.current_location.clone();
                        loc.byte_index = self.source.len();
                        return loc;
                    }),
                }));
            }
        }

        if chunks.len() == 1 {
            return Some(Ok(WSVToken::Value(Cow::Borrowed(chunks[0]))));
        } else {
            return Some(Ok(WSVToken::Value(Cow::Owned(
                chunks.into_iter().collect::<String>(),
            ))));
        }
    }

    /// Consumes characters for as long as `predicate` accepts them and returns
    /// the consumed slice of `source`, or `None` when nothing was consumed.
    fn match_char_while<F: FnMut(char) -> bool>(&mut self, mut predicate: F) -> Option<&'wsv str> {
        let mut start = None;
        loop {
            match self.match_char_if(&mut predicate) {
                None => break,
                Some((index, _)) => {
                    if let None = start {
                        start = Some(index);
                    }
                }
            }
        }

        let start_val = match start {
            None => return None,
            Some(val) => val,
        };

        // The slice ends where the next unconsumed character starts, or at the
        // end of the input when there is none.
        self.peek();
        let end_val = match self.peeked.as_ref() {
            None => self.source.len(),
            Some((index, _)) => *index,
        };

        return Some(&self.source[start_val..end_val]);
    }

    /// Consumes the next character only if it equals `ch`.
    fn match_char(&mut self, ch: char) -> Option<(usize, char)> {
        self.match_char_if(&mut |found_char| ch == found_char)
    }

    /// Consumes the next character only if `predicate` accepts it, returning
    /// its byte index and value.
    ///
    /// Consuming updates `current_location`: a line feed increments `line` and
    /// resets `col` to 1, any other character increments `col`; `byte_index`
    /// is set to the consumed character's byte index.
    fn match_char_if<F: FnMut(char) -> bool>(
        &mut self,
        predicate: &mut F,
    ) -> Option<(usize, char)> {
        if let Some(found_char) = self.peek() {
            if predicate(found_char) {
                let consumed = take(&mut self.peeked);

                match consumed {
                    None => {
                        return None;
                    }
                    Some((i, ch)) => {
                        if ch == NEWLINE {
                            self.current_location.line += 1;
                            self.current_location.col = 1;
                        } else {
                            self.current_location.col += 1;
                        }
                        self.current_location.byte_index = i;
                    }
                }

                return consumed.clone();
            }
        }

        return None;
    }

    /// Returns a copy of `current_location` with `col` incremented by one and
    /// `byte_index` set to the peeked character's byte index, or `None` at the
    /// end of input.
    fn peek_location(&mut self) -> Option<Location> {
        self.peek_inner();
        match self.peeked.as_ref() {
            None => None,
            Some((i, _)) => {
                let mut peeked_pos = self.current_location.clone();
                peeked_pos.col += 1;
                peeked_pos.byte_index = *i;
                Some(peeked_pos)
            }
        }
    }

    /// Returns the next character without consuming it.
    fn peek(&mut self) -> Option<char> {
        match self.peek_inner() {
            None => None,
            Some(peeked) => Some(peeked.1),
        }
    }

    /// Fills the one-character lookahead from `chars` if it is empty and
    /// returns a reference to it.
    fn peek_inner(&mut self) -> Option<&(usize, char)> {
        if let None = self.peeked.as_ref() {
            self.peeked = self.chars.next();
        }
        self.peeked.as_ref()
    }

    /// Returns `true` for the characters treated as value separators.
    /// The line feed (U+000A) is deliberately not in this list; it is
    /// tokenized separately.
    fn is_whitespace(ch: char) -> bool {
        match ch {
            '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{000D}' | '\u{0020}' | '\u{0085}'
            | '\u{00A0}' | '\u{1680}' | '\u{2000}' | '\u{2001}' | '\u{2002}' | '\u{2003}'
            | '\u{2004}' | '\u{2005}' | '\u{2006}' | '\u{2007}' | '\u{2008}' | '\u{2009}'
            | '\u{200A}' | '\u{2028}' | '\u{2029}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
            _ => false,
        }
    }
}
594
595impl<'wsv> Iterator for WSVTokenizer<'wsv> {
596 type Item = Result<WSVToken<'wsv>, WSVError>;
597
598 fn next(&mut self) -> Option<Self::Item> {
599 if self.errored {
600 return None;
601 }
602 if let Some(err) = take(&mut self.lookahead_error) {
603 self.errored = true;
604 return Some(Err(err));
605 }
606 self.match_char_while(|ch| Self::is_whitespace(ch));
607
608 let str = self.match_string();
609 if str.is_some() {
610 let lookahead = self.peek().unwrap_or(' ');
611 if lookahead != NEWLINE && lookahead != '#' && !Self::is_whitespace(lookahead) {
612 self.lookahead_error = Some(WSVError {
613 location: self.current_location.clone(),
614 err_type: WSVErrorType::InvalidCharacterAfterString,
615 });
616 }
617 return str;
618 } else if self.match_char('#').is_some() {
619 return Some(Ok(WSVToken::Comment(
621 self.match_char_while(|ch| ch != NEWLINE).unwrap_or(""),
622 )));
623 } else if self.match_char(NEWLINE).is_some() {
624 return Some(Ok(WSVToken::LF));
625 } else {
626 match self.match_char_while(|ch| {
628 if ch == NEWLINE {
629 return false;
630 }
631 if ch == '"' {
632 return false;
633 }
634 if ch == '#' {
635 return false;
636 }
637 if Self::is_whitespace(ch) {
638 return false;
639 }
640 return true;
641 }) {
642 Some(str) => {
643 if str == "-" {
644 return Some(Ok(WSVToken::Null));
645 }
646 if let Some('"') = self.peek() {
647 self.lookahead_error = Some(WSVError {
648 location: self.current_location.clone(),
649 err_type: WSVErrorType::InvalidDoubleQuoteAfterValue,
650 });
651 }
652 return Some(Ok(WSVToken::Value(Cow::Borrowed(str))));
653 }
654 None => None,
655 }
656 }
657 }
658}
659
/// Tokenizer over any source of `char`s; yields owned tokens
/// ([`OwnedWSVToken`]) because there is no backing string to borrow from.
pub struct WSVLazyTokenizer<Chars: IntoIterator<Item = char>> {
    /// Character source the tokens are read from.
    source: Chars::IntoIter,
    /// One character of lookahead that has been pulled from `source` but not
    /// yet consumed.
    peeked: Option<char>,
    /// Position bookkeeping, updated whenever a character is consumed.
    current_location: Location,
    /// Error detected while producing a token; it is returned by the next
    /// call to `next` instead of a token.
    lookahead_error: Option<WSVError>,
    /// Once set, `next` returns `None`.
    errored: bool,
}
671
672impl<Chars> WSVLazyTokenizer<Chars>
673where
674 Chars: IntoIterator<Item = char>,
675{
676 pub fn new(source_text: Chars) -> Self {
677 Self {
678 source: source_text.into_iter(),
679 peeked: None,
680 current_location: Location::default(),
681 lookahead_error: None,
682 errored: false,
683 }
684 }
685
686 fn match_string(&mut self) -> Option<Result<OwnedWSVToken, WSVError>> {
687 if self.match_char('"').is_none() {
688 return None;
689 }
690 let mut result = String::new();
691 loop {
692 if self.match_char('"').is_some() {
693 if self.match_char('"').is_some() {
694 result.push('"');
696 } else if self.match_char('/').is_some() {
697 if self.match_char('"').is_none() {
698 self.errored = true;
699 return Some(Err(WSVError {
700 err_type: WSVErrorType::InvalidStringLineBreak,
701 location: self.current_location.clone(),
702 }));
703 }
704 result.push('\n');
705 } else {
706 return Some(Ok(OwnedWSVToken::Value(result)));
707 }
708 } else if let Some(NEWLINE) = self.peek() {
709 if let Some(NEWLINE) = self.peek() {
710 self.errored = true;
711 return Some(Err(WSVError {
712 err_type: WSVErrorType::StringNotClosed,
713 location: self.current_location.clone(),
714 }));
715 }
716 } else if let Some(ch) = self.match_char_if(&mut |_| true) {
717 result.push(ch);
718 } else {
719 return Some(Err(WSVError {
720 err_type: WSVErrorType::StringNotClosed,
721 location: self
722 .peek_location()
723 .into_iter()
724 .next()
725 .unwrap_or_else(|| self.current_location.clone()),
726 }));
727 }
728 }
729 }
730
731 fn match_char_while<F: FnMut(char) -> bool>(&mut self, mut predicate: F) -> Option<String> {
732 let mut str = String::new();
733 loop {
734 match self.match_char_if(&mut predicate) {
735 None => break,
736 Some(ch) => {
737 str.push(ch);
738 }
739 }
740 }
741
742 if str.len() == 0 {
743 return None;
744 } else {
745 return Some(str);
746 }
747 }
748
749 fn match_char(&mut self, ch: char) -> Option<char> {
750 self.match_char_if(&mut |found_char| ch == found_char)
751 }
752
753 fn match_char_if<F: FnMut(char) -> bool>(&mut self, predicate: &mut F) -> Option<char> {
754 if let Some(found_char) = self.peek() {
755 if predicate(found_char) {
756 let consumed = take(&mut self.peeked);
757
758 match consumed {
759 None => {
760 return None;
761 }
762 Some(ch) => {
763 if ch == NEWLINE {
764 self.current_location.line += 1;
765 self.current_location.col = 1;
766 } else {
767 self.current_location.col += 1;
768 }
769 return Some(ch);
770 }
771 }
772 }
773 }
774
775 return None;
776 }
777
778 fn peek_location(&mut self) -> Option<Location> {
779 self.peek_inner();
780 match self.peeked.as_ref() {
781 None => None,
782 Some(_) => {
783 let mut peeked_pos = self.current_location.clone();
784 peeked_pos.col += 1;
785 Some(peeked_pos)
786 }
787 }
788 }
789
790 fn peek(&mut self) -> Option<char> {
791 match self.peek_inner() {
792 None => None,
793 Some(peeked) => Some(*peeked),
794 }
795 }
796
797 fn peek_inner(&mut self) -> Option<&char> {
798 if let None = self.peeked.as_ref() {
799 self.peeked = self.source.next();
800 }
801 self.peeked.as_ref()
802 }
803
804 fn is_whitespace(ch: char) -> bool {
805 match ch {
806 '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{000D}' | '\u{0020}' | '\u{0085}'
807 | '\u{00A0}' | '\u{1680}' | '\u{2000}' | '\u{2001}' | '\u{2002}' | '\u{2003}'
808 | '\u{2004}' | '\u{2005}' | '\u{2006}' | '\u{2007}' | '\u{2008}' | '\u{2009}'
809 | '\u{200A}' | '\u{2028}' | '\u{2029}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
810 _ => false,
811 }
812 }
813}
814
815impl<Chars> Iterator for WSVLazyTokenizer<Chars>
816where
817 Chars: IntoIterator<Item = char>,
818{
819 type Item = Result<OwnedWSVToken, WSVError>;
820 fn next(&mut self) -> Option<Self::Item> {
821 if self.errored {
822 return None;
823 }
824 if let Some(err) = take(&mut self.lookahead_error) {
825 self.errored = true;
826 return Some(Err(err));
827 }
828 self.match_char_while(|ch| Self::is_whitespace(ch));
829
830 let str = self.match_string();
831 if str.is_some() {
832 let lookahead = self.peek().unwrap_or(' ');
833 if lookahead != NEWLINE && lookahead != '#' && !Self::is_whitespace(lookahead) {
834 self.lookahead_error = Some(WSVError {
835 location: self.current_location.clone(),
836 err_type: WSVErrorType::InvalidCharacterAfterString,
837 });
838 }
839 return str;
840 } else if self.match_char('#').is_some() {
841 return Some(Ok(OwnedWSVToken::Comment(
843 self.match_char_while(|ch| ch != NEWLINE)
844 .unwrap_or_else(|| "".to_string()),
845 )));
846 } else if self.match_char(NEWLINE).is_some() {
847 return Some(Ok(OwnedWSVToken::LF));
848 } else {
849 match self.match_char_while(|ch| {
851 if ch == NEWLINE {
852 return false;
853 }
854 if ch == '"' {
855 return false;
856 }
857 if ch == '#' {
858 return false;
859 }
860 if Self::is_whitespace(ch) {
861 return false;
862 }
863 return true;
864 }) {
865 Some(str) => {
866 if str == "-" {
867 return Some(Ok(OwnedWSVToken::Null));
868 }
869 if let Some('"') = self.peek() {
870 self.lookahead_error = Some(WSVError {
871 location: self.current_location.clone(),
872 err_type: WSVErrorType::InvalidDoubleQuoteAfterValue,
873 });
874 }
875 return Some(Ok(OwnedWSVToken::Value(str)));
876 }
877 None => None,
878 }
879 }
880 }
881}
882
/// Token produced by [`WSVTokenizer`]; borrows from the source text where
/// possible.
#[derive(Debug, Clone)]
pub enum WSVToken<'wsv> {
    /// A line feed (U+000A), ending the current line.
    LF,
    /// An unquoted `-`, representing a null value.
    Null,
    /// A quoted or unquoted value. Owned only when a quoted string had to be
    /// assembled from several pieces (escapes or line breaks).
    Value(Cow<'wsv, str>),
    /// The text following a `#`, up to but not including the line feed.
    Comment(&'wsv str),
}
895
/// Token produced by [`WSVLazyTokenizer`]; the owned counterpart of
/// [`WSVToken`].
#[derive(Debug, Clone)]
pub enum OwnedWSVToken {
    /// A line feed (U+000A), ending the current line.
    LF,
    /// An unquoted `-`, representing a null value.
    Null,
    /// A quoted or unquoted value.
    Value(String),
    /// The text following a `#`, up to but not including the line feed.
    Comment(String),
}
907
/// Error reported while tokenizing or parsing WSV text.
#[derive(Debug, Clone)]
pub struct WSVError {
    /// What kind of problem was found.
    err_type: WSVErrorType,
    /// Tokenizer position recorded when the error was created.
    location: Location,
}
916
917impl WSVError {
918 pub fn err_type(&self) -> WSVErrorType {
919 self.err_type
920 }
921
922 pub fn location(&self) -> Location {
923 self.location.clone()
924 }
925}
926
927impl Display for WSVError {
928 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
929 let mut description = String::new();
930
931 let location = self.location();
932 description.push_str("(line: ");
933 description.push_str(&location.line().to_string());
934 description.push_str(", column: ");
935 description.push_str(&location.col().to_string());
936 description.push_str(") ");
937
938 match self.err_type() {
939 WSVErrorType::InvalidCharacterAfterString => {
940 description.push_str("Invalid Character After String");
941 }
942 WSVErrorType::InvalidDoubleQuoteAfterValue => {
943 description.push_str("Invalid Double Quote After Value");
944 }
945 WSVErrorType::InvalidStringLineBreak => {
946 description.push_str("Invalid String Line Break");
947 }
948 WSVErrorType::StringNotClosed => {
949 description.push_str("String Not Closed");
950 }
951 }
952
953 write!(f, "{}", description)?;
954 Ok(())
955 }
956}
// Marker impl so `WSVError` can be used wherever `std::error::Error` is
// expected; the trait's default methods are used as-is.
impl Error for WSVError {}
958
/// The kinds of error the tokenizers can report.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WSVErrorType {
    /// A quoted string hit a raw line feed or the end of input before its
    /// closing double quote.
    StringNotClosed,
    /// An unquoted value was immediately followed by a double quote.
    InvalidDoubleQuoteAfterValue,
    /// A quoted string was immediately followed by something other than
    /// whitespace, `#`, a line feed or the end of input.
    InvalidCharacterAfterString,
    /// Inside a quoted string, `"/` was not followed by a closing `"`.
    InvalidStringLineBreak,
}
968
/// Position bookkeeping used by the tokenizers and attached to errors.
#[derive(Debug, Clone)]
pub struct Location {
    /// Byte offset into the source text; only the `&str`-based tokenizer
    /// updates it.
    byte_index: usize,
    /// Line counter; starts at 1 and is incremented for each consumed line feed.
    line: usize,
    /// Column counter; starts at 1, is incremented for each consumed
    /// character, and is reset to 1 after a line feed.
    col: usize,
}
976
977impl Location {
978 pub fn line(&self) -> usize {
980 self.line
981 }
982 pub fn col(&self) -> usize {
984 self.col
985 }
986}
987
988impl Default for Location {
989 fn default() -> Self {
990 Self {
991 byte_index: 0,
992 line: 1,
993 col: 1,
994 }
995 }
996}
997
998#[cfg(debug_assertions)]
999mod tests {
1000 use crate::{
1001 parse_lazy, OwnedWSVToken, WSVError, WSVErrorType, WSVLazyTokenizer, WSVToken, WSVTokenizer,
1002 };
1003
1004 use super::{parse, WSVWriter};
1005 use std::{borrow::Cow, fmt::write};
1006
1007 #[test]
1008 fn read_and_write() {
1009 let str = include_str!("../tests/1_stenway.com");
1010 let result = parse(str).unwrap();
1011
1012 let result_str = WSVWriter::new(result)
1013 .align_columns(super::ColumnAlignment::Packed)
1014 .to_string();
1015
1016 println!("{}", result_str);
1017 }
1018
1019 #[test]
1020 fn read_and_write_lazy() {
1021 let str = r#"a U+0061 61 0061 "Latin Small Letter A"
1022~ U+007E 7E 007E Tilde
1023¥ U+00A5 C2_A5 00A5 "Yen Sign"
1024» U+00BB C2_BB 00BB "Right-Pointing Double Angle Quotation Mark"
1025½ U+00BD C2_BD 00BD "Vulgar Fraction One Half"
1026¿ U+00BF C2_BF 00BF "Inverted#Question Mark" # This is a comment
1027ß U+00DF C3_9F 00DF "Latin Small Letter Sharp S"
1028ä U+00E4 C3_A4 00E4 "Latin Small Letter A with Diaeresis"
1029ï U+00EF C3_AF 00EF "Latin Small Letter I with Diaeresis"
1030œ U+0153 C5_93 0153 "Latin Small Ligature Oe"
1031€ U+20AC E2_82_AC 20AC "Euro Sign"
1032東 U+6771 E6_9D_B1 6771 "CJK Unified Ideograph-6771"
1033𝄞 U+1D11E F0_9D_84_9E D834_DD1E "Musical Symbol G Clef"
1034𠀇 U+20007 F0_A0_80_87 D840_DC07 "CJK Unified Ideograph-20007"
1035- hyphen qwro-qweb -dasbe "A hyphen character - represents null""#;
1036 let result = parse_lazy(str.chars());
1037
1038 let result = result.map(|line| {
1039 line.unwrap().into_iter().map(|value| {
1040 let mut prefix = "-".to_string();
1041 prefix.push_str(&value.unwrap_or("-".to_string()));
1042 Some(prefix)
1043 })
1044 });
1045
1046 let result_str = WSVWriter::new(result)
1047 .align_columns(super::ColumnAlignment::Packed)
1048 .to_string();
1049
1050 println!("{}", result_str);
1051 }
1052
1053 #[test]
1054 fn e2e_test() {
1055 let str = include_str!("../tests/1_stenway.com");
1056 let result = parse(str);
1057
1058 let assert_matches_expected =
1059 |result: Result<Vec<Vec<Option<Cow<'_, str>>>>, WSVError>| match result {
1060 Err(_) => panic!("Should not have error"),
1061 Ok(values) => {
1062 let expected = vec![
1063 vec![
1064 "a",
1065 "U+0061",
1066 "61",
1067 "0061",
1068 "Latin Small Letter A",
1069 "\n\"\"",
1070 ],
1071 vec!["~", "U+007E", "7E", "007E", "Tilde"],
1072 vec!["¥", "U+00A5", "C2_A5", "00A5", "Yen Sign"],
1073 vec![
1074 "»",
1075 "U+00BB",
1076 "C2_BB",
1077 "00BB",
1078 "Right-Pointing Double Angle Quotation Mark",
1079 ],
1080 vec!["½", "U+00BD", "C2_BD", "00BD", "Vulgar Fraction One Half"],
1081 vec!["¿", "U+00BF", "C2_BF", "00BF", "Inverted#Question Mark"],
1082 vec!["ß", "U+00DF", "C3_9F", "00DF", "Latin Small Letter Sharp S"],
1083 vec![
1084 "ä",
1085 "U+00E4",
1086 "C3_A4",
1087 "00E4",
1088 "Latin Small Letter A with Diaeresis",
1089 ],
1090 vec![
1091 "ï",
1092 "U+00EF",
1093 "C3_AF",
1094 "00EF",
1095 "Latin Small Letter I with Diaeresis",
1096 ],
1097 vec!["œ", "U+0153", "C5_93", "0153", "Latin Small Ligature Oe"],
1098 vec!["€", "U+20AC", "E2_82_AC", "20AC", "Euro Sign"],
1099 vec![
1100 "東",
1101 "U+6771",
1102 "E6_9D_B1",
1103 "6771",
1104 "CJK Unified Ideograph-6771",
1105 ],
1106 vec![
1107 "𝄞",
1108 "U+1D11E",
1109 "F0_9D_84_9E",
1110 "D834_DD1E",
1111 "Musical Symbol G Clef",
1112 ],
1113 vec![
1114 "𠀇",
1115 "U+20007",
1116 "F0_A0_80_87",
1117 "D840_DC07",
1118 "CJK Unified Ideograph-20007",
1119 ],
1120 vec![
1121 "-",
1122 "hyphen",
1123 "qwro-qweb",
1124 "-dasbe",
1125 "A hyphen character - represents null",
1126 ],
1127 ];
1128
1129 let mut expected_iter = expected.into_iter();
1130 let mut acutal_iter = values.into_iter();
1131
1132 loop {
1133 let expected_line = expected_iter.next();
1134 let actual_line = acutal_iter.next();
1135
1136 assert_eq!(
1137 expected_line.is_some(),
1138 actual_line.is_some(),
1139 "Line numbers should match"
1140 );
1141 if expected_line.is_none() || actual_line.is_none() {
1142 break;
1143 }
1144
1145 let mut expected_value_iter = expected_line.unwrap().into_iter();
1146 let mut actual_value_iter = actual_line.unwrap().into_iter();
1147 loop {
1148 let expected_value = expected_value_iter.next();
1149 let actual_value = actual_value_iter.next();
1150
1151 assert_eq!(
1152 expected_value.is_some(),
1153 expected_value.is_some(),
1154 "Value counts should match"
1155 );
1156 if expected_value.is_none() || actual_value.is_none() {
1157 break;
1158 }
1159
1160 if expected_value.unwrap() == "-" {
1161 assert_eq!(None, actual_value.unwrap(), "'-' should parse to None");
1162 } else {
1163 let actual_value = actual_value
1164 .expect("Actual value to be populated at this poitn.")
1165 .expect(
1166 "actual value should parse to Some() if expected is not '-'",
1167 );
1168 let expected = expected_value.as_ref().unwrap();
1169 let actual = actual_value.as_ref();
1170 if expected_value.unwrap().to_owned() != actual_value.to_owned() {
1171 println!("Mismatch: \nExpected: {expected}\nActual: {actual}");
1172 panic!();
1173 }
1174 }
1175 }
1176 }
1177 }
1178 };
1179
1180 assert_matches_expected(result);
1181
1182 let parsed = parse(str).unwrap();
1183 let written = WSVWriter::new(parsed).to_string();
1184 println!("Writer output: {}", written);
1185 let reparsed = parse(&written);
1186 println!("Reparsed: {:?}", reparsed);
1187 assert_matches_expected(reparsed);
1188 }
1189
1190 #[test]
1191 fn e2e_test_lazy() {
1192 let str = include_str!("../tests/1_stenway.com");
1193 let result = parse_lazy(str.chars())
1194 .map(|line| line.unwrap())
1195 .collect::<Vec<_>>();
1196
1197 let assert_matches_expected = |values: Vec<Vec<Option<String>>>| {
1198 let expected = vec![
1199 vec![
1200 "a",
1201 "U+0061",
1202 "61",
1203 "0061",
1204 "Latin Small Letter A",
1205 "\n\"\"",
1206 ],
1207 vec!["~", "U+007E", "7E", "007E", "Tilde"],
1208 vec!["¥", "U+00A5", "C2_A5", "00A5", "Yen Sign"],
1209 vec![
1210 "»",
1211 "U+00BB",
1212 "C2_BB",
1213 "00BB",
1214 "Right-Pointing Double Angle Quotation Mark",
1215 ],
1216 vec!["½", "U+00BD", "C2_BD", "00BD", "Vulgar Fraction One Half"],
1217 vec!["¿", "U+00BF", "C2_BF", "00BF", "Inverted#Question Mark"],
1218 vec!["ß", "U+00DF", "C3_9F", "00DF", "Latin Small Letter Sharp S"],
1219 vec![
1220 "ä",
1221 "U+00E4",
1222 "C3_A4",
1223 "00E4",
1224 "Latin Small Letter A with Diaeresis",
1225 ],
1226 vec![
1227 "ï",
1228 "U+00EF",
1229 "C3_AF",
1230 "00EF",
1231 "Latin Small Letter I with Diaeresis",
1232 ],
1233 vec!["œ", "U+0153", "C5_93", "0153", "Latin Small Ligature Oe"],
1234 vec!["€", "U+20AC", "E2_82_AC", "20AC", "Euro Sign"],
1235 vec![
1236 "東",
1237 "U+6771",
1238 "E6_9D_B1",
1239 "6771",
1240 "CJK Unified Ideograph-6771",
1241 ],
1242 vec![
1243 "𝄞",
1244 "U+1D11E",
1245 "F0_9D_84_9E",
1246 "D834_DD1E",
1247 "Musical Symbol G Clef",
1248 ],
1249 vec![
1250 "𠀇",
1251 "U+20007",
1252 "F0_A0_80_87",
1253 "D840_DC07",
1254 "CJK Unified Ideograph-20007",
1255 ],
1256 vec![
1257 "-",
1258 "hyphen",
1259 "qwro-qweb",
1260 "-dasbe",
1261 "A hyphen character - represents null",
1262 ],
1263 ];
1264
1265 let mut expected_iter = expected.into_iter();
1266 let mut acutal_iter = values.into_iter();
1267
1268 loop {
1269 let expected_line = expected_iter.next();
1270 let actual_line = acutal_iter.next();
1271
1272 assert_eq!(
1273 expected_line.is_some(),
1274 actual_line.is_some(),
1275 "Line numbers should match"
1276 );
1277 if expected_line.is_none() || actual_line.is_none() {
1278 break;
1279 }
1280
1281 let mut expected_value_iter = expected_line.unwrap().into_iter();
1282 let mut actual_value_iter = actual_line.unwrap().into_iter();
1283 loop {
1284 let expected_value = expected_value_iter.next();
1285 let actual_value = actual_value_iter.next();
1286
1287 assert_eq!(
1288 expected_value.is_some(),
1289 expected_value.is_some(),
1290 "Value counts should match"
1291 );
1292 if expected_value.is_none() || actual_value.is_none() {
1293 break;
1294 }
1295
1296 if expected_value.unwrap() == "-" {
1297 assert_eq!(None, actual_value.unwrap(), "'-' should parse to None");
1298 } else {
1299 let actual_value = actual_value
1300 .expect("Actual value to be populated at this poitn.")
1301 .expect("actual value should parse to Some() if expected is not '-'");
1302 assert_eq!(
1303 expected_value.unwrap().to_owned(),
1304 actual_value.to_owned(),
1305 "string values should match"
1306 );
1307 }
1308 }
1309 }
1310 };
1311
1312 assert_matches_expected(result);
1313
1314 let parsed = parse(str).unwrap();
1315 let written = WSVWriter::new(parsed).to_string();
1316 let reparsed = parse_lazy(written.chars())
1317 .map(|line| line.unwrap())
1318 .collect();
1319 assert_matches_expected(reparsed);
1320 }
1321
1322 #[test]
1323 fn readme_example_write() {
1324 use std::fs::File;
1325 use std::io::BufReader;
1326 use crate::{parse_lazy, WSVWriter};
1329 use utf8_chars::BufReadCharsExt;
1330
1331 let mut reader = BufReader::new(File::open("./my_very_large_file.txt").unwrap());
1332
1333 let chars = reader.chars().map(|ch| ch.unwrap());
1334
1335 let lines_lazy = parse_lazy(chars).map(|line| {
1336 let sum = line
1338 .unwrap()
1339 .into_iter()
1340 .flat_map(|opt| opt)
1343 .map(|value| value.parse::<i32>().unwrap_or(0))
1344 .sum::<i32>();
1345
1346 vec![Some(sum.to_string())]
1350 });
1351 for ch in WSVWriter::new(lines_lazy) {
1357 print!("{}", ch);
1359 }
1360 }
1361
1362 #[test]
1363 fn in_and_out_with_cows() {
1364 let str = include_str!("../tests/1_stenway.com");
1365
1366 let values = parse(str).unwrap_or_else(|err| panic!("{:?}", err));
1367 let output = WSVWriter::new(values)
1368 .align_columns(crate::ColumnAlignment::Right)
1369 .to_string();
1370
1371 println!("{}", output);
1372 }
1373
/// Writes a single row made of a null cell and an owned `String` cell using
/// packed column alignment, and prints the result.
///
/// The output is not asserted; the test passes as long as writing does not panic.
#[test]
fn writing_strings() {
    let row = vec![None, Some("test".to_string())];
    let rows = vec![row];

    let writer = WSVWriter::new(rows).align_columns(crate::ColumnAlignment::Packed);
    let rendered = writer.to_string();

    println!("{}", rendered);
}
1384
/// A quoted string containing spaces yields exactly one value token whose
/// text is the content between the quotes.
#[test]
fn tokenizes_strings_correctly() {
    let mut tokens = WSVTokenizer::new("\"this is a string\"");

    let first = tokens.next().unwrap();
    assert!(are_equal(
        Ok(WSVToken::Value(Cow::Borrowed("this is a string"))),
        first
    ));

    // Nothing may follow the single value.
    assert!(tokens.next().is_none());
}
1395
/// An unquoted value directly followed by `#` yields the value token first
/// and then a comment token whose text excludes the `#`.
#[test]
fn tokenizes_string_and_immediate_comment_correctly() {
    let mut tokens = WSVTokenizer::new("somekindofvalue#thenacomment");

    let value = tokens.next().unwrap();
    assert!(are_equal(
        Ok(WSVToken::Value(Cow::Borrowed("somekindofvalue"))),
        value
    ));

    let comment = tokens.next().unwrap();
    assert!(are_equal(Ok(WSVToken::Comment("thenacomment")), comment));
}
1409
/// Same scenario as the eager tokenizer test, but driven through
/// `WSVLazyTokenizer` over a `chars()` iterator: a value token followed by a
/// comment token, both carrying owned strings.
#[test]
fn tokenizes_string_and_immediate_comment_correctly_lazily() {
    let source = "somekindofvalue#thenacomment";
    let mut tokens = WSVLazyTokenizer::new(source.chars());

    let value = tokens.next().unwrap();
    assert!(owned_are_equal(
        Ok(OwnedWSVToken::Value("somekindofvalue".to_string())),
        value
    ));

    let comment = tokens.next().unwrap();
    assert!(owned_are_equal(
        Ok(OwnedWSVToken::Comment("thenacomment".to_string())),
        comment
    ));
}
1423
/// A `"/` sequence inside a quoted string that is not followed by a reopening
/// quote must be reported as `InvalidStringLineBreak`, and the tokenizer must
/// yield nothing after that error.
#[test]
fn catches_invalid_line_breaks() {
    let mut tokens = WSVTokenizer::new("\"this is a string with an invalid \"/ line break.\"");

    let first = tokens.next().unwrap();
    let is_line_break_error = match first {
        Err(err) => matches!(err.err_type(), WSVErrorType::InvalidStringLineBreak),
        Ok(_) => false,
    };

    if !is_line_break_error {
        panic!("Expected to find an InvalidStringLineBreak error");
    }

    // Iteration stops after the error has been reported.
    assert!(tokens.next().is_none());
}
1436
/// An escaped quote (`""`) immediately followed by `/` must not be mistaken
/// for the `"/"` line-break escape: the value is `string "/` and no error is
/// produced.
#[test]
fn doesnt_err_on_false_positive_line_breaks() {
    let mut tokens = WSVTokenizer::new("\"string \"\"/\"");

    let first = tokens.next().unwrap();
    let expected = Ok(WSVToken::Value(Cow::Owned("string \"/".to_string())));
    assert!(are_equal(expected, first));

    assert!(tokens.next().is_none());
}
1448
/// Eight consecutive quote characters form one quoted string holding three
/// escaped quotes, so the single resulting value is `"""`.
#[test]
fn escapes_quotes_correctly() {
    let mut tokens = WSVTokenizer::new("\"\"\"\"\"\"\"\"");

    let first = tokens.next().unwrap();
    let expected = Ok(WSVToken::Value(Cow::Owned("\"\"\"".to_string())));
    assert!(are_equal(expected, first));

    assert!(tokens.next().is_none());
}
1459
/// Three `"/"` line-break escapes inside one quoted string produce a value
/// consisting of three newline characters.
#[test]
fn escapes_new_lines_correctly() {
    let mut tokens = WSVTokenizer::new("\"\"/\"\"/\"\"/\"\"");

    let first = tokens.next().unwrap();
    // Printed to aid debugging when the comparison below fails.
    println!("{:?}", first);

    let expected = Ok(WSVToken::Value(Cow::Owned("\n\n\n".to_string())));
    assert!(are_equal(expected, first));
}
1471
/// A quoted value directly followed by `#` yields the unquoted value token
/// and then a comment token whose text excludes the `#`.
#[test]
fn parses_quoted_string_and_immediate_comment_correctly() {
    let mut tokens = WSVTokenizer::new("\"somekindofvalue\"#thenacomment");

    let value = tokens.next().unwrap();
    assert!(are_equal(
        Ok(WSVToken::Value(Cow::Borrowed("somekindofvalue"))),
        value
    ));

    let comment = tokens.next().unwrap();
    assert!(are_equal(Ok(WSVToken::Comment("thenacomment")), comment));
}
1485
/// A quoted string that reaches the end of input without a closing quote
/// must yield a `StringNotClosed` error and then end iteration.
///
/// `are_equal` compares errors by `err_type()` only, so the default
/// `Location` in the expected error is just a placeholder.
#[test]
fn catches_unclosed_string() {
    let mut tokens = WSVTokenizer::new("\"this is an unclosed string");

    let expected = Err(WSVError {
        location: crate::Location::default(),
        err_type: WSVErrorType::StringNotClosed,
    });
    let first = tokens.next().unwrap();
    assert!(are_equal(expected, first));

    assert!(tokens.next().is_none());
}
1499
/// Parses the bundled `my_test.txt` fixture and prints the parsed rows; the
/// test fails only if parsing returns an error.
#[test]
fn atrocious_wsv() {
    let rows = parse(include_str!("../tests/my_test.txt")).unwrap();
    println!("{:?}", rows);
}
1505
1506 #[allow(dead_code)]
1507 fn are_equal(first: Result<WSVToken, WSVError>, second: Result<WSVToken, WSVError>) -> bool {
1508 match first {
1509 Ok(WSVToken::LF) => {
1510 if let Ok(WSVToken::LF) = second {
1511 return true;
1512 } else {
1513 return false;
1514 }
1515 }
1516 Ok(WSVToken::Null) => {
1517 if let Ok(WSVToken::Null) = second {
1518 return true;
1519 } else {
1520 return false;
1521 }
1522 }
1523 Ok(WSVToken::Comment(str1)) => {
1524 if let Ok(WSVToken::Comment(str2)) = second {
1525 return str1 == str2;
1526 } else {
1527 return false;
1528 }
1529 }
1530 Ok(WSVToken::Value(value1)) => {
1531 if let Ok(WSVToken::Value(value2)) = second {
1532 return value1.as_ref() == value2.as_ref();
1533 } else {
1534 return false;
1535 }
1536 }
1537 Err(err1) => {
1538 if let Err(err2) = second {
1539 return err1.err_type() == err2.err_type();
1540 } else {
1541 return false;
1542 }
1543 }
1544 }
1545 }
1546
1547 #[allow(dead_code)]
1548 fn owned_are_equal(
1549 first: Result<OwnedWSVToken, WSVError>,
1550 second: Result<OwnedWSVToken, WSVError>,
1551 ) -> bool {
1552 match first {
1553 Ok(OwnedWSVToken::LF) => {
1554 if let Ok(OwnedWSVToken::LF) = second {
1555 return true;
1556 } else {
1557 return false;
1558 }
1559 }
1560 Ok(OwnedWSVToken::Null) => {
1561 if let Ok(OwnedWSVToken::Null) = second {
1562 return true;
1563 } else {
1564 return false;
1565 }
1566 }
1567 Ok(OwnedWSVToken::Comment(str1)) => {
1568 if let Ok(OwnedWSVToken::Comment(str2)) = second {
1569 return str1 == str2;
1570 } else {
1571 return false;
1572 }
1573 }
1574 Ok(OwnedWSVToken::Value(value1)) => {
1575 if let Ok(OwnedWSVToken::Value(value2)) = second {
1576 return value1 == value2;
1577 } else {
1578 return false;
1579 }
1580 }
1581 Err(err1) => {
1582 if let Err(err2) = second {
1583 return err1.err_type() == err2.err_type();
1584 } else {
1585 return false;
1586 }
1587 }
1588 }
1589 }
1590
/// Feeds the writer a lazily generated input of `u32::MAX` rows with ten
/// cells each, but pulls and prints only the first item the writer yields,
/// so the full output is never produced.
#[test]
fn write_really_large_file() {
    let rows = (0..u32::MAX).map(|_| (0..10).map(|cell| Some(cell.to_string())));

    // Only the first item is consumed.
    for ch in WSVWriter::new(rows).into_iter().take(1) {
        print!("{}", ch);
    }
}
1600
/// Runs the lazy parse -> transform -> write pipeline over an empty input:
/// each parsed line would be replaced by a single cell holding the sum of its
/// integer cells, and every item the writer yields is printed.
#[test]
fn lazy_parse_write_example() {
    use crate::{parse_lazy, WSVWriter};

    let source = String::new();
    let chars = source.chars();

    let summed_lines = parse_lazy(chars).map(|line| {
        let cells = line.unwrap();
        // `None` cells are skipped; cells that do not parse as `i32` count as 0.
        let total: i32 = cells
            .into_iter()
            .flatten()
            .map(|cell| cell.parse::<i32>().unwrap_or(0))
            .sum();

        vec![Some(total.to_string())]
    });

    for ch in WSVWriter::new(summed_lines) {
        print!("{}", ch);
    }
}
1629
/// Checks that the lazy tokenizer reports the expected line and column for a
/// string that is opened on the fourth line of the input and never closed.
///
/// The test also requires that at least one error is produced; without that
/// check it would pass vacuously if the tokenizer accepted the input.
#[test]
fn error_location_reporting_is_correct() {
    let input = r#"some values would go here
 and this is a second line,
 but the realy error happens
"here where the string is unclosed.
"#;

    let mut saw_error = false;
    for result in WSVLazyTokenizer::new(input.chars()) {
        if let Err(err) = result {
            saw_error = true;
            assert_eq!(4, err.location().line());
            assert_eq!(36, err.location().col());
        }
    }

    // Guard against the location assertions never running at all.
    assert!(
        saw_error,
        "expected the unclosed string to produce a tokenizer error"
    );
}
1648
/// Regression check: writing rows of differing lengths with left-aligned
/// columns must not panic. The rendered text itself is discarded.
#[test]
fn jagged_array_no_panic() {
    let jagged_rows = [vec![Some("1")], vec![Some("3"), None]];

    let _rendered = super::WSVWriter::new(jagged_rows)
        .align_columns(super::ColumnAlignment::Left)
        .to_string();
}
1655}