whitespacesv/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::borrow::Cow;
4use std::collections::VecDeque;
5use std::error::Error;
6use std::fmt::Display;
7use std::iter::Enumerate;
8use std::mem::take;
9use std::str::CharIndices;
10
/// The line feed character (U+000A). The tokenizers in this file treat it as
/// the line separator and never as ordinary whitespace.
const NEWLINE: char = '\u{000A}';
12
/// Parses the contents of a .wsv (whitespace separated value) file.
///
/// On success the result is a 2 dimensional vec where the outer layer is
/// the line and the inner layer is the column. `-` values are converted to
/// `None` and all other values are `Some`. Comments are dropped. If the
/// tokenizer reports an error, parsing stops and that `WSVError` is returned.
///
/// For example, given the wsv file:
/// ```wsv
/// 1 -
/// 3 4
/// ```
/// the returned value would be `[[Some("1"), None], [Some("3"), Some("4")]]`
/// (values are strings; they are not converted to numbers).
///
/// The source text will be sanitized. That is to say:
/// 1. All `"/"` escape sequences within quoted strings will be replaced with
/// `\n` inside the string.
/// 2. All `""` (two double-quote character) escape sequences within strings
/// will be replaced with `"` (one double-quote character)
/// 3. Any wrapping quotes around a string will be removed. Ex. `"hello world!"`
/// will just be `hello world!` in the output.
pub fn parse(source_text: &str) -> Result<Vec<Vec<Option<Cow<'_, str>>>>, WSVError> {
    // Just use the vec default size of 0.
    parse_with_col_count(source_text, 0)
}
36
37/// Same as parse (see the documentation there for behavior details),
38/// but accepts an expected column count to avoid unnecessary reallocations
39/// of the Vecs.
40pub fn parse_with_col_count(
41    source_text: &str,
42    col_count: usize,
43) -> Result<Vec<Vec<Option<Cow<'_, str>>>>, WSVError> {
44    let mut result = Vec::new();
45    result.push(Vec::with_capacity(col_count));
46    let mut last_line_num = 0;
47
48    for fallible_token in WSVTokenizer::new(source_text) {
49        let token = fallible_token?;
50        match token {
51            WSVToken::LF => {
52                result.push(Vec::with_capacity(col_count));
53                last_line_num += 1;
54            }
55            WSVToken::Null => {
56                result[last_line_num].push(None);
57            }
58            WSVToken::Value(value) => {
59                result[last_line_num].push(Some(value));
60            }
61            WSVToken::Comment(_) => {}
62        }
63    }
64
65    // We pushed extra vecs on eagerly every time we saw an
66    // LF, so pop the last one if it was empty.
67    if result[last_line_num].len() == 0 {
68        result.pop();
69    }
70
71    Ok(result)
72}
73
/// Same as parse (see the documentation there for behavior details),
/// but parses lazily. The input is consumed one line at a time, each time
/// the returned iterator is advanced, so very large files can be pushed
/// through this API without loading them completely. Each item is one line
/// of owned values (`Vec<Option<String>>`) or a `WSVError`. If you need to
/// be even lazier (loading the file token-by-token), use WSVLazyTokenizer
/// directly.
pub fn parse_lazy<Chars: IntoIterator<Item = char>>(source_text: Chars) -> WSVLineIterator<Chars> {
    WSVLineIterator::new(source_text)
}
82
/// An iterator over the lines of a WSV file. This is used to allow lazy
/// parsing of files that do not fit into memory.
pub struct WSVLineIterator<Chars>
where
    Chars: IntoIterator<Item = char>,
{
    // Source of tokens; one line is assembled from it per call to `next`.
    tokenizer: WSVLazyTokenizer<Chars>,
    // An error that was hit after some values of a line had already been
    // collected. The partial line is returned first, and this error is
    // returned by the following call to `next`.
    lookahead_error: Option<WSVError>,
    // Set once the tokenizer has produced an error; after the error has been
    // handed out, `next` returns `None`.
    errored: bool,
    // When true, `next` returns `None` immediately.
    finished: bool,
}
94
impl<Chars> WSVLineIterator<Chars>
where
    Chars: IntoIterator<Item = char>,
{
    /// Creates a line iterator that reads its tokens from a
    /// `WSVLazyTokenizer` over `source_text`. It starts with no pending
    /// error and with the `errored` and `finished` flags cleared.
    fn new(source_text: Chars) -> Self {
        Self {
            tokenizer: WSVLazyTokenizer::new(source_text),
            lookahead_error: None,
            errored: false,
            finished: false,
        }
    }
}
108
109impl<Chars> Iterator for WSVLineIterator<Chars>
110where
111    Chars: IntoIterator<Item = char>,
112{
113    type Item = Result<Vec<Option<String>>, WSVError>;
114
115    fn next(&mut self) -> Option<Self::Item> {
116        if self.finished {
117            return None;
118        }
119
120        if let Some(err) = take(&mut self.lookahead_error) {
121            return Some(Err(err));
122        }
123
124        if self.errored {
125            return None;
126        }
127
128        let mut line = Vec::new();
129        loop {
130            let token = self.tokenizer.next();
131            match token {
132                None => {
133                    if line.is_empty() {
134                        return None;
135                    } else {
136                        return Some(Ok(line));
137                    }
138                }
139                Some(token) => match token {
140                    Err(err) => {
141                        self.errored = true;
142                        if line.is_empty() {
143                            return Some(Err(err));
144                        } else {
145                            self.lookahead_error = Some(err);
146                            return Some(Ok(line));
147                        }
148                    }
149                    Ok(token) => match token {
150                        OwnedWSVToken::Comment(_) => {}
151                        OwnedWSVToken::LF => return Some(Ok(line)),
152                        OwnedWSVToken::Null => line.push(None),
153                        OwnedWSVToken::Value(val) => line.push(Some(val)),
154                    },
155                },
156            }
157        }
158    }
159}
160
/// A struct for writing values to a .wsv file.
///
/// `OuterIter` yields the lines, `InnerIter` yields the values of one line,
/// and each value is either `None` (written as `-`) or a string-like
/// `Some(BorrowStr)`.
pub struct WSVWriter<OuterIter, InnerIter, BorrowStr>
where
    OuterIter: IntoIterator<Item = InnerIter>,
    InnerIter: IntoIterator<Item = Option<BorrowStr>>,
    BorrowStr: AsRef<str>,
{
    // How `to_string` lines up the columns.
    align_columns: ColumnAlignment,
    // The remaining lines, each paired with its zero-based line index.
    values: Enumerate<OuterIter::IntoIter>,
    // The values of the line currently being written, if any.
    current_inner: Option<InnerIter::IntoIter>,
    // Characters that have been produced but not yet returned by the
    // `Iterator` implementation.
    lookahead_chars: VecDeque<char>,
}
173
174impl<OuterIter, InnerIter, BorrowStr> WSVWriter<OuterIter, InnerIter, BorrowStr>
175where
176    OuterIter: Iterator<Item = InnerIter>,
177    InnerIter: IntoIterator<Item = Option<BorrowStr>>,
178    BorrowStr: AsRef<str> + ToString,
179{
180    pub fn new<OuterInto>(values: OuterInto) -> Self
181    where
182        OuterInto: IntoIterator<Item = InnerIter, IntoIter = OuterIter>,
183    {
184        let outer_into = values.into_iter();
185
186        Self {
187            align_columns: ColumnAlignment::default(),
188            values: outer_into.enumerate(),
189            current_inner: None,
190            lookahead_chars: VecDeque::new(),
191        }
192    }
193
194    /// Sets the column alignment of this Writer.
195    /// Note: Left and Right alignments cannot use lazy
196    /// evaluation, so do not set this value if you need
197    /// lazy evaluation.
198    pub fn align_columns(mut self, alignment: ColumnAlignment) -> Self {
199        self.align_columns = alignment;
200        self
201    }
202
203    pub fn to_string(self) -> String {
204        match self.align_columns {
205            ColumnAlignment::Packed => self.collect::<String>(),
206            ColumnAlignment::Left | ColumnAlignment::Right => {
207                let mut max_col_widths = Vec::new();
208
209                let vecs = self
210                    .values
211                    .map(|(line_num, inner)| {
212                        (
213                            line_num,
214                            inner
215                                .into_iter()
216                                .enumerate()
217                                .map(|(index, value)| {
218                                    // Figure out 2 things while consuming the iterators:
219                                    // 1. Whether or not the value needs quotes
220                                    // 2. The length of the string we will be writing
221                                    let mut needs_quotes = false;
222                                    let mut value_len = 0;
223                                    match value.as_ref() {
224                                        None => value_len = 1,
225                                        Some(val) => {
226                                            for ch in val.as_ref().chars() {
227                                                match ch {
228                                                    // account for escape sequences.
229                                                    '\n' => {
230                                                        value_len += 3;
231                                                        needs_quotes = true;
232                                                    }
233                                                    '"' => {
234                                                        value_len += 2;
235                                                        needs_quotes = true;
236                                                    }
237                                                    '#' => {
238                                                        value_len += 1;
239                                                        needs_quotes = true;
240                                                    }
241                                                    ch => {
242                                                        value_len += 1;
243                                                        needs_quotes |= ch == '#'
244                                                            || WSVTokenizer::is_whitespace(ch);
245                                                    }
246                                                }
247                                            }
248                                        }
249                                    }
250
251                                    if needs_quotes {
252                                        value_len += 2;
253                                    }
254                                    match max_col_widths.get_mut(index) {
255                                        None => max_col_widths.push(value_len),
256                                        Some(longest_len) => {
257                                            if value_len > *longest_len {
258                                                *longest_len = value_len
259                                            }
260                                        }
261                                    }
262                                    return (needs_quotes, value_len, value);
263                                })
264                                .collect::<Vec<_>>(),
265                        )
266                    })
267                    .collect::<Vec<_>>();
268
269                let mut result = String::new();
270                for (line_num, line) in vecs {
271                    if line_num != 0 {
272                        result.push('\n');
273                    }
274
275                    for (i, col) in line.into_iter().enumerate() {
276                        if i != 0 {
277                            result.push(' ');
278                        }
279
280                        let value = match col.2.as_ref() {
281                            None => "-",
282                            Some(string) => string.as_ref(),
283                        };
284
285                        if let &ColumnAlignment::Right = &self.align_columns {
286                            for _ in col.1..max_col_widths[i] {
287                                result.push(' ');
288                            }
289                        }
290
291                        if col.0 {
292                            result.push('"');
293                        }
294
295                        for ch in value.chars() {
296                            if ch == '\n' {
297                                result.push('"');
298                                result.push('/');
299                                result.push('"');
300                            } else if ch == '"' {
301                                result.push('"');
302                                result.push('"');
303                            } else {
304                                result.push(ch);
305                            }
306                        }
307
308                        if col.0 {
309                            result.push('"');
310                        }
311
312                        if let &ColumnAlignment::Left = &self.align_columns {
313                            for _ in col.1..max_col_widths[i] {
314                                result.push(' ');
315                            }
316                        }
317                    }
318                }
319
320                result
321            }
322        }
323    }
324}
325
326impl<OuterIter, InnerIter, BorrowStr> Iterator for WSVWriter<OuterIter, InnerIter, BorrowStr>
327where
328    OuterIter: Iterator<Item = InnerIter>,
329    InnerIter: IntoIterator<Item = Option<BorrowStr>>,
330    BorrowStr: AsRef<str> + ToString,
331{
332    type Item = char;
333    fn next(&mut self) -> Option<Self::Item> {
334        loop {
335            if let Some(ch) = self.lookahead_chars.pop_front() {
336                return Some(ch);
337            }
338
339            if let Some(inner_mut) = self.current_inner.as_mut() {
340                match inner_mut.next() {
341                    None => {
342                        self.current_inner = None;
343                    }
344                    Some(next_string_like) => match next_string_like {
345                        None => {
346                            self.lookahead_chars.push_back(' ');
347                            return Some('-');
348                        }
349                        Some(string_like) => {
350                            let mut needs_quotes = false;
351                            for ch in string_like.as_ref().chars() {
352                                match ch {
353                                    '\n' => {
354                                        self.lookahead_chars.push_back('"');
355                                        self.lookahead_chars.push_back('/');
356                                        self.lookahead_chars.push_back('"');
357                                        needs_quotes = true;
358                                    }
359                                    '"' => {
360                                        self.lookahead_chars.push_back('"');
361                                        self.lookahead_chars.push_back('"');
362                                        needs_quotes = true;
363                                    }
364                                    ch => {
365                                        self.lookahead_chars.push_back(ch);
366                                        needs_quotes |=
367                                            ch == '#' || WSVTokenizer::is_whitespace(ch);
368                                    }
369                                }
370                            }
371                            if needs_quotes {
372                                self.lookahead_chars.push_front('"');
373                                self.lookahead_chars.push_back('"');
374                            }
375                            self.lookahead_chars.push_back(' ');
376                            continue;
377                        }
378                    },
379                }
380            }
381
382            match self.values.next() {
383                None => return None,
384                Some((i, inner)) => {
385                    self.current_inner = Some(inner.into_iter());
386                    if i != 0 {
387                        return Some('\n');
388                    }
389                }
390            }
391        }
392    }
393}
/// How `WSVWriter::to_string` lines up the values of a column.
#[derive(Default)]
pub enum ColumnAlignment {
    /// Values are padded with spaces after the value, up to the width of
    /// the widest value in their column.
    Left,
    /// Values are padded with spaces before the value, up to the width of
    /// the widest value in their column.
    Right,
    /// No padding is added. This is the default.
    #[default]
    Packed,
}
401
/// A tokenizer for the .wsv (whitespace separated value)
/// file format. This struct implements Iterator, so to
/// extract the tokens use your desired iterator method
/// or a standard for loop.
pub struct WSVTokenizer<'wsv> {
    // The complete source text; tokens borrow slices of it.
    source: &'wsv str,
    // Remaining characters of `source`, paired with their byte index.
    chars: CharIndices<'wsv>,
    // One character of lookahead that was read from `chars` but not consumed.
    peeked: Option<(usize, char)>,
    // Line, column and byte index, updated every time a character is consumed.
    current_location: Location,
    // An error noticed while producing a token; the token is returned first
    // and this error is returned by the following call to `next`.
    lookahead_error: Option<WSVError>,
    // Once set, `next` returns `None`.
    errored: bool,
}
414
415impl<'wsv> WSVTokenizer<'wsv> {
416    /// Creates a .wsv tokenizer from .wsv source text.
417    pub fn new(source_text: &'wsv str) -> Self {
418        Self {
419            source: source_text,
420            chars: source_text.char_indices(),
421            peeked: None,
422            current_location: Location::default(),
423            lookahead_error: None,
424            errored: false,
425        }
426    }
427
428    fn match_string(&mut self) -> Option<Result<WSVToken<'wsv>, WSVError>> {
429        if self.match_char('"').is_none() {
430            return None;
431        }
432        let mut chunks = Vec::with_capacity(1);
433        let mut chunk_start = None;
434        loop {
435            if self.match_char('"').is_some() {
436                if self.match_char('"').is_some() {
437                    // a quote is ascii, so subtracting 1 bytes should always be safe.
438                    let end_location = self.current_location.byte_index - 1;
439                    chunks.push(&self.source[chunk_start.unwrap_or(end_location)..end_location]);
440                    chunk_start = Some(self.current_location.byte_index);
441                } else if self.match_char('/').is_some() {
442                    if self.match_char('"').is_none() {
443                        self.errored = true;
444                        return Some(Err(WSVError {
445                            err_type: WSVErrorType::InvalidStringLineBreak,
446                            location: self.current_location.clone(),
447                        }));
448                    }
449                    let end_index = self.current_location.byte_index - 2;
450                    chunks.push(&self.source[chunk_start.unwrap_or(end_index)..end_index]);
451                    chunks.push("\n");
452                    chunk_start = Some(self.current_location.byte_index + 1);
453                } else {
454                    // a quote is ascii, so subtracting 1 bytes should always be safe.
455                    chunks.push(
456                        &self.source[chunk_start.unwrap_or(self.current_location.byte_index)
457                            ..self.current_location.byte_index],
458                    );
459                    break;
460                }
461            } else if let Some(NEWLINE) = self.peek() {
462                if let Some(NEWLINE) = self.peek() {
463                    self.errored = true;
464                    return Some(Err(WSVError {
465                        err_type: WSVErrorType::StringNotClosed,
466                        location: self.current_location.clone(),
467                    }));
468                }
469            } else if let None = chunk_start {
470                chunk_start = Some(match self.peek_location() {
471                    None => self.source.len(),
472                    Some(val) => val.byte_index,
473                });
474            } else if self.match_char_if(&mut |_| true).is_none() {
475                return Some(Err(WSVError {
476                    err_type: WSVErrorType::StringNotClosed,
477                    location: self.peek_location().into_iter().next().unwrap_or_else(|| {
478                        let mut loc = self.current_location.clone();
479                        loc.byte_index = self.source.len();
480                        return loc;
481                    }),
482                }));
483            }
484        }
485
486        if chunks.len() == 1 {
487            return Some(Ok(WSVToken::Value(Cow::Borrowed(chunks[0]))));
488        } else {
489            return Some(Ok(WSVToken::Value(Cow::Owned(
490                chunks.into_iter().collect::<String>(),
491            ))));
492        }
493    }
494
495    fn match_char_while<F: FnMut(char) -> bool>(&mut self, mut predicate: F) -> Option<&'wsv str> {
496        let mut start = None;
497        loop {
498            match self.match_char_if(&mut predicate) {
499                None => break,
500                Some((index, _)) => {
501                    if let None = start {
502                        start = Some(index);
503                    }
504                }
505            }
506        }
507
508        let start_val = match start {
509            None => return None,
510            Some(val) => val,
511        };
512
513        // Just get the side effect of setting peeked
514        self.peek();
515        let end_val = match self.peeked.as_ref() {
516            None => self.source.len(),
517            Some((index, _)) => *index,
518        };
519
520        return Some(&self.source[start_val..end_val]);
521    }
522
523    fn match_char(&mut self, ch: char) -> Option<(usize, char)> {
524        self.match_char_if(&mut |found_char| ch == found_char)
525    }
526
527    fn match_char_if<F: FnMut(char) -> bool>(
528        &mut self,
529        predicate: &mut F,
530    ) -> Option<(usize, char)> {
531        if let Some(found_char) = self.peek() {
532            if predicate(found_char) {
533                let consumed = take(&mut self.peeked);
534
535                match consumed {
536                    None => {
537                        return None;
538                    }
539                    Some((i, ch)) => {
540                        if ch == NEWLINE {
541                            self.current_location.line += 1;
542                            self.current_location.col = 1;
543                        } else {
544                            self.current_location.col += 1;
545                        }
546                        self.current_location.byte_index = i;
547                    }
548                }
549
550                return consumed.clone();
551            }
552        }
553
554        return None;
555    }
556
557    fn peek_location(&mut self) -> Option<Location> {
558        self.peek_inner();
559        match self.peeked.as_ref() {
560            None => None,
561            Some((i, _)) => {
562                let mut peeked_pos = self.current_location.clone();
563                peeked_pos.col += 1;
564                peeked_pos.byte_index = *i;
565                Some(peeked_pos)
566            }
567        }
568    }
569
570    fn peek(&mut self) -> Option<char> {
571        match self.peek_inner() {
572            None => None,
573            Some(peeked) => Some(peeked.1),
574        }
575    }
576
577    fn peek_inner(&mut self) -> Option<&(usize, char)> {
578        if let None = self.peeked.as_ref() {
579            self.peeked = self.chars.next();
580        }
581        self.peeked.as_ref()
582    }
583
584    fn is_whitespace(ch: char) -> bool {
585        match ch {
586            '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{000D}' | '\u{0020}' | '\u{0085}'
587            | '\u{00A0}' | '\u{1680}' | '\u{2000}' | '\u{2001}' | '\u{2002}' | '\u{2003}'
588            | '\u{2004}' | '\u{2005}' | '\u{2006}' | '\u{2007}' | '\u{2008}' | '\u{2009}'
589            | '\u{200A}' | '\u{2028}' | '\u{2029}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
590            _ => false,
591        }
592    }
593}
594
595impl<'wsv> Iterator for WSVTokenizer<'wsv> {
596    type Item = Result<WSVToken<'wsv>, WSVError>;
597
598    fn next(&mut self) -> Option<Self::Item> {
599        if self.errored {
600            return None;
601        }
602        if let Some(err) = take(&mut self.lookahead_error) {
603            self.errored = true;
604            return Some(Err(err));
605        }
606        self.match_char_while(|ch| Self::is_whitespace(ch));
607
608        let str = self.match_string();
609        if str.is_some() {
610            let lookahead = self.peek().unwrap_or(' ');
611            if lookahead != NEWLINE && lookahead != '#' && !Self::is_whitespace(lookahead) {
612                self.lookahead_error = Some(WSVError {
613                    location: self.current_location.clone(),
614                    err_type: WSVErrorType::InvalidCharacterAfterString,
615                });
616            }
617            return str;
618        } else if self.match_char('#').is_some() {
619            // Comment
620            return Some(Ok(WSVToken::Comment(
621                self.match_char_while(|ch| ch != NEWLINE).unwrap_or(""),
622            )));
623        } else if self.match_char(NEWLINE).is_some() {
624            return Some(Ok(WSVToken::LF));
625        } else {
626            // Value
627            match self.match_char_while(|ch| {
628                if ch == NEWLINE {
629                    return false;
630                }
631                if ch == '"' {
632                    return false;
633                }
634                if ch == '#' {
635                    return false;
636                }
637                if Self::is_whitespace(ch) {
638                    return false;
639                }
640                return true;
641            }) {
642                Some(str) => {
643                    if str == "-" {
644                        return Some(Ok(WSVToken::Null));
645                    }
646                    if let Some('"') = self.peek() {
647                        self.lookahead_error = Some(WSVError {
648                            location: self.current_location.clone(),
649                            err_type: WSVErrorType::InvalidDoubleQuoteAfterValue,
650                        });
651                    }
652                    return Some(Ok(WSVToken::Value(Cow::Borrowed(str))));
653                }
654                None => None,
655            }
656        }
657    }
658}
659
/// A lazy tokenizer for the .wsv (whitespace separated
/// value) file format. This struct implements Iterator,
/// so to extract the tokens use your desired iterator
/// method or a standard for loop.
pub struct WSVLazyTokenizer<Chars: IntoIterator<Item = char>> {
    // The characters that have not been read yet.
    source: Chars::IntoIter,
    // One character of lookahead that was read from `source` but not consumed.
    peeked: Option<char>,
    // Line and column, updated every time a character is consumed.
    current_location: Location,
    // An error noticed while producing a token; the token is returned first
    // and this error is returned by the following call to `next`.
    lookahead_error: Option<WSVError>,
    // Once set, `next` returns `None`.
    errored: bool,
}
671
672impl<Chars> WSVLazyTokenizer<Chars>
673where
674    Chars: IntoIterator<Item = char>,
675{
676    pub fn new(source_text: Chars) -> Self {
677        Self {
678            source: source_text.into_iter(),
679            peeked: None,
680            current_location: Location::default(),
681            lookahead_error: None,
682            errored: false,
683        }
684    }
685
686    fn match_string(&mut self) -> Option<Result<OwnedWSVToken, WSVError>> {
687        if self.match_char('"').is_none() {
688            return None;
689        }
690        let mut result = String::new();
691        loop {
692            if self.match_char('"').is_some() {
693                if self.match_char('"').is_some() {
694                    // a quote is ascii, so subtracting 1 bytes should always be safe.
695                    result.push('"');
696                } else if self.match_char('/').is_some() {
697                    if self.match_char('"').is_none() {
698                        self.errored = true;
699                        return Some(Err(WSVError {
700                            err_type: WSVErrorType::InvalidStringLineBreak,
701                            location: self.current_location.clone(),
702                        }));
703                    }
704                    result.push('\n');
705                } else {
706                    return Some(Ok(OwnedWSVToken::Value(result)));
707                }
708            } else if let Some(NEWLINE) = self.peek() {
709                if let Some(NEWLINE) = self.peek() {
710                    self.errored = true;
711                    return Some(Err(WSVError {
712                        err_type: WSVErrorType::StringNotClosed,
713                        location: self.current_location.clone(),
714                    }));
715                }
716            } else if let Some(ch) = self.match_char_if(&mut |_| true) {
717                result.push(ch);
718            } else {
719                return Some(Err(WSVError {
720                    err_type: WSVErrorType::StringNotClosed,
721                    location: self
722                        .peek_location()
723                        .into_iter()
724                        .next()
725                        .unwrap_or_else(|| self.current_location.clone()),
726                }));
727            }
728        }
729    }
730
731    fn match_char_while<F: FnMut(char) -> bool>(&mut self, mut predicate: F) -> Option<String> {
732        let mut str = String::new();
733        loop {
734            match self.match_char_if(&mut predicate) {
735                None => break,
736                Some(ch) => {
737                    str.push(ch);
738                }
739            }
740        }
741
742        if str.len() == 0 {
743            return None;
744        } else {
745            return Some(str);
746        }
747    }
748
749    fn match_char(&mut self, ch: char) -> Option<char> {
750        self.match_char_if(&mut |found_char| ch == found_char)
751    }
752
753    fn match_char_if<F: FnMut(char) -> bool>(&mut self, predicate: &mut F) -> Option<char> {
754        if let Some(found_char) = self.peek() {
755            if predicate(found_char) {
756                let consumed = take(&mut self.peeked);
757
758                match consumed {
759                    None => {
760                        return None;
761                    }
762                    Some(ch) => {
763                        if ch == NEWLINE {
764                            self.current_location.line += 1;
765                            self.current_location.col = 1;
766                        } else {
767                            self.current_location.col += 1;
768                        }
769                        return Some(ch);
770                    }
771                }
772            }
773        }
774
775        return None;
776    }
777
778    fn peek_location(&mut self) -> Option<Location> {
779        self.peek_inner();
780        match self.peeked.as_ref() {
781            None => None,
782            Some(_) => {
783                let mut peeked_pos = self.current_location.clone();
784                peeked_pos.col += 1;
785                Some(peeked_pos)
786            }
787        }
788    }
789
790    fn peek(&mut self) -> Option<char> {
791        match self.peek_inner() {
792            None => None,
793            Some(peeked) => Some(*peeked),
794        }
795    }
796
797    fn peek_inner(&mut self) -> Option<&char> {
798        if let None = self.peeked.as_ref() {
799            self.peeked = self.source.next();
800        }
801        self.peeked.as_ref()
802    }
803
804    fn is_whitespace(ch: char) -> bool {
805        match ch {
806            '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{000D}' | '\u{0020}' | '\u{0085}'
807            | '\u{00A0}' | '\u{1680}' | '\u{2000}' | '\u{2001}' | '\u{2002}' | '\u{2003}'
808            | '\u{2004}' | '\u{2005}' | '\u{2006}' | '\u{2007}' | '\u{2008}' | '\u{2009}'
809            | '\u{200A}' | '\u{2028}' | '\u{2029}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
810            _ => false,
811        }
812    }
813}
814
815impl<Chars> Iterator for WSVLazyTokenizer<Chars>
816where
817    Chars: IntoIterator<Item = char>,
818{
819    type Item = Result<OwnedWSVToken, WSVError>;
820    fn next(&mut self) -> Option<Self::Item> {
821        if self.errored {
822            return None;
823        }
824        if let Some(err) = take(&mut self.lookahead_error) {
825            self.errored = true;
826            return Some(Err(err));
827        }
828        self.match_char_while(|ch| Self::is_whitespace(ch));
829
830        let str = self.match_string();
831        if str.is_some() {
832            let lookahead = self.peek().unwrap_or(' ');
833            if lookahead != NEWLINE && lookahead != '#' && !Self::is_whitespace(lookahead) {
834                self.lookahead_error = Some(WSVError {
835                    location: self.current_location.clone(),
836                    err_type: WSVErrorType::InvalidCharacterAfterString,
837                });
838            }
839            return str;
840        } else if self.match_char('#').is_some() {
841            // Comment
842            return Some(Ok(OwnedWSVToken::Comment(
843                self.match_char_while(|ch| ch != NEWLINE)
844                    .unwrap_or_else(|| "".to_string()),
845            )));
846        } else if self.match_char(NEWLINE).is_some() {
847            return Some(Ok(OwnedWSVToken::LF));
848        } else {
849            // Value
850            match self.match_char_while(|ch| {
851                if ch == NEWLINE {
852                    return false;
853                }
854                if ch == '"' {
855                    return false;
856                }
857                if ch == '#' {
858                    return false;
859                }
860                if Self::is_whitespace(ch) {
861                    return false;
862                }
863                return true;
864            }) {
865                Some(str) => {
866                    if str == "-" {
867                        return Some(Ok(OwnedWSVToken::Null));
868                    }
869                    if let Some('"') = self.peek() {
870                        self.lookahead_error = Some(WSVError {
871                            location: self.current_location.clone(),
872                            err_type: WSVErrorType::InvalidDoubleQuoteAfterValue,
873                        });
874                    }
875                    return Some(Ok(OwnedWSVToken::Value(str)));
876                }
877                None => None,
878            }
879        }
880    }
881}
882
/// A collection of all token types in a WSV file, with text held as
/// `Cow<'wsv, str>` / `&'wsv str` rather than as owned `String`s.
#[derive(Debug, Clone)]
pub enum WSVToken<'wsv> {
    /// Represents a line feed character (ex. '\n')
    LF,
    /// Represents a null value in the input (ex. '-')
    Null,
    /// Represents a non-null value in the input (ex. 'value')
    Value(Cow<'wsv, str>),
    /// Represents a comment (ex. '# comment'). The tests in this file expect
    /// the text without the leading '#'.
    Comment(&'wsv str),
}
895
/// The owned counterpart of [`WSVToken`]: the same token kinds, but every
/// piece of text is a `String`. This is the token type yielded by
/// `WSVLazyTokenizer`.
#[derive(Debug, Clone)]
pub enum OwnedWSVToken {
    /// Represents a line feed character (ex. '\n')
    LF,
    /// Represents a null value in the input (ex. '-')
    Null,
    /// Represents a non-null value in the input (ex. 'value')
    Value(String),
    /// Represents a comment (ex. '# comment'), stored without the leading '#'.
    Comment(String),
}
907
/// A struct to represent an error in a WSV file. This contains
/// both the type of error and location of the error in the source
/// text.
#[derive(Debug, Clone)]
pub struct WSVError {
    /// Which kind of problem was found.
    err_type: WSVErrorType,
    /// Where in the source text the problem was reported.
    location: Location,
}
916
impl WSVError {
    /// The kind of error that occurred. Returned by value because
    /// `WSVErrorType` is `Copy`.
    pub fn err_type(&self) -> WSVErrorType {
        self.err_type
    }

    /// The position in the source text associated with this error,
    /// returned as a clone of the stored `Location`.
    pub fn location(&self) -> Location {
        self.location.clone()
    }
}
926
927impl Display for WSVError {
928    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
929        let mut description = String::new();
930
931        let location = self.location();
932        description.push_str("(line: ");
933        description.push_str(&location.line().to_string());
934        description.push_str(", column: ");
935        description.push_str(&location.col().to_string());
936        description.push_str(") ");
937
938        match self.err_type() {
939            WSVErrorType::InvalidCharacterAfterString => {
940                description.push_str("Invalid Character After String");
941            }
942            WSVErrorType::InvalidDoubleQuoteAfterValue => {
943                description.push_str("Invalid Double Quote After Value");
944            }
945            WSVErrorType::InvalidStringLineBreak => {
946                description.push_str("Invalid String Line Break");
947            }
948            WSVErrorType::StringNotClosed => {
949                description.push_str("String Not Closed");
950            }
951        }
952
953        write!(f, "{}", description)?;
954        Ok(())
955    }
956}
// `WSVError` derives `Debug` and implements `Display`, so the default
// `Error` methods are sufficient.
impl Error for WSVError {}
958
/// For details on these error types, see the Parser Errors
/// section of [https://dev.stenway.com/WSV/Specification.html](https://dev.stenway.com/WSV/Specification.html)
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WSVErrorType {
    /// A quoted string was opened but never closed
    /// (ex. `"this is an unclosed string`).
    StringNotClosed,
    /// A double quote directly follows an unquoted value.
    InvalidDoubleQuoteAfterValue,
    /// A quoted string is directly followed by something other than
    /// whitespace, a line feed, a `#` comment, or the end of input.
    InvalidCharacterAfterString,
    /// A `"/` sequence inside a quoted string is not followed by a `"`
    /// (ex. `"an invalid "/ line break."`).
    InvalidStringLineBreak,
}
968
/// Represents a location in the source text
#[derive(Debug, Clone)]
pub struct Location {
    // NOTE(review): presumably the byte offset into the source text; it
    // starts at 0 and has no public getter in this section — confirm usage.
    byte_index: usize,
    // Line number; the default location starts at line 1.
    line: usize,
    // Column number; the default location starts at column 1.
    col: usize,
}
976
impl Location {
    /// The line number in the source text. The default location is line 1.
    pub fn line(&self) -> usize {
        self.line
    }
    /// The column number in the source text. The default location is
    /// column 1.
    pub fn col(&self) -> usize {
        self.col
    }
}
987
impl Default for Location {
    /// The starting location: byte index 0, line 1, column 1.
    fn default() -> Self {
        Self {
            byte_index: 0,
            line: 1,
            col: 1,
        }
    }
}
997
998#[cfg(debug_assertions)]
999mod tests {
1000    use crate::{
1001        parse_lazy, OwnedWSVToken, WSVError, WSVErrorType, WSVLazyTokenizer, WSVToken, WSVTokenizer,
1002    };
1003
1004    use super::{parse, WSVWriter};
1005    use std::{borrow::Cow, fmt::write};
1006
1007    #[test]
1008    fn read_and_write() {
1009        let str = include_str!("../tests/1_stenway.com");
1010        let result = parse(str).unwrap();
1011
1012        let result_str = WSVWriter::new(result)
1013            .align_columns(super::ColumnAlignment::Packed)
1014            .to_string();
1015
1016        println!("{}", result_str);
1017    }
1018
1019    #[test]
1020    fn read_and_write_lazy() {
1021        let str = r#"a 	U+0061    61            0061        "Latin Small Letter A"
1022~ 	U+007E    7E            007E        Tilde
1023¥ 	U+00A5    C2_A5         00A5        "Yen Sign"
1024» 	U+00BB    C2_BB         00BB        "Right-Pointing Double Angle Quotation Mark"
1025½ 	U+00BD    C2_BD         00BD        "Vulgar Fraction One Half"
1026¿ 	U+00BF    C2_BF         00BF        "Inverted#Question Mark" # This is a comment
1027ß 	U+00DF    C3_9F         00DF        "Latin Small Letter Sharp S"
1028ä 	U+00E4    C3_A4         00E4        "Latin Small Letter A with Diaeresis"
1029ï 	U+00EF    C3_AF         00EF        "Latin Small Letter I with Diaeresis"
1030œ 	U+0153    C5_93         0153        "Latin Small Ligature Oe"
1031€ 	U+20AC    E2_82_AC      20AC        "Euro Sign"
1032東 	U+6771    E6_9D_B1      6771        "CJK Unified Ideograph-6771"
1033𝄞 	U+1D11E   F0_9D_84_9E   D834_DD1E   "Musical Symbol G Clef"
1034𠀇 	U+20007   F0_A0_80_87   D840_DC07   "CJK Unified Ideograph-20007"
1035-   hyphen    qwro-qweb     -dasbe      "A hyphen character - represents null""#;
1036        let result = parse_lazy(str.chars());
1037
1038        let result = result.map(|line| {
1039            line.unwrap().into_iter().map(|value| {
1040                let mut prefix = "-".to_string();
1041                prefix.push_str(&value.unwrap_or("-".to_string()));
1042                Some(prefix)
1043            })
1044        });
1045
1046        let result_str = WSVWriter::new(result)
1047            .align_columns(super::ColumnAlignment::Packed)
1048            .to_string();
1049
1050        println!("{}", result_str);
1051    }
1052
1053    #[test]
1054    fn e2e_test() {
1055        let str = include_str!("../tests/1_stenway.com");
1056        let result = parse(str);
1057
1058        let assert_matches_expected =
1059            |result: Result<Vec<Vec<Option<Cow<'_, str>>>>, WSVError>| match result {
1060                Err(_) => panic!("Should not have error"),
1061                Ok(values) => {
1062                    let expected = vec![
1063                        vec![
1064                            "a",
1065                            "U+0061",
1066                            "61",
1067                            "0061",
1068                            "Latin Small Letter A",
1069                            "\n\"\"",
1070                        ],
1071                        vec!["~", "U+007E", "7E", "007E", "Tilde"],
1072                        vec!["¥", "U+00A5", "C2_A5", "00A5", "Yen Sign"],
1073                        vec![
1074                            "»",
1075                            "U+00BB",
1076                            "C2_BB",
1077                            "00BB",
1078                            "Right-Pointing Double Angle Quotation Mark",
1079                        ],
1080                        vec!["½", "U+00BD", "C2_BD", "00BD", "Vulgar Fraction One Half"],
1081                        vec!["¿", "U+00BF", "C2_BF", "00BF", "Inverted#Question Mark"],
1082                        vec!["ß", "U+00DF", "C3_9F", "00DF", "Latin Small Letter Sharp S"],
1083                        vec![
1084                            "ä",
1085                            "U+00E4",
1086                            "C3_A4",
1087                            "00E4",
1088                            "Latin Small Letter A with Diaeresis",
1089                        ],
1090                        vec![
1091                            "ï",
1092                            "U+00EF",
1093                            "C3_AF",
1094                            "00EF",
1095                            "Latin Small Letter I with Diaeresis",
1096                        ],
1097                        vec!["œ", "U+0153", "C5_93", "0153", "Latin Small Ligature Oe"],
1098                        vec!["€", "U+20AC", "E2_82_AC", "20AC", "Euro Sign"],
1099                        vec![
1100                            "東",
1101                            "U+6771",
1102                            "E6_9D_B1",
1103                            "6771",
1104                            "CJK Unified Ideograph-6771",
1105                        ],
1106                        vec![
1107                            "𝄞",
1108                            "U+1D11E",
1109                            "F0_9D_84_9E",
1110                            "D834_DD1E",
1111                            "Musical Symbol G Clef",
1112                        ],
1113                        vec![
1114                            "𠀇",
1115                            "U+20007",
1116                            "F0_A0_80_87",
1117                            "D840_DC07",
1118                            "CJK Unified Ideograph-20007",
1119                        ],
1120                        vec![
1121                            "-",
1122                            "hyphen",
1123                            "qwro-qweb",
1124                            "-dasbe",
1125                            "A hyphen character - represents null",
1126                        ],
1127                    ];
1128
1129                    let mut expected_iter = expected.into_iter();
1130                    let mut acutal_iter = values.into_iter();
1131
1132                    loop {
1133                        let expected_line = expected_iter.next();
1134                        let actual_line = acutal_iter.next();
1135
1136                        assert_eq!(
1137                            expected_line.is_some(),
1138                            actual_line.is_some(),
1139                            "Line numbers should match"
1140                        );
1141                        if expected_line.is_none() || actual_line.is_none() {
1142                            break;
1143                        }
1144
1145                        let mut expected_value_iter = expected_line.unwrap().into_iter();
1146                        let mut actual_value_iter = actual_line.unwrap().into_iter();
1147                        loop {
1148                            let expected_value = expected_value_iter.next();
1149                            let actual_value = actual_value_iter.next();
1150
1151                            assert_eq!(
1152                                expected_value.is_some(),
1153                                expected_value.is_some(),
1154                                "Value counts should match"
1155                            );
1156                            if expected_value.is_none() || actual_value.is_none() {
1157                                break;
1158                            }
1159
1160                            if expected_value.unwrap() == "-" {
1161                                assert_eq!(None, actual_value.unwrap(), "'-' should parse to None");
1162                            } else {
1163                                let actual_value = actual_value
1164                                .expect("Actual value to be populated at this poitn.")
1165                                .expect(
1166                                    "actual value should parse to Some() if expected is not '-'",
1167                                );
1168                                let expected = expected_value.as_ref().unwrap();
1169                                let actual = actual_value.as_ref();
1170                                if expected_value.unwrap().to_owned() != actual_value.to_owned() {
1171                                    println!("Mismatch: \nExpected: {expected}\nActual: {actual}");
1172                                    panic!();
1173                                }
1174                            }
1175                        }
1176                    }
1177                }
1178            };
1179
1180        assert_matches_expected(result);
1181
1182        let parsed = parse(str).unwrap();
1183        let written = WSVWriter::new(parsed).to_string();
1184        println!("Writer output: {}", written);
1185        let reparsed = parse(&written);
1186        println!("Reparsed: {:?}", reparsed);
1187        assert_matches_expected(reparsed);
1188    }
1189
1190    #[test]
1191    fn e2e_test_lazy() {
1192        let str = include_str!("../tests/1_stenway.com");
1193        let result = parse_lazy(str.chars())
1194            .map(|line| line.unwrap())
1195            .collect::<Vec<_>>();
1196
1197        let assert_matches_expected = |values: Vec<Vec<Option<String>>>| {
1198            let expected = vec![
1199                vec![
1200                    "a",
1201                    "U+0061",
1202                    "61",
1203                    "0061",
1204                    "Latin Small Letter A",
1205                    "\n\"\"",
1206                ],
1207                vec!["~", "U+007E", "7E", "007E", "Tilde"],
1208                vec!["¥", "U+00A5", "C2_A5", "00A5", "Yen Sign"],
1209                vec![
1210                    "»",
1211                    "U+00BB",
1212                    "C2_BB",
1213                    "00BB",
1214                    "Right-Pointing Double Angle Quotation Mark",
1215                ],
1216                vec!["½", "U+00BD", "C2_BD", "00BD", "Vulgar Fraction One Half"],
1217                vec!["¿", "U+00BF", "C2_BF", "00BF", "Inverted#Question Mark"],
1218                vec!["ß", "U+00DF", "C3_9F", "00DF", "Latin Small Letter Sharp S"],
1219                vec![
1220                    "ä",
1221                    "U+00E4",
1222                    "C3_A4",
1223                    "00E4",
1224                    "Latin Small Letter A with Diaeresis",
1225                ],
1226                vec![
1227                    "ï",
1228                    "U+00EF",
1229                    "C3_AF",
1230                    "00EF",
1231                    "Latin Small Letter I with Diaeresis",
1232                ],
1233                vec!["œ", "U+0153", "C5_93", "0153", "Latin Small Ligature Oe"],
1234                vec!["€", "U+20AC", "E2_82_AC", "20AC", "Euro Sign"],
1235                vec![
1236                    "東",
1237                    "U+6771",
1238                    "E6_9D_B1",
1239                    "6771",
1240                    "CJK Unified Ideograph-6771",
1241                ],
1242                vec![
1243                    "𝄞",
1244                    "U+1D11E",
1245                    "F0_9D_84_9E",
1246                    "D834_DD1E",
1247                    "Musical Symbol G Clef",
1248                ],
1249                vec![
1250                    "𠀇",
1251                    "U+20007",
1252                    "F0_A0_80_87",
1253                    "D840_DC07",
1254                    "CJK Unified Ideograph-20007",
1255                ],
1256                vec![
1257                    "-",
1258                    "hyphen",
1259                    "qwro-qweb",
1260                    "-dasbe",
1261                    "A hyphen character - represents null",
1262                ],
1263            ];
1264
1265            let mut expected_iter = expected.into_iter();
1266            let mut acutal_iter = values.into_iter();
1267
1268            loop {
1269                let expected_line = expected_iter.next();
1270                let actual_line = acutal_iter.next();
1271
1272                assert_eq!(
1273                    expected_line.is_some(),
1274                    actual_line.is_some(),
1275                    "Line numbers should match"
1276                );
1277                if expected_line.is_none() || actual_line.is_none() {
1278                    break;
1279                }
1280
1281                let mut expected_value_iter = expected_line.unwrap().into_iter();
1282                let mut actual_value_iter = actual_line.unwrap().into_iter();
1283                loop {
1284                    let expected_value = expected_value_iter.next();
1285                    let actual_value = actual_value_iter.next();
1286
1287                    assert_eq!(
1288                        expected_value.is_some(),
1289                        expected_value.is_some(),
1290                        "Value counts should match"
1291                    );
1292                    if expected_value.is_none() || actual_value.is_none() {
1293                        break;
1294                    }
1295
1296                    if expected_value.unwrap() == "-" {
1297                        assert_eq!(None, actual_value.unwrap(), "'-' should parse to None");
1298                    } else {
1299                        let actual_value = actual_value
1300                            .expect("Actual value to be populated at this poitn.")
1301                            .expect("actual value should parse to Some() if expected is not '-'");
1302                        assert_eq!(
1303                            expected_value.unwrap().to_owned(),
1304                            actual_value.to_owned(),
1305                            "string values should match"
1306                        );
1307                    }
1308                }
1309            }
1310        };
1311
1312        assert_matches_expected(result);
1313
1314        let parsed = parse(str).unwrap();
1315        let written = WSVWriter::new(parsed).to_string();
1316        let reparsed = parse_lazy(written.chars())
1317            .map(|line| line.unwrap())
1318            .collect();
1319        assert_matches_expected(reparsed);
1320    }
1321
1322    #[test]
1323    fn readme_example_write() {
1324        use std::fs::File;
1325        use std::io::BufReader;
1326        // I recommend you pull in the utf8-chars crate as a dependency if
1327        // you need lazy parsing
1328        use crate::{parse_lazy, WSVWriter};
1329        use utf8_chars::BufReadCharsExt;
1330
1331        let mut reader = BufReader::new(File::open("./my_very_large_file.txt").unwrap());
1332
1333        let chars = reader.chars().map(|ch| ch.unwrap());
1334
1335        let lines_lazy = parse_lazy(chars).map(|line| {
1336            // For this example we will assume we have valid WSV
1337            let sum = line
1338                .unwrap()
1339                .into_iter()
1340                // We're counting None as 0 in my case,
1341                // so flat_map the Nones out.
1342                .flat_map(|opt| opt)
1343                .map(|value| value.parse::<i32>().unwrap_or(0))
1344                .sum::<i32>();
1345
1346            // The writer needs a 2D iterator of Option<String>,
1347            // so wrap the value in a Some and .to_string() it.
1348            // Also wrap in a Vec to make it a 2D iterator
1349            vec![Some(sum.to_string())]
1350        });
1351        // CAREFUL: Don't call .collect() here or we'll run out of memory!
1352
1353        // The WSVWriter when using ColumnAlignment::Packed
1354        // (the default) is also lazy, so we can pass our
1355        // result in directly.
1356        for ch in WSVWriter::new(lines_lazy) {
1357            // Your code to dump the output to a file goes here.
1358            print!("{}", ch);
1359        }
1360    }
1361
1362    #[test]
1363    fn in_and_out_with_cows() {
1364        let str = include_str!("../tests/1_stenway.com");
1365
1366        let values = parse(str).unwrap_or_else(|err| panic!("{:?}", err));
1367        let output = WSVWriter::new(values)
1368            .align_columns(crate::ColumnAlignment::Right)
1369            .to_string();
1370
1371        println!("{}", output);
1372    }
1373
1374    #[test]
1375    fn writing_strings() {
1376        let values = vec![vec![None, Some("test".to_string())]];
1377
1378        let output = WSVWriter::new(values)
1379            .align_columns(crate::ColumnAlignment::Packed)
1380            .to_string();
1381
1382        println!("{}", output);
1383    }
1384
1385    #[test]
1386    fn tokenizes_strings_correctly() {
1387        let input = "\"this is a string\"";
1388        let mut tokenizer = WSVTokenizer::new(input);
1389        assert!(are_equal(
1390            Ok(WSVToken::Value(Cow::Borrowed("this is a string"))),
1391            tokenizer.next().unwrap()
1392        ));
1393        assert!(tokenizer.next().is_none());
1394    }
1395
1396    #[test]
1397    fn tokenizes_string_and_immediate_comment_correctly() {
1398        let input = "somekindofvalue#thenacomment";
1399        let mut tokenizer = WSVTokenizer::new(input);
1400        assert!(are_equal(
1401            Ok(WSVToken::Value(Cow::Borrowed("somekindofvalue"))),
1402            tokenizer.next().unwrap()
1403        ));
1404        assert!(are_equal(
1405            Ok(WSVToken::Comment("thenacomment")),
1406            tokenizer.next().unwrap()
1407        ));
1408    }
1409
1410    #[test]
1411    fn tokenizes_string_and_immediate_comment_correctly_lazily() {
1412        let input = "somekindofvalue#thenacomment";
1413        let mut tokenizer = WSVLazyTokenizer::new(input.chars());
1414        assert!(owned_are_equal(
1415            Ok(OwnedWSVToken::Value("somekindofvalue".to_string())),
1416            tokenizer.next().unwrap()
1417        ));
1418        assert!(owned_are_equal(
1419            Ok(OwnedWSVToken::Comment("thenacomment".to_string())),
1420            tokenizer.next().unwrap()
1421        ));
1422    }
1423
1424    #[test]
1425    fn catches_invalid_line_breaks() {
1426        let input = "\"this is a string with an invalid \"/ line break.\"";
1427        let mut tokenizer = WSVTokenizer::new(input);
1428        if let Err(err) = tokenizer.next().unwrap() {
1429            if let WSVErrorType::InvalidStringLineBreak = err.err_type() {
1430                assert!(tokenizer.next().is_none());
1431                return;
1432            }
1433        }
1434        panic!("Expected to find an InvalidStringLineBreak error");
1435    }
1436
1437    #[test]
1438    fn doesnt_err_on_false_positive_line_breaks() {
1439        let input = "\"string \"\"/\"";
1440        let mut tokenizer = WSVTokenizer::new(input);
1441        let token = tokenizer.next().unwrap();
1442        assert!(are_equal(
1443            Ok(WSVToken::Value(Cow::Owned("string \"/".to_string()))),
1444            token
1445        ));
1446        assert!(tokenizer.next().is_none());
1447    }
1448
1449    #[test]
1450    fn escapes_quotes_correctly() {
1451        let input = "\"\"\"\"\"\"\"\"";
1452        let mut tokenizer = WSVTokenizer::new(input);
1453        assert!(are_equal(
1454            Ok(WSVToken::Value(Cow::Owned("\"\"\"".to_string()))),
1455            tokenizer.next().unwrap()
1456        ));
1457        assert!(tokenizer.next().is_none());
1458    }
1459
1460    #[test]
1461    fn escapes_new_lines_correctly() {
1462        let input = "\"\"/\"\"/\"\"/\"\"";
1463        let mut tokenizer = WSVTokenizer::new(input);
1464        let token = tokenizer.next().unwrap();
1465        println!("{:?}", token);
1466        assert!(are_equal(
1467            Ok(WSVToken::Value(Cow::Owned("\n\n\n".to_string()))),
1468            token
1469        ));
1470    }
1471
1472    #[test]
1473    fn parses_quoted_string_and_immediate_comment_correctly() {
1474        let input = "\"somekindofvalue\"#thenacomment";
1475        let mut tokenizer = WSVTokenizer::new(input);
1476        assert!(are_equal(
1477            Ok(WSVToken::Value(Cow::Borrowed("somekindofvalue"))),
1478            tokenizer.next().unwrap()
1479        ));
1480        assert!(are_equal(
1481            Ok(WSVToken::Comment("thenacomment")),
1482            tokenizer.next().unwrap()
1483        ));
1484    }
1485
1486    #[test]
1487    fn catches_unclosed_string() {
1488        let input = "\"this is an unclosed string";
1489        let mut tokenizer = WSVTokenizer::new(input);
1490        assert!(are_equal(
1491            Err(WSVError {
1492                location: crate::Location::default(),
1493                err_type: WSVErrorType::StringNotClosed
1494            }),
1495            tokenizer.next().unwrap()
1496        ));
1497        assert!(tokenizer.next().is_none());
1498    }
1499
1500    #[test]
1501    fn atrocious_wsv() {
1502        let result = parse(include_str!("../tests/my_test.txt"));
1503        println!("{:?}", result.unwrap());
1504    }
1505
1506    #[allow(dead_code)]
1507    fn are_equal(first: Result<WSVToken, WSVError>, second: Result<WSVToken, WSVError>) -> bool {
1508        match first {
1509            Ok(WSVToken::LF) => {
1510                if let Ok(WSVToken::LF) = second {
1511                    return true;
1512                } else {
1513                    return false;
1514                }
1515            }
1516            Ok(WSVToken::Null) => {
1517                if let Ok(WSVToken::Null) = second {
1518                    return true;
1519                } else {
1520                    return false;
1521                }
1522            }
1523            Ok(WSVToken::Comment(str1)) => {
1524                if let Ok(WSVToken::Comment(str2)) = second {
1525                    return str1 == str2;
1526                } else {
1527                    return false;
1528                }
1529            }
1530            Ok(WSVToken::Value(value1)) => {
1531                if let Ok(WSVToken::Value(value2)) = second {
1532                    return value1.as_ref() == value2.as_ref();
1533                } else {
1534                    return false;
1535                }
1536            }
1537            Err(err1) => {
1538                if let Err(err2) = second {
1539                    return err1.err_type() == err2.err_type();
1540                } else {
1541                    return false;
1542                }
1543            }
1544        }
1545    }
1546
1547    #[allow(dead_code)]
1548    fn owned_are_equal(
1549        first: Result<OwnedWSVToken, WSVError>,
1550        second: Result<OwnedWSVToken, WSVError>,
1551    ) -> bool {
1552        match first {
1553            Ok(OwnedWSVToken::LF) => {
1554                if let Ok(OwnedWSVToken::LF) = second {
1555                    return true;
1556                } else {
1557                    return false;
1558                }
1559            }
1560            Ok(OwnedWSVToken::Null) => {
1561                if let Ok(OwnedWSVToken::Null) = second {
1562                    return true;
1563                } else {
1564                    return false;
1565                }
1566            }
1567            Ok(OwnedWSVToken::Comment(str1)) => {
1568                if let Ok(OwnedWSVToken::Comment(str2)) = second {
1569                    return str1 == str2;
1570                } else {
1571                    return false;
1572                }
1573            }
1574            Ok(OwnedWSVToken::Value(value1)) => {
1575                if let Ok(OwnedWSVToken::Value(value2)) = second {
1576                    return value1 == value2;
1577                } else {
1578                    return false;
1579                }
1580            }
1581            Err(err1) => {
1582                if let Err(err2) = second {
1583                    return err1.err_type() == err2.err_type();
1584                } else {
1585                    return false;
1586                }
1587            }
1588        }
1589    }
1590
1591    #[test]
1592    fn write_really_large_file() {
1593        let values = (0..u32::MAX).map(|_| (0..10).into_iter().map(|val| Some(val.to_string())));
1594        for ch in WSVWriter::new(values) {
1595            print!("{}", ch);
1596            // This is so my computer doesn't fry when running unit tests.
1597            break;
1598        }
1599    }
1600
1601    #[test]
1602    fn lazy_parse_write_example() {
1603        use crate::{parse_lazy, WSVWriter};
1604
1605        // pretend that this input is some iterator over
1606        // all the characters in a 300 Gigabyte file.
1607        let input = String::new();
1608        let chars = input.chars();
1609
1610        let lines = parse_lazy(chars).map(|line| {
1611            // You probably want to handle errors in your case
1612            // unless you are guaranteed to have valid WSV.
1613            let sum = line
1614                .unwrap()
1615                .into_iter()
1616                // We're counting None as 0, so flat_map them out.
1617                .flat_map(|opt| opt)
1618                .map(|value| value.parse::<i32>().unwrap_or(0))
1619                .sum::<i32>();
1620
1621            vec![Some(sum.to_string())]
1622        });
1623
1624        for ch in WSVWriter::new(lines) {
1625            // Your code to dump the output to a file goes here.
1626            print!("{}", ch)
1627        }
1628    }
1629
1630    #[test]
1631    fn error_location_reporting_is_correct() {
1632        let input = r#"some values would go here
1633        and this is a second line,
1634        but the realy error happens
1635"here where the string is unclosed.
1636"#;
1637
1638        for result in WSVLazyTokenizer::new(input.chars()) {
1639            match result {
1640                Ok(_) => {}
1641                Err(err) => {
1642                    assert_eq!(4, err.location().line());
1643                    assert_eq!(36, err.location().col());
1644                }
1645            }
1646        }
1647    }
1648
1649    #[test]
1650    fn jagged_array_no_panic() {
1651        super::WSVWriter::new([vec![Some("1")], vec![Some("3"), None]])
1652            .align_columns(super::ColumnAlignment::Left)
1653            .to_string();
1654    }
1655}