1use crate::span::Span;
13
/// Classification of a significant token emitted by the lexer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenKind {
    /// The word in front of the `:` on a field line.
    FieldName,
    /// The `:` that separates a field name from its value.
    Colon,
    /// The keyword that opens a section (for example `library`).
    SectionHeader,
    /// The rest of a section header line after the keyword, trailing blanks excluded.
    SectionArg,
    /// The `if` keyword.
    If,
    /// The `else` keyword.
    Else,
    /// The `elif` keyword.
    Elif,
    /// Free-form text: a field value, a continuation line, or a condition operand.
    Value,
    /// A `,` inside a condition expression.
    Comma,
    /// A `(` inside a condition expression.
    LParen,
    /// A `)` inside a condition expression.
    RParen,
    /// A `!` inside a condition expression.
    Not,
    /// A `&&` inside a condition expression.
    And,
    /// A `||` inside a condition expression.
    Or,
    /// A comparison operator: `>=`, `<=`, `==`, `=`, `>` or `<`.
    CompOp,
    /// A whole-line `--` comment.
    Comment,
    /// Zero-width marker at the end of the input; it carries any remaining trivia.
    Eof,
}
60
/// Classification of a non-significant piece of source text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TriviaKind {
    /// A run of spaces and/or tabs.
    Whitespace,
    /// A line terminator (`\n`, `\r\n` or a lone `\r`).
    Newline,
    /// A `--` comment that trails a condition expression.
    Comment,
}
72
73#[derive(Debug, Clone, Copy, PartialEq, Eq)]
75pub struct TriviaPiece {
76 pub kind: TriviaKind,
77 pub span: Span,
78}
79
80#[derive(Debug, Clone, PartialEq, Eq)]
82pub struct Token {
83 pub kind: TokenKind,
84 pub span: Span,
86 pub indent: usize,
89 pub leading_trivia: Vec<TriviaPiece>,
91}
92
/// Coarse classification of one physical source line.
#[derive(Clone, Debug, Eq, PartialEq)]
enum LineKind {
    /// Empty, or made up of spaces and tabs only.
    Blank,
    /// First non-blank characters are `--`.
    Comment,
    /// Starts with a section keyword.
    SectionHeader,
    /// Starts with `if`, `else` or `elif` (not followed by `:`).
    Conditional,
    /// A word followed by `:`.
    Field,
    /// Anything else: continuation text or free-form content.
    Value,
}
114
115#[derive(Debug, Clone)]
117struct RawLine {
118 start: usize,
120 end: usize,
122 newline_start: usize,
125 line_end_with_newline: usize,
128 indent: Option<usize>,
131 content_start: usize,
133 kind: LineKind,
135}
136
/// Words that open a section when they start a line.
///
/// Lookups against this table are ASCII case-insensitive.
const SECTION_KEYWORDS: &[&str] = &[
    "library",
    "executable",
    "test-suite",
    "benchmark",
    "flag",
    "source-repository",
    "common",
    "custom-setup",
    "foreign-library",
];
152
/// Words that introduce a conditional line.
///
/// Lookups against this table are ASCII case-insensitive.
const CONDITIONAL_KEYWORDS: &[&str] = &[
    "if",
    "else",
    "elif",
];
154
155fn is_section_keyword(word: &str) -> bool {
157 SECTION_KEYWORDS
158 .iter()
159 .any(|kw| kw.eq_ignore_ascii_case(word))
160}
161
162fn is_conditional_keyword(word: &str) -> bool {
163 CONDITIONAL_KEYWORDS
164 .iter()
165 .any(|kw| kw.eq_ignore_ascii_case(word))
166}
167
/// Computes the visual width of `source[start..end]`.
///
/// Every byte counts as one column, except a tab, which advances to the next
/// multiple of 8.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range of `source`.
fn visual_column(source: &[u8], start: usize, end: usize) -> usize {
    source[start..end]
        .iter()
        .fold(0usize, |column, &byte| match byte {
            // Round up to the next tab stop.
            b'\t' => (column + 8) & !7,
            _ => column + 1,
        })
}
185
/// Scans a word starting at `pos` and returns its `(start, end)` byte range.
///
/// A word is a run of ASCII letters, ASCII digits, `-` and `_`. The range is
/// empty (`start == end`) when no word character is found at `pos`, including
/// when `pos` lies at or beyond the end of `source`.
fn scan_word(source: &[u8], pos: usize) -> (usize, usize) {
    let length = source
        .iter()
        .skip(pos)
        .take_while(|&&byte| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_'))
        .count();
    (pos, pos + length)
}
198
/// Returns the offset of the first byte at or after `pos` that is neither a
/// space nor a tab.
///
/// `pos` is returned unchanged when it lies at or beyond the end of `source`.
fn skip_hspace(source: &[u8], pos: usize) -> usize {
    let blanks = source
        .iter()
        .skip(pos)
        .take_while(|&&byte| matches!(byte, b' ' | b'\t'))
        .count();
    pos + blanks
}
207
208fn split_lines(source: &str) -> Vec<RawLine> {
214 let bytes = source.as_bytes();
215 let len = bytes.len();
216 let mut lines = Vec::new();
217 let mut pos = 0;
218
219 while pos <= len {
220 let line_start = pos;
221
222 let mut end = pos;
224 while end < len && bytes[end] != b'\n' && bytes[end] != b'\r' {
225 end += 1;
226 }
227 let content_end = end;
228
229 let newline_start = end;
231 if end < len && bytes[end] == b'\r' {
232 end += 1;
233 }
234 if end < len && bytes[end] == b'\n' {
235 end += 1;
236 }
237 let line_end = end;
238
239 let mut first_non_ws = line_start;
241 while first_non_ws < content_end
242 && (bytes[first_non_ws] == b' ' || bytes[first_non_ws] == b'\t')
243 {
244 first_non_ws += 1;
245 }
246
247 let indent = if first_non_ws == content_end {
248 None } else {
250 Some(visual_column(bytes, line_start, first_non_ws))
251 };
252
253 let kind = classify_line(source, first_non_ws, content_end, indent.is_none());
254
255 lines.push(RawLine {
256 start: line_start,
257 end: content_end,
258 newline_start,
259 line_end_with_newline: line_end,
260 indent,
261 content_start: first_non_ws,
262 kind,
263 });
264
265 if line_end == pos {
267 break;
268 }
269 pos = line_end;
270 }
271
272 reclassify_braced_freeform_blocks(&mut lines, source);
277
278 lines
279}
280
281fn reclassify_braced_freeform_blocks(lines: &mut [RawLine], source: &str) {
285 let bytes = source.as_bytes();
286 let mut i = 0;
287 while i < lines.len() {
288 if lines[i].kind == LineKind::Field {
290 let line = &lines[i];
291 let mut check = line.end;
294 while check > line.content_start
295 && (bytes[check - 1] == b' ' || bytes[check - 1] == b'\t')
296 {
297 check -= 1;
298 }
299 if check > line.content_start && bytes[check - 1] == b'{' {
300 i += 1;
304 while i < lines.len() {
305 let inner = &lines[i];
306 let trimmed_start = inner.content_start;
308 let trimmed_end = inner.end;
309 if trimmed_start < trimmed_end
310 && bytes[trimmed_start] == b'}'
311 && is_only_closing_brace(bytes, trimmed_start, trimmed_end)
312 {
313 lines[i].kind = LineKind::Value;
315 i += 1;
316 break;
317 }
318 if inner.kind != LineKind::Blank {
320 lines[i].kind = LineKind::Value;
321 }
322 i += 1;
323 }
324 continue;
325 }
326 }
327 i += 1;
328 }
329}
330
/// Returns `true` when `bytes[start..end]` is a `}` followed only by spaces
/// and tabs.
///
/// An empty range yields `false`.
fn is_only_closing_brace(bytes: &[u8], start: usize, end: usize) -> bool {
    start < end
        && bytes[start] == b'}'
        && bytes[start + 1..end]
            .iter()
            .all(|&byte| byte == b' ' || byte == b'\t')
}
344
345fn classify_line(
347 source: &str,
348 content_start: usize,
349 content_end: usize,
350 is_blank: bool,
351) -> LineKind {
352 if is_blank {
353 return LineKind::Blank;
354 }
355
356 let bytes = source.as_bytes();
357
358 if content_start + 1 < content_end
360 && bytes[content_start] == b'-'
361 && bytes[content_start + 1] == b'-'
362 {
363 return LineKind::Comment;
366 }
367
368 let (word_start, word_end) = scan_word(bytes, content_start);
370 if word_start == word_end {
371 return LineKind::Value;
373 }
374 let word = &source[word_start..word_end];
375
376 if is_section_keyword(word) {
378 if word_end >= content_end {
382 return LineKind::SectionHeader;
384 }
385 let ch = bytes[word_end];
386 if ch == b' ' || ch == b'\t' || ch == b'{' {
387 return LineKind::SectionHeader;
388 }
389 }
390
391 if is_conditional_keyword(word) {
393 let after_word = skip_hspace(bytes, word_end);
394 if after_word >= content_end || bytes[after_word] != b':' {
395 return LineKind::Conditional;
396 }
397 }
398
399 let after_word = skip_hspace(bytes, word_end);
402 if after_word < content_end && bytes[after_word] == b':' {
403 return LineKind::Field;
404 }
405
406 LineKind::Value
408}
409
410pub fn tokenize(source: &str) -> Vec<Token> {
420 let lines = split_lines(source);
421 let mut tokens = Vec::new();
422 let mut pending_trivia: Vec<TriviaPiece> = Vec::new();
423
424 for line in &lines {
425 match line.kind {
426 LineKind::Blank => {
427 if line.start < line.end {
429 pending_trivia.push(TriviaPiece {
430 kind: TriviaKind::Whitespace,
431 span: Span::new(line.start, line.end),
432 });
433 }
434 if line.newline_start < line.line_end_with_newline {
435 pending_trivia.push(TriviaPiece {
436 kind: TriviaKind::Newline,
437 span: Span::new(line.newline_start, line.line_end_with_newline),
438 });
439 }
440 }
441
442 LineKind::Comment => {
443 if line.start < line.content_start {
445 pending_trivia.push(TriviaPiece {
446 kind: TriviaKind::Whitespace,
447 span: Span::new(line.start, line.content_start),
448 });
449 }
450 let comment_span = Span::new(line.content_start, line.end);
452 let trivia = std::mem::take(&mut pending_trivia);
455 tokens.push(Token {
458 kind: TokenKind::Comment,
459 span: comment_span,
460 indent: line.indent.unwrap_or(0),
461 leading_trivia: trivia,
462 });
463 if line.newline_start < line.line_end_with_newline {
465 pending_trivia.push(TriviaPiece {
466 kind: TriviaKind::Newline,
467 span: Span::new(line.newline_start, line.line_end_with_newline),
468 });
469 }
470 }
471
472 LineKind::SectionHeader => {
473 tokenize_section_header(source, line, &mut tokens, &mut pending_trivia);
474 }
475
476 LineKind::Conditional => {
477 tokenize_conditional(source, line, &mut tokens, &mut pending_trivia);
478 }
479
480 LineKind::Field => {
481 tokenize_field(source, line, &mut tokens, &mut pending_trivia);
482 }
483
484 LineKind::Value => {
485 tokenize_value_line(source, line, &mut tokens, &mut pending_trivia);
486 }
487 }
488 }
489
490 let eof_offset = source.len();
492 tokens.push(Token {
493 kind: TokenKind::Eof,
494 span: Span::empty(eof_offset),
495 indent: 0,
496 leading_trivia: std::mem::take(&mut pending_trivia),
497 });
498
499 tokens
500}
501
502fn tokenize_section_header(
504 source: &str,
505 line: &RawLine,
506 tokens: &mut Vec<Token>,
507 pending_trivia: &mut Vec<TriviaPiece>,
508) {
509 let bytes = source.as_bytes();
510
511 if line.start < line.content_start {
513 pending_trivia.push(TriviaPiece {
514 kind: TriviaKind::Whitespace,
515 span: Span::new(line.start, line.content_start),
516 });
517 }
518
519 let (kw_start, kw_end) = scan_word(bytes, line.content_start);
521 tokens.push(Token {
522 kind: TokenKind::SectionHeader,
523 span: Span::new(kw_start, kw_end),
524 indent: line.indent.unwrap_or(0),
525 leading_trivia: std::mem::take(pending_trivia),
526 });
527
528 let mut pos = kw_end;
530 let ws_start = pos;
532 pos = skip_hspace(bytes, pos);
533 if ws_start < pos {
534 pending_trivia.push(TriviaPiece {
535 kind: TriviaKind::Whitespace,
536 span: Span::new(ws_start, pos),
537 });
538 }
539
540 if pos < line.end {
542 let mut arg_end = line.end;
544 while arg_end > pos && (bytes[arg_end - 1] == b' ' || bytes[arg_end - 1] == b'\t') {
545 arg_end -= 1;
546 }
547 if pos < arg_end {
548 tokens.push(Token {
549 kind: TokenKind::SectionArg,
550 span: Span::new(pos, arg_end),
551 indent: visual_column(bytes, line.start, pos),
552 leading_trivia: std::mem::take(pending_trivia),
553 });
554 if arg_end < line.end {
556 pending_trivia.push(TriviaPiece {
557 kind: TriviaKind::Whitespace,
558 span: Span::new(arg_end, line.end),
559 });
560 }
561 }
562 }
563
564 if line.newline_start < line.line_end_with_newline {
566 pending_trivia.push(TriviaPiece {
567 kind: TriviaKind::Newline,
568 span: Span::new(line.newline_start, line.line_end_with_newline),
569 });
570 }
571}
572
573fn tokenize_conditional(
575 source: &str,
576 line: &RawLine,
577 tokens: &mut Vec<Token>,
578 pending_trivia: &mut Vec<TriviaPiece>,
579) {
580 let bytes = source.as_bytes();
581
582 if line.start < line.content_start {
584 pending_trivia.push(TriviaPiece {
585 kind: TriviaKind::Whitespace,
586 span: Span::new(line.start, line.content_start),
587 });
588 }
589
590 let (kw_start, kw_end) = scan_word(bytes, line.content_start);
592 let kw_str = &source[kw_start..kw_end];
593 let kind = if kw_str.eq_ignore_ascii_case("if") {
594 TokenKind::If
595 } else if kw_str.eq_ignore_ascii_case("else") {
596 TokenKind::Else
597 } else {
598 TokenKind::Elif
599 };
600
601 tokens.push(Token {
602 kind,
603 span: Span::new(kw_start, kw_end),
604 indent: line.indent.unwrap_or(0),
605 leading_trivia: std::mem::take(pending_trivia),
606 });
607
608 if kind == TokenKind::If || kind == TokenKind::Elif {
611 tokenize_condition_expr(source, bytes, kw_end, line, tokens, pending_trivia);
612 } else if kind == TokenKind::Else {
613 let after_kw = skip_hspace(bytes, kw_end);
615 if after_kw < line.end {
616 if kw_end < after_kw {
618 pending_trivia.push(TriviaPiece {
619 kind: TriviaKind::Whitespace,
620 span: Span::new(kw_end, after_kw),
621 });
622 }
623 tokens.push(Token {
624 kind: TokenKind::Value,
625 span: Span::new(after_kw, line.end),
626 indent: visual_column(bytes, line.start, after_kw),
627 leading_trivia: std::mem::take(pending_trivia),
628 });
629 }
630 }
631
632 if line.newline_start < line.line_end_with_newline {
634 pending_trivia.push(TriviaPiece {
635 kind: TriviaKind::Newline,
636 span: Span::new(line.newline_start, line.line_end_with_newline),
637 });
638 }
639}
640
641fn tokenize_condition_expr(
646 _source: &str,
647 bytes: &[u8],
648 start: usize,
649 line: &RawLine,
650 tokens: &mut Vec<Token>,
651 pending_trivia: &mut Vec<TriviaPiece>,
652) {
653 let end = line.end;
654 let mut pos = start;
655
656 while pos < end {
657 let b = bytes[pos];
658 match b {
659 b' ' | b'\t' => {
660 let ws_start = pos;
661 pos = skip_hspace(bytes, pos);
662 pending_trivia.push(TriviaPiece {
663 kind: TriviaKind::Whitespace,
664 span: Span::new(ws_start, pos),
665 });
666 }
667 b'(' => {
668 tokens.push(Token {
669 kind: TokenKind::LParen,
670 span: Span::new(pos, pos + 1),
671 indent: visual_column(bytes, line.start, pos),
672 leading_trivia: std::mem::take(pending_trivia),
673 });
674 pos += 1;
675 }
676 b')' => {
677 tokens.push(Token {
678 kind: TokenKind::RParen,
679 span: Span::new(pos, pos + 1),
680 indent: visual_column(bytes, line.start, pos),
681 leading_trivia: std::mem::take(pending_trivia),
682 });
683 pos += 1;
684 }
685 b'!' => {
686 tokens.push(Token {
687 kind: TokenKind::Not,
688 span: Span::new(pos, pos + 1),
689 indent: visual_column(bytes, line.start, pos),
690 leading_trivia: std::mem::take(pending_trivia),
691 });
692 pos += 1;
693 }
694 b'&' => {
695 if pos + 1 < end && bytes[pos + 1] == b'&' {
696 tokens.push(Token {
697 kind: TokenKind::And,
698 span: Span::new(pos, pos + 2),
699 indent: visual_column(bytes, line.start, pos),
700 leading_trivia: std::mem::take(pending_trivia),
701 });
702 pos += 2;
703 } else {
704 tokens.push(Token {
706 kind: TokenKind::Value,
707 span: Span::new(pos, pos + 1),
708 indent: visual_column(bytes, line.start, pos),
709 leading_trivia: std::mem::take(pending_trivia),
710 });
711 pos += 1;
712 }
713 }
714 b'|' => {
715 if pos + 1 < end && bytes[pos + 1] == b'|' {
716 tokens.push(Token {
717 kind: TokenKind::Or,
718 span: Span::new(pos, pos + 2),
719 indent: visual_column(bytes, line.start, pos),
720 leading_trivia: std::mem::take(pending_trivia),
721 });
722 pos += 2;
723 } else {
724 tokens.push(Token {
725 kind: TokenKind::Value,
726 span: Span::new(pos, pos + 1),
727 indent: visual_column(bytes, line.start, pos),
728 leading_trivia: std::mem::take(pending_trivia),
729 });
730 pos += 1;
731 }
732 }
733 b'>' if pos + 1 < end && bytes[pos + 1] == b'=' => {
734 tokens.push(Token {
735 kind: TokenKind::CompOp,
736 span: Span::new(pos, pos + 2),
737 indent: visual_column(bytes, line.start, pos),
738 leading_trivia: std::mem::take(pending_trivia),
739 });
740 pos += 2;
741 }
742 b'<' if pos + 1 < end && bytes[pos + 1] == b'=' => {
743 tokens.push(Token {
744 kind: TokenKind::CompOp,
745 span: Span::new(pos, pos + 2),
746 indent: visual_column(bytes, line.start, pos),
747 leading_trivia: std::mem::take(pending_trivia),
748 });
749 pos += 2;
750 }
751 b'=' => {
752 let len = if pos + 1 < end && bytes[pos + 1] == b'=' {
753 2
754 } else {
755 1
756 };
757 tokens.push(Token {
758 kind: TokenKind::CompOp,
759 span: Span::new(pos, pos + len),
760 indent: visual_column(bytes, line.start, pos),
761 leading_trivia: std::mem::take(pending_trivia),
762 });
763 pos += len;
764 }
765 b'>' => {
766 tokens.push(Token {
767 kind: TokenKind::CompOp,
768 span: Span::new(pos, pos + 1),
769 indent: visual_column(bytes, line.start, pos),
770 leading_trivia: std::mem::take(pending_trivia),
771 });
772 pos += 1;
773 }
774 b'<' => {
775 tokens.push(Token {
776 kind: TokenKind::CompOp,
777 span: Span::new(pos, pos + 1),
778 indent: visual_column(bytes, line.start, pos),
779 leading_trivia: std::mem::take(pending_trivia),
780 });
781 pos += 1;
782 }
783 b',' => {
784 tokens.push(Token {
785 kind: TokenKind::Comma,
786 span: Span::new(pos, pos + 1),
787 indent: visual_column(bytes, line.start, pos),
788 leading_trivia: std::mem::take(pending_trivia),
789 });
790 pos += 1;
791 }
792 b'-' if pos + 1 < end && bytes[pos + 1] == b'-' => {
793 pending_trivia.push(TriviaPiece {
795 kind: TriviaKind::Comment,
796 span: Span::new(pos, end),
797 });
798 pos = end;
799 }
800 _ => {
801 let val_start = pos;
805 pos += 1;
806 while pos < end
807 && !matches!(
808 bytes[pos],
809 b' ' | b'\t' | b'(' | b')' | b'!' | b',' | b'&' | b'|' | b'>' | b'<' | b'='
810 )
811 {
812 pos += 1;
813 }
814 tokens.push(Token {
815 kind: TokenKind::Value,
816 span: Span::new(val_start, pos),
817 indent: visual_column(bytes, line.start, val_start),
818 leading_trivia: std::mem::take(pending_trivia),
819 });
820 }
821 }
822 }
823}
824
825fn tokenize_field(
827 source: &str,
828 line: &RawLine,
829 tokens: &mut Vec<Token>,
830 pending_trivia: &mut Vec<TriviaPiece>,
831) {
832 let bytes = source.as_bytes();
833
834 if line.start < line.content_start {
836 pending_trivia.push(TriviaPiece {
837 kind: TriviaKind::Whitespace,
838 span: Span::new(line.start, line.content_start),
839 });
840 }
841
842 let (name_start, name_end) = scan_word(bytes, line.content_start);
844 tokens.push(Token {
845 kind: TokenKind::FieldName,
846 span: Span::new(name_start, name_end),
847 indent: line.indent.unwrap_or(0),
848 leading_trivia: std::mem::take(pending_trivia),
849 });
850
851 let mut pos = name_end;
853 let ws_start = pos;
854 pos = skip_hspace(bytes, pos);
855 if ws_start < pos {
856 pending_trivia.push(TriviaPiece {
857 kind: TriviaKind::Whitespace,
858 span: Span::new(ws_start, pos),
859 });
860 }
861
862 if pos < line.end && bytes[pos] == b':' {
864 tokens.push(Token {
865 kind: TokenKind::Colon,
866 span: Span::new(pos, pos + 1),
867 indent: visual_column(bytes, line.start, pos),
868 leading_trivia: std::mem::take(pending_trivia),
869 });
870 pos += 1;
871 }
872
873 let ws_start2 = pos;
875 pos = skip_hspace(bytes, pos);
876 if ws_start2 < pos {
877 pending_trivia.push(TriviaPiece {
878 kind: TriviaKind::Whitespace,
879 span: Span::new(ws_start2, pos),
880 });
881 }
882
883 if pos < line.end {
885 let val_end = line.end;
887 tokens.push(Token {
888 kind: TokenKind::Value,
889 span: Span::new(pos, val_end),
890 indent: visual_column(bytes, line.start, pos),
891 leading_trivia: std::mem::take(pending_trivia),
892 });
893 }
894
895 if line.newline_start < line.line_end_with_newline {
897 pending_trivia.push(TriviaPiece {
898 kind: TriviaKind::Newline,
899 span: Span::new(line.newline_start, line.line_end_with_newline),
900 });
901 }
902}
903
904fn tokenize_value_line(
906 source: &str,
907 line: &RawLine,
908 tokens: &mut Vec<Token>,
909 pending_trivia: &mut Vec<TriviaPiece>,
910) {
911 let _ = source;
912
913 if line.start < line.content_start {
915 pending_trivia.push(TriviaPiece {
916 kind: TriviaKind::Whitespace,
917 span: Span::new(line.start, line.content_start),
918 });
919 }
920
921 if line.content_start < line.end {
922 tokens.push(Token {
923 kind: TokenKind::Value,
924 span: Span::new(line.content_start, line.end),
925 indent: line.indent.unwrap_or(0),
926 leading_trivia: std::mem::take(pending_trivia),
927 });
928 }
929
930 if line.newline_start < line.line_end_with_newline {
932 pending_trivia.push(TriviaPiece {
933 kind: TriviaKind::Newline,
934 span: Span::new(line.newline_start, line.line_end_with_newline),
935 });
936 }
937}
938
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `source` and pairs each token's kind with the text it spans.
    fn tok_pairs(source: &str) -> Vec<(TokenKind, &str)> {
        let tokens = tokenize(source);
        tokens
            .iter()
            .map(|t| (t.kind, t.span.slice(source)))
            .collect()
    }

    #[test]
    fn lex_simple_field() {
        let src = "name: foo\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::FieldName, "name"),
                (TokenKind::Colon, ":"),
                (TokenKind::Value, "foo"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_field_with_spaces() {
        let src = "build-depends: base >=4.14\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::FieldName, "build-depends"),
                (TokenKind::Colon, ":"),
                (TokenKind::Value, "base >=4.14"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_section_header_no_arg() {
        let src = "library\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![(TokenKind::SectionHeader, "library"), (TokenKind::Eof, ""),]
        );
    }

    #[test]
    fn lex_section_header_with_arg() {
        let src = "executable my-exe\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::SectionHeader, "executable"),
                (TokenKind::SectionArg, "my-exe"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_conditional_if() {
        let src = " if flag(dev)\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::If, "if"),
                (TokenKind::Value, "flag"),
                (TokenKind::LParen, "("),
                (TokenKind::Value, "dev"),
                (TokenKind::RParen, ")"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_conditional_complex() {
        let src = " if flag(dev) && !os(windows)\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::If, "if"),
                (TokenKind::Value, "flag"),
                (TokenKind::LParen, "("),
                (TokenKind::Value, "dev"),
                (TokenKind::RParen, ")"),
                (TokenKind::And, "&&"),
                (TokenKind::Not, "!"),
                (TokenKind::Value, "os"),
                (TokenKind::LParen, "("),
                (TokenKind::Value, "windows"),
                (TokenKind::RParen, ")"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_else() {
        let src = " else\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![(TokenKind::Else, "else"), (TokenKind::Eof, ""),]
        );
    }

    #[test]
    fn lex_comment_line() {
        let src = "-- this is a comment\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::Comment, "-- this is a comment"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_blank_lines() {
        let src = "name: foo\n\nversion: 0.1\n";
        let tokens = tokenize(src);
        let version_tok = tokens
            .iter()
            .find(|t| t.kind == TokenKind::FieldName && t.span.slice(src) == "version");
        assert!(version_tok.is_some());
        // The blank line must survive as trivia on the following token.
        let trivia_kinds: Vec<_> = version_tok
            .unwrap()
            .leading_trivia
            .iter()
            .map(|t| t.kind)
            .collect();
        assert!(trivia_kinds.contains(&TriviaKind::Newline));
    }

    #[test]
    fn lex_indented_field() {
        // Two leading spaces, matching the indent asserted below.
        let src = "  exposed-modules: Foo\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::FieldName, "exposed-modules"),
                (TokenKind::Colon, ":"),
                (TokenKind::Value, "Foo"),
                (TokenKind::Eof, ""),
            ]
        );
        let tokens = tokenize(src);
        assert_eq!(tokens[0].indent, 2);
    }

    #[test]
    fn lex_continuation_value() {
        // Four leading spaces, matching the indent asserted below.
        let src = "    base >=4.14\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![(TokenKind::Value, "base >=4.14"), (TokenKind::Eof, ""),]
        );
        let tokens = tokenize(src);
        assert_eq!(tokens[0].indent, 4);
    }

    #[test]
    fn lex_full_span_coverage() {
        let src = "name: foo\nversion: 0.1\n";
        let tokens = tokenize(src);
        // Every source byte must belong to exactly one token or trivia piece.
        let mut covered = vec![false; src.len()];
        for tok in &tokens {
            for tp in &tok.leading_trivia {
                for i in tp.span.start..tp.span.end {
                    assert!(
                        !covered[i],
                        "byte {i} covered twice (trivia on {:?})",
                        tok.kind
                    );
                    covered[i] = true;
                }
            }
            for i in tok.span.start..tok.span.end {
                assert!(!covered[i], "byte {i} covered twice (token {:?})", tok.kind);
                covered[i] = true;
            }
        }
        for (i, &c) in covered.iter().enumerate() {
            assert!(c, "byte {i} ({:?}) not covered", src.as_bytes()[i] as char);
        }
    }

    #[test]
    fn lex_impl_condition() {
        let src = " if impl(ghc >= 9.6)\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::If, "if"),
                (TokenKind::Value, "impl"),
                (TokenKind::LParen, "("),
                (TokenKind::Value, "ghc"),
                (TokenKind::CompOp, ">="),
                (TokenKind::Value, "9.6"),
                (TokenKind::RParen, ")"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_field_no_value() {
        let src = "build-depends:\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::FieldName, "build-depends"),
                (TokenKind::Colon, ":"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_import_as_field() {
        let src = " import: warnings\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::FieldName, "import"),
                (TokenKind::Colon, ":"),
                (TokenKind::Value, "warnings"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_tab_indent() {
        let src = "\texposed-modules: Foo\n";
        let tokens = tokenize(src);
        // A leading tab advances to column 8.
        assert_eq!(tokens[0].indent, 8);
    }

    #[test]
    fn lex_no_trailing_newline() {
        let src = "name: foo";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::FieldName, "name"),
                (TokenKind::Colon, ":"),
                (TokenKind::Value, "foo"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn lex_common_stanza() {
        let src = "common warnings\n";
        let pairs = tok_pairs(src);
        assert_eq!(
            pairs,
            vec![
                (TokenKind::SectionHeader, "common"),
                (TokenKind::SectionArg, "warnings"),
                (TokenKind::Eof, ""),
            ]
        );
    }

    #[test]
    fn full_span_coverage_multiline() {
        let src = "cabal-version: 3.0\nname: foo\n\n-- A comment\n\nlibrary\n exposed-modules: Foo\n build-depends:\n base >=4.14\n";
        let tokens = tokenize(src);
        // Every source byte must belong to exactly one token or trivia piece.
        let mut covered = vec![false; src.len()];
        for tok in &tokens {
            for tp in &tok.leading_trivia {
                for i in tp.span.start..tp.span.end {
                    assert!(!covered[i], "byte {i} covered twice (trivia)");
                    covered[i] = true;
                }
            }
            for i in tok.span.start..tok.span.end {
                assert!(!covered[i], "byte {i} covered twice (token {:?})", tok.kind);
                covered[i] = true;
            }
        }
        for (i, &c) in covered.iter().enumerate() {
            assert!(c, "byte {i} ({:?}) not covered", src.as_bytes()[i] as char);
        }
    }
}