1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
// Copyright (c) 2017 Fabian Schuiki

use moore_common::grind::{Grinder, Lookahead};
use moore_common::errors::DiagBuilder2;
use moore_common::source::*;
use crate::lexer::categorizer::Category;


/// A grinder that groups categorized characters into `Bundle`s.
///
/// Runs of spaces and whole comments are each collapsed into a single token
/// that keeps only its span; their content is discarded. String literals,
/// `'x'` character literals and extended identifiers are assembled at this
/// stage as well.
pub struct Bundler<T>
where
	T: Grinder,
{
	/// Upstream grinder, wrapped so that upcoming items can be peeked at
	/// without consuming them.
	inner: Lookahead<T>,
	/// Source that the spans of the emitted bundles refer to.
	src: Source,
}

impl<T: Grinder> Bundler<T> {
	/// Construct a bundler that pulls its input from `inner` and attributes
	/// the spans it produces to `src`.
	///
	/// `inner` may be anything that converts into a `Lookahead<T>`.
	pub fn new<I: Into<Lookahead<T>>>(inner: I, src: Source) -> Self {
		let inner = inner.into();
		Bundler { inner, src }
	}
}

impl<T> Grinder for Bundler<T> where T: Grinder<Item=Option<(usize, char, u8, Category)>, Error=DiagBuilder2> {
	type Item = Option<Spanned<Bundle>>;
	type Error = DiagBuilder2;

	fn emit(&mut self, err: Self::Error) {
		self.inner.emit(err);
	}

	fn next(&mut self) -> Self::Item {
		let (begin, c, sz, cat) = match self.inner.next() {
			Some(v) => v,
			None => return None,
		};
		let mut sp = Span::new(self.src, begin, begin + sz as usize);

		// Handle single-line comments.
		if c == '-' {
			if let Some((_, '-', _, _)) = *self.inner.lookahead(0) {
				self.inner.next();
				while let &Some((offset, d, sz, _)) = self.inner.lookahead(0) {
					if d == '\n' {
						break;
					} else {
						sp.end = offset + sz as usize;
						self.inner.next();
					}
				}
				return Some(Spanned::new(Bundle::Comment, sp));
			}
		}

		// Handle multi-line comments.
		if c == '/' {
			if let Some((_, '*', _, _)) = *self.inner.lookahead(0) {
				self.inner.next();
				let mut p0 = None;
				let mut p1 = None;
				while let Some((offset, d, sz, _)) = *self.inner.lookahead(0) {
					if p0 == Some('*') && p1 == Some('/') {
						break;
					} else {
						p0 = p1;
						p1 = Some(d);
						sp.end = offset + sz as usize;
						self.inner.next();
					}
				}
				return Some(Spanned::new(Bundle::Comment, sp));
			}
		}

		// Handle bit string literals.
		if c == '\'' {
			if let Some((_, c, _, _)) = *self.inner.lookahead(0) {
				if c != '(' && c != ')' {
					if let Some((offset, '\'', sz, _)) = *self.inner.lookahead(1) {
						self.inner.next();
						self.inner.next();
						sp.end = offset + sz as usize;
						return Some(Spanned::new(Bundle::BitLiteral(c), sp));
					}
				}
			}
		}

		// Handle string literals.
		if c == '"' {
			let mut s = String::new();
			while let Some((offset, d, sz, _)) = self.inner.next() {
				sp.end = offset + sz as usize;
				if d == '"' {
					if let Some((_, '"', _, _)) = *self.inner.lookahead(0) {
						s.push('"');
						self.inner.next();
					} else {
						break;
					}
				} else if d == '\n' {
					self.emit(DiagBuilder2::error("String literal must not contain line breaks.")
						.span(sp.end())
						.add_note("Use string concatenation (e.g. \"abc\" & \"def\") to break strings across lines"));
				} else {
					s.push(d);
				}
			}
			return Some(Spanned::new(Bundle::StringLiteral(s), sp));
		}

		// Handle extended identifiers.
		if c == '\\' {
			let mut s = String::new();
			s.push(c);
			while let Some((offset, d, sz, _)) = self.inner.next() {
				sp.end = offset + sz as usize;
				if d == '\\' {
					s.push('\\');
					if let Some((_, '\\', _, _)) = *self.inner.lookahead(0) {
						self.inner.next();
					} else {
						break;
					}
				} else if d == '\n' {
					self.emit(DiagBuilder2::error("Extended identifier must not contain line breaks.")
						.span(sp.end()));
				} else {
					s.push(d);
				}
			}
			return Some(Spanned::new(Bundle::ExtendedIdent(s), sp));
		}

		// Bundle up the remaining characters.
		match cat {

			// If the character is a letter or digit, aggregate all following
			// characters of the same kind into a string.
			Category::Letter | Category::Digit => {
				let mut s = String::new();
				s.push(c);
				while let &Some((offset, d, sz, c)) = self.inner.lookahead(0) {
					if c == cat {
						s.push(d);
						sp.end = offset + sz as usize;
						self.inner.next();
					} else {
						break;
					}
				}
				Some(Spanned::new(
					match cat {
						Category::Letter => Bundle::Letters(s),
						Category::Digit  => Bundle::Digits(s),
						_ => unreachable!(),
					},
					sp
				))
			}

			// If the character is a space, consume adjacent spaces and emit a
			// token that covers the correct span, but does not contain the
			// spaces themselves.
			Category::Space => {
				while let Some((offset, _, sz, Category::Space)) = *self.inner.lookahead(0) {
					sp.end = offset + sz as usize;
					self.inner.next();
				}
				Some(Spanned::new(Bundle::Space, sp))
			}

			// Emit special characters as 1-char bundles.
			Category::Special => Some(Spanned::new(Bundle::Special(c), sp)),

			// Throw errors for invalid characters.
			Category::Other => {
				self.emit(DiagBuilder2::error(format!("Character `{}` not allowed in VHDL source text", c)).span(sp));
				None
			}
		}
	}
}

/// The most fundamental groups of characters as per the VHDL standard.
/// Lexical analysis combines one or more of these bundles into more
/// meaningful tokens.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Bundle {
	/// A run of consecutive letters.
	Letters(String),
	/// A run of consecutive digits.
	Digits(String),
	/// A single special character.
	Special(char),
	/// The content of a string literal, without the enclosing quotes.
	StringLiteral(String),
	/// The character enclosed between two ticks, as in `'x'`.
	BitLiteral(char),
	/// An extended identifier, including the enclosing backslashes.
	ExtendedIdent(String),
	/// One or more adjacent spaces; the characters themselves are dropped.
	Space,
	/// A comment; its text is dropped.
	Comment,
}

impl Bundle {
	/// Tell whether the bundle carries syntactic meaning. Only spaces and
	/// comments are insignificant.
	pub fn is_significant(&self) -> bool {
		// Spelled out exhaustively so that adding a variant forces a
		// decision about its significance.
		match *self {
			Bundle::Letters(_)
			| Bundle::Digits(_)
			| Bundle::Special(_)
			| Bundle::StringLiteral(_)
			| Bundle::BitLiteral(_)
			| Bundle::ExtendedIdent(_) => true,
			Bundle::Space | Bundle::Comment => false,
		}
	}
}