1use std::{
36 borrow::Cow,
37 fmt::{Debug, Display, Formatter},
38 iter::{FusedIterator, Peekable, Take},
39 slice::Iter,
40 str::Chars,
41};
42
43use crate::{
44 arena::{Entry, Id, Identifiable},
45 format::SnippetFormatter,
46 lexis::{
47 cursor::TokenBufferCursor,
48 session::{BufferLexisSession, Cursor},
49 ByteIndex,
50 Chunk,
51 Length,
52 LineIndex,
53 Site,
54 SourceCode,
55 ToSpan,
56 Token,
57 TokenCount,
58 CHUNK_SIZE,
59 },
60 report::{ld_assert, ld_unreachable},
61};
62
63pub struct TokenBuffer<T: Token> {
82 id: Id,
83 pub(crate) tokens: Vec<T>,
84 pub(super) sites: Vec<Site>,
85 pub(crate) spans: Vec<Length>,
86 pub(crate) indices: Vec<ByteIndex>,
87 pub(crate) text: String,
88 pub(crate) lines: LineIndex,
89}
90
91impl<T: Token> Debug for TokenBuffer<T> {
92 #[inline]
93 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
94 formatter
95 .debug_struct("TokenBuffer")
96 .field("id", &self.id)
97 .field("length", &self.length())
98 .finish_non_exhaustive()
99 }
100}
101
102impl<T: Token> Display for TokenBuffer<T> {
103 #[inline(always)]
104 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
105 formatter
106 .snippet(self)
107 .set_caption(format!("TokenBuffer({})", self.id))
108 .finish()
109 }
110}
111
112impl<T: Token, S: AsRef<str>> From<S> for TokenBuffer<T> {
113 #[inline(always)]
114 fn from(string: S) -> Self {
115 let string = string.as_ref();
116
117 let token_capacity = (string.len() / CHUNK_SIZE + 1).next_power_of_two();
118
119 let mut buffer = TokenBuffer::with_capacity(token_capacity, string.len());
120
121 buffer.append(string);
122
123 buffer
124 }
125}
126
127impl<T: Token> Drop for TokenBuffer<T> {
128 fn drop(&mut self) {
129 self.id.clear_name();
130 }
131}
132
133impl<T: Token> Identifiable for TokenBuffer<T> {
134 #[inline(always)]
135 fn id(&self) -> Id {
136 self.id
137 }
138}
139
140impl<T: Token> SourceCode for TokenBuffer<T> {
141 type Token = T;
142
143 type Cursor<'code> = TokenBufferCursor<'code, Self::Token>;
144
145 type CharIterator<'code> = Take<Chars<'code>>;
146
147 fn chars(&self, span: impl ToSpan) -> Self::CharIterator<'_> {
148 let span = match span.to_site_span(self) {
149 None => panic!("Specified span is invalid."),
150 Some(span) => span,
151 };
152
153 let span_length = span.end - span.start;
154
155 if span_length == 0 {
156 return "".chars().take(0);
157 };
158
159 let rest = match self.search(span.start) {
160 Ok(byte_index) => {
161 ld_assert!(byte_index < self.text.len(), "Byte index out of bounds.");
162
163 unsafe { self.text.get_unchecked(byte_index..) }.chars()
164 }
165
166 Err((byte_index, mut remaining)) => {
167 ld_assert!(byte_index < self.text.len(), "Byte index out of bounds.");
168
169 let mut rest = unsafe { self.text.get_unchecked(byte_index..) }.chars();
170
171 while remaining > 0 {
172 remaining -= 1;
173 let _ = rest.next();
174 }
175
176 rest
177 }
178 };
179
180 rest.take(span_length)
181 }
182
183 fn substring(&self, span: impl ToSpan) -> Cow<str> {
184 let span = match span.to_site_span(self) {
185 None => panic!("Specified span is invalid."),
186 Some(span) => span,
187 };
188
189 let span_length = span.end - span.start;
190
191 if span_length == 0 {
192 return Cow::from("");
193 };
194
195 let start = match self.search(span.start) {
196 Ok(byte_index) => byte_index,
197
198 Err((byte_index, remaining)) => {
199 ld_assert!(byte_index < self.text.len(), "Byte index out of bounds.");
200
201 let rest = unsafe { self.text.get_unchecked(byte_index..) }.char_indices();
202
203 let Some((offset, _)) = rest.take(remaining + 1).last() else {
204 unsafe { ld_unreachable!("Empty tail.") };
205 };
206
207 byte_index + offset
208 }
209 };
210
211 let end = match self.search(span.end) {
212 Ok(byte_index) => byte_index,
213
214 Err((byte_index, remaining)) => {
215 ld_assert!(byte_index < self.text.len(), "Byte index out of bounds.");
216
217 let rest = unsafe { self.text.get_unchecked(byte_index..) }.char_indices();
218
219 let Some((offset, _)) = rest.take(remaining + 1).last() else {
220 unsafe { ld_unreachable!("Empty tail.") };
221 };
222
223 byte_index + offset
224 }
225 };
226
227 ld_assert!(start <= end, "Invalid byte bounds.");
228 ld_assert!(end <= self.text.len(), "Invalid byte bounds.");
229
230 unsafe { Cow::from(self.text.get_unchecked(start..end)) }
231 }
232
233 #[inline(always)]
234 fn has_chunk(&self, entry: &Entry) -> bool {
235 if entry.version > 0 {
236 return false;
237 }
238
239 entry.index < self.tokens()
240 }
241
242 #[inline(always)]
243 fn get_token(&self, entry: &Entry) -> Option<Self::Token> {
244 if entry.version > 0 {
245 return None;
246 }
247
248 self.tokens.get(entry.index).copied()
249 }
250
251 #[inline(always)]
252 fn get_site(&self, entry: &Entry) -> Option<Site> {
253 if entry.version > 0 {
254 return None;
255 }
256
257 self.sites.get(entry.index).copied()
258 }
259
260 #[inline(always)]
261 fn get_string(&self, entry: &Entry) -> Option<&str> {
262 if entry.version > 0 {
263 return None;
264 }
265
266 let start = *self.indices.get(entry.index)?;
267
268 let end = self
269 .indices
270 .get(entry.index + 1)
271 .copied()
272 .unwrap_or(self.text.len());
273
274 Some(unsafe { self.text.get_unchecked(start..end) })
275 }
276
277 #[inline(always)]
278 fn get_length(&self, entry: &Entry) -> Option<Length> {
279 if entry.version > 0 {
280 return None;
281 }
282
283 self.spans.get(entry.index).copied()
284 }
285
286 #[inline(always)]
287 fn cursor(&self, span: impl ToSpan) -> TokenBufferCursor<'_, Self::Token> {
288 let span = match span.to_site_span(self) {
289 None => panic!("Specified span is invalid."),
290 Some(span) => span,
291 };
292
293 Self::Cursor::new(self, span)
294 }
295
296 #[inline(always)]
297 fn length(&self) -> Length {
298 self.lines.code_length()
299 }
300
301 #[inline(always)]
302 fn tokens(&self) -> TokenCount {
303 self.tokens.len()
304 }
305
306 #[inline(always)]
307 fn lines(&self) -> &LineIndex {
308 &self.lines
309 }
310}
311
312impl<T: Token> Default for TokenBuffer<T> {
313 #[inline]
314 fn default() -> Self {
315 Self::new()
316 }
317}
318
319impl<'buffer, T: Token> IntoIterator for &'buffer TokenBuffer<T> {
320 type Item = <Self::IntoIter as Iterator>::Item;
321 type IntoIter = TokenBufferIter<'buffer, T>;
322
323 #[inline(always)]
324 fn into_iter(self) -> Self::IntoIter {
325 Self::IntoIter {
326 site: 0,
327 tokens: self.tokens.iter(),
328 spans: self.spans.iter(),
329 indices: self.indices.iter().peekable(),
330 text: self.text.as_str(),
331 }
332 }
333}
334
335impl<T: Token> TokenBuffer<T> {
336 #[inline(always)]
338 pub fn parse(text: impl AsRef<str>) -> Self {
339 Self::from(text)
340 }
341
342 #[inline(always)]
347 pub fn new() -> Self {
348 Self {
349 id: Id::new(),
350 tokens: Vec::new(),
351 sites: Vec::new(),
352 spans: Vec::new(),
353 indices: Vec::new(),
354 text: String::new(),
355 lines: LineIndex::new(),
356 }
357 }
358
359 #[inline(always)]
363 pub fn with_capacity(tokens: TokenCount, text: usize) -> Self {
364 Self {
365 id: Id::new(),
366 tokens: Vec::with_capacity(tokens),
367 sites: Vec::with_capacity(tokens),
368 spans: Vec::with_capacity(tokens),
369 indices: Vec::with_capacity(tokens),
370 text: String::with_capacity(text),
371 lines: LineIndex::with_capacity(text),
372 }
373 }
374
375 pub fn append(&mut self, text: impl AsRef<str>) {
378 let text = text.as_ref();
379
380 if text.is_empty() {
381 return;
382 }
383
384 let mut byte = self.text.len();
385 let mut site = self.length();
386
387 let mut lookback = 0;
388
389 while lookback < T::LOOKBACK {
390 let Some(span) = self.spans.pop() else {
391 break;
392 };
393
394 lookback += span;
395
396 if self.tokens.pop().is_none() {
397 unsafe { ld_unreachable!("TokenBuffer inconsistency.") };
400 }
401
402 byte = match self.indices.pop() {
403 Some(index) => index,
404 None => unsafe { ld_unreachable!("TokenBuffer inconsistency.") },
407 };
408
409 site = match self.sites.pop() {
410 Some(site) => site,
411 None => unsafe { ld_unreachable!("TokenBuffer inconsistency.") },
414 }
415 }
416
417 self.text.push_str(text);
418 self.lines.append(text);
419
420 BufferLexisSession::run(self, byte, site);
421 }
422
423 pub fn reserve(&mut self, tokens: TokenCount, text: usize) {
426 self.tokens.reserve(tokens);
427 self.sites.reserve(tokens);
428 self.spans.reserve(tokens);
429 self.indices.reserve(tokens);
430 self.text.reserve(text);
431 self.lines.reserve(text);
432 }
433
434 pub fn shrink_to_fit(&mut self) {
436 self.tokens.shrink_to_fit();
437 self.sites.shrink_to_fit();
438 self.spans.shrink_to_fit();
439 self.indices.shrink_to_fit();
440 self.text.shrink_to_fit();
441 self.lines.shrink_to_fit();
442 }
443
444 pub fn clear(&mut self) {
446 self.tokens.clear();
447 self.sites.clear();
448 self.spans.clear();
449 self.indices.clear();
450 self.text.clear();
451 self.lines.clear();
452 }
453
454 #[inline(always)]
455 pub(crate) fn reset_id(&mut self) {
456 self.id = Id::new();
457 }
458
459 #[inline(always)]
460 pub(crate) fn update_line_index(&mut self) {
461 self.lines.clear();
462 self.lines.append(self.text.as_str());
463 }
464
465 #[inline]
466 pub(super) fn push(&mut self, token: T, from: &Cursor<Site>, to: &Cursor<Site>) {
467 let span = to.site - from.site;
468
469 ld_assert!(span > 0, "Empty span.");
470
471 let _ = self.tokens.push(token);
472 let _ = self.sites.push(from.site);
473 let _ = self.spans.push(span);
474 let _ = self.indices.push(from.byte);
475 }
476
477 #[inline]
478 fn search(&self, site: Site) -> Result<ByteIndex, (ByteIndex, Length)> {
479 if site >= self.length() {
480 return Ok(self.text.len());
481 }
482
483 match self.sites.binary_search(&site) {
484 Ok(index) => {
485 ld_assert!(index < self.indices.len(), "Index out of bounds.");
486
487 let byte_index = unsafe { self.indices.get_unchecked(index) };
488
489 Ok(*byte_index)
490 }
491
492 Err(mut index) => {
493 ld_assert!(index > 0, "Index out of bounds.");
494
495 index -= 1;
496
497 ld_assert!(index < self.indices.len(), "Index out of bounds.");
498 ld_assert!(index < self.sites.len(), "Index out of bounds.");
499
500 let byte_index = unsafe { self.indices.get_unchecked(index) };
501 let nearest_site = unsafe { self.sites.get_unchecked(index) };
502
503 Err((*byte_index, site - *nearest_site))
504 }
505 }
506 }
507}
508
509pub struct TokenBufferIter<'buffer, T: Token> {
510 site: Site,
511 tokens: Iter<'buffer, T>,
512 spans: Iter<'buffer, Length>,
513 indices: Peekable<Iter<'buffer, ByteIndex>>,
514 text: &'buffer str,
515}
516
517impl<'buffer, T: Token> Iterator for TokenBufferIter<'buffer, T> {
518 type Item = Chunk<'buffer, T>;
519
520 #[inline(always)]
521 fn next(&mut self) -> Option<Self::Item> {
522 let token = *self.tokens.next()?;
523 let site = self.site;
524 let length = *self.spans.next()?;
525 let start = *self.indices.next()?;
526
527 let string = match self.indices.peek() {
528 None => unsafe { self.text.get_unchecked(start..) },
530 Some(end) => unsafe { self.text.get_unchecked(start..**end) },
532 };
533
534 self.site += length;
535
536 Some(Self::Item {
537 token,
538 site,
539 length,
540 string,
541 })
542 }
543}
544
545impl<'buffer, T: Token> FusedIterator for TokenBufferIter<'buffer, T> {}