lady_deirdre/lexis/
code.rs

1////////////////////////////////////////////////////////////////////////////////
2// This file is part of "Lady Deirdre", a compiler front-end foundation       //
3// technology.                                                                //
4//                                                                            //
5// This work is proprietary software with source-available code.              //
6//                                                                            //
7// To copy, use, distribute, or contribute to this work, you must agree to    //
8// the terms of the General License Agreement:                                //
9//                                                                            //
10// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md           //
11//                                                                            //
12// The agreement grants a Basic Commercial License, allowing you to use       //
13// this work in non-commercial and limited commercial products with a total   //
14// gross revenue cap. To remove this commercial limit for one of your         //
15// products, you must acquire a Full Commercial License.                      //
16//                                                                            //
17// If you contribute to the source code, documentation, or related materials, //
18// you must grant me an exclusive license to these contributions.             //
19// Contributions are governed by the "Contributions" section of the General   //
20// License Agreement.                                                         //
21//                                                                            //
22// Copying the work in parts is strictly forbidden, except as permitted       //
23// under the General License Agreement.                                       //
24//                                                                            //
25// If you do not or cannot agree to the terms of this Agreement,              //
26// do not use this work.                                                      //
27//                                                                            //
28// This work is provided "as is", without any warranties, express or implied, //
29// except where such disclaimers are legally invalid.                         //
30//                                                                            //
31// Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин).                 //
32// All rights reserved.                                                       //
33////////////////////////////////////////////////////////////////////////////////
34
35use std::{borrow::Cow, iter::FusedIterator, marker::PhantomData};
36
37use crate::{
38    arena::{Entry, Identifiable},
39    lexis::{Chunk, Length, LineIndex, Site, SiteRef, ToSpan, Token, TokenCount, TokenCursor},
40};
41
42/// An object that provides access to the source code text and the lexical
43/// structure of a compilation unit.
44///
45/// The lexical structure is a sequence of [tokens](crate::lexis::Token) that
46/// fully covers the source code text exhaustively and without overlaps.
47///
48/// This trait provides a low-level interface to access tokens and their
49/// metadata by the [versioned index](Entry) from the compilation unit.
50///
51/// A higher-level referential object, [TokenRef](crate::lexis::TokenRef),
52/// offers a more convenient interface to access these objects from
53/// the SourceCode.
54///
55/// Additionally, the SourceCode interface provides functions to access
56/// substrings of the source code text, to iterate through individual characters
57/// in range, and to iterate through the tokens and their metadata in range.
58pub trait SourceCode: Identifiable {
59    /// Specifies the type of the source code token and the lexical scanner
60    /// of a programming language through the [Token::scan] function.
61    type Token: Token;
62
63    /// Specifies the type of the [token cursor](TokenCursor) that iterates
64    /// through the streams of tokens of this source code type.
65    type Cursor<'code>: TokenCursor<'code, Token = Self::Token>
66    where
67        Self: 'code;
68
69    /// Specifies the type of the iterator that iterates through the unicode
70    /// characters of the source code text substrings.
71    type CharIterator<'code>: Iterator<Item = char> + FusedIterator + 'code
72    where
73        Self: 'code;
74
75    /// Returns an iterator of the source code tokens [metadata](Chunk)
76    /// in the specified `span`.
77    ///
78    /// The iterator will yield all token chunks if the `span` intersects with
79    /// the token spans, including the span bounds.
80    ///
81    /// For example, in the text `FooBarBaz` with three tokens, the span `3..6`
82    /// will iterate through all three tokens because this span covers
83    /// the token `Bar` and intersects with the tokens `Foo` and `Baz`
84    /// by their bounds. The span `3..3` would iterate through the `Foo` and
85    /// `Bar` tokens because this span intersects with these two token bounds.
86    /// The span `4..5` would yield the `Bar` token only because this span
87    /// intersects with the token's span.
88    ///
89    /// **Panic**
90    ///
91    /// This function may panic if the specified `span` is not
92    /// [valid](ToSpan::is_valid_span) for this source code.
93    #[inline(always)]
94    fn chunks(&self, span: impl ToSpan) -> ChunkIter<'_, Self::Cursor<'_>>
95    where
96        Self: Sized,
97    {
98        let span = match span.to_site_span(self) {
99            None => panic!("Specified span is invalid."),
100            Some(span) => span,
101        };
102
103        let cursor = self.cursor(span.clone());
104
105        ChunkIter {
106            cursor,
107            _code_lifetime: PhantomData::default(),
108        }
109    }
110
111    /// Returns an iterator that iterates over the [Unicode chars](char) of the
112    /// source code text substring in the specified `span`.
113    ///
114    /// For example, in the text `FooBarBaz` with three tokens, the span `2..6`
115    /// will iterate the characters of the `oBar` substring.
116    ///
117    /// **Panic**
118    ///
119    /// This function may panic if the specified `span` is not
120    /// [valid](ToSpan::is_valid_span) for this source code.
121    fn chars(&self, span: impl ToSpan) -> Self::CharIterator<'_>;
122
123    /// Returns a borrowed or an owned substring of the source code text in
124    /// the specified `span`.
125    ///
126    /// For example, in the text `FooBarBaz` with three tokens, the span `2..6`
127    /// will yield an `oBar` substring.
128    ///
129    /// The decision about the returning string ownership is implementation
130    /// dependent.
131    ///
132    /// **Panic**
133    ///
134    /// This function may panic if the specified `span` is not
135    /// [valid](ToSpan::is_valid_span) for this source code.
136    fn substring(&self, span: impl ToSpan) -> Cow<str>
137    where
138        Self: Sized,
139    {
140        let span = match span.to_site_span(self) {
141            None => panic!("Specified span is invalid."),
142            Some(span) => span,
143        };
144
145        let mut cursor = self.cursor(span.clone());
146
147        if cursor.site(0) == Some(span.start) && cursor.site(1) == Some(span.end) {
148            if let Some(string) = cursor.string(0) {
149                return Cow::Borrowed(string);
150            }
151        }
152
153        Cow::from(self.chars(span).collect::<String>())
154    }
155
156    /// Checks if the token referred to by the versioned index exists in this
157    /// source code.
158    fn has_chunk(&self, entry: &Entry) -> bool;
159
160    /// Returns a copy of the token referred to by the versioned index.
161    ///
162    /// If the index parameter `entry` is not valid, returns None.
163    fn get_token(&self, entry: &Entry) -> Option<Self::Token>;
164
165    /// Returns a start site of the token referred to by the versioned index.
166    ///
167    /// If the index parameter `entry` is not valid, returns None.
168    fn get_site(&self, entry: &Entry) -> Option<Site>;
169
170    /// Returns a reference to the source code text substring covered by
171    /// the token referred to by the versioned index.
172    ///
173    /// If the index parameter `entry` is not valid, returns None.
174    fn get_string(&self, entry: &Entry) -> Option<&str>;
175
176    /// Returns the [length](Length) of the source code text substring covered
177    /// by the token referred to by the versioned index.
178    ///
179    /// If the index parameter `entry` is not valid, returns None.
180    fn get_length(&self, entry: &Entry) -> Option<Length>;
181
182    /// Returns a cursor of the source code tokens stream in
183    /// the specified `span`.
184    ///
185    /// The token stream includes a source code token if the `span` intersects
186    /// with the span of this token, including the span bounds.
187    ///
188    /// The returning object does not implement the [Iterator] interface
189    /// but provides the capabilities for manual control over the iteration
190    /// process, including the capabilities to look ahead of the token stream.
191    ///
192    /// If you need just a normal Iterator over the token chunks,
193    /// use the [chunks](Self::chunks) function instead.
194    ///
195    /// **Panic**
196    ///
197    /// This function may panic if the specified `span` is not
198    /// [valid](ToSpan::is_valid_span) for this source code.
199    fn cursor(&self, span: impl ToSpan) -> Self::Cursor<'_>;
200
201    /// Returns a [SiteRef] that points to the end of this source code.
202    #[inline(always)]
203    fn end_site_ref(&self) -> SiteRef {
204        SiteRef::end_of(self.id())
205    }
206
207    /// Returns the total number of Unicode characters in this source code text.
208    fn length(&self) -> Length;
209
210    /// Returns the total number of tokens in this source code.
211    fn tokens(&self) -> TokenCount;
212
213    /// Provides access to the [line index](LineIndex) of this source code.
214    ///
215    /// From this object, you can convert char sites back and forth to their
216    /// line indices, and to reveal the total number of lines in this source
217    /// code.
218    fn lines(&self) -> &LineIndex;
219
220    /// Returns true if the source code text is empty.
221    #[inline(always)]
222    fn is_empty(&self) -> bool {
223        self.length() == 0
224    }
225}
226
227/// An iterator over the [SourceCode] tokens and their metadata.
228///
229/// This object is created by the [SourceCode::chunks] function.
230#[repr(transparent)]
231pub struct ChunkIter<'code, C: TokenCursor<'code>> {
232    cursor: C,
233    _code_lifetime: PhantomData<&'code ()>,
234}
235
236impl<'code, C: TokenCursor<'code>> Iterator for ChunkIter<'code, C> {
237    type Item = Chunk<'code, <C as TokenCursor<'code>>::Token>;
238
239    #[inline]
240    fn next(&mut self) -> Option<Self::Item> {
241        let token = self.cursor.token(0);
242        let site = self.cursor.site(0)?;
243        let length = self.cursor.length(0)?;
244        let string = self.cursor.string(0)?;
245
246        if !self.cursor.advance() {
247            return None;
248        }
249
250        Some(Self::Item {
251            token,
252            site,
253            length,
254            string,
255        })
256    }
257}
258
259impl<'code, C: TokenCursor<'code>> FusedIterator for ChunkIter<'code, C> {}