lady_deirdre/lexis/
span.rs

1////////////////////////////////////////////////////////////////////////////////
2// This file is part of "Lady Deirdre", a compiler front-end foundation       //
3// technology.                                                                //
4//                                                                            //
5// This work is proprietary software with source-available code.              //
6//                                                                            //
7// To copy, use, distribute, or contribute to this work, you must agree to    //
8// the terms of the General License Agreement:                                //
9//                                                                            //
10// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md           //
11//                                                                            //
12// The agreement grants a Basic Commercial License, allowing you to use       //
13// this work in non-commercial and limited commercial products with a total   //
14// gross revenue cap. To remove this commercial limit for one of your         //
15// products, you must acquire a Full Commercial License.                      //
16//                                                                            //
17// If you contribute to the source code, documentation, or related materials, //
18// you must grant me an exclusive license to these contributions.             //
19// Contributions are governed by the "Contributions" section of the General   //
20// License Agreement.                                                         //
21//                                                                            //
22// Copying the work in parts is strictly forbidden, except as permitted       //
23// under the General License Agreement.                                       //
24//                                                                            //
25// If you do not or cannot agree to the terms of this Agreement,              //
26// do not use this work.                                                      //
27//                                                                            //
28// This work is provided "as is", without any warranties, express or implied, //
29// except where such disclaimers are legally invalid.                         //
30//                                                                            //
31// Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин).                 //
32// All rights reserved.                                                       //
33////////////////////////////////////////////////////////////////////////////////
34
35use std::{
36    fmt::{Debug, Display, Formatter},
37    ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive},
38};
39
40use crate::{
41    arena::{Id, Identifiable},
42    format::{AnnotationPriority, SnippetFormatter},
43    lexis::{Position, Site, SiteRef, SourceCode, ToSite},
44    report::ld_unreachable,
45};
46
47/// A span between two Unicode characters.
48///
/// For example, `10..18` is a site span that starts from the 10th character
50/// (inclusive) and lasts until the 18th character (exclusive). The total
51/// length of such a span is 8 Unicode chars.
52///
53/// A **SiteSpan is considered valid for any [SourceCode]** as long as the
54/// end bound of the range is greater or equal to the start bound. If the bounds
55/// exceed source code length, they will be clamped.
56///
57/// See [ToSpan] for details.
58pub type SiteSpan = Range<Site>;
59
60/// A span between two [pinned sites](SiteRef).
61///
62/// This kind of span between two SiteRefs resolves to the [SiteSpan] of the
63/// sites of these SiteRefs.
64///
65/// This object allows you to specify a span between two pinned points in the
66/// source code rather than the absolute Unicode character [sites](Site).
67///
68/// Whenever the end user writes text to the underlying compilation unit
69/// outside of the span or inside the span, but the text edit does not include
70/// the SiteRefSpan bounds, the SiteRefSpan bounds automatically adjust in
71/// accordance with the source code edits: the absolute site of the bound will
72/// be shifted left or right accordingly.
73///
74/// However, if the edit affects the SiteRefSpan bound tokens during
75/// incremental reparsing, the entire object may become invalid.
76///
77/// See [ToSpan] for details.
78pub type SiteRefSpan = Range<SiteRef>;
79
80impl Identifiable for SiteRefSpan {
81    #[inline(always)]
82    fn id(&self) -> Id {
83        let id = self.start.id();
84
85        if self.end.id() != id {
86            return Id::nil();
87        }
88
89        id
90    }
91}
92
93/// A span between two Unicode characters addressed by the text
94/// [line-column positions](Position).
95///
/// For example, `Position::new(2, 10)..Position::new(2, 18)` is a position span
/// that starts from the 9th character of the second line (inclusive) and lasts
98/// until the 17th character of the second line (exclusive). The total
99/// length of such a span is 8 Unicode chars.
100///
101/// A PositionSpan is considered valid for any [SourceCode] as long as the
102/// end bound of the range is greater or equal to the start bound. If the bounds
103/// exceed source code length, they will be clamped.
104///
105/// See [ToSpan] for details.
106pub type PositionSpan = Range<Position>;
107
108/// An object that addresses a fragment of the source code text.
109///
110/// In Lady Deirdre, a minimal unit of text measurement is a Unicode character.
111///
112/// Addressing code ranges ("spans") just by the ranges of the Unicode
113/// absolute indices ([SiteSpan]) would be inconvenient.
114///
115/// The ToSpan trait is a generic interface that provides conversion between
116/// custom types of spans and the SiteSpans.
117///
118/// In particular, Lady Deirdre provides the following custom span types that
119/// implement the ToSpan trait:
120///
121///  - The [SiteSpan] itself, which is a range of absolute Unicode char
122///    indices: `10..20`.
123///  - The [PositionSpan], which is a range in terms of the line-column indices:
124///    `Position::new(10, 20)..Position::new(15, 28)`
125///  - The [SiteRefSpan], which is a range between the
126///    [TokenRef](crate::lexis::TokenRef) bounds.
127///
128/// You are encouraged to provide your own implementations of the [ToSpan] on
129/// custom span types depending on the needs.
130///
/// For convenience, standard Rust ranges whose bounds are of any type that
/// implements the [ToSite] trait (a trait of custom text indices) implement
/// the ToSpan trait as well: `10..=20`, `Position::new(8, 6)..`
/// are all valid span types.
135///
136/// Additionally, the `..` ([RangeFull]) implements the [ToSpan] trait and
137/// denotes the full source code text range.
138///
/// Also, if the type `T` implements ToSpan, its referential type `&T`
/// implements ToSpan too.
141///
142/// **Safety**
143///
144/// The implementor of the trait guarantees the following:
145///
146///  1. If the [ToSpan::to_site_span] function returns Some site span, the
147///     lower range bound is less than or equal to the upper bound, and
148///     the upper bound is less than or equal to the `code`'s
149///     [length](SourceCode::length).
150///     In other words, the function returns a valid span within the source
151///     code text bounds.
152///
153///  2. The [ToSpan::to_site_span] and [ToSpan::to_position_span] return Some
154///     value if and only if the [ToSpan::is_valid_span] returns true for
155///     the same source code.
156pub unsafe trait ToSpan {
157    /// Returns a [SiteSpan] representation of this span object.
158    ///
159    /// The `code` parameter specifies a source code to which this span object
160    /// belongs.
161    ///
162    /// The returning SiteSpan is a valid range within the [SourceCode] bounds.
163    ///
164    /// Returns None, if the span object is not [valid](Self::is_valid_span).
165    fn to_site_span(&self, code: &impl SourceCode) -> Option<SiteSpan>;
166
167    /// Returns a [line-column range](PositionSpan) representation of this span
168    /// object.
169    ///
170    /// The `code` parameter specifies a source code to which this span object
171    /// belongs.
172    ///
173    /// Returns None, if the span object is not [valid](Self::is_valid_span).
174    fn to_position_span(&self, code: &impl SourceCode) -> Option<PositionSpan> {
175        let span = self.to_site_span(code)?;
176
177        Some(span.start.to_position(code)?..span.end.to_position(code)?)
178    }
179
180    /// Returns true if this span object considered valid within the `code`
181    /// [SourceCode].
182    ///
183    /// The span validity is implementation dependent.
184    ///
185    /// For the range-like spans (such as [Range], [RangeTo], etc), the range
186    /// considered valid as long as the range bounds
187    /// are [valid sites](ToSite::is_valid_site), and the start site of
188    /// the range does not exceed the range's end site.
189    ///
190    /// Note that the [SiteSpan] range (with the start bound less than or equal
191    /// to the end bound) is always valid span because the [ToSite]
192    /// implementation of the [Site] always clamps the site to the SourceCode
193    /// [length](SourceCode::length).
194    fn is_valid_span(&self, code: &impl SourceCode) -> bool;
195
196    /// Returns a displayable object that prints the underlying span object
197    /// for debugging purposes.
198    #[inline(always)]
199    fn display<'a>(&self, code: &'a impl SourceCode) -> impl Debug + Display + 'a {
200        DisplaySpan {
201            code,
202            span: self.to_site_span(code),
203        }
204    }
205}
206
207unsafe impl<S: ToSpan> ToSpan for &S {
208    #[inline(always)]
209    fn to_site_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
210        (*self).to_site_span(code)
211    }
212
213    #[inline(always)]
214    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
215        (*self).is_valid_span(code)
216    }
217}
218
219unsafe impl ToSpan for RangeFull {
220    #[inline(always)]
221    fn to_site_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
222        Some(0..code.length())
223    }
224
225    #[inline(always)]
226    fn is_valid_span(&self, _code: &impl SourceCode) -> bool {
227        true
228    }
229}
230
231unsafe impl<Site: ToSite> ToSpan for Range<Site> {
232    #[inline]
233    fn to_site_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
234        let start = self.start.to_site(code);
235        let end = self.end.to_site(code);
236
237        match (start, end) {
238            (Some(start), Some(end)) if start <= end => Some(start..end),
239            _ => None,
240        }
241    }
242
243    #[inline]
244    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
245        let start = self.start.to_site(code);
246        let end = self.end.to_site(code);
247
248        match (start, end) {
249            (Some(start), Some(end)) if start <= end => true,
250            _ => false,
251        }
252    }
253}
254
255unsafe impl<Site: ToSite> ToSpan for RangeInclusive<Site> {
256    #[inline]
257    fn to_site_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
258        let start = self.start().to_site(code);
259        let end = self.end().to_site(code);
260
261        match (start, end) {
262            (Some(start), Some(mut end)) if start <= end => {
263                if end < code.length() && end < usize::MAX {
264                    end += 1;
265                }
266
267                Some(start..end)
268            }
269            _ => None,
270        }
271    }
272
273    #[inline]
274    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
275        let start = self.start().to_site(code);
276        let end = self.end().to_site(code);
277
278        match (start, end) {
279            (Some(start), Some(end)) if start <= end => true,
280            _ => false,
281        }
282    }
283}
284
285unsafe impl<Site: ToSite> ToSpan for RangeFrom<Site> {
286    #[inline]
287    fn to_site_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
288        let start = match self.start.to_site(code) {
289            None => return None,
290            Some(site) => site,
291        };
292
293        let end = code.length();
294
295        Some(start..end)
296    }
297
298    #[inline(always)]
299    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
300        self.start.is_valid_site(code)
301    }
302}
303
304unsafe impl<Site: ToSite> ToSpan for RangeTo<Site> {
305    #[inline]
306    fn to_site_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
307        let end = match self.end.to_site(code) {
308            None => return None,
309            Some(site) => site,
310        };
311
312        Some(0..end)
313    }
314
315    #[inline(always)]
316    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
317        self.end.is_valid_site(code)
318    }
319}
320
321unsafe impl<Site: ToSite> ToSpan for RangeToInclusive<Site> {
322    #[inline]
323    fn to_site_span(&self, code: &impl SourceCode) -> Option<SiteSpan> {
324        let end = match self.end.to_site(code) {
325            None => return None,
326            Some(site) => {
327                if site < code.length() && site < usize::MAX {
328                    site + 1
329                } else {
330                    site
331                }
332            }
333        };
334
335        Some(0..end)
336    }
337
338    #[inline(always)]
339    fn is_valid_span(&self, code: &impl SourceCode) -> bool {
340        self.end.is_valid_site(code)
341    }
342}
343
344struct DisplaySpan<'a, Code: SourceCode> {
345    code: &'a Code,
346    span: Option<SiteSpan>,
347}
348
349impl<'a, Code> Debug for DisplaySpan<'a, Code>
350where
351    Code: SourceCode,
352{
353    #[inline(always)]
354    fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
355        Display::fmt(self, formatter)
356    }
357}
358
359impl<'a, Code> Display for DisplaySpan<'a, Code>
360where
361    Code: SourceCode,
362{
363    fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
364        let span = match &self.span {
365            None => return formatter.write_str("?"),
366            Some(span) => span.clone(),
367        };
368
369        if !formatter.alternate() {
370            let chars = span.end - span.start;
371            let breaks = self.code.chars(&span).filter(|ch| *ch == '\n').count();
372
373            let span = match span.to_position_span(self.code) {
374                Some(span) => span,
375
376                // Safety: Site spans are always valid to resolve.
377                None => unsafe { ld_unreachable!("Invalid position span.") },
378            };
379
380            formatter.write_fmt(format_args!("{}", span.start))?;
381
382            if chars > 0 {
383                formatter.write_str(" (")?;
384
385                match chars > 1 {
386                    false => formatter.write_str("1 char")?,
387                    true => formatter.write_fmt(format_args!("{chars} chars"))?,
388                }
389
390                match breaks {
391                    0 => (),
392                    1 => formatter.write_str(", 1 line break")?,
393                    _ => formatter.write_fmt(format_args!(", {breaks} line breaks"))?,
394                }
395
396                formatter.write_str(")")?;
397            }
398
399            return Ok(());
400        }
401
402        formatter
403            .snippet(self.code)
404            .set_caption(format!("Unit({})", self.code.id()))
405            .set_summary(format!(
406                "Site span: {}..{}\nPosition span: {}",
407                span.start,
408                span.end,
409                span.display(self.code),
410            ))
411            .annotate(span, AnnotationPriority::Default, "")
412            .finish()
413    }
414}