lady_deirdre/units/unit.rs
1////////////////////////////////////////////////////////////////////////////////
2// This file is part of "Lady Deirdre", a compiler front-end foundation //
3// technology. //
4// //
5// This work is proprietary software with source-available code. //
6// //
7// To copy, use, distribute, or contribute to this work, you must agree to //
8// the terms of the General License Agreement: //
9// //
10// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md //
11// //
12// The agreement grants a Basic Commercial License, allowing you to use //
13// this work in non-commercial and limited commercial products with a total //
14// gross revenue cap. To remove this commercial limit for one of your //
15// products, you must acquire a Full Commercial License. //
16// //
17// If you contribute to the source code, documentation, or related materials, //
18// you must grant me an exclusive license to these contributions. //
19// Contributions are governed by the "Contributions" section of the General //
20// License Agreement. //
21// //
22// Copying the work in parts is strictly forbidden, except as permitted //
23// under the General License Agreement. //
24// //
25// If you do not or cannot agree to the terms of this Agreement, //
26// do not use this work. //
27// //
28// This work is provided "as is", without any warranties, express or implied, //
29// except where such disclaimers are legally invalid. //
30// //
31// Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин). //
32// All rights reserved. //
33////////////////////////////////////////////////////////////////////////////////
34
35use std::fmt::{Debug, Display, Formatter};
36
37use crate::{
38 arena::{Entry, Identifiable},
39 lexis::{
40 Chunk,
41 Length,
42 LineIndex,
43 Site,
44 SiteSpan,
45 SourceCode,
46 ToSpan,
47 Token,
48 TokenBuffer,
49 TokenCount,
50 TokenCursor,
51 },
52 syntax::{Capture, Node, NodeRef, PolyRef, PolyVariant, SyntaxError, SyntaxTree},
53 units::{Document, ImmutableUnit, MutableUnit},
54};
55
56/// An object that grants access to the lexical and syntax structure of
57/// an individual file within the compilation project.
58///
59/// [Document], [ImmutableUnit] and [MutableUnit] are compilation units
60/// because they offer access to both components of the language grammar,
61/// but, for instance, [TokenBuffer] is not because it only provides an access
62/// to the lexical structure only.
63///
64/// CompilationUnit trait provides conventional functions to convert this unit
65/// into other types of units and some syntax analysis functions
66/// that require access to the full grammar structure of the file.
67///
68/// If you intend to implement this trait on your object, take a look at the
69/// [Lexis] and the [Syntax] facade-interfaces; they will assist you in exposing
70/// particular components of the grammar.
71pub trait CompilationUnit:
72 SourceCode<Token = <<Self as SyntaxTree>::Node as Node>::Token> + SyntaxTree
73{
74 /// Returns `true` if the compilation unit allows document write operations
75 /// after creation.
76 fn is_mutable(&self) -> bool;
77
78 /// Returns `true` if the compilation unit does not have write capabilities
79 /// after creation.
80 #[inline(always)]
81 fn is_immutable(&self) -> bool {
82 !self.is_mutable()
83 }
84
85 /// Extracts lexical structure of the compilation unit.
86 fn into_token_buffer(self) -> TokenBuffer<<Self as SourceCode>::Token>;
87
88 /// Converts this compilation unit into [Document].
89 ///
90 /// The mutable capabilities of the returning document depend on the
91 /// [CompilationUnit::is_mutable] value.
92 ///
93 /// Depending on the implementation this function may require full
94 /// source code reparsing, but implementors typically make the best effort
95 /// to reduce overhead. In particular, [Document]'s into_document is noop.
96 #[inline(always)]
97 fn into_document(self) -> Document<<Self as SyntaxTree>::Node>
98 where
99 Self: Sized,
100 {
101 match self.is_mutable() {
102 true => Document::Mutable(self.into_mutable_unit()),
103 false => Document::Immutable(self.into_immutable_unit()),
104 }
105 }
106
107 /// Converts this compilation unit [MutableUnit].
108 ///
109 /// Depending on the implementation this function may require full
110 /// source code reparsing, but implementors typically make the best effort
111 /// to reduce overhead. In particular, [MutableUnit]'s into_mutable_unit
112 /// is noop.
113 #[inline(always)]
114 fn into_mutable_unit(self) -> MutableUnit<<Self as SyntaxTree>::Node>
115 where
116 Self: Sized,
117 {
118 self.into_token_buffer().into_mutable_unit()
119 }
120
121 /// Converts this compilation unit [ImmutableUnit].
122 ///
123 /// Depending on the implementation this function may require full
124 /// source code reparsing, but implementors typically make the best effort
125 /// to reduce overhead. In particular, [ImmutableUnit]'s into_immutable_unit
126 /// is noop.
127 #[inline(always)]
128 fn into_immutable_unit(self) -> ImmutableUnit<<Self as SyntaxTree>::Node>
129 where
130 Self: Sized,
131 {
132 self.into_token_buffer().into_immutable_unit()
133 }
134
135 /// Searches for the top-most node in the syntax tree that fully covers
136 /// specified source code [span](ToSpan).
137 ///
138 /// For example, in the case of JSON `{"foo": [123]}`, the coverage of the
139 /// "123" token could be the `[123]` array, and the coverage of the ":"
140 /// token could be the `"foo": [bar]` entry of the JSON object.
141 ///
142 /// The result depends on the particular programming language grammar.
143 ///
144 /// In the worst case scenario, if the algorithm fails to find the top-most
145 /// node, it returns the reference to the root node.
146 ///
147 /// **Panic**
148 ///
149 /// Panics if the specified span is not valid for this compilation unit.
150 #[inline(always)]
151 fn cover(&self, span: impl ToSpan) -> NodeRef
152 where
153 Self: Sized,
154 {
155 let span = match span.to_site_span(self) {
156 None => panic!("Specified span is invalid."),
157
158 Some(span) => span,
159 };
160
161 let root = self.root_node_ref();
162
163 match NodeCoverage::cover(self, &root, &span) {
164 NodeCoverage::Fit(result) => result,
165 _ => root,
166 }
167 }
168
169 /// Returns an object that prints the underlying grammar structure.
170 ///
171 /// The `poly_ref` parameter specifies a reference to a particular grammar
172 /// component to debug. It could be a [NodeRef],
173 /// [TokenRef](crate::lexis::TokenRef) or [PolyVariant].
174 ///
175 /// To print the entire syntax tree with all nodes and tokens metadata, you
176 /// can obtain a root NodeRef using [SyntaxTree::root_node_ref] function.
177 ///
178 /// The default implementation is infallible regardless of the `poly_ref`
179 /// validity.
180 #[inline(always)]
181 fn display(&self, poly_ref: &(impl PolyRef + ?Sized)) -> impl Debug + Display + '_
182 where
183 Self: Sized,
184 {
185 DisplayTree {
186 unit: self,
187 variant: poly_ref.as_variant(),
188 }
189 }
190}
191
192/// A facade of the lexical structure.
193///
194/// This trait auto-implements [SourceCode] on the target object by delegating
195/// all calls to the required function [Lexis::lexis].
196///
197/// ```rust
198/// use lady_deirdre::units::Lexis;
199/// use lady_deirdre::arena::{Id, Identifiable};
200/// use lady_deirdre::lexis::{Token, TokenBuffer};
201///
202/// struct MyDocument<T: Token> {
203/// buf: TokenBuffer<T>,
204/// }
205///
206/// impl<T: Token> Identifiable for MyDocument<T> {
207/// fn id(&self) -> Id {
208/// self.buf.id()
209/// }
210/// }
211///
212/// impl<T: Token> Lexis for MyDocument<T> {
213/// type Lexis = TokenBuffer<T>;
214///
215/// fn lexis(&self) -> &Self::Lexis {
216/// &self.buf
217/// }
218/// }
219/// ```
220pub trait Lexis: Identifiable {
221 /// The target [SourceCode] delegation type.
222 type Lexis: SourceCode;
223
224 /// This function fully exposes underlying [SourceCode] interface
225 /// of this object.
226 fn lexis(&self) -> &Self::Lexis;
227}
228
229impl<F: Lexis> SourceCode for F {
230 type Token = <F::Lexis as SourceCode>::Token;
231
232 type Cursor<'code> = <F::Lexis as SourceCode>::Cursor<'code>
233 where Self: 'code;
234
235 type CharIterator<'code> = <F::Lexis as SourceCode>::CharIterator<'code>
236 where
237 Self: 'code;
238
239 #[inline(always)]
240 fn chars(&self, span: impl ToSpan) -> Self::CharIterator<'_> {
241 self.lexis().chars(span)
242 }
243
244 #[inline(always)]
245 fn has_chunk(&self, entry: &Entry) -> bool {
246 self.lexis().has_chunk(entry)
247 }
248
249 #[inline(always)]
250 fn get_token(&self, entry: &Entry) -> Option<Self::Token> {
251 self.lexis().get_token(entry)
252 }
253
254 #[inline(always)]
255 fn get_site(&self, entry: &Entry) -> Option<Site> {
256 self.lexis().get_site(entry)
257 }
258
259 #[inline(always)]
260 fn get_string(&self, entry: &Entry) -> Option<&str> {
261 self.lexis().get_string(entry)
262 }
263
264 #[inline(always)]
265 fn get_length(&self, entry: &Entry) -> Option<Length> {
266 self.lexis().get_length(entry)
267 }
268
269 #[inline(always)]
270 fn cursor(&self, span: impl ToSpan) -> Self::Cursor<'_> {
271 self.lexis().cursor(span)
272 }
273
274 #[inline(always)]
275 fn length(&self) -> Length {
276 self.lexis().length()
277 }
278
279 #[inline(always)]
280 fn tokens(&self) -> TokenCount {
281 self.lexis().tokens()
282 }
283
284 #[inline(always)]
285 fn lines(&self) -> &LineIndex {
286 self.lexis().lines()
287 }
288}
289
290/// A facade of the syntax structure.
291///
292/// This trait auto-implements [SyntaxTree] on the target object by delegating
293/// all calls to the required functions [Syntax::syntax]
294/// and [Syntax::syntax_mut].
295///
296/// ```rust
297/// use lady_deirdre::units::Syntax;
298/// use lady_deirdre::arena::{Id, Identifiable};
299/// use lady_deirdre::syntax::{Node, ImmutableSyntaxTree};
300///
301/// struct MyDocument<N: Node> {
302/// tree: ImmutableSyntaxTree<N>,
303/// }
304///
305/// impl<N: Node> Identifiable for MyDocument<N> {
306/// fn id(&self) -> Id {
307/// self.tree.id()
308/// }
309/// }
310///
311/// impl<N: Node> Syntax for MyDocument<N> {
312/// type Syntax = ImmutableSyntaxTree<N>;
313///
314/// fn syntax(&self) -> &Self::Syntax {
315/// &self.tree
316/// }
317///
318/// fn syntax_mut(&mut self) -> &mut Self::Syntax {
319/// &mut self.tree
320/// }
321/// }
322/// ```
323pub trait Syntax: Identifiable {
324 /// The target [SyntaxTree] delegation type.
325 type Syntax: SyntaxTree;
326
327 /// This function fully exposes immutable access to the underlying
328 /// [SyntaxTree] interface of this object.
329 fn syntax(&self) -> &Self::Syntax;
330
331 /// This function fully exposes mutable access to the underlying
332 /// [SyntaxTree] interface of this object.
333 fn syntax_mut(&mut self) -> &mut Self::Syntax;
334}
335
336impl<F: Syntax> SyntaxTree for F {
337 type Node = <F::Syntax as SyntaxTree>::Node;
338
339 type NodeIterator<'tree> = <F::Syntax as SyntaxTree>::NodeIterator<'tree> where Self: 'tree;
340
341 type ErrorIterator<'tree> = <F::Syntax as SyntaxTree>::ErrorIterator<'tree> where Self: 'tree;
342
343 #[inline(always)]
344 fn root_node_ref(&self) -> NodeRef {
345 self.syntax().root_node_ref()
346 }
347
348 #[inline(always)]
349 fn node_refs(&self) -> Self::NodeIterator<'_> {
350 self.syntax().node_refs()
351 }
352
353 #[inline(always)]
354 fn error_refs(&self) -> Self::ErrorIterator<'_> {
355 self.syntax().error_refs()
356 }
357
358 #[inline(always)]
359 fn has_node(&self, entry: &Entry) -> bool {
360 self.syntax().has_node(entry)
361 }
362
363 #[inline(always)]
364 fn get_node(&self, entry: &Entry) -> Option<&Self::Node> {
365 self.syntax().get_node(entry)
366 }
367
368 #[inline(always)]
369 fn get_node_mut(&mut self, entry: &Entry) -> Option<&mut Self::Node> {
370 self.syntax_mut().get_node_mut(entry)
371 }
372
373 #[inline(always)]
374 fn has_error(&self, entry: &Entry) -> bool {
375 self.syntax().has_error(entry)
376 }
377
378 #[inline(always)]
379 fn get_error(&self, entry: &Entry) -> Option<&SyntaxError> {
380 self.syntax().get_error(entry)
381 }
382}
383
384struct DisplayTree<
385 'unit,
386 N: Node,
387 C: TokenCursor<'unit>,
388 U: CompilationUnit<Cursor<'unit> = C, Node = N>,
389> {
390 unit: &'unit U,
391 variant: PolyVariant,
392}
393
394impl<'unit, N, C, U> Debug for DisplayTree<'unit, N, C, U>
395where
396 N: Node,
397 C: TokenCursor<'unit>,
398 U: CompilationUnit<Cursor<'unit> = C, Node = N>,
399{
400 #[inline(always)]
401 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
402 Display::fmt(self, formatter)
403 }
404}
405
406impl<'unit, N, C, U> Display for DisplayTree<'unit, N, C, U>
407where
408 N: Node,
409 C: TokenCursor<'unit>,
410 U: CompilationUnit<Cursor<'unit> = C, Node = N>,
411{
412 fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
413 match &self.variant {
414 PolyVariant::Token(variant) => {
415 let chunk: Chunk<U::Token> = match variant.chunk(self.unit) {
416 None => return Debug::fmt(variant, formatter),
417 Some(chunk) => chunk,
418 };
419
420 let name = chunk.token.name().unwrap_or("TokenRef");
421
422 let mut debug_struct =
423 formatter.debug_struct(&format!("${name}(chunk_entry: {:?})", variant.entry));
424
425 debug_struct.field("string", &chunk.string);
426 debug_struct.field("length", &chunk.length);
427
428 if let Some(site_span) = chunk.to_site_span(self.unit) {
429 debug_struct.field("site_span", &site_span);
430
431 debug_struct.field(
432 "position_span",
433 &format_args!("{}", chunk.display(self.unit)),
434 );
435 }
436
437 debug_struct.finish()
438 }
439
440 PolyVariant::Node(variant) => {
441 let node: &N = match variant.deref(self.unit) {
442 None => return Debug::fmt(variant, formatter),
443 Some(node) => node,
444 };
445
446 let name = node.name().unwrap_or("NodeRef");
447
448 let alternate = formatter.alternate();
449
450 let mut debug_struct =
451 formatter.debug_struct(&format!("{name}(entry: {:?})", variant.entry));
452
453 for key in node.capture_keys() {
454 let Some(capture) = node.capture(*key) else {
455 continue;
456 };
457
458 let key = key.to_string();
459
460 match capture {
461 Capture::SingleNode(capture) => match alternate {
462 true => debug_struct
463 .field(&key, &format_args!("{:#}", self.unit.display(capture))),
464 false => debug_struct
465 .field(&key, &format_args!("{}", self.unit.display(capture))),
466 },
467
468 Capture::ManyNodes(capture) => {
469 let poly_refs = capture
470 .into_iter()
471 .map(|poly_ref| self.unit.display(poly_ref))
472 .collect::<Vec<_>>();
473
474 debug_struct.field(&key, &poly_refs)
475 }
476
477 Capture::SingleToken(capture) => match alternate {
478 true => debug_struct
479 .field(&key, &format_args!("{:#}", self.unit.display(capture))),
480 false => debug_struct
481 .field(&key, &format_args!("{}", self.unit.display(capture))),
482 },
483
484 Capture::ManyTokens(capture) => {
485 let poly_refs = capture
486 .into_iter()
487 .map(|poly_ref| self.unit.display(poly_ref))
488 .collect::<Vec<_>>();
489
490 debug_struct.field(&key, &poly_refs)
491 }
492 };
493 }
494
495 debug_struct.finish()
496 }
497 }
498 }
499}
500
501#[derive(Debug)]
502pub(super) enum NodeCoverage {
503 Nil,
504 Fit(NodeRef),
505 Misfit(Site),
506}
507
508impl NodeCoverage {
509 pub(super) fn cover<
510 'unit,
511 N: Node,
512 C: TokenCursor<'unit>,
513 U: CompilationUnit<Cursor<'unit> = C, Node = N>,
514 >(
515 unit: &'unit U,
516 node_ref: &NodeRef,
517 span: &SiteSpan,
518 ) -> Self {
519 let node: &N = match node_ref.deref(unit) {
520 None => return Self::Nil,
521 Some(node) => node,
522 };
523
524 let node_span = match node.span(unit) {
525 None => return Self::Nil,
526 Some(span) => span,
527 };
528
529 if node_span.start > span.start || node_span.end < span.end {
530 return Self::Misfit(node_span.start);
531 }
532
533 for child in node.children_iter() {
534 if !child.kind().is_node() {
535 continue;
536 }
537
538 match Self::cover(unit, child.as_node_ref(), span) {
539 Self::Nil => continue,
540 Self::Misfit(start) => {
541 if start > span.start {
542 break;
543 }
544 }
545 other => return other,
546 }
547 }
548
549 Self::Fit(*node_ref)
550 }
551}