Skip to main content

lintspec_core/
parser.rs

1//! Solidity parser interface
2use std::{collections::HashMap, io, path::Path};
3
4use crate::{
5    definitions::{
6        Definition, constructor::ConstructorDefinition, enumeration::EnumDefinition,
7        error::ErrorDefinition, event::EventDefinition, modifier::ModifierDefinition,
8        structure::StructDefinition,
9    },
10    error::{ErrorKind, Result},
11    prelude::OrPanic as _,
12    textindex::{TextIndex, TextRange, compute_indices},
13};
14
15#[cfg_attr(docsrs, doc(cfg(feature = "slang")))]
16#[cfg(feature = "slang")]
17pub mod slang;
18
19#[cfg_attr(docsrs, doc(cfg(feature = "solar")))]
20#[cfg(feature = "solar")]
21pub mod solar;
22
/// Opaque identifier attached to a parsed document.
///
/// This is a thin wrapper around a `u64`. The derived [`Default`] implementation yields the zero ID; call
/// [`DocumentId::new`] to obtain a randomly generated one.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct DocumentId(u64);
25
26impl DocumentId {
27    /// Generate a new random and unique document ID
28    #[must_use]
29    pub fn new() -> Self {
30        DocumentId(fastrand::u64(..))
31    }
32}
33
34/// The result of parsing and identifying source items in a document
35#[derive(Debug)]
36pub struct ParsedDocument {
37    /// A unique ID for the document given by the parser
38    ///
39    /// Can be used to retrieve the document contents after parsing (via [`Parse::get_sources`]).
40    pub id: DocumentId,
41
42    /// The list of definitions found in the document
43    pub definitions: Vec<Definition>,
44}
45
46/// The trait implemented by all parsers
47///
48/// Ideally, cloning a parser should not duplicate the contents of the sources. The underlying data should be wrapped
49/// in an [`Arc`][std::sync::Arc], so that the last clone of a parser is able to retrieve the sources' contents for
50/// all files.
51pub trait Parse: Clone {
52    /// Parse a document from a reader and identify the relevant source items
53    ///
54    /// If a path is provided, then this can be used to enrich diagnostics.
55    /// The fact that this takes in a mutable reference to the parser allows for stateful parsers.
56    fn parse_document(
57        &mut self,
58        input: impl io::Read,
59        path: Option<impl AsRef<Path>>,
60        keep_contents: bool,
61    ) -> Result<ParsedDocument>;
62
63    /// Retrieve the contents of the source files after parsing is done
64    ///
65    /// This consumes the parser, so that ownership of the contents can be retrieved safely.
66    /// Note that documents which were parsed with `keep_contents` to `false` will no be present in the map.
67    ///
68    /// This can return an error if there are more than one clone of the parser.
69    fn get_sources(self) -> Result<HashMap<DocumentId, String>>;
70}
71
72/// Gather all the start and end byte offsets for the definitions, including their members/params/returns.
73///
74/// This requires that the definitions are sorted by start offset. The result is also sorted.
75fn gather_offsets(definitions: &[Definition]) -> Vec<usize> {
76    fn register_span(offsets: &mut Vec<usize>, span: &TextRange) {
77        offsets.push(span.start.utf8);
78        offsets.push(span.end.utf8);
79    }
80    let mut offsets = Vec::with_capacity(definitions.len() * 16); // seems about right from code in the wild
81    // register all start and end utf-8 offsets for the definitions and their relevant properties
82    // definitions are sorted by start offset due to how the AST is traversed
83    for def in definitions {
84        def.span().inspect(|s| register_span(&mut offsets, s));
85        match def {
86            Definition::Constructor(ConstructorDefinition { params, .. })
87            | Definition::Error(ErrorDefinition { params, .. })
88            | Definition::Event(EventDefinition { params, .. })
89            | Definition::Modifier(ModifierDefinition { params, .. })
90            | Definition::Enumeration(EnumDefinition {
91                members: params, ..
92            })
93            | Definition::Struct(StructDefinition {
94                members: params, ..
95            }) => {
96                for p in params {
97                    register_span(&mut offsets, &p.span);
98                }
99            }
100            Definition::Function(d) => {
101                d.params
102                    .iter()
103                    .for_each(|i| register_span(&mut offsets, &i.span));
104                d.returns
105                    .iter()
106                    .for_each(|i| register_span(&mut offsets, &i.span));
107            }
108            Definition::NatspecParsingError(ErrorKind::NatspecParsingError { span, .. }) => {
109                register_span(&mut offsets, span);
110            }
111            Definition::Contract(_)
112            | Definition::Interface(_)
113            | Definition::Library(_)
114            | Definition::Variable(_)
115            | Definition::NatspecParsingError(_) => {}
116        }
117    }
118    // we might have duplicate offsets and they are out of order (because a struct definition's span end is greater than
119    // the span start of its first member for example)
120    // we will deduplicate on the fly as we iterate to avoid re-allocating
121    offsets.sort_unstable();
122    offsets
123}
124
125/// Fill in the missing values in the spans of definitions.
126fn populate(text_indices: &[TextIndex], definitions: &mut Vec<Definition>) {
127    fn populate_span(indices: &[TextIndex], start_idx: usize, span: &mut TextRange) -> usize {
128        let idx;
129        (idx, span.start) = indices
130            .iter()
131            .enumerate()
132            .skip(start_idx)
133            .find_map(|(i, ti)| (ti.utf8 >= span.start.utf8).then_some((i, *ti)))
134            .or_panic("utf8 start offset should be present in cache");
135        span.end = *indices
136            .iter()
137            .skip(idx + 1)
138            .find(|ti| ti.utf8 >= span.end.utf8)
139            .or_panic("utf8 end offset should be present in cache");
140        // for the next definition or item inside of a definition, we can start after the start of this item
141        // because start indices increase monotonically
142        idx + 1
143    }
144    // definitions are sorted by start offset due to how the AST is traversed
145    // likewise, params, members, etc., are also sorted by start offset
146    // this means that we can populate spans while ignoring all items in `text_indices` prior to the index corresponding
147    // to the start offset of the previous definition
148    let mut idx = 0;
149    for def in definitions {
150        if let Some(span) = def.span_mut() {
151            idx = populate_span(text_indices, idx, span);
152        }
153        match def {
154            Definition::Constructor(ConstructorDefinition { params, .. })
155            | Definition::Error(ErrorDefinition { params, .. })
156            | Definition::Event(EventDefinition { params, .. })
157            | Definition::Modifier(ModifierDefinition { params, .. })
158            | Definition::Enumeration(EnumDefinition {
159                members: params, ..
160            })
161            | Definition::Struct(StructDefinition {
162                members: params, ..
163            }) => {
164                for p in params {
165                    idx = populate_span(text_indices, idx, &mut p.span);
166                }
167            }
168            Definition::Function(d) => {
169                for p in &mut d.params {
170                    idx = populate_span(text_indices, idx, &mut p.span);
171                }
172                for p in &mut d.returns {
173                    idx = populate_span(text_indices, idx, &mut p.span);
174                }
175            }
176            Definition::NatspecParsingError(ErrorKind::NatspecParsingError { span, .. }) => {
177                idx = populate_span(text_indices, idx, span);
178            }
179            Definition::Contract(_)
180            | Definition::Interface(_)
181            | Definition::Library(_)
182            | Definition::Variable(_)
183            | Definition::NatspecParsingError(_) => {}
184        }
185    }
186}
187
188/// Complete the [`TextRange`] of a list of [`Definition`].
189///
190/// Parsers might only give us partial information (e.g. utf-8 byte offsets), but we need the full
191/// line/column information in all encodings. This function computes the missing fields.
192pub fn complete_text_ranges(source: &str, definitions: &mut Vec<Definition>) {
193    let offsets = gather_offsets(definitions);
194    if offsets.is_empty() {
195        return;
196    }
197
198    let text_indices = compute_indices(source, &offsets);
199
200    populate(&text_indices, definitions);
201}