open_vaf/parser/preprocessor/
source_map.rs

1/*
2 * ******************************************************************************************
3 * Copyright (c) 2019 Pascal Kuthe. This file is part of the OpenVAF project.
4 * It is subject to the license terms in the LICENSE file found in the top-level directory
5 *  of this distribution and at  https://gitlab.com/DSPOM/OpenVAF/blob/master/LICENSE.
6 *  No part of OpenVAF, including this file, may be copied, modified, propagated, or
7 *  distributed except according to the terms contained in the LICENSE file.
8 * *****************************************************************************************
9 */
10use std::marker::{PhantomData, PhantomPinned};
11use std::ops::Range;
12use std::path::Path;
13use std::ptr::NonNull;
14
15use bumpalo::Bump;
16use core::cell::Cell;
17use intrusive_collections::rbtree::CursorMut;
18use intrusive_collections::{Bound, KeyAdapter, RBTree, RBTreeLink};
19
20use crate::span::{Index, LineNumber};
21use crate::{Lexer, Span};
22use pretty_assertions::assert_ne;
23
/// Alias for `u8`.
// NOTE(review): not used in the visible part of this file; presumably the index of a macro
// argument — confirm at the use sites.
pub type ArgumentIndex = u8;
/// Alias for `u8`.
// NOTE(review): not used in the visible part of this file; presumably the nesting depth of
// macro calls — confirm at the use sites.
pub type CallDepth = u8;
26
// Map declarations
28
// Generates `SourceMapAdapter`: the intrusive-collections adapter that links `Substitution`
// values (held as `&'source_map Substitution`) into an `RBTree` through their `link` field.
intrusive_adapter!(SourceMapAdapter<'source_map> = &'source_map Substitution<'source_map> : Substitution<'source_map> {link:RBTreeLink});
// Substitutions are ordered inside the tree by their `start` field.
impl<'source_map, 'lt> KeyAdapter<'lt> for SourceMapAdapter<'source_map> {
    type Key = Index;

    /// Returns the `start` of `value`, which serves as its key in the tree.
    fn get_key(&self, value: &'lt Substitution<'source_map>) -> Self::Key {
        value.start
    }
}
37
/// A single root-level substitution (macro expansion or file include) recorded in the source map.
#[derive(Debug)]
pub(crate) struct Substitution<'source_map> {
    /// Name handed to the builder when the substitution was entered: the path for an included
    /// file, the macro name for a macro expansion.
    name: &'source_map str,
    /// Key of this substitution in the tree; also used as a byte offset into the expanded
    /// source when lines are resolved.
    start: Index,
    /// Initialised to `0` and set to the length of the expanded text once the substitution
    /// has been finished by the builder.
    end: Cell<Index>,
    /// Whether this is a macro expansion or a file include.
    stype: SourceType,
    /// Span of the text that was replaced; the builder uses it to index the root file contents.
    original_span: Span,
    /// Root line counter of the builder at the moment the substitution was entered.
    original_first_line: LineNumber,
    /// `original_first_line` plus the number of newlines inside `original_span`.
    original_last_line: LineNumber,
    /// Intrusive red-black tree link used by `SourceMapAdapter`.
    link: RBTreeLink,
}
/// The kind of text a `Substitution` was expanded from.
#[derive(Debug)]
pub(crate) enum SourceType {
    /// A macro expansion. `definition_line` is supplied by the preprocessor and is added as a
    /// line offset when a span inside the expansion is resolved.
    Macro { definition_line: LineNumber },
    /// The contents of an included file.
    File,
}
/// Maps positions in the fully expanded source back to lines of the original input.
#[derive(Debug)]
pub struct SourceMap<'source_map> {
    /// Name of the root file; used when building diagnostics descriptions.
    pub main_file_name: &'source_map str,
    /// The complete source after all substitutions; empty until the builder has finished.
    pub expanded_source: &'source_map str,
    // NOTE(review): only initialised in the visible code, never read or modified.
    children: RBTree<SourceMapAdapter<'source_map>>,
    /// Root-level substitutions keyed by their `start`.
    map: RBTree<SourceMapAdapter<'source_map>>,
    /// Marks the type as `!Unpin`.
    _pin_marker: PhantomPinned,
}
62
63impl<'source_map> SourceMap<'source_map> {
64    pub fn new(main_file_name: &'source_map str) -> Self {
65        Self {
66            main_file_name,
67            expanded_source: "",
68            children: Default::default(),
69            map: RBTree::new(SourceMapAdapter::new()),
70            _pin_marker: PhantomPinned,
71        }
72    }
73
74    pub fn resolve_span_within_line(
75        &self,
76        span: Span,
77        translate_lines: bool,
78    ) -> (&'source_map str, LineNumber, Option<String>, Range<Index>) {
79        let expansion_end = self.expanded_source.len();
80
81        let start = self.expanded_source[..span.get_start() as usize]
82            .rfind('\n')
83            .map_or(0, |line_pos| line_pos + 1);
84        //we don't want to include the newline
85        let end = self.expanded_source[span.get_end() as usize..]
86            .find('\n')
87            .unwrap_or(expansion_end);
88        let range = Range {
89            start: span.get_start() - start as Index,
90            end: span.get_end() - start as Index,
91        };
92        let end = span.get_end() as usize + end;
93
94        let mut containing_expansion_description = None;
95        let cursor = self.map.upper_bound(Bound::Included(&(start as Index)));
96        let line_number = if let Some(substitution) = cursor.get() {
97            match substitution.end.get() as usize {
98                previous_end if translate_lines && (0..=start).contains(&previous_end) => {
99                    substitution.original_last_line
100                        + bytecount::count(
101                            self.expanded_source[previous_end..start].as_bytes(),
102                            b'\n',
103                        ) as LineNumber
104                }
105                start_substitution_end
106                    if translate_lines && (0..end).contains(&start_substitution_end) =>
107                {
108                    substitution.original_last_line
109                        - bytecount::count(
110                            self.expanded_source[span.get_start() as usize..start_substitution_end]
111                                .as_bytes(),
112                            b'\n',
113                        ) as LineNumber
114                }
115                parent_substitution_end
116                    if translate_lines
117                        && (end..=expansion_end).contains(&parent_substitution_end) =>
118                {
119                    let mut column: u16 = 0;
120                    for &byte in self.expanded_source
121                        [..substitution.original_span.get_start() as usize]
122                        .as_bytes()
123                        .iter()
124                        .rev()
125                    {
126                        if byte == b'\n' && (byte >> 6) != 0b10 {
127                            break;
128                        }
129                        column += ((byte >> 6) != 0b10) as u16
130                    } //This is here so that non ASCII characters don't count as multiple characters. Its done this way and not using iterators for performance reasons (see std reverse count method for explanation of a similar optimization)
131                    let (name, line_offset) = match substitution.stype {
132                        SourceType::File => (
133                            format!(
134                                "{}:{}:{} Occurred inside include expansion\n --> {}",
135                                self.main_file_name,
136                                substitution.original_first_line + 1,
137                                column,
138                                substitution.name
139                            ),
140                            0,
141                        ),
142                        SourceType::Macro { definition_line } => (
143                            format!(
144                                "{}:{}:{} Occurred inside macro expansion of `{}\n --> {}",
145                                self.main_file_name,
146                                substitution.original_first_line + 1,
147                                column,
148                                substitution.name,
149                                self.main_file_name
150                            ),
151                            definition_line,
152                        ),
153                    };
154                    containing_expansion_description = Some(name);
155                    if translate_lines {
156                        let bytes_to_substitution_start =
157                            self.expanded_source[substitution.start as usize..start].as_bytes();
158                        line_offset
159                            + bytecount::count(bytes_to_substitution_start, b'\n') as LineNumber
160                    } else {
161                        bytecount::count(self.expanded_source[..start].as_bytes(), b'\n')
162                            as LineNumber
163                    }
164                }
165                _ => {
166                    debug_assert!(!translate_lines);
167                    bytecount::count(self.expanded_source[..start].as_bytes(), b'\n') as LineNumber
168                }
169            }
170        } else {
171            let str_to_count = self.expanded_source[..start as usize].as_bytes();
172            bytecount::count(str_to_count, b'\n') as LineNumber
173        };
174        (
175            &self.expanded_source[start..end],
176            line_number + 1,
177            containing_expansion_description,
178            range,
179        )
180    }
181
182    #[must_use]
183    pub fn resolve_span(
184        &self,
185        span: Span,
186        translate_lines: bool,
187    ) -> (
188        &'source_map str,
189        LineNumber,
190        Option<(&'source_map str, LineNumber)>,
191    ) {
192        let expansion_end = self.expanded_source.len();
193        let start = span.get_start() as usize;
194        let end = span.get_end() as usize;
195        let mut containing_expansion_description = None;
196        let cursor = self.map.upper_bound(Bound::Included(&(start as Index)));
197        let line_number = if let Some(substitution) = cursor.get() {
198            match substitution.end.get() as usize {
199                previous_end if translate_lines && (0..=start).contains(&previous_end) => {
200                    substitution.original_last_line
201                        + bytecount::count(
202                            self.expanded_source[previous_end..start].as_bytes(),
203                            b'\n',
204                        ) as LineNumber
205                }
206                start_substitution_end
207                    if translate_lines && (0..end).contains(&start_substitution_end) =>
208                {
209                    substitution.original_last_line
210                        - bytecount::count(
211                            self.expanded_source[start..start_substitution_end].as_bytes(),
212                            b'\n',
213                        ) as LineNumber
214                }
215                parent_substitution_end
216                    if translate_lines
217                        && (end..=expansion_end).contains(&parent_substitution_end) =>
218                {
219                    let (name, line_offset) = match substitution.stype {
220                        SourceType::File => (substitution.name, 0),
221                        SourceType::Macro { definition_line } => {
222                            (substitution.name, definition_line)
223                        }
224                    };
225                    containing_expansion_description =
226                        Some((name, substitution.original_first_line));
227                    if translate_lines {
228                        let bytes_to_substitution_start =
229                            self.expanded_source[substitution.start as usize..start].as_bytes();
230                        line_offset
231                            + bytecount::count(bytes_to_substitution_start, b'\n') as LineNumber
232                    } else {
233                        bytecount::count(self.expanded_source[..start].as_bytes(), b'\n')
234                            as LineNumber
235                    }
236                }
237                _ => {
238                    debug_assert!(!translate_lines);
239                    bytecount::count(self.expanded_source[..start].as_bytes(), b'\n') as LineNumber
240                }
241            }
242        } else {
243            let str_to_count = self.expanded_source[..start as usize].as_bytes();
244            bytecount::count(str_to_count, b'\n') as LineNumber
245        };
246        (
247            &self.expanded_source[start..end],
248            line_number + 1,
249            containing_expansion_description,
250        )
251    }
252}
253
/// Bookkeeping for one substitution that is currently being expanded by the builder.
#[derive(Debug)]
struct SourceMapBuilderState<'lt> {
    /// Complete text of the substitution.
    source: &'lt str,
    /// Offset into `source` up to which the text has already been handled: everything before
    /// it was either copied into the expansion or replaced by a nested substitution.
    offset: Index,
}
259
/// Incrementally builds a `SourceMap` and the expanded source while the preprocessor runs.
pub(super) struct SourceMapBuilder<'lt, 'source_map> {
    /// Arena for everything that ends up in the finished source map (names, substitutions and
    /// the expanded source).
    source_map_allocator: &'source_map Bump,
    /// Arena for the contents of the files read while building.
    allocator: &'lt Bump,
    /// Cursor into the map of the source map; moved onto each root substitution right after
    /// it has been inserted.
    cursor: CursorMut<'source_map, SourceMapAdapter<'source_map>>,
    /// Expanded source text accumulated so far.
    expansion: String,
    /// Substitutions that are currently open; empty while the root file is being processed.
    substitution_stack: Vec<SourceMapBuilderState<'lt>>,
    /// Pointer to the source map inside the arena; dereferenced once the builder is done.
    source_map: NonNull<SourceMap<'source_map>>,
    /// Number of lines reported for the root file so far.
    root_line: LineNumber,
    /// Contents of the root file.
    root_file_contents: &'lt str,
    _phantom_data: PhantomData<CursorMut<'source_map, SourceMapAdapter<'source_map>>>,
}
271
272impl<'lt, 'source_map> SourceMapBuilder<'lt, 'source_map> {
273    pub(super) fn new(
274        source_map_allocator: &'source_map Bump,
275        builder_allocator: &'lt Bump,
276        main_file: &Path,
277    ) -> std::io::Result<(Self, Lexer<'lt>)> {
278        let root_file_contents = builder_allocator.alloc_str(&std::fs::read_to_string(main_file)?);
279        let name = &*source_map_allocator.alloc_str(main_file.to_str().unwrap());
280        let source_map = source_map_allocator.alloc_with(move || SourceMap::new(name));
281        let res = Self {
282            source_map_allocator,
283            allocator: builder_allocator,
284            source_map: NonNull::from(&*source_map),
285            cursor: source_map.map.cursor_mut(),
286            root_line: 0,
287            substitution_stack: Vec::new(),
288            expansion: "".to_string(),
289            root_file_contents,
290            _phantom_data: Default::default(),
291        };
292        Ok((res, Lexer::new(root_file_contents)))
293    }
294
295    pub fn done(self) -> &'source_map SourceMap<'source_map> {
296        let mut string = bumpalo::collections::String::new_in(self.source_map_allocator);
297        string.push_str(self.expansion.as_str());
298        string.push_str(&self.root_file_contents[self.get_current_root_offset()..]);
299        let res = unsafe { &mut *self.source_map.as_ptr() }; //this is save since we know that the source_map will outlive the builder since its allocated in the arena which is guaranteed to live for 'sorcemap (its only a pointer so we can have a mutable reference to it in the form of cursor while remebering its location in the arena which wont be used past this point)
300        res.expanded_source = string.into_bump_str();
301        &*res
302    }
303
304    fn get_current_root_offset(&self) -> usize {
305        self.cursor.get().map_or(0, |substitution| {
306            substitution.original_span.get_end() as usize
307        })
308    }
309
310    pub(super) fn new_lines(&mut self, lines: LineNumber) {
311        if self.substitution_stack.is_empty() {
312            //we only keep track of macro expansion independent line numbers in the mainfile
313            self.root_line += lines;
314        }
315    }
316
317    pub(super) fn current_root_line(&mut self) -> LineNumber {
318        self.root_line
319    }
320
321    fn enter_root_substitution(
322        &mut self,
323        start: Index,
324        stype: SourceType,
325        original_span: Span,
326        source: &'lt str,
327        name: &str,
328    ) {
329        let substitution = {
330            let name = &*self.source_map_allocator.alloc_str(name);
331            let range: Range<usize> = original_span.into();
332            let original_source = &self.root_file_contents[range];
333            let original_lines = bytecount::count(original_source.as_bytes(), b'\n') as LineNumber;
334            let root_line = self.root_line;
335            self.source_map_allocator.alloc_with(|| Substitution {
336                name,
337                start,
338                end: Cell::new(0),
339                stype,
340                original_span,
341                original_first_line: root_line,
342                original_last_line: root_line + original_lines,
343                link: RBTreeLink::new(),
344            })
345        };
346        let preceding_root_slice_start = self.get_current_root_offset();
347        let preceding_root_slice_end =
348            self.cursor
349                .get()
350                .map_or(original_span.get_start() as usize, |substitution| {
351                    original_span.get_start() as usize - preceding_root_slice_start
352                        + substitution.original_span.get_end() as usize
353                });
354        self.expansion.push_str(
355            &self.root_file_contents[preceding_root_slice_start..preceding_root_slice_end],
356        );
357        self.cursor.insert_after(substitution);
358        self.cursor.move_next();
359        self.expansion.reserve(source.len()); //Expansions are typically longer than their names (they would be pointless otherwise)
360        self.substitution_stack
361            .push(SourceMapBuilderState { source, offset: 0 })
362    }
363
364    pub(super) fn enter_non_root_substitution(&mut self, original_span: Span, source: &'lt str) {
365        #[cfg(debug_assertions)]
366        assert_ne!(self.substitution_stack.len(), 0);
367        let parent_src_state = self.substitution_stack.last_mut().unwrap();
368        let old_offset = parent_src_state.offset as usize;
369        parent_src_state.offset = original_span.get_end();
370        self.expansion
371            .push_str(&parent_src_state.source[old_offset..original_span.get_start() as usize]);
372        self.substitution_stack
373            .push(SourceMapBuilderState { source, offset: 0 });
374    }
375
376    /// This function is called when the end of any substitution (macro / file include) is reached
377    ///
378    /// # Returns
379    /// The original length of the substitution
380    pub(super) fn finish_substitution(&mut self) -> Index {
381        let SourceMapBuilderState { source, offset } = self
382            .substitution_stack
383            .pop()
384            .expect("SourceBuilder: Substitution stack is empty");
385
386        let remaining_str = &source[offset as usize..];
387
388        self.expansion.push_str(remaining_str);
389
390        if self.substitution_stack.is_empty() {
391            self.cursor
392                .get()
393                .unwrap()
394                .end
395                .set(self.expansion.len() as Index);
396        }
397
398        source.len() as Index
399    }
400
401    /// This Function is called when a new File is entered (using the `include` directive).
402    pub(crate) fn enter_file(
403        &mut self,
404        path: &Path,
405        start: Index,
406        original_span: Span,
407    ) -> std::io::Result<Lexer<'lt>> {
408        let contents = std::fs::read_to_string(path)?;
409        let contents = &*self.allocator.alloc_str(&contents);
410
411        if self.substitution_stack.is_empty() {
412            self.enter_root_substitution(
413                start,
414                SourceType::File,
415                original_span,
416                contents,
417                path.to_str().unwrap(),
418            );
419        } else {
420            self.enter_non_root_substitution(original_span, contents)
421        }
422
423        Ok(Lexer::new(contents))
424    }
425
426    ///  The preprocessor calls this function to indicate that a Macro reference has been encountered and that tokens will now be consumed from this Macro
427    pub(super) fn enter_macro(
428        &mut self,
429        start: Index,
430        original_span: Span,
431        definition: &'lt str,
432        definition_line: LineNumber,
433        name: &str,
434    ) {
435        if self.substitution_stack.is_empty() {
436            self.enter_root_substitution(
437                start,
438                SourceType::Macro { definition_line },
439                original_span,
440                definition,
441                name,
442            )
443        } else {
444            self.enter_non_root_substitution(original_span, definition)
445        }
446    }
447
448    pub(super) fn source(&self) -> &'lt str {
449        if let Some(state) = self.substitution_stack.last() {
450            state.source
451        } else {
452            self.root_file_contents
453        }
454    }
455}