codespan_preprocessed/
codemap.rs

1use codespan_reporting::files;
2use codespan_reporting::files::{Files, SimpleFile};
3use std::cmp::Ordering;
4use std::io::Read;
5use std::iter;
6use std::ops::{Index, Range};
7use std::path::Path;
8
/// A `#line` directive located in the preprocessed input.
#[derive(Debug, Clone)]
struct LineDirective {
    /// Index (0-based) of the physical line holding the directive.
    line_index: usize,
    /// Byte index at which the directive line starts.
    byte_index: usize,
    /// Shift between physical line indices and the line indices
    /// announced by the directive (physical = announced + offset).
    offset: isize,
    /// Byte range of the file name carried by the directive, if any.
    filename: Option<Range<usize>>,
}
16
/// A slice of the input file.
///
/// The input is cut into slices, each one starting after a location
/// directive. A slice is also what identifies a file for
/// `codespan_reporting`.
#[derive(Debug, Clone, PartialEq)]
pub struct FileSlice {
    /// Byte range of the file name inside the input.
    name: Range<usize>,
    /// Byte range covered by the slice inside the input.
    bytes: Range<usize>,
    /// Range of physical line indices covered by the slice.
    lines: Range<usize>,
    /// Shift between physical line indices and reported line indices.
    offset: isize,
}
30
31/// The codemap of a preprocessed file.
32#[derive(Debug)]
33pub struct PreprocessedFile<Source> {
34    ids: Vec<FileSlice>,
35    lines: Vec<Range<usize>>,
36    contents: Source,
37}
38
39impl<'a, S: 'a + AsRef<str>> Files<'a> for PreprocessedFile<S> {
40    type FileId = &'a FileSlice;
41    type Name = &'a str;
42    type Source = &'a str;
43
44    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, files::Error> {
45        Ok(self.contents.as_ref().index(id.name.clone()))
46    }
47
48    fn source(&'a self, _: Self::FileId) -> Result<Self::Source, files::Error> {
49        Ok(self.contents.as_ref())
50    }
51
52    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, files::Error> {
53        if id.bytes.end <= byte_index {
54            Ok((id.lines.end as isize - 1 - id.offset) as usize)
55        } else if byte_index < id.bytes.start {
56            Err(files::Error::FileMissing)
57        } else {
58            Ok((self
59                .lines
60                .binary_search_by(|bytes| {
61                    if byte_index < bytes.start {
62                        Ordering::Greater
63                    } else if byte_index > bytes.end {
64                        Ordering::Less
65                    } else {
66                        Ordering::Equal
67                    }
68                })
69                .unwrap() as isize
70                - id.offset) as usize)
71        }
72    }
73
74    fn line_range(
75        &'a self,
76        id: Self::FileId,
77        line_index: usize,
78    ) -> Result<Range<usize>, files::Error> {
79        self.lines
80            .get((line_index as isize + id.offset) as usize)
81            .cloned()
82            .ok_or(files::Error::LineTooLarge {
83                given: line_index,
84                max: self.lines.len(),
85            })
86    }
87}
88
89impl<Source> PreprocessedFile<Source>
90where
91    Source: AsRef<str>,
92{
93    pub fn new(contents: Source) -> Self {
94        let mut line_endings = contents
95            .as_ref()
96            .match_indices('\n')
97            .map(|(b, _)| b)
98            .collect::<Vec<_>>();
99
100        // if the last line is not terminated with an EOL, assume it
101        match line_endings.last() {
102            // nothing to do,the last line will have an EOL
103            Some(l) if *l == contents.as_ref().len() - 1 => {}
104            // the data has no EOL at the end...
105            _ => line_endings.push(contents.as_ref().len()),
106        }
107
108        let line_ranges = iter::once(0)
109            .chain(line_endings.iter().map(|e| *e + 1))
110            .zip(line_endings.iter())
111            .map(|(s, e)| s..*e)
112            .collect::<Vec<_>>();
113
114        let directives = line_ranges
115            .iter()
116            .enumerate()
117            .filter(|(_, r)| contents.as_ref()[r.start..r.end].starts_with("#line"))
118            .map(|(l, r)| {
119                let str = &contents.as_ref()[r.start..r.end];
120                if let Some(sep) = str[6..].find(' ') {
121                    let sep = sep + 6;
122                    LineDirective {
123                        line_index: l,
124                        byte_index: r.start,
125                        offset: l as isize + 2 - str[6..sep].parse::<isize>().unwrap(),
126                        filename: Some(r.start + sep + 2..r.start + str.len() - 1),
127                    }
128                } else {
129                    LineDirective {
130                        line_index: l,
131                        byte_index: r.start,
132                        offset: l as isize + 2 - str[6..].parse::<isize>().unwrap(),
133                        filename: None,
134                    }
135                }
136            })
137            .collect::<Vec<_>>();
138
139        let mut current = 0..0;
140        let mut files = Vec::with_capacity(directives.len() + 2);
141
142        if let Some(first) = directives.first() {
143            if first.line_index > 0 {
144                files.push(FileSlice {
145                    name: current.clone(),
146                    bytes: 0..first.byte_index,
147                    lines: 0..first.line_index,
148                    offset: 0,
149                });
150            }
151            files.extend(
152                directives
153                    .iter()
154                    .zip(directives.iter().skip(1))
155                    .map(|(start, end)| {
156                        if let Some(filename) = start.filename.clone() {
157                            current = filename;
158                        }
159                        FileSlice {
160                            name: current.clone(),
161                            bytes: line_ranges[start.line_index + 1].start..end.byte_index,
162                            lines: start.line_index + 1..end.line_index,
163                            offset: start.offset,
164                        }
165                    }),
166            );
167
168            let last_directive = directives.last().unwrap();
169
170            // if the file ends with a directive (which should never
171            // happen when it comes from m4 or cpp), we ignore this last one
172            // since it will generate out of bounds for line_ranges access
173            if last_directive.line_index + 1 < line_ranges.len() {
174                // ok, here, we know that there is some chars behind the directive
175                files.push(FileSlice {
176                    name: last_directive.filename.clone().unwrap_or(current),
177                    bytes: line_ranges[last_directive.line_index + 1].start
178                        ..line_ranges.last().unwrap().end,
179                    lines: last_directive.line_index + 1..line_ranges.len(),
180                    offset: last_directive.offset,
181                });
182            }
183        } else {
184            files.push(FileSlice {
185                name: current,
186                bytes: 0..line_ranges.last().unwrap().end,
187                lines: 0..line_ranges.len(),
188                offset: 0,
189            })
190        }
191
192        PreprocessedFile {
193            ids: files,
194            lines: line_ranges,
195            contents,
196        }
197    }
198
199    #[inline]
200    pub fn source(&self) -> &str {
201        self.contents.as_ref()
202    }
203
204    #[inline]
205    pub fn len(&self) -> usize {
206        self.source().len()
207    }
208
209    #[inline]
210    pub fn is_empty(&self) -> bool {
211        self.source().is_empty()
212    }
213}
214
215impl PreprocessedFile<String> {
216    pub fn open<P: AsRef<Path>>(filename: P) -> Result<Self, std::io::Error> {
217        let mut file = std::fs::File::open(&filename)?;
218        let mut buf = Vec::new();
219        file.read_to_end(&mut buf)?;
220        // prepend '#line' directive to correctly locate diagnosis
221        let contents = format!(
222            "#line 1 \"{}\"\n{}",
223            filename.as_ref().to_string_lossy(),
224            String::from_utf8(buf).expect("invalid UTF-8 characters in file")
225        );
226        Ok(PreprocessedFile::new(contents))
227    }
228
229    pub fn from_stdin() -> Result<Self, std::io::Error> {
230        let mut buf = Vec::new();
231        std::io::stdin().read_to_end(&mut buf)?;
232        let contents = String::from_utf8(buf).expect("invalid UTF-8 characters on stdin");
233        Ok(PreprocessedFile::new(contents))
234    }
235}
236
237pub trait EasyLocation<'a>: Files<'a> {
238    fn file_id(&'a self, byte_index: usize) -> <Self as Files<'a>>::FileId;
239}
240
241impl<'a, S: 'a + AsRef<str>> EasyLocation<'a> for PreprocessedFile<S> {
242    fn file_id(&'a self, byte_index: usize) -> <Self as Files<'a>>::FileId {
243        // as ids are sorted according to the byte order of the input,
244        // we could use a binary_search...
245        match self.ids.binary_search_by(|x| {
246            if byte_index < x.bytes.start {
247                Ordering::Greater
248            } else if byte_index > x.bytes.end {
249                Ordering::Less
250            } else {
251                Ordering::Equal
252            }
253        }) {
254            Ok(i) => &self.ids[i],
255            Err(i) if i < self.ids.len() => &self.ids[i],
256            _ => self.ids.last().unwrap(),
257        }
258    }
259}
260
261impl<'a, N, S> EasyLocation<'a> for SimpleFile<N, S>
262where
263    N: 'a + std::fmt::Display + Clone,
264    S: 'a + AsRef<str>,
265{
266    fn file_id(&'a self, _: usize) -> <Self as Files<'a>>::FileId {}
267}